def __init__(self, test_context):
    """Set up a single-broker cluster plus the log4j appender and a console consumer."""
    super(Log4jAppenderTest, self).__init__(
        test_context,
        num_zk=1,
        num_brokers=1,
        topics={TOPIC: {'partitions': 1, 'replication-factor': 1}})

    self.num_nodes = 1

    # The appender writes MAX_MESSAGES log events into TOPIC; the consumer reads them back.
    self.appender = KafkaLog4jAppender(self.test_context, self.num_nodes,
                                       self.kafka, TOPIC, MAX_MESSAGES)
    self.consumer = ConsoleConsumer(self.test_context,
                                    num_nodes=self.num_nodes,
                                    kafka=self.kafka,
                                    topic=TOPIC,
                                    consumer_timeout_ms=1000)
def __init__(self, test_context):
    """Wire up ZooKeeper, a single-broker Kafka cluster and a console consumer."""
    super(ConsoleConsumerTest, self).__init__(test_context)

    self.topic = "topic"
    self.zk = ZookeeperService(test_context, num_nodes=1)
    # One broker chrooted under /kafka, hosting a single-partition topic.
    self.kafka = KafkaService(self.test_context,
                              num_nodes=1,
                              zk=self.zk,
                              zk_chroot="/kafka",
                              topics={self.topic: {"partitions": 1, "replication-factor": 1}})
    self.consumer = ConsoleConsumer(self.test_context,
                                    num_nodes=1,
                                    kafka=self.kafka,
                                    topic=self.topic)
def test_compatibility(self, producer_version, consumer_version, compression_types,
                       new_consumer=True, timestamp_type=None, metadata_quorum=quorum.zk):
    """Produce with one client version and consume with another, then validate delivery."""
    # Guard clause: the old (ZK-based) consumer cannot work against a Raft quorum.
    if not new_consumer and metadata_quorum != quorum.zk:
        raise Exception("ZooKeeper-based consumers are not supported when using a Raft-based metadata quorum")

    topic_config = {"partitions": 3,
                    "replication-factor": 3,
                    'configs': {"min.insync.replicas": 2}}
    self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk,
                              version=DEV_BRANCH,
                              topics={self.topic: topic_config},
                              controller_num_nodes_override=1)

    # Optionally force a broker-side message timestamp type on every node.
    if timestamp_type is not None:
        for broker in self.kafka.nodes:
            broker.config[config_property.MESSAGE_TIMESTAMP_TYPE] = timestamp_type

    self.kafka.start()

    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, self.topic,
                                       throughput=self.producer_throughput,
                                       message_validator=is_int,
                                       compression_types=compression_types,
                                       version=KafkaVersion(producer_version))
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, self.topic,
                                    consumer_timeout_ms=30000,
                                    new_consumer=new_consumer,
                                    message_validator=is_int,
                                    version=KafkaVersion(consumer_version))

    def produced_enough():
        return self.producer.each_produced_at_least(self.messages_per_producer) == True

    self.run_produce_consume_validate(lambda: wait_until(
        produced_enough,
        timeout_sec=120,
        backoff_sec=1,
        err_msg="Producer did not produce all messages in reasonable amount of time"))
def test_quota(self, quota_type, override_quota=True, producer_num=1, consumer_num=1,
               old_broker_throttling_behavior=False, old_client_throttling_behavior=False):
    """Verify client quota enforcement by producing and consuming at unlimited throughput.

    :param quota_type: which quota dimension to configure (see QuotaConfig)
    :param override_quota: whether to set an entity-level override
    :param producer_num: number of producer nodes
    :param consumer_num: number of consumer nodes
    :param old_broker_throttling_behavior: run brokers at LATEST_1_1 (pre-2.0 throttling)
    :param old_client_throttling_behavior: run clients at LATEST_1_1 (pre-2.0 throttling)
    """
    # Old (pre-2.0) throttling behavior for broker throttles before sending a response to the client.
    if old_broker_throttling_behavior:
        self.kafka.set_version(LATEST_1_1)
    self.kafka.start()

    self.quota_config = QuotaConfig(quota_type, override_quota, self.kafka)
    producer_client_id = self.quota_config.client_id
    consumer_client_id = self.quota_config.client_id

    # Old (pre-2.0) throttling behavior for client does not throttle upon receiving a response
    # with a non-zero throttle time.
    client_version = LATEST_1_1 if old_client_throttling_behavior else DEV_BRANCH

    # Produce all messages
    producer = ProducerPerformanceService(self.test_context, producer_num, self.kafka,
                                          topic=self.topic,
                                          num_records=self.num_records,
                                          record_size=self.record_size,
                                          throughput=-1,
                                          client_id=producer_client_id,
                                          version=client_version)
    producer.run()

    # Consume all messages
    consumer = ConsoleConsumer(
        self.test_context, consumer_num, self.kafka, self.topic,
        consumer_timeout_ms=60000, client_id=consumer_client_id,
        jmx_object_names=['kafka.consumer:type=consumer-fetch-manager-metrics,client-id=%s' % consumer_client_id],
        jmx_attributes=['bytes-consumed-rate'],
        version=client_version)
    consumer.run()

    # BUG FIX: dict.iteritems() was removed in Python 3; .items() behaves the same on
    # both Python 2 and 3.
    for idx, messages in consumer.messages_consumed.items():
        assert len(messages) > 0, "consumer %d didn't consume any message before timeout" % idx

    success, msg = self.validate(self.kafka, producer, consumer)
    assert success, msg
def start_consumer(self):
    """Launch a validating console consumer across all broker nodes."""
    self.consumer = ConsoleConsumer(
        self.test_context,
        num_nodes=self.num_brokers,
        kafka=self.kafka,
        topic=TOPIC,
        consumer_timeout_ms=10000,
        message_validator=self.custom_message_validator)
    self.consumer.start()
def start_consumer(self, security_protocol):
    """Start a console consumer; the new (Java) consumer is used only for SSL."""
    use_new_consumer = (security_protocol == SecurityConfig.SSL)
    self.consumer = ConsoleConsumer(self.test_context,
                                    num_nodes=self.num_brokers,
                                    kafka=self.kafka,
                                    topic=TOPIC,
                                    consumer_timeout_ms=None,
                                    new_consumer=use_new_consumer)
    self.consumer.start()
def create_producer_and_consumer(self):
    """Instantiate the verifiable producer and a grouped console consumer."""
    self.producer = VerifiableProducer(
        self.test_context, self.num_producers, self.kafka, self.topic,
        throughput=self.producer_throughput)
    self.consumer = ConsoleConsumer(
        self.test_context, self.num_consumers, self.kafka, self.topic,
        consumer_timeout_ms=60000,
        message_validator=is_int,
        new_consumer=True)
    # All consumer instances join the same consumer group.
    self.consumer.group_id = "group"
def start_consumer(self, security_protocol):
    """Start a validating console consumer; secured protocols require the new consumer."""
    # Anything other than PLAINTEXT needs the new (Java) consumer.
    use_new_consumer = security_protocol != SecurityConfig.PLAINTEXT
    self.consumer = ConsoleConsumer(
        self.test_context,
        num_nodes=self.num_brokers,
        kafka=self.kafka,
        topic=TOPIC,
        consumer_timeout_ms=1000,
        new_consumer=use_new_consumer,
        message_validator=self.custom_message_validator)
    self.consumer.start()
def test_transformations(self, connect_protocol):
    """End-to-end check of Connect single-message transforms (SMTs).

    Runs a file source connector with a HoistField + InsertField transform chain,
    then consumes the output topic (with timestamps printed) and verifies that each
    record's schema, payload content, and injected timestamp field match expectations.
    """
    self.CONNECT_PROTOCOL = connect_protocol
    # CreateTime broker setting so record timestamps are the producer-assigned ones.
    self.setup_services(timestamp_type='CreateTime')
    self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
    self.cc.start()

    ts_fieldname = 'the_timestamp'

    NamedConnector = namedtuple('Connector', ['name'])
    source_connector = NamedConnector(name='file-src')

    # Transform chain: wrap the raw line in a struct under 'content', then insert
    # the record timestamp as an extra field.
    self.cc.create_connector({
        'name': source_connector.name,
        'connector.class': 'org.apache.kafka.connect.file.FileStreamSourceConnector',
        'tasks.max': 1,
        'file': self.INPUT_FILE,
        'topic': self.TOPIC,
        'transforms': 'hoistToStruct,insertTimestampField',
        'transforms.hoistToStruct.type': 'org.apache.kafka.connect.transforms.HoistField$Value',
        'transforms.hoistToStruct.field': 'content',
        'transforms.insertTimestampField.type': 'org.apache.kafka.connect.transforms.InsertField$Value',
        'transforms.insertTimestampField.timestamp.field': ts_fieldname,
    })

    wait_until(lambda: self.connector_is_running(source_connector), timeout_sec=30,
               err_msg='Failed to see connector transition to the RUNNING state')

    # Append input lines on every worker node; the source connector tails the file.
    for node in self.cc.nodes:
        node.account.ssh("echo -e -n " + repr(self.FIRST_INPUTS) + " >> " + self.INPUT_FILE)

    # print_timestamp=True prefixes each record with "CreateTime:<ts>\t".
    consumer = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC,
                               consumer_timeout_ms=15000, print_timestamp=True)
    consumer.run()

    assert len(consumer.messages_consumed[1]) == len(self.FIRST_INPUT_LIST)

    # Schema produced by the HoistField + InsertField chain with JsonConverter.
    expected_schema = {
        'type': 'struct',
        'fields': [
            {'field': 'content', 'type': 'string', 'optional': False},
            {'field': ts_fieldname, 'name': 'org.apache.kafka.connect.data.Timestamp',
             'type': 'int64', 'version': 1, 'optional': True},
        ],
        'optional': False
    }

    for msg in consumer.messages_consumed[1]:
        (ts_info, value) = msg.split('\t')
        assert ts_info.startswith('CreateTime:')
        ts = int(ts_info[len('CreateTime:'):])
        obj = json.loads(value)
        assert obj['schema'] == expected_schema
        assert obj['payload']['content'] in self.FIRST_INPUT_LIST
        # The injected timestamp field must equal the record's own timestamp.
        assert obj['payload'][ts_fieldname] == ts
def __init__(self, test_context):
    """Set up a delegation-token-enabled broker plus clients that authenticate with a token.

    The broker enables delegation tokens via server property overrides (secret key and
    token lifetimes) and supports both GSSAPI (used to obtain tokens) and SCRAM-SHA-256
    (used by clients authenticating with a token).
    """
    super(DelegationTokenTest, self).__init__(test_context)

    self.test_context = test_context
    self.topic = "topic"
    self.zk = ZookeeperService(test_context, num_nodes=1)
    self.kafka = KafkaService(
        self.test_context,
        num_nodes=1,
        zk=self.zk,
        zk_chroot="/kafka",
        topics={self.topic: {
            "partitions": 1,
            "replication-factor": 1
        }},
        # Token max lifetime 7 days, expiry 1 day; static secret key for the test.
        server_prop_overides=[[
            config_property.DELEGATION_TOKEN_MAX_LIFETIME_MS, "604800000"
        ], [config_property.DELEGATION_TOKEN_EXPIRY_TIME_MS, "86400000"
            ], [config_property.DELEGATION_TOKEN_SECRET_KEY, "test12345"], [
                config_property.SASL_ENABLED_MECHANISMS, "GSSAPI,SCRAM-SHA-256"
            ]])

    # JAAS config holding the delegation token credentials; written later by the test.
    self.jaas_deleg_conf_path = "/tmp/jaas_deleg.conf"
    self.jaas_deleg_conf = ""

    # Client properties: authenticate over SASL_PLAINTEXT with SCRAM using the token.
    # NOTE(review): reconstructed as one property per line (Java properties format);
    # the collapsed source lost the original line breaks — confirm against history.
    self.client_properties_content = """
security.protocol=SASL_PLAINTEXT
sasl.mechanism=SCRAM-SHA-256
sasl.kerberos.service.name=kafka
client.id=console-consumer
"""
    self.client_kafka_opts = ' -Djava.security.auth.login.config=' + self.jaas_deleg_conf_path

    self.producer = VerifiableProducer(
        self.test_context,
        num_nodes=1,
        kafka=self.kafka,
        topic=self.topic,
        max_messages=1,
        throughput=1,
        kafka_opts_override=self.client_kafka_opts,
        client_prop_file_override=self.client_properties_content)

    self.consumer = ConsoleConsumer(
        self.test_context,
        num_nodes=1,
        kafka=self.kafka,
        topic=self.topic,
        kafka_opts_override=self.client_kafka_opts,
        client_prop_file_override=self.client_properties_content)

    # Interbroker traffic stays on GSSAPI; clients may use either mechanism.
    self.kafka.security_protocol = 'SASL_PLAINTEXT'
    self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
    self.kafka.interbroker_sasl_mechanism = 'GSSAPI'
def test_compatibility(self, producer_version, consumer_version, compression_types,
                       new_consumer=True, timestamp_type=None):
    """Produce with one client version and consume with another against TRUNK brokers."""
    topic_config = {"partitions": 3,
                    "replication-factor": 3,
                    'configs': {"min.insync.replicas": 2}}
    self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk,
                              version=TRUNK,
                              topics={self.topic: topic_config})

    # Optionally force a broker-side message timestamp type on every node.
    if timestamp_type is not None:
        for broker in self.kafka.nodes:
            broker.config[config_property.MESSAGE_TIMESTAMP_TYPE] = timestamp_type

    self.kafka.start()

    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, self.topic,
                                       throughput=self.producer_throughput,
                                       message_validator=is_int,
                                       compression_types=compression_types,
                                       version=KafkaVersion(producer_version))
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, self.topic,
                                    consumer_timeout_ms=30000,
                                    new_consumer=new_consumer,
                                    message_validator=is_int,
                                    version=KafkaVersion(consumer_version))

    def produced_enough():
        return self.producer.each_produced_at_least(self.messages_per_producer) == True

    self.run_produce_consume_validate(lambda: wait_until(
        produced_enough,
        timeout_sec=120,
        backoff_sec=1,
        err_msg="Producer did not produce all messages in reasonable amount of time"))
def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter",
                              schemas=True, security_protocol='PLAINTEXT'):
    """
    Validates basic end-to-end functionality of Connect standalone using the file source and sink
    converters. Includes parameterizations to test different converters (which also test
    per-connector converter overrides), schema/schemaless modes, and security support.

    :param converter: fully-qualified converter class name for the connector override
    :param schemas: whether the JSON converter wraps payloads in schema envelopes
    :param security_protocol: broker security protocol for both client and interbroker traffic
    """
    # BUG FIX: comparisons to None should use identity, not equality (PEP 8).
    assert converter is not None, "converter type must be set"
    # Template parameters. Note that we don't set key/value.converter. These default to
    # JsonConverter and we validate converter overrides via the connector configuration.
    if converter != "org.apache.kafka.connect.json.JsonConverter":
        self.override_key_converter = converter
        self.override_value_converter = converter
    self.schemas = schemas

    self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk,
                              security_protocol=security_protocol,
                              interbroker_security_protocol=security_protocol,
                              topics=self.topics)

    # File source feeds the topic; file sink drains it back out to OUTPUT_FILE.
    self.source = ConnectStandaloneService(self.test_context, self.kafka,
                                           [self.INPUT_FILE, self.OFFSETS_FILE])
    self.sink = ConnectStandaloneService(self.test_context, self.kafka,
                                         [self.OUTPUT_FILE, self.OFFSETS_FILE])
    self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka,
                                              self.TOPIC_TEST,
                                              consumer_timeout_ms=10000)

    self.zk.start()
    self.kafka.start()

    self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node),
                            [self.render("connect-file-source.properties")])
    self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node),
                          [self.render("connect-file-sink.properties")])

    self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))
    self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))

    self.source.start()
    self.sink.start()

    # Generating data on the source node should generate new records and create new output on the sink node
    self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE)
    wait_until(lambda: self.validate_output(self.FIRST_INPUT), timeout_sec=60,
               err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.")

    # Restarting both should result in them picking up where they left off,
    # only processing new data.
    self.source.restart()
    self.sink.restart()

    self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE)
    wait_until(lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT), timeout_sec=60,
               err_msg="Sink output file never converged to the same state as the input file")

    # Validate the format of the data in the Kafka topic
    self.consumer_validator.run()
    expected = json.dumps([line if not self.schemas else {"schema": self.SCHEMA, "payload": line}
                           for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST])
    decoder = (json.loads if converter.endswith("JsonConverter") else str)
    actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]])
    assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual)
def test_upgrade(self, from_kafka_version, to_message_format_version, compression_types,
                 new_consumer=True, security_protocol="PLAINTEXT"):
    """Test upgrade of Kafka broker cluster from 0.8.2, 0.9.0 or 0.10.0 to the current version

    from_kafka_version is a Kafka version to upgrade from: either 0.8.2.X, 0.9.0.x or 0.10.0.x

    If to_message_format_version is None, it means that we will upgrade to default (latest)
    message format version. It is possible to upgrade to 0.10 brokers but still use message
    format version 0.9

    - Start 3 node broker cluster on version 'from_kafka_version'
    - Start producer and consumer in the background
    - Perform two-phase rolling upgrade
        - First phase: upgrade brokers to 0.10 with inter.broker.protocol.version set to
          from_kafka_version and log.message.format.version set to from_kafka_version
        - Second phase: remove inter.broker.protocol.version config with rolling bounce; if
          to_message_format_version is set to 0.9, set log.message.format.version to
          to_message_format_version, otherwise remove log.message.format.version config
    - Finally, validate that every message acked by the producer was consumed by the consumer
    """
    self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk,
                              version=KafkaVersion(from_kafka_version),
                              topics={self.topic: {"partitions": 3, "replication-factor": 3,
                                                   'configs': {"min.insync.replicas": 2}}})
    self.kafka.security_protocol = security_protocol
    self.kafka.interbroker_security_protocol = security_protocol
    self.kafka.start()

    self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka,
                                       self.topic, throughput=self.producer_throughput,
                                       message_validator=is_int,
                                       compression_types=compression_types,
                                       version=KafkaVersion(from_kafka_version))

    # presumably the cluster id znode does not exist before the upgrade — TODO confirm
    assert self.zk.query("/cluster/id") is None

    # TODO - reduce the timeout
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka,
                                    self.topic, consumer_timeout_ms=30000,
                                    new_consumer=new_consumer, message_validator=is_int,
                                    version=KafkaVersion(from_kafka_version))

    self.run_produce_consume_validate(
        core_test_action=lambda: self.perform_upgrade(from_kafka_version,
                                                      to_message_format_version))

    cluster_id_json = self.zk.query("/cluster/id")
    assert cluster_id_json is not None

    # BUG FIX: the original used a bare 'except:' which swallowed the parse failure and
    # then crashed with NameError on the unbound 'cluster_id' below, hiding the real
    # cause. Catch the JSON decode error explicitly, log the bad payload, and re-raise.
    try:
        cluster_id = json.loads(cluster_id_json)
    except ValueError:
        self.logger.debug("Data in /cluster/id znode could not be parsed. Data = %s" % cluster_id_json)
        raise

    self.logger.debug("Cluster id [%s]", cluster_id)
    # Cluster ids are 22-character base64-encoded UUIDs.
    assert len(cluster_id["id"]) == 22
def test_quota(self, quota_type, override_quota=True, producer_num=1, consumer_num=1):
    """Verify client quota enforcement using JMX-instrumented produce/consume runs.

    :param quota_type: which quota dimension to configure (see QuotaConfig)
    :param override_quota: whether to set an entity-level override
    :param producer_num: number of producer nodes
    :param consumer_num: number of consumer nodes
    """
    self.quota_config = QuotaConfig(quota_type, override_quota, self.kafka)
    producer_client_id = self.quota_config.client_id
    consumer_client_id = self.quota_config.client_id

    # Produce all messages
    producer = ProducerPerformanceService(
        self.test_context, producer_num, self.kafka,
        topic=self.topic, num_records=self.num_records,
        record_size=self.record_size, throughput=-1,
        client_id=producer_client_id,
        jmx_object_names=['kafka.producer:type=producer-metrics,client-id=%s' % producer_client_id],
        jmx_attributes=['outgoing-byte-rate'])
    producer.run()

    # Consume all messages
    consumer = ConsoleConsumer(
        self.test_context, consumer_num, self.kafka, self.topic,
        new_consumer=True,
        consumer_timeout_ms=60000, client_id=consumer_client_id,
        jmx_object_names=['kafka.consumer:type=consumer-fetch-manager-metrics,client-id=%s' % consumer_client_id],
        jmx_attributes=['bytes-consumed-rate'])
    consumer.run()

    # BUG FIX: dict.iteritems() was removed in Python 3; .items() behaves the same on
    # both Python 2 and 3.
    for idx, messages in consumer.messages_consumed.items():
        assert len(messages) > 0, "consumer %d didn't consume any message before timeout" % idx

    success, msg = self.validate(self.kafka, producer, consumer)
    assert success, msg
def produce_and_consume(self, producer_version, consumer_version, group):
    """Run a produce/consume cycle with specific client versions, then validate delivery."""
    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, self.topic,
                                       throughput=self.producer_throughput,
                                       message_validator=is_int,
                                       version=KafkaVersion(producer_version))
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, self.topic,
                                    new_consumer=False,
                                    consumer_timeout_ms=30000,
                                    message_validator=is_int,
                                    version=KafkaVersion(consumer_version))
    self.consumer.group_id = group

    def produced_enough():
        return self.producer.each_produced_at_least(self.messages_per_producer) == True

    self.run_produce_consume_validate(lambda: wait_until(
        produced_enough,
        timeout_sec=120,
        backoff_sec=1,
        err_msg="Producer did not produce all messages in reasonable amount of time"))
def test_replication_with_broker_failure(self, failure_mode, security_protocol, broker_type,
                                         client_sasl_mechanism="GSSAPI",
                                         interbroker_sasl_mechanism="GSSAPI",
                                         compression_type=None,
                                         enable_idempotence=False):
    """Replication tests.

    These tests verify that replication provides simple durability guarantees by checking
    that data acked by brokers is still available for consumption in the face of various
    failure scenarios.

    Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=3, replication-factor=3,
    and min.insync.replicas=2

    - Produce messages in the background
    - Consume messages in the background
    - Drive broker failures (shutdown, or bounce repeatedly with kill -15 or kill -9)
    - When done driving failures, stop producing, and finish consuming
    - Validate that every acked message was consumed
    """
    self.kafka.security_protocol = security_protocol
    self.kafka.interbroker_security_protocol = security_protocol
    self.kafka.client_sasl_mechanism = client_sasl_mechanism
    self.kafka.interbroker_sasl_mechanism = interbroker_sasl_mechanism

    # The old consumer only supports PLAINTEXT; anything secured needs the new consumer.
    new_consumer = self.kafka.security_protocol != "PLAINTEXT"
    self.enable_idempotence = enable_idempotence

    if compression_type:
        compression_types = [compression_type] * self.num_producers
    else:
        compression_types = None

    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, self.topic,
                                       throughput=self.producer_throughput,
                                       compression_types=compression_types,
                                       enable_idempotence=enable_idempotence)
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, self.topic,
                                    new_consumer=new_consumer,
                                    consumer_timeout_ms=60000,
                                    message_validator=is_int)

    self.kafka.start()
    self.run_produce_consume_validate(
        core_test_action=lambda: failures[failure_mode](self, broker_type))
def test_quota(self, producer_id='default_id', producer_num=1,
               consumer_id='default_id', consumer_num=1):
    """Verify client quota enforcement for the given client ids using the old consumer.

    :param producer_id: client.id used by the producer (quota entity)
    :param producer_num: number of producer nodes
    :param consumer_id: client.id used by the consumer (quota entity)
    :param consumer_num: number of consumer nodes
    """
    # Produce all messages
    producer = ProducerPerformanceService(
        self.test_context, producer_num, self.kafka,
        security_protocol=self.security_protocol,
        topic=self.topic, num_records=self.num_records,
        record_size=self.record_size, throughput=-1,
        client_id=producer_id,
        jmx_object_names=['kafka.producer:type=producer-metrics,client-id=%s' % producer_id],
        jmx_attributes=['outgoing-byte-rate'])
    producer.run()

    # Consume all messages
    consumer = ConsoleConsumer(
        self.test_context, consumer_num, self.kafka, self.topic,
        security_protocol=self.security_protocol,
        new_consumer=False,
        consumer_timeout_ms=60000, client_id=consumer_id,
        jmx_object_names=['kafka.consumer:type=ConsumerTopicMetrics,name=BytesPerSec,clientId=%s' % consumer_id],
        jmx_attributes=['OneMinuteRate'])
    consumer.run()

    # BUG FIX: dict.iteritems() was removed in Python 3; .items() behaves the same on
    # both Python 2 and 3.
    for idx, messages in consumer.messages_consumed.items():
        assert len(messages) > 0, "consumer %d didn't consume any message before timeout" % idx

    success, msg = self.validate(self.kafka, producer, consumer)
    assert success, msg
def start_consumer(self, topic_to_read, group_id):
    """Start a read-committed console consumer and wait until it has consumed something."""
    consumer = ConsoleConsumer(context=self.test_context,
                               num_nodes=1,
                               kafka=self.kafka,
                               topic=topic_to_read,
                               group_id=group_id,
                               message_validator=is_int,
                               from_beginning=True,
                               isolation_level="read_committed")
    consumer.start()
    # Block until at least one message arrives so we know the consumer is live.
    wait_until(lambda: len(consumer.messages_consumed[1]) > 0,
               timeout_sec=60,
               err_msg="Consumer failed to consume any messages for %ds" % 60)
    return consumer
def __init__(self, test_context):
    """Set up source/target clusters, a producer, mirror maker and a target-side consumer."""
    super(TestMirrorMakerService, self).__init__(test_context)

    self.topic = "topic"
    self.source_zk = ZookeeperService(test_context, num_nodes=1)
    self.target_zk = ZookeeperService(test_context, num_nodes=1)

    single_partition = {"partitions": 1, "replication-factor": 1}
    self.source_kafka = KafkaService(test_context, num_nodes=1, zk=self.source_zk,
                                     topics={self.topic: single_partition})
    self.target_kafka = KafkaService(test_context, num_nodes=1, zk=self.target_zk,
                                     topics={self.topic: single_partition})

    self.num_messages = 1000
    # This will produce to source kafka cluster
    self.producer = VerifiableProducer(test_context, num_nodes=1,
                                       kafka=self.source_kafka,
                                       topic=self.topic,
                                       max_messages=self.num_messages,
                                       throughput=1000)
    # Use a regex whitelist to check that the start command is well-formed in this case
    self.mirror_maker = MirrorMaker(test_context, num_nodes=1,
                                    source=self.source_kafka,
                                    target=self.target_kafka,
                                    whitelist=".*",
                                    consumer_timeout_ms=2000)
    # This will consume from target kafka cluster
    self.consumer = ConsoleConsumer(test_context, num_nodes=1,
                                    kafka=self.target_kafka,
                                    topic=self.topic,
                                    consumer_timeout_ms=1000)
def test_produce_consume(self, broker_version):
    """Produce and consume against the given broker version and validate delivery."""
    print("running producer_consumer_compat with broker_version = %s" % broker_version)
    self.kafka.set_version(KafkaVersion(broker_version))
    self.kafka.security_protocol = "PLAINTEXT"
    self.kafka.interbroker_security_protocol = self.kafka.security_protocol

    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, self.topic,
                                       throughput=self.producer_throughput,
                                       message_validator=is_int_with_prefix)
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, self.topic,
                                    consumer_timeout_ms=60000,
                                    message_validator=is_int_with_prefix)
    self.kafka.start()

    def produced_enough():
        return self.producer.each_produced_at_least(self.messages_per_producer) == True

    self.run_produce_consume_validate(lambda: wait_until(
        produced_enough,
        timeout_sec=120,
        backoff_sec=1,
        err_msg="Producer did not produce all messages in reasonable amount of time"))
def test_throttled_reassignment(self, bounce_brokers):
    """Reassign partitions under throttling while producing/consuming, then validate.

    A bulk producer first seeds the topic with a large backlog so that the throttled
    reassignment has substantial data to move; a verifiable producer/consumer pair runs
    during the reassignment and every acked message must be consumed.

    :param bounce_brokers: whether to bounce brokers during the reassignment
    """
    security_protocol = 'PLAINTEXT'
    self.kafka.security_protocol = security_protocol
    self.kafka.interbroker_security_protocol = security_protocol

    producer_id = 'bulk_producer'
    bulk_producer = ProducerPerformanceService(
        context=self.test_context, num_nodes=1, kafka=self.kafka,
        topic=self.topic, num_records=self.num_records,
        record_size=self.record_size, throughput=-1, client_id=producer_id)

    self.producer = VerifiableProducer(context=self.test_context,
                                       num_nodes=1,
                                       kafka=self.kafka,
                                       topic=self.topic,
                                       message_validator=is_int,
                                       throughput=self.producer_throughput)

    self.consumer = ConsoleConsumer(self.test_context,
                                    self.num_consumers,
                                    self.kafka,
                                    self.topic,
                                    consumer_timeout_ms=60000,
                                    message_validator=is_int,
                                    from_beginning=False,
                                    wait_until_partitions_assigned=True)

    self.kafka.start()
    bulk_producer.run()
    self.run_produce_consume_validate(
        core_test_action=lambda: self.reassign_partitions(bounce_brokers, self.throttle))

    # BUG FIX: the original passed a generator expression as the log argument, which
    # renders as "<generator object ...>" instead of the actual rates; materialize it.
    self.logger.debug(
        "Bulk producer outgoing-byte-rates: %s",
        [metric.value
         for k, metrics in bulk_producer.metrics(group='producer-metrics',
                                                 name='outgoing-byte-rate',
                                                 client_id=producer_id)
         for metric in metrics])
def __init__(self, test_context):
    """Set up a one-broker cluster, Copycat file source/sink services and a validator consumer."""
    super(CopycatStandaloneFileTest, self).__init__(
        test_context,
        num_zk=1,
        num_brokers=1,
        topics={'test': {'partitions': 1, 'replication-factor': 1}})

    # File source feeds Kafka; file sink drains it back out to OUTPUT_FILE.
    self.source = CopycatStandaloneService(test_context, self.kafka,
                                           [self.INPUT_FILE, self.OFFSETS_FILE])
    self.sink = CopycatStandaloneService(test_context, self.kafka,
                                         [self.OUTPUT_FILE, self.OFFSETS_FILE])
    self.consumer_validator = ConsoleConsumer(test_context, 1, self.kafka, self.TOPIC,
                                              consumer_timeout_ms=1000)
def __init__(self, test_context):
    """Set up source/target clusters, a producer, mirror maker and a validating consumer."""
    super(TestMirrorMakerService, self).__init__(test_context)

    self.topic = "topic"
    self.source_zk = ZookeeperService(test_context, num_nodes=1)
    self.target_zk = ZookeeperService(test_context, num_nodes=1)

    single_partition = {"partitions": 1, "replication-factor": 1}
    self.source_kafka = KafkaService(test_context, num_nodes=1, zk=self.source_zk,
                                     topics={self.topic: single_partition})
    self.target_kafka = KafkaService(test_context, num_nodes=1, zk=self.target_zk,
                                     topics={self.topic: single_partition})

    # This will produce to source kafka cluster
    self.producer = VerifiableProducer(test_context, num_nodes=1,
                                       kafka=self.source_kafka,
                                       topic=self.topic,
                                       throughput=1000)
    self.mirror_maker = MirrorMaker(test_context, num_nodes=1,
                                    source=self.source_kafka,
                                    target=self.target_kafka,
                                    whitelist=self.topic,
                                    offset_commit_interval_ms=1000)
    # This will consume from target kafka cluster
    self.consumer = ConsoleConsumer(test_context, num_nodes=1,
                                    kafka=self.target_kafka,
                                    topic=self.topic,
                                    message_validator=is_int,
                                    consumer_timeout_ms=60000)
def test_throttled_reassignment(self, bounce_brokers):
    """Reassign partitions under throttling while producing/consuming, then validate."""
    security_protocol = 'PLAINTEXT'
    self.kafka.security_protocol = security_protocol
    self.kafka.interbroker_security_protocol = security_protocol

    producer_id = 'bulk_producer'
    # Seeds the topic with a backlog so the throttled reassignment has data to move.
    bulk_producer = ProducerPerformanceService(
        context=self.test_context, num_nodes=1, kafka=self.kafka,
        topic=self.topic, num_records=self.num_records,
        record_size=self.record_size, throughput=-1, client_id=producer_id,
        jmx_object_names=['kafka.producer:type=producer-metrics,client-id=%s' % producer_id],
        jmx_attributes=['outgoing-byte-rate'])

    self.producer = VerifiableProducer(context=self.test_context,
                                       num_nodes=1,
                                       kafka=self.kafka,
                                       topic=self.topic,
                                       message_validator=is_int,
                                       throughput=self.producer_throughput)

    self.consumer = ConsoleConsumer(self.test_context,
                                    self.num_consumers,
                                    self.kafka,
                                    self.topic,
                                    new_consumer=True,
                                    consumer_timeout_ms=60000,
                                    message_validator=is_int,
                                    from_beginning=False)

    self.kafka.start()
    bulk_producer.run()
    self.run_produce_consume_validate(
        core_test_action=lambda: self.reassign_partitions(bounce_brokers, self.throttle))
def test_compressed_topic(self, compression_types, metadata_quorum=quorum.zk):
    """Test produce => consume => validate for compressed topics

    Setup: 1 zk, 1 kafka node, 1 topic with partitions=10, replication-factor=1

    compression_types parameter gives a list of compression types (or no compression
    if "none"). Each producer in a VerifiableProducer group (num_producers = number of
    compression types) will use a compression type from the list based on producer's
    index in the group.

    - Produce messages in the background
    - Consume messages in the background
    - Stop producing, and finish consuming
    - Validate that every acked message was consumed
    """
    self.kafka.security_protocol = "PLAINTEXT"
    self.kafka.interbroker_security_protocol = self.kafka.security_protocol

    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, self.topic,
                                       throughput=self.producer_throughput,
                                       message_validator=is_int_with_prefix,
                                       compression_types=compression_types)
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, self.topic,
                                    consumer_timeout_ms=60000,
                                    message_validator=is_int_with_prefix)
    self.kafka.start()

    def produced_enough():
        return self.producer.each_produced_at_least(self.messages_per_producer) == True

    self.run_produce_consume_validate(lambda: wait_until(
        produced_enough,
        timeout_sec=120,
        backoff_sec=1,
        err_msg="Producer did not produce all messages in reasonable amount of time"))
def test_reassign_partitions(self, bounce_brokers, security_protocol):
    """Reassign partitions tests.

    Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=3, replication-factor=3,
    and min.insync.replicas=2

    - Produce messages in the background
    - Consume messages in the background
    - Reassign partitions
    - If bounce_brokers is True, also bounce a few brokers while partition
      re-assignment is in progress
    - When done reassigning partitions and bouncing brokers, stop producing,
      and finish consuming
    - Validate that every acked message was consumed
    """
    self.kafka.security_protocol = security_protocol
    self.kafka.interbroker_security_protocol = security_protocol

    # The old consumer only supports PLAINTEXT; anything secured needs the new consumer.
    new_consumer = self.kafka.security_protocol != "PLAINTEXT"

    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, self.topic,
                                       throughput=self.producer_throughput)
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, self.topic,
                                    new_consumer=new_consumer,
                                    consumer_timeout_ms=60000,
                                    message_validator=is_int)

    self.kafka.start()
    self.run_produce_consume_validate(
        core_test_action=lambda: self.reassign_partitions(bounce_brokers))
def setUp(self):
    """Bring up ZK plus a three-broker 0.8.2 cluster and create the test clients."""
    self.topic = "test_topic"
    self.zk = ZookeeperService(self.test_context, num_nodes=1)
    self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk,
                              version=LATEST_0_8_2,
                              topics={self.topic: {"partitions": 3,
                                                   "replication-factor": 3,
                                                   'configs': {"min.insync.replicas": 2}}})
    self.zk.start()
    self.kafka.start()

    # Producer and consumer
    self.producer_throughput = 10000
    self.num_producers = 1
    self.num_consumers = 1

    self.producer = VerifiableProducer(
        self.test_context, self.num_producers, self.kafka, self.topic,
        throughput=self.producer_throughput, version=LATEST_0_8_2)

    # TODO - reduce the timeout
    self.consumer = ConsoleConsumer(
        self.test_context, self.num_consumers, self.kafka, self.topic,
        consumer_timeout_ms=30000, message_validator=is_int, version=LATEST_0_8_2)
def test_lifecycle(self, security_protocol, new_consumer):
    """Start a consumer against an idle topic; verify startup, logging, and no output."""
    self.kafka = KafkaService(
        self.test_context, num_nodes=1, zk=self.zk,
        security_protocol=security_protocol,
        topics={self.topic: {"partitions": 1, "replication-factor": 1}})
    self.kafka.start()

    start_time = time.time()
    self.consumer = ConsoleConsumer(self.test_context, num_nodes=1,
                                    kafka=self.kafka, topic=self.topic,
                                    security_protocol=security_protocol,
                                    new_consumer=new_consumer)
    self.consumer.start()
    node = self.consumer.nodes[0]

    wait_until(lambda: self.consumer.alive(node),
               timeout_sec=10, backoff_sec=.2,
               err_msg="Consumer was too slow to start")
    self.logger.info("consumer started in %s seconds " % str(time.time() - start_time))

    # Verify that log output is happening
    wait_until(lambda: file_exists(node, ConsoleConsumer.LOG_FILE), timeout_sec=10,
               err_msg="Timed out waiting for logging to start.")
    assert line_count(node, ConsoleConsumer.LOG_FILE) > 0

    # Nothing was produced, so nothing may have been consumed.
    assert line_count(node, ConsoleConsumer.STDOUT_CAPTURE) == 0

    self.consumer.stop_node(node)
def get_messages_from_output_topic(self):
    """Consume the output topic from the beginning (read_committed) and return its messages."""
    consumer = ConsoleConsumer(context=self.test_context,
                               num_nodes=1,
                               kafka=self.kafka,
                               topic=self.output_topic,
                               new_consumer=True,
                               message_validator=is_int,
                               from_beginning=True,
                               consumer_timeout_ms=5000,
                               isolation_level="read_committed")
    consumer.start()
    node = consumer.nodes[0]

    # ensure that the consumer is up.
    wait_until(lambda: consumer.alive(node) == True,
               timeout_sec=60,
               err_msg="Consumer failed to start for %ds" % 60)

    # wait until the consumer closes, which will be 5 seconds after
    # receiving the last message.
    wait_until(lambda: consumer.alive(node) == False,
               timeout_sec=60,
               err_msg="Consumer failed to consume %d messages in %ds" % (self.num_seed_messages, 60))

    return consumer.messages_consumed[1]
def validate_success(self, topic=None):
    """Produce to and consume from a (possibly freshly created) topic and validate delivery."""
    if topic is None:
        # Create a new topic
        topic = "%s%d" % (TestSnapshots.TOPIC_NAME_PREFIX, self.topics_created)
        self.topics_created += self.create_n_topics(topic_count=1)

    # Produce to the newly created topic to ensure broker has caught up
    self.producer = VerifiableProducer(self.test_context, self.num_producers,
                                       self.kafka, topic,
                                       throughput=self.producer_throughput,
                                       message_validator=is_int)
    self.consumer = ConsoleConsumer(self.test_context, self.num_consumers,
                                    self.kafka, topic,
                                    consumer_timeout_ms=30000,
                                    message_validator=is_int)

    self.start_producer_and_consumer()
    self.stop_producer_and_consumer()
    self.validate()