def test_consumer_back_compatibility(self):
        """Run the scala 0.8.X consumer against an 0.9.X cluster.
        Expect 0.8.X scala consumer to fail with buffer underflow. This error is the same as when an 0.9.X producer
        is run against an 0.8.X broker: the broker responds to a V1 fetch request with a V0 fetch response; the
        client then tries to parse this V0 fetch response as a V1 fetch response, resulting in a BufferUnderflowException
        """
        num_messages = 10
        self.producer = VerifiableProducer(
            self.test_context, self.num_producers, self.kafka, self.topic, max_messages=num_messages,
            throughput=self.producer_throughput, version=LATEST_0_8_2)

        self.consumer = ConsoleConsumer(
            self.test_context, self.num_consumers, self.kafka, self.topic, group_id="consumer-09X",
            consumer_timeout_ms=10000, message_validator=is_int, version=TRUNK)

        self.old_consumer = ConsoleConsumer(
            self.test_context, self.num_consumers, self.kafka, self.topic, group_id="consumer-08X",
            consumer_timeout_ms=10000, message_validator=is_int, version=LATEST_0_8_2)

        self.producer.run()
        self.consumer.run()
        self.old_consumer.run()

        consumed = len(self.consumer.messages_consumed[1])
        old_consumed = len(self.old_consumer.messages_consumed[1])
        assert old_consumed == num_messages, "Expected 0.8.X scala consumer to consume %d, but only got %d" % (num_messages, old_consumed)
        assert consumed == 0, "Expected 0.9.X scala consumer to fail to consume any messages, but got %d" % consumed

        self.logger.info("Grepping consumer log for expected error type")
        node = self.consumer.nodes[0]
        node.account.ssh("egrep -m 1 %s %s" % ("\"java\.nio\.BufferUnderflowException\"", self.consumer.LOG_FILE), allow_fail=False)
Exemple #2
0
class GetOffsetShellTest(Test):
    """
    Tests GetOffsetShell tool
    """
    def __init__(self, test_context):
        super(GetOffsetShellTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.messages_received_count = 0
        self.topics = {
            TOPIC: {'partitions': NUM_PARTITIONS, 'replication-factor': REPLICATION_FACTOR}
        }

        self.zk = ZookeeperService(test_context, self.num_zk)


    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context, self.num_brokers,
            self.zk, security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol, topics=self.topics)
        self.kafka.start()

    def start_producer(self):
        # This will produce to kafka cluster
        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, max_messages=MAX_MESSAGES)
        self.producer.start()
        current_acked = self.producer.num_acked
        wait_until(lambda: self.producer.num_acked >= current_acked + MAX_MESSAGES, timeout_sec=10,
                   err_msg="Timeout awaiting messages to be produced and acked")

    def start_consumer(self):
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                        consumer_timeout_ms=1000)
        self.consumer.start()

    @cluster(num_nodes=4)
    def test_get_offset_shell(self, security_protocol='PLAINTEXT'):
        """
        Tests if GetOffsetShell is getting offsets correctly
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_producer()

        # Assert that offset fetched without any consumers consuming is 0
        assert self.kafka.get_offset_shell(TOPIC, None, 1000, 1, -1), "%s:%s:%s" % (TOPIC, NUM_PARTITIONS - 1, 0)

        self.start_consumer()

        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node), timeout_sec=20, backoff_sec=.2, err_msg="Consumer was too slow to start")

        # Assert that offset is correctly indicated by GetOffsetShell tool
        wait_until(lambda: "%s:%s:%s" % (TOPIC, NUM_PARTITIONS - 1, MAX_MESSAGES) in self.kafka.get_offset_shell(TOPIC, None, 1000, 1, -1), timeout_sec=10,
                   err_msg="Timed out waiting to reach expected offset.")
class Log4jAppenderTest(Test):
    """
    Tests KafkaLog4jAppender using VerifiableKafkaLog4jAppender that appends increasing ints to a Kafka topic
    """
    def __init__(self, test_context):
        super(Log4jAppenderTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.topics = {
            TOPIC: {'partitions': 1, 'replication-factor': 1}
        }

        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context, self.num_brokers,
            self.zk, security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol, topics=self.topics)
        self.kafka.start()

    def start_appender(self, security_protocol):
        self.appender = KafkaLog4jAppender(self.test_context, self.num_brokers, self.kafka, TOPIC, MAX_MESSAGES,
                                           security_protocol=security_protocol)
        self.appender.start()

    def start_consumer(self, security_protocol):
        enable_new_consumer = security_protocol == SecurityConfig.SSL
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                        consumer_timeout_ms=1000, new_consumer=enable_new_consumer)
        self.consumer.start()

    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    def test_log4j_appender(self, security_protocol='PLAINTEXT'):
        """
        Tests if KafkaLog4jAppender is producing to Kafka topic
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_appender(security_protocol)
        self.appender.wait()

        self.start_consumer(security_protocol)
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
            timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start")

        # Verify consumed messages count
        expected_lines_count = MAX_MESSAGES * 2  # two times to account for new lines introduced by log4j
        wait_until(lambda: len(self.consumer.messages_consumed[1]) == expected_lines_count, timeout_sec=10,
                   err_msg="Timed out waiting to consume expected number of messages.")

        self.consumer.stop()
    def test_transformations(self):
        self.setup_services(timestamp_type='CreateTime')
        self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
        self.cc.start()

        ts_fieldname = 'the_timestamp'

        NamedConnector = namedtuple('Connector', ['name'])

        source_connector = NamedConnector(name='file-src')

        self.cc.create_connector({
            'name': source_connector.name,
            'connector.class': 'org.apache.kafka.connect.file.FileStreamSourceConnector',
            'tasks.max': 1,
            'file': self.INPUT_FILE,
            'topic': self.TOPIC,
            'transforms': 'hoistToStruct,insertTimestampField',
            'transforms.hoistToStruct.type': 'org.apache.kafka.connect.transforms.HoistField$Value',
            'transforms.hoistToStruct.field': 'content',
            'transforms.insertTimestampField.type': 'org.apache.kafka.connect.transforms.InsertField$Value',
            'transforms.insertTimestampField.timestamp.field': ts_fieldname,
        })

        wait_until(lambda: self.connector_is_running(source_connector), timeout_sec=30, err_msg='Failed to see connector transition to the RUNNING state')

        for node in self.cc.nodes:
            node.account.ssh("echo -e -n " + repr(self.FIRST_INPUTS) + " >> " + self.INPUT_FILE)

        consumer = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC, consumer_timeout_ms=15000, print_timestamp=True)
        consumer.run()

        assert len(consumer.messages_consumed[1]) == len(self.FIRST_INPUT_LIST)

        expected_schema = {
            'type': 'struct',
            'fields': [
                {'field': 'content', 'type': 'string', 'optional': False},
                {'field': ts_fieldname, 'name': 'org.apache.kafka.connect.data.Timestamp', 'type': 'int64', 'version': 1, 'optional': True},
            ],
            'optional': False
        }

        for msg in consumer.messages_consumed[1]:
            (ts_info, value) = msg.split('\t')

            assert ts_info.startswith('CreateTime:')
            ts = int(ts_info[len('CreateTime:'):])

            obj = json.loads(value)
            assert obj['schema'] == expected_schema
            assert obj['payload']['content'] in self.FIRST_INPUT_LIST
            assert obj['payload'][ts_fieldname] == ts
    def create_producer_and_consumer(self):
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           throughput=self.producer_throughput)

        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int)

        self.consumer.group_id = self.group
Exemple #6
0
    def test_quota(self,
                   producer_id='default_id',
                   producer_num=1,
                   consumer_id='default_id',
                   consumer_num=1):
        # Produce all messages
        producer = ProducerPerformanceService(
            self.test_context,
            producer_num,
            self.kafka,
            security_protocol=self.security_protocol,
            topic=self.topic,
            num_records=self.num_records,
            record_size=self.record_size,
            throughput=-1,
            client_id=producer_id,
            jmx_object_names=[
                'kafka.producer:type=producer-metrics,client-id=%s' %
                producer_id
            ],
            jmx_attributes=['outgoing-byte-rate'])

        producer.run()

        # Consume all messages
        consumer = ConsoleConsumer(
            self.test_context,
            consumer_num,
            self.kafka,
            self.topic,
            security_protocol=self.security_protocol,
            new_consumer=False,
            consumer_timeout_ms=60000,
            client_id=consumer_id,
            jmx_object_names=[
                'kafka.consumer:type=ConsumerTopicMetrics,name=BytesPerSec,clientId=%s'
                % consumer_id
            ],
            jmx_attributes=['OneMinuteRate'])
        consumer.run()

        for idx, messages in consumer.messages_consumed.iteritems():
            assert len(
                messages
            ) > 0, "consumer %d didn't consume any message before timeout" % idx

        success, msg = self.validate(self.kafka, producer, consumer)
        assert success, msg
Exemple #7
0
    def test_reassign_partitions(self, bounce_brokers,
                                 reassign_from_offset_zero):
        """Reassign partitions tests.
        Setup: 1 zk, 4 kafka nodes, 1 topic with partitions=20, replication-factor=3,
        and min.insync.replicas=3

            - Produce messages in the background
            - Consume messages in the background
            - Reassign partitions
            - If bounce_brokers is True, also bounce a few brokers while partition re-assignment is in progress
            - When done reassigning partitions and bouncing brokers, stop producing, and finish consuming
            - Validate that every acked message was consumed
            """
        self.kafka.start()
        if not reassign_from_offset_zero:
            self.move_start_offset()

        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           throughput=self.producer_throughput,
                                           enable_idempotence=True)
        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int)

        self.enable_idempotence = True
        self.run_produce_consume_validate(
            core_test_action=lambda: self.reassign_partitions(bounce_brokers))
Exemple #8
0
 def start_consumer(self, topic_to_read, group_id):
     consumer = ConsoleConsumer(context=self.test_context,
                                num_nodes=1,
                                kafka=self.kafka,
                                topic=topic_to_read,
                                group_id=group_id,
                                message_validator=is_int,
                                from_beginning=True,
                                isolation_level="read_committed")
     consumer.start()
     # ensure that the consumer is up.
     wait_until(lambda: (len(consumer.messages_consumed[1]) > 0) == True,
                timeout_sec=60,
                err_msg="Consumer failed to consume any messages for %ds" %\
                60)
     return consumer
    def test_reassign_partitions(self, bounce_brokers, security_protocol):
        """Reassign partitions tests.
        Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=3, replication-factor=3, and min.insync.replicas=2

            - Produce messages in the background
            - Consume messages in the background
            - Reassign partitions
            - If bounce_brokers is True, also bounce a few brokers while partition re-assignment is in progress
            - When done reassigning partitions and bouncing brokers, stop producing, and finish consuming
            - Validate that every acked message was consumed
        """

        self.kafka.security_protocol = security_protocol
        self.kafka.interbroker_security_protocol = security_protocol
        new_consumer = False if self.kafka.security_protocol == "PLAINTEXT" else True
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           throughput=self.producer_throughput)
        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        new_consumer=new_consumer,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int)
        self.kafka.start()

        self.run_produce_consume_validate(
            core_test_action=lambda: self.reassign_partitions(bounce_brokers))
    def test_replication_with_broker_failure(self, failure_mode,
                                             security_protocol):
        """Replication tests.
        These tests verify that replication provides simple durability guarantees by checking that data acked by
        brokers is still available for consumption in the face of various failure scenarios.

        Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=3, replication-factor=3, and min.insync.replicas=2

            - Produce messages in the background
            - Consume messages in the background
            - Drive broker failures (shutdown, or bounce repeatedly with kill -15 or kill -9)
            - When done driving failures, stop producing, and finish consuming
            - Validate that every acked message was consumed
        """

        self.kafka.security_protocol = 'PLAINTEXT'
        self.kafka.interbroker_security_protocol = security_protocol
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           self.topic,
                                           throughput=self.producer_throughput)
        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int)
        self.kafka.start()

        self.run_produce_consume_validate(
            core_test_action=lambda: failures[failure_mode](self))
 def produce_and_consume(self, producer_version, consumer_version, group):
     self.producer = VerifiableProducer(
         self.test_context,
         self.num_producers,
         self.kafka,
         self.topic,
         throughput=self.producer_throughput,
         message_validator=is_int,
         version=KafkaVersion(producer_version))
     self.consumer = ConsoleConsumer(self.test_context,
                                     self.num_consumers,
                                     self.kafka,
                                     self.topic,
                                     consumer_timeout_ms=30000,
                                     message_validator=is_int,
                                     version=KafkaVersion(consumer_version))
     self.consumer.group_id = group
     self.run_produce_consume_validate(lambda: wait_until(
         lambda: self.producer.each_produced_at_least(
             self.messages_per_producer) == True,
         timeout_sec=120,
         backoff_sec=1,
         err_msg=
         "Producer did not produce all messages in reasonable amount of time"
     ))
    def test_replication_with_broker_failure(self, failure_mode, security_protocol, broker_type,
                                             client_sasl_mechanism="GSSAPI", interbroker_sasl_mechanism="GSSAPI",
                                             compression_type=None, enable_idempotence=False):
        """Replication tests.
        These tests verify that replication provides simple durability guarantees by checking that data acked by
        brokers is still available for consumption in the face of various failure scenarios.

        Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=3, replication-factor=3, and min.insync.replicas=2

            - Produce messages in the background
            - Consume messages in the background
            - Drive broker failures (shutdown, or bounce repeatedly with kill -15 or kill -9)
            - When done driving failures, stop producing, and finish consuming
            - Validate that every acked message was consumed
        """

        self.kafka.security_protocol = security_protocol
        self.kafka.interbroker_security_protocol = security_protocol
        self.kafka.client_sasl_mechanism = client_sasl_mechanism
        self.kafka.interbroker_sasl_mechanism = interbroker_sasl_mechanism
        new_consumer = False if self.kafka.security_protocol == "PLAINTEXT" else True
        self.enable_idempotence = enable_idempotence
        compression_types = None if not compression_type else [compression_type] * self.num_producers
        self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic,
                                           throughput=self.producer_throughput, compression_types=compression_types,
                                           enable_idempotence=enable_idempotence)
        self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, new_consumer=new_consumer, consumer_timeout_ms=60000, message_validator=is_int)
        self.kafka.start()
        self.run_produce_consume_validate(core_test_action=lambda: failures[failure_mode](self, broker_type))
    def test_produce_consume(self, broker_version):
        print("running producer_consumer_compat with broker_version = %s" %
              broker_version)
        self.kafka.set_version(KafkaVersion(broker_version))
        self.kafka.security_protocol = "PLAINTEXT"
        self.kafka.interbroker_security_protocol = self.kafka.security_protocol
        self.producer = VerifiableProducer(
            self.test_context,
            self.num_producers,
            self.kafka,
            self.topic,
            throughput=self.producer_throughput,
            message_validator=is_int_with_prefix)
        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int_with_prefix)
        self.kafka.start()

        self.run_produce_consume_validate(lambda: wait_until(
            lambda: self.producer.each_produced_at_least(
                self.messages_per_producer) == True,
            timeout_sec=120,
            backoff_sec=1,
            err_msg=
            "Producer did not produce all messages in reasonable amount of time"
        ))
Exemple #14
0
    def __init__(self, test_context):
        super(DelegationTokenTest, self).__init__(test_context)

        self.test_context = test_context
        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, zk_chroot="/kafka",
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}},
                                  server_prop_overides=[
                                      [config_property.DELEGATION_TOKEN_MAX_LIFETIME_MS, "604800000"],
                                      [config_property.DELEGATION_TOKEN_EXPIRY_TIME_MS, "86400000"],
                                      [config_property.DELEGATION_TOKEN_MASTER_KEY, "test12345"],
                                      [config_property.SASL_ENABLED_MECHANISMS, "GSSAPI,SCRAM-SHA-256"]
                                  ])
        self.jaas_deleg_conf_path = "/tmp/jaas_deleg.conf"
        self.jaas_deleg_conf = ""
        self.client_properties_content = """
security.protocol=SASL_PLAINTEXT
sasl.mechanism=SCRAM-SHA-256
sasl.kerberos.service.name=kafka
client.id=console-consumer
"""
        self.client_kafka_opts=' -Djava.security.auth.login.config=' + self.jaas_deleg_conf_path

        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=1,
                                       throughput=1, kafka_opts_override=self.client_kafka_opts,
                                       client_prop_file_override=self.client_properties_content)

        self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                        kafka_opts_override=self.client_kafka_opts,
                                        client_prop_file_override=self.client_properties_content)

        self.kafka.security_protocol = 'SASL_PLAINTEXT'
        self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
        self.kafka.interbroker_sasl_mechanism = 'GSSAPI'
Exemple #15
0
    def test_compatibility(self, producer_version, consumer_version, compression_types, new_consumer=True, timestamp_type=None):

        self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=DEV_BRANCH, topics={self.topic: {
                                                                    "partitions": 3,
                                                                    "replication-factor": 3,
                                                                    'configs': {"min.insync.replicas": 2}}})
        for node in self.kafka.nodes:
            if timestamp_type is not None:
                node.config[config_property.MESSAGE_TIMESTAMP_TYPE] = timestamp_type
        self.kafka.start()
         
        self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka,
                                           self.topic, throughput=self.producer_throughput,
                                           message_validator=is_int,
                                           compression_types=compression_types,
                                           version=KafkaVersion(producer_version))

        self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka,
                                        self.topic, consumer_timeout_ms=30000, new_consumer=new_consumer,
                                        message_validator=is_int, version=KafkaVersion(consumer_version))

        self.run_produce_consume_validate(lambda: wait_until(
            lambda: self.producer.each_produced_at_least(self.messages_per_producer) == True,
            timeout_sec=120, backoff_sec=1,
            err_msg="Producer did not produce all messages in reasonable amount of time"))
Exemple #16
0
 def start_consumer(self, topic_to_read, group_id):
     consumer = ConsoleConsumer(context=self.test_context,
                                num_nodes=1,
                                kafka=self.kafka,
                                topic=topic_to_read,
                                group_id=group_id,
                                message_validator=is_int,
                                from_beginning=True,
                                isolation_level="read_committed")
     consumer.start()
     # ensure that the consumer is up.
     wait_until(lambda: (len(consumer.messages_consumed[1]) > 0) == True,
                timeout_sec=60,
                err_msg="Consumer failed to consume any messages for %ds" %\
                60)
     return consumer
Exemple #17
0
    def __init__(self, test_context):
        super(TestMirrorMakerService, self).__init__(test_context)

        self.topic = "topic"
        self.source_zk = ZookeeperService(test_context, num_nodes=1)
        self.target_zk = ZookeeperService(test_context, num_nodes=1)

        self.source_kafka = KafkaService(
            test_context,
            num_nodes=1,
            zk=self.source_zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})
        self.target_kafka = KafkaService(
            test_context,
            num_nodes=1,
            zk=self.target_zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})

        self.num_messages = 1000
        # This will produce to source kafka cluster
        self.producer = VerifiableProducer(test_context,
                                           num_nodes=1,
                                           kafka=self.source_kafka,
                                           topic=self.topic,
                                           max_messages=self.num_messages,
                                           throughput=1000)

        # Use a regex whitelist to check that the start command is well-formed in this case
        self.mirror_maker = MirrorMaker(test_context,
                                        num_nodes=1,
                                        source=self.source_kafka,
                                        target=self.target_kafka,
                                        whitelist=".*",
                                        consumer_timeout_ms=2000)

        # This will consume from target kafka cluster
        self.consumer = ConsoleConsumer(test_context,
                                        num_nodes=1,
                                        kafka=self.target_kafka,
                                        topic=self.topic,
                                        consumer_timeout_ms=1000)
Exemple #18
0
    def __init__(self, test_context):
        super(ConsoleConsumerTest, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, zk_chroot="/kafka",
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic)
    def __init__(self, test_context):
        super(ConnectStandaloneFileTest, self).__init__(
            test_context, num_zk=1, num_brokers=1, topics={"test": {"partitions": 1, "replication-factor": 1}}
        )

        self.source = ConnectStandaloneService(test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(test_context, 1, self.kafka, self.TOPIC, consumer_timeout_ms=1000)
Exemple #20
0
    def __init__(self, test_context):
        super(ConsoleConsumerTest, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(
            self.test_context,
            num_nodes=1,
            zk=self.zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})
        self.consumer = ConsoleConsumer(self.test_context,
                                        num_nodes=1,
                                        kafka=self.kafka,
                                        topic=self.topic)
class ConsoleConsumerTest(Test):
    """Sanity checks on console consumer service class."""
    def __init__(self, test_context):
        super(ConsoleConsumerTest, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(
            test_context,
            num_nodes=1,
            zk=self.zk,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})
        self.consumer = ConsoleConsumer(test_context,
                                        num_nodes=1,
                                        kafka=self.kafka,
                                        topic=self.topic)

    def setUp(self):
        self.zk.start()
        self.kafka.start()

    def test_lifecycle(self):
        t0 = time.time()
        self.consumer.start()
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
                   timeout_sec=10,
                   backoff_sec=.2,
                   err_msg="Consumer was too slow to start")
        self.logger.info("consumer started in %s seconds " %
                         str(time.time() - t0))

        # Verify that log output is happening
        wait_until(lambda: file_exists(node, ConsoleConsumer.LOG_FILE),
                   timeout_sec=10,
                   err_msg="Timed out waiting for logging to start.")
        assert line_count(node, ConsoleConsumer.LOG_FILE) > 0

        # Verify no consumed messages
        assert line_count(node, ConsoleConsumer.STDOUT_CAPTURE) == 0

        self.consumer.stop_node(node)
    def create_producer_and_consumer(self):
        self.producer = VerifiableProducer(
            self.test_context, self.num_producers, self.kafka, self.topic,
            throughput=self.producer_throughput)

        self.consumer = ConsoleConsumer(
            self.test_context, self.num_consumers, self.kafka, self.topic,
            consumer_timeout_ms=60000, message_validator=is_int, new_consumer=True)

        self.consumer.group_id = "unique-test-group-" + str(random.random())
Exemple #23
0
    def test_quota(self,
                   quota_type,
                   override_quota=True,
                   producer_num=1,
                   consumer_num=1):
        self.quota_config = QuotaConfig(quota_type, override_quota, self.kafka)
        producer_client_id = self.quota_config.client_id
        consumer_client_id = self.quota_config.client_id

        # Produce all messages
        producer = ProducerPerformanceService(self.test_context,
                                              producer_num,
                                              self.kafka,
                                              topic=self.topic,
                                              num_records=self.num_records,
                                              record_size=self.record_size,
                                              throughput=-1,
                                              client_id=producer_client_id)

        producer.run()

        # Consume all messages
        consumer = ConsoleConsumer(
            self.test_context,
            consumer_num,
            self.kafka,
            self.topic,
            consumer_timeout_ms=60000,
            client_id=consumer_client_id,
            jmx_object_names=[
                'kafka.consumer:type=consumer-fetch-manager-metrics,client-id=%s'
                % consumer_client_id
            ],
            jmx_attributes=['bytes-consumed-rate'])
        consumer.run()

        for idx, messages in consumer.messages_consumed.iteritems():
            assert len(
                messages
            ) > 0, "consumer %d didn't consume any message before timeout" % idx

        success, msg = self.validate(self.kafka, producer, consumer)
        assert success, msg
Exemple #24
0
    def __init__(self, test_context):
        super(CopycatStandaloneFileTest, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=1,
            topics={'test': {
                'partitions': 1,
                'replication-factor': 1
            }})

        self.source = CopycatStandaloneService(
            test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = CopycatStandaloneService(
            test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(test_context,
                                                  1,
                                                  self.kafka,
                                                  self.TOPIC,
                                                  consumer_timeout_ms=1000)
 def start_consumer(self, security_protocol):
     enable_new_consumer = security_protocol == SecurityConfig.SSL
     self.consumer = ConsoleConsumer(
         self.test_context,
         num_nodes=self.num_brokers,
         kafka=self.kafka,
         topic=TOPIC,
         consumer_timeout_ms=None,
         new_consumer=enable_new_consumer,
     )
     self.consumer.start()
Exemple #26
0
    def test_upgrade(self, from_kafka_version, to_message_format_version, compression_types,
                     new_consumer=True, security_protocol="PLAINTEXT"):
        """Test upgrade of Kafka broker cluster from 0.8.2, 0.9.0 or 0.10.0 to the current version

        from_kafka_version is a Kafka version to upgrade from: either 0.8.2.X, 0.9.0.x or 0.10.0.x

        If to_message_format_version is None, it means that we will upgrade to default (latest)
        message format version. It is possible to upgrade to 0.10 brokers but still use message
        format version 0.9

        - Start 3 node broker cluster on version 'from_kafka_version'
        - Start producer and consumer in the background
        - Perform two-phase rolling upgrade
            - First phase: upgrade brokers to 0.10 with inter.broker.protocol.version set to
            from_kafka_version and log.message.format.version set to from_kafka_version
            - Second phase: remove inter.broker.protocol.version config with rolling bounce; if
            to_message_format_version is set to 0.9, set log.message.format.version to
            to_message_format_version, otherwise remove log.message.format.version config
        - Finally, validate that every message acked by the producer was consumed by the consumer
        """
        self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk,
                                  version=KafkaVersion(from_kafka_version),
                                  topics={self.topic: {"partitions": 3, "replication-factor": 3,
                                                       'configs': {"min.insync.replicas": 2}}})
        self.kafka.security_protocol = security_protocol
        self.kafka.interbroker_security_protocol = security_protocol
        self.kafka.start()

        self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka,
                                           self.topic, throughput=self.producer_throughput,
                                           message_validator=is_int,
                                           compression_types=compression_types,
                                           version=KafkaVersion(from_kafka_version))

        assert self.zk.query("/cluster/id") is None

        # TODO - reduce the timeout
        self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka,
                                        self.topic, consumer_timeout_ms=30000, new_consumer=new_consumer,
                                        message_validator=is_int, version=KafkaVersion(from_kafka_version))

        self.run_produce_consume_validate(core_test_action=lambda: self.perform_upgrade(from_kafka_version,
                                                                                        to_message_format_version))

        cluster_id_json = self.zk.query("/cluster/id")
        assert cluster_id_json is not None
        try:
            cluster_id = json.loads(cluster_id_json)
        except :
            self.logger.debug("Data in /cluster/id znode could not be parsed. Data = %s" % cluster_id_json)

        self.logger.debug("Cluster id [%s]", cluster_id)
        assert len(cluster_id["id"]) == 22
Exemple #27
0
    def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True, security_protocol='PLAINTEXT'):
        """
        Validates basic end-to-end functionality of Connect standalone using the file source and sink converters. Includes
        parameterizations to test different converters (which also test per-connector converter overrides), schema/schemaless
        modes, and security support.
        """
        assert converter != None, "converter type must be set"
        # Template parameters. Note that we don't set key/value.converter. These default to JsonConverter and we validate
        # converter overrides via the connector configuration.
        if converter != "org.apache.kafka.connect.json.JsonConverter":
            self.override_key_converter = converter
            self.override_value_converter = converter
        self.schemas = schemas

        self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk,
                                  security_protocol=security_protocol, interbroker_security_protocol=security_protocol,
                                  topics=self.topics)

        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC_TEST,
                                                  consumer_timeout_ms=10000)

        self.zk.start()
        self.kafka.start()

        self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")])
        self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")])

        self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))
        self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(lambda: self.validate_output(self.FIRST_INPUT), timeout_sec=60, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.")

        # Restarting both should result in them picking up where they left off,
        # only processing new data.
        self.source.restart()
        self.sink.restart()

        self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT), timeout_sec=60, err_msg="Sink output file never converged to the same state as the input file")

        # Validate the format of the data in the Kafka topic
        self.consumer_validator.run()
        expected = json.dumps([line if not self.schemas else { "schema": self.SCHEMA, "payload": line } for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST])
        decoder = (json.loads if converter.endswith("JsonConverter") else str)
        actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]])
        assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual)
Exemple #28
0
    def test_lifecycle(self, security_protocol, new_consumer):
        self.kafka = KafkaService(
            self.test_context,
            num_nodes=1,
            zk=self.zk,
            security_protocol=security_protocol,
            topics={self.topic: {
                "partitions": 1,
                "replication-factor": 1
            }})
        self.kafka.start()

        t0 = time.time()
        self.consumer = ConsoleConsumer(self.test_context,
                                        num_nodes=1,
                                        kafka=self.kafka,
                                        topic=self.topic,
                                        security_protocol=security_protocol,
                                        new_consumer=new_consumer)
        self.consumer.start()
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
                   timeout_sec=10,
                   backoff_sec=.2,
                   err_msg="Consumer was too slow to start")
        self.logger.info("consumer started in %s seconds " %
                         str(time.time() - t0))

        # Verify that log output is happening
        wait_until(lambda: file_exists(node, ConsoleConsumer.LOG_FILE),
                   timeout_sec=10,
                   err_msg="Timed out waiting for logging to start.")
        assert line_count(node, ConsoleConsumer.LOG_FILE) > 0

        # Verify no consumed messages
        assert line_count(node, ConsoleConsumer.STDOUT_CAPTURE) == 0

        self.consumer.stop_node(node)
    def test_quota(self, producer_id='default_id', producer_num=1, consumer_id='default_id', consumer_num=1):
        # Produce all messages
        producer = ProducerPerformanceService(
            self.test_context, producer_num, self.kafka,
            topic=self.topic, num_records=self.num_records, record_size=self.record_size, throughput=-1, client_id=producer_id,
            jmx_object_names=['kafka.producer:type=producer-metrics,client-id=%s' % producer_id], jmx_attributes=['outgoing-byte-rate'])

        producer.run()

        # Consume all messages
        consumer = ConsoleConsumer(self.test_context, consumer_num, self.kafka, self.topic,
            new_consumer=False,
            consumer_timeout_ms=60000, client_id=consumer_id,
            jmx_object_names=['kafka.consumer:type=ConsumerTopicMetrics,name=BytesPerSec,clientId=%s' % consumer_id],
            jmx_attributes=['OneMinuteRate'])
        consumer.run()

        for idx, messages in consumer.messages_consumed.iteritems():
            assert len(messages) > 0, "consumer %d didn't consume any message before timeout" % idx

        success, msg = self.validate(self.kafka, producer, consumer)
        assert success, msg
Exemple #30
0
    def test_quota(self, quota_type, override_quota=True, producer_num=1, consumer_num=1,
                   old_broker_throttling_behavior=False, old_client_throttling_behavior=False):
        # Old (pre-2.0) throttling behavior for broker throttles before sending a response to the client.
        if old_broker_throttling_behavior:
            self.kafka.set_version(LATEST_1_1)
        self.kafka.start()

        self.quota_config = QuotaConfig(quota_type, override_quota, self.kafka)
        producer_client_id = self.quota_config.client_id
        consumer_client_id = self.quota_config.client_id

        # Old (pre-2.0) throttling behavior for client does not throttle upon receiving a response with a non-zero throttle time.
        if old_client_throttling_behavior:
            client_version = LATEST_1_1
        else:
            client_version = DEV_BRANCH

        # Produce all messages
        producer = ProducerPerformanceService(
            self.test_context, producer_num, self.kafka,
            topic=self.topic, num_records=self.num_records, record_size=self.record_size, throughput=-1,
            client_id=producer_client_id, version=client_version)

        producer.run()

        # Consume all messages
        consumer = ConsoleConsumer(self.test_context, consumer_num, self.kafka, self.topic,
            consumer_timeout_ms=60000, client_id=consumer_client_id,
            jmx_object_names=['kafka.consumer:type=consumer-fetch-manager-metrics,client-id=%s' % consumer_client_id],
            jmx_attributes=['bytes-consumed-rate'], version=client_version)
        consumer.run()

        for idx, messages in consumer.messages_consumed.iteritems():
            assert len(messages) > 0, "consumer %d didn't consume any message before timeout" % idx

        success, msg = self.validate(self.kafka, producer, consumer)
        assert success, msg
Exemple #31
0
    def test_quota(self, quota_type, override_quota=True, producer_num=1, consumer_num=1):
        self.quota_config = QuotaConfig(quota_type, override_quota, self.kafka)
        producer_client_id = self.quota_config.client_id
        consumer_client_id = self.quota_config.client_id

        # Produce all messages
        producer = ProducerPerformanceService(
            self.test_context, producer_num, self.kafka,
            topic=self.topic, num_records=self.num_records, record_size=self.record_size, throughput=-1, client_id=producer_client_id)

        producer.run()

        # Consume all messages
        consumer = ConsoleConsumer(self.test_context, consumer_num, self.kafka, self.topic,
            consumer_timeout_ms=60000, client_id=consumer_client_id,
            jmx_object_names=['kafka.consumer:type=consumer-fetch-manager-metrics,client-id=%s' % consumer_client_id],
            jmx_attributes=['bytes-consumed-rate'])
        consumer.run()

        for idx, messages in consumer.messages_consumed.iteritems():
            assert len(messages) > 0, "consumer %d didn't consume any message before timeout" % idx

        success, msg = self.validate(self.kafka, producer, consumer)
        assert success, msg
Exemple #32
0
    def test_throttled_reassignment(self, bounce_brokers):
        security_protocol = 'PLAINTEXT'
        self.kafka.security_protocol = security_protocol
        self.kafka.interbroker_security_protocol = security_protocol

        producer_id = 'bulk_producer'
        bulk_producer = ProducerPerformanceService(
            context=self.test_context,
            num_nodes=1,
            kafka=self.kafka,
            topic=self.topic,
            num_records=self.num_records,
            record_size=self.record_size,
            throughput=-1,
            client_id=producer_id)

        self.producer = VerifiableProducer(context=self.test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=self.topic,
                                           message_validator=is_int,
                                           throughput=self.producer_throughput)

        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int,
                                        from_beginning=False,
                                        wait_until_partitions_assigned=True)

        self.kafka.start()
        bulk_producer.run()
        self.run_produce_consume_validate(
            core_test_action=lambda: self.reassign_partitions(
                bounce_brokers, self.throttle))

        self.logger.debug(
            "Bulk producer outgoing-byte-rates: %s",
            (metric.value
             for k, metrics in bulk_producer.metrics(group='producer-metrics',
                                                     name='outgoing-byte-rate',
                                                     client_id=producer_id)
             for metric in metrics))
    def __init__(self, test_context):
        super(TestMirrorMakerService, self).__init__(test_context)

        self.topic = "topic"
        self.source_zk = ZookeeperService(test_context, num_nodes=1)
        self.target_zk = ZookeeperService(test_context, num_nodes=1)

        self.source_kafka = KafkaService(test_context, num_nodes=1, zk=self.source_zk,
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})
        self.target_kafka = KafkaService(test_context, num_nodes=1, zk=self.target_zk,
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})
        # This will produce to source kafka cluster
        self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.source_kafka, topic=self.topic,
                                           throughput=1000)
        self.mirror_maker = MirrorMaker(test_context, num_nodes=1, source=self.source_kafka, target=self.target_kafka,
                                        whitelist=self.topic, offset_commit_interval_ms=1000)
        # This will consume from target kafka cluster
        self.consumer = ConsoleConsumer(test_context, num_nodes=1, kafka=self.target_kafka, topic=self.topic,
                                        message_validator=is_int, consumer_timeout_ms=60000)
    def test_throttled_reassignment(self, bounce_brokers):
        security_protocol = 'PLAINTEXT'
        self.kafka.security_protocol = security_protocol
        self.kafka.interbroker_security_protocol = security_protocol

        producer_id = 'bulk_producer'
        bulk_producer = ProducerPerformanceService(
            context=self.test_context,
            num_nodes=1,
            kafka=self.kafka,
            topic=self.topic,
            num_records=self.num_records,
            record_size=self.record_size,
            throughput=-1,
            client_id=producer_id,
            jmx_object_names=[
                'kafka.producer:type=producer-metrics,client-id=%s' %
                producer_id
            ],
            jmx_attributes=['outgoing-byte-rate'])

        self.producer = VerifiableProducer(context=self.test_context,
                                           num_nodes=1,
                                           kafka=self.kafka,
                                           topic=self.topic,
                                           message_validator=is_int,
                                           throughput=self.producer_throughput)

        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        new_consumer=True,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int,
                                        from_beginning=False)

        self.kafka.start()
        bulk_producer.run()
        self.run_produce_consume_validate(
            core_test_action=lambda: self.reassign_partitions(
                bounce_brokers, self.throttle))
    def test_compressed_topic(self,
                              compression_types,
                              metadata_quorum=quorum.zk):
        """Test produce => consume => validate for compressed topics
        Setup: 1 zk, 1 kafka node, 1 topic with partitions=10, replication-factor=1

        compression_types parameter gives a list of compression types (or no compression if
        "none"). Each producer in a VerifiableProducer group (num_producers = number of compression
        types) will use a compression type from the list based on producer's index in the group.

            - Produce messages in the background
            - Consume messages in the background
            - Stop producing, and finish consuming
            - Validate that every acked message was consumed
        """

        self.kafka.security_protocol = "PLAINTEXT"
        self.kafka.interbroker_security_protocol = self.kafka.security_protocol
        self.producer = VerifiableProducer(
            self.test_context,
            self.num_producers,
            self.kafka,
            self.topic,
            throughput=self.producer_throughput,
            message_validator=is_int_with_prefix,
            compression_types=compression_types)
        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        self.topic,
                                        consumer_timeout_ms=60000,
                                        message_validator=is_int_with_prefix)
        self.kafka.start()

        self.run_produce_consume_validate(lambda: wait_until(
            lambda: self.producer.each_produced_at_least(
                self.messages_per_producer) == True,
            timeout_sec=120,
            backoff_sec=1,
            err_msg=
            "Producer did not produce all messages in reasonable amount of time"
        ))
Exemple #36
0
    def setUp(self):
        self.topic = "test_topic"
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=LATEST_0_8_2, topics={self.topic: {
                                                                    "partitions": 3,
                                                                    "replication-factor": 3,
                                                                    'configs': {"min.insync.replicas": 2}}})
        self.zk.start()
        self.kafka.start()

        # Producer and consumer
        self.producer_throughput = 10000
        self.num_producers = 1
        self.num_consumers = 1
        self.producer = VerifiableProducer(
            self.test_context, self.num_producers, self.kafka, self.topic,
            throughput=self.producer_throughput, version=LATEST_0_8_2)

        # TODO - reduce the timeout
        self.consumer = ConsoleConsumer(
            self.test_context, self.num_consumers, self.kafka, self.topic,
            consumer_timeout_ms=30000, message_validator=is_int, version=LATEST_0_8_2)
Exemple #37
0
    def validate_success(self, topic=None):
        if topic is None:
            # Create a new topic
            topic = "%s%d" % (TestSnapshots.TOPIC_NAME_PREFIX,
                              self.topics_created)
            self.topics_created += self.create_n_topics(topic_count=1)

        # Produce to the newly created topic to ensure broker has caught up
        self.producer = VerifiableProducer(self.test_context,
                                           self.num_producers,
                                           self.kafka,
                                           topic,
                                           throughput=self.producer_throughput,
                                           message_validator=is_int)

        self.consumer = ConsoleConsumer(self.test_context,
                                        self.num_consumers,
                                        self.kafka,
                                        topic,
                                        consumer_timeout_ms=30000,
                                        message_validator=is_int)
        self.start_producer_and_consumer()
        self.stop_producer_and_consumer()
        self.validate()
Exemple #38
0
 def get_messages_from_output_topic(self):
     consumer = ConsoleConsumer(context=self.test_context,
                                num_nodes=1,
                                kafka=self.kafka,
                                topic=self.output_topic,
                                new_consumer=True,
                                message_validator=is_int,
                                from_beginning=True,
                                consumer_timeout_ms=5000,
                                isolation_level="read_committed")
     consumer.start()
     # ensure that the consumer is up.
     wait_until(lambda: consumer.alive(consumer.nodes[0]) == True,
                timeout_sec=60,
                err_msg="Consumer failed to start for %ds" %\
                60)
     # wait until the consumer closes, which will be 5 seconds after
     # receiving the last message.
     wait_until(lambda: consumer.alive(consumer.nodes[0]) == False,
                timeout_sec=60,
                err_msg="Consumer failed to consume %d messages in %ds" %\
                (self.num_seed_messages, 60))
     return consumer.messages_consumed[1]
    def test_file_source_and_sink(
        self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True, security_protocol="PLAINTEXT"
    ):
        assert converter != None, "converter type must be set"
        # Template parameters
        self.key_converter = converter
        self.value_converter = converter
        self.schemas = schemas

        self.kafka = KafkaService(
            self.test_context,
            self.num_brokers,
            self.zk,
            security_protocol=security_protocol,
            interbroker_security_protocol=security_protocol,
            topics=self.topics,
        )

        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(
            self.test_context, 1, self.kafka, self.TOPIC, consumer_timeout_ms=1000, new_consumer=True
        )

        self.zk.start()
        self.kafka.start()

        self.source.set_configs(
            lambda node: self.render("connect-standalone.properties", node=node),
            [self.render("connect-file-source.properties")],
        )
        self.sink.set_configs(
            lambda node: self.render("connect-standalone.properties", node=node),
            [self.render("connect-file-sink.properties")],
        )

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(
            lambda: self.validate_output(self.FIRST_INPUT),
            timeout_sec=60,
            err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.",
        )

        # Restarting both should result in them picking up where they left off,
        # only processing new data.
        self.source.restart()
        self.sink.restart()

        self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(
            lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT),
            timeout_sec=60,
            err_msg="Sink output file never converged to the same state as the input file",
        )

        # Validate the format of the data in the Kafka topic
        self.consumer_validator.run()
        expected = json.dumps(
            [
                line if not self.schemas else {"schema": self.SCHEMA, "payload": line}
                for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST
            ]
        )
        decoder = json.loads if converter.endswith("JsonConverter") else str
        actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]])
        assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual)
Exemple #40
0
class ConsoleConsumerTest(Test):
    """Sanity checks on console consumer service class."""
    def __init__(self, test_context):
        super(ConsoleConsumerTest, self).__init__(test_context)

        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, zk_chroot="/kafka",
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}})
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic)

    def setUp(self):
        self.zk.start()

    @cluster(num_nodes=3)
    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    @cluster(num_nodes=4)
    @matrix(security_protocol=['SASL_SSL'], sasl_mechanism=['PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512'])
    @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL'])
    def test_lifecycle(self, security_protocol, sasl_mechanism='GSSAPI'):
        """Check that console consumer starts/stops properly, and that we are capturing log output."""

        self.kafka.security_protocol = security_protocol
        self.kafka.client_sasl_mechanism = sasl_mechanism
        self.kafka.interbroker_sasl_mechanism = sasl_mechanism
        self.kafka.start()

        self.consumer.security_protocol = security_protocol

        t0 = time.time()
        self.consumer.start()
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
            timeout_sec=20, backoff_sec=.2, err_msg="Consumer was too slow to start")
        self.logger.info("consumer started in %s seconds " % str(time.time() - t0))

        # Verify that log output is happening
        wait_until(lambda: file_exists(node, ConsoleConsumer.LOG_FILE), timeout_sec=10,
                   err_msg="Timed out waiting for consumer log file to exist.")
        wait_until(lambda: line_count(node, ConsoleConsumer.LOG_FILE) > 0, timeout_sec=1,
                   backoff_sec=.25, err_msg="Timed out waiting for log entries to start.")

        # Verify no consumed messages
        assert line_count(node, ConsoleConsumer.STDOUT_CAPTURE) == 0

        self.consumer.stop_node(node)

    @cluster(num_nodes=4)
    def test_version(self):
        """Check that console consumer v0.8.2.X successfully starts and consumes messages."""
        self.kafka.start()

        num_messages = 1000
        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                           max_messages=num_messages, throughput=1000)
        self.producer.start()
        self.producer.wait()

        self.consumer.nodes[0].version = LATEST_0_8_2
        self.consumer.new_consumer = False
        self.consumer.consumer_timeout_ms = 1000
        self.consumer.start()
        self.consumer.wait()

        num_consumed = len(self.consumer.messages_consumed[1])
        num_produced = self.producer.num_acked
        assert num_produced == num_consumed, "num_produced: %d, num_consumed: %d" % (num_produced, num_consumed)
Exemple #41
0
class ConnectStandaloneFileTest(Test):
    """
    Simple test of Kafka Connect that produces data from a file in one
    standalone process and consumes it on another, validating the output is
    identical to the input.
    """

    FILE_SOURCE_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSourceConnector'
    FILE_SINK_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSinkConnector'

    INPUT_FILE = "/mnt/connect.input"
    OUTPUT_FILE = "/mnt/connect.output"

    OFFSETS_FILE = "/mnt/connect.offsets"

    TOPIC = "test"

    FIRST_INPUT_LIST = ["foo", "bar", "baz"]
    FIRST_INPUT = "\n".join(FIRST_INPUT_LIST) + "\n"
    SECOND_INPUT_LIST = ["razz", "ma", "tazz"]
    SECOND_INPUT = "\n".join(SECOND_INPUT_LIST) + "\n"

    SCHEMA = { "type": "string", "optional": False }

    def __init__(self, test_context):
        super(ConnectStandaloneFileTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.topics = {
            'test' : { 'partitions': 1, 'replication-factor': 1 }
        }

        self.zk = ZookeeperService(test_context, self.num_zk)

    @cluster(num_nodes=5)
    @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=True)
    @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=False)
    @parametrize(converter="org.apache.kafka.connect.storage.StringConverter", schemas=None)
    @parametrize(security_protocol=SecurityConfig.PLAINTEXT)
    @cluster(num_nodes=6)
    @parametrize(security_protocol=SecurityConfig.SASL_SSL)
    def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True, security_protocol='PLAINTEXT'):
        """
        Validates basic end-to-end functionality of Connect standalone using the file source and sink converters. Includes
        parameterizations to test different converters (which also test per-connector converter overrides), schema/schemaless
        modes, and security support.
        """
        assert converter != None, "converter type must be set"
        # Template parameters. Note that we don't set key/value.converter. These default to JsonConverter and we validate
        # converter overrides via the connector configuration.
        if converter != "org.apache.kafka.connect.json.JsonConverter":
            self.override_key_converter = converter
            self.override_value_converter = converter
        self.schemas = schemas

        self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk,
                                  security_protocol=security_protocol, interbroker_security_protocol=security_protocol,
                                  topics=self.topics)

        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC,
                                                  consumer_timeout_ms=10000)

        self.zk.start()
        self.kafka.start()

        self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")])
        self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")])

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(lambda: self.validate_output(self.FIRST_INPUT), timeout_sec=60, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.")

        # Restarting both should result in them picking up where they left off,
        # only processing new data.
        self.source.restart()
        self.sink.restart()

        self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT), timeout_sec=60, err_msg="Sink output file never converged to the same state as the input file")

        # Validate the format of the data in the Kafka topic
        self.consumer_validator.run()
        expected = json.dumps([line if not self.schemas else { "schema": self.SCHEMA, "payload": line } for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST])
        decoder = (json.loads if converter.endswith("JsonConverter") else str)
        actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]])
        assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual)

    def validate_output(self, value):
        try:
            output_hash = list(self.sink.node.account.ssh_capture("md5sum " + self.OUTPUT_FILE))[0].strip().split()[0]
            return output_hash == hashlib.md5(value).hexdigest()
        except RemoteCommandError:
            return False
Exemple #42
0
class Log4jAppenderTest(Test):
    """
    Tests KafkaLog4jAppender using VerifiableKafkaLog4jAppender that appends increasing ints to a Kafka topic
    """
    def __init__(self, test_context):
        super(Log4jAppenderTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.topics = {TOPIC: {'partitions': 1, 'replication-factor': 1}}

        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context,
            self.num_brokers,
            self.zk,
            security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol,
            topics=self.topics)
        self.kafka.start()

    def start_appender(self, security_protocol):
        self.appender = KafkaLog4jAppender(self.test_context,
                                           self.num_brokers,
                                           self.kafka,
                                           TOPIC,
                                           MAX_MESSAGES,
                                           security_protocol=security_protocol)
        self.appender.start()

    def start_consumer(self, security_protocol):
        enable_new_consumer = security_protocol == SecurityConfig.SSL
        self.consumer = ConsoleConsumer(self.test_context,
                                        num_nodes=self.num_brokers,
                                        kafka=self.kafka,
                                        topic=TOPIC,
                                        consumer_timeout_ms=1000,
                                        new_consumer=enable_new_consumer,
                                        security_protocol=security_protocol)
        self.consumer.start()

    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    def test_log4j_appender(self, security_protocol='PLAINTEXT'):
        """
        Tests if KafkaLog4jAppender is producing to Kafka topic
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_appender(security_protocol)
        self.appender.wait()

        self.start_consumer(security_protocol)
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
                   timeout_sec=10,
                   backoff_sec=.2,
                   err_msg="Consumer was too slow to start")

        # Verify consumed messages count
        expected_lines_count = MAX_MESSAGES * 2  # two times to account for new lines introduced by log4j
        wait_until(
            lambda: len(self.consumer.messages_consumed[1]
                        ) == expected_lines_count,
            timeout_sec=10,
            err_msg="Timed out waiting to consume expected number of messages."
        )

        self.consumer.stop()
class Log4jAppenderTest(Test):
    """
    Tests KafkaLog4jAppender using VerifiableKafkaLog4jAppender that appends increasing ints to a Kafka topic
    """
    def __init__(self, test_context):
        super(Log4jAppenderTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.messages_received_count = 0
        self.topics = {TOPIC: {'partitions': 1, 'replication-factor': 1}}

        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context,
            self.num_brokers,
            self.zk,
            security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol,
            topics=self.topics)
        self.kafka.start()

    def start_appender(self, security_protocol):
        self.appender = KafkaLog4jAppender(self.test_context,
                                           self.num_brokers,
                                           self.kafka,
                                           TOPIC,
                                           MAX_MESSAGES,
                                           security_protocol=security_protocol)
        self.appender.start()

    def custom_message_validator(self, msg):
        if msg and "INFO : org.apache.kafka.tools.VerifiableLog4jAppender" in msg:
            self.logger.debug("Received message: %s" % msg)
            self.messages_received_count += 1

    def start_consumer(self):
        self.consumer = ConsoleConsumer(
            self.test_context,
            num_nodes=self.num_brokers,
            kafka=self.kafka,
            topic=TOPIC,
            consumer_timeout_ms=10000,
            message_validator=self.custom_message_validator)
        self.consumer.start()

    @cluster(num_nodes=4)
    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    @cluster(num_nodes=5)
    @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL'])
    def test_log4j_appender(self, security_protocol='PLAINTEXT'):
        """
        Tests if KafkaLog4jAppender is producing to Kafka topic
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_appender(security_protocol)
        self.appender.wait()

        self.start_consumer()
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
                   timeout_sec=20,
                   backoff_sec=.2,
                   err_msg="Consumer was too slow to start")

        # Verify consumed messages count
        wait_until(
            lambda: self.messages_received_count == MAX_MESSAGES,
            timeout_sec=10,
            err_msg="Timed out waiting to consume expected number of messages."
        )

        self.consumer.stop()
class ConnectStandaloneFileTest(KafkaTest):
    """
    Simple test of Kafka Connect that produces data from a file in one
    standalone process and consumes it on another, validating the output is
    identical to the input.
    """

    INPUT_FILE = "/mnt/connect.input"
    OUTPUT_FILE = "/mnt/connect.output"

    OFFSETS_FILE = "/mnt/connect.offsets"

    TOPIC = "test"

    FIRST_INPUT_LIST = ["foo", "bar", "baz"]
    FIRST_INPUT = "\n".join(FIRST_INPUT_LIST) + "\n"
    SECOND_INPUT_LIST = ["razz", "ma", "tazz"]
    SECOND_INPUT = "\n".join(SECOND_INPUT_LIST) + "\n"

    SCHEMA = {"type": "string", "optional": False}

    def __init__(self, test_context):
        super(ConnectStandaloneFileTest, self).__init__(
            test_context, num_zk=1, num_brokers=1, topics={"test": {"partitions": 1, "replication-factor": 1}}
        )

        self.source = ConnectStandaloneService(test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(test_context, 1, self.kafka, self.TOPIC, consumer_timeout_ms=1000)

    @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=True)
    @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=False)
    @parametrize(converter="org.apache.kafka.connect.storage.StringConverter", schemas=None)
    def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True):
        assert converter != None, "converter type must be set"
        # Template parameters
        self.key_converter = converter
        self.value_converter = converter
        self.schemas = schemas

        self.source.set_configs(
            lambda node: self.render("connect-standalone.properties", node=node),
            [self.render("connect-file-source.properties")],
        )
        self.sink.set_configs(
            lambda node: self.render("connect-standalone.properties", node=node),
            [self.render("connect-file-sink.properties")],
        )

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(
            lambda: self.validate_output(self.FIRST_INPUT),
            timeout_sec=60,
            err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.",
        )

        # Restarting both should result in them picking up where they left off,
        # only processing new data.
        self.source.restart()
        self.sink.restart()

        self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(
            lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT),
            timeout_sec=60,
            err_msg="Sink output file never converged to the same state as the input file",
        )

        # Validate the format of the data in the Kafka topic
        self.consumer_validator.run()
        expected = json.dumps(
            [
                line if not self.schemas else {"schema": self.SCHEMA, "payload": line}
                for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST
            ]
        )
        decoder = json.loads if converter.endswith("JsonConverter") else str
        actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]])
        assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual)

    def validate_output(self, value):
        try:
            output_hash = list(self.sink.node.account.ssh_capture("md5sum " + self.OUTPUT_FILE))[0].strip().split()[0]
            return output_hash == hashlib.md5(value).hexdigest()
        except subprocess.CalledProcessError:
            return False
    def test_bounce(self, clean):
        """
        Validates that source and sink tasks that run continuously and produce a predictable sequence of messages
        run correctly and deliver messages exactly once when Kafka Connect workers undergo clean rolling bounces.
        """
        num_tasks = 3

        self.setup_services()
        self.cc.set_configs(lambda node: self.render(
            "connect-distributed.properties", node=node))
        self.cc.start()

        self.source = VerifiableSource(self.cc,
                                       tasks=num_tasks,
                                       throughput=100)
        self.source.start()
        self.sink = VerifiableSink(self.cc, tasks=num_tasks)
        self.sink.start()

        for _ in range(3):
            for node in self.cc.nodes:
                started = time.time()
                self.logger.info("%s bouncing Kafka Connect on %s",
                                 clean and "Clean" or "Hard",
                                 str(node.account))
                self.cc.stop_node(node, clean_shutdown=clean)
                with node.account.monitor_log(self.cc.LOG_FILE) as monitor:
                    self.cc.start_node(node)
                    monitor.wait_until(
                        "Starting connectors and tasks using config offset",
                        timeout_sec=90,
                        err_msg=
                        "Kafka Connect worker didn't successfully join group and start work"
                    )
                self.logger.info(
                    "Bounced Kafka Connect on %s and rejoined in %f seconds",
                    node.account,
                    time.time() - started)

                # Give additional time for the consumer groups to recover. Even if it is not a hard bounce, there are
                # some cases where a restart can cause a rebalance to take the full length of the session timeout
                # (e.g. if the client shuts down before it has received the memberId from its initial JoinGroup).
                # If we don't give enough time for the group to stabilize, the next bounce may cause consumers to
                # be shut down before they have any time to process data and we can end up with zero data making it
                # through the test.
                time.sleep(15)

        self.source.stop()
        self.sink.stop()
        self.cc.stop()

        # Validate at least once delivery of everything that was reported as written since we should have flushed and
        # cleanly exited. Currently this only tests at least once delivery because the sink task may not have consumed
        # all the messages generated by the source task. This needs to be done per-task since seqnos are not unique across
        # tasks.
        success = True
        errors = []
        allow_dups = not clean
        src_messages = self.source.messages()
        sink_messages = self.sink.messages()
        for task in range(num_tasks):
            # Validate source messages
            src_seqnos = [
                msg['seqno'] for msg in src_messages if msg['task'] == task
            ]
            # Every seqno up to the largest one we ever saw should appear. Each seqno should only appear once because clean
            # bouncing should commit on rebalance.
            src_seqno_max = max(src_seqnos)
            self.logger.debug("Max source seqno: %d", src_seqno_max)
            src_seqno_counts = Counter(src_seqnos)
            missing_src_seqnos = sorted(
                set(range(src_seqno_max)).difference(set(src_seqnos)))
            duplicate_src_seqnos = sorted([
                seqno for seqno, count in src_seqno_counts.iteritems()
                if count > 1
            ])

            if missing_src_seqnos:
                self.logger.error("Missing source sequence numbers for task " +
                                  str(task))
                errors.append(
                    "Found missing source sequence numbers for task %d: %s" %
                    (task, missing_src_seqnos))
                success = False
            if not allow_dups and duplicate_src_seqnos:
                self.logger.error(
                    "Duplicate source sequence numbers for task " + str(task))
                errors.append(
                    "Found duplicate source sequence numbers for task %d: %s" %
                    (task, duplicate_src_seqnos))
                success = False

            # Validate sink messages
            sink_seqnos = [
                msg['seqno'] for msg in sink_messages
                if msg['task'] == task and 'flushed' in msg
            ]
            # Every seqno up to the largest one we ever saw should appear. Each seqno should only appear once because
            # clean bouncing should commit on rebalance.
            sink_seqno_max = max(sink_seqnos)
            self.logger.debug("Max sink seqno: %d", sink_seqno_max)
            sink_seqno_counts = Counter(sink_seqnos)
            missing_sink_seqnos = sorted(
                set(range(sink_seqno_max)).difference(set(sink_seqnos)))
            duplicate_sink_seqnos = sorted([
                seqno for seqno, count in sink_seqno_counts.iteritems()
                if count > 1
            ])

            if missing_sink_seqnos:
                self.logger.error("Missing sink sequence numbers for task " +
                                  str(task))
                errors.append(
                    "Found missing sink sequence numbers for task %d: %s" %
                    (task, missing_sink_seqnos))
                success = False
            if not allow_dups and duplicate_sink_seqnos:
                self.logger.error("Duplicate sink sequence numbers for task " +
                                  str(task))
                errors.append(
                    "Found duplicate sink sequence numbers for task %d: %s" %
                    (task, duplicate_sink_seqnos))
                success = False

            # Validate source and sink match
            if sink_seqno_max > src_seqno_max:
                self.logger.error(
                    "Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d",
                    task, sink_seqno_max, src_seqno_max)
                errors.append(
                    "Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d"
                    % (task, sink_seqno_max, src_seqno_max))
                success = False
            if src_seqno_max < 1000 or sink_seqno_max < 1000:
                errors.append(
                    "Not enough messages were processed: source:%d sink:%d" %
                    (src_seqno_max, sink_seqno_max))
                success = False

        if not success:
            self.mark_for_collect(self.cc)
            # Also collect the data in the topic to aid in debugging
            consumer_validator = ConsoleConsumer(self.test_context,
                                                 1,
                                                 self.kafka,
                                                 self.source.topic,
                                                 consumer_timeout_ms=1000,
                                                 print_key=True)
            consumer_validator.run()
            self.mark_for_collect(consumer_validator, "consumer_stdout")

        assert success, "Found validation errors:\n" + "\n  ".join(errors)
class ClientCompatibilityTest(Test):

    def __init__(self, test_context):
        super(ClientCompatibilityTest, self).__init__(test_context=test_context)

    def setUp(self):
        self.topic = "test_topic"
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=LATEST_0_8_2, topics={self.topic: {
                                                                    "partitions": 3,
                                                                    "replication-factor": 3,
                                                                    'configs': {"min.insync.replicas": 2}}})
        self.zk.start()
        self.kafka.start()

        # Producer and consumer
        self.producer_throughput = 10000
        self.num_producers = 1
        self.num_consumers = 1

    def test_producer_back_compatibility(self):
        """Run 0.9.X java producer against 0.8.X brokers.
        This test documents the fact that java producer v0.9.0.0 and later won't run against 0.8.X brokers
        the broker responds to a V1 produce request with a V0 fetch response; the client then tries to parse this V0
        produce response as a V1 produce response, resulting in a BufferUnderflowException
        """
        self.producer = VerifiableProducer(
            self.test_context, self.num_producers, self.kafka, self.topic, max_messages=100,
            throughput=self.producer_throughput, version=TRUNK)

        node = self.producer.nodes[0]
        try:
            self.producer.start()
            self.producer.wait()
            raise Exception("0.9.X java producer should not run successfully against 0.8.X broker")
        except:
            # Expected
            pass
        finally:
            self.producer.kill_node(node, clean_shutdown=False)

        self.logger.info("Grepping producer log for expected error type")
        node.account.ssh("egrep -m 1 %s %s" % ("\"org\.apache\.kafka\.common\.protocol\.types\.SchemaException.*throttle_time_ms.*: java\.nio\.BufferUnderflowException\"", self.producer.LOG_FILE), allow_fail=False)

    def test_consumer_back_compatibility(self):
        """Run the scala 0.8.X consumer against an 0.9.X cluster.
        Expect 0.8.X scala consumer to fail with buffer underflow. This error is the same as when an 0.9.X producer
        is run against an 0.8.X broker: the broker responds to a V1 fetch request with a V0 fetch response; the
        client then tries to parse this V0 fetch response as a V1 fetch response, resulting in a BufferUnderflowException
        """
        num_messages = 10
        self.producer = VerifiableProducer(
            self.test_context, self.num_producers, self.kafka, self.topic, max_messages=num_messages,
            throughput=self.producer_throughput, version=LATEST_0_8_2)

        self.consumer = ConsoleConsumer(
            self.test_context, self.num_consumers, self.kafka, self.topic, group_id="consumer-09X",
            consumer_timeout_ms=10000, message_validator=is_int, version=TRUNK)

        self.old_consumer = ConsoleConsumer(
            self.test_context, self.num_consumers, self.kafka, self.topic, group_id="consumer-08X",
            consumer_timeout_ms=10000, message_validator=is_int, version=LATEST_0_8_2)

        self.producer.run()
        self.consumer.run()
        self.old_consumer.run()

        consumed = len(self.consumer.messages_consumed[1])
        old_consumed = len(self.old_consumer.messages_consumed[1])
        assert old_consumed == num_messages, "Expected 0.8.X scala consumer to consume %d, but only got %d" % (num_messages, old_consumed)
        assert consumed == 0, "Expected 0.9.X scala consumer to fail to consume any messages, but got %d" % consumed

        self.logger.info("Grepping consumer log for expected error type")
        node = self.consumer.nodes[0]
        node.account.ssh("egrep -m 1 %s %s" % ("\"java\.nio\.BufferUnderflowException\"", self.consumer.LOG_FILE), allow_fail=False)
Exemple #47
0
class ConnectStandaloneFileTest(Test):
    """
    Simple test of Kafka Connect that produces data from a file in one
    standalone process and consumes it on another, validating the output is
    identical to the input.
    """

    FILE_SOURCE_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSourceConnector'
    FILE_SINK_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSinkConnector'

    INPUT_FILE = "/mnt/connect.input"
    OUTPUT_FILE = "/mnt/connect.output"

    OFFSETS_FILE = "/mnt/connect.offsets"

    TOPIC = "${file:%s:topic.external}" % ConnectServiceBase.EXTERNAL_CONFIGS_FILE
    TOPIC_TEST = "test"

    FIRST_INPUT_LIST = ["foo", "bar", "baz"]
    FIRST_INPUT = "\n".join(FIRST_INPUT_LIST) + "\n"
    SECOND_INPUT_LIST = ["razz", "ma", "tazz"]
    SECOND_INPUT = "\n".join(SECOND_INPUT_LIST) + "\n"

    SCHEMA = { "type": "string", "optional": False }

    def __init__(self, test_context):
        super(ConnectStandaloneFileTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.topics = {
            'test' : { 'partitions': 1, 'replication-factor': 1 }
        }

        self.zk = ZookeeperService(test_context, self.num_zk)

    @cluster(num_nodes=5)
    @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=True)
    @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=False)
    @parametrize(converter="org.apache.kafka.connect.storage.StringConverter", schemas=None)
    @parametrize(security_protocol=SecurityConfig.PLAINTEXT)
    @cluster(num_nodes=6)
    @parametrize(security_protocol=SecurityConfig.SASL_SSL)
    def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True, security_protocol='PLAINTEXT'):
        """
        Validates basic end-to-end functionality of Connect standalone using the file source and sink converters. Includes
        parameterizations to test different converters (which also test per-connector converter overrides), schema/schemaless
        modes, and security support.
        """
        assert converter != None, "converter type must be set"
        # Template parameters. Note that we don't set key/value.converter. These default to JsonConverter and we validate
        # converter overrides via the connector configuration.
        if converter != "org.apache.kafka.connect.json.JsonConverter":
            self.override_key_converter = converter
            self.override_value_converter = converter
        self.schemas = schemas

        self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk,
                                  security_protocol=security_protocol, interbroker_security_protocol=security_protocol,
                                  topics=self.topics)

        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC_TEST,
                                                  consumer_timeout_ms=10000)

        self.zk.start()
        self.kafka.start()

        self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")])
        self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")])

        self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))
        self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(lambda: self.validate_output(self.FIRST_INPUT), timeout_sec=60, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.")

        # Restarting both should result in them picking up where they left off,
        # only processing new data.
        self.source.restart()
        self.sink.restart()

        self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE)
        wait_until(lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT), timeout_sec=60, err_msg="Sink output file never converged to the same state as the input file")

        # Validate the format of the data in the Kafka topic
        self.consumer_validator.run()
        expected = json.dumps([line if not self.schemas else { "schema": self.SCHEMA, "payload": line } for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST])
        decoder = (json.loads if converter.endswith("JsonConverter") else str)
        actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]])
        assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual)

    def validate_output(self, value):
        try:
            output_hash = list(self.sink.node.account.ssh_capture("md5sum " + self.OUTPUT_FILE))[0].strip().split()[0]
            return output_hash == hashlib.md5(value).hexdigest()
        except RemoteCommandError:
            return False

    @cluster(num_nodes=5)
    @parametrize(error_tolerance=ErrorTolerance.ALL)
    @parametrize(error_tolerance=ErrorTolerance.NONE)
    def test_skip_and_log_to_dlq(self, error_tolerance):
        self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, topics=self.topics)

        # set config props
        self.override_error_tolerance_props = error_tolerance
        self.enable_deadletterqueue = True

        successful_records = []
        faulty_records = []
        records = []
        for i in range(0, 1000):
            if i % 2 == 0:
                records.append('{"some_key":' + str(i) + '}')
                successful_records.append('{some_key=' + str(i) + '}')
            else:
                # badly formatted json records (missing a quote after the key)
                records.append('{"some_key:' + str(i) + '}')
                faulty_records.append('{"some_key:' + str(i) + '}')

        records = "\n".join(records) + "\n"
        successful_records = "\n".join(successful_records) + "\n"
        if error_tolerance == ErrorTolerance.ALL:
            faulty_records = ",".join(faulty_records)
        else:
            faulty_records = faulty_records[0]

        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])

        self.zk.start()
        self.kafka.start()

        self.override_key_converter = "org.apache.kafka.connect.storage.StringConverter"
        self.override_value_converter = "org.apache.kafka.connect.storage.StringConverter"
        self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")])

        self.override_key_converter = "org.apache.kafka.connect.json.JsonConverter"
        self.override_value_converter = "org.apache.kafka.connect.json.JsonConverter"
        self.override_key_converter_schemas_enable = False
        self.override_value_converter_schemas_enable = False
        self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")])

        self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))
        self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(records) + " >> " + self.INPUT_FILE)

        if error_tolerance == ErrorTolerance.NONE:
            try:
                wait_until(lambda: self.validate_output(successful_records), timeout_sec=15,
                           err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.")
                raise Exception("Expected to not find any results in this file.")
            except TimeoutError:
                self.logger.info("Caught expected exception")
        else:
            wait_until(lambda: self.validate_output(successful_records), timeout_sec=15,
                       err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.")

        if self.enable_deadletterqueue:
            self.logger.info("Reading records from deadletterqueue")
            consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, "my-connector-errors",
                                                 consumer_timeout_ms=10000)
            consumer_validator.run()
            actual = ",".join(consumer_validator.messages_consumed[1])
            assert faulty_records == actual, "Expected %s but saw %s in dead letter queue" % (faulty_records, actual)
class Log4jAppenderTest(Test):
    """
    Tests KafkaLog4jAppender using VerifiableKafkaLog4jAppender that appends increasing ints to a Kafka topic
    """
    def __init__(self, test_context):
        super(Log4jAppenderTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.messages_received_count = 0
        self.topics = {
            TOPIC: {'partitions': 1, 'replication-factor': 1}
        }

        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context, self.num_brokers,
            self.zk, security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol, topics=self.topics)
        self.kafka.start()

    def start_appender(self, security_protocol):
        self.appender = KafkaLog4jAppender(self.test_context, self.num_brokers, self.kafka, TOPIC, MAX_MESSAGES,
                                           security_protocol=security_protocol)
        self.appender.start()

    def custom_message_validator(self, msg):
        if msg and "INFO : org.apache.kafka.tools.VerifiableLog4jAppender" in msg:
            self.logger.debug("Received message: %s" % msg)
            self.messages_received_count += 1


    def start_consumer(self, security_protocol):
        enable_new_consumer = security_protocol != SecurityConfig.PLAINTEXT
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                        consumer_timeout_ms=1000, new_consumer=enable_new_consumer,
                                        message_validator=self.custom_message_validator)
        self.consumer.start()

    @matrix(security_protocol=['PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL'])
    def test_log4j_appender(self, security_protocol='PLAINTEXT'):
        """
        Tests if KafkaLog4jAppender is producing to Kafka topic
        :return: None
        """
        self.start_kafka(security_protocol, security_protocol)
        self.start_appender(security_protocol)
        self.appender.wait()

        self.start_consumer(security_protocol)
        node = self.consumer.nodes[0]

        wait_until(lambda: self.consumer.alive(node),
            timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start")

        # Verify consumed messages count
        wait_until(lambda: self.messages_received_count == MAX_MESSAGES, timeout_sec=10,
                   err_msg="Timed out waiting to consume expected number of messages.")

        self.consumer.stop()
Exemple #49
0
    def test_skip_and_log_to_dlq(self, error_tolerance):
        self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, topics=self.topics)

        # set config props
        self.override_error_tolerance_props = error_tolerance
        self.enable_deadletterqueue = True

        successful_records = []
        faulty_records = []
        records = []
        for i in range(0, 1000):
            if i % 2 == 0:
                records.append('{"some_key":' + str(i) + '}')
                successful_records.append('{some_key=' + str(i) + '}')
            else:
                # badly formatted json records (missing a quote after the key)
                records.append('{"some_key:' + str(i) + '}')
                faulty_records.append('{"some_key:' + str(i) + '}')

        records = "\n".join(records) + "\n"
        successful_records = "\n".join(successful_records) + "\n"
        if error_tolerance == ErrorTolerance.ALL:
            faulty_records = ",".join(faulty_records)
        else:
            faulty_records = faulty_records[0]

        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])

        self.zk.start()
        self.kafka.start()

        self.override_key_converter = "org.apache.kafka.connect.storage.StringConverter"
        self.override_value_converter = "org.apache.kafka.connect.storage.StringConverter"
        self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")])

        self.override_key_converter = "org.apache.kafka.connect.json.JsonConverter"
        self.override_value_converter = "org.apache.kafka.connect.json.JsonConverter"
        self.override_key_converter_schemas_enable = False
        self.override_value_converter_schemas_enable = False
        self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")])

        self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))
        self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(records) + " >> " + self.INPUT_FILE)

        if error_tolerance == ErrorTolerance.NONE:
            try:
                wait_until(lambda: self.validate_output(successful_records), timeout_sec=15,
                           err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.")
                raise Exception("Expected to not find any results in this file.")
            except TimeoutError:
                self.logger.info("Caught expected exception")
        else:
            wait_until(lambda: self.validate_output(successful_records), timeout_sec=15,
                       err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.")

        if self.enable_deadletterqueue:
            self.logger.info("Reading records from deadletterqueue")
            consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, "my-connector-errors",
                                                 consumer_timeout_ms=10000)
            consumer_validator.run()
            actual = ",".join(consumer_validator.messages_consumed[1])
            assert faulty_records == actual, "Expected %s but saw %s in dead letter queue" % (faulty_records, actual)
 def start_consumer(self, security_protocol):
     enable_new_consumer = security_protocol != SecurityConfig.PLAINTEXT
     self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                     consumer_timeout_ms=1000, new_consumer=enable_new_consumer)
     self.consumer.start()
class ConsumerGroupCommandTest(Test):
    """
    Tests ConsumerGroupCommand
    """
    # Root directory for persistent output
    PERSISTENT_ROOT = "/mnt/consumer_group_command"
    COMMAND_CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "command.properties")

    def __init__(self, test_context):
        super(ConsumerGroupCommandTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.topics = {
            TOPIC: {'partitions': 1, 'replication-factor': 1}
        }
        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context, self.num_brokers,
            self.zk, security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol, topics=self.topics)
        self.kafka.start()

    def start_consumer(self, security_protocol):
        enable_new_consumer = security_protocol == SecurityConfig.SSL
        self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                        consumer_timeout_ms=None, new_consumer=enable_new_consumer)
        self.consumer.start()

    def setup_and_verify(self, security_protocol, group=None):
        self.start_kafka(security_protocol, security_protocol)
        self.start_consumer(security_protocol)
        consumer_node = self.consumer.nodes[0]
        wait_until(lambda: self.consumer.alive(consumer_node),
                   timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start")
        kafka_node = self.kafka.nodes[0]
        if security_protocol is not SecurityConfig.PLAINTEXT:
            prop_file = str(self.kafka.security_config.client_config())
            self.logger.debug(prop_file)
            kafka_node.account.ssh("mkdir -p %s" % self.PERSISTENT_ROOT, allow_fail=False)
            kafka_node.account.create_file(self.COMMAND_CONFIG_FILE, prop_file)

        # Verify ConsumerGroupCommand lists expected consumer groups
        enable_new_consumer = security_protocol != SecurityConfig.PLAINTEXT
        command_config_file = None
        if enable_new_consumer:
            command_config_file = self.COMMAND_CONFIG_FILE

        if group:
            wait_until(lambda: re.search("topic-consumer-group-command",self.kafka.describe_consumer_group(group=group, node=kafka_node, new_consumer=enable_new_consumer, command_config=command_config_file)), timeout_sec=10,
                       err_msg="Timed out waiting to list expected consumer groups.")
        else:
            wait_until(lambda: "test-consumer-group" in self.kafka.list_consumer_groups(node=kafka_node, new_consumer=enable_new_consumer, command_config=command_config_file), timeout_sec=10,
                       err_msg="Timed out waiting to list expected consumer groups.")

        self.consumer.stop()

    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    def test_list_consumer_groups(self, security_protocol='PLAINTEXT'):
        """
        Tests if ConsumerGroupCommand is listing correct consumer groups
        :return: None
        """
        self.setup_and_verify(security_protocol)

    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    def test_describe_consumer_group(self, security_protocol='PLAINTEXT'):
        """
        Tests if ConsumerGroupCommand is describing a consumer group correctly
        :return: None
        """
        self.setup_and_verify(security_protocol, group="test-consumer-group")
Exemple #52
0
class ConsumerGroupCommandTest(Test):
    """
    Tests ConsumerGroupCommand
    """
    # Root directory for persistent output
    PERSISTENT_ROOT = "/mnt/consumer_group_command"
    COMMAND_CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "command.properties")

    def __init__(self, test_context):
        super(ConsumerGroupCommandTest, self).__init__(test_context)
        self.num_zk = 1
        self.num_brokers = 1
        self.topics = {TOPIC: {'partitions': 1, 'replication-factor': 1}}
        self.zk = ZookeeperService(test_context, self.num_zk)

    def setUp(self):
        self.zk.start()

    def start_kafka(self, security_protocol, interbroker_security_protocol):
        self.kafka = KafkaService(
            self.test_context,
            self.num_brokers,
            self.zk,
            security_protocol=security_protocol,
            interbroker_security_protocol=interbroker_security_protocol,
            topics=self.topics)
        self.kafka.start()

    def start_consumer(self, security_protocol):
        enable_new_consumer = security_protocol == SecurityConfig.SSL
        self.consumer = ConsoleConsumer(self.test_context,
                                        num_nodes=self.num_brokers,
                                        kafka=self.kafka,
                                        topic=TOPIC,
                                        consumer_timeout_ms=None,
                                        new_consumer=enable_new_consumer)
        self.consumer.start()

    def setup_and_verify(self, security_protocol, group=None):
        self.start_kafka(security_protocol, security_protocol)
        self.start_consumer(security_protocol)
        consumer_node = self.consumer.nodes[0]
        wait_until(lambda: self.consumer.alive(consumer_node),
                   timeout_sec=10,
                   backoff_sec=.2,
                   err_msg="Consumer was too slow to start")
        kafka_node = self.kafka.nodes[0]
        if security_protocol is not SecurityConfig.PLAINTEXT:
            prop_file = str(self.kafka.security_config.client_config())
            self.logger.debug(prop_file)
            kafka_node.account.ssh("mkdir -p %s" % self.PERSISTENT_ROOT,
                                   allow_fail=False)
            kafka_node.account.create_file(self.COMMAND_CONFIG_FILE, prop_file)

        # Verify ConsumerGroupCommand lists expected consumer groups
        enable_new_consumer = security_protocol != SecurityConfig.PLAINTEXT
        command_config_file = None
        if enable_new_consumer:
            command_config_file = self.COMMAND_CONFIG_FILE

        if group:
            wait_until(
                lambda:
                ("%s, topic-consumer-group-command, 0," % group) in self.kafka.
                describe_consumer_group(group=group,
                                        node=kafka_node,
                                        new_consumer=enable_new_consumer,
                                        command_config=command_config_file),
                timeout_sec=10,
                err_msg="Timed out waiting to list expected consumer groups.")
        else:
            wait_until(
                lambda: "test-consumer-group" in self.kafka.
                list_consumer_groups(node=kafka_node,
                                     new_consumer=enable_new_consumer,
                                     command_config=command_config_file),
                timeout_sec=10,
                err_msg="Timed out waiting to list expected consumer groups.")

        self.consumer.stop()

    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    def test_list_consumer_groups(self, security_protocol='PLAINTEXT'):
        """
        Tests if ConsumerGroupCommand is listing correct consumer groups
        :return: None
        """
        self.setup_and_verify(security_protocol)

    @matrix(security_protocol=['PLAINTEXT', 'SSL'])
    def test_describe_consumer_group(self, security_protocol='PLAINTEXT'):
        """
        Tests if ConsumerGroupCommand is describing a consumer group correctly
        :return: None
        """
        self.setup_and_verify(security_protocol, group="test-consumer-group")
Exemple #53
0
 def start_consumer(self):
     self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                     consumer_timeout_ms=1000)
     self.consumer.start()
Exemple #54
0
class CopycatStandaloneFileTest(KafkaTest):
    """
    Simple test of Copycat that produces data from a file in one Copycat
    standalone process and consumes it on another, validating the output is
    identical to the input.
    """

    INPUT_FILE = "/mnt/copycat.input"
    OUTPUT_FILE = "/mnt/copycat.output"

    OFFSETS_FILE = "/mnt/copycat.offsets"

    TOPIC = "test"

    FIRST_INPUT_LIST = ["foo", "bar", "baz"]
    FIRST_INPUT = "\n".join(FIRST_INPUT_LIST) + "\n"
    SECOND_INPUT_LIST = ["razz", "ma", "tazz"]
    SECOND_INPUT = "\n".join(SECOND_INPUT_LIST) + "\n"

    SCHEMA = {"type": "string", "optional": False}

    def __init__(self, test_context):
        super(CopycatStandaloneFileTest, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=1,
            topics={'test': {
                'partitions': 1,
                'replication-factor': 1
            }})

        self.source = CopycatStandaloneService(
            test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
        self.sink = CopycatStandaloneService(
            test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
        self.consumer_validator = ConsoleConsumer(test_context,
                                                  1,
                                                  self.kafka,
                                                  self.TOPIC,
                                                  consumer_timeout_ms=1000)

    @parametrize(converter="org.apache.kafka.copycat.json.JsonConverter",
                 schemas=True)
    @parametrize(converter="org.apache.kafka.copycat.json.JsonConverter",
                 schemas=False)
    @parametrize(converter="org.apache.kafka.copycat.storage.StringConverter",
                 schemas=None)
    def test_file_source_and_sink(
            self,
            converter="org.apache.kafka.copycat.json.JsonConverter",
            schemas=True):
        assert converter != None, "converter type must be set"
        # Template parameters
        self.key_converter = converter
        self.value_converter = converter
        self.schemas = schemas

        self.source.set_configs(
            self.render("copycat-standalone.properties"),
            [self.render("copycat-file-source.properties")])
        self.sink.set_configs(self.render("copycat-standalone.properties"),
                              [self.render("copycat-file-sink.properties")])

        self.source.start()
        self.sink.start()

        # Generating data on the source node should generate new records and create new output on the sink node
        self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) +
                                     " >> " + self.INPUT_FILE)
        wait_until(
            lambda: self.validate_output(self.FIRST_INPUT),
            timeout_sec=60,
            err_msg=
            "Data added to input file was not seen in the output file in a reasonable amount of time."
        )

        # Restarting both should result in them picking up where they left off,
        # only processing new data.
        self.source.restart()
        self.sink.restart()

        self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) +
                                     " >> " + self.INPUT_FILE)
        wait_until(
            lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT),
            timeout_sec=60,
            err_msg=
            "Sink output file never converged to the same state as the input file"
        )

        # Validate the format of the data in the Kafka topic
        self.consumer_validator.run()
        expected = json.dumps([
            line if not self.schemas else {
                "schema": self.SCHEMA,
                "payload": line
            } for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST
        ])
        decoder = (json.loads if converter.endswith("JsonConverter") else str)
        actual = json.dumps(
            [decoder(x) for x in self.consumer_validator.messages_consumed[1]])
        assert expected == actual, "Expected %s but saw %s in Kafka" % (
            expected, actual)

    def validate_output(self, value):
        try:
            output_hash = list(
                self.sink.node.account.ssh_capture(
                    "md5sum " + self.OUTPUT_FILE))[0].strip().split()[0]
            return output_hash == hashlib.md5(value).hexdigest()
        except subprocess.CalledProcessError:
            return False
    def test_bounce(self, clean):
        """
        Validates that source and sink tasks that run continuously and produce a predictable sequence of messages
        run correctly and deliver messages exactly once when Kafka Connect workers undergo clean rolling bounces.
        """
        num_tasks = 3

        self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
        self.cc.start()

        self.source = VerifiableSource(self.cc, tasks=num_tasks)
        self.source.start()
        self.sink = VerifiableSink(self.cc, tasks=num_tasks)
        self.sink.start()

        for _ in range(3):
            for node in self.cc.nodes:
                started = time.time()
                self.logger.info("%s bouncing Kafka Connect on %s", clean and "Clean" or "Hard", str(node.account))
                self.cc.stop_node(node, clean_shutdown=clean)
                with node.account.monitor_log(self.cc.LOG_FILE) as monitor:
                    self.cc.start_node(node)
                    monitor.wait_until("Starting connectors and tasks using config offset", timeout_sec=90,
                                       err_msg="Kafka Connect worker didn't successfully join group and start work")
                self.logger.info("Bounced Kafka Connect on %s and rejoined in %f seconds", node.account, time.time() - started)
                # If this is a hard bounce, give additional time for the consumer groups to recover. If we don't give
                # some time here, the next bounce may cause consumers to be shut down before they have any time to process
                # data and we can end up with zero data making it through the test.
                if not clean:
                    time.sleep(15)


        self.source.stop()
        self.sink.stop()
        self.cc.stop()

        # Validate at least once delivery of everything that was reported as written since we should have flushed and
        # cleanly exited. Currently this only tests at least once delivery because the sink task may not have consumed
        # all the messages generated by the source task. This needs to be done per-task since seqnos are not unique across
        # tasks.
        success = True
        errors = []
        allow_dups = not clean
        src_messages = self.source.messages()
        sink_messages = self.sink.messages()
        for task in range(num_tasks):
            # Validate source messages
            src_seqnos = [msg['seqno'] for msg in src_messages if msg['task'] == task]
            # Every seqno up to the largest one we ever saw should appear. Each seqno should only appear once because clean
            # bouncing should commit on rebalance.
            src_seqno_max = max(src_seqnos)
            self.logger.debug("Max source seqno: %d", src_seqno_max)
            src_seqno_counts = Counter(src_seqnos)
            missing_src_seqnos = sorted(set(range(src_seqno_max)).difference(set(src_seqnos)))
            duplicate_src_seqnos = sorted([seqno for seqno,count in src_seqno_counts.iteritems() if count > 1])

            if missing_src_seqnos:
                self.logger.error("Missing source sequence numbers for task " + str(task))
                errors.append("Found missing source sequence numbers for task %d: %s" % (task, missing_src_seqnos))
                success = False
            if not allow_dups and duplicate_src_seqnos:
                self.logger.error("Duplicate source sequence numbers for task " + str(task))
                errors.append("Found duplicate source sequence numbers for task %d: %s" % (task, duplicate_src_seqnos))
                success = False


            # Validate sink messages
            sink_seqnos = [msg['seqno'] for msg in sink_messages if msg['task'] == task and 'flushed' in msg]
            # Every seqno up to the largest one we ever saw should appear. Each seqno should only appear once because
            # clean bouncing should commit on rebalance.
            sink_seqno_max = max(sink_seqnos)
            self.logger.debug("Max sink seqno: %d", sink_seqno_max)
            sink_seqno_counts = Counter(sink_seqnos)
            missing_sink_seqnos = sorted(set(range(sink_seqno_max)).difference(set(sink_seqnos)))
            duplicate_sink_seqnos = sorted([seqno for seqno,count in sink_seqno_counts.iteritems() if count > 1])

            if missing_sink_seqnos:
                self.logger.error("Missing sink sequence numbers for task " + str(task))
                errors.append("Found missing sink sequence numbers for task %d: %s" % (task, missing_sink_seqnos))
                success = False
            if not allow_dups and duplicate_sink_seqnos:
                self.logger.error("Duplicate sink sequence numbers for task " + str(task))
                errors.append("Found duplicate sink sequence numbers for task %d: %s" % (task, duplicate_sink_seqnos))
                success = False

            # Validate source and sink match
            if sink_seqno_max > src_seqno_max:
                self.logger.error("Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d", task, sink_seqno_max, src_seqno_max)
                errors.append("Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d" % (task, sink_seqno_max, src_seqno_max))
                success = False
            if src_seqno_max < 1000 or sink_seqno_max < 1000:
                errors.append("Not enough messages were processed: source:%d sink:%d" % (src_seqno_max, sink_seqno_max))
                success = False

        if not success:
            self.mark_for_collect(self.cc)
            # Also collect the data in the topic to aid in debugging
            consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.source.topic, consumer_timeout_ms=1000, print_key=True)
            consumer_validator.run()
            self.mark_for_collect(consumer_validator, "consumer_stdout")

        assert success, "Found validation errors:\n" + "\n  ".join(errors)
Exemple #56
0
class DelegationTokenTest(Test):
    def __init__(self, test_context):
        super(DelegationTokenTest, self).__init__(test_context)

        self.test_context = test_context
        self.topic = "topic"
        self.zk = ZookeeperService(test_context, num_nodes=1)
        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, zk_chroot="/kafka",
                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}},
                                  server_prop_overides=[
                                      [config_property.DELEGATION_TOKEN_MAX_LIFETIME_MS, "604800000"],
                                      [config_property.DELEGATION_TOKEN_EXPIRY_TIME_MS, "86400000"],
                                      [config_property.DELEGATION_TOKEN_MASTER_KEY, "test12345"],
                                      [config_property.SASL_ENABLED_MECHANISMS, "GSSAPI,SCRAM-SHA-256"]
                                  ])
        self.jaas_deleg_conf_path = "/tmp/jaas_deleg.conf"
        self.jaas_deleg_conf = ""
        self.client_properties_content = """
security.protocol=SASL_PLAINTEXT
sasl.mechanism=SCRAM-SHA-256
sasl.kerberos.service.name=kafka
client.id=console-consumer
"""
        self.client_kafka_opts=' -Djava.security.auth.login.config=' + self.jaas_deleg_conf_path

        self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=1,
                                       throughput=1, kafka_opts_override=self.client_kafka_opts,
                                       client_prop_file_override=self.client_properties_content)

        self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic,
                                        kafka_opts_override=self.client_kafka_opts,
                                        client_prop_file_override=self.client_properties_content)

        self.kafka.security_protocol = 'SASL_PLAINTEXT'
        self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
        self.kafka.interbroker_sasl_mechanism = 'GSSAPI'


    def setUp(self):
        self.zk.start()

    def tearDown(self):
        self.producer.nodes[0].account.remove(self.jaas_deleg_conf_path)
        self.consumer.nodes[0].account.remove(self.jaas_deleg_conf_path)

    def generate_delegation_token(self):
        self.logger.debug("Request delegation token")
        self.delegation_tokens.generate_delegation_token()
        self.jaas_deleg_conf = self.delegation_tokens.create_jaas_conf_with_delegation_token()

    def expire_delegation_token(self):
        self.kafka.client_sasl_mechanism = 'GSSAPI,SCRAM-SHA-256'
        token_hmac = self.delegation_tokens.token_hmac()
        self.delegation_tokens.expire_delegation_token(token_hmac)


    def produce_with_delegation_token(self):
        self.producer.acked_values = []
        self.producer.nodes[0].account.create_file(self.jaas_deleg_conf_path, self.jaas_deleg_conf)
        self.logger.debug(self.jaas_deleg_conf)
        self.producer.start()

    def consume_with_delegation_token(self):
        self.logger.debug("Consume messages with delegation token")

        self.consumer.nodes[0].account.create_file(self.jaas_deleg_conf_path, self.jaas_deleg_conf)
        self.logger.debug(self.jaas_deleg_conf)
        self.consumer.consumer_timeout_ms = 5000

        self.consumer.start()
        self.consumer.wait()

    def get_datetime_ms(self, input_date):
        return int(time.mktime(datetime.strptime(input_date,"%Y-%m-%dT%H:%M").timetuple()) * 1000)

    def renew_delegation_token(self):
        dt = self.delegation_tokens.parse_delegation_token_out()
        orig_expiry_date_ms = self.get_datetime_ms(dt["expirydate"])
        new_expirydate_ms = orig_expiry_date_ms + 1000

        self.delegation_tokens.renew_delegation_token(dt["hmac"], new_expirydate_ms)

    def test_delegation_token_lifecycle(self):
        self.kafka.start()
        self.delegation_tokens = DelegationTokens(self.kafka, self.test_context)

        self.generate_delegation_token()
        self.renew_delegation_token()
        self.produce_with_delegation_token()
        wait_until(lambda: self.producer.num_acked > 0, timeout_sec=30,
                   err_msg="Expected producer to still be producing.")
        assert 1 == self.producer.num_acked, "number of acked messages: %d" % self.producer.num_acked

        self.consume_with_delegation_token()
        num_consumed = len(self.consumer.messages_consumed[1])
        assert 1 == num_consumed, "number of consumed messages: %d" % num_consumed

        self.expire_delegation_token()

        self.produce_with_delegation_token()
        assert 0 == self.producer.num_acked, "number of acked messages: %d" % self.producer.num_acked
Exemple #57
0
 def start_consumer(self):
     self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC,
                                     consumer_timeout_ms=10000,
                                     message_validator=self.custom_message_validator)
     self.consumer.start()