Example #1
    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 security_protocol,
                 topic,
                 messages,
                 new_consumer=False,
                 settings={}):
        super(ConsumerPerformanceService, self).__init__(context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.topic = topic
        self.messages = messages
        self.new_consumer = new_consumer
        self.settings = settings

        # These less-frequently used settings can be updated manually after instantiation
        self.fetch_size = None
        self.socket_buffer_size = None
        self.threads = None
        self.num_fetch_threads = None
        self.group = None
        self.from_latest = None
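
The trailing block of None attributes is the intended extension point: less-frequently used options are set directly on the instance after construction instead of through the constructor. A minimal usage sketch, assuming a ducktape test_context and a running kafka service (both hypothetical names here):

perf = ConsumerPerformanceService(test_context, num_nodes=1, kafka=kafka,
                                  security_protocol="PLAINTEXT",
                                  topic="test-topic", messages=10000)
# Tune the less-frequently used options after instantiation
perf.group = "perf-group"
perf.fetch_size = 1024 * 1024
perf.from_latest = False
perf.run()  # ducktape start/wait/stop cycle; fills perf.results per worker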
Example #2
    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 security_protocol,
                 topic,
                 num_records,
                 record_size,
                 throughput,
                 settings=None,
                 intermediate_stats=False,
                 client_id="producer-performance",
                 jmx_object_names=None,
                 jmx_attributes=[]):
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        PerformanceService.__init__(self, context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.args = {
            'topic': topic,
            'num_records': num_records,
            'record_size': record_size,
            'throughput': throughput
        }
        # Copy so a shared default (or the caller's dict) is never mutated later
        self.settings = dict(settings) if settings is not None else {}
        self.intermediate_stats = intermediate_stats
        self.client_id = client_id
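
Note on the settings parameter above: binding None and copying avoids the classic Python pitfall where a mutable default such as settings={} is evaluated once at definition time and then shared by every instance (jmx_attributes=[] carries the same risk). The pattern in isolation, as a sketch:

def __init__(self, settings=None, jmx_attributes=None):
    # Defaults are evaluated once, so a literal {} or [] would be shared
    # across all instances; build a fresh object per call instead.
    self.settings = dict(settings) if settings is not None else {}
    self.jmx_attributes = list(jmx_attributes) if jmx_attributes is not None else []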
Example #3
    def __init__(self,
                 context,
                 num_nodes,
                 zk,
                 security_protocol=SecurityConfig.PLAINTEXT,
                 interbroker_security_protocol=SecurityConfig.PLAINTEXT,
                 topics=None,
                 quota_config=None,
                 jmx_object_names=None,
                 jmx_attributes=[]):
        """
        :type context
        :type zk: ZookeeperService
        :type topics: dict
        """
        Service.__init__(self, context, num_nodes)
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        self.zk = zk
        if security_protocol == SecurityConfig.SSL or interbroker_security_protocol == SecurityConfig.SSL:
            self.security_config = SecurityConfig(SecurityConfig.SSL)
        else:
            self.security_config = SecurityConfig(SecurityConfig.PLAINTEXT)
        self.security_protocol = security_protocol
        self.interbroker_security_protocol = interbroker_security_protocol
        self.port = 9092 if security_protocol == SecurityConfig.PLAINTEXT else 9093
        self.topics = topics
        self.quota_config = quota_config
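
The constructor ties the listener port to the security protocol (9092 for PLAINTEXT, 9093 otherwise) and upgrades the shared security_config to SSL if either the client-facing or the inter-broker protocol requires it. A usage sketch, assuming a running ZookeeperService named zk and a ducktape test_context:

kafka = KafkaService(test_context, num_nodes=3, zk=zk,
                     security_protocol=SecurityConfig.SSL,
                     topics={"test-topic": {"partitions": 6,
                                            "replication-factor": 3}})
assert kafka.port == 9093  # SSL listener, per the branch above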
Example #4
class EndToEndLatencyService(PerformanceService):

    logs = {
        "end_to_end_latency_log": {
            "path": "/mnt/end-to-end-latency.log",
            "collect_default": True
        },
    }

    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 security_protocol,
                 topic,
                 num_records,
                 consumer_fetch_max_wait=100,
                 acks=1):
        super(EndToEndLatencyService, self).__init__(context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.args = {
            'topic': topic,
            'num_records': num_records,
            'consumer_fetch_max_wait': consumer_fetch_max_wait,
            'acks': acks
        }

    def _worker(self, idx, node):
        args = self.args.copy()
        self.security_config.setup_node(node)
        if self.security_protocol == SecurityConfig.SSL:
            ssl_config_file = SecurityConfig.SSL_DIR + "/security.properties"
            node.account.create_file(ssl_config_file,
                                     str(self.security_config))
        else:
            ssl_config_file = ""
        args.update({
            'zk_connect': self.kafka.zk.connect_setting(),
            'bootstrap_servers': self.kafka.bootstrap_servers(),
            'ssl_config_file': ssl_config_file
        })

        cmd = "/opt/kafka/bin/kafka-run-class.sh kafka.tools.EndToEndLatency "\
              "%(bootstrap_servers)s %(topic)s %(num_records)d "\
              "%(acks)d 20 %(ssl_config_file)s" % args

        cmd += " | tee /mnt/end-to-end-latency.log"

        self.logger.debug("End-to-end latency %d command: %s", idx, cmd)
        results = {}
        for line in node.account.ssh_capture(cmd):
            if line.startswith("Avg latency:"):
                results['latency_avg_ms'] = float(line.split()[2])
            if line.startswith("Percentiles"):
                results['latency_50th_ms'] = float(line.split()[3][:-1])
                results['latency_99th_ms'] = float(line.split()[6][:-1])
                results['latency_999th_ms'] = float(line.split()[9])
        self.results[idx - 1] = results
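
The parser above depends on the shape of the tool's summary lines; the exact format is an assumption, but lines like the following satisfy the indexing used in _worker:

# Hypothetical output lines consistent with the parsing logic above
line = "Percentiles: 50th = 2.0, 99th = 5.0, 99.9th = 9.1"
assert float(line.split()[3][:-1]) == 2.0   # latency_50th_ms
assert float(line.split()[6][:-1]) == 5.0   # latency_99th_ms
assert float(line.split()[9]) == 9.1        # latency_999th_ms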
Example #5
    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 topic,
                 security_protocol=None,
                 new_consumer=None,
                 message_validator=None,
                 from_beginning=True,
                 consumer_timeout_ms=None,
                 client_id="console-consumer",
                 jmx_object_names=None,
                 jmx_attributes=[]):
        """
        Args:
            context:                    standard context
            num_nodes:                  number of nodes to use (this should be 1)
            kafka:                      kafka service
            topic:                      consume from this topic
            security_protocol:          security protocol for Kafka connections
            new_consumer:               use new Kafka consumer if True
            message_validator:          function which validates each message and returns it (possibly
                                        transformed), or None to discard it
            from_beginning:             consume from beginning if True, else from the end
            consumer_timeout_ms:        corresponds to consumer.timeout.ms. The consumer process ends if
                                        the time between successively consumed messages exceeds this
                                        timeout. Setting this and waiting for the consumer to stop is a
                                        reliable way to consume all messages in a topic.
        """
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        PerformanceService.__init__(self, context, num_nodes)
        self.kafka = kafka
        self.new_consumer = new_consumer
        self.args = {
            'topic': topic,
        }

        self.consumer_timeout_ms = consumer_timeout_ms

        self.from_beginning = from_beginning
        self.message_validator = message_validator
        self.messages_consumed = {idx: [] for idx in range(1, num_nodes + 1)}
        self.client_id = client_id

        # Process client configuration
        self.prop_file = self.render(
            'console_consumer.properties',
            consumer_timeout_ms=self.consumer_timeout_ms,
            client_id=self.client_id)

        # Add security properties to the config. If security protocol is not specified,
        # use the default in the template properties.
        self.security_config = SecurityConfig(security_protocol,
                                              self.prop_file)
        self.security_protocol = self.security_config.security_protocol
        if self.new_consumer is None:
            self.new_consumer = self.security_protocol == SecurityConfig.SSL
        if self.security_protocol == SecurityConfig.SSL and not self.new_consumer:
            raise Exception(
                "SSL protocol is supported only with the new consumer")
        self.prop_file += str(self.security_config)
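
As the docstring suggests, consumer_timeout_ms plus waiting for the process to exit is the standard way to drain a topic. A sketch, assuming a running kafka service and the alive() helper from the fuller ConsoleConsumer in Example #12 below:

consumer = ConsoleConsumer(test_context, num_nodes=1, kafka=kafka,
                           topic="test-topic", consumer_timeout_ms=5000)
consumer.start()
# The worker exits once 5s pass without a new message
wait_until(lambda: not consumer.alive(consumer.nodes[0]),
           timeout_sec=60, err_msg="Consumer did not drain the topic in time")
messages = consumer.messages_consumed[1]  # messages seen by node 1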
Example #6
    def __init__(self, context, num_nodes, kafka, topic, max_messages=-1, security_protocol="PLAINTEXT"):
        super(KafkaLog4jAppender, self).__init__(context, num_nodes)

        self.kafka = kafka
        self.topic = topic
        self.max_messages = max_messages
        self.security_protocol = security_protocol
        self.security_config = SecurityConfig(security_protocol)
Example #7
    def __init__(self, context, num_nodes, kafka, security_protocol, topic, num_records, consumer_fetch_max_wait=100, acks=1):
        super(EndToEndLatencyService, self).__init__(context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.args = {
            'topic': topic,
            'num_records': num_records,
            'consumer_fetch_max_wait': consumer_fetch_max_wait,
            'acks': acks
        }
Example #8
class KafkaLog4jAppender(BackgroundThreadService):

    logs = {
        "producer_log": {
            "path": "/mnt/kafka_log4j_appender.log",
            "collect_default": False}
    }

    def __init__(self, context, num_nodes, kafka, topic, max_messages=-1, security_protocol="PLAINTEXT"):
        super(KafkaLog4jAppender, self).__init__(context, num_nodes)

        self.kafka = kafka
        self.topic = topic
        self.max_messages = max_messages
        self.security_protocol = security_protocol
        self.security_config = SecurityConfig(security_protocol)

    def _worker(self, idx, node):
        cmd = self.start_cmd(node)
        self.logger.debug("VerifiableLog4jAppender %d command: %s" % (idx, cmd))
        self.security_config.setup_node(node)
        node.account.ssh(cmd)

    def start_cmd(self, node):
        cmd = "/opt/%s/bin/" % kafka_dir(node)
        cmd += "kafka-run-class.sh org.apache.kafka.tools.VerifiableLog4jAppender"
        cmd += " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers())

        if self.max_messages > 0:
            cmd += " --max-messages %s" % str(self.max_messages)
        if self.security_protocol == SecurityConfig.SSL:
            cmd += " --security-protocol SSL"
            cmd += " --ssl-truststore-location %s" % str(SecurityConfig.TRUSTSTORE_PATH)
            cmd += " --ssl-truststore-password %s" % str(SecurityConfig.ssl_stores['ssl.truststore.password'])

        cmd += " 2>> /mnt/kafka_log4j_appender.log | tee -a /mnt/kafka_log4j_appender.log &"
        return cmd

    def stop_node(self, node):
        node.account.kill_process("VerifiableLog4jAppender", allow_fail=False)
        if self.worker_threads is None:
            return

        # block until the corresponding thread exits
        if len(self.worker_threads) >= self.idx(node):
            # Need to guard this because stop is preemptively called before the worker threads are added and started
            self.worker_threads[self.idx(node) - 1].join()

    def clean_node(self, node):
        node.account.kill_process("VerifiableLog4jAppender", clean_shutdown=False, allow_fail=False)
        node.account.ssh("rm -rf /mnt/kafka_log4j_appender.log", allow_fail=False)
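
A typical use pairs the appender with a consumer to verify that log4j events reach the topic. A minimal sketch, assuming a running kafka service; wait() joining the background worker is an assumption about BackgroundThreadService:

appender = KafkaLog4jAppender(test_context, num_nodes=1, kafka=kafka,
                              topic="log4j-topic", max_messages=100)
appender.start()
appender.wait()  # block until the VerifiableLog4jAppender worker finishes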
Example #9
    def prop_file(self, node):
        """Return a string which can be used to create a configuration file appropriate for the given node."""
        # Process client configuration
        prop_file = self.render('console_consumer.properties')
        if hasattr(node, "version") and node.version <= LATEST_0_8_2:
            # in 0.8.2.X and earlier, console consumer does not have --timeout-ms option
            # instead, we have to pass it through the config file
            prop_file += "\nconsumer.timeout.ms=%s\n" % str(
                self.consumer_timeout_ms)

        # Add security properties to the config. If security protocol is not specified,
        # use the default in the template properties.
        self.security_config = SecurityConfig(self.security_protocol,
                                              prop_file)
        self.security_protocol = self.security_config.security_protocol

        prop_file += str(self.security_config)
        return prop_file
Example #10
    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 topic,
                 security_protocol=None,
                 max_messages=-1,
                 throughput=100000):
        super(VerifiableProducer, self).__init__(context, num_nodes)

        self.kafka = kafka
        self.topic = topic
        self.max_messages = max_messages
        self.throughput = throughput

        self.acked_values = []
        self.not_acked_values = []

        self.prop_file = ""
        self.security_config = SecurityConfig(security_protocol,
                                              self.prop_file)
        self.security_protocol = self.security_config.security_protocol
        self.prop_file += str(self.security_config)
Example #11
class ConsumerPerformanceService(PerformanceService):
    """
        See ConsumerPerformance.scala as the source of truth on these settings, but for reference:

        "zookeeper" "The connection string for the zookeeper connection in the form host:port. Multiple URLS can
                     be given to allow fail-over. This option is only used with the old consumer."

        "broker-list", "A broker list to use for connecting if using the new consumer."

        "topic", "REQUIRED: The topic to consume from."

        "group", "The group id to consume on."

        "fetch-size", "The amount of data to fetch in a single request."

        "from-latest", "If the consumer does not already have an establishedoffset to consume from,
                        start with the latest message present in the log rather than the earliest message."

        "socket-buffer-size", "The size of the tcp RECV size."

        "threads", "Number of processing threads."

        "num-fetch-threads", "Number of fetcher threads. Defaults to 1"

        "new-consumer", "Use the new consumer implementation."
        "consumer.config", "Consumer config properties file."
    """

    # Root directory for persistent output
    PERSISTENT_ROOT = "/mnt/consumer_performance"
    LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
    STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT,
                                  "consumer_performance.stdout")
    STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT,
                                  "consumer_performance.stderr")
    LOG_FILE = os.path.join(LOG_DIR, "consumer_performance.log")
    LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
    CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "consumer.properties")

    logs = {
        "consumer_performance_output": {
            "path": STDOUT_CAPTURE,
            "collect_default": True
        },
        "consumer_performance_stderr": {
            "path": STDERR_CAPTURE,
            "collect_default": True
        },
        "consumer_performance_log": {
            "path": LOG_FILE,
            "collect_default": True
        }
    }

    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 security_protocol,
                 topic,
                 messages,
                 new_consumer=False,
                 settings={}):
        super(ConsumerPerformanceService, self).__init__(context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.topic = topic
        self.messages = messages
        self.new_consumer = new_consumer
        self.settings = settings

        # These less-frequently used settings can be updated manually after instantiation
        self.fetch_size = None
        self.socket_buffer_size = None
        self.threads = None
        self.num_fetch_threads = None
        self.group = None
        self.from_latest = None

    @property
    def args(self):
        """Dictionary of arguments used to start the Consumer Performance script."""
        args = {
            'topic': self.topic,
            'messages': self.messages,
        }

        if self.new_consumer:
            args['new-consumer'] = ""
            args['broker-list'] = self.kafka.bootstrap_servers()
        else:
            args['zookeeper'] = self.kafka.zk.connect_setting()

        if self.fetch_size is not None:
            args['fetch-size'] = self.fetch_size

        if self.socket_buffer_size is not None:
            args['socket-buffer-size'] = self.socket_buffer_size

        if self.threads is not None:
            args['threads'] = self.threads

        if self.num_fetch_threads is not None:
            args['num-fetch-threads'] = self.num_fetch_threads

        if self.group is not None:
            args['group'] = self.group

        if self.from_latest:
            args['from-latest'] = ""

        return args

    def start_cmd(self, node):
        cmd = "export LOG_DIR=%s;" % ConsumerPerformanceService.LOG_DIR
        cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\";" % ConsumerPerformanceService.LOG4J_CONFIG
        cmd += " /opt/%s/bin/kafka-consumer-perf-test.sh" % kafka_dir(node)
        for key, value in self.args.items():
            cmd += " --%s %s" % (key, value)
        cmd += " --consumer.config %s" % ConsumerPerformanceService.CONFIG_FILE

        for key, value in self.settings.items():
            cmd += " %s=%s" % (str(key), str(value))

        cmd += " 2>> %(stderr)s | tee -a %(stdout)s" % {
            'stdout': ConsumerPerformanceService.STDOUT_CAPTURE,
            'stderr': ConsumerPerformanceService.STDERR_CAPTURE
        }
        return cmd

    def _worker(self, idx, node):
        node.account.ssh("mkdir -p %s" %
                         ConsumerPerformanceService.PERSISTENT_ROOT,
                         allow_fail=False)

        log_config = self.render('tools_log4j.properties',
                                 log_file=ConsumerPerformanceService.LOG_FILE)
        node.account.create_file(ConsumerPerformanceService.LOG4J_CONFIG,
                                 log_config)
        node.account.create_file(ConsumerPerformanceService.CONFIG_FILE,
                                 str(self.security_config))
        self.security_config.setup_node(node)

        cmd = self.start_cmd(node)
        self.logger.debug("Consumer performance %d command: %s", idx, cmd)
        last = None
        for line in node.account.ssh_capture(cmd):
            last = line
        # Parse and save the last line's information
        parts = last.split(',')

        self.results[idx - 1] = {
            'total_mb': float(parts[2]),
            'mbps': float(parts[3]),
            'records_per_sec': float(parts[5]),
        }
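
The _worker above keeps only the final line of output and indexes into its comma-separated fields (total MB at index 2, MB/s at 3, records/s at 5). A hypothetical final line of that shape; the real column layout of kafka-consumer-perf-test.sh is an assumption here:

last = "2016-01-01 00:00:00, 2016-01-01 00:00:10, 95.37, 9.54, 100000, 10000.0"
parts = last.split(',')
assert float(parts[2]) == 95.37     # total_mb
assert float(parts[3]) == 9.54      # mbps
assert float(parts[5]) == 10000.0   # records_per_sec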
Example #12
class ConsoleConsumer(JmxMixin, BackgroundThreadService):
    # Root directory for persistent output
    PERSISTENT_ROOT = "/mnt/console_consumer"
    STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stdout")
    STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stderr")
    LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
    LOG_FILE = os.path.join(LOG_DIR, "console_consumer.log")
    LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
    CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "console_consumer.properties")

    logs = {
        "consumer_stdout": {
            "path": STDOUT_CAPTURE,
            "collect_default": False
        },
        "consumer_stderr": {
            "path": STDERR_CAPTURE,
            "collect_default": False
        },
        "consumer_log": {
            "path": LOG_FILE,
            "collect_default": True
        }
    }

    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 topic,
                 security_protocol=SecurityConfig.PLAINTEXT,
                 new_consumer=False,
                 message_validator=None,
                 from_beginning=True,
                 consumer_timeout_ms=None,
                 version=TRUNK,
                 client_id="console-consumer",
                 jmx_object_names=None,
                 jmx_attributes=[]):
        """
        Args:
            context:                    standard context
            num_nodes:                  number of nodes to use (this should be 1)
            kafka:                      kafka service
            topic:                      consume from this topic
            security_protocol:          security protocol for Kafka connections
            new_consumer:               use new Kafka consumer if True
            message_validator:          function which validates each message and returns it (possibly
                                        transformed), or None to discard it
            from_beginning:             consume from beginning if True, else from the end
            consumer_timeout_ms:        corresponds to consumer.timeout.ms. The consumer process ends if
                                        the time between successively consumed messages exceeds this
                                        timeout. Setting this and waiting for the consumer to stop is a
                                        reliable way to consume all messages in a topic.
        """
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        BackgroundThreadService.__init__(self, context, num_nodes)
        self.kafka = kafka
        self.new_consumer = new_consumer
        self.args = {
            'topic': topic,
        }

        self.consumer_timeout_ms = consumer_timeout_ms
        for node in self.nodes:
            node.version = version

        self.from_beginning = from_beginning
        self.message_validator = message_validator
        self.messages_consumed = {idx: [] for idx in range(1, num_nodes + 1)}
        self.client_id = client_id
        self.security_protocol = security_protocol

        # Validate a few configs
        if self.new_consumer is None:
            self.new_consumer = self.security_protocol == SecurityConfig.SSL
        if self.security_protocol == SecurityConfig.SSL and not self.new_consumer:
            raise Exception(
                "SSL protocol is supported only with the new consumer")

    def prop_file(self, node):
        """Return a string which can be used to create a configuration file appropriate for the given node."""
        # Process client configuration
        prop_file = self.render('console_consumer.properties')
        if hasattr(node, "version") and node.version <= LATEST_0_8_2:
            # in 0.8.2.X and earlier, console consumer does not have --timeout-ms option
            # instead, we have to pass it through the config file
            prop_file += "\nconsumer.timeout.ms=%s\n" % str(
                self.consumer_timeout_ms)

        # Add security properties to the config. If security protocol is not specified,
        # use the default in the template properties.
        self.security_config = SecurityConfig(self.security_protocol,
                                              prop_file)
        self.security_protocol = self.security_config.security_protocol

        prop_file += str(self.security_config)
        return prop_file

    def start_cmd(self, node):
        """Return the start command appropriate for the given node."""
        args = self.args.copy()
        args['zk_connect'] = self.kafka.zk.connect_setting()
        args['stdout'] = ConsoleConsumer.STDOUT_CAPTURE
        args['stderr'] = ConsoleConsumer.STDERR_CAPTURE
        args['log_dir'] = ConsoleConsumer.LOG_DIR
        args['log4j_config'] = ConsoleConsumer.LOG4J_CONFIG
        args['config_file'] = ConsoleConsumer.CONFIG_FILE
        args['jmx_port'] = self.jmx_port
        args['kafka_dir'] = kafka_dir(node)
        args['broker_list'] = self.kafka.bootstrap_servers()

        cmd = "export JMX_PORT=%(jmx_port)s; " \
              "export LOG_DIR=%(log_dir)s; " \
              "export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j_config)s\"; " \
              "/opt/%(kafka_dir)s/bin/kafka-console-consumer.sh " \
              "--topic %(topic)s --consumer.config %(config_file)s" % args

        if self.new_consumer:
            cmd += " --new-consumer --bootstrap-server %(broker_list)s" % args
        else:
            cmd += " --zookeeper %(zk_connect)s" % args
        if self.from_beginning:
            cmd += " --from-beginning"

        if self.consumer_timeout_ms is not None:
            # version 0.8.X and below do not support --timeout-ms option
            # This will be added in the properties file instead
            if node.version > LATEST_0_8_2:
                cmd += " --timeout-ms %s" % self.consumer_timeout_ms

        cmd += " 2>> %(stderr)s | tee -a %(stdout)s &" % args
        return cmd

    def pids(self, node):
        try:
            cmd = "ps ax | grep -i console_consumer | grep java | grep -v grep | awk '{print $1}'"
            pid_arr = [
                pid for pid in node.account.ssh_capture(
                    cmd, allow_fail=True, callback=int)
            ]
            return pid_arr
        except (subprocess.CalledProcessError, ValueError):
            return []

    def alive(self, node):
        return len(self.pids(node)) > 0

    def _worker(self, idx, node):
        node.account.ssh("mkdir -p %s" % ConsoleConsumer.PERSISTENT_ROOT,
                         allow_fail=False)

        # Create and upload config file
        self.logger.info("console_consumer.properties:")

        prop_file = self.prop_file(node)
        self.logger.info(prop_file)
        node.account.create_file(ConsoleConsumer.CONFIG_FILE, prop_file)
        self.security_config.setup_node(node)

        # Create and upload log properties
        log_config = self.render('tools_log4j.properties',
                                 log_file=ConsoleConsumer.LOG_FILE)
        node.account.create_file(ConsoleConsumer.LOG4J_CONFIG, log_config)

        # Run and capture output
        cmd = self.start_cmd(node)
        self.logger.debug("Console consumer %d command: %s", idx, cmd)

        consumer_output = node.account.ssh_capture(cmd, allow_fail=False)
        first_line = next(consumer_output, None)

        if first_line is not None:
            self.start_jmx_tool(idx, node)

            for line in itertools.chain([first_line], consumer_output):
                msg = line.strip()
                if self.message_validator is not None:
                    msg = self.message_validator(msg)
                if msg is not None:
                    self.messages_consumed[idx].append(msg)

            self.read_jmx_output(idx, node)

    def start_node(self, node):
        BackgroundThreadService.start_node(self, node)

    def stop_node(self, node):
        node.account.kill_process("console_consumer", allow_fail=True)
        wait_until(lambda: not self.alive(node),
                   timeout_sec=10,
                   backoff_sec=.2,
                   err_msg="Timed out waiting for consumer to stop.")

    def clean_node(self, node):
        if self.alive(node):
            self.logger.warn(
                "%s %s was still alive at cleanup time. Killing forcefully..."
                % (self.__class__.__name__, node.account))
        JmxMixin.clean_node(self, node)
        node.account.kill_process("java",
                                  clean_shutdown=False,
                                  allow_fail=True)
        node.account.ssh("rm -rf %s" % ConsoleConsumer.PERSISTENT_ROOT,
                         allow_fail=False)
        self.security_config.clean_node(node)
Example #13
class ProducerPerformanceService(JmxMixin, PerformanceService):

    logs = {
        "producer_performance_log": {
            "path": "/mnt/producer-performance.log",
            "collect_default": True},
    }

    def __init__(self, context, num_nodes, kafka, security_protocol, topic, num_records, record_size, throughput, settings=None,
                 intermediate_stats=False, client_id="producer-performance", jmx_object_names=None, jmx_attributes=[]):
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        PerformanceService.__init__(self, context, num_nodes)
        self.kafka = kafka
        self.security_config = SecurityConfig(security_protocol)
        self.security_protocol = security_protocol
        self.args = {
            'topic': topic,
            'num_records': num_records,
            'record_size': record_size,
            'throughput': throughput
        }
        # Copy so a shared default (or the caller's dict) is never mutated in _worker
        self.settings = dict(settings) if settings is not None else {}
        self.intermediate_stats = intermediate_stats
        self.client_id = client_id

    def _worker(self, idx, node):
        args = self.args.copy()
        args.update({'bootstrap_servers': self.kafka.bootstrap_servers(), 'jmx_port': self.jmx_port, 'client_id': self.client_id})
        cmd = "JMX_PORT=%(jmx_port)d /opt/kafka/bin/kafka-run-class.sh org.apache.kafka.clients.tools.ProducerPerformance " \
              "%(topic)s %(num_records)d %(record_size)d %(throughput)d bootstrap.servers=%(bootstrap_servers)s client.id=%(client_id)s" % args

        self.security_config.setup_node(node)
        if self.security_protocol == SecurityConfig.SSL:
            self.settings.update(self.security_config.properties)
        for key, value in self.settings.items():
            cmd += " %s=%s" % (str(key), str(value))
        cmd += " | tee /mnt/producer-performance.log"

        self.logger.debug("Producer performance %d command: %s", idx, cmd)

        def parse_stats(line):
            parts = line.split(',')
            return {
                'records': int(parts[0].split()[0]),
                'records_per_sec': float(parts[1].split()[0]),
                'mbps': float(parts[1].split('(')[1].split()[0]),
                'latency_avg_ms': float(parts[2].split()[0]),
                'latency_max_ms': float(parts[3].split()[0]),
                'latency_50th_ms': float(parts[4].split()[0]),
                'latency_95th_ms': float(parts[5].split()[0]),
                'latency_99th_ms': float(parts[6].split()[0]),
                'latency_999th_ms': float(parts[7].split()[0]),
            }
        last = None
        producer_output = node.account.ssh_capture(cmd)
        first_line = next(producer_output)
        self.start_jmx_tool(idx, node)
        for line in itertools.chain([first_line], producer_output):
            if self.intermediate_stats:
                try:
                    self.stats[idx-1].append(parse_stats(line))
                except (ValueError, IndexError):
                    # Sometimes there are extraneous log messages
                    pass

            last = line
        try:
            self.results[idx-1] = parse_stats(last)
        except Exception:
            raise Exception("Unable to parse aggregate performance statistics on node %d: %s" % (idx, last))
        self.read_jmx_output(idx, node)
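
parse_stats assumes a single comma-separated summary line from ProducerPerformance; the exact wording below is hypothetical, but a line of this shape parses cleanly with the field positions used above:

line = ("100000 records sent, 49975.0 records/sec (47.66 MB/sec), "
        "1.2 ms avg latency, 10.0 ms max latency, "
        "1 ms 50th, 2 ms 95th, 3 ms 99th, 5 ms 99.9th.")
parts = line.split(',')
assert int(parts[0].split()[0]) == 100000                  # records
assert float(parts[1].split('(')[1].split()[0]) == 47.66   # mbps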
Example #14
class KafkaService(JmxMixin, Service):

    logs = {
        "kafka_log": {
            "path": "/mnt/kafka.log",
            "collect_default": True
        },
        "kafka_data": {
            "path": "/mnt/kafka-logs",
            "collect_default": False
        }
    }

    def __init__(self,
                 context,
                 num_nodes,
                 zk,
                 security_protocol=SecurityConfig.PLAINTEXT,
                 interbroker_security_protocol=SecurityConfig.PLAINTEXT,
                 topics=None,
                 quota_config=None,
                 jmx_object_names=None,
                 jmx_attributes=[]):
        """
        :type context
        :type zk: ZookeeperService
        :type topics: dict
        """
        Service.__init__(self, context, num_nodes)
        JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
        self.zk = zk
        if security_protocol == SecurityConfig.SSL or interbroker_security_protocol == SecurityConfig.SSL:
            self.security_config = SecurityConfig(SecurityConfig.SSL)
        else:
            self.security_config = SecurityConfig(SecurityConfig.PLAINTEXT)
        self.security_protocol = security_protocol
        self.interbroker_security_protocol = interbroker_security_protocol
        self.port = 9092 if security_protocol == SecurityConfig.PLAINTEXT else 9093
        self.topics = topics
        self.quota_config = quota_config

    def start(self):
        Service.start(self)

        # Create topics if necessary
        if self.topics is not None:
            for topic, topic_cfg in self.topics.items():
                if topic_cfg is None:
                    topic_cfg = {}

                topic_cfg["topic"] = topic
                self.create_topic(topic_cfg)

    def start_node(self, node):
        props_file = self.render(
            'kafka.properties',
            node=node,
            broker_id=self.idx(node),
            port=self.port,
            security_protocol=self.security_protocol,
            quota_config=self.quota_config,
            interbroker_security_protocol=self.interbroker_security_protocol)
        self.logger.info("kafka.properties:")
        self.logger.info(props_file)
        node.account.create_file("/mnt/kafka.properties", props_file)
        self.security_config.setup_node(node)

        cmd = "JMX_PORT=%d /opt/kafka/bin/kafka-server-start.sh /mnt/kafka.properties 1>> /mnt/kafka.log 2>> /mnt/kafka.log & echo $! > /mnt/kafka.pid" % self.jmx_port
        self.logger.debug(
            "Attempting to start KafkaService on %s with command: %s" %
            (str(node.account), cmd))
        with node.account.monitor_log("/mnt/kafka.log") as monitor:
            node.account.ssh(cmd)
            monitor.wait_until("Kafka Server.*started",
                               timeout_sec=30,
                               err_msg="Kafka server didn't finish startup")
        self.start_jmx_tool(self.idx(node), node)
        if len(self.pids(node)) == 0:
            raise Exception("No process ids recorded on node %s" % str(node))

    def pids(self, node):
        """Return process ids associated with running processes on the given node."""
        try:
            return [
                pid for pid in node.account.ssh_capture("cat /mnt/kafka.pid",
                                                        callback=int)
            ]
        except Exception:
            return []

    def signal_node(self, node, sig=signal.SIGTERM):
        pids = self.pids(node)
        for pid in pids:
            node.account.signal(pid, sig)

    def signal_leader(self, topic, partition=0, sig=signal.SIGTERM):
        leader = self.leader(topic, partition)
        self.signal_node(leader, sig)

    def stop_node(self, node, clean_shutdown=True):
        pids = self.pids(node)
        sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL

        for pid in pids:
            node.account.signal(pid, sig, allow_fail=False)

        node.account.ssh("rm -f /mnt/kafka.pid", allow_fail=False)

    def clean_node(self, node):
        JmxMixin.clean_node(self, node)
        node.account.kill_process("kafka",
                                  clean_shutdown=False,
                                  allow_fail=True)
        node.account.ssh(
            "rm -rf /mnt/kafka-logs /mnt/kafka.properties /mnt/kafka.log /mnt/kafka.pid",
            allow_fail=False)
        self.security_config.clean_node(node)

    def create_topic(self, topic_cfg):
        node = self.nodes[0]  # any node is fine here
        self.logger.info("Creating topic %s with settings %s",
                         topic_cfg["topic"], topic_cfg)

        cmd = "/opt/kafka/bin/kafka-topics.sh --zookeeper %(zk_connect)s --create "\
            "--topic %(topic)s --partitions %(partitions)d --replication-factor %(replication)d" % {
                'zk_connect': self.zk.connect_setting(),
                'topic': topic_cfg.get("topic"),
                'partitions': topic_cfg.get('partitions', 1),
                'replication': topic_cfg.get('replication-factor', 1)
            }

        if "configs" in topic_cfg.keys() and topic_cfg["configs"] is not None:
            for config_name, config_value in topic_cfg["configs"].items():
                cmd += " --config %s=%s" % (config_name, str(config_value))

        self.logger.info("Running topic creation command...\n%s" % cmd)
        node.account.ssh(cmd)

        time.sleep(1)
        self.logger.info(
            "Checking to see if topic was properly created...\n%s" % cmd)
        for line in self.describe_topic(topic_cfg["topic"]).split("\n"):
            self.logger.info(line)

    def describe_topic(self, topic):
        node = self.nodes[0]
        cmd = "/opt/kafka/bin/kafka-topics.sh --zookeeper %s --topic %s --describe" % \
              (self.zk.connect_setting(), topic)
        output = ""
        for line in node.account.ssh_capture(cmd):
            output += line
        return output

    def verify_reassign_partitions(self, reassignment):
        """Run the reassign partitions admin tool in "verify" mode
        """
        node = self.nodes[0]
        json_file = "/tmp/" + str(time.time()) + "_reassign.json"

        # reassignment to json
        json_str = json.dumps(reassignment)
        json_str = json.dumps(json_str)

        # create command
        cmd = "echo %s > %s && " % (json_str, json_file)
        cmd += "/opt/kafka/bin/kafka-reassign-partitions.sh "\
                "--zookeeper %(zk_connect)s "\
                "--reassignment-json-file %(reassignment_file)s "\
                "--verify" % {'zk_connect': self.zk.connect_setting(),
                                'reassignment_file': json_file}
        cmd += " && sleep 1 && rm -f %s" % json_file

        # send command
        self.logger.info("Verifying parition reassignment...")
        self.logger.debug(cmd)
        output = ""
        for line in node.account.ssh_capture(cmd):
            output += line

        self.logger.debug(output)

        if re.match(".*is in progress.*", output) is not None:
            return False

        return True

    def execute_reassign_partitions(self, reassignment):
        """Run the reassign partitions admin tool in "verify" mode
        """
        node = self.nodes[0]
        json_file = "/tmp/" + str(time.time()) + "_reassign.json"

        # reassignment to json
        json_str = json.dumps(reassignment)
        json_str = json.dumps(json_str)

        # create command
        cmd = "echo %s > %s && " % (json_str, json_file)
        cmd += "/opt/kafka/bin/kafka-reassign-partitions.sh "\
                "--zookeeper %(zk_connect)s "\
                "--reassignment-json-file %(reassignment_file)s "\
                "--execute" % {'zk_connect': self.zk.connect_setting(),
                                'reassignment_file': json_file}
        cmd += " && sleep 1 && rm -f %s" % json_file

        # send command
        self.logger.info("Executing parition reassignment...")
        self.logger.debug(cmd)
        output = ""
        for line in node.account.ssh_capture(cmd):
            output += line

        self.logger.debug("Verify partition reassignment:")
        self.logger.debug(output)

    def restart_node(self, node, wait_sec=0, clean_shutdown=True):
        """Restart the given node, waiting wait_sec in between stopping and starting up again."""
        self.stop_node(node, clean_shutdown)
        time.sleep(wait_sec)
        self.start_node(node)

    def leader(self, topic, partition=0):
        """ Get the leader replica for the given topic and partition.
        """
        cmd = "/opt/kafka/bin/kafka-run-class.sh kafka.tools.ZooKeeperMainWrapper -server %s " \
              % self.zk.connect_setting()
        cmd += "get /brokers/topics/%s/partitions/%d/state" % (topic,
                                                               partition)
        self.logger.debug(cmd)

        node = self.nodes[0]
        self.logger.debug(
            "Querying zookeeper to find leader replica for topic %s: \n%s" %
            (topic, cmd))
        partition_state = None
        for line in node.account.ssh_capture(cmd):
            match = re.match("^({.+})$", line)
            if match is not None:
                partition_state = match.groups()[0]
                break

        if partition_state is None:
            raise Exception(
                "Error finding partition state for topic %s and partition %d."
                % (topic, partition))

        partition_state = json.loads(partition_state)
        self.logger.info(partition_state)

        leader_idx = int(partition_state["leader"])
        self.logger.info("Leader for topic %s and partition %d is now: %d" %
                         (topic, partition, leader_idx))
        return self.get_node(leader_idx)

    def bootstrap_servers(self):
        """Get the broker list to connect to Kafka using the specified security protocol
        """
        return ','.join([
            node.account.hostname + ":" + str(self.port) for node in self.nodes
        ])

    def read_jmx_output_all_nodes(self):
        for node in self.nodes:
            self.read_jmx_output(self.idx(node), node)
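
End to end, the service is normally started with its topics declared up front, and helpers such as leader() and restart_node() drive failure-injection tests. A sketch, assuming a running ZookeeperService named zk:

kafka = KafkaService(test_context, num_nodes=3, zk=zk,
                     topics={"test-topic": {"partitions": 1,
                                            "replication-factor": 3}})
kafka.start()  # also creates the declared topics
leader = kafka.leader("test-topic", partition=0)
kafka.restart_node(leader, wait_sec=5)  # bounce the partition leader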
Example #15
class VerifiableProducer(BackgroundThreadService):

    CONFIG_FILE = "/mnt/verifiable_producer.properties"
    logs = {
        "producer_log": {
            "path": "/mnt/producer.log",
            "collect_default": False
        }
    }

    def __init__(self,
                 context,
                 num_nodes,
                 kafka,
                 topic,
                 security_protocol=None,
                 max_messages=-1,
                 throughput=100000):
        super(VerifiableProducer, self).__init__(context, num_nodes)

        self.kafka = kafka
        self.topic = topic
        self.max_messages = max_messages
        self.throughput = throughput

        self.acked_values = []
        self.not_acked_values = []

        self.prop_file = ""
        self.security_config = SecurityConfig(security_protocol,
                                              self.prop_file)
        self.security_protocol = self.security_config.security_protocol
        self.prop_file += str(self.security_config)

    def _worker(self, idx, node):
        # Create and upload config file
        self.logger.info("verifiable_producer.properties:")
        self.logger.info(self.prop_file)
        node.account.create_file(VerifiableProducer.CONFIG_FILE,
                                 self.prop_file)
        self.security_config.setup_node(node)

        cmd = self.start_cmd
        self.logger.debug("VerifiableProducer %d command: %s" % (idx, cmd))

        for line in node.account.ssh_capture(cmd):
            line = line.strip()

            data = self.try_parse_json(line)
            if data is not None:

                with self.lock:
                    if data["name"] == "producer_send_error":
                        data["node"] = idx
                        self.not_acked_values.append(int(data["value"]))

                    elif data["name"] == "producer_send_success":
                        self.acked_values.append(int(data["value"]))

    @property
    def start_cmd(self):
        cmd = "/opt/kafka/bin/kafka-verifiable-producer.sh" \
              " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers())
        if self.max_messages > 0:
            cmd += " --max-messages %s" % str(self.max_messages)
        if self.throughput > 0:
            cmd += " --throughput %s" % str(self.throughput)

        cmd += " --producer.config %s" % VerifiableProducer.CONFIG_FILE
        cmd += " 2>> /mnt/producer.log | tee -a /mnt/producer.log &"
        return cmd

    @property
    def acked(self):
        with self.lock:
            return self.acked_values

    @property
    def not_acked(self):
        with self.lock:
            return self.not_acked_values

    @property
    def num_acked(self):
        with self.lock:
            return len(self.acked_values)

    @property
    def num_not_acked(self):
        with self.lock:
            return len(self.not_acked_values)

    def stop_node(self, node):
        node.account.kill_process("VerifiableProducer", allow_fail=False)
        if self.worker_threads is None:
            return

        # block until the corresponding thread exits
        if len(self.worker_threads) >= self.idx(node):
            # Need to guard this because stop is preemptively called before the worker threads are added and started
            self.worker_threads[self.idx(node) - 1].join()

    def clean_node(self, node):
        node.account.kill_process("VerifiableProducer",
                                  clean_shutdown=False,
                                  allow_fail=False)
        node.account.ssh(
            "rm -rf /mnt/producer.log /mnt/verifiable_producer.properties",
            allow_fail=False)
        self.security_config.clean_node(node)

    def try_parse_json(self, string):
        """Try to parse a string as json. Return None if not parseable."""
        try:
            record = json.loads(string)
            return record
        except ValueError:
            self.logger.debug("Could not parse as json: %s" % str(string))
            return None
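
The acked/not-acked bookkeeping is what tests assert against. A sketch, assuming a running kafka service; as above, wait() joining the worker threads is an assumption about BackgroundThreadService:

producer = VerifiableProducer(test_context, num_nodes=1, kafka=kafka,
                              topic="test-topic", max_messages=1000)
producer.start()
producer.wait()  # block until the producer has sent max_messages
assert producer.num_acked + producer.num_not_acked == 1000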
Example #16
    def security_config(self):
        if self.security_protocol == SecurityConfig.SSL or self.interbroker_security_protocol == SecurityConfig.SSL:
            return SecurityConfig(SecurityConfig.SSL)
        else:
            return SecurityConfig(SecurityConfig.PLAINTEXT)
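
Unlike Examples #3 and #14, which compute security_config once in __init__, this variant derives it on demand from the two protocol attributes. Presumably it is exposed as a read-only property on the service class; that wiring is an assumption, sketched here:

class KafkaService(JmxMixin, Service):
    # ... __init__ as in Example #14, minus the cached security_config ...

    @property
    def security_config(self):
        # Recompute on each access; call sites such as
        # self.security_config.setup_node(node) read it like an attribute.
        if (self.security_protocol == SecurityConfig.SSL or
                self.interbroker_security_protocol == SecurityConfig.SSL):
            return SecurityConfig(SecurityConfig.SSL)
        return SecurityConfig(SecurityConfig.PLAINTEXT)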