Example #1
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     cls.consumer = cls.client.topics[cls.topic_name].get_simple_consumer(
         consumer_timeout_ms=1000)
Example #2
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-rdkafka-consumer'
     cls.n_partitions = 3
     cls.kafka.create_topic(cls.topic_name, cls.n_partitions, 2)
     cls.partition_ids = list(range(cls.n_partitions))
     cls.start_offsets = cls.n_partitions * [0]
Example #3
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = 'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     prod = cls.client.topics[cls.topic_name].get_producer(batch_size=5)
     prod.produce('msg {num}'.format(num=i) for i in range(1000))
Example #4
def mk_topics(num_partitions_range=NUM_PARTITIONS_RANGE,
              msg_size_bytes_range=MSG_SIZE_BYTES_RANGE,
              msg_sum_bytes=10**9,
              max_num_msgs=10**6):
    """
    Pre-populate an existing testinstances cluster with test messages

    A topic is created for each combination of partition-count and
    message-size (passed in iterables).  Note that it's easy to eat lots of
    disk space with this!

    :param msg_sum_bytes: Limit the number of messages written to a topic
                          so that topic will not exceed this number of bytes
    :param max_num_msgs: Limit to number of messages written to any topic
    """
    cluster = get_cluster()  # make this in advance, like in travis.yml
    topics = [(np, sz, get_topic_name(np, sz))
              for np in num_partitions_range
              for sz in msg_size_bytes_range]

    for num_partitions, msg_size_bytes, topic_name in topics:
        cluster.create_topic(topic_name, num_partitions, replication_factor=1)
        print "Producing into {}".format(topic_name)
        client = KafkaClient(cluster.brokers)
        n_msgs = min(msg_sum_bytes // msg_size_bytes, max_num_msgs)
        prod = client.topics[topic_name].get_producer()
        prod.produce(msg_size_bytes * b" " for _ in xrange(n_msgs))
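
A typical invocation of mk_topics might look like the following sketch; the
ranges and caps shown here are illustrative assumptions, not values from the
source (the real defaults come from NUM_PARTITIONS_RANGE and
MSG_SIZE_BYTES_RANGE):

def populate_bench_topics():
    # Hypothetical usage: create benchmark topics with 1 and 3 partitions
    # and message sizes of 100 B and 10 kB, capping each topic at ~100 MB
    # and at most 100,000 messages.
    mk_topics(num_partitions_range=(1, 3),
              msg_size_bytes_range=(100, 10**4),
              msg_sum_bytes=10**8,
              max_num_msgs=10**5)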
Example #5
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = 'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     prod = cls.client.topics[cls.topic_name].get_producer(batch_size=5)
     prod.produce('msg {num}'.format(num=i) for i in range(1000))
Example #6
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     cls.consumer = cls.client.topics[cls.topic_name].get_simple_consumer(
         consumer_timeout_ms=1000)
Example #7
    def setUpClass(cls):
        cls._logger = logging.getLogger(__name__)
        cls._logger.setLevel(logging.DEBUG)
        cls._kafka = get_cluster()
        topics = ["in", "out", "fail"]
        for topic in topics:
            cls._kafka.create_topic(topic, 3, 2)
        cls._connection = cls._kafka.connection
        cls._client = KafkaClient(cls._kafka.brokers)
        if "in" not in cls._client.topics:
            cls._logger.debug("in not found in topics, waiting")
            time.sleep(1)
            cls._client.update_cluster()

        if "in" not in cls._client.topics:
            msg = (
                "Client topics {0} do not include required input topic".format(
                    cls._client.topics))
            cls._logger.error(msg)
            raise Exception(msg)

        cls._transformer = PureKafkaValueTransformer(
            brokers=cls._kafka.brokers,
            consumer_kwargs={'zookeeper_connect': cls._kafka.zookeeper},
            producer_kwargs={},
            transform_function=transform,
            in_topic="in",
            out_topic="out",
            backoff=1.0,
            failure_topic="fail")
Example #8
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers,
                              use_greenlets=cls.USE_GEVENT,
                              broker_version=kafka_version)
Example #9
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-rdkafka-consumer'
     cls.n_partitions = 3
     cls.kafka.create_topic(cls.topic_name, cls.n_partitions, 2)
     cls.partition_ids = list(range(cls.n_partitions))
     cls.start_offsets = cls.n_partitions * [0]
Example #10
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers,
                              use_greenlets=cls.USE_GEVENT,
                              broker_version=kafka_version)
Example #11
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = 'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.kafka.produce_messages(
         cls.topic_name,
         ('msg {}'.format(i) for i in range(1000))
     )
     cls.client = KafkaClient(cls.kafka.brokers)
Example #12
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = 'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     cls.producer = cls.client.topics[cls.topic_name].get_producer()
     cls.total_messages = 99
     for i in range(cls.total_messages):
         cls.producer.produce(["message %s" % i])
Example #13
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = 'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     topic = cls.client.topics[cls.topic_name]
     cls.producer = topic.get_producer(min_queued_messages=1)
     cls.total_messages = 99
     for i in range(cls.total_messages):
         cls.producer.produce("message %s" % i)
Example #14
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = uuid4().hex.encode()
     cls.n_partitions = 3
     cls.kafka.create_topic(cls.topic_name, cls.n_partitions, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     cls.prod = cls.client.topics[cls.topic_name].get_producer(
         min_queued_messages=1)
     for i in range(1000):
         cls.prod.produce('msg {num}'.format(num=i).encode())
Example #15
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     topic = cls.client.topics[cls.topic_name]
     cls.producer = topic.get_producer(min_queued_messages=1)
     cls.total_messages = 99
     for i in range(cls.total_messages):
         cls.producer.produce("message {}".format(i).encode())
Example #16
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     cls.prod = cls.client.topics[cls.topic_name].get_producer(
         min_queued_messages=1
     )
     for i in range(1000):
         cls.prod.produce('msg {num}'.format(num=i).encode())
Example #17
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = b'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
     cls.prod = cls.client.topics[cls.topic_name].get_producer(
         min_queued_messages=1
     )
     for i in range(1000):
         cls.prod.produce('msg {num}'.format(num=i).encode())
Example #18
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = uuid4().hex.encode()
     cls.n_partitions = 3
     cls.kafka.create_topic(cls.topic_name, cls.n_partitions, 2)
     cls.client = KafkaClient(cls.kafka.brokers, use_greenlets=cls.USE_GEVENT)
     cls.prod = cls.client.topics[cls.topic_name].get_producer(
         min_queued_messages=1
     )
     for i in range(1000):
         cls.prod.produce('msg {num}'.format(num=i).encode())
Example #19
    def setUpClass(cls):
        cls.kafka = get_cluster()
        cls.topic_name = uuid4().hex.encode()
        cls.kafka.create_topic(cls.topic_name, 3, 2)

        cls.total_msgs = 1000
        cls.client = KafkaClient(cls.kafka.brokers,
                                 broker_version=kafka_version)
        cls.prod = cls.client.topics[cls.topic_name].get_producer(
            min_queued_messages=1)
        for i in range(cls.total_msgs):
            cls.prod.produce('msg {i}'.format(i=i).encode())
Example #20
    def setUpClass(cls):
        cls.kafka = get_cluster()
        cls.topic_name = uuid4().hex.encode()
        cls.kafka.create_topic(cls.topic_name, 3, 2)

        cls.total_msgs = 1000
        cls.client = KafkaClient(cls.kafka.brokers, broker_version=kafka_version)
        cls.prod = cls.client.topics[cls.topic_name].get_producer(
            min_queued_messages=1
        )
        for i in range(cls.total_msgs):
            cls.prod.produce('msg {i}'.format(i=i).encode())
Example #21
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = uuid4().hex.encode()
     cls.n_partitions = 3
     cls.kafka.create_topic(cls.topic_name, cls.n_partitions, 2)
     cls.total_msgs = 1000
     cls.client = KafkaClient(cls.kafka.brokers,
                              use_greenlets=cls.USE_GEVENT,
                              broker_version=kafka_version_string)
     cls.prod = cls.client.topics[cls.topic_name].get_producer(
         min_queued_messages=1)
     for i in range(cls.total_msgs):
         cls.prod.produce('msg {num}'.format(num=i).encode())
Example #22
def run_bench(consumer_type,
              num_partitions,
              msg_size_bytes,
              num_consumer_fetchers,
              queued_max_messages,
              num_iterations,
              filename_append="consumer_bench.json"):
    """
    Run a single timeit-benchmark for num_iterations of consume() calls

    This is a somewhat awkward-looking runner, because we currently have an
    issue where consumers don't get garbage-collected, and so their worker
    threads keep going even after timeit is done with them (and even if we do
    slip in a stop() call, they don't release the memory for their message
    queues - I tried that).  As a workaround, we must exit the interpreter
    after every call to run_bench (cf. run_bench_in_shell), and then aggregate
    the running times in the analysis script instead.

    :param consumer_type: Either "pure-py" or "rdkafka"
    :param num_partitions: Number of partitions in topic. You need to prepare
                           a test-topic with this parameter beforehand, see
                           mk_topics()
    :param msg_size_bytes: Size of messages in topic. You need to prepare
                           a test-topic with this parameter beforehand, see
                           mk_topics()
    :param num_consumer_fetchers: Passed to consumer init
    :param queued_max_messages: Passed to consumer init
    :param num_iterations: Number of iterations in timeit
    :param filename_append: A line of JSONified benchmark-data is appended to
                            the specified file
    """
    setup = (SETUP.format(topic_name=get_topic_name(num_partitions,
                                                    msg_size_bytes),
                          num_consumer_fetchers=num_consumer_fetchers,
                          queued_max_messages=queued_max_messages)
             + SETUP_FIN[consumer_type])

    timer = timeit.Timer("cons.consume().value", setup)
    runtime_secs = timer.timeit(num_iterations)

    data = {k: v for k, v in vars().items() if k in ("consumer_type",
                                                     "num_partitions",
                                                     "msg_size_bytes",
                                                     "num_consumer_fetchers",
                                                     "queued_max_messages",
                                                     "num_iterations",
                                                     "runtime_secs")}
    data["num_brokers"] = len(get_cluster().brokers.split(','))
    with open(filename_append, 'a') as f:
        f.write(json.dumps(data) + '\n')
    print(data)
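
The docstring above refers to run_bench_in_shell, which is not shown in this
listing. A minimal sketch of the workaround it describes follows (running
run_bench in a fresh interpreter per benchmark); the module name "bench" and
the argument handling are assumptions:

def run_bench_in_shell(*args):
    # Hypothetical helper: execute run_bench() in a child interpreter so
    # that leaked consumer threads and message-queue memory are reclaimed
    # when the child process exits.
    import subprocess
    import sys
    code = "import bench; bench.run_bench({})".format(
        ", ".join(repr(a) for a in args))
    subprocess.check_call([sys.executable, "-c", code])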
Example #23
    def setUpClass(cls):
        cls.kafka = get_cluster()
        cls.topic_name = uuid4().hex.encode()
        cls.kafka.create_topic(cls.topic_name, 3, 2)

        # It turns out that the underlying producer used by KafkaInstance will
        # write all messages in a batch to a single partition, though not the
        # same partition every time.  We try to attain some spread here by
        # sending more than one batch:
        batch = 300
        cls.total_msgs = 3 * batch
        for _ in range(3):
            cls.kafka.produce_messages(cls.topic_name, ('msg {i}'.format(i=i)
                                                        for i in range(batch)))

        cls.client = KafkaClient(cls.kafka.brokers)
Example #24
    def setUpClass(cls):
        cls.kafka = get_cluster()
        if not isinstance(cls.kafka, ManagedInstance):
            pytest.skip("Only test on ManagedInstance (run locally)")
        cls.client = KafkaClient(cls.kafka.brokers)

        # BrokerConnection
        ports = cls.kafka._port_generator(9092)
        cls.dest_port = next(ports)
        cls.src_port = next(ports)
        cls.conn = BrokerConnection('localhost',
                                    cls.dest_port,
                                    cls.client._handler,
                                    buffer_size=1024 * 1024,
                                    source_host='localhost',
                                    source_port=cls.src_port,
                                    ssl_config=None)
Example #25
    def setUpClass(cls):
        cls.kafka = get_cluster()
        if not isinstance(cls.kafka, ManagedInstance):
            pytest.skip("Only test on ManagedInstance (run locally)")
        cls.client = KafkaClient(cls.kafka.brokers)

        # BrokerConnection
        ports = cls.kafka._port_generator(9092)
        cls.dest_port = next(ports)
        cls.src_port = next(ports)
        cls.conn = BrokerConnection('localhost',
                                    cls.dest_port,
                                    cls.client._handler,
                                    buffer_size=1024 * 1024,
                                    source_host='localhost',
                                    source_port=cls.src_port,
                                    ssl_config=None)
Example #26
    def setUpClass(cls):
        cls.kafka = get_cluster()
        cls.topic_name = uuid4().hex.encode()
        cls.kafka.create_topic(cls.topic_name, 3, 2)

        # It turns out that the underlying producer used by KafkaInstance will
        # write all messages in a batch to a single partition, though not the
        # same partition every time.  We try to attain some spread here by
        # sending more than one batch:
        batch = 300
        cls.total_msgs = 3 * batch
        for _ in range(3):
            cls.kafka.produce_messages(
                cls.topic_name,
                ('msg {i}'.format(i=i) for i in range(batch)))

        cls.client = KafkaClient(cls.kafka.brokers)
Example #27
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = 'test-data'
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
Example #28
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.client = KafkaClient(cls.kafka.brokers)
Example #29
 def setUpClass(cls):
     cls.kafka = get_cluster()
     if cls.kafka.brokers_ssl is None:
         pytest.skip("Test-cluster doesn't advertise ssl ports.")
Example #30
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.topic_name = "test-data"
     cls.kafka.create_topic(cls.topic_name, 3, 2)
     cls.client = KafkaClient(cls.kafka.brokers)
Example #31
 def setUpClass(cls):
     cls.kafka = get_cluster()
     cls.client = KafkaClient(cls.kafka.brokers)
Example #32
 def setUpClass(cls):
     cls.kafka = get_cluster()
     if cls.kafka.brokers_ssl is None:
         pytest.skip("Test-cluster doesn't advertise ssl ports.")