def __init__(self, topic, broker_partitions, end_broker_partitions=None):
        """If broker_partitions is a list of BrokerPartitions, we assume that
        we'll start at the latest offset. If broker_partitions is a mapping of
        BrokerPartitions to offsets, we'll start at those offsets."""
        self._topic = topic
        self._broker_partitions = sorted(broker_partitions)
        self._stats = defaultdict(
            lambda: ConsumerStats(fetches=0, bytes=0, messages=0, max_fetch=0))

        # This will collapse duplicates so we only have one conn per host/port
        broker_conn_info = frozenset(
            (bp.broker_id, bp.host, bp.port) for bp in self._broker_partitions)
        self._connections = dict((broker_id, Kafka(host, port))
                                 for broker_id, host, port in broker_conn_info)

        # Figure out where we're going to start from...
        if isinstance(broker_partitions, Mapping):
            self._bps_to_next_offsets = broker_partitions
        else:
            self._bps_to_next_offsets = dict(
                (bp,
                 self._connections[bp].latest_offset(bp.topic, bp.partition))
                for bp in broker_partitions)

        self._end_broker_partitions = end_broker_partitions or {}
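
The docstring above describes two ways to seed the starting offsets. A minimal usage sketch of both modes follows; `Consumer` stands in for whichever class owns this `__init__`, and the `BrokerPartition` field names shown are assumptions about its shape, not a documented constructor:

# Hypothetical BrokerPartition fields; adjust to the real namedtuple.
bp = BrokerPartition(broker_id=0, host="localhost", port=9092,
                     topic="t1", partition=0)

consumer = Consumer("t1", [bp])          # list: start at the latest offset
consumer = Consumer("t1", {bp: 1024})    # mapping: start at offset 1024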
Example 2
    def test_kafka(self):
        kafka = Kafka()
        topic = get_unique_topic('test-kafka')
        start_offset = 0

        input_messages = ['message0', 'message1', 'message2']

        kafka.produce(topic, input_messages)
        time.sleep(MESSAGE_DELAY_SECS)
        fetch_results = kafka.fetch(topic, start_offset)

        output_messages = []
        offsets = []
        for offset, output_message in fetch_results:
            output_messages.append(output_message)
            offsets.append(offset)

        self.assertEqual(input_messages, output_messages)

        actual_latest_offsets = kafka.offsets(topic,
                                              LATEST_OFFSET,
                                              max_offsets=1)

        self.assertEqual(len(actual_latest_offsets), 1)
        expected_latest_offset = offsets[-1] + Lengths.MESSAGE_HEADER \
            + len(output_messages[-1])
        self.assertEqual(expected_latest_offset, actual_latest_offsets[0])

        actual_earliest_offsets = kafka.offsets(topic,
                                                EARLIEST_OFFSET,
                                                max_offsets=1)

        self.assertEqual(len(actual_earliest_offsets), 1)
        self.assertEqual(0, actual_earliest_offsets[0])
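
The latest-offset check above leans on the old Kafka wire format, where the offset of the next message is the previous message's offset plus a fixed header plus the payload length. A hedged sketch of that arithmetic; `next_offset` is a hypothetical helper, only `Lengths.MESSAGE_HEADER` comes from this library:

def next_offset(offset, payload):
    # Offset of the message that follows `payload` in the log:
    # one fixed-size per-message header plus the payload bytes.
    return offset + Lengths.MESSAGE_HEADER + len(payload)

# e.g. the offset right after the last fetched message:
# next_offset(offsets[-1], output_messages[-1]) == actual_latest_offsets[0]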
Example 3
    def setUp(self):
        self.k = Kafka()
        self.topic_name = get_unique_topic('test-kafka-topic')
        input_messages = ['Rusty', 'Patty', 'Jack', 'Clyde']
        self.k.produce(self.topic_name, input_messages)

        # Without this sleep, a fetch that comes immediately after a produce
        # can hit a state where the produced messages show up duplicated
        # (they really are duplicated on the Kafka side).
        time.sleep(MESSAGE_DELAY_SECS)
        self.dogs_queue = self.k.topic(self.topic_name)
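
The fixed sleep makes these tests timing-sensitive. One alternative, sketched below, is to poll fetch until the produced messages become visible; `wait_for_messages` is a hypothetical helper, not part of this library:

import time

def wait_for_messages(kafka, topic, expected_count,
                      timeout=5.0, poll_interval=0.1):
    # Poll from offset 0 until at least `expected_count` messages are
    # fetchable, or give up after `timeout` seconds.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if len(list(kafka.fetch(topic, 0))) >= expected_count:
            return True
        time.sleep(poll_interval)
    return False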
Example 4
def test_3x5_consumer_rebalancing():
    """Consumer rebalancing, with auto rebalancing."""
    log_break("test_3x5_consumer_rebalancing")
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for topic in ["t1", "t2", "t3"]:
            k.produce(topic, ["bootstrap"], 0)
    delay()

    producer = ZKProducer(ZK_CONNECT_STR, "t1")
    assert_equals(len(producer.broker_partitions),
                  topology_3x5.total_partitions,
                  "We should be sending to all broker_partitions.")

    c1 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c1.broker_partitions), topology_3x5.total_partitions,
                  "Only one consumer, it should have all partitions.")
    c2 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c2.broker_partitions),
                  topology_3x5.total_partitions // 2,
                  "Second consumer should take half the partitions.")

    delay()
    assert_equals(len(set(c1.broker_partitions + c2.broker_partitions)),
                  topology_3x5.total_partitions,
                  "We should have all broker partitions covered.")

    c3 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c3.broker_partitions),
                  topology_3x5.total_partitions // 3,
                  "Third consumer should take a third of the partitions.")

    delay()
    assert_equals(sum(len(c.broker_partitions) for c in [c1, c2, c3]),
                  topology_3x5.total_partitions,
                  "All BrokerPartitions should be accounted for.")
    assert_equals(
        len(
            set(c1.broker_partitions + c2.broker_partitions +
                c3.broker_partitions)), topology_3x5.total_partitions,
        "There should be no overlaps")
Example 5
    def test_cant_connect(self):
        # A timestamp string shouldn't resolve as a hostname, so the
        # connection is guaranteed to fail.
        kafka = Kafka(host=str(time.time()))
        topic = get_unique_topic('test-cant-connect')

        self.assertRaises(ConnectionFailure, kafka.produce, topic,
                          'wont appear')
Example 6
def send_to_all_partitions(partitions_per_broker, topic, messages):
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for partition in range(partitions_per_broker):
            k.produce(topic, messages, partition)
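
A usage sketch; the 5-partitions-per-broker value mirrors the 3x5 topology above and is otherwise an assumption:

# Seed partitions 0-4 of topic "t1" on every broker with one message.
send_to_all_partitions(5, "t1", ["bootstrap"])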