Example #1
def test_3x5_reconnects():
    """Test that we keep trying to read, even if our brokers go down.

    We're going to:

    1. Send messages to all partitions in a topic, across all brokers
    2. Do a fetch (this will cause the Consumer to rebalance itself and find
       everything).
    3. Set the Consumer to disable rebalancing.
    4. Shut down one of the brokers
    5. Assert that nothing blows up
    6. Restart the broker and assert that the consumer can read from it again.

    Note that the partition split is always based on what's in ZooKeeper. So 
    even if the broker is dead or unreachable, we still keep its partitions and 
    try to contact it. Maybe there's a firewall issue preventing our server from
    hitting it. We don't want to risk messing up other consumers by grabbing
    partitions that might belong to them.
    """
    send_to_all_partitions("topic_3x5_reconnects", ["Rusty"])
    time.sleep(MESSAGE_DELAY_SECS)

    c1 = ZKConsumer(ZK_CONNECT_STR, "group_3x5_reconnects", "topic_3x5_reconnects")
    result = c1.fetch()
    assert_equal(topology_3x5.total_partitions, len(result))
    for msg_set in result:
        assert_equal(msg_set.messages, ["Rusty"])

    # Now send another round of messages to our broker partitions
    send_to_all_partitions("topic_3x5_reconnects", ["Jack"])
    time.sleep(MESSAGE_DELAY_SECS)

    # Disable rebalancing so the consumer holds on to the partitions of the
    # broker we're about to kill, then kill that broker.
    c1.disable_rebalance()
    fail_server = RunConfig.kafka_servers[0]
    fail_server.stop()
    time.sleep(MESSAGE_DELAY_SECS)

    # A straight fetch will raise a connection failure because it can't reach
    # the first broker. It won't advance the offsets on any of the other
    # partitions -- the whole fetch should fail without side effects.
    assert_raises(ConnectionFailure, c1.fetch)

    # But a fetch told to ignore failures will return the results from the 
    # brokers that are still up
    result = c1.fetch(ignore_failures=True)
    assert_equal(topology_3x5.total_partitions - topology_3x5.partitions_per_broker,
                 len(result))
    for msg_set in result:
        assert_equal(msg_set.messages, ["Jack"])

    # Now we restart the failed Kafka broker, and do another fetch...
    fail_server.start()
    time.sleep(MESSAGE_DELAY_SECS)

    result = c1.fetch()
    # This should have MessageSets from all brokers (they're all reachable)
    assert_equal(topology_3x5.total_partitions, len(result))
    # But the only MessageSets that have messages in them should be from our
    # fail_server (the others were already read in a previous fetch, so will be
    # empty on this fetch).
    assert_equal(topology_3x5.total_partitions - topology_3x5.partitions_per_broker,
                 len([msg_set for msg_set in result if not msg_set]))
    # The messages from our resurrected fail_server will be "Jack"s
    assert_equal(topology_3x5.partitions_per_broker,
                 len([msg_set for msg_set in result
                      if msg_set.messages == ["Jack"]]))
Example #2
def test_3x5_reconnects():
    """Test that we keep trying to read, even if our brokers go down.

    We're going to:

    1. Send messages to all partitions in a topic, across all brokers
    2. Do a fetch (this will cause the Consumer to rebalance itself and find
       everything).
    3. Set the Consumer to disable rebalancing.
    4. Shut down one of the brokers
    5. Assert that nothing blows up
    6. Restart the broker and assert that the consumer can read from it again.

    Note that the partition split is always based on what's in ZooKeeper. So 
    even if the broker is dead or unreachable, we still keep its partitions and 
    try to contact it. Maybe there's a firewall issue preventing our server from
    hitting it. We don't want to risk messing up other consumers by grabbing
    partitions that might belong to them.
    """
    send_to_all_partitions(5, "topic_3x5_reconnects", ["Rusty"])
    delay()

    c1 = ZKConsumer(ZK_CONNECT_STR, "group_3x5_reconnects",
                    "topic_3x5_reconnects")
    result = c1.fetch()
    assert_equal(topology_3x5.total_partitions, len(result))
    for msg_set in result:
        assert_equal(msg_set.messages, ["Rusty"])

    # Now send another round of messages to our broker partitions
    send_to_all_partitions(5, "topic_3x5_reconnects", ["Jack"])
    delay()

    # Disable rebalancing so the consumer holds on to the partitions of the
    # broker we're about to kill, then kill that broker.
    c1.disable_rebalance()
    fail_server = RunConfig.kafka_servers[0]
    fail_server.stop()
    delay()

    # A straight fetch will raise a connection failure because it can't reach
    # the first broker. It won't advance the offsets on any of the other
    # partitions -- the whole fetch should fail without side effects.
    assert_raises(ConnectionFailure, c1.fetch)

    # But a fetch told to ignore failures will return the results from the
    # brokers that are still up
    result = c1.fetch(ignore_failures=True)
    assert_equal(
        topology_3x5.total_partitions - topology_3x5.partitions_per_broker,
        len(result))
    for msg_set in result:
        assert_equal(msg_set.messages, ["Jack"])

    # Now we restart the failed Kafka broker, and do another fetch...
    fail_server.start()
    delay()

    result = c1.fetch()
    # This should have MessageSets from all brokers (they're all reachable)
    assert_equal(topology_3x5.total_partitions, len(result))
    # But the only MessageSets that have messages in them should be from our
    # fail_server (the others were already read in a previous fetch, so will be
    # empty on this fetch).
    assert_equal(
        topology_3x5.total_partitions - topology_3x5.partitions_per_broker,
        len([msg_set for msg_set in result if not msg_set]))
    # The messages from our resurrected fail_server will be "Jack"s
    assert_equal(
        topology_3x5.partitions_per_broker,
        len([msg_set for msg_set in result if msg_set.messages == ["Jack"]]))
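Outside the test, fetch(ignore_failures=True) is the piece that lets a consumer keep reading from whatever brokers are still up. A minimal polling-loop sketch under stated assumptions: the import path, ZooKeeper connect string, group, and topic names are guesses, and only calls already demonstrated in the examples above are used.

import time

# Assumed import path -- adjust to wherever ZKConsumer and ConnectionFailure
# actually live in your copy of the library.
from brod.zk import ZKConsumer, ConnectionFailure

consumer = ZKConsumer("localhost:2181", "my_group", "my_topic")

while True:
    try:
        # ignore_failures=True returns message sets from every broker that is
        # still reachable instead of raising on the first dead connection, as
        # exercised by the tests above.
        for msg_set in consumer.fetch(ignore_failures=True):
            for message in msg_set.messages:
                print(message)
    except ConnectionFailure:
        # The tests above don't cover the case where every broker is down,
        # so be defensive and just wait for the next poll.
        pass
    time.sleep(5)

This keeps the consumer's rebalancing behaviour at its defaults; the disable_rebalance() call in the tests exists only to pin the consumer to the dead broker's partitions so the failure path can be asserted.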