Exemple #1
0
def test_etcd_kill_middle(startcredis_etcdonly):
    """Kill the middle node of a 3-node chain while a writer is running.

    The master is notified of the kill.  Once the writer has finished, the
    test expects ops_completed to equal the number of requested ops, the
    chain reported by MASTER.GET_CHAIN to hold two members, and then runs
    Check() over the completed ops.
    """
    # Bring up a 3-node chain in etcd mode with short heartbeat settings.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2,
    )

    # Run the sequential writer in a separate process.
    num_ops, put_interval = 100, 0.1
    writer = multiprocessing.Process(
        target=SeqPut, args=(num_ops, put_interval))
    writer.start()

    # Give the writer a head start before removing the middle node.
    time.sleep(0.1)
    # NOTE(review): the port is looked up but never used afterwards.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1, notify=master_client)
    writer.join()

    assert ops_completed.value == num_ops
    members = master_client.execute_command('MASTER.GET_CHAIN')
    # Started with 3 members, one was killed.
    assert len(members) == 2 - 1 + 1, 'chain %s' % members
    Check(ops_completed.value)
Exemple #2
0
def test_etcd_heartbeat_timeout(startcredis_etcdonly):
    """Test that failure is detected and repaired within a heartbeat timeout.

    The middle node is killed WITHOUT notifying the master, so the only way
    the chain can shrink is through the master noticing on its own (the
    heartbeat timeout configured below is 2).  After the writer finishes the
    test expects ops_completed to equal n, MASTER.GET_CHAIN to report two
    members, and then runs Check() over the completed ops.
    """
    # Start members with a quick heartbeat timeout.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2)

    # Launch driver process. With n=10 and sleep_secs=1 the original author
    # notes it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    # NOTE(review): middle_port is never used; kept only because it cannot be
    # confirmed from here that PortForNode is side-effect free.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1)  # Don't notify master
    # Heartbeat should expire within 2 sec.
    driver.join()

    assert ops_completed.value == n

    # A leftover interactive debugger breakpoint used to sit here; it has been
    # removed so the test can run unattended.
    chain = master_client.execute_command('MASTER.GET_CHAIN')
    assert len(chain) == 2 - 1 + 1, 'chain %s' % chain
    Check(ops_completed.value)
Exemple #3
0
def test_dead_old_tail_when_adding(startcredis):
    """Add a node while the old tail is dead but the master does not know it.

    "sleep_secs" is set to a higher value, so "kill tail" and "add node" are
    triggered without a refresh request from the driver.  The master will have
    the following view of its members:
      init: [ live, live ]
      kill: [ live, dead ]
         - master not told node 1 is dead
    Tests that when adding, the master detects & removes the dead node first.
    """
    # Launch driver process.
    n = 5
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    common.KillNode(index=1)  # Master is deliberately not notified.
    proc, _ = common.AddNode(master_client)
    try:
        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        assert len(chain) == 2 - 1 + 1, 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Always stop the added node, even when an assertion above fails, so
        # the process does not outlive the test.
        proc.kill()
Exemple #4
0
def test_demo(startcredis):
    """Kill node 1, add a replacement, and verify the resulting chain.

    Runs a writer process issuing n sequential puts while one node is killed
    and one is added.  Afterwards ops_completed must equal n, the ports
    reported by MASTER.GET_CHAIN must be exactly 6370 and 6372, and Check()
    is run over the completed ops.
    """
    master_mode = startcredis["master_mode"]
    # Launch driver process.
    n = 1000
    sleep_secs = 0.01
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Kill / add.
    new_nodes = []
    try:
        time.sleep(0.1)
        common.KillNode(index=1)
        time.sleep(0.1)
        new_nodes.append(
            common.AddNode(master_client, master_mode=master_mode))
        time.sleep(0.1)
        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        # Keep only the port part of each "host:port" entry.
        chain = [s.split(b':')[-1] for s in chain]
        assert chain == [b'6370', b'6372'], 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Stop every node added by this test, even if an assertion failed.
        for proc, _ in new_nodes:
            proc.kill()
    common.log('Total ops %d, completed ops %d' % (n, ops_completed.value))
Exemple #5
0
def test_multi_kill_add(startcredis):
    """Kill, add a few times.

    Interleaves several node kills and additions while a writer process is
    running, then expects ops_completed to equal n and the ports reported by
    MASTER.GET_CHAIN to be exactly 6373 and 6374 before running Check().
    """
    # Launch driver process.
    n = 1000
    sleep_secs = 0.01
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Notify the master that nodes have died, rather than wait for the
    # heartbeat by sleeping between kills.
    notify = master_client if startcredis["master_mode"] == MASTER_ETCD else None

    # Kill / add.
    new_nodes = []
    try:
        time.sleep(0.1)
        common.KillNode(index=1, notify=notify)  # 6371 dead
        new_nodes.append(common.AddNode(master_client))  # 6372
        common.KillNode(index=1, notify=notify)  # 6372 dead
        new_nodes.append(common.AddNode(master_client))  # 6373
        common.KillNode(index=0, notify=notify)  # 6370 dead, now [6373]
        new_nodes.append(common.AddNode(master_client))  # 6374
        new_nodes.append(common.AddNode(master_client))  # 6375
        # Now [6373, 6374, 6375].
        common.KillNode(index=2, notify=notify)  # 6375 dead, now [6373, 6374]

        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        # Keep only the port part of each "host:port" entry.
        chain = [s.split(b':')[-1] for s in chain]
        assert chain == [b'6373', b'6374'], 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Stop every node added so far, even if a step above failed midway.
        for proc, _ in new_nodes:
            proc.kill()
Exemple #6
0
def test_etcd_master_online_recovery(startcredis_etcdonly):
    """ Test that SeqPut succeeds when the master is killed and restarted mid-way, then a member is
    killed, then a member is added. The restarted master should be able to recover the chain, with
    the new member being the tail, and no updates should be lost.

    The chain length reported by MASTER.GET_CHAIN is asserted after each step:
    3 after the master restart, 2 after the kill, 3 after the add.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10)

    # Launch driver process. With n=10 and sleep_secs=1 the original author
    # notes it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Restart the master while the driver is writing.
    time.sleep(0.1)
    common.KillMaster()
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

    time.sleep(0.1)
    # NOTE(review): middle_port is never used; kept only because it cannot be
    # confirmed from here that PortForNode is side-effect free.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1, notify=master_client)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        # A single join is enough; the process is finished once it returns.
        driver.join()
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup: stop the added node even when an assertion above fails.
        new_node.kill()
Exemple #7
0
def test_kaa(startcredis):
    """Kill, add, add.

    Kills node 1 and adds two nodes while a writer process is running.
    Afterwards ops_completed must equal n and the chain length must be the
    starting 2, minus the killed node, plus the number of added nodes;
    Check() is then run over the completed ops.
    """
    # Launch driver process.
    n = 1000
    sleep_secs = 0.01
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    new_nodes = []
    try:
        time.sleep(0.1)
        common.KillNode(index=1)
        new_nodes.append(common.AddNode(master_client))
        new_nodes.append(common.AddNode(master_client))

        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        assert len(chain) == 2 - 1 + len(new_nodes), 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Stop every node added by this test, even if an assertion failed.
        for proc, _ in new_nodes:
            proc.kill()
Exemple #8
0
def test_etcd_kill_node_while_master_is_dead(startcredis_etcdonly):
    """ Test that SeqPut succeeds when the master is killed and a node is killed WHILE the master is
    dead. The master is then restarted. No updates should be lost.

    The chain length reported by MASTER.GET_CHAIN is asserted to be 2 after
    the master restart and 3 after a new node is added.

    TODO: Fails (3/28) because members are not checked for liveness when the master starts up.
    """
    # Choose a long heartbeat timeout so that the master never receives heartbeat expiry notifs.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=999)

    # Launch driver process. With n=10 and sleep_secs=1 the original author
    # notes it will take a minimum of 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Kill the master first so the node dies while no master is running.
    time.sleep(0.1)
    common.KillMaster()
    common.KillNode(index=1)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.2)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        driver.join()

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup: stop the added node even when an assertion above fails.
        new_node.kill()
Exemple #9
0
 def tearDownClass(cls):
     """Call common.KillNode (stateless) for every port in common.INIT_PORTS."""
     for port in common.INIT_PORTS:
         common.KillNode(port=port, stateless=True)