def test_etcd_kill_middle(startcredis_etcdonly):
    """
    Test that if the middle node is removed, the tail continues to get
    updates once the chain is repaired.
    """
    # Bring the members up with a short heartbeat timeout.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2,
    )

    # Launch the SeqPut driver in a separate process.
    num_ops = 100
    pause_secs = 0.1
    put_driver = multiprocessing.Process(
        target=SeqPut, args=(num_ops, pause_secs))
    put_driver.start()

    # Give the driver a moment, then kill node 1 and tell the master.
    time.sleep(0.1)
    # NOTE(review): this value is never read in this test.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1, notify=master_client)
    put_driver.join()

    # Every operation must have completed.
    assert ops_completed.value == num_ops

    # Two members are expected in the chain reported by the master.
    expected_len = 2
    remaining = master_client.execute_command('MASTER.GET_CHAIN')
    assert len(remaining) == expected_len, 'chain %s' % remaining
    Check(ops_completed.value)
def test_etcd_heartbeat_timeout(startcredis_etcdonly):
    """
    Test that failure is detected and repaired within a heartbeat timeout.
    """
    # Start members with a quick heartbeat timeout.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=2,
    )

    # Launch the driver process.  Note that it will take a minimum of
    # 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    # NOTE(review): this value is never read in this test.
    middle_port = common.PortForNode(1)
    # Don't notify the master: the failure has to be noticed through the
    # heartbeat, which should expire within 2 sec.
    common.KillNode(index=1)
    driver.join()

    assert ops_completed.value == n

    chain = master_client.execute_command('MASTER.GET_CHAIN')
    assert len(chain) == 2 - 1 + 1, 'chain %s' % chain
    Check(ops_completed.value)
def test_dead_old_tail_when_adding(startcredis):
    """
    Test that when adding, the master detects & removes the dead node first.

    "sleep_secs" is set to a higher value, so "kill tail" and "add node"
    are triggered without a refresh request from the driver.  The master
    will have the following view of its members:

        init: [ live, live ]
        kill: [ live, dead ]  - master not told node 1 is dead
    """
    # Launch the driver process.
    n = 5
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    time.sleep(0.1)
    # Kill node 1 without notifying the master, then add a node.
    common.KillNode(index=1)
    proc, _ = common.AddNode(master_client)
    try:
        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        assert len(chain) == 2 - 1 + 1, 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Kill the added node even when an assertion above fails, so the
        # process is not left running.
        proc.kill()
def test_demo(startcredis):
    """
    Kill node 1 and add a node while the driver is running, then check
    that all operations completed and the chain matches the expected one.
    """
    master_mode = startcredis["master_mode"]

    # Launch the driver process.
    n = 1000
    sleep_secs = 0.01
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Kill / add.
    new_nodes = []
    try:
        time.sleep(0.1)
        common.KillNode(index=1)
        time.sleep(0.1)
        new_nodes.append(
            common.AddNode(master_client, master_mode=master_mode))
        time.sleep(0.1)
        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        # Keep only what follows the last ':' of each entry (presumably the
        # port -- verify against the MASTER.GET_CHAIN reply format).
        chain = [s.split(b':')[-1] for s in chain]
        assert chain == [b'6370', b'6372'], 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Kill every added node even when an assertion above fails.
        for proc, _ in new_nodes:
            proc.kill()

    common.log('Total ops %d, completed ops %d' % (n, ops_completed.value))
def test_multi_kill_add(startcredis):
    """Kill, add a few times."""
    # Launch the driver process.
    n = 1000
    sleep_secs = 0.01
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Notify the master that nodes have died, rather than wait for the
    # heartbeat by sleeping between kills.
    notify = master_client if startcredis["master_mode"] == MASTER_ETCD else None

    # Kill / add.
    new_nodes = []
    try:
        time.sleep(0.1)
        common.KillNode(index=1, notify=notify)  # 6371 dead
        new_nodes.append(common.AddNode(master_client))  # 6372
        common.KillNode(index=1, notify=notify)  # 6372 dead
        new_nodes.append(common.AddNode(master_client))  # 6373
        common.KillNode(index=0, notify=notify)  # 6370 dead, now [6373]
        new_nodes.append(common.AddNode(master_client))  # 6374
        new_nodes.append(common.AddNode(master_client))  # 6375
        # Now [6373, 6374, 6375].
        common.KillNode(index=2, notify=notify)  # 6375 dead, now [6373, 6374]

        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        # Keep only what follows the last ':' of each entry.
        chain = [s.split(b':')[-1] for s in chain]
        assert chain == [b'6373', b'6374'], 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Kill every node added so far, even when a step or assertion above
        # fails part-way through.
        for proc, _ in new_nodes:
            proc.kill()
def test_etcd_master_online_recovery(startcredis_etcdonly):
    """
    Test that SeqPut succeeds when the master is killed and restarted
    mid-way, then a member is killed, then a member is added.

    The restarted master should be able to recover the chain, with the new
    member being the tail, and no updates should be lost.
    """
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=10,
    )

    # Launch the driver process.  Note that it will take a minimum of
    # 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Restart the master while the driver is running; it should still
    # report a chain of 3.
    time.sleep(0.1)
    common.KillMaster()
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.1)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

    # Kill node 1 and notify the master; the chain shrinks to 2.
    time.sleep(0.1)
    # NOTE(review): this value is never read in this test.
    middle_port = common.PortForNode(1)
    common.KillNode(index=1, notify=master_client)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    # Add a member; the chain grows back to 3.
    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        driver.join()
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup: kill the added node even when an assertion above fails.
        new_node.kill()
def test_kaa(startcredis):
    """Kill, add, add."""
    # Launch the driver process.
    n = 1000
    sleep_secs = 0.01
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    new_nodes = []
    try:
        time.sleep(0.1)
        common.KillNode(index=1)
        new_nodes.append(common.AddNode(master_client))
        new_nodes.append(common.AddNode(master_client))
        driver.join()

        assert ops_completed.value == n
        chain = master_client.execute_command('MASTER.GET_CHAIN')
        # Expected length: 2 - 1 (one kill) + number of added nodes.
        assert len(chain) == 2 - 1 + len(new_nodes), 'chain %s' % chain
        Check(ops_completed.value)
    finally:
        # Kill every added node even when an assertion above fails.
        for proc, _ in new_nodes:
            proc.kill()
def test_etcd_kill_node_while_master_is_dead(startcredis_etcdonly):
    """
    Test that SeqPut succeeds when the master is killed and a node is killed
    WHILE the master is dead.  The master is then restarted.  No updates
    should be lost.

    TODO: Fails (3/28) because members are not checked for liveness when the
    master starts up.
    """
    # Choose a long heartbeat timeout so that the master never receives
    # heartbeat expiry notifs.
    common.Start(
        chain=common.MakeChain(3),
        master_mode=MASTER_ETCD,
        heartbeat_interval=1,
        heartbeat_timeout=999,
    )

    # Launch the driver process.  Note that it will take a minimum of
    # 10 seconds.
    n = 10
    sleep_secs = 1
    driver = multiprocessing.Process(target=SeqPut, args=(n, sleep_secs))
    driver.start()

    # Kill node 1 while the master is down, then bring the master back.
    time.sleep(0.1)
    common.KillMaster()
    common.KillNode(index=1)
    common.StartMaster(master_mode=MASTER_ETCD)
    time.sleep(0.2)
    assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 2

    new_node, _ = common.AddNode(master_client, master_mode=MASTER_ETCD)
    try:
        time.sleep(0.1)
        assert len(master_client.execute_command('MASTER.GET_CHAIN')) == 3

        driver.join()

        assert ops_completed.value == n
        Check(ops_completed.value)
    finally:
        # Cleanup: kill the added node even when an assertion above fails.
        new_node.kill()
def tearDownClass(cls):
    """Call common.KillNode(stateless=True) for every port in common.INIT_PORTS."""
    for init_port in common.INIT_PORTS:
        common.KillNode(port=init_port, stateless=True)