def cleanup_iptables(self):
    """Delete the iptables rules that the network-isolation tests install.

    Removes the redirect rules (virtual ip -> 127.0.0.1) and the drop rules
    for each of the virtual addresses 127.0.0.100 - 127.0.0.102.  Redirects
    for .101 and .102 are included because
    test_6_repeat_isolation_and_no_opinion_linepay adds them and would
    otherwise leave them behind when it aborts half way.
    """
    virtual_ips = ('127.0.0.100', '127.0.0.101', '127.0.0.102')

    # The same rule may have been appended several times, so each delete is
    # repeated until the util helper returns False (presumably meaning that
    # no matching rule is left -- confirm against util.iptables_*).
    # `!= False` is kept on purpose: it mirrors the original `== False`
    # exit test exactly, whatever non-bool values the helper may return.
    for ip in virtual_ips:
        while util.iptables_redirect('D', ip, '127.0.0.1') != False:
            pass

    for ip in virtual_ips:
        while util.iptables_drop('D', ip) != False:
            pass
def test_7_dirty_network_fi(self):
    """Run confmaster fault injections while the network is blocked and unblocked.

    For every item yielded by ``fi_confmaster.ConfmasterWfFi`` the test:

    1. blocks the network with ``block_network`` and waits until every state
       entry whose ip is 127.0.0.100 has active_role '?' and mgmt_role 'N';
    2. registers the fault with ``fi_confmaster.fi_add(fi, 1, ...)``;
    3. unblocks the network and waits for ``util.check_cluster`` to succeed;
    4. for some faults issues an extra command (role lconn / smr delay) and
       re-checks the cluster;
    5. waits for ``fi_confmaster.fi_count`` to report 0 and checks that all
       crc16 load clients are still consistent.

    Afterwards every fault is re-registered with count 0, the test waits for
    the cluster_info 'wf' counter to reach 0, and the cluster is finalized.
    The load clients are stopped and the forwarding rule is deleted in a
    ``finally`` block.

    ``block_network`` / ``unblock_network`` are helpers defined elsewhere in
    this file.
    """
    util.print_frame()
    clnts = []

    try:
        util.iptables_print_list()

        # Add forwarding role
        self.assertTrue(util.iptables_redirect('A', '127.0.0.100', '127.0.0.1'),
                        'add a forwarding role to iptables fail.')

        cluster_name = 'network_isolation_cluster_1'
        cluster = [entry for entry in config.clusters
                   if entry['cluster_name'] == cluster_name][0]
        util.log(util.json_to_str(cluster))

        self.leader_cm = cluster['servers'][0]

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        conf_checker = default_cluster.initialize_starting_up_smr_before_redis(
            cluster, conf={'cm_context':'applicationContext-fi.xml'})
        self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                           initial_state, check_quorum=True),
                        'failed to check cluster state')

        # Start crc16 client
        for s in cluster['servers']:
            c = load_generator_crc16.Crc16Client(s['id'], s['ip'], s['redis_port'], 600, verbose=False)
            c.start()
            clnts.append(c)

        # Network isolation test
        cmfi = fi_confmaster.ConfmasterWfFi(['ra', 'me', 'yj', 'bj', 'mg'],
                                            ['lconn', 'slave', 'master', 'setquorum'],
                                            [True, False], 1)

        for fi in cmfi:
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %s ### ' % str(fi))
            ret = block_network(cluster, mgmt_ip, mgmt_port)
            self.assertTrue(ret, '[%s] failed to block network.' % str(fi))

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state: poll (up to 10 times) until every entry on
            # the blocked address is reported as '?' / 'N'.
            ok = False
            for i in range(10):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                   isolated_states, check_quorum=True)

                state_transition_done = True
                for s in isolated_states:
                    if s['ip'] != '127.0.0.100':
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Fault injection
            try:
                self.assertTrue(fi_confmaster.fi_add(fi, 1, mgmt_ip, mgmt_port),
                                "Confmaster command fail. fi: %s" % str(fi))
            except ValueError as e:
                # The original handler formatted the undefined names `cmd`
                # and `reply`, which raised NameError and hid the real error.
                self.fail("Confmaster command error. fi: \"%s\", error: \"%s\"" % (str(fi), e))

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %s ### ' % str(fi))
            ret = unblock_network(cluster, mgmt_ip, mgmt_port, None)
            self.assertTrue(ret, '[%s] failed to unblock network.' % str(fi))

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(10):
                isolated_states = []
                ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                        isolated_states, check_quorum=True)
                if ok:
                    break
                time.sleep(1)
            self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

            check_cluster = False

            # 'bj', 'slave'
            if fi[0] == 'bj' and fi[1] == 'slave':
                m, s1, s2 = util.get_mss(cluster)
                ret = util.role_lconn(s1)
                self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                check_cluster = True
            # 'me', 'lconn' (note: only fi[0] is tested, so every 'me' fault lands here)
            elif fi[0] == 'me':
                m, s1, s2 = util.get_mss(cluster)
                ret = util.role_lconn(m)
                self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                check_cluster = True
            # 'setquorum'
            elif fi[1] == 'setquorum':
                m, s1, s2 = util.get_mss(cluster)
                ret = util.cmd_to_smr_addr(s1['ip'], s1['smr_mgmt_port'],
                                           'fi delay sleep 1 8000\r\n', timeout=20)
                self.assertEqual("+OK\r\n", ret,
                                 '[%s] "fi delay sleep 1 8000" fail. ret: "%s"' % (str(fi), ret))
                check_cluster = True

            if check_cluster:
                # Check cluster state
                ok = False
                for i in range(20):
                    isolated_states = []
                    ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                            isolated_states, check_quorum=True)
                    if ok:
                        break
                    time.sleep(1)
                self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

            # Check fault injection
            ok = False
            for i in range(10):
                count = fi_confmaster.fi_count(fi, mgmt_ip, mgmt_port)
                if count == 0:
                    ok = True
                    break
                time.sleep(0.5)
            self.assertTrue(ok, "[%s] fail. failt injection had not been triggered." % str(fi))

            for c in clnts:
                self.assertTrue(c.is_consistency(), '[%s] data consistency error!' % str(fi))

        for c in clnts:
            self.assertTrue(c.is_consistency(), '[%s] data consistency error!' % str(fi))

        # Go back to initial configuration
        cmfi.init()
        for fi in cmfi:
            try:
                self.assertTrue(fi_confmaster.fi_add(fi, 0, mgmt_ip, mgmt_port),
                                "Confmaster command fail. fi: %s" % str(fi))
            except ValueError as e:
                self.fail("Confmaster command error. fi: \"%s\", error: \"%s\"" % (str(fi), e))

        # Wait until workflows done
        # `await` is a reserved word from Python 3.7 on; fetching the helper
        # with getattr is equivalent to `util.await` and keeps this parseable.
        wait_until = getattr(util, 'await')
        ret = wait_until(60, True)(
            lambda cinfo: cinfo['wf'] == 0,
            lambda: util.cluster_info(mgmt_ip, mgmt_port, cluster['cluster_name']))
        self.assertTrue(ret, 'There are still some workflows.')

        self.assertTrue(conf_checker.final_check())

        # Shutdown cluster
        default_cluster.finalize(cluster)

    finally:
        for c in clnts:
            c.quit()
        for c in clnts:
            c.join()

        # Delete forwarding role
        self.assertTrue(util.iptables_redirect('D', '127.0.0.100', '127.0.0.1'),
                        'delete a forwarding role to iptables fail.')
def test_6_repeat_isolation_and_no_opinion_linepay(self):
    """Block and unblock three virtual addresses 20 times, then check the cluster.

    Each round adds drop rules for 127.0.0.102, .100 and .101 (with short
    pauses in between) and removes them again in the same order.  After the
    last round the test polls util.check_cluster and finally requires that
    checkLastState(..., 0, 'F') returns False for at least one server.
    """
    forwarded_ips = ('127.0.0.100', '127.0.0.101', '127.0.0.102')
    # (address to block, number of pauses, seconds per pause), in blocking order
    block_plan = (
        ('127.0.0.102', 1, 0.1),
        ('127.0.0.100', 3, 1.2),
        ('127.0.0.101', 1, 1),
    )

    def pause(rounds, seconds):
        # Log a 1-based counter and sleep `seconds`, `rounds` times over.
        for tick in range(1, rounds + 1):
            util.log('waiting... %d' % tick)
            time.sleep(seconds)

    util.print_frame()
    util.iptables_print_list()

    # Add forwarding role
    for ip in forwarded_ips:
        self.assertTrue(util.iptables_redirect('A', ip, '127.0.0.1'),
                        'add a forwarding role to iptables fail.')

    cluster_name = 'no_opinion'
    cluster = [entry for entry in config.clusters
               if entry['cluster_name'] == cluster_name][0]
    util.log(util.json_to_str(cluster))

    leader = cluster['servers'][0]
    self.leader_cm = leader

    # MGMT
    mgmt_ip = leader['real_ip']
    mgmt_port = leader['cm_port']

    # Create cluster
    conf_checker = default_cluster.initialize_starting_up_smr_before_redis(cluster)
    self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

    # Print initial state of cluster
    util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
    initial_state = []
    checked = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                 initial_state, check_quorum=True)
    self.assertTrue(checked, 'failed to check cluster state')

    # Network isolation test
    for loop_cnt in range(1, 21):
        # Block network
        util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % loop_cnt)
        for ip, rounds, seconds in block_plan:
            self.assertTrue(util.iptables_drop('A', ip),
                            'add a bloking role to iptables fail.')
            pause(rounds, seconds)

        # Unblock network (no pause between the individual deletions)
        util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % loop_cnt)
        for ip, _rounds, _seconds in block_plan:
            self.assertTrue(util.iptables_drop('D', ip),
                            'delete a bloking role to iptables fail.')
        pause(3, 1)

    # Print state of cluster
    util.log('\n ### STATE OF CLUSTER ### ')
    cluster_state = False
    for _ in range(10):
        cluster_state = util.check_cluster(cluster_name, mgmt_ip, mgmt_port,
                                           initial_state, check_quorum=True)
        if cluster_state == True:
            break
        time.sleep(1)
    self.assertTrue(cluster_state, 'failed to check cluster state')

    # Stops at the first server for which checkLastState returns False.
    all_in_f = not any(
        checkLastState(mgmt_ip, srv['cm_port'], cluster_name, 0, 'F') == False
        for srv in cluster['servers'])
    self.assertFalse(all_in_f, 'PGS0`s last state remains in F')

    # Delete forwarding role
    for ip in forwarded_ips:
        self.assertTrue(util.iptables_redirect('D', ip, '127.0.0.1'),
                        'delete a forwarding role to iptables fail.')

    self.assertTrue(conf_checker.final_check())

    # Shutdown cluster
    default_cluster.finalize(cluster)
def test_3_some_pgs_is_isolated_2copy(self):
    """Isolate 127.0.0.100 three times in the 2-copy cluster and check recovery.

    While the address is blocked, every state entry other than pgs 0 and 1
    must be master (active_role 'M', mgmt_role 'M') with quorum 0.  After
    unblocking, util.check_cluster must succeed and is_pgs_normal must not
    return False for any entry other than pgs 1.
    """
    virtual_ip = '127.0.0.100'

    util.print_frame()

    out = util.sudo('iptables -L')
    util.log('====================================================================')
    util.log('out : %s' % out)
    util.log('out.return_code : %d' % out.return_code)
    util.log('out.stderr : %s' % out.stderr)
    util.log('out.succeeded : %s' % out.succeeded)

    # Add forwarding role (127.0.0.100 -> 127.0.0.1)
    self.assertTrue(util.iptables_redirect('A', virtual_ip, '127.0.0.1'),
                    'add a forwarding role to iptables fail.')

    cluster = [entry for entry in config.clusters
               if entry['cluster_name'] == 'network_isolation_cluster_1_2copy'][0]
    util.log(util.json_to_str(cluster))

    # MGMT
    mgmt_ip = cluster['servers'][0]['ip']
    mgmt_port = cluster['servers'][0]['cm_port']

    # Create cluster
    conf_checker = default_cluster.initialize_starting_up_smr_before_redis(cluster)
    self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

    # Place master on real ip address
    for pg_id in [0, 1]:
        m = util.get_server_by_role_and_pg(cluster['servers'], 'master', pg_id)
        s = util.get_server_by_role_and_pg(cluster['servers'], 'slave', pg_id)
        if 'ip' in m and 'real_ip' in m and m['ip'] != m['real_ip']:
            ret = util.role_change(cluster['servers'][0], cluster['cluster_name'], s['id'])
            self.assertNotEquals(ret, -1, 'change %d to a master fail' % s['id'])

    # Print initial state of cluster
    util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
    initial_state = []
    checked = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                 initial_state, check_quorum=True)
    self.assertTrue(checked, 'failed to check cluster state')

    # Network isolation test
    for cnt in range(3):
        # Block network
        util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % cnt)
        self.assertTrue(util.iptables_drop('A', virtual_ip),
                        'add a bloking role to iptables fail.')

        for tick in range(1, 5):
            util.log('waiting... %d' % tick)
            time.sleep(1)

        # Check cluster state
        ok = False
        for _ in range(7):
            isolated_states = []
            util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                               isolated_states, check_quorum=True)
            time.sleep(1)

            state_transition_done = True
            for entry in isolated_states:
                if entry['pgs_id'] in (0, 1):
                    continue
                is_master = entry['active_role'] == 'M' and entry['mgmt_role'] == 'M'
                quorum_is_zero = entry['quorum'] == 0
                if not (is_master and quorum_is_zero):
                    state_transition_done = False

            if state_transition_done:
                ok = True
                break
            time.sleep(1)
        self.assertTrue(ok, 'Fail, state transition')

        # Unblock network
        util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % cnt)
        self.assertTrue(util.iptables_drop('D', virtual_ip),
                        'delete a bloking role to iptables fail.')

        # Check cluster state
        ok = False
        for _ in range(7):
            final_state = []
            healthy = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                         final_state, check_quorum=True)
            if healthy == False:
                time.sleep(1)
                continue

            # is_pgs_normal is called for every entry except pgs 1.
            verdicts = [is_pgs_normal(entry) for entry in final_state
                        if entry['pgs_id'] != 1]
            if not any(verdict == False for verdict in verdicts):
                ok = True
                break
            time.sleep(1)
        self.assertTrue(ok, 'Fail, state consistency')

    # Check state
    self.assertNotEqual(initial_state, None, 'initial_state is None')
    self.assertNotEqual(final_state, None, 'final_state is None')

    # Delete forwarding role (127.0.0.100 -> 127.0.0.1)
    self.assertTrue(util.iptables_redirect('D', virtual_ip, '127.0.0.1'),
                    'delete a forwarding role to iptables fail.')

    self.assertTrue(conf_checker.final_check())

    # Shutdown cluster
    default_cluster.finalize(cluster)
def test_2_some_pgs_is_isolated(self):
    """Isolate 127.0.0.100 three times and check that the cluster recovers.

    While the address is blocked, every state entry other than pgs 1 must be
    reported with active_role '?' and mgmt_role 'N'.  After unblocking,
    is_pgs_normal must not return False for any entry other than pgs 1.
    Finally the active_ts of every entry must differ from its initial value.
    """
    virtual_ip = '127.0.0.100'

    util.print_frame()
    util.iptables_print_list()

    # Add forwarding role (127.0.0.100 -> 127.0.0.1)
    self.assertTrue(util.iptables_redirect('A', virtual_ip, '127.0.0.1'),
                    'add a forwarding role to iptables fail.')

    cluster = [entry for entry in config.clusters
               if entry['cluster_name'] == 'network_isolation_cluster_2'][0]
    util.log(util.json_to_str(cluster))

    # MGMT
    mgmt_ip = cluster['servers'][0]['real_ip']
    mgmt_port = cluster['servers'][0]['cm_port']

    # Create cluster
    conf_checker = default_cluster.initialize_starting_up_smr_before_redis(cluster)
    self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

    # Place master on virtual ip address in order to cause master election.
    pg_id = 0
    m = util.get_server_by_role_and_pg(cluster['servers'], 'master', pg_id)
    s = util.get_server_by_role_and_pg(cluster['servers'], 'slave', pg_id)
    if 'ip' in m and 'real_ip' not in m:
        ret = util.role_change(cluster['servers'][0], cluster['cluster_name'], s['id'])
        self.assertNotEquals(ret, -1, 'change %d to a master fail' % s['id'])

    # Print initial state of cluster
    util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
    initial_state = []
    checked = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                                 initial_state, check_quorum=True)
    self.assertTrue(checked, 'failed to check cluster state')

    # Network isolation test
    for cnt in range(3):
        # Block network
        util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % cnt)
        self.assertTrue(util.iptables_drop('A', virtual_ip),
                        'add a bloking role to iptables fail.')

        for tick in range(1, 5):
            util.log('waiting... %d' % tick)
            time.sleep(1)

        # Check cluster state
        ok = False
        for _ in range(7):
            isolated_states = []
            util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                               isolated_states, check_quorum=True)
            time.sleep(1)

            isolated_flags = [
                entry['active_role'] == '?' and entry['mgmt_role'] == 'N'
                for entry in isolated_states if entry['pgs_id'] != 1]
            if all(isolated_flags):
                ok = True
                break
            time.sleep(1)
        self.assertTrue(ok, 'Fail, state transition')

        # Unblock network
        util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % cnt)
        self.assertTrue(util.iptables_drop('D', virtual_ip),
                        'delete a bloking role to iptables fail.')

        # Check cluster state (the return value of check_cluster is not used here)
        ok = False
        for _ in range(7):
            final_state = []
            util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port,
                               final_state, check_quorum=True)

            # is_pgs_normal is called for every entry except pgs 1.
            verdicts = [is_pgs_normal(entry) for entry in final_state
                        if entry['pgs_id'] != 1]
            if not any(verdict == False for verdict in verdicts):
                ok = True
                break
            time.sleep(1)
        self.assertTrue(ok, 'Fail, state consistency')

    # Check state
    self.assertNotEqual(initial_state, None, 'initial_state is None')
    self.assertNotEqual(final_state, None, 'final_state is None')

    def by_pgs_id(entry):
        return int(entry['pgs_id'])

    initial_state = sorted(initial_state, key=by_pgs_id)
    final_state = sorted(final_state, key=by_pgs_id)
    for idx, after in enumerate(final_state):
        before = initial_state[idx]
        msg = 'ts (%d)%d -> (%d)%d' % (before['pgs_id'], before['active_ts'],
                                       after['pgs_id'], after['active_ts'])
        util.log(msg)
        self.assertNotEqual(before['active_ts'], after['active_ts'], msg)

    # Delete forwarding role (127.0.0.100 -> 127.0.0.1)
    self.assertTrue(util.iptables_redirect('D', virtual_ip, '127.0.0.1'),
                    'delete a forwarding role to iptables fail')

    self.assertTrue(conf_checker.final_check())

    # Shutdown cluster
    default_cluster.finalize(cluster)
def test_gateway_network_isolation(self):
    """Check gateway load balancing while one gateway's address is blocked.

    The first server's gateway is re-registered on the virtual address
    127.0.0.100.  The test then checks TPS (via util.get_tps) before the
    block, during the block (blocked gateway <= 10, all others >= 50) and
    after the unblock, and finally re-registers the gateway on its own
    pm_name / ip and removes the virtual host again.
    """
    VIRTUAL_HOST_NAME = 'virtualhost'
    VIRTUAL_HOST_IP = '127.0.0.100'

    def every_gateway_busy(gateways):
        # Query and log the TPS of every gateway (no early exit); the result
        # is True only when none of them is below 50.
        busy = True
        for gw in gateways:
            tps = util.get_tps(gw['ip'], gw['gateway_port'], 'gw')
            util.log('%s:%d TPS:%d' % (gw['ip'], gw['gateway_port'], tps))
            if tps < 50:
                busy = False
        return busy

    util.print_frame()

    cluster = self.cluster

    # Clear rules: repeat each delete until the helper returns False.
    while util.iptables_redirect('D', '127.0.0.100', '127.0.0.1') != False:
        pass
    while util.iptables_drop('D', '127.0.0.100') != False:
        pass

    # Print rules
    util.iptables_print_list()

    # Start loadgenerators
    self.load_gen_list = {}
    for idx, member in enumerate(cluster['servers']):
        arc_api = ARC_API(ZK_ADDR, CLUSTER_NAME, logFilePrefix = self.arcci_log, so_path = self.so_path)
        load_gen = LoadGenerator_ARCCI_FaultTolerance(member['id'], arc_api)
        load_gen.start()
        self.load_gen_list[idx] = load_gen

    # Add forwarding role (127.0.0.100 -> 127.0.0.1)
    self.assertTrue(util.iptables_redirect('A', '127.0.0.100', '127.0.0.1'),
                    'add a forwarding role to iptables fail.')

    # Add virtualhost information to MGMT
    ret = util.pm_add(VIRTUAL_HOST_NAME, VIRTUAL_HOST_IP, MGMT_IP, MGMT_PORT)
    self.assertTrue(ret, 'pm_add fail.')

    # Modify gateway information of MGMT
    server = cluster['servers'][0]
    gw_id = server['id']
    gw_port = server['gateway_port']

    # Delete gateway
    ret = util.gw_del(CLUSTER_NAME, gw_id, MGMT_IP, MGMT_PORT)
    self.assertTrue(ret, 'gw_del fail')

    # Add gateway
    ret = util.gw_add(CLUSTER_NAME, gw_id, VIRTUAL_HOST_NAME, VIRTUAL_HOST_IP,
                      gw_port, MGMT_IP, MGMT_PORT)
    self.assertTrue(ret, 'gw_add fail')

    # Check load balancing
    for _ in range(5):
        ok = every_gateway_busy(cluster['servers'])
        if ok:
            break
        time.sleep(1)
    self.assertTrue(ok, 'load balancing fail')
    util.log('load balancing success')

    # Block
    self.assertTrue(util.iptables_drop('A', '127.0.0.100'),
                    'add a bloking role to iptables fail.')

    # Check blocked gateway`s ops
    for _ in range(5):
        tps = util.get_tps(server['ip'], server['gateway_port'], 'gw')
        util.log('%s:%d TPS:%d' % (server['ip'], server['gateway_port'], tps))
        ok = not (tps > 10)
        if ok:
            break
        time.sleep(1)
    self.assertTrue(ok, 'load balancing fail - blocked gateway')
    util.log('load balancing success - blocked gateway')

    # Check unblocked gateway`s ops
    for _ in range(10):
        others = [gw for gw in cluster['servers'] if not (gw == server)]
        ok = every_gateway_busy(others)
        if ok:
            break
        time.sleep(1)
    self.assertTrue(ok, 'load balancing fail - nonblocked gateways')
    util.log('load balancing success - nonblocked gateways')

    # Unblock
    self.assertTrue(util.iptables_drop('D', '127.0.0.100'),
                    'delete a bloking role to iptables fail.')

    # Check load balancing
    def condition(s):
        # The re-registered gateway must stay at <= 10 ops, all others >= 50.
        if s['id'] == gw_id:
            return s['ops'] <= 10
        return s['ops'] >= 50

    ok = False
    for _ in range(5):
        if util.check_ops(cluster['servers'], 'gw', condition):
            ok = True
            break
        time.sleep(1)
    self.assertTrue(ok, 'load balancing fail - all gateways after unblocking network')
    util.log('load balancing success - all gateways after unblocking network')

    server = cluster['servers'][0]

    # Wait until opinion for the gateway deleted.
    for elapsed in range(5):
        util.log('Wait until opinions for the gateway have been deleted... %d' % elapsed)
        time.sleep(1)

    # Delete gateway
    ret = util.gw_del(CLUSTER_NAME, gw_id, MGMT_IP, MGMT_PORT)
    self.assertTrue(ret, 'gw_del fail')

    # Add gateway
    ret = util.gw_add(CLUSTER_NAME, gw_id, server['pm_name'], server['ip'],
                      gw_port, MGMT_IP, MGMT_PORT)
    self.assertTrue(ret, 'gw_add fail')

    # Check load balancing
    for _ in range(10):
        ok = every_gateway_busy(cluster['servers'])
        if ok:
            break
        time.sleep(1)
    self.assertTrue(ok, 'load balancing fail - all gateways after unblocking network')
    util.log('load balancing success - all gateways after unblocking network')

    # Go back to initial configuration
    self.assertTrue(util.pm_del(MGMT_IP, MGMT_PORT, VIRTUAL_HOST_NAME),
                    'failed to pm_del to go back to initial configuration.')