def test_all_pgs_hang(self):
    util.print_frame()
    self.setup_test_cluster(self.cluster_3copy)

    # get gateway info
    ip, port = util.get_rand_gateway(self.cluster)
    gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
    ret = gw.connect(ip, port)
    self.assertEqual(ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port))

    # set values
    for i in range(0, 10000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res))

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss(self.cluster)
    self.assertNotEqual(m, None, 'master is None.')
    self.assertNotEqual(s1, None, 'slave1 is None.')
    self.assertNotEqual(s2, None, 'slave2 is None.')

    util.log('server state before hang')
    util.log_server_state(self.cluster)

    # hang
    smr_master = smr_mgmt.SMR(m['id'])
    ret = smr_master.connect(m['ip'], m['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to master. %s:%d' % (m['ip'], m['smr_mgmt_port']))
    smr_slave1 = smr_mgmt.SMR(s1['id'])
    ret = smr_slave1.connect(s1['ip'], s1['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave1. %s:%d' % (s1['ip'], s1['smr_mgmt_port']))
    smr_slave2 = smr_mgmt.SMR(s2['id'])
    ret = smr_slave2.connect(s2['ip'], s2['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave2. %s:%d' % (s2['ip'], s2['smr_mgmt_port']))

    m_ts = util.get_timestamp_of_pgs(m)
    s1_ts = util.get_timestamp_of_pgs(s1)
    s2_ts = util.get_timestamp_of_pgs(s2)

    smr_master.write('fi delay sleep 1 8000\r\n')
    reply = smr_master.read_until('\r\n', 1)
    if reply != None and reply.find('-ERR not supported') != -1:
        self.assertEqual(0, 1, 'make sure that smr has been compiled with the gcov option.')

    smr_slave1.write('fi delay sleep 1 8000\r\n')
    smr_slave2.write('fi delay sleep 1 8000\r\n')

    time.sleep(10)

    # check consistency
    ok = False
    for try_cnt in xrange(20):
        ok = util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port)
        if ok:
            break
        time.sleep(0.5)
    self.assertTrue(ok, 'Unstable cluster state')

    util.log('server state transition after hang')
    util.log_server_state(self.cluster)

    redis0 = redis_mgmt.Redis(m['id'])
    ret = redis0.connect(m['ip'], m['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (m['ip'], m['redis_port']))

    # set values
    for i in range(10000, 20000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        redis0.write(cmd)
        res = redis0.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res))

    redis1 = redis_mgmt.Redis(s1['id'])
    ret = redis1.connect(s1['ip'], s1['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s1['ip'], s1['redis_port']))

    redis2 = redis_mgmt.Redis(s2['id'])
    ret = redis2.connect(s2['ip'], s2['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s2['ip'], s2['redis_port']))

    # check new values (m)
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis0.write(cmd)
        redis0.read_until('\r\n')
        res = redis0.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
                         'failed to get values from redis(id:%d). %s != %d' % (m['id'], res, i))

    # check new values (s1)
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis1.write(cmd)
        redis1.read_until('\r\n')
        res = redis1.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
                         'failed to get values from redis(id:%d). %s != %d' % (s1['id'], res[:-2], i))

    # check new values (s2)
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis2.write(cmd)
        redis2.read_until('\r\n')
        res = redis2.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
                         'failed to get values from redis(id:%d). %s != %d' % (s2['id'], res[:-2], i))

    # check consistency
    ok = False
    for try_cnt in range(0, 10):
        ok = util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port)
        print ok
        if ok:
            break
        time.sleep(1)
    self.assertEqual(ok, True, 'role consistency fail')

    return 0
def failover_while_hang(self, server):
    # timestamp before hang
    ts_before = util.get_timestamp_of_pgs(server)
    self.assertNotEqual(ts_before, -1,
                        'failed to get a timestamp of pgs(%d), ts_before:%d' % (server['id'], ts_before))

    # hang
    util.log('pgs(id:%d, ip:%s, port:%d) is going to hang.' % (server['id'], server['ip'], server['smr_mgmt_port']))
    smr = smr_mgmt.SMR(server['id'])
    ret = smr.connect(server['ip'], server['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to smr. %s:%d' % (server['ip'], server['smr_mgmt_port']))
    smr.write('fi delay sleep 1 10000\r\n')
    reply = smr.read_until('\r\n', 1)
    if reply != None and reply.find('-ERR not supported') != -1:
        self.assertEqual(0, 1, 'make sure that smr has been compiled with the gcov option.')

    time.sleep(4)

    # check state F
    max_try = 20
    expected = 'F'
    for i in range(0, max_try):
        state = util.get_smr_state(server, self.leader_cm)
        if expected == state:
            break
        time.sleep(1)
    self.assertEqual(expected, state,
                     'server%d - state:%s, expected:%s' % (server['id'], state, expected))
    util.log('succeeded : pgs%d state changed to F.' % server['id'])

    # shutdown
    util.log('shutdown pgs%d while hanging.' % server['id'])
    ret = testbase.request_to_shutdown_smr(server)
    self.assertEqual(ret, 0, 'failed to shutdown smr. id:%d' % server['id'])
    ret = testbase.request_to_shutdown_redis(server)
    self.assertEqual(ret, 0, 'failed to shutdown redis. id:%d' % server['id'])

    # check state F
    max_try = 20
    expected = 'F'
    for i in range(0, max_try):
        state = util.get_smr_state(server, self.leader_cm)
        if expected == state:
            break
        time.sleep(1)
    self.assertEqual(expected, state,
                     'server%d - state:%s, expected:%s' % (server['id'], state, expected))
    util.log('succeeded : pgs%d state changed to F.' % server['id'])

    # recovery
    util.log('restart pgs%d.' % server['id'])
    ret = testbase.request_to_start_smr(server)
    self.assertEqual(ret, 0, 'failed to start smr. id:%d' % server['id'])
    ret = testbase.request_to_start_redis(server)
    self.assertEqual(ret, 0, 'failed to start redis. id:%d' % server['id'])
    wait_count = 20
    ret = testbase.wait_until_finished_to_set_up_role(server, wait_count)
    self.assertEqual(ret, 0, 'failed to role change. smr_id:%d' % (server['id']))

    redis = redis_mgmt.Redis(server['id'])
    ret = redis.connect(server['ip'], server['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis')

    # check state N
    max_try = 20
    expected = 'N'
    for i in range(0, max_try):
        state = util.get_smr_state(server, self.leader_cm)
        if expected == state:
            break
        time.sleep(1)
    self.assertEqual(expected, state,
                     'server%d - state:%s, expected:%s' % (server['id'], state, expected))
    util.log('succeeded : pgs%d state changed to N.' % server['id'])

    # wait for rejoin as a slave
    success = False
    for i in range(20):
        role = util.get_role_of_server(server)
        if role == c.ROLE_SLAVE:
            ts_after = util.get_timestamp_of_pgs(server)
            if ts_after != -1 and ts_before != ts_after:
                success = True
                break
        time.sleep(1)
    self.assertEqual(success, True, 'failed to rejoin as a slave')
    util.log('succeeded : pgs%d joined as a slave.' % server['id'])

    return 0
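# The 'fi delay sleep 1 <msec>' write followed by the '-ERR not supported' check
# recurs in every hang test in this suite. A helper of the following shape could
# factor it out; this is only a sketch (the helper name and the use of self.fail
# are illustrative, not part of the original suite), assuming an already
# connected smr_mgmt.SMR object is passed in.
def inject_hang(self, smr_conn, msec):
    # Ask the replicator to sleep once for msec milliseconds (fault injection).
    smr_conn.write('fi delay sleep 1 %d\r\n' % msec)
    reply = smr_conn.read_until('\r\n', 1)
    # Fault injection is only available in builds compiled with the gcov option.
    if reply != None and reply.find('-ERR not supported') != -1:
        self.fail('make sure that smr has been compiled with the gcov option.')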
def test_4_role_change_with_failover(self):
    util.print_frame()

    loop_cnt = 0
    while loop_cnt < 5:
        util.log('')
        util.log('Loop:%d' % loop_cnt)

        util.log("States (before role change)")
        util.log_server_state(self.cluster)

        target = random.choice(self.cluster['servers'])

        # bgsave
        ret = util.bgsave(target)
        self.assertTrue(ret, 'failed to bgsave. pgs:%d' % target['id'])

        # shutdown
        util.log('shutdown pgs%d(%s:%d)' % (target['id'], target['ip'], target['smr_base_port']))
        ret = testbase.request_to_shutdown_smr(target)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        ret = testbase.request_to_shutdown_redis(target)
        self.assertEqual(ret, 0, 'failed to shutdown redis')

        r = ''
        expected = 'N'
        for fc_cnt in xrange(20):
            r = util.get_smr_role_of_cm(target, self.leader_cm)
            if r == expected:
                break
            time.sleep(0.5)
        self.assertEqual(r, expected, 'failure detection error.')

        running_servers = []
        for s in self.cluster['servers']:
            if s != target:
                running_servers.append(s)

        # Get old timestamp
        old_timestamps = {}
        for s in running_servers:
            ts = util.get_timestamp_of_pgs(s)
            old_timestamps[s['id']] = ts

        # Start load generator
        self.load_gen_list = {}
        util.log('start load generator')
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen = load_generator.LoadGenerator(i, ip, port)
            load_gen.start()
            self.load_gen_list[i] = load_gen

        m, s1, s2 = util.get_mss(self.cluster)
        self.assertNotEqual(m, None, 'master is None.')
        self.assertNotEqual(s1, None, 'slave1 is None.')

        # Role change
        master_id = util.role_change(self.leader_cm, self.cluster['cluster_name'], s1['id'])
        self.assertNotEqual(master_id, -1, 'role_change failed')

        util.log("States (after role change)")
        util.log_server_state(self.cluster)

        # Check - get new timestamp
        new_timestamps = {}
        for s in running_servers:
            ts = util.get_timestamp_of_pgs(s)
            new_timestamps[s['id']] = ts

        # Check - compare old timestamps and new timestamps
        for s in running_servers:
            old_ts = old_timestamps[s['id']]
            new_ts = new_timestamps[s['id']]
            self.assertNotEqual(old_ts, new_ts,
                                'Timestamp of a running server has not changed. %d->%d' % (old_ts, new_ts))

        # Check quorum
        m = self.cluster['servers'][master_id]
        expected = 1
        ok = self.__check_quorum(m, expected)
        self.assertTrue(ok, 'unexpected quorum(after role change). expected:%s' % (expected))

        # recovery
        util.log('recovery pgs%d(%s:%d)' % (target['id'], target['ip'], target['smr_base_port']))
        ret = testbase.request_to_start_smr(target)
        self.assertEqual(ret, 0, 'failed to start smr')
        util.log('start smr-replicator done')
        ret = testbase.request_to_start_redis(target, 60)
        self.assertEqual(ret, 0, 'failed to start redis')
        util.log('start redis-arc done')
        ret = testbase.wait_until_finished_to_set_up_role(target, max_try=300)
        self.assertEqual(ret, 0, 'failed to role change. smr_id:%d' % (target['id']))

        util.log("States (after recovery)")
        util.log_server_state(self.cluster)

        # Check cluster state
        normal_state = False
        for i in xrange(20):
            normal_state = util.check_cluster(self.cluster['cluster_name'],
                                              self.leader_cm['ip'], self.leader_cm['cm_port'],
                                              check_quorum=True)
            if normal_state:
                break
            time.sleep(0.5)
        self.assertTrue(normal_state, "Unstable cluster state")

        # Check quorum
        expected = 2
        ok = self.__check_quorum(m, expected)
        self.assertTrue(ok, 'unexpected quorum(after recovery). expected:%s' % (expected))

        # Check consistency
        util.log('stop load generator')
        for i in range(self.max_load_generator):
            self.load_gen_list[i].quit()
        for i in range(self.max_load_generator):
            self.load_gen_list[i].join()
            self.assertTrue(self.load_gen_list[i].isConsistent(), 'Inconsistent after migration')
            self.load_gen_list.pop(i, None)

        loop_cnt += 1

    return 0
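# Several tests in this suite poll util.check_cluster() in a retry loop before
# asserting on the result. A hypothetical helper of this shape could replace
# those loops; the helper name and default arguments are illustrative only and
# are not part of the original suite.
def wait_for_stable_cluster(self, max_try=20, delay=0.5, check_quorum=False):
    # Return True as soon as the cluster reports a normal state, False on timeout.
    for _ in range(max_try):
        if util.check_cluster(self.cluster['cluster_name'],
                              self.leader_cm['ip'], self.leader_cm['cm_port'],
                              check_quorum=check_quorum):
            return True
        time.sleep(delay)
    return False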
def test_two_slaves_hang(self):
    util.print_frame()
    self.setup_test_cluster(self.cluster_3copy)

    # get gateway info
    ip, port = util.get_rand_gateway(self.cluster)
    gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
    ret = gw.connect(ip, port)
    self.assertEqual(ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port))

    # set values
    for i in range(0, 10000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res))

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss(self.cluster)
    self.assertNotEqual(m, None, 'master is None.')
    self.assertNotEqual(s1, None, 'slave1 is None.')
    self.assertNotEqual(s2, None, 'slave2 is None.')

    util.log('server state before hang')
    util.log_server_state(self.cluster)

    # timestamp before hang
    ts_before1 = util.get_timestamp_of_pgs(s1)
    self.assertNotEqual(ts_before1, -1,
                        'failed to get a timestamp of pgs(%d), ts_before:%d' % (s1['id'], ts_before1))
    ts_before2 = util.get_timestamp_of_pgs(s2)
    self.assertNotEqual(ts_before2, -1,
                        'failed to get a timestamp of pgs(%d), ts_before:%d' % (s2['id'], ts_before2))

    # hang
    smr1 = smr_mgmt.SMR(s1['id'])
    ret = smr1.connect(s1['ip'], s1['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave1. %s:%d' % (s1['ip'], s1['smr_mgmt_port']))
    smr2 = smr_mgmt.SMR(s2['id'])
    ret = smr2.connect(s2['ip'], s2['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave2. %s:%d' % (s2['ip'], s2['smr_mgmt_port']))

    smr1.write('fi delay sleep 1 8000\r\n')
    reply = smr1.read_until('\r\n', 1)
    if reply != None and reply.find('-ERR not supported') != -1:
        self.assertEqual(0, 1, 'make sure that smr has been compiled with the gcov option.')
    smr2.write('fi delay sleep 1 8000\r\n')

    time.sleep(7)

    # wait for rejoin as a slave
    success = False
    for i in range(20):
        role = util.get_role_of_server(s1)
        if role == c.ROLE_SLAVE:
            ts_after = util.get_timestamp_of_pgs(s1)
            if ts_after != -1 and ts_before1 == ts_after:
                success = True
                break
        time.sleep(1)
    self.assertEqual(success, True, 'failed to rejoin as a slave. %s:%d' % (s1['ip'], s1['smr_mgmt_port']))

    success = False
    for i in range(20):
        role = util.get_role_of_server(s2)
        if role == c.ROLE_SLAVE:
            ts_after = util.get_timestamp_of_pgs(s2)
            if ts_after != -1 and ts_before2 == ts_after:
                success = True
                break
        time.sleep(1)
    self.assertEqual(success, True, 'failed to rejoin as a slave. %s:%d' % (s2['ip'], s2['smr_mgmt_port']))

    util.log('server state transition after hang')
    util.log_server_state(self.cluster)

    redis1 = redis_mgmt.Redis(s1['id'])
    ret = redis1.connect(s1['ip'], s1['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s1['ip'], s1['redis_port']))

    redis2 = redis_mgmt.Redis(s2['id'])
    ret = redis2.connect(s2['ip'], s2['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s2['ip'], s2['redis_port']))

    # set new values
    for i in range(10000, 20000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        redis1.write(cmd)
        res = redis1.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n',
                         'failed to set values to redis1. cmd:%s, res:%s' % (cmd[:-2], res))

    # check new values
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis2.write(cmd)
        redis2.read_until('\r\n')
        res = redis2.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
                         'failed to get values from redis2. %s != %d' % (res, i))

    # check consistency
    self.assertEqual(util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port), True,
                     'role consistency fail')

    return 0
def test_1_role_change(self):
    util.print_frame()

    self.load_gen_list = {}

    # Start load generator
    util.log("Start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen = load_generator.LoadGenerator(i, ip, port)
        load_gen.start()
        self.load_gen_list[i] = load_gen

    # Loop (smr: 3 copy)
    for i in range(30):
        target_server = util.get_server_by_role(self.cluster['servers'], 'slave')
        self.assertNotEqual(target_server, None, 'Get slave fail.')
        target = target_server['id']

        print ''
        util.log("(3 copy) Loop:%d, target pgs:%d" % (i, target))

        # Get old timestamp
        util.log_server_state(self.cluster)
        old_timestamp_list = []
        for s in self.cluster['servers']:
            ts = util.get_timestamp_of_pgs(s)
            old_timestamp_list.append(ts)

        # Role change
        master = util.role_change(self.leader_cm, self.cluster['cluster_name'], target)
        self.assertNotEqual(master, -1, 'role_change error.')
        while target == master:
            target = (target + 1) % 3
        util.log('Change role success.')

        # Wait until role change finished
        for s in self.cluster['servers']:
            max_try_cnt = 20
            ok = False
            for try_cnt in range(max_try_cnt):
                try:
                    pong = util.pingpong(s['ip'], s['redis_port'])
                    if pong != None and pong == '+PONG\r\n':
                        ok = True
                        break
                except:
                    pass
                time.sleep(0.2)
            self.assertTrue(ok, 'redis state error.')

        # Get new timestamp
        util.log_server_state(self.cluster)
        new_timestamp_list = []
        for s in self.cluster['servers']:
            ts = util.get_timestamp_of_pgs(s)
            new_timestamp_list.append(ts)

        # Compare old timestamps and new timestamps
        for i in range(3):
            self.assertNotEqual(old_timestamp_list[i], new_timestamp_list[i],
                                'Timestamp is not changed. %d->%d' % (old_timestamp_list[i], new_timestamp_list[i]))

        # Check consistency
        for load_gen_id, load_gen in self.load_gen_list.items():
            self.assertTrue(load_gen.isConsistent(), 'Data inconsistency after role_change')

    # Loop (smr: 2 copy)
    self.__del_server(self.cluster['servers'][0])
    servers = [self.cluster['servers'][1], self.cluster['servers'][2]]

    normal_state = False
    for i in xrange(20):
        normal_state = util.check_cluster(self.cluster['cluster_name'],
                                          self.leader_cm['ip'], self.leader_cm['cm_port'],
                                          check_quorum=True)
        if normal_state:
            break
        time.sleep(0.5)
    self.assertTrue(normal_state, "Unstable cluster state")

    for i in range(30):
        print ''
        util.log("(2 copy) Loop:%d, target pgs:%d" % (i, target))

        s = util.get_server_by_role(servers, 'slave')
        target = s['id']

        # Get old timestamp
        util.log_server_state(self.cluster)
        old_timestamp_list = []
        for s in servers:
            ts = util.get_timestamp_of_pgs(s)
            old_timestamp_list.append(ts)

        # Role change
        master = util.role_change(self.leader_cm, self.cluster['cluster_name'], target)
        self.assertNotEqual(master, -1, 'role_change error.')
        while target == master:
            target = (target) % 2 + 1
        util.log('Change role success.')

        # Wait until role change finished
        for s in servers:
            max_try_cnt = 20
            ok = False
            for try_cnt in range(max_try_cnt):
                pong = util.pingpong(s['ip'], s['redis_port'])
                if pong != None and pong == '+PONG\r\n':
                    ok = True
                    break
                time.sleep(0.1)
            self.assertTrue(ok, 'redis state error.')

        # Get new timestamp
        util.log_server_state(self.cluster)
        new_timestamp_list = []
        for s in servers:
            ts = util.get_timestamp_of_pgs(s)
            new_timestamp_list.append(ts)

        # Compare old timestamps and new timestamps
        for i in range(2):
            self.assertNotEqual(old_timestamp_list[i], new_timestamp_list[i],
                                'Timestamp is not changed. %d->%d' % (old_timestamp_list[i], new_timestamp_list[i]))

        # Check consistency
        for load_gen_id, load_gen in self.load_gen_list.items():
            self.assertTrue(load_gen.isConsistent(), 'Data inconsistency after role_change')

    # Go back to initial configuration
    self.assertTrue(util.install_pgs(self.cluster, self.cluster['servers'][0], self.leader_cm, rm_ckpt=False),
                    'failed to recover pgs.')
def role_change_with_hanging_pgs(self, hanging_servers, running_servers, target_id, master):
    util.log('hanging_servers:%s' % hanging_servers)
    util.log('running_servers:%s' % running_servers)
    util.log('target_id:%s' % target_id)

    # Initial data
    util.put_some_data(self.cluster, 3, 10)

    util.log("States (before role change)")
    util.log_server_state(self.cluster)

    # Get old timestamp
    old_timestamps = {}
    for s in self.cluster['servers']:
        ts = util.get_timestamp_of_pgs(s)
        old_timestamps[s['id']] = ts

    # hang
    for s in hanging_servers:
        smr = smr_mgmt.SMR(s['id'])
        ret = smr.connect(s['ip'], s['smr_mgmt_port'])
        self.assertEqual(ret, 0, 'failed to connect to smr. %s:%d' % (s['ip'], s['smr_mgmt_port']))
        util.log("PGS '%d' hang" % s['id'])

        smr.write('fi delay sleep 1 13000\r\n')
        reply = smr.read_until('\r\n', 1)
        if reply != None and reply.find('-ERR not supported') != -1:
            self.assertEqual(0, 1, 'make sure that smr has been compiled with the gcov option.')
        smr.disconnect()

    # Role change
    master_id = util.role_change(self.leader_cm, self.cluster['cluster_name'], target_id)
    self.assertEqual(master_id, -1, 'We expected that role_change would fail, but it succeeded')

    # Check rollback - check quorum
    if master not in hanging_servers:
        expected = 2
        ok = self.__check_quorum(master, expected)
        self.assertTrue(ok, 'rollback quorum fail. expected:%s' % (expected))

    # Check rollback - get new timestamp
    new_timestamps_in_hang = {}
    for s in running_servers:
        ts = util.get_timestamp_of_pgs(s)
        new_timestamps_in_hang[s['id']] = ts

    # Check rollback - compare old timestamps and new timestamps
    for s in running_servers:
        old_ts = old_timestamps[s['id']]
        new_ts = new_timestamps_in_hang[s['id']]
        self.assertEqual(old_ts, new_ts,
                         'Timestamp of a running server has changed. %d->%d' % (old_ts, new_ts))

    time.sleep(16)
    util.log("States (after role change)")
    util.log_server_state(self.cluster)

    self.load_gen_list = {}

    # Start load generator
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen = load_generator.LoadGenerator(i, ip, port)
        load_gen.start()
        self.load_gen_list[i] = load_gen

    # Check quorum
    if master in hanging_servers:
        m, s1, s2 = util.get_mss(self.cluster)
        self.assertNotEqual(m, None, 'master is None.')
        self.assertNotEqual(s1, None, 'slave1 is None.')
        self.assertNotEqual(s2, None, 'slave2 is None.')

        expected = 2
        ok = self.__check_quorum(m, expected)
        self.assertTrue(ok, 'rollback quorum fail. expected:%s' % (expected))

    # Check cluster state
    normal_state = False
    for i in xrange(20):
        normal_state = util.check_cluster(self.cluster['cluster_name'],
                                          self.leader_cm['ip'], self.leader_cm['cm_port'],
                                          check_quorum=True)
        if normal_state:
            break
        time.sleep(0.5)
    self.assertTrue(normal_state, "Unstable cluster state")

    # Check consistency
    for i in range(self.max_load_generator):
        self.load_gen_list[i].quit()
    for i in range(self.max_load_generator):
        self.load_gen_list[i].join()
        self.assertTrue(self.load_gen_list[i].isConsistent(), 'Inconsistent after migration')
        self.load_gen_list.pop(i, None)
def role_change_with_hanging_pgs(self, hanging_servers, running_servers, target_id, master):
    util.log('hanging_servers:%s' % hanging_servers)
    util.log('running_servers:%s' % running_servers)
    util.log('target_id:%s' % target_id)

    # Initial data
    util.put_some_data(self.cluster, 3, 10)

    util.log("States (before role change)")
    util.log_server_state(self.cluster)

    # Get old timestamp
    old_timestamps = {}
    for s in self.cluster['servers']:
        ts = util.get_timestamp_of_pgs(s)
        old_timestamps[s['id']] = ts

    # hang
    for s in hanging_servers:
        smr = smr_mgmt.SMR(s['id'])
        ret = smr.connect(s['ip'], s['smr_mgmt_port'])
        self.assertEqual(ret, 0, 'failed to connect to smr. %s:%d' % (s['ip'], s['smr_mgmt_port']))
        util.log("PGS '%d' hang" % s['id'])

        smr.write('fi delay sleep 1 13000\r\n')
        reply = smr.read_until('\r\n', 1)
        if reply != None and reply.find('-ERR not supported') != -1:
            self.assertEqual(0, 1, 'make sure that smr has been compiled with the gcov option.')
        smr.disconnect()

    # Role change
    master_id = util.role_change(self.leader_cm, self.cluster['cluster_name'], target_id)
    self.assertEqual(master_id, -1, 'We expected that role_change would fail, but it succeeded')

    # Check rollback - check quorum
    if master not in hanging_servers:
        expected = 1
        ok = self.__check_quorum(master, expected)
        self.assertTrue(ok, 'rollback quorum fail. expected:%s' % (expected))

    # Check rollback - get new timestamp
    new_timestamps_in_hang = {}
    for s in running_servers:
        ts = util.get_timestamp_of_pgs(s)
        new_timestamps_in_hang[s['id']] = ts

    # Check rollback - compare old timestamps and new timestamps
    for s in running_servers:
        old_ts = old_timestamps[s['id']]
        new_ts = new_timestamps_in_hang[s['id']]
        self.assertEqual(old_ts, new_ts,
                         'Timestamp of a running server has changed. %d->%d' % (old_ts, new_ts))

    time.sleep(16)
    util.log("States (after role change)")
    util.log_server_state(self.cluster)

    self.load_gen_list = {}

    # Start load generator
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen = load_generator.LoadGenerator(i, ip, port)
        load_gen.start()
        self.load_gen_list[i] = load_gen

    # Check quorum
    if master in hanging_servers:
        m, s1, s2 = util.get_mss(self.cluster)
        self.assertNotEqual(m, None, 'master is None.')
        self.assertNotEqual(s1, None, 'slave1 is None.')
        self.assertNotEqual(s2, None, 'slave2 is None.')

        expected = 1
        ok = self.__check_quorum(m, expected)
        self.assertTrue(ok, 'rollback quorum fail. expected:%s' % (expected))

    # Get new timestamp
    new_timestamps = {}
    for s in self.cluster['servers']:
        ts = util.get_timestamp_of_pgs(s)
        new_timestamps[s['id']] = ts

    # Compare old timestamps and new timestamps
    for s in self.cluster['servers']:
        old_ts = old_timestamps[s['id']]
        new_ts = new_timestamps[s['id']]
        if master in hanging_servers and len(running_servers) != 0:
            self.assertNotEqual(old_ts, new_ts,
                                'Timestamp of a hanging server has not changed. %d->%d' % (old_ts, new_ts))
        else:
            self.assertEqual(old_ts, new_ts,
                             'Timestamp of a running server has changed. %d->%d' % (old_ts, new_ts))

    # Check consistency
    for i in range(self.max_load_generator):
        self.load_gen_list[i].quit()
    for i in range(self.max_load_generator):
        self.load_gen_list[i].join()
        self.assertTrue(self.load_gen_list[i].isConsistent(), 'Inconsistent after migration')
        self.load_gen_list.pop(i, None)
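# The tests above repeatedly snapshot util.get_timestamp_of_pgs() for a list of
# servers into an id-keyed dict and later compare snapshots taken before and
# after an event. A small helper along these lines could factor that out; the
# helper name is illustrative only and is not part of the original suite.
def snapshot_timestamps(self, servers):
    # Map pgs id -> current timestamp, for later comparison.
    timestamps = {}
    for s in servers:
        timestamps[s['id']] = util.get_timestamp_of_pgs(s)
    return timestamps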
def test_4_role_change_with_failover(self):
    util.print_frame()

    loop_cnt = 0
    while loop_cnt < 5:
        util.log('')
        util.log('Loop:%d' % loop_cnt)

        util.log("States (before role change)")
        util.log_server_state(self.cluster)

        target = random.choice(self.cluster['servers'])

        # bgsave
        ret = util.bgsave(target)
        self.assertTrue(ret, 'failed to bgsave. pgs:%d' % target['id'])

        # shutdown
        util.log('shutdown pgs%d(%s:%d)' % (target['id'], target['ip'], target['smr_base_port']))
        ret = testbase.request_to_shutdown_smr(target)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        ret = testbase.request_to_shutdown_redis(target)
        self.assertEqual(ret, 0, 'failed to shutdown redis')

        running_servers = []
        for s in self.cluster['servers']:
            if s != target:
                running_servers.append(s)

        # Get old timestamp
        old_timestamps = {}
        for s in running_servers:
            ts = util.get_timestamp_of_pgs(s)
            old_timestamps[s['id']] = ts

        # Start load generator
        self.load_gen_list = {}
        util.log('start load generator')
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen = load_generator.LoadGenerator(i, ip, port)
            load_gen.start()
            self.load_gen_list[i] = load_gen

        m, s1, s2 = util.get_mss(self.cluster)
        self.assertNotEqual(m, None, 'master is None.')
        self.assertNotEqual(s1, None, 'slave1 is None.')

        # Role change
        master_id = util.role_change(self.leader_cm, self.cluster['cluster_name'], s1['id'])
        self.assertNotEqual(master_id, -1, 'role_change failed')

        util.log("States (after role change)")
        util.log_server_state(self.cluster)

        # Check - get new timestamp
        new_timestamps = {}
        for s in running_servers:
            ts = util.get_timestamp_of_pgs(s)
            new_timestamps[s['id']] = ts

        # Check - compare old timestamps and new timestamps
        for s in running_servers:
            old_ts = old_timestamps[s['id']]
            new_ts = new_timestamps[s['id']]
            self.assertNotEqual(old_ts, new_ts,
                                'Timestamp of a running server has not changed. %d->%d' % (old_ts, new_ts))

        # Check quorum
        m = self.cluster['servers'][master_id]
        expected = 1
        ok = self.__check_quorum(m, expected)
        self.assertTrue(ok, 'unexpected quorum(after role change). expected:%s' % (expected))

        # recovery
        util.log('recovery pgs%d(%s:%d)' % (target['id'], target['ip'], target['smr_base_port']))
        ret = testbase.request_to_start_smr(target)
        self.assertEqual(ret, 0, 'failed to start smr')
        util.log('start smr-replicator done')
        ret = testbase.request_to_start_redis(target, 60)
        self.assertEqual(ret, 0, 'failed to start redis')
        util.log('start redis-arc done')
        ret = testbase.wait_until_finished_to_set_up_role(target, max_try=300)
        self.assertEqual(ret, 0, 'failed to role change. smr_id:%d' % (target['id']))

        util.log("States (after recovery)")
        util.log_server_state(self.cluster)

        # Check quorum
        expected = 1
        ok = self.__check_quorum(m, expected)
        self.assertTrue(ok, 'unexpected quorum(after recovery). expected:%s' % (expected))

        # Check consistency
        util.log('stop load generator')
        for i in range(self.max_load_generator):
            self.load_gen_list[i].quit()
        for i in range(self.max_load_generator):
            self.load_gen_list[i].join()
            self.assertTrue(self.load_gen_list[i].isConsistent(), 'Inconsistent after migration')
            self.load_gen_list.pop(i, None)

        loop_cnt += 1

    return 0
def test_1_role_change(self):
    util.print_frame()

    self.load_gen_list = {}

    # Start load generator
    util.log("Start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen = load_generator.LoadGenerator(i, ip, port)
        load_gen.start()
        self.load_gen_list[i] = load_gen

    # Loop (smr: 3 copy)
    target_server = util.get_server_by_role(self.cluster['servers'], 'slave')
    self.assertNotEqual(target_server, None, 'Get slave fail.')
    target = target_server['id']

    for i in range(30):
        print ''
        util.log("(3 copy) Loop:%d, target pgs:%d" % (i, target))

        # Get old timestamp
        util.log_server_state(self.cluster)
        old_timestamp_list = []
        for s in self.cluster['servers']:
            ts = util.get_timestamp_of_pgs(s)
            old_timestamp_list.append(ts)

        # Role change
        master = util.role_change(self.leader_cm, self.cluster['cluster_name'], target)
        self.assertNotEqual(master, -1, 'role_change error.')
        while target == master:
            target = (target + 1) % 3
        util.log('Change role success.')

        # Wait until role change finished
        for s in self.cluster['servers']:
            max_try_cnt = 20
            ok = False
            for try_cnt in range(max_try_cnt):
                pong = util.pingpong(s['ip'], s['redis_port'])
                if pong != None and pong == '+PONG\r\n':
                    ok = True
                    break
                time.sleep(0.1)
            self.assertTrue(ok, 'redis state error.')

        # Get new timestamp
        util.log_server_state(self.cluster)
        new_timestamp_list = []
        for s in self.cluster['servers']:
            ts = util.get_timestamp_of_pgs(s)
            new_timestamp_list.append(ts)

        # Compare old timestamps and new timestamps
        for i in range(3):
            self.assertNotEqual(old_timestamp_list[i], new_timestamp_list[i],
                                'Timestamp is not changed. %d->%d' % (old_timestamp_list[i], new_timestamp_list[i]))

        # Check consistency
        for load_gen_id, load_gen in self.load_gen_list.items():
            self.assertTrue(load_gen.isConsistent(), 'Data inconsistency after role_change')

    # Loop (smr: 2 copy)
    self.__del_server(self.cluster['servers'][0])
    servers = [self.cluster['servers'][1], self.cluster['servers'][2]]
    s = util.get_server_by_role(servers, 'slave')
    target = s['id']

    for i in range(30):
        print ''
        util.log("(2 copy) Loop:%d, target pgs:%d" % (i, target))

        # Get old timestamp
        util.log_server_state(self.cluster)
        old_timestamp_list = []
        for s in servers:
            ts = util.get_timestamp_of_pgs(s)
            old_timestamp_list.append(ts)

        # Role change
        master = util.role_change(self.leader_cm, self.cluster['cluster_name'], target)
        self.assertNotEqual(master, -1, 'role_change error.')
        while target == master:
            target = (target) % 2 + 1
        util.log('Change role success.')

        # Wait until role change finished
        for s in servers:
            max_try_cnt = 20
            ok = False
            for try_cnt in range(max_try_cnt):
                pong = util.pingpong(s['ip'], s['redis_port'])
                if pong != None and pong == '+PONG\r\n':
                    ok = True
                    break
                time.sleep(0.1)
            self.assertTrue(ok, 'redis state error.')

        # Get new timestamp
        util.log_server_state(self.cluster)
        new_timestamp_list = []
        for s in servers:
            ts = util.get_timestamp_of_pgs(s)
            new_timestamp_list.append(ts)

        # Compare old timestamps and new timestamps
        for i in range(2):
            self.assertNotEqual(old_timestamp_list[i], new_timestamp_list[i],
                                'Timestamp is not changed. %d->%d' % (old_timestamp_list[i], new_timestamp_list[i]))

        # Check consistency
        for load_gen_id, load_gen in self.load_gen_list.items():
            self.assertTrue(load_gen.isConsistent(), 'Data inconsistency after role_change')
def test_two_slaves_hang(self):
    util.print_frame()
    self.setup_test_cluster(self.cluster_3copy)

    # get gateway info
    ip, port = util.get_rand_gateway(self.cluster)
    gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
    ret = gw.connect(ip, port)
    self.assertEqual(ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port))

    # set values
    for i in range(0, 10000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res))

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss(self.cluster)
    self.assertNotEqual(m, None, 'master is None.')
    self.assertNotEqual(s1, None, 'slave1 is None.')
    self.assertNotEqual(s2, None, 'slave2 is None.')

    util.log('server state before hang')
    util.log_server_state(self.cluster)

    # timestamp before hang
    ts_before1 = util.get_timestamp_of_pgs(s1)
    self.assertNotEqual(ts_before1, -1,
                        'failed to get a timestamp of pgs(%d), ts_before:%d' % (s1['id'], ts_before1))
    ts_before2 = util.get_timestamp_of_pgs(s2)
    self.assertNotEqual(ts_before2, -1,
                        'failed to get a timestamp of pgs(%d), ts_before:%d' % (s2['id'], ts_before2))

    # hang
    smr1 = smr_mgmt.SMR(s1['id'])
    ret = smr1.connect(s1['ip'], s1['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave1. %s:%d' % (s1['ip'], s1['smr_mgmt_port']))
    smr2 = smr_mgmt.SMR(s2['id'])
    ret = smr2.connect(s2['ip'], s2['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave2. %s:%d' % (s2['ip'], s2['smr_mgmt_port']))

    smr1.write('fi delay sleep 1 8000\r\n')
    reply = smr1.read_until('\r\n', 1)
    if reply != None and reply.find('-ERR not supported') != -1:
        self.assertEqual(0, 1, 'make sure that smr has been compiled with the gcov option.')
    smr2.write('fi delay sleep 1 8000\r\n')

    time.sleep(7)

    success = False
    for i in xrange(20):
        ret = util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port, check_quorum=True)
        if ret:
            success = True
            break
        time.sleep(1)
    self.assertEqual(success, True, 'unstable cluster')

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss(self.cluster)

    redis1 = redis_mgmt.Redis(s1['id'])
    ret = redis1.connect(s1['ip'], s1['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s1['ip'], s1['redis_port']))

    redis2 = redis_mgmt.Redis(s2['id'])
    ret = redis2.connect(s2['ip'], s2['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s2['ip'], s2['redis_port']))

    # set new values
    for i in range(10000, 20000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        redis1.write(cmd)
        res = redis1.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n',
                         'failed to set values to redis1. cmd:%s, res:%s' % (cmd[:-2], res))

    # check new values
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis2.write(cmd)
        redis2.read_until('\r\n')
        res = redis2.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
                         'failed to get values from redis2. %s != %d' % (res, i))

    # check consistency
    self.assertEqual(util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port), True,
                     'role consistency fail')

    return 0
def slave_hang(self):
    # get gateway info
    ip, port = util.get_rand_gateway(self.cluster)
    gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
    ret = gw.connect(ip, port)
    self.assertEqual(ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port))

    # set values
    for i in range(0, 10000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res))

    # get master, slave1, slave2
    if len(self.cluster['servers']) == 3:
        m, s1, s2 = util.get_mss(self.cluster)
        self.assertNotEqual(m, None, 'master is None.')
        self.assertNotEqual(s1, None, 'slave1 is None.')
        self.assertNotEqual(s2, None, 'slave2 is None.')
    else:
        m, s1 = util.get_mss(self.cluster)
        self.assertNotEqual(m, None, 'master is None.')
        self.assertNotEqual(s1, None, 'slave1 is None.')

    util.log('server state before hang')
    util.log_server_state(self.cluster)

    # timestamp before hang
    ts_before = util.get_timestamp_of_pgs(s1)
    self.assertNotEqual(ts_before, -1,
                        'failed to get a timestamp of pgs(%d), ts_before:%d' % (s1['id'], ts_before))

    # hang
    util.log('pgs(id:%d, ip:%s, port:%d) is going to hang.' % (s1['id'], s1['ip'], s1['smr_mgmt_port']))
    smr = smr_mgmt.SMR(s1['id'])
    ret = smr.connect(s1['ip'], s1['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave1. %s:%d' % (s1['ip'], s1['smr_mgmt_port']))
    smr.write('fi delay sleep 1 6000\r\n')
    reply = smr.read_until('\r\n', 1)
    if reply != None and reply.find('-ERR not supported') != -1:
        self.assertEqual(0, 1, 'make sure that smr has been compiled with the gcov option.')

    time.sleep(7)

    # wait for rejoin as a slave
    success = False
    for i in range(20):
        role = util.get_role_of_server(s1)
        if role == c.ROLE_SLAVE:
            ts_after = util.get_timestamp_of_pgs(s1)
            if ts_after != -1 and ts_before != ts_after:
                success = True
                break
        time.sleep(1)
    self.assertEqual(success, True, 'failed to rejoin as a slave')

    util.log('server state transition after hang')
    util.log_server_state(self.cluster)

    redis1 = redis_mgmt.Redis(s1['id'])
    ret = redis1.connect(s1['ip'], s1['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s1['ip'], s1['redis_port']))

    # set new values
    for i in range(10000, 20000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        redis1.write(cmd)
        res = redis1.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n',
                         'failed to set values to redis1. cmd:%s, res:%s' % (cmd[:-2], res))

    if len(self.cluster['servers']) == 3:
        redis2 = redis_mgmt.Redis(s2['id'])
        ret = redis2.connect(s2['ip'], s2['redis_port'])
        self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s2['ip'], s2['redis_port']))

        # check new values
        for i in range(10000, 20000):
            cmd = 'get %s%d\r\n' % (self.key_base, i)
            redis2.write(cmd)
            redis2.read_until('\r\n')
            res = redis2.read_until('\r\n')
            self.assertEqual(res, '%d\r\n' % i,
                             'failed to get values from redis2. %s != %d' % (res, i))

    # check new values
    redis0 = redis_mgmt.Redis(m['id'])
    ret = redis0.connect(m['ip'], m['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (m['ip'], m['redis_port']))

    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis0.write(cmd)
        redis0.read_until('\r\n')
        res = redis0.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
                         'failed to get values from redis0. %s != %d' % (res[:-2], i))

    # check consistency
    self.assertEqual(util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port), True,
                     'role consistency fail')

    return 0
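# The "wait for rejoin as a slave" loops above all share the same shape: poll the
# role, then compare timestamps. This sketch factors the pattern out; the helper
# name and the expect_new_timestamp switch are illustrative only and are not part
# of the original suite (after a restart the timestamp is expected to change,
# after a pure hang it is expected to stay the same).
def wait_for_slave_rejoin(self, server, ts_before, expect_new_timestamp, max_try=20):
    # Return True when the pgs reports ROLE_SLAVE and its timestamp matches the expectation.
    for _ in range(max_try):
        if util.get_role_of_server(server) == c.ROLE_SLAVE:
            ts_after = util.get_timestamp_of_pgs(server)
            if ts_after != -1 and (ts_after != ts_before) == expect_new_timestamp:
                return True
        time.sleep(1)
    return False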