def test_6_from_3_to_6_heartbeat_checkers( self ):
    util.print_frame()

    hbc_svr_list = []
    i = 5000 + len( self.cluster['servers'] )
    for server in self.cluster['servers']:
        i = i + 1
        hbc_svr = {}
        hbc_svr['id'] = i
        hbc_svr['ip'] = server['ip']
        hbc_svr['zk_port'] = server['zk_port']
        hbc_svr_list.append(hbc_svr)

        ret = testbase.setup_cm( i )
        self.assertEquals( 0, ret, 'failed to copy heartbeat checker, server:%d' % hbc_svr['id'] )

        ret = testbase.request_to_start_cm( i, i )
        self.assertEquals( 0, ret, 'failed to request_to_start_cm, server:%d' % hbc_svr['id'] )

    self.state_transition()

    # Go back to initial configuration
    for hbc_svr in hbc_svr_list:
        self.assertEqual(0, testbase.request_to_shutdown_cm(hbc_svr),
                         'failed to shutdown confmaster')

def test_2_role_change_with_hanging_pgs(self):
    util.print_frame()

    i = 0
    while i < 5:
        util.log('')
        util.log('Loop:%d' % i)

        # get master, slave1, slave2
        m, s1, s2 = util.get_mss(self.cluster)
        self.assertNotEqual(m, None, 'master is None.')
        self.assertNotEqual(s1, None, 'slave1 is None.')
        self.assertNotEqual(s2, None, 'slave2 is None.')

        hang = random.choice(self.cluster['servers'])
        if hang == m:
            hanging_servers = [m]
            running_servers = [s1, s2]
            type = 'master'
        else:
            hanging_servers = [s1]
            running_servers = [m, s2]
            type = 'slave'
        s = random.choice([s1, s2])

        util.log('hanging pgs(id:%d, type:%s), expected_master_id:%d' % (hang['id'], type, s['id']))
        self.role_change_with_hanging_pgs(hanging_servers, running_servers, s['id'], m)
        i += 1

def test_3_repeated_failure_recovery( self ):
    util.print_frame()

    for i in range( 0, 3 ):
        self.failure_recovery( 'master' )
        util.log( 'succeeded to failure_recovery, role:master, cnt:%d' % i )

        self.failure_recovery( 'slave' )
        util.log( 'succeeded to failure_recovery, role:slave, cnt:%d' % i )

def test_redis_hang(self):
    util.print_frame()

    server = self.cluster['servers'][0]
    gw = telnetlib.Telnet(server['ip'], server['gateway_port'])
    redis = telnetlib.Telnet(server['ip'], server['redis_port'])

    redis.write("debug sleep 1000\r\n")
    gw.write("dbsize\r\n")
    gw.read_until("\r\n")

    ts = time.time()
    while (time.time() - ts < 6):
        gw.write("dbsize\r\n")
        time.sleep(0.1)

    gw.write("ping\r\n")
    gw.read_until("+PONG\r\n")

    gw.write("info cluster\r\n")
    ret = gw.read_until("\r\n\r\n", 3)
    util.log(ret)
    if "redis_instances_available:5" not in ret:
        self.assertFalse(True, "Disconnection of timed-out redis is not processed in gateway")

def test_getandplay(self):
    util.print_frame()

    test_limit_mb = 10
    server = self.cluster['servers'][0]
    redis = telnetlib.Telnet(server['ip'], server['redis_port'])

    util.log("Insert large key about 100MB")
    self.insertLargeKey(redis, "test_key")

    # Test getandplay
    start_time = time.time()
    util.log("Start getandplay, start ts:%d" % start_time)
    cmd = "./cluster-util --getandplay %s %d %s %d 0-8191 30000 %d" % (
            server['ip'], self.getdump_proxy_port,
            server['ip'], self.playdump_proxy_port, test_limit_mb)
    proc = util.exec_proc_async(util.cluster_util_dir(0), cmd, True, None, subprocess.PIPE, None)

    monitor_file1 = "%s/%s" % (util.cluster_util_dir(0), self.getdump_proxy_log)
    monitor_file2 = "%s/%s" % (util.cluster_util_dir(0), self.playdump_proxy_log)
    self.monitor_filesize_diff(proc, test_limit_mb, monitor_file1, monitor_file2)

    ret = proc.wait()
    self.assertEqual(0, ret)

    elapse_time = time.time() - start_time
    util.log("End getandplay, elapsed:%d" % elapse_time)

    dump_file_size = os.path.getsize("%s/%s" % (util.cluster_util_dir(0), self.getdump_proxy_log))
    play_file_size = os.path.getsize("%s/%s" % (util.cluster_util_dir(0), self.playdump_proxy_log))
    util.log("Dump File Size:%d, Play File Size:%d, elapsed:%d, limit:%dMB/s, actual:%dMB/s"
             % (dump_file_size, play_file_size, elapse_time, test_limit_mb,
                (dump_file_size + play_file_size) / elapse_time / 1024 / 1024))
    self.assertTrue((dump_file_size + play_file_size) / (10 * 1024 * 1024) < elapse_time)

def pgs_add_and_del( self, upgrade_server, type ):
    util.print_frame()

    util.log( '[start] add and del pgs%d. type:%s' % (upgrade_server['id'], type) )
    util.log_server_state( self.cluster )

    # start load generator
    load_gen_list = {}
    for i in range( len(self.cluster['servers']) ):
        server = self.cluster['servers'][i]
        load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
        load_gen.start()
        load_gen_list[i] = load_gen

    # detach pgs from cluster
    cmd = 'pgs_leave %s %d\r\n' % (upgrade_server['cluster_name'], upgrade_server['id'])
    ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
    jobj = json.loads(ret)
    self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
    util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

    # set new values
    ip, port = util.get_rand_gateway(self.cluster)
    gw = gateway_mgmt.Gateway( '0' )
    gw.connect( ip, port )
    for i in range( 0, 50 ):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        gw.write( cmd )
        res = gw.read_until( '\r\n' )
        self.assertEqual( res, '+OK\r\n',
                          'failed to set values to gw(%s:%d). cmd:%s, res:%s' % (ip, port, cmd[:-2], res[:-2]) )

    # attach pgs to cluster
    cmd = 'pgs_join %s %d\r\n' % (upgrade_server['cluster_name'], upgrade_server['id'])
    ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
    jobj = json.loads(ret)
    self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret) )
    util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
    time.sleep( 3 )

    # check new values
    redis = redis_mgmt.Redis( upgrade_server['id'] )
    ret = redis.connect( upgrade_server['ip'], upgrade_server['redis_port'] )
    self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' %
                       (upgrade_server['id'], upgrade_server['ip'], upgrade_server['redis_port']) )

    for i in range( 0, 50 ):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis.write( cmd )
        redis.read_until( '\r\n' )
        res = redis.read_until( '\r\n' )
        self.assertEqual( res, '%d\r\n' % i,
                          'failed to get values from redis%d. %s != %d' % (upgrade_server['id'], res, i) )
    util.log( 'succeeded : check values with get operations on pgs%d.' % (upgrade_server['id']) )

    # shutdown load generators
    for i in range( len(load_gen_list) ):
        load_gen_list[i].quit()
        load_gen_list[i].join()

    util.log_server_state( self.cluster )

    return 0

def _test_protocol_error_with_quote(self, server, optional = None):
    util.print_frame()

    self.send_command_and_expect(server
            , 'set "key_without_closing_quote value\r\n'
            , '-ERR Protocol error: unbalanced quotes in request\r\n')
    self.server_reconnect(server)
    if (optional):
        self.send_command_and_expect(optional
                , "set 'key_without_closing_quote value\r\n"
                , '-ERR Protocol error: unbalanced quotes in request\r\n')
        self.server_reconnect(optional)

    self.send_command_and_expect(server
            , "set 'key_without_closing_quote value_without_opening_quote'\r\n"
            , "-ERR wrong number of arguments for 'set' command\r\n")
    if (optional):
        self.send_command_and_expect(optional
                , 'set "key_without_closing_quote value_without_opening_quote"\r\n'
                , "-ERR wrong number of arguments for 'set' command\r\n")

    self.send_command_and_expect(server
            , 'set key value_without_opening_quote"\r\n'
            , '-ERR Protocol error: unbalanced quotes in request\r\n')
    self.server_reconnect(server)
    if (optional):
        self.send_command_and_expect(optional
                , "set key value_without_opening_quote'\r\n"
                , '-ERR Protocol error: unbalanced quotes in request\r\n')
        self.server_reconnect(optional)

def test_migrate_empty_s3obj(self):
    util.print_frame()

    ip, port = util.get_rand_gateway(self.cluster)
    client = redis_sock.RedisClient(ip, port)

    # Fill some string and empty s3 objects
    keyprefix = 'test_migrate_empty_s3obj'
    for i in range (1000):
        ok, data = client.do_request('set %s_string_%d %d\r\n' % (keyprefix, i, i))
        assert (ok == True)
        ok, data = client.do_request('s3ladd ks %s_s3_%d svc key val 0\r\n' % (keyprefix, i))
        assert (ok == True and data == 1)
        ok, data = client.do_request('s3lrem ks %s_s3_%d svc key val\r\n' % (keyprefix, i))
        assert (ok == True and data == 1)

    ## migration pg0 -> pg1 then pg1 -> pg0
    ret = util.migration(self.cluster, 0, 1, 4096, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail')
    ret = util.migration(self.cluster, 1, 0, 4096, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    # Check string object
    for i in range (1000):
        ok, data = client.do_request('get %s_string_%d\r\n' % (keyprefix, i))
        assert (ok == True and int(data) == i)

    client.close()

def save_pages(self, pages):
    '''Write pages to disk

    pages is a list of (url, lang) tuples
    '''
    parallelpath = ''

    for (url, lang) in pages:
        try:
            (r, tmpname) = self.downloader.download(url)
        except adder.AdderException as e:
            util.print_frame(debug=str(e) + '\n')
        else:
            normalised_name = namechanger.normalise_filename(
                os.path.basename(tmpname))
            normalised_path = os.path.join(
                self.corpus_adders[lang].goaldir, normalised_name)

            if not os.path.exists(normalised_path):
                parallelpath = self.corpus_adders[lang].copy_file_to_corpus(
                    tmpname, url, parallelpath=parallelpath)
                util.print_frame(
                    debug='adding {}'.format(parallelpath))
            else:
                parallelpath = normalised_path
    print(file=sys.stderr)

def links(self):
    links = set()
    for address in self.tree.findall('.//a'):
        href = address.get('href')
        if href is not None:
            if not re.search(
                    'tv.samediggi.no|^#|/rss/feed|switchlanguage|facebook.com|'
                    'Web-tv|user/login|mailto|/Dokumenter|/Dokumeantta|'
                    '/Tjaatsegh|.pdf|.doc|.xls|/images/|/download/|/Biejjielaahkoe|/Kalender|'
                    '/Dahpahusat|javascript|tel:',
                    href):
                if href.startswith('/'):
                    href = urlparse.urlunparse(
                        (self.parsed_url.scheme,
                         self.parsed_url.netloc,
                         href, '', '', ''))

                add = False
                for uff in self.ok_netlocs:
                    if uff in href:
                        add = True
                        links.add(href)

                if not add:
                    util.print_frame(debug=href.encode('utf8') + '\n')

    return links

def test_1_consistent_while_slave_is_in_load( self ):
    util.print_frame()

    ip, port = util.get_rand_gateway( self.cluster )
    gw = gateway_mgmt.Gateway( ip )
    gw.connect( ip, port )

    max_key = 5
    key_base = 'load_gen_key'
    for idx in range( max_key ):
        cmd = 'set %s%d 0\r\n' % (key_base, idx)
        gw.write( cmd )
        gw.read_until( '\r\n', 10 )

    try_count = 9999
    for value in range( try_count ):
        for idx in range( max_key ):
            cmd = 'set %s%d %d\r\n' % (key_base, idx, value)
            gw.write( cmd )
            response = gw.read_until( '\r\n', 10 )
            self.assertEquals( response, '+OK\r\n' )

            cmd = 'get %s%d\r\n' % (key_base, idx)
            gw.write( cmd )
            response = gw.read_until( '\r\n', 10 )
            response = gw.read_until( '\r\n', 10 )
            self.assertEquals( response, '%s\r\n' % (value),
                               'fail! original_value:%d, return_from_slave:%s' % (value, response[1:]) )

def test_error_log(self):
    util.print_frame()

    cluster = self.cluster

    # Start test-fiall
    p = util.exec_proc_async('%s/.obj%d' % (constant.ARCCI_DIR, self.arch),
            "./test-fiall -z localhost:2181 -c %s -s 10" % cluster['cluster_name'],
            subprocess.PIPE, subprocess.PIPE, subprocess.PIPE)

    # Set up arguments
    server = cluster['servers'][0]
    gw_id = server['id']
    gw_port = server['gateway_port']

    # Check load
    for i in range(20):
        ok = True
        for s in cluster['servers']:
            tps = util.get_tps(s['ip'], s['gateway_port'], 'gw')
            util.log('%s:%d TPS:%d' % (s['ip'], s['gateway_port'], tps))
        time.sleep(1)

    # Check no error
    util.log(' ### BEGIN - ARCCI LOGS ### ')
    (stdout, stderr) = p.communicate()
    for line in stdout.split("\n"):
        util.log(line)
    util.log(' ### END - ARCCI LOGS ### ')

def test_cscandigest(self):
    util.print_frame()

    gateway_list = []
    for server in self.cluster['servers']:
        gateway_list.append(telnetlib.Telnet(server['ip'], server['gateway_port']))

    util.log("run CSCAN test")
    svr = random.choice(gateway_list)
    expect = self.insert_key(svr)

    svr.write('CSCANDIGEST\r\n')
    svr.read_until('\r\n', 3)
    digest1 = svr.read_until('\r\n', 3)

    ret = util.migration(self.cluster, 0, 1, 0, 4095, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    svr.write('CSCANDIGEST\r\n')
    svr.read_until('\r\n', 3)
    digest2 = svr.read_until('\r\n', 3)

    ret = util.migration(self.cluster, 1, 0, 0, 4095, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    svr.write('CSCANDIGEST\r\n')
    svr.read_until('\r\n', 3)
    digest3 = svr.read_until('\r\n', 3)

    self.assertEqual(digest1, digest3, "Incompatible Cluster Digest")
    self.assertNotEqual(digest1, digest2, "Cluster Digest did not change after migration")

def test_quorum_policy_of_hanging_master( self ):
    util.print_frame()

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss( self.cluster )
    self.assertNotEqual( m, None, 'master is None.' )
    self.assertNotEqual( s1, None, 'slave1 is None.' )
    self.assertNotEqual( s2, None, 'slave2 is None.' )

    # hang
    smr = smr_mgmt.SMR( m['id'] )
    ret = smr.connect( m['ip'], m['smr_mgmt_port'] )
    self.assertEqual( ret, 0, 'failed to connect to master. %s:%d' % (m['ip'], m['smr_mgmt_port']) )
    smr.write( 'fi delay sleep 1 15000\r\n' )
    time.sleep( 5 )

    # wait for forced master election
    success = False
    new_master = None
    for i in range( 7 ):
        role = util.get_role_of_server( s1 )
        if role == c.ROLE_MASTER:
            success = True
            new_master = s1
            break
        role = util.get_role_of_server( s2 )
        if role == c.ROLE_MASTER:
            success = True
            new_master = s2
            break
        time.sleep( 1 )
    self.assertEqual( success, True, 'failed to force master election' )

    # shutdown confmaster
    for server in self.cluster['servers']:
        util.shutdown_cm( server['id'] )

    # wait until hanging master wakes up
    time.sleep( 5 )

    # check quorum policy
    quorum_of_hanging_master = util.get_quorum( m )
    self.assertEqual( 2, quorum_of_hanging_master,
                      'invalid quorum of hanging master, expected:%d, but:%d' % (2, quorum_of_hanging_master) )
    util.log( 'succeeded : quorum of hanging master=%d' % quorum_of_hanging_master )

    # check quorum policy
    quorum_of_new_master = util.get_quorum( new_master )
    self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
    self.assertEqual( 1, quorum_of_new_master,
                      'invalid quorum of new master, expected:%d, but:%d' % (1, quorum_of_new_master) )
    util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )

    # Go back to initial configuration
    # Recover confmaster
    self.assertTrue(util.recover_confmaster(self.cluster, [0,1,2], 0),
                    'failed to recover confmaster')

    return 0

def test_gateway_lookup( self ):
    util.print_frame()

    # check initial states
    for server in self.cluster['servers']:
        success = False
        for try_cnt in range( 5 ):
            cmd = 'get /RC/NOTIFICATION/CLUSTER/%s/GW/%d' % (self.cluster['cluster_name'], server['id'])
            ret = util.zk_cmd( cmd )
            ret = ret['err']
            if -1 != ret.find('cZxid'):
                success = True
                break
            time.sleep( 1 )

        self.assertEqual( success, True, 'failed : cmd="%s", ret="%s"' % (cmd, ret) )
        util.log( 'succeeded : cmd="%s", ret="%s"' % (cmd, ret) )

    # shutdown gateway
    for server in self.cluster['servers']:
        success = False

        ret = util.shutdown_gateway( server['id'], server['gateway_port'] )
        self.assertEqual( ret, 0, 'failed : shutdown gateway%d' % server['id'] )

        for try_cnt in range( 10 ):
            cmd = 'get /RC/NOTIFICATION/CLUSTER/%s/GW/%d' % (self.cluster['cluster_name'], server['id'])
            ret = util.zk_cmd( cmd )
            ret = ret['err']
            if -1 != ret.find('Node does not exist'):
                success = True
                break
            time.sleep( 1 )

        self.assertEqual( success, True, 'failed : cmd="%s", ret="%s"' % (cmd, ret) )
        util.log( 'succeeded : cmd="%s", ret="%s"' % (cmd, ret) )

    # restart gateway
    for server in self.cluster['servers']:
        success = False

        ret = util.start_gateway( server['id'], server['ip'], self.leader_cm['cm_port'],
                                  server['cluster_name'], server['gateway_port'] )
        self.assertEqual( ret, 0, 'failed : start gateway%d' % server['id'] )

        for try_cnt in range( 5 ):
            cmd = 'get /RC/NOTIFICATION/CLUSTER/%s/GW/%d' % (self.cluster['cluster_name'], server['id'])
            ret = util.zk_cmd( cmd )
            ret = ret['err']
            if -1 != ret.find('cZxid'):
                success = True
                break
            time.sleep( 1 )

        self.assertEqual( success, True, 'failed : cmd="%s", ret="%s"' % (cmd, ret) )
        util.log( 'succeeded : cmd="%s", ret="%s"' % (cmd, ret) )

    return 0

def _test_multibulk_protocol_error(self, server):
    util.print_frame()
    REDIS_INLINE_MAX_SIZE = 1024*64

    self.send_command_and_expect(server
            , '*%s\r\n' % ('1' * (REDIS_INLINE_MAX_SIZE * 3))
            , '-ERR Protocol error: too big mbulk count string\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*%s\r\n' % ('1' * (1024*48))
            , '-ERR Protocol error: invalid multibulk length\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*%d\r\n' % (1024*1024 + 1)
            , '-ERR Protocol error: invalid multibulk length\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*%d\r\n!\r\n' % (1024*1024)
            , "-ERR Protocol error: expected '$', got '!'\r\n")
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*not_a_number\r\n'
            , '-ERR Protocol error: invalid multibulk length\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*-1\r\nset normal command\r\n'
            , '+OK\r\n')

    self.send_command_and_expect(server
            , '*2\r\n$3\r\nget\r\n$%s\r\n' % ('1' * (REDIS_INLINE_MAX_SIZE * 3))
            , '-ERR Protocol error: too big bulk count string\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*2\r\n!3\r\nget\r\n$3\r\nkey\r\n'
            , "-ERR Protocol error: expected '$', got '!'\r\n")
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*2\r\n$-1\r\n'
            , '-ERR Protocol error: invalid bulk length\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*3\r\n$3\r\nSET\r\n$3\r\nKEY\r\n$%d\r\n' % (512*1024*1024+1)
            , '-ERR Protocol error: invalid bulk length\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , '*3\r\n$3\r\nSET\r\n$1\r\na\r\n$0\r\n\r\n'
            , '+OK\r\n')

def test_basic_op_smr(self):
    util.print_frame()

    f = open("%s/test_basicop_output_smr" % constant.logdir, 'w')
    p = util.exec_proc_async("../redis-2.8.8", "./runtest_smr --accurate", True, None, f, None)

    ret = p.wait()
    f.close()
    self.assertEquals(0, ret)

def test_pipelining_and_join(self):
    util.print_frame()

    api = ARC_API(ZK_ADDR, CLUSTER_NAME, logFilePrefix = self.arcci_log, so_path = self.so_path)

    i = 0
    while i < 1000:
        i += 1

        # Set
        rqst = api.create_request()
        self.assertNotEqual(rqst, None, 'failed to create_request')

        j = 0
        while j < 30:
            j += 1
            api.append_command(rqst, "set %d_%d %d_%d" % (i, j, i, j))

        ret = api.do_request(rqst, 3000)
        self.assertEqual(ret, 0, 'failed to do_request, ret:%d' % ret)

        j = 0
        while j < 30:
            j += 1
            be_errno, reply = api.get_reply(rqst)
            self.assertEqual(be_errno, 0, 'failed to get_reply, be_errno:%d' % be_errno)
            self.assertNotEqual(reply, None, 'failed to get_reply, reply is None')
            self.assertEqual(reply[0], ARC_REPLY_STATUS, 'failed to get_reply, reply[0]:%d' % reply[0])

        api.free_request(rqst)

        # Get
        rqst = api.create_request()
        self.assertNotEqual(rqst, None, 'failed to create_request')

        j = 0
        while j < 30:
            j += 1
            api.append_command(rqst, "get %d_%d" % (i, j))

        ret = api.do_request(rqst, 3000)
        self.assertEqual(ret, 0, 'failed to do_request, ret:%d' % ret)

        j = 0
        while j < 30:
            j += 1
            be_errno, reply = api.get_reply(rqst)
            self.assertEqual(be_errno, 0, 'failed to get_reply, be_errno:%d' % be_errno)
            self.assertNotEqual(reply, None, 'failed to get_reply, reply is None')
            self.assertEqual(reply[0], ARC_REPLY_STRING, 'failed to get_reply, reply[0]:%d' % reply[0])
            self.assertEqual(reply[1], '%d_%d' % (i, j), 'failed to get_reply, value:%s' % reply[1])

        api.free_request(rqst)

def test_5_from_n_to_1_heartbeat_checkers( self ):
    util.print_frame()

    for i in range( 0, len( self.cluster['servers'] ) - 1 ):
        util.log( 'loop %d' % i )
        server = self.cluster['servers'][i]
        self.assertEquals( 0, testbase.request_to_shutdown_cm( server ),
                           'failed to request_to_shutdown_cm, server:%d' % server['id'] )
        time.sleep( 20 )
        self.leader_cm = self.cluster['servers'][i+1]

        self.state_transition()

def test_7_remaining_hbc_connection( self ):
    util.print_frame()

    # check pgs
    for server in self.cluster['servers']:
        before_cnt_redis = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
        before_cnt_smr = util.get_clients_count_of_smr(server['smr_mgmt_port'])

        cmd = 'pgs_leave %s %d forced\r\npgs_del %s %d' % (
                self.cluster['cluster_name'], server['id'],
                self.cluster['cluster_name'], server['id'])
        util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

    for server in self.cluster['servers']:
        # check redis
        success = False
        for i in range(5):
            after_cnt = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
            if after_cnt <= 2:
                success = True
                break
            time.sleep(1)
        self.assertEquals( success, True,
                'failed : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' %
                (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )
        util.log( 'succeeded : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' %
                (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )

        # check smr
        success = False
        expected = 0
        for i in range(5):
            after_cnt = util.get_clients_count_of_smr(server['smr_mgmt_port'])
            if after_cnt == expected:
                success = True
                break
            time.sleep(1)
        self.assertEquals( success, True,
                'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' %
                (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )
        util.log( 'succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' %
                (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )

    # check gateway
    for server in self.cluster['servers']:
        before_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])

        cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
        util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

    for server in self.cluster['servers']:
        success = False
        expected = 1
        for i in range(5):
            after_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])
            if after_cnt == expected:
                success = True
                break
            time.sleep(1)
        self.assertEquals( success, True,
                'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' %
                (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
        util.log( 'succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' %
                (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )

def test_upgrade_slave_smr( self ):
    util.print_frame()

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss( self.cluster )
    self.assertNotEqual( m, None, 'master is None.' )
    self.assertNotEqual( s1, None, 'slave1 is None.' )
    self.assertNotEqual( s2, None, 'slave2 is None.' )

    ret = util.upgrade_pgs( s1, self.leader_cm, self.cluster )
    self.assertTrue(ret, 'Failed to upgrade slave pgs%d' % s1['id'])

def test_basic_op_gateway(self):
    util.print_frame()

    ip, port = util.get_rand_gateway(self.cluster)
    f = open("%s/test_basicop_output_gw" % constant.logdir, 'w')
    p = util.exec_proc_async("../redis-2.8.8", "./runtest_gw --accurate --gw-port " + str(port),
                             True, None, f, None)

    ret = p.wait()
    f.close()
    self.assertEquals(0, ret)

def _test_inline_protocol_error(self, server):
    util.print_frame()
    REDIS_INLINE_MAX_SIZE = 1024*64

    self.send_command_and_expect(server
            , 'set key %s\r\n' % ('x' * (REDIS_INLINE_MAX_SIZE * 3))
            , '-ERR Protocol error: too big inline request\r\n')
    self.server_reconnect(server)

    self.send_command_and_expect(server
            , 'set key value\nget key\n'
            , '+OK\r\n$5\r\nvalue\r\n')

def lang(self):
    uff = {}
    uff['no-bokmaal'] = 'nob'
    uff['sma-NO'] = 'sma'
    uff['sme-NO'] = 'sme'
    uff['smj-no'] = 'smj'
    content_language = self.tree.find('.//meta[@name="Content-language"]')

    if content_language is not None:
        return uff[content_language.get('content')]
    else:
        uff = 'no language {}'.format(self.url.encode('utf8'))
        util.print_frame(debug=uff)

def test_gateway_fault_failback_zk(self):
    util.print_frame()

    # Start load generation
    self.load_gen_list = {}
    for i in range(len(self.cluster['servers'])):
        arc_api = ARC_API(ZK_ADDR, CLUSTER_NAME, logFilePrefix = self.arcci_log, so_path = self.so_path)
        server = self.cluster['servers'][i]
        load_gen = LoadGenerator_ARCCI_FaultTolerance(server['id'], arc_api)
        load_gen.start()
        self.load_gen_list[i] = load_gen

    self._gateway_fault_failback(arc_api)

def test_fd_leak(self):
    util.print_frame()

    server = util.get_server_by_role_and_pg(self.cluster['servers'], 'master', 0)

    redis = redis_mgmt.Redis(server['id'])
    ret = redis.connect(server['ip'], server['redis_port'])
    self.assertEquals(ret, 0, 'failed to connect to redis')

    smr = smr_mgmt.SMR(server['id'])
    ret = smr.connect(server['ip'], server['smr_mgmt_port'])
    self.assertEquals(ret, 0, 'failed to connect to smr')

    redis.write('info server\r\n')
    res = redis.read_until('process_id:')
    res = redis.read_until('\r\n')
    redis.write('quit\r\n')
    pid = copy.copy(res[:-2])

    num1 = self.numOpenFds(pid)
    print "Initial : Open Fds: %s" % self.numOpenFds(pid)

    smr.write('fi delay sleep 1 1000000\r\n')
    smr.read_until('\r\n')

    for i in range(5):
        ret = redis.connect(server['ip'], server['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        redis.write('ping\r\n')
        res = redis.read_until('\r\n', 1)
        print "Try Ping : Open Fds: %s" % self.numOpenFds(pid)

        redis.disconnect()
        print "Disconnect : Open Fds: %s" % self.numOpenFds(pid)

        ret = redis.connect(server['ip'], server['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        redis.write('*1\r\nasdf\r\n')
        time.sleep(1)
        res = redis.read_until('\r\n', 1)
        print "Protocol Error : Open Fds: %s" % self.numOpenFds(pid)

        redis.disconnect()
        print "Disconnect : Open Fds: %s" % self.numOpenFds(pid)

    print "End : Open Fds: %s" % self.numOpenFds(pid)
    num2 = self.numOpenFds(pid)
    self.assertEquals(num1, num2)

    # Go back to initial configuration
    self.assertTrue(util.shutdown_pgs(server, self.cluster['servers'][0]),
                    'recover pgs fail. (shutdown_pgs)')
    self.assertTrue(util.recover_pgs(server, self.cluster['servers'][0]),
                    'recover pgs fail. (recover_pgs)')

def test_5_repeated_redis_failure_recovery( self ):
    util.print_frame()

    for i in range( 0, 10 ):
        print 'loop : %d' % i
        role = random.choice(['master', 'slave'])
        self.failure_recovery( role, redis_only=True )
        util.log( 'succeeded to failure_recovery, role:%s, cnt:%d' % (role, i) )

        if role == 'master':
            role = 'slave'
        else:
            role = 'master'
        self.failure_recovery( role, redis_only=True )
        util.log( 'succeeded to failure_recovery, role:%s, cnt:%d' % (role, i) )

def test_info_and_dbsize_command(self):
    util.print_frame()

    servers = self.cluster['servers']
    gw_list = []
    for server in servers:
        gw = telnetlib.Telnet(server['ip'], server['gateway_port'])
        gw_list.append(gw)

    for i in range(10000):
        gw_list[0].write("set key_%d value_%d\r\n" % (i, i))
        gw_list[0].read_until("+OK\r\n")

    for i in range(1000):
        gw_list[0].write("expire key_%d 10000000\r\n" % (i))
        gw_list[0].read_until(":1\r\n")

    for gw in gw_list:
        gw.write("info all\r\n")
        gw.write("ping\r\n")
        ret = gw.read_until("+PONG\r\n")

        if "cluster_name:testCluster0" not in ret:
            util.log(ret)
            self.assertFalse(True, "Incorrect result of info commands, cluster_name")
        if "total_partition_groups:2" not in ret:
            util.log(ret)
            self.assertFalse(True, "Incorrect result of info commands, partition_groups")
        if "partition_groups_available:2" not in ret:
            util.log(ret)
            self.assertFalse(True, "Incorrect result of info commands, partition_groups")
        if "total_redis_instances:6" not in ret:
            util.log(ret)
            self.assertFalse(True, "Incorrect result of info commands, redis_instances_reachable")
        if "redis_instances_available:6" not in ret:
            util.log(ret)
            self.assertFalse(True, "Incorrect result of info commands, redis_instances_unreachable")
        if "gateway_disconnected_redis:0" not in ret:
            util.log(ret)
            self.assertFalse(True, "Incorrect result of info commands, inactive_connections")
        if "db0:keys=10000,expires=1000,avg_ttl=9999" not in ret:
            util.log(ret)
            self.assertFalse(True, "Incorrect result of info commands, keys")

def test_random_pgs_del_and_add(self):
    util.print_frame()

    # start load generator
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        self.load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        self.load_gen_thrd_list[i].start()
    util.log("started load_generator")

    servers = self.cluster['servers']
    gw_list = []
    for server in servers:
        gw = {}
        gw['mgmt'] = telnetlib.Telnet(server['ip'], server['gateway_port'] + 1)
        gw['normal'] = telnetlib.Telnet(server['ip'], server['gateway_port'])
        gw_list.append(gw)

    count = 10
    while count > 0:
        c = random.choice(servers)

        for gw in gw_list:
            gw['mgmt'].write("pgs_del %d %d\r\n" % (c['id'], c['pg_id']))
            gw['mgmt'].read_until("+OK\r\n")

        gw_list[0]['mgmt'].write("cluster_info\r\nping\r\n")
        print gw_list[0]['mgmt'].read_until("+PONG\r\n")

        for gw in gw_list:
            gw['mgmt'].write("pgs_add %d %d %s %d\r\n" % (c['id'], c['pg_id'], c['ip'], c['redis_port']))
            gw['mgmt'].read_until("+OK\r\n")

        for gw in gw_list:
            while True:
                gw['normal'].write("info gateway\r\n")
                ret = gw['normal'].read_until("\r\n\r\n")
                if "gateway_disconnected_redis:0\r\n" in ret:
                    break

        count -= 1

    # check consistency of load_generator
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].quit()
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].join()
        self.assertTrue(self.load_gen_thrd_list[i].isConsistent(),
                        'Inconsistent after gateway_mgmt test')

def test_3_role_change_while_all_pgs_hanging(self):
    util.print_frame()

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss(self.cluster)
    self.assertNotEqual(m, None, 'master is None.')
    self.assertNotEqual(s1, None, 'slave1 is None.')
    self.assertNotEqual(s2, None, 'slave2 is None.')

    hanging_servers = [m, s1, s2]
    running_servers = []
    s = random.choice([s1, s2])

    self.role_change_with_hanging_pgs(hanging_servers, running_servers, s['id'], m)
    return 0

def test_1_state_transition(self):
    util.print_frame()
    self.state_transition()

def test_basic_op_capi32(self):
    util.print_frame()
    self.__test_basic_op_capi(32)

def test_master_hang_3copy(self):
    util.print_frame()

    self.setup_test_cluster(self.cluster_3copy)
    self.master_hang()
    return 0

def test_4_elect_master_randomly(self):
    util.print_frame()
    for i in range(1):
        self.elect_master_randomly()

def test_delete_smrlog_after_scaleout(self):
    util.print_frame()

    # start load generator
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        self.load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        self.load_gen_thrd_list[i].start()

    time.sleep(5)  # generate load for 5 sec
    util.log("started load_generator")

    # servers for scale out
    servers = [config.server4, config.server5, config.server6]
    leader_cm = self.cluster['servers'][0]

    # Scale out
    cluster = config.clusters[0]
    ret = util.pg_add(cluster, servers, leader_cm)
    self.assertEqual(True, ret, 'Scale out fail. util.pg_add returns false')

    time.sleep(5)

    # pg0 -> pg1
    cluster = config.clusters[1]
    ret = util.migration(cluster, 0, 1, 8000, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

    # get log files
    old_logs = {}
    for s in config.clusters[0]['servers']:
        parent_dir, log_dir = util.smr_log_dir(s['id'])
        path = '%s/%s' % (parent_dir, log_dir)
        old_logs[s['id']] = util.ls(path)

    # bgsave in order to make smrlogs deleted.
    for s in config.clusters[0]['servers']:
        bgsave_ret = util.bgsave(s)
        self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % s['id'])
        util.log('bgsave pgs%d is done.' % s['id'])

    # check consistency
    ok = True
    for j in range(len(self.load_gen_thrd_list)):
        self.assertTrue(self.load_gen_thrd_list[j].isConsistent(),
                        'Inconsistent after migration')

    # does smr-replicator delete smrlogs?
    i = 0
    while i < 20:
        i += 1

        # get current log files
        cur_logs = {}
        for s in config.clusters[0]['servers']:
            parent_dir, log_dir = util.smr_log_dir(s['id'])
            path = '%s/%s' % (parent_dir, log_dir)
            cur_logs[s['id']] = util.ls(path)

        # compare old and new
        temp_old_logs = copy.deepcopy(old_logs)
        for id, nl in cur_logs.items():
            ol = temp_old_logs.get(id)
            self.assertNotEqual(ol, None,
                    "failed to check logfiles. old logs for smr-replicator '%d' do not exist." % id)

            for log in nl:
                if log in ol:
                    ol.remove(log)

        ok = True
        for id, ol in temp_old_logs.items():
            if len(ol) == 0:
                ok = False

        util.log('Loop %d ---------------------------------------------------------' % i)
        util.log('deleted smrlog files: %s' % util.json_to_str(temp_old_logs))

        if ok:
            break

        time.sleep(10)

    self.assertTrue(ok, 'smr-replicator does not delete smrlogs.')
    util.log('smr-replicator deletes smrlogs.')

    # check consistency of load_generator
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].quit()
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].join()
        self.assertTrue(self.load_gen_thrd_list[i].isConsistent(),
                        'Inconsistent after migration')

def test_quorum_with_left_pgs( self ):
    util.print_frame()

    # start load generators
    load_gen_list = {}
    for i in range( len(self.cluster['servers']) ):
        server = self.cluster['servers'][i]
        load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
        load_gen.start()
        load_gen_list[i] = load_gen

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss( self.cluster )
    self.assertNotEqual( m, None, 'master is None.' )
    self.assertNotEqual( s1, None, 'slave1 is None.' )
    self.assertNotEqual( s2, None, 'slave2 is None.' )

    # detach pgs from cluster
    cmd = 'pgs_leave %s %d\r\n' % (m['cluster_name'], m['id'])
    ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
    jobj = json.loads(ret)
    self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
    util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

    # check if pgs is removed
    success = False
    for try_cnt in range( 10 ):
        redis = redis_mgmt.Redis( m['id'] )
        ret = redis.connect( m['ip'], m['redis_port'] )
        self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
        util.log( 'succeeded : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )

        redis.write( 'info stats\r\n' )
        for i in range( 6 ):
            redis.read_until( '\r\n' )
        res = redis.read_until( '\r\n' )
        self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
        util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (m['id'], m['ip'], m['redis_port'], res[:-2]) )

        no = int( res.split(':')[1] )
        if no <= 100:
            success = True
            break
        time.sleep( 1 )

    self.assertEquals( success, True, 'failed : pgs is not removed.' )
    util.log( 'succeeded : pgs is removed' )

    # check states of all pgs in pg
    for s in self.cluster['servers']:
        real_role = util.get_role_of_server( s )
        real_role = util.roleNumberToChar( real_role )
        smr_info = util.get_smr_info( s, self.leader_cm )
        cc_role = smr_info['smr_Role']
        cc_hb = smr_info['hb']

        if cc_hb == 'N':
            continue

        self.assertEqual( real_role, cc_role,
                          'failed : the roles differ, real=%s, cc=%s' % (real_role, cc_role) )
        util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

    # check quorum policy
    quorum_of_left_master = util.get_quorum( m )
    self.assertEqual( self.quorum_policy[1], quorum_of_left_master,
                      'invalid quorum of left master, expected:%d, but:%d' % (self.quorum_policy[1], quorum_of_left_master) )
    util.log( 'succeeded : quorum of left master=%d' % quorum_of_left_master )

    # 'role lconn' to master
    cmd = 'role lconn\r\n'
    ret = util.cmd_to_smr( m, cmd )
    self.assertEqual( ret, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
    util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

    # wait for master election
    success = False
    new_master = None
    for i in range( 10 ):
        role = util.get_role_of_server( s1 )
        if role == c.ROLE_MASTER:
            success = True
            new_master = s1
            break
        role = util.get_role_of_server( s2 )
        if role == c.ROLE_MASTER:
            success = True
            new_master = s2
            break
        time.sleep( 1 )
    self.assertEqual( success, True, 'failed to elect new master' )
    util.log( 'succeeded : elect new master, master_id=%d' % new_master['id'] )

    time.sleep( 1 )

    # check the numbers of master, slave, and lconn
    cnt_master = 0
    cnt_slave = 0
    cnt_lconn = 0
    for s in self.cluster['servers']:
        role = util.get_role_of_server( s )
        if role == c.ROLE_MASTER:
            cnt_master = cnt_master + 1
        elif role == c.ROLE_SLAVE:
            cnt_slave = cnt_slave + 1
        elif role == c.ROLE_LCONN:
            cnt_lconn = cnt_lconn + 1
    self.assertEqual( cnt_master, 1, 'failed : the number of masters is %s, expected 1' % cnt_master )
    self.assertEqual( cnt_slave, 1, 'failed : the number of slaves is %s, expected 1' % cnt_slave )
    self.assertEqual( cnt_lconn, 1, 'failed : the number of lconns is %s, expected 1' % cnt_lconn )

    # check states of all pgs in pg
    for s in self.cluster['servers']:
        real_role = util.get_role_of_server( s )
        real_role = util.roleNumberToChar( real_role )
        smr_info = util.get_smr_info( s, self.leader_cm )
        cc_role = smr_info['smr_Role']
        cc_hb = smr_info['hb']

        if cc_hb == 'N':
            continue

        self.assertEqual( real_role, cc_role,
                          'failed : the roles differ, real=%s, cc=%s' % (real_role, cc_role) )
        util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

    # check quorum policy
    quorum_of_new_master = util.get_quorum( new_master )
    self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
    self.assertEqual( self.quorum_policy[1], quorum_of_new_master,
                      'invalid quorum of new master, expected:%d, but:%d' % (self.quorum_policy[1], quorum_of_new_master) )
    util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )

    # shutdown load generators
    for i in range( len(load_gen_list) ):
        load_gen_list[i].quit()
        load_gen_list[i].join()

    return 0

def __test_partial_load_with_s3command(self):
    util.print_frame()
    pass

def test_random_migrate(self):
    util.print_frame()

    # start load generator
    load_gen_thrd_list = {}
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        load_gen_thrd_list[i].start()

    ret = util.migration(self.cluster, 0, 1, 4096, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    leader_cm = self.cluster['servers'][0]
    cluster_name = self.cluster['cluster_name']
    mapping = [-1] * 8192
    count = 50
    while count > 0:
        # get PN -> PG map
        cmd = 'cluster_info %s' % cluster_name
        result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
        ret = json.loads(result)
        rle = ret['data']['cluster_info']['PN_PG_Map']
        print "PN_PG_MAP = %s" % rle

        sp = rle.split()
        index = 0
        for i in range(len(sp) / 2):
            for j in range(int(sp[i * 2 + 1])):
                mapping[index] = int(sp[i * 2])
                index += 1

        slot = random.randint(0, 8191)
        src_pgid = mapping[slot]
        dst_pgid = (src_pgid + 1) % 2
        slot_end = slot
        while random.randint(0, 5) <= 4:
            if slot_end < 8191 and mapping[slot_end + 1] == src_pgid:
                slot_end += 1
            else:
                break

        print "SLOT=%d, SRC_PGID=%d, DST_PGID=%d" % (slot, src_pgid, dst_pgid)
        ret = util.migration(self.cluster, src_pgid, dst_pgid, slot, slot_end, 40000)
        self.assertEqual(True, ret, 'Migration Fail')

        ok = True
        for j in range(len(load_gen_thrd_list)):
            if load_gen_thrd_list[j].isConsistent() == False:
                ok = False
                break
        if not ok:
            break

        count -= 1

    # check consistency of load_generator
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].quit()
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].join()
        self.assertTrue(load_gen_thrd_list[i].isConsistent(),
                        'Inconsistent after migration')

    # Go back to initial configuration
    cinfo = util.cluster_info(leader_cm['ip'], leader_cm['cm_port'], cluster_name)
    for slot in util.get_slots(cinfo['cluster_info']['PN_PG_Map'], 1):
        self.assertTrue(
            util.migration(self.cluster, 1, 0, slot['begin'], slot['end'], 40000),
            'failed to rollback migration')

def test_local_proxy32(self):
    util.print_frame()
    self.__test_local_proxy(32)

def __test_local_proxy(self, arch=64):
    util.print_frame()

    # Clean server log file
    p = util.exec_proc_async(util.capi_dir(0),
                             'rm capi_server-*',
                             True, None, subprocess.PIPE, None)
    p.wait()

    # run test server
    _capi_server_conf = """
zookeeper 127.0.0.1:2181
cluster_name %s
port 6200
daemonize no
num_conn_per_gw 2
init_timeout_millis 10000
log_level INFO
log_file_prefix "capi_server"
max_fd 4096
conn_reconnect_millis 1000
zk_reconnect_millis 1000
zk_session_timeout_millis 10000
""" % self.cluster['cluster_name']
    old_cwd = os.path.abspath(os.getcwd())
    os.chdir(util.capi_dir(0))
    f = open('capi_server.conf', 'w')
    f.write(_capi_server_conf)
    f.close()
    os.chdir(old_cwd)

    if arch == 32:
        cmd = "./%s capi_server.conf" % constant.CAPI32_TEST_SERVER
    else:
        cmd = "./%s capi_server.conf" % constant.CAPI_TEST_SERVER

    capi_server = util.exec_proc_async(util.capi_dir(0),
                                       cmd, True, None, subprocess.PIPE, None)

    # ping check
    while True:
        try:
            t = telnetlib.Telnet('127.0.0.1', 6200)
            break
        except:
            time.sleep(1)
            continue

    t.write("ping\r\n")
    t.read_until('+PONG\r\n')
    t.close()

    # Start load generator
    load_gen_thrd_list = {}
    for i in range(self.max_load_generator):
        load_gen_thrd_list[i] = load_generator.LoadGenerator(i, 'localhost', 6200)
        load_gen_thrd_list[i].start()

    time.sleep(5)

    # Check reconfiguration by SIGHUP
    p = util.exec_proc_async(util.capi_dir(0),
            'grep "Connected to the zookeeper" capi_server-* | wc -l',
            True, None, subprocess.PIPE, None)
    p.wait()
    wc = p.stdout.readline()
    print 'grep "Connected to the zookeeper" result : ' + wc
    self.assertEquals(wc.strip(), '1')

    capi_server.send_signal(signal.SIGHUP)
    time.sleep(5)

    p = util.exec_proc_async(util.capi_dir(0),
            'grep "Connected to the zookeeper" capi_server-* | wc -l',
            True, None, subprocess.PIPE, None)
    p.wait()
    wc = p.stdout.readline()
    print 'grep "Connected to the zookeeper" result : ' + wc
    self.assertEquals(wc.strip(), '2')

    p = util.exec_proc_async(util.capi_dir(0),
            'grep "Graceful shutdown caused by API" capi_server-* | wc -l',
            True, None, subprocess.PIPE, None)
    p.wait()
    wc = p.stdout.readline()
    print 'grep "Graceful shutdown caused by API" result : ' + wc
    self.assertEquals(wc.strip(), '1')

    # Check consistency after sending many SIGHUP signals
    for i in range(50):
        capi_server.send_signal(signal.SIGHUP)
        time.sleep(0.1)

    # check consistency of load_generator
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].quit()
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].join()
        self.assertTrue(load_gen_thrd_list[i].isConsistent(),
                        'Inconsistent after sending signal')

    # Terminate test server
    capi_server.send_signal(signal.SIGTERM)
    capi_server.wait()

def test_basic_op_capi64(self):
    util.print_frame()
    self.__test_basic_op_capi(64)

def test_large_scale_master_election(self):
    util.print_frame()

    # initialize cluster information
    pgs_id = 10
    cluster = {
        'cluster_name': 'large_scale',
        'keyspace_size': 8192,
        'quorum_policy': '0:1',
        'slots': [],
        'pg_id_list': [],
        'servers': []
    }
    pg_max = 32
    pgs_per_pg = 3

    for pg_id in range(pg_max):
        cluster['pg_id_list'].append(pg_id)
        cluster['slots'].append(8192 / pg_max * pg_id)
        if pg_id == pg_max - 1:
            cluster['slots'].append(8191)
        else:
            cluster['slots'].append(8192 / pg_max * (pg_id + 1) - 1)

        for pgs in range(pgs_per_pg):
            smr_base_port = 15000 + pgs_id * 20
            smr_mgmt_port = smr_base_port + 3
            gateway_port = smr_base_port + 10
            redis_port = smr_base_port + 9

            server = {}
            server['id'] = pgs_id
            pgs_id = pgs_id + 1
            server['cluster_name'] = cluster['cluster_name']
            server['ip'] = self.cluster['servers'][0]['ip']
            server['pm_name'] = self.cluster['servers'][0]['pm_name']
            server['cm_port'] = None
            server['pg_id'] = pg_id
            server['smr_base_port'] = smr_base_port
            server['smr_mgmt_port'] = smr_mgmt_port
            server['gateway_port'] = gateway_port
            server['redis_port'] = redis_port
            server['zk_port'] = 2181

            cluster['servers'].append(server)

    # send initialize commands to confmaster
    testbase.initialize_cluster(cluster, self.leader_cm)

    # set up pgs binaries
    try:
        for server in cluster['servers']:
            id = server['id']
            util.log('copy binaries, server_id=%d' % id)

            util.copy_smrreplicator(id)
            util.copy_gw(id)
            util.copy_redis_server(id)
            util.copy_cluster_util(id)
    except IOError as e:
        util.log(e)
        util.log('Error: can not find file or read data')
        self.assertEqual(0, 1, 'Error: can not find file or read data')
    except:
        util.log('Error: file open error.')

    # cleanup servers' directories
    for server in cluster['servers']:
        ret = testbase.cleanup_pgs_log_and_ckpt(cluster['cluster_name'], server)
        self.assertEqual(ret, 0, 'failed to cleanup_test_environment, id=%d' % server['id'])

    # start pgs
    for server in cluster['servers']:
        ret = testbase.request_to_start_smr(server)
        self.assertEqual(ret, 0, 'failed to request_to_start_smr, id=%d' % server['id'])

    for server in cluster['servers']:
        ret = testbase.request_to_start_redis(server, check=False)
        self.assertEqual(ret, 0, 'failed to request_to_start_redis, id=%d' % server['id'])

    for server in cluster['servers']:
        ret = testbase.wait_until_finished_to_set_up_role(server)
        self.assertEqual(ret, 0, 'failed to role set up, id=%d' % server['id'])

    for i in range(4):
        server = cluster['servers'][i]
        ret = testbase.request_to_start_gateway(cluster['cluster_name'], server, self.leader_cm)
        self.assertEqual(ret, 0, 'failed to request_to_start_gateway, id=%d' % server['id'])

    clusters = cluster_ls()
    self.assertNotEqual(len(clusters), 0, 'There is no cluster.')

    ok = True
    for c in clusters:
        if not util.check_cluster(str(c), self.leader_cm['ip'], self.leader_cm['cm_port'], check_quorum=True):
            ok = False
    self.assertEqual(ok, True, 'failed to initialize roles of pgs')

def test_2_consistent_after_failover(self):
    util.print_frame()
    for i in range(3):
        util.log('loop %d' % i)
        self.consistent_after_failover()

def test_local_proxy64(self):
    util.print_frame()
    self.__test_local_proxy(64)

def test_3_heartbeat_target_connection_count(self):
    util.print_frame()

    util.log('wait until all connections are established')
    for i in range(1, 8):
        time.sleep(1)
        util.log('%d sec' % i)

    # check pgs
    for server in self.cluster['servers']:
        before_cnt_redis = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
        before_cnt_smr = util.get_clients_count_of_smr(server['smr_mgmt_port'])

        cmd = 'pgs_leave %s %d forced' % (self.cluster['cluster_name'], server['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)
        jobj = json.loads(ret)
        self.assertEqual(jobj['state'], 'success',
                         'failed : cmd="%s", reply="%s"' % (cmd, ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd, ret[:-2]))

        # check redis
        success = False
        for i in range(5):
            after_cnt = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
            if after_cnt <= 2:
                success = True
                break
            time.sleep(1)
        self.assertEquals(success, True,
                'failed : the number of connections to redis%d(%s:%d) is %d, expected:n<=2, before=%d' %
                (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis))
        util.log('succeeded : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' %
                (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis))

        # check smr
        success = False
        expected = 1
        for i in range(5):
            after_cnt = util.get_clients_count_of_smr(server['smr_mgmt_port'])
            if after_cnt == expected:
                success = True
                break
            time.sleep(1)
        self.assertEquals(success, True,
                'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' %
                (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr))
        util.log('succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' %
                (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr))

    # check gateway
    for server in self.cluster['servers']:
        before_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])

        cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)
        jobj = json.loads(ret)
        self.assertEqual(jobj['state'], 'success',
                         'failed : cmd="%s", reply="%s"' % (cmd, ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd, ret[:-2]))

        success = False
        expected = 1
        for i in range(5):
            after_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])
            if after_cnt == expected:
                success = True
                break
            time.sleep(1)
        self.assertEquals(success, True,
                'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' %
                (server['id'], server['ip'], server['gateway_port'], after_cnt, expected))
        util.log('succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' %
                (server['id'], server['ip'], server['gateway_port'], after_cnt, expected))

def test_restart_recovery_with_remote_checkpoint_and_remote_log(self):
    util.print_frame()

    key_base = 'key'
    target = util.get_server_by_role(self.cluster['servers'], 'slave')
    master = util.get_server_by_role(self.cluster['servers'], 'master')

    ip, port = util.get_rand_gateway(self.cluster)
    gw = gateway_mgmt.Gateway(master['id'])
    ret = gw.connect(ip, port)
    self.assertEqual(ret, 0, 'failed to connect to gateway')

    # set initial data in order to make an elapsed time for bgsave longer
    self.put_some_data()

    # generate some data
    for i in range(0, 100):
        key = '%s%d' % (key_base, i)
        cmd = 'set %s %d\r\n' % (key, i)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEquals(res, '+OK\r\n')
    gw.disconnect()

    # delete a local checkpoint
    util.log('delete pgs%d`s check point.' % target['id'])
    util.del_dumprdb(target['id'])

    # generate a remote check point
    bgsave_ret = util.bgsave(master)
    self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % master['id'])

    # shutdown
    util.log('shutdown target')
    ret = testbase.request_to_shutdown_smr(target)
    self.assertEqual(ret, 0, 'failed to shutdown smr')

    time.sleep(10)

    # generate some data
    ret = gw.connect(ip, port)
    self.assertEqual(ret, 0, 'failed to connect to gateway')
    for i in range(100, 200):
        key = '%s%d' % (key_base, i)
        cmd = 'set %s %d\r\n' % (key, i)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEquals(res, '+OK\r\n')
    gw.disconnect()

    # recovery
    util.log('recovery target')
    ret = testbase.request_to_start_smr(target)
    self.assertEqual(ret, 0, 'failed to start smr')

    ret = testbase.request_to_start_redis(target)
    self.assertEqual(ret, 0, 'failed to start redis')

    time.sleep(5)

    ret = testbase.wait_until_finished_to_set_up_role(target)
    self.assertEquals(ret, 0, 'failed to role change. smr_id:%d' % (target['id']))

    # check value
    recovered_redis = redis_mgmt.Redis(target['id'])
    ret = recovered_redis.connect(target['ip'], target['redis_port'])
    self.assertEquals(ret, 0, 'failed to connect to redis')

    for i in range(0, 200):
        key = '%s%d' % (key_base, i)
        cmd = 'get %s\r\n' % (key)
        recovered_redis.write(cmd)
        recovered_redis.read_until('\r\n')
        response = recovered_redis.read_until('\r\n')
        self.assertEqual(response, '%d\r\n' % i, 'inconsistent %s, %d' % (response, i))

def test_restart_recovery_with_local_checkpoint_and_remote_log_about_master( self): util.print_frame() for i in range(0, 5): self.recovery_with_local_checkpoint_and_remote_log('master')
def test_scaleout(self):
    util.print_frame()

    # start load generator
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        self.load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        self.load_gen_thrd_list[i].start()

    time.sleep(5)  # generate load for 5 sec
    util.log("started load_generator")

    # servers for scale out
    servers = [config.server4, config.server5, config.server6]
    leader_cm = self.cluster['servers'][0]

    # start migration
    migration_count = 5
    for i in range(migration_count):
        # Scale out
        cluster = config.clusters[0]
        ret = util.pg_add(cluster, servers, leader_cm)
        self.assertEqual(True, ret, 'Scale out fail. util.pg_add returns false')
        time.sleep(5)

        # pg0 -> pg1
        cluster = config.clusters[1]
        ret = util.migration(cluster, 0, 1, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

        # pg1 -> pg0
        cluster = config.clusters[1]
        ret = util.migration(cluster, 1, 0, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 1 -> 0')

        # Scale in
        #TODO Temporary
        #cluster = config.clusters[0]
        #for server in cluster['servers']:
        #    if testbase.request_to_shutdown_hbc(server) != 0:
        #        util.log('scale in : failed to request to shutdown hbc')
        #        self.assertFalse('scale in : failed to request to shutdown hbc')
        #time.sleep(5)
        ###############
        cluster = config.clusters[1]
        ret = util.pg_del(cluster, servers, leader_cm)
        self.assertEqual(True, ret, 'Scale in fail. util.pg_del returns false')
        #TODO Temporary
        #cluster = config.clusters[0]
        #for server in cluster['servers']:
        #    if testbase.request_to_start_heartbeat_checker( server ) != 0:
        #        util.log('scale in : failed to start hbc')
        #        self.assertFalse('scale in : failed to start hbc')
        #time.sleep(5)
        ###############

        # check consistency
        ok = True
        for j in range(len(self.load_gen_thrd_list)):
            if not self.load_gen_thrd_list[j].isConsistent():
                ok = False
                break
        if not ok:
            break

        time.sleep(5)  # generate load for 5 sec

    # check consistency of load_generator
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].quit()
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].join()
        self.assertTrue(self.load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')
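The quit/join/isConsistent teardown at the end of this test recurs in the migration tests below. A minimal sketch of a shared teardown, assuming the LoadGenerator interface (quit, join, isConsistent) used above; stop_load_generators is a hypothetical helper:

def stop_load_generators(load_gens):
    # Ask every generator to stop, wait for it, and return the ids that became inconsistent.
    inconsistent = []
    for i in range(len(load_gens)):
        load_gens[i].quit()
    for i in range(len(load_gens)):
        load_gens[i].join()
        if not load_gens[i].isConsistent():
            inconsistent.append(i)
    return inconsistent

The caller can then simply assert that the returned list is empty.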
def test_data_type(self):
    util.print_frame()

    dict = {}
    server0 = self.cluster['servers'][0]
    redis0 = telnetlib.Telnet(server0['ip'], server0['redis_port'])

    # String
    dict['string'] = {}
    key = self.string_gen(random.randint(1, 5)) + '_type_string'
    val = self.string_gen(random.randint(1, 5))
    dict['string']['key'] = key
    dict['string']['val'] = val
    redis0.write('*3\r\n$3\r\nset\r\n')
    redis0.write('$%d\r\n%s\r\n' % (len(key), key))
    redis0.write('$%d\r\n%s\r\n' % (len(val), val))
    ret = redis0.read_until('\r\n', 1)
    self.assertEqual(ret, '+OK\r\n')

    # List
    dict['list'] = {}
    key = self.string_gen(random.randint(1, 5)) + '_type_list'
    val1 = self.string_gen(random.randint(1, 5))
    val2 = self.string_gen(random.randint(1, 5))
    dict['list']['key'] = key
    dict['list']['val1'] = val1
    dict['list']['val2'] = val1  # Duplicate value
    dict['list']['val3'] = val2
    redis0.write('*5\r\n$5\r\nrpush\r\n')
    redis0.write('$%d\r\n%s\r\n' % (len(key), key))
    redis0.write('$%d\r\n%s\r\n' % (len(val1), val1))
    redis0.write('$%d\r\n%s\r\n' % (len(val1), val1))
    redis0.write('$%d\r\n%s\r\n' % (len(val2), val2))
    ret = redis0.read_until('\r\n', 1)
    self.assertEqual(ret, ':3\r\n')

    # Set
    dict['set'] = {}
    key = self.string_gen(random.randint(1, 5)) + '_type_set'
    val1 = self.string_gen(random.randint(1, 5)) + '_v1'
    val2 = self.string_gen(random.randint(1, 5)) + '_v2'
    dict['set']['key'] = key
    dict['set']['val1'] = val1
    dict['set']['val2'] = val2
    redis0.write('*4\r\n$4\r\nsadd\r\n')
    redis0.write('$%d\r\n%s\r\n' % (len(key), key))
    redis0.write('$%d\r\n%s\r\n' % (len(val1), val1))
    redis0.write('$%d\r\n%s\r\n' % (len(val2), val2))
    ret = redis0.read_until('\r\n', 1)
    self.assertEqual(ret, ':2\r\n')

    # Sorted Set
    dict['zset'] = {}
    key = self.string_gen(random.randint(1, 5)) + '_type_zset'
    val1 = self.string_gen(random.randint(1, 5)) + '_v1'
    val2 = self.string_gen(random.randint(1, 5)) + '_v2'
    dict['zset']['key'] = key
    dict['zset']['val1'] = val1
    dict['zset']['score1'] = 20
    dict['zset']['val2'] = val2
    dict['zset']['score2'] = 10
    redis0.write('*6\r\n$4\r\nzadd\r\n')
    redis0.write('$%d\r\n%s\r\n' % (len(key), key))
    redis0.write('$2\r\n20\r\n$%d\r\n%s\r\n' % (len(val1), val1))
    redis0.write('$2\r\n10\r\n$%d\r\n%s\r\n' % (len(val2), val2))
    ret = redis0.read_until('\r\n', 1)
    self.assertEqual(ret, ':2\r\n')

    # Hash
    dict['hash'] = {}
    key = self.string_gen(random.randint(1, 5)) + '_type_hash'
    key1 = self.string_gen(random.randint(1, 5)) + '_k1'
    val1 = self.string_gen(random.randint(1, 5))
    key2 = self.string_gen(random.randint(1, 5)) + '_k2'
    val2 = self.string_gen(random.randint(1, 5))
    dict['hash']['key'] = key
    dict['hash'][key1] = val1
    dict['hash'][key2] = val2
    redis0.write('*6\r\n$5\r\nhmset\r\n')
    redis0.write('$%d\r\n%s\r\n' % (len(key), key))
    redis0.write('$%d\r\n%s\r\n' % (len(key1), key1))
    redis0.write('$%d\r\n%s\r\n' % (len(val1), val1))
    redis0.write('$%d\r\n%s\r\n' % (len(key2), key2))
    redis0.write('$%d\r\n%s\r\n' % (len(val2), val2))
    ret = redis0.read_until('\r\n', 1)
    self.assertEqual(ret, '+OK\r\n')

    self.bgsave(redis0)

    cmd = "./dump-util --dump-iterator dump.rdb ./dump2json_base32hex.so out.json"
    p = util.exec_proc_async(util.dump_util_dir(0), cmd, True, None, subprocess.PIPE, None)
    ret = p.wait()
    self.assertTrue(ret == 0)

    f = file("%s/out.json" % util.dump_util_dir(0), "r")
    skip_line = 2
    for line in f.readlines():
        # skip first 2 lines (smr_seqnum, smr_mstime)
        if skip_line > 0:
            skip_line -= 1
            continue

        data = json.loads(line.strip())
        key = self.b32hexdecode(data['key'])

        if data['type'] == 'string':
            self.assertEqual(dict['string']['key'], key,
                             "key(%s) does not match %s" % (dict['string']['key'], key))
            val = self.b32hexdecode(data['value'])
            self.assertEqual(dict['string']['val'], val,
                             "val(%s) does not match %s" % (dict['string']['val'], val))
        elif data['type'] == 'list':
            self.assertEqual(dict['list']['key'], key,
                             "key(%s) does not match %s" % (dict['list']['key'], key))
            val1 = self.b32hexdecode(data['value'][0])
            val2 = self.b32hexdecode(data['value'][1])
            val3 = self.b32hexdecode(data['value'][2])
            self.assertEqual(dict['list']['val1'], val1,
                             "val(%s) does not match %s" % (dict['list']['val1'], val1))
            self.assertEqual(dict['list']['val2'], val2,
                             "val(%s) does not match %s" % (dict['list']['val2'], val2))
            self.assertEqual(dict['list']['val3'], val3,
                             "val(%s) does not match %s" % (dict['list']['val3'], val3))
        elif data['type'] == 'set':
            self.assertEqual(dict['set']['key'], key,
                             "key(%s) does not match %s" % (dict['set']['key'], key))
            val1 = self.b32hexdecode(data['value'][0])
            val2 = self.b32hexdecode(data['value'][1])
            if not (val1 == dict['set']['val1'] and val2 == dict['set']['val2']
                    or val1 == dict['set']['val2'] and val2 == dict['set']['val1']):
                util.log("values(%s, %s) do not match (%s, %s)" % (
                    dict['set']['val1'], dict['set']['val2'], val1, val2))
                self.assertTrue(False)
        elif data['type'] == 'zset':
            self.assertEqual(dict['zset']['key'], key,
                             "key(%s) does not match %s" % (dict['zset']['key'], key))
            # Assign variables in sort order (ascending score)
            val2 = self.b32hexdecode(data['value'][0]['data'])
            score2 = int(data['value'][0]['score'])
            val1 = self.b32hexdecode(data['value'][1]['data'])
            score1 = int(data['value'][1]['score'])
            self.assertEqual(dict['zset']['val1'], val1,
                             "val(%s) does not match %s" % (dict['zset']['val1'], val1))
            self.assertEqual(dict['zset']['score1'], score1,
                             "score(%d) does not match %d" % (dict['zset']['score1'], score1))
            self.assertEqual(dict['zset']['val2'], val2,
                             "val(%s) does not match %s" % (dict['zset']['val2'], val2))
            self.assertEqual(dict['zset']['score2'], score2,
                             "score(%d) does not match %d" % (dict['zset']['score2'], score2))
        elif data['type'] == 'hash':
            self.assertEqual(dict['hash']['key'], key,
                             "key(%s) does not match %s" % (dict['hash']['key'], key))
            key1 = self.b32hexdecode(data['value'][0]['hkey'])
            val1 = self.b32hexdecode(data['value'][0]['hval'])
            key2 = self.b32hexdecode(data['value'][1]['hkey'])
            val2 = self.b32hexdecode(data['value'][1]['hval'])
            self.assertTrue(key1 in dict['hash'].keys(), 'hkey(%s) is not in json output' % key1)
            self.assertTrue(key2 in dict['hash'].keys(), 'hkey(%s) is not in json output' % key2)
            self.assertEqual(dict['hash'][key1], val1,
                             "val(%s) does not match %s" % (dict['hash'][key1], val1))
            self.assertEqual(dict['hash'][key2], val2,
                             "val(%s) does not match %s" % (dict['hash'][key2], val2))
        else:
            self.assertTrue(False, "Unknown type")
    f.close()
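self.b32hexdecode is used above to decode keys and values from the dump2json_base32hex output, but its implementation is not shown here. One plausible stdlib-only approach, following the Python 2 conventions of this file, is to translate the base32hex alphabet (RFC 4648) onto the standard base32 alphabet and reuse base64.b32decode; this sketch assumes the dump tool emits properly padded base32hex and is only an illustration, not the project's actual helper:

import base64
import string

_B32HEX_ALPHABET = '0123456789ABCDEFGHIJKLMNOPQRSTUV'
_B32_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_TO_STANDARD = string.maketrans(_B32HEX_ALPHABET, _B32_ALPHABET)

def b32hexdecode(encoded):
    # Map each base32hex symbol to its positional equivalent in the standard
    # alphabet, then decode with the stock base32 decoder.
    return base64.b32decode(encoded.upper().translate(_TO_STANDARD))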
def test_local_proxy(self): util.print_frame() local_proxy.TestLocalProxy.test_local_proxy(self, 64)
def test_master_failover_while_hang_3copy(self): util.print_frame() self.setup_test_cluster(self.cluster_3copy) self.master_failover_while_hang()
def test_quorum( self ): util.print_frame() master, slave1, slave2 = self.get_mss() expected = self.quorum_policy[1] max_try = 20 for i in range( 0, max_try ): quorum = util.get_quorum( master ) if quorum == expected: break; time.sleep( 1 ) self.assertEquals( quorum, expected, 'quorum:%d, expected:%d' % (quorum, expected) ) ret = testbase.request_to_shutdown_smr( slave1 ) self.assertEqual( ret, 0, 'failed to shutdown smr, server:%d' % slave1['id'] ) time.sleep( 1 ) expected = self.quorum_policy[1] max_try = 20 for i in range( 0, max_try ): quorum = util.get_quorum( master ) if quorum == expected: break; time.sleep( 1 ) self.assertEquals( quorum, expected, 'quorum:%d, expected:%d' % (quorum, expected) ) ret = testbase.request_to_shutdown_smr( slave2 ) self.assertEqual( ret, 0, 'failed to shutdown smr, server:%d' % slave2['id'] ) time.sleep( 1 ) expected = self.quorum_policy[0] max_try = 20 for i in range( 0, max_try ): quorum = util.get_quorum( master ) if quorum == expected: break; time.sleep( 1 ) self.assertEquals( quorum, expected, 'quorum:%d, expected:%d' % (quorum, expected) ) # recovery ret = testbase.request_to_start_smr( slave1 ) self.assertEqual( ret, 0, 'failed to start smr' ) ret = testbase.request_to_start_redis( slave1 ) self.assertEqual( ret, 0, 'failed to start redis' ) ret = testbase.wait_until_finished_to_set_up_role( slave1 ) self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (slave1['id']) ) time.sleep( 1 ) expected = self.quorum_policy[1] max_try = 20 for i in range( 0, max_try ): quorum = util.get_quorum( master ) if quorum == expected: break; time.sleep( 1 ) self.assertEquals( quorum, expected, 'quorum:%d, expected:%d' % (quorum, expected) ) # recovery ret = testbase.request_to_start_smr( slave2 ) self.assertEqual( ret, 0, 'failed to start smr' ) ret = testbase.request_to_start_redis( slave2 ) self.assertEqual( ret, 0, 'failed to start redis' ) ret = testbase.wait_until_finished_to_set_up_role( slave2 ) self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (slave2['id']) ) time.sleep( 1 ) expected = self.quorum_policy[1] max_try = 20 for i in range( 0, max_try ): quorum = util.get_quorum( master ) if quorum == expected: break; time.sleep( 1 ) self.assertEquals( quorum, expected, 'quorum:%d, expected:%d' % (quorum, expected) )
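The quorum check above repeats the same 20-try polling loop five times. A minimal sketch of how it could be factored, assuming the util.get_quorum call and the module-level time import already used in this file; wait_for_quorum is a hypothetical helper:

def wait_for_quorum(master, expected, max_try=20, interval=1):
    # Return the last observed quorum, polling until it matches the expected value.
    quorum = None
    for _ in range(max_try):
        quorum = util.get_quorum(master)
        if quorum == expected:
            break
        time.sleep(interval)
    return quorum

Each block above would then reduce to self.assertEquals(wait_for_quorum(master, expected), expected, ...).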
def test_all_pgs_hang(self):
    util.print_frame()

    self.setup_test_cluster(self.cluster_3copy)

    # get gateway info
    ip, port = util.get_rand_gateway(self.cluster)
    gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
    ret = gw.connect(ip, port)
    self.assertEqual(ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port))

    # set values
    for i in range(0, 10000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res))

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss(self.cluster)
    self.assertNotEqual(m, None, 'master is None.')
    self.assertNotEqual(s1, None, 'slave1 is None.')
    self.assertNotEqual(s2, None, 'slave2 is None.')

    util.log('server state before hang')
    util.log_server_state(self.cluster)

    # hang
    smr_master = smr_mgmt.SMR(m['id'])
    ret = smr_master.connect(m['ip'], m['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to master. %s:%d' % (m['ip'], m['smr_mgmt_port']))
    smr_slave1 = smr_mgmt.SMR(s1['id'])
    ret = smr_slave1.connect(s1['ip'], s1['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave1. %s:%d' % (s1['ip'], s1['smr_mgmt_port']))
    smr_slave2 = smr_mgmt.SMR(s2['id'])
    ret = smr_slave2.connect(s2['ip'], s2['smr_mgmt_port'])
    self.assertEqual(ret, 0, 'failed to connect to slave2. %s:%d' % (s2['ip'], s2['smr_mgmt_port']))

    m_ts = util.get_timestamp_of_pgs(m)
    s1_ts = util.get_timestamp_of_pgs(s1)
    s2_ts = util.get_timestamp_of_pgs(s2)

    smr_master.write('fi delay sleep 1 8000\r\n')
    reply = smr_master.read_until('\r\n', 1)
    if reply is not None and reply.find('-ERR not supported') != -1:
        self.assertEqual(0, 1, 'make sure that smr was compiled with the gcov option.')
    smr_slave1.write('fi delay sleep 1 8000\r\n')
    smr_slave2.write('fi delay sleep 1 8000\r\n')

    time.sleep(10)

    # check consistency
    ok = False
    for try_cnt in xrange(20):
        ok = util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port)
        if ok:
            break
        time.sleep(0.5)
    self.assertTrue(ok, 'Unstable cluster state')

    util.log('server state transition after hang')
    util.log_server_state(self.cluster)

    redis0 = redis_mgmt.Redis(m['id'])
    ret = redis0.connect(m['ip'], m['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (m['ip'], m['redis_port']))

    # set values
    for i in range(10000, 20000):
        cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
        redis0.write(cmd)
        res = redis0.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res))

    redis1 = redis_mgmt.Redis(s1['id'])
    ret = redis1.connect(s1['ip'], s1['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s1['ip'], s1['redis_port']))

    redis2 = redis_mgmt.Redis(s2['id'])
    ret = redis2.connect(s2['ip'], s2['redis_port'])
    self.assertEqual(ret, 0, 'failed to connect to redis(%s:%d).' % (s2['ip'], s2['redis_port']))

    # check new values (m)
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis0.write(cmd)
        redis0.read_until('\r\n')
        res = redis0.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
            'failed to get values from redis(id:%d). %s != %d' % (m['id'], res[:-2], i))

    # check new values (s1)
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis1.write(cmd)
        redis1.read_until('\r\n')
        res = redis1.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
            'failed to get values from redis(id:%d). %s != %d' % (s1['id'], res[:-2], i))

    # check new values (s2)
    for i in range(10000, 20000):
        cmd = 'get %s%d\r\n' % (self.key_base, i)
        redis2.write(cmd)
        redis2.read_until('\r\n')
        res = redis2.read_until('\r\n')
        self.assertEqual(res, '%d\r\n' % i,
            'failed to get values from redis(id:%d). %s != %d' % (s2['id'], res[:-2], i))

    # check consistency
    ok = False
    for try_cnt in range(0, 10):
        ok = util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port)
        print ok
        if ok:
            break
        time.sleep(1)
    self.assertEqual(ok, True, 'role consistency fail')

    return 0
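The hang above is induced by writing 'fi delay sleep 1 8000' to each smr management port and treating '-ERR not supported' as a build problem. A minimal sketch wrapping that exact exchange, assuming the smr_mgmt.SMR connection interface used above; inject_delay is a hypothetical helper and the two numeric arguments are passed through unchanged, since their semantics are not documented here:

def inject_delay(smr_conn, arg1=1, msec=8000):
    # Send the fault-injection command and surface unsupported builds early.
    smr_conn.write('fi delay sleep %d %d\r\n' % (arg1, msec))
    reply = smr_conn.read_until('\r\n', 1)
    if reply is not None and '-ERR not supported' in reply:
        raise RuntimeError('fault injection unsupported; build smr with the gcov option')
    return reply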
def test_migration_with_expire_command(self): util.print_frame() util.log("start load_generator") load_gen_thrd_list = {} for i in range(1): ip, port = util.get_rand_gateway(self.cluster) load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port) load_gen_thrd_list[i].start() time.sleep(5) # generate load for 5 sec tps = 20000 src_pg_id = 0 dst_pg_id = 1 leader_cm = self.cluster['servers'][0] src_master = util.get_server_by_role_and_pg(self.cluster['servers'], 'master', src_pg_id) dst_master = util.get_server_by_role_and_pg(self.cluster['servers'], 'master', dst_pg_id) smr = smr_mgmt.SMR(src_master['id']) ret = smr.connect(src_master['ip'], src_master['smr_mgmt_port']) if ret != 0: util.log('failed to connect to smr(source master)') return False src_redis = redis_mgmt.Redis(src_master['id']) ret = src_redis.connect(src_master['ip'], src_master['redis_port']) self.assertEquals(ret, 0, 'failed to connect to redis') dst_redis = redis_mgmt.Redis(dst_master['id']) ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port']) self.assertEquals(ret, 0, 'failed to connect to redis') ts = time.time() self.setExpireKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:expired', 10) self.setExpireKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:persist', 20) self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired', 10) self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist', 20) self.setExpireS3Key(src_redis, 'S3:PermanentKey', 0) util.log(">>> sleep until 15 sec pass") self.assertFalse(time.time() - ts >= 15) time.sleep(15 - (time.time() - ts)) res = self.persistKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:persist') self.assertEquals(res, ":1\r\n") res = self.persistKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:expired') self.assertEquals(res, ":0\r\n") res = self.persistS3Key( src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist') self.assertEquals(res, ":1\r\n") res = self.persistS3Key( src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired') self.assertEquals(res, ":0\r\n") util.log(">>> migrate test with expire command start(%s), ts:%d" % (time.asctime(), ts)) ts = time.time() self.setExpireKey(src_redis, 'beforeCheckpoint~afterCheckpoint:expired', 10) self.setExpireKey(src_redis, 'beforeCheckpoint~afterCheckpoint:persist', 20) self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired', 10) self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist', 20) # notify dst_redis of migration start util.log(">>> notify dst_redis of migration start (%s)" % time.asctime()) cmd = 'migconf migstart %d-%d\r\n' % (0, 8191) dst_redis.write(cmd) res = dst_redis.read_until('\r\n') self.assertEquals(res, '+OK\r\n') # remote partial checkpoint util.log(">>> start remote checkpoint and load (%s)" % time.asctime()) cmd = "./cluster-util --getandplay %s %d %s %d %d-%d %d" % ( src_master['ip'], src_master['redis_port'], dst_master['ip'], dst_master['redis_port'], 0, 8191, tps) p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd, True, None, subprocess.PIPE, None) ret = p.wait() for line in p.stdout: if line.find("Checkpoint Sequence Number:") != -1: util.log("seqnumber : " + line[line.rfind(":") + 1:]) seq = int(line[line.rfind(":") + 1:]) util.log(">>>" + str(line.rstrip())) self.assertEqual(0, ret) util.log(">>> end remote checkpoint and load (%s)" % time.asctime()) util.log(">>> sleep until 15 sec pass") self.assertFalse(time.time() - ts >= 15) time.sleep(15 - (time.time() - ts)) res = 
self.persistKey(src_redis, 'beforeCheckpoint~afterCheckpoint:persist') self.assertEquals(res, ":1\r\n") res = self.persistKey(src_redis, 'beforeCheckpoint~afterCheckpoint:expired') self.assertEquals(res, ":0\r\n") res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist') self.assertEquals(res, ":1\r\n") res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired') self.assertEquals(res, ":0\r\n") # bgsave for testing later about recovery during migration util.log( ">>> bgsave for testing later about recovery during migration (%s)" % time.asctime()) cmd = 'bgsave\r\n' dst_redis.write(cmd) res = dst_redis.read_until('\r\n') self.assertEquals(res, '+Background saving started\r\n') ts = time.time() self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:expired', 10) self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:persist', 20) self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:expired', 10) self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:persist', 20) util.log(">>> sleep until 15 sec pass") self.assertFalse(time.time() - ts >= 15) time.sleep(15 - (time.time() - ts)) res = self.persistKey(src_redis, 'afterCheckpoint~afterCheckpoint:persist') self.assertEquals(res, ":1\r\n") res = self.persistKey(src_redis, 'afterCheckpoint~afterCheckpoint:expired') self.assertEquals(res, ":0\r\n") res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:persist') self.assertEquals(res, ":1\r\n") res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:expired') self.assertEquals(res, ":0\r\n") ts = time.time() self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:expired', 10) self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:persist', 100) self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:expired', 10) self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:persist', 100) # remote catchup (smr log migration) util.log(">>> start remote catchup (%s)" % time.asctime()) dst_host = dst_master['ip'] dst_smr_port = dst_master['smr_base_port'] rle = '1 8192' num_part = 8192 smr.write('migrate start %s %d %d %d %d %s\r\n' % (dst_host, dst_smr_port, seq, tps, num_part, rle)) response = smr.read_until('\r\n') if response[:3] != '+OK': util.log('failed to execute migrate start command, response:%s' % response) return False while True: smr.write('migrate info\r\n') response = smr.read_until('\r\n') seqs = response.split() logseq = int(seqs[1].split(':')[1]) mig = int(seqs[2].split(':')[1]) util.log('migrate info: %s' % response) if (logseq - mig < 500000): util.log('Remote catchup almost done. 
try mig2pc') break time.sleep(1) util.log(">>> sleep until 90 sec pass") self.assertFalse(time.time() - ts >= 90) time.sleep(90 - (time.time() - ts)) res = self.persistKey(src_redis, 'afterCheckpoint~duringCatchup:persist') self.assertEquals(res, ":1\r\n") res = self.persistKey(src_redis, 'afterCheckpoint~duringCatchup:expired') self.assertEquals(res, ":0\r\n") res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:persist') self.assertEquals(res, ":1\r\n") res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:expired') self.assertEquals(res, ":0\r\n") ts = time.time() self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:expired', 10) self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:persist', 20) self.setExpireS3Key(src_redis, 'S3:duringCatchup~duringCatchup:expired', 10) self.setExpireS3Key(src_redis, 'S3:duringCatchup~duringCatchup:persist', 20) util.log(">>> sleep until 15 sec pass") self.assertFalse(time.time() - ts >= 15) time.sleep(15 - (time.time() - ts)) res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:persist') self.assertEquals(res, ":1\r\n") res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:expired') self.assertEquals(res, ":0\r\n") res = self.persistS3Key(src_redis, 'S3:duringCatchup~duringCatchup:persist') self.assertEquals(res, ":1\r\n") res = self.persistS3Key(src_redis, 'S3:duringCatchup~duringCatchup:expired') self.assertEquals(res, ":0\r\n") ts = time.time() self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:expired', 10) self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:persist', 20) self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:expired', 10) self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:persist', 20) util.log(">>> remote catchup phase almost done (%s)" % time.asctime()) # mig2pc util.log(">>> start mig2pc (%s)" % time.asctime()) cmd = 'mig2pc %s %d %d %d %d' % (self.cluster['cluster_name'], src_pg_id, dst_pg_id, 0, 8191) result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd) util.log('mig2pc result : ' + result) if not result.startswith('{"state":"success","msg":"+OK"}\r\n'): util.log('failed to execute mig2pc command, result:%s' % result) return False util.log(">>> sleep until 15 sec pass") self.assertFalse(time.time() - ts >= 15) time.sleep(15 - (time.time() - ts)) res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:persist') self.assertEquals(res, ":1\r\n") res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:expired') self.assertEquals(res, ":0\r\n") res = self.persistS3Key(dst_redis, 'S3:duringCatchup~afterMig2pc:persist') self.assertEquals(res, ":1\r\n") res = self.persistS3Key(dst_redis, 'S3:duringCatchup~afterMig2pc:expired') self.assertEquals(res, ":0\r\n") ts = time.time() self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:expired', 10) self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:persist', 20) self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired', 10) self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist', 20) # finish migration smr.write('migrate interrupt\r\n') response = smr.read_until('\r\n') util.log('migrate interrupt: %s' % response) smr.disconnect() # notify dst_redis of migration end util.log(">>> notify dst_redis of migration end (%s)" % time.asctime()) cmd = 'migconf migend\r\n' dst_redis.write(cmd) res = dst_redis.read_until('\r\n') self.assertEquals(res, '+OK\r\n') cmd = 'migconf clearstart %d-%d\r\n' % (0, 8191) src_redis.write(cmd) res = src_redis.read_until('\r\n') 
self.assertEquals(res, '+OK\r\n') util.log(">>> sleep until 15 sec pass") self.assertFalse(time.time() - ts >= 15) time.sleep(15 - (time.time() - ts)) res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:persist') self.assertEquals(res, ":1\r\n") res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:expired') self.assertEquals(res, ":0\r\n") res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist') self.assertEquals(res, ":1\r\n") res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired') self.assertEquals(res, ":0\r\n") ts = time.time() util.log(">>> sleep until 15 sec pass") self.assertFalse(time.time() - ts >= 15) time.sleep(15 - (time.time() - ts)) self.assertTrue( self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist')) self.assertFalse( self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:expired')) self.assertTrue( self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist')) self.assertFalse( self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:expired')) self.assertTrue( self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist')) self.assertFalse( self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired')) self.assertTrue( self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist')) self.assertFalse( self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired')) # remote partial checkpoint util.log(">>> start rangedel (%s)" % time.asctime()) cmd = "./cluster-util --rangedel %s %d %d-%d %d" % ( src_master['ip'], src_master['redis_port'], 0, 8191, tps) p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd, True, None, subprocess.PIPE, None) ret = p.wait() for line in p.stdout: util.log(">>>" + str(line.rstrip())) cmd = 'migconf clearend\r\n' src_redis.write(cmd) res = src_redis.read_until('\r\n') 
self.assertEqual(res, '+OK\r\n') time.sleep(5) # generate load for 5 sec # check consistency of load_generator for i in range(len(load_gen_thrd_list)): load_gen_thrd_list[i].quit() for i in range(len(load_gen_thrd_list)): load_gen_thrd_list[i].join() self.assertTrue(load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration') # kill dst_redis and recover from bgsave util.log(">>> kill dst_redis and recover from bgsave (%s)" % time.asctime()) dst_redis.disconnect() ret = testbase.request_to_shutdown_redis(dst_master) self.assertEquals(ret, 0, 'failed to shutdown redis') ret = testbase.request_to_shutdown_smr(dst_master) self.assertEquals(ret, 0, 'failed to shutdown smr') time.sleep(5) testbase.request_to_start_smr(dst_master) self.assertEqual(ret, 0, 'failed to start smr, server:%d' % dst_master['id']) ret = testbase.request_to_start_redis(dst_master) self.assertEqual(ret, 0, 'failed to start redis, server:%d' % dst_master['id']) ret = testbase.wait_until_finished_to_set_up_role(dst_master) self.assertEquals( ret, 0, 'failed to role change. server:%d' % (dst_master['id'])) dst_redis = redis_mgmt.Redis(dst_master['id']) ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port']) self.assertEquals(ret, 0, 'failed to connect to redis') self.assertTrue( self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist')) self.assertFalse( self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:expired')) self.assertTrue( self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist')) self.assertFalse( self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:expired')) self.assertTrue( self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist')) self.assertFalse( self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired')) self.assertTrue( self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired')) self.assertTrue( self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist')) self.assertFalse( self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired')) self.assertTrue( self.isS3Exist(dst_redis, 
'S3:afterMig2pc~migrateEnd:persist')) self.assertFalse( self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired')) self.getS3TTL(dst_redis, 'S3:PermanentKey') # kill dst_slave redis and recover without dump file util.log(">>> kill dst_redis and recover without dump file (%s)" % time.asctime()) dst_slave = util.get_server_by_role_and_pg(self.cluster['servers'], 'slave', dst_pg_id) ret = testbase.request_to_shutdown_redis(dst_slave) self.assertEquals(ret, 0, 'failed to shutdown redis') ret = testbase.request_to_shutdown_smr(dst_slave) self.assertEquals(ret, 0, 'failed to shutdown smr') time.sleep(5) testbase.request_to_start_smr(dst_slave) self.assertEqual(ret, 0, 'failed to start smr, server:%d' % dst_slave['id']) ret = testbase.request_to_start_redis(dst_slave) self.assertEqual(ret, 0, 'failed to start redis, server:%d' % dst_slave['id']) ret = testbase.wait_until_finished_to_set_up_role(dst_slave) self.assertEquals( ret, 0, 'failed to role change. server:%d' % (dst_slave['id'])) dst_redis_slave = redis_mgmt.Redis(dst_slave['id']) ret = dst_redis_slave.connect(dst_slave['ip'], dst_slave['redis_port']) self.assertEquals(ret, 0, 'failed to connect to redis') self.assertTrue( self.isExist(dst_redis_slave, 'beforeCheckpoint~beforeCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis_slave, 'beforeCheckpoint~beforeCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~beforeCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~beforeCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis_slave, 'beforeCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis_slave, 'beforeCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis_slave, 'afterCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isExist(dst_redis_slave, 'afterCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~afterCheckpoint:persist')) self.assertFalse( self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~afterCheckpoint:expired')) self.assertTrue( self.isExist(dst_redis_slave, 'afterCheckpoint~duringCatchup:persist')) self.assertFalse( self.isExist(dst_redis_slave, 'afterCheckpoint~duringCatchup:expired')) self.assertTrue( self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~duringCatchup:persist')) self.assertFalse( self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~duringCatchup:expired')) self.assertTrue( self.isExist(dst_redis_slave, 'duringCatchup~duringCatchup:persist')) self.assertFalse( self.isExist(dst_redis_slave, 'duringCatchup~duringCatchup:expired')) self.assertTrue( self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~duringCatchup:persist')) self.assertFalse( self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~duringCatchup:expired')) self.assertTrue( self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:persist')) self.assertFalse( self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:expired')) self.assertTrue( self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~afterMig2pc:persist')) self.assertFalse( self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~afterMig2pc:expired')) self.assertTrue( self.isExist(dst_redis_slave, 'afterMig2pc~migrateEnd:persist')) self.assertFalse( self.isExist(dst_redis_slave, 
'afterMig2pc~migrateEnd:expired')) self.assertTrue( self.isS3Exist(dst_redis_slave, 'S3:afterMig2pc~migrateEnd:persist')) self.assertFalse( self.isS3Exist(dst_redis_slave, 'S3:afterMig2pc~migrateEnd:expired')) self.getS3TTL(dst_redis_slave, 'S3:PermanentKey') # Go back to initial configuration self.assertTrue( util.migration(self.cluster, dst_pg_id, src_pg_id, 0, 8191, 40000), 'failed to rollback migration')
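Each phase of the expire test above follows the same shape: record ts, set keys with TTLs, assert that fewer than N seconds have passed, then sleep out the remainder. A minimal sketch of that timing step, assuming the module-level time import used throughout this file; sleep_until_elapsed is a hypothetical helper:

def sleep_until_elapsed(start_ts, total_sec):
    # Sleep for whatever remains of total_sec measured from start_ts,
    # failing fast if the phase already overran its budget.
    remaining = total_sec - (time.time() - start_ts)
    assert remaining > 0, 'phase took longer than %d seconds' % total_sec
    time.sleep(remaining)

The repeated "sleep until 15 sec pass" blocks would then become sleep_until_elapsed(ts, 15).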
def master_failover_while_hang(self): util.print_frame() # get gateway info ip, port = util.get_rand_gateway(self.cluster) gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id']) ret = gw.connect(ip, port) self.assertEqual(ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port)) # set values for i in range(0, 10000): cmd = 'set %s%d %d\r\n' % (self.key_base, i, i) gw.write(cmd) res = gw.read_until('\r\n') self.assertEqual( res, '+OK\r\n', 'failed to set values. cmd:%s, res:%s' % (cmd, res)) # get master, slave1, slave2 if len(self.cluster['servers']) == 3: m, s1, s2 = util.get_mss(self.cluster) self.assertNotEqual(m, None, 'master is None.') self.assertNotEqual(s1, None, 'slave1 is None.') self.assertNotEqual(s2, None, 'slave2 is None.') else: m, s1 = util.get_mss(self.cluster) self.assertNotEqual(m, None, 'master is None.') self.assertNotEqual(s1, None, 'slave1 is None.') util.log('server state before hang') util.log_server_state(self.cluster) self.failover_while_hang(m) util.log('server state transition after hang') util.log_server_state(self.cluster) redis1 = redis_mgmt.Redis(m['id']) ret = redis1.connect(m['ip'], m['redis_port']) self.assertEqual( ret, 0, 'failed to connect to redis(%s:%d).' % (m['ip'], m['redis_port'])) # set new values for i in range(10000, 20000): cmd = 'set %s%d %d\r\n' % (self.key_base, i, i) redis1.write(cmd) res = redis1.read_until('\r\n') self.assertEqual( res, '+OK\r\n', 'failed to set values to redis1. cmd:%s, res:%s' % (cmd[:-2], res)) if len(self.cluster['servers']) == 3: redis2 = redis_mgmt.Redis(s2['id']) ret = redis2.connect(s2['ip'], s2['redis_port']) self.assertEqual( ret, 0, 'failed to connect to redis(%s:%d).' % (s2['ip'], s2['redis_port'])) # check new values for i in range(10000, 20000): cmd = 'get %s%d\r\n' % (self.key_base, i) redis2.write(cmd) redis2.read_until('\r\n') res = redis2.read_until('\r\n') self.assertEqual( res, '%d\r\n' % i, 'failed to get values from redis2. %s != %d' % (res, i)) util.log( 'succeeded : check values with set/get operations with pgs%d and pgs%d.' % (m['id'], s2['id'])) redis0 = redis_mgmt.Redis(m['id']) ret = redis0.connect(m['ip'], m['redis_port']) self.assertEquals( ret, 0, 'failed to connect to redis(%s:%d).' % (m['ip'], m['redis_port'])) # check new values for i in range(10000, 20000): cmd = 'get %s%d\r\n' % (self.key_base, i) redis0.write(cmd) redis0.read_until('\r\n') res = redis0.read_until('\r\n') self.assertEqual( res, '%d\r\n' % i, 'failed to get values from redis2. %s != %d' % (res[:-2], i)) # check consistency self.assertEqual( util.check_cluster(self.cluster['cluster_name'], self.mgmt_ip, self.mgmt_port), True, 'role consistency fail') return 0
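This helper branches on whether util.get_mss returns two or three members, and the same branch appears in the hang tests above. A minimal sketch that normalizes the return value, assuming get_mss returns the master first followed by the slaves (as the unpacking above suggests); get_master_and_slaves is a hypothetical helper:

def get_master_and_slaves(cluster):
    # Works for both 2-copy and 3-copy clusters.
    members = util.get_mss(cluster)
    assert members[0] is not None, 'master is None.'
    return members[0], [s for s in members[1:] if s is not None]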
def __test_pdump(self): util.print_frame() pass
def test_slave_failover_while_hang_2copy(self): util.print_frame() self.setup_test_cluster(self.cluster_2copy) self.slave_failover_while_hang()