def test_migrate_empty_s3obj(self):
    util.print_frame()

    ip, port = util.get_rand_gateway(self.cluster)
    client = redis_sock.RedisClient(ip, port)

    # Fill some string and empty s3 objects
    keyprefix = 'test_migrate_empty_s3obj'
    for i in range(1000):
        ok, data = client.do_request('set %s_string_%d %d\r\n' % (keyprefix, i, i))
        assert (ok == True)
        ok, data = client.do_request('s3ladd ks %s_s3_%d svc key val 0\r\n' % (keyprefix, i))
        assert (ok == True and data == 1)
        ok, data = client.do_request('s3lrem ks %s_s3_%d svc key val\r\n' % (keyprefix, i))
        assert (ok == True and data == 1)

    # migration pg0 -> pg1, then pg1 -> pg0
    ret = util.migration(self.cluster, 0, 1, 4096, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail')
    ret = util.migration(self.cluster, 1, 0, 4096, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    # Check string objects
    for i in range(1000):
        ok, data = client.do_request('get %s_string_%d\r\n' % (keyprefix, i))
        assert (ok == True and int(data) == i)

    client.close()
def test_cscandigest(self):
    util.print_frame()

    gateway_list = []
    for server in self.cluster['servers']:
        gateway_list.append(telnetlib.Telnet(server['ip'], server['gateway_port']))

    util.log("run CSCAN test")
    svr = random.choice(gateway_list)
    expect = self.insert_key(svr)

    svr.write('CSCANDIGEST\r\n')
    svr.read_until('\r\n', 3)
    digest1 = svr.read_until('\r\n', 3)

    ret = util.migration(self.cluster, 0, 1, 0, 4095, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    svr.write('CSCANDIGEST\r\n')
    svr.read_until('\r\n', 3)
    digest2 = svr.read_until('\r\n', 3)

    ret = util.migration(self.cluster, 1, 0, 0, 4095, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    svr.write('CSCANDIGEST\r\n')
    svr.read_until('\r\n', 3)
    digest3 = svr.read_until('\r\n', 3)

    self.assertEqual(digest1, digest3, "Incompatible Cluster Digest")
    self.assertNotEqual(digest1, digest2, "Incompatible Cluster Digest")
def test_migrate_all(self):
    util.print_frame()

    migration_count = 10

    # start load generator
    load_gen_thrd_list = {}
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port, ops_limit=500)
        load_gen_thrd_list[i].start()

    time.sleep(5)  # generate load for 5 sec

    # start migration
    for i in range(migration_count):
        # pg0 -> pg1
        ret = util.migration(self.cluster, 0, 1, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail')

        # pg0 <- pg1
        ret = util.migration(self.cluster, 1, 0, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail')

        ok = True
        for j in range(len(load_gen_thrd_list)):
            if load_gen_thrd_list[j].isConsistent() == False:
                ok = False
                break
        if not ok:
            break

        time.sleep(5)  # generate load for 5 sec

    # check consistency of load_generator
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].quit()
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].join()
        self.assertTrue(load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')
def test_migration_with_expire_command(self):
    util.print_frame()

    util.log("start load_generator")
    load_gen_thrd_list = {}
    for i in range(1):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        load_gen_thrd_list[i].start()

    time.sleep(5)  # generate load for 5 sec

    tps = 20000
    src_pg_id = 0
    dst_pg_id = 1
    leader_cm = self.cluster['servers'][0]
    src_master = util.get_server_by_role_and_pg(self.cluster['servers'], 'master', src_pg_id)
    dst_master = util.get_server_by_role_and_pg(self.cluster['servers'], 'master', dst_pg_id)

    smr = smr_mgmt.SMR(src_master['id'])
    ret = smr.connect(src_master['ip'], src_master['smr_mgmt_port'])
    if ret != 0:
        util.log('failed to connect to smr(source master)')
        return False

    src_redis = redis_mgmt.Redis(src_master['id'])
    ret = src_redis.connect(src_master['ip'], src_master['redis_port'])
    self.assertEquals(ret, 0, 'failed to connect to redis')

    dst_redis = redis_mgmt.Redis(dst_master['id'])
    ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port'])
    self.assertEquals(ret, 0, 'failed to connect to redis')

    ts = time.time()
    self.setExpireKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:expired', 10)
    self.setExpireKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:persist', 20)
    self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired', 10)
    self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist', 20)
    self.setExpireS3Key(src_redis, 'S3:PermanentKey', 0)

    util.log(">>> sleep until 15 sec pass")
    self.assertFalse(time.time() - ts >= 15)
    time.sleep(15 - (time.time() - ts))

    res = self.persistKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:expired')
    self.assertEquals(res, ":0\r\n")
    res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired')
    self.assertEquals(res, ":0\r\n")

    util.log(">>> migrate test with expire command start(%s), ts:%d" % (time.asctime(), ts))

    ts = time.time()
    self.setExpireKey(src_redis, 'beforeCheckpoint~afterCheckpoint:expired', 10)
    self.setExpireKey(src_redis, 'beforeCheckpoint~afterCheckpoint:persist', 20)
    self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired', 10)
    self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist', 20)

    # notify dst_redis of migration start
    util.log(">>> notify dst_redis of migration start (%s)" % time.asctime())
    cmd = 'migconf migstart %d-%d\r\n' % (0, 8191)
    dst_redis.write(cmd)
    res = dst_redis.read_until('\r\n')
    self.assertEquals(res, '+OK\r\n')

    # remote partial checkpoint
    util.log(">>> start remote checkpoint and load (%s)" % time.asctime())
    cmd = "./cluster-util --getandplay %s %d %s %d %d-%d %d" % (
        src_master['ip'], src_master['redis_port'],
        dst_master['ip'], dst_master['redis_port'],
        0, 8191, tps)
    p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd,
                             True, None, subprocess.PIPE, None)
    ret = p.wait()

    for line in p.stdout:
        if line.find("Checkpoint Sequence Number:") != -1:
            util.log("seqnumber : " + line[line.rfind(":") + 1:])
            seq = int(line[line.rfind(":") + 1:])
        util.log(">>>" + str(line.rstrip()))
    self.assertEqual(0, ret)
    util.log(">>> end remote checkpoint and load (%s)" % time.asctime())

    util.log(">>> sleep until 15 sec pass")
    self.assertFalse(time.time() - ts >= 15)
    time.sleep(15 - (time.time() - ts))

    res = self.persistKey(src_redis, 'beforeCheckpoint~afterCheckpoint:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistKey(src_redis, 'beforeCheckpoint~afterCheckpoint:expired')
    self.assertEquals(res, ":0\r\n")
    res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired')
    self.assertEquals(res, ":0\r\n")

    # bgsave for testing later about recovery during migration
    util.log(">>> bgsave for testing later about recovery during migration (%s)" % time.asctime())
    cmd = 'bgsave\r\n'
    dst_redis.write(cmd)
    res = dst_redis.read_until('\r\n')
    self.assertEquals(res, '+Background saving started\r\n')

    ts = time.time()
    self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:expired', 10)
    self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:persist', 20)
    self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:expired', 10)
    self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:persist', 20)

    util.log(">>> sleep until 15 sec pass")
    self.assertFalse(time.time() - ts >= 15)
    time.sleep(15 - (time.time() - ts))

    res = self.persistKey(src_redis, 'afterCheckpoint~afterCheckpoint:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistKey(src_redis, 'afterCheckpoint~afterCheckpoint:expired')
    self.assertEquals(res, ":0\r\n")
    res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:expired')
    self.assertEquals(res, ":0\r\n")

    ts = time.time()
    self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:expired', 10)
    self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:persist', 100)
    self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:expired', 10)
    self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:persist', 100)

    # remote catchup (smr log migration)
    util.log(">>> start remote catchup (%s)" % time.asctime())
    dst_host = dst_master['ip']
    dst_smr_port = dst_master['smr_base_port']
    rle = '1 8192'
    num_part = 8192
    smr.write('migrate start %s %d %d %d %d %s\r\n' % (dst_host, dst_smr_port, seq,
                                                       tps, num_part, rle))
    response = smr.read_until('\r\n')
    if response[:3] != '+OK':
        util.log('failed to execute migrate start command, response:%s' % response)
        return False

    while True:
        smr.write('migrate info\r\n')
        response = smr.read_until('\r\n')
        seqs = response.split()
        logseq = int(seqs[1].split(':')[1])
        mig = int(seqs[2].split(':')[1])
        util.log('migrate info: %s' % response)
        if (logseq - mig < 500000):
            util.log('Remote catchup almost done. try mig2pc')
            break
        time.sleep(1)

    util.log(">>> sleep until 90 sec pass")
    self.assertFalse(time.time() - ts >= 90)
    time.sleep(90 - (time.time() - ts))

    res = self.persistKey(src_redis, 'afterCheckpoint~duringCatchup:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistKey(src_redis, 'afterCheckpoint~duringCatchup:expired')
    self.assertEquals(res, ":0\r\n")
    res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:expired')
    self.assertEquals(res, ":0\r\n")

    ts = time.time()
    self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:expired', 10)
    self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:persist', 20)
    self.setExpireS3Key(src_redis, 'S3:duringCatchup~duringCatchup:expired', 10)
    self.setExpireS3Key(src_redis, 'S3:duringCatchup~duringCatchup:persist', 20)

    util.log(">>> sleep until 15 sec pass")
    self.assertFalse(time.time() - ts >= 15)
    time.sleep(15 - (time.time() - ts))

    res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:expired')
    self.assertEquals(res, ":0\r\n")
    res = self.persistS3Key(src_redis, 'S3:duringCatchup~duringCatchup:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistS3Key(src_redis, 'S3:duringCatchup~duringCatchup:expired')
    self.assertEquals(res, ":0\r\n")

    ts = time.time()
    self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:expired', 10)
    self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:persist', 20)
    self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:expired', 10)
    self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:persist', 20)

    util.log(">>> remote catchup phase almost done (%s)" % time.asctime())

    # mig2pc
    util.log(">>> start mig2pc (%s)" % time.asctime())
    cmd = 'mig2pc %s %d %d %d %d' % (self.cluster['cluster_name'], src_pg_id, dst_pg_id,
                                     0, 8191)
    result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
    util.log('mig2pc result : ' + result)
    if not result.startswith('{"state":"success","msg":"+OK"}\r\n'):
        util.log('failed to execute mig2pc command, result:%s' % result)
        return False

    util.log(">>> sleep until 15 sec pass")
    self.assertFalse(time.time() - ts >= 15)
    time.sleep(15 - (time.time() - ts))

    res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:expired')
    self.assertEquals(res, ":0\r\n")
    res = self.persistS3Key(dst_redis, 'S3:duringCatchup~afterMig2pc:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistS3Key(dst_redis, 'S3:duringCatchup~afterMig2pc:expired')
    self.assertEquals(res, ":0\r\n")

    ts = time.time()
    self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:expired', 10)
    self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:persist', 20)
    self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired', 10)
    self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist', 20)

    # finish migration
    smr.write('migrate interrupt\r\n')
    response = smr.read_until('\r\n')
    util.log('migrate interrupt: %s' % response)
    smr.disconnect()

    # notify dst_redis of migration end
    util.log(">>> notify dst_redis of migration end (%s)" % time.asctime())
    cmd = 'migconf migend\r\n'
    dst_redis.write(cmd)
    res = dst_redis.read_until('\r\n')
    self.assertEquals(res, '+OK\r\n')

    cmd = 'migconf clearstart %d-%d\r\n' % (0, 8191)
    src_redis.write(cmd)
    res = src_redis.read_until('\r\n')
    self.assertEquals(res, '+OK\r\n')

    util.log(">>> sleep until 15 sec pass")
    self.assertFalse(time.time() - ts >= 15)
    time.sleep(15 - (time.time() - ts))

    res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:expired')
    self.assertEquals(res, ":0\r\n")
    res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist')
    self.assertEquals(res, ":1\r\n")
    res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired')
    self.assertEquals(res, ":0\r\n")

    ts = time.time()
    util.log(">>> sleep until 15 sec pass")
    self.assertFalse(time.time() - ts >= 15)
    time.sleep(15 - (time.time() - ts))

    self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist'))
    self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:expired'))

    self.assertTrue(self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist'))
    self.assertFalse(self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:expired'))

    self.assertTrue(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist'))
    self.assertFalse(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired'))

    self.assertTrue(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist'))
    self.assertFalse(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired'))

    # rangedel: delete the migrated slot range from the source
    util.log(">>> start rangedel (%s)" % time.asctime())
    cmd = "./cluster-util --rangedel %s %d %d-%d %d" % (
        src_master['ip'], src_master['redis_port'], 0, 8191, tps)
    p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd,
                             True, None, subprocess.PIPE, None)
    ret = p.wait()

    for line in p.stdout:
        util.log(">>>" + str(line.rstrip()))

    cmd = 'migconf clearend\r\n'
    src_redis.write(cmd)
    res = src_redis.read_until('\r\n')
    self.assertEqual(res, '+OK\r\n')
    time.sleep(5)  # generate load for 5 sec

    # check consistency of load_generator
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].quit()
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].join()
        self.assertTrue(load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')

    # kill dst_redis and recover from bgsave
    util.log(">>> kill dst_redis and recover from bgsave (%s)" % time.asctime())

    dst_redis.disconnect()
    ret = testbase.request_to_shutdown_redis(dst_master)
    self.assertEquals(ret, 0, 'failed to shutdown redis')
    ret = testbase.request_to_shutdown_smr(dst_master)
    self.assertEquals(ret, 0, 'failed to shutdown smr')
    time.sleep(5)

    ret = testbase.request_to_start_smr(dst_master)
    self.assertEqual(ret, 0, 'failed to start smr, server:%d' % dst_master['id'])
    ret = testbase.request_to_start_redis(dst_master)
    self.assertEqual(ret, 0, 'failed to start redis, server:%d' % dst_master['id'])
    ret = testbase.wait_until_finished_to_set_up_role(dst_master)
    self.assertEquals(ret, 0, 'failed to role change. server:%d' % (dst_master['id']))

    dst_redis = redis_mgmt.Redis(dst_master['id'])
    ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port'])
    self.assertEquals(ret, 0, 'failed to connect to redis')

    self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist'))
    self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:expired'))

    self.assertTrue(self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist'))
    self.assertFalse(self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:expired'))

    self.assertTrue(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist'))
    self.assertFalse(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired'))

    self.assertTrue(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist'))
    self.assertFalse(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired'))
    self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:persist'))
    self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired'))

    self.getS3TTL(dst_redis, 'S3:PermanentKey')

    # kill dst_slave redis and recover without dump file
    util.log(">>> kill dst_redis and recover without dump file (%s)" % time.asctime())

    dst_slave = util.get_server_by_role_and_pg(self.cluster['servers'], 'slave', dst_pg_id)
    ret = testbase.request_to_shutdown_redis(dst_slave)
    self.assertEquals(ret, 0, 'failed to shutdown redis')
    ret = testbase.request_to_shutdown_smr(dst_slave)
    self.assertEquals(ret, 0, 'failed to shutdown smr')
    time.sleep(5)

    ret = testbase.request_to_start_smr(dst_slave)
    self.assertEqual(ret, 0, 'failed to start smr, server:%d' % dst_slave['id'])
    ret = testbase.request_to_start_redis(dst_slave)
    self.assertEqual(ret, 0, 'failed to start redis, server:%d' % dst_slave['id'])
    ret = testbase.wait_until_finished_to_set_up_role(dst_slave)
    self.assertEquals(ret, 0, 'failed to role change. server:%d' % (dst_slave['id']))

    dst_redis_slave = redis_mgmt.Redis(dst_slave['id'])
    ret = dst_redis_slave.connect(dst_slave['ip'], dst_slave['redis_port'])
    self.assertEquals(ret, 0, 'failed to connect to redis')

    self.assertTrue(self.isExist(dst_redis_slave, 'beforeCheckpoint~beforeCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis_slave, 'beforeCheckpoint~beforeCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~beforeCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~beforeCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis_slave, 'beforeCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis_slave, 'beforeCheckpoint~afterCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~afterCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis_slave, 'afterCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isExist(dst_redis_slave, 'afterCheckpoint~afterCheckpoint:expired'))
    self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~afterCheckpoint:persist'))
    self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~afterCheckpoint:expired'))

    self.assertTrue(self.isExist(dst_redis_slave, 'afterCheckpoint~duringCatchup:persist'))
    self.assertFalse(self.isExist(dst_redis_slave, 'afterCheckpoint~duringCatchup:expired'))
    self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~duringCatchup:persist'))
    self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~duringCatchup:expired'))

    self.assertTrue(self.isExist(dst_redis_slave, 'duringCatchup~duringCatchup:persist'))
    self.assertFalse(self.isExist(dst_redis_slave, 'duringCatchup~duringCatchup:expired'))
    self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~duringCatchup:persist'))
    self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~duringCatchup:expired'))

    self.assertTrue(self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:persist'))
    self.assertFalse(self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:expired'))
    self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~afterMig2pc:persist'))
    self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~afterMig2pc:expired'))

    self.assertTrue(self.isExist(dst_redis_slave, 'afterMig2pc~migrateEnd:persist'))
    self.assertFalse(self.isExist(dst_redis_slave, 'afterMig2pc~migrateEnd:expired'))
    self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:afterMig2pc~migrateEnd:persist'))
    self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:afterMig2pc~migrateEnd:expired'))

    self.getS3TTL(dst_redis_slave, 'S3:PermanentKey')

    # Go back to initial configuration
    self.assertTrue(util.migration(self.cluster, dst_pg_id, src_pg_id, 0, 8191, 40000),
                    'failed to rollback migration')
def test_random_migrate(self):
    util.print_frame()

    # start load generator
    load_gen_thrd_list = {}
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        load_gen_thrd_list[i].start()

    ret = util.migration(self.cluster, 0, 1, 4096, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail')

    leader_cm = self.cluster['servers'][0]
    cluster_name = self.cluster['cluster_name']
    mapping = [-1] * 8192
    count = 50
    while count > 0:
        # get PN -> PG map
        cmd = 'cluster_info %s' % cluster_name
        result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
        ret = json.loads(result)
        rle = ret['data']['cluster_info']['PN_PG_Map']
        print "PN_PG_MAP = %s" % rle

        # expand the run-length encoded map into a slot -> pg table
        sp = rle.split()
        index = 0
        for i in range(len(sp) / 2):
            for j in range(int(sp[i * 2 + 1])):
                mapping[index] = int(sp[i * 2])
                index += 1

        # pick a random slot and extend the range while it stays in the same pg
        slot = random.randint(0, 8191)
        src_pgid = mapping[slot]
        dst_pgid = (src_pgid + 1) % 2
        slot_end = slot
        while random.randint(0, 5) <= 4:
            if slot_end < 8191 and mapping[slot_end + 1] == src_pgid:
                slot_end += 1
            else:
                break

        print "SLOT=%d, SRC_PGID=%d, DST_PGID=%d" % (slot, src_pgid, dst_pgid)
        ret = util.migration(self.cluster, src_pgid, dst_pgid, slot, slot_end, 40000)
        self.assertEqual(True, ret, 'Migration Fail')

        ok = True
        for j in range(len(load_gen_thrd_list)):
            if load_gen_thrd_list[j].isConsistent() == False:
                ok = False
                break
        if not ok:
            break

        count -= 1

    # check consistency of load_generator
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].quit()
    for i in range(len(load_gen_thrd_list)):
        load_gen_thrd_list[i].join()
        self.assertTrue(load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')

    # Go back to initial configuration
    cinfo = util.cluster_info(leader_cm['ip'], leader_cm['cm_port'], cluster_name)
    for slot in util.get_slots(cinfo['cluster_info']['PN_PG_Map'], 1):
        self.assertTrue(util.migration(self.cluster, 1, 0, slot['begin'], slot['end'], 40000),
                        'failed to rollback migration')
def test_scaleout(self):
    util.print_frame()

    # start load generator
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        self.load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        self.load_gen_thrd_list[i].start()

    time.sleep(5)  # generate load for 5 sec
    util.log("started load_generator")

    # servers for scale out
    servers = [config.server4, config.server5, config.server6]
    leader_cm = self.cluster['servers'][0]

    # start migration
    migration_count = 5
    for i in range(migration_count):
        # Scale out
        cluster = config.clusters[0]
        ret = util.pg_add(cluster, servers, leader_cm)
        self.assertEqual(True, ret, 'Scale out fail. util.pg_add returns false')
        time.sleep(5)

        # pg0 -> pg1
        cluster = config.clusters[1]
        ret = util.migration(cluster, 0, 1, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

        # pg0 <- pg1
        cluster = config.clusters[1]
        ret = util.migration(cluster, 1, 0, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 1 <- 0')

        # Scale in
        #TODO Temporary
        #cluster = config.clusters[0]
        #for server in cluster['servers']:
        #    if testbase.request_to_shutdown_hbc(server) is not 0:
        #        util.log('scale in : failed to request to shutdown hbc')
        #        self.assertFalse('scale in : failed to request to shutdown hbc')
        #time.sleep(5)
        ###############
        cluster = config.clusters[1]
        ret = util.pg_del(cluster, servers, leader_cm)
        self.assertEqual(True, ret, 'Scale in fail. util.pg_del returns false')
        #TODO Temporary
        #cluster = config.clusters[0]
        #for server in cluster['servers']:
        #    if testbase.request_to_start_heartbeat_checker( server ) is not 0:
        #        util.log('scale in : failed to start hbc')
        #        self.assertFalse('scale in : failed to start hbc')
        #time.sleep(5)
        ###############

        # check consistency
        ok = True
        for j in range(len(self.load_gen_thrd_list)):
            if self.load_gen_thrd_list[j].isConsistent() == False:
                ok = False
                break
        if not ok:
            break

        time.sleep(5)  # generate load for 5 sec

    # check consistency of load_generator
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].quit()
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].join()
        self.assertTrue(self.load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')
def test_delete_smrlog_after_scaleout(self):
    util.print_frame()

    # start load generator
    util.log("start load_generator")
    for i in range(self.max_load_generator):
        ip, port = util.get_rand_gateway(self.cluster)
        self.load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
        self.load_gen_thrd_list[i].start()

    time.sleep(5)  # generate load for 5 sec
    util.log("started load_generator")

    # servers for scale out
    servers = [config.server4, config.server5, config.server6]
    leader_cm = self.cluster['servers'][0]

    # Scale out
    cluster = config.clusters[0]
    ret = util.pg_add(cluster, servers, leader_cm)
    self.assertEqual(True, ret, 'Scale out fail. util.pg_add returns false')
    time.sleep(5)

    # pg0 -> pg1
    cluster = config.clusters[1]
    ret = util.migration(cluster, 0, 1, 8000, 8191, 40000)
    self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

    # get log files
    old_logs = {}
    for s in config.clusters[0]['servers']:
        parent_dir, log_dir = util.smr_log_dir(s['id'])
        path = '%s/%s' % (parent_dir, log_dir)
        old_logs[s['id']] = util.ls(path)

    # bgsave in order to make smrlogs deleted.
    for s in config.clusters[0]['servers']:
        bgsave_ret = util.bgsave(s)
        self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % s['id'])
        util.log('bgsave pgs%d is done.' % s['id'])

    # check consistency
    for j in range(len(self.load_gen_thrd_list)):
        self.assertTrue(self.load_gen_thrd_list[j].isConsistent(), 'Inconsistent after migration')

    # does smr-replicator delete smrlogs?
    i = 0
    while i < 20:
        i += 1

        # get current log files
        cur_logs = {}
        for s in config.clusters[0]['servers']:
            parent_dir, log_dir = util.smr_log_dir(s['id'])
            path = '%s/%s' % (parent_dir, log_dir)
            cur_logs[s['id']] = util.ls(path)

        # compare old and new
        temp_old_logs = copy.deepcopy(old_logs)
        for id, nl in cur_logs.items():
            ol = temp_old_logs.get(id)
            self.assertNotEqual(ol, None,
                                "failed to check logfiles. old logs for smr-replicator '%d' do not exist." % id)
            for log in nl:
                if log in ol:
                    ol.remove(log)

        ok = True
        for id, ol in temp_old_logs.items():
            if len(ol) == 0:
                ok = False

        util.log('Loop %d ---------------------------------------------------------' % i)
        util.log('deleted smrlog files: %s' % util.json_to_str(temp_old_logs))

        if ok:
            break

        time.sleep(10)

    self.assertTrue(ok, 'smr-replicator does not delete smrlogs.')
    util.log('smr-replicator deletes smrlogs.')

    # check consistency of load_generator
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].quit()
    for i in range(len(self.load_gen_thrd_list)):
        self.load_gen_thrd_list[i].join()
        self.assertTrue(self.load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')