    def __del_server(self, server_to_del):
        # backup data
        redis = redis_mgmt.Redis( server_to_del['id'] )
        ret = redis.connect( server_to_del['ip'], server_to_del['redis_port'] )
        self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )

        # bgsave
        ret = util.bgsave(server_to_del)
        self.assertTrue(ret, 'failed to bgsave. pgs%d' % server_to_del['id'])

        # detach pgs from cluster
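        # Note: the cluster-manager commands below are plain text terminated with CRLF;
        # the reply is parsed as JSON and its 'msg' field is expected to be '+OK' on success.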
        cmd = 'pgs_leave %s %d\r\n' % (server_to_del['cluster_name'], server_to_del['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # check if pgs is removed
        success = False
        for try_cnt in range( 10 ):
            redis = redis_mgmt.Redis( server_to_del['id'] )
            ret = redis.connect( server_to_del['ip'], server_to_del['redis_port'] )
            self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )
            util.log( 'succeeded : connect to smr%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )

            redis.write( 'info stats\r\n' )
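            # Skip the leading lines of the 'info stats' reply, then parse the integer
            # after the ':' on the next line (assumed to reflect how much client traffic
            # this pgs still receives after pgs_leave).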
            for i in range( 6 ):
                redis.read_until( '\r\n' )
            res = redis.read_until( '\r\n' )
            self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )
            util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port'], res[:-2]) )
            no = int( res.split(':')[1] )
            if no <= 100:
                success = True
                break
            time.sleep( 1 )

        self.assertEquals( success, True, 'failed : pgs is not removed.' )
        util.log( 'succeeded : pgs is removed' )

        # change state of pgs to lconn
        cmd = 'pgs_lconn %s %d\r\n' % (server_to_del['cluster_name'], server_to_del['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # shutdown
        ret = testbase.request_to_shutdown_smr( server_to_del )
        self.assertEqual( ret, 0, 'failed : shutdown smr. id:%d' % server_to_del['id'] )
        ret = testbase.request_to_shutdown_redis( server_to_del )
        self.assertEquals( ret, 0, 'failed : shutdown redis. id:%d' % server_to_del['id'] )
        util.log('succeeded : shutdown pgs%d.' % server_to_del['id'] )

        # delete pgs from cluster
        cmd = 'pgs_del %s %d\r\n' % (server_to_del['cluster_name'], server_to_del['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
Example #2
    def __del_server(self, server_to_del):
        # backup data
        redis = redis_mgmt.Redis(server_to_del['id'])
        ret = redis.connect(server_to_del['ip'], server_to_del['redis_port'])
        self.assertEquals(
            ret, 0, 'failed : connect to smr%d(%s:%d)' %
            (server_to_del['id'], server_to_del['ip'],
             server_to_del['redis_port']))

        # bgsave
        ret = util.bgsave(server_to_del)
        self.assertTrue(ret, 'failed to bgsave. pgs%d' % server_to_del['id'])

        # detach pgs from cluster
        cmd = 'pgs_leave %s %d\r\n' % (server_to_del['cluster_name'],
                                       server_to_del['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(
            jobj['msg'], '+OK',
            'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))

        r = util.get_role_of_server(server_to_del)
        # If the quorum of the remaining master is larger than 1, the info command will block.
        if r != c.ROLE_MASTER:
            # check if pgs is removed
            success = False
            for try_cnt in range(10):
                redis = redis_mgmt.Redis(server_to_del['id'])
                ret = redis.connect(server_to_del['ip'],
                                    server_to_del['redis_port'])
                self.assertEquals(
                    ret, 0, 'failed : connect to smr%d(%s:%d)' %
                    (server_to_del['id'], server_to_del['ip'],
                     server_to_del['redis_port']))
                util.log('succeeded : connect to smr%d(%s:%d)' %
                         (server_to_del['id'], server_to_del['ip'],
                          server_to_del['redis_port']))

                redis.write('info stats\r\n')
                for i in range(6):
                    redis.read_until('\r\n')
                res = redis.read_until('\r\n')
                self.assertNotEqual(
                    res, '',
                    'failed : get reply of "info stats" from redis%d(%s:%d)' %
                    (server_to_del['id'], server_to_del['ip'],
                     server_to_del['redis_port']))
                util.log(
                    'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"'
                    % (server_to_del['id'], server_to_del['ip'],
                       server_to_del['redis_port'], res[:-2]))
                no = int(res.split(':')[1])
                if no <= 100:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals(success, True, 'failed : pgs is not removed.')
        util.log('pgs is removed')

        # change state of pgs to lconn
        cmd = 'pgs_lconn %s %d\r\n' % (server_to_del['cluster_name'],
                                       server_to_del['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(
            jobj['msg'], '+OK',
            'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))

        # shutdown
        ret = testbase.request_to_shutdown_smr(server_to_del)
        self.assertEqual(ret, 0,
                         'failed : shutdown smr. id:%d' % server_to_del['id'])
        ret = testbase.request_to_shutdown_redis(server_to_del)
        self.assertEquals(
            ret, 0, 'failed : shutdown redis. id:%d' % server_to_del['id'])
        util.log('succeeded : shutdown pgs%d.' % server_to_del['id'])

        # delete pgs from cluster
        cmd = 'pgs_del %s %d\r\n' % (server_to_del['cluster_name'],
                                     server_to_del['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(
            jobj['msg'], '+OK',
            'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
Example #3
    def test_5_transfer_pgs_to_another_machine(self):
        util.print_frame()
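        # Scenario: build up data and master generation numbers, add two new pgs as
        # slaves, then remove the original three pgs one by one so the cluster ends up
        # running on the new machines, checking data consistency with load generators.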

        self.load_gen_list = {}

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        # increase master generation number
        util.log('failover in order to increase master generation number.')
        max = 0
        for i in range(5):
            key_base = 'key'
            for i in range(max, max + 10000):
                cmd = 'set %s%d %d\r\n' % (key_base, i, i)
                gw.write(cmd)
                res = gw.read_until('\r\n')
                self.assertEquals(res, '+OK\r\n')
            max = max + 10000

            m = util.get_server_by_role(self.cluster['servers'], 'master')
            util.log('failover pgs%d' % m['id'])
            ret = util.failover(m, self.leader_cm)
            self.assertTrue(ret, 'failed to failover pgs%d' % m['id'])

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_list[i] = load_generator.LoadGenerator(i, ip, port)
            self.load_gen_list[i].start()

        time.sleep(5)  # generate load for 5 sec
        util.log("started load_generator")

        m, s1, s2 = util.get_mss(self.cluster)
        servers = [m, s1, s2]

        # bgsave
        for s in servers:
            ret = util.bgsave(s)
            self.assertTrue(ret, 'failed to bgsave. pgs%d' % s['id'])

        new_servers = [config.server4, config.server5]

        # add new slaves
        for s in new_servers:
            util.log('delete pgs%d`s check point.' % s['id'])
            util.del_dumprdb(s['id'])

            ret = util.cluster_util_getdump(s['id'], m['ip'], m['redis_port'],
                                            'dump.rdb', 0, 8191)
            self.assertEqual(
                True, ret,
                'failed : util.cluster_util_getdump returns false, src=%s:%d dest_pgsid=%d'
                % (m['ip'], m['redis_port'], s['id']))

            ret = util.install_pgs(self.cluster,
                                   s,
                                   self.leader_cm,
                                   0,
                                   rm_ckpt=False)
            self.assertEqual(
                True, ret,
                'failed : util.pgs_add returns false, pgsid=%d' % s['id'])
            util.log('succeeded : add a new slave, pgsid=%d' % s['id'])

            # check consistency
            ok = True
            for j in range(self.max_load_generator):
                if self.load_gen_list[j].isConsistent() == False:
                    ok = False
                    break
            if not ok:
                break

        for server_to_del in servers:
            for s in servers:
                util.pingpong(s['ip'], s['smr_mgmt_port'])
            for s in new_servers:
                util.pingpong(s['ip'], s['smr_mgmt_port'])
            self.__del_server(server_to_del)
            util.log('succeeded : delete pgs%d' % server_to_del['id'])

        new_m = util.get_server_by_role(new_servers, 'master')
        new_s = util.get_server_by_role(new_servers, 'slave')
        self.assertNotEqual(new_m, None, 'master is None.')
        self.assertNotEqual(new_s, None, 'slave is None.')

        for s in new_servers:
            util.pingpong(s['ip'], s['smr_mgmt_port'])

        time.sleep(5)  # generate load for 5 sec
        # check consistency of load_generator
        for i in range(self.max_load_generator):
            self.load_gen_list[i].quit()
        for i in range(self.max_load_generator):
            self.load_gen_list[i].join()
            self.assertTrue(self.load_gen_list[i].isConsistent(),
                            'Inconsistent after migration')
            self.load_gen_list.pop(i, None)

        # Go back to initial configuration
        # recover pgs
        for s in servers:
            self.assertTrue(
                util.install_pgs(self.cluster,
                                 s,
                                 self.leader_cm,
                                 rm_ckpt=False),
                'failed to recover pgs. (install_pgs)')

        # cleanup new slaves
        for s in new_servers:
            self.assertTrue(
                util.uninstall_pgs(self.cluster, s, self.leader_cm),
                'failed to cleanup pgs. (uninstall_pgs)')
Example #4
    def test_4_role_change_with_failover(self):
        util.print_frame()
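        # Scenario: shut down a random pgs, change the master role to a slave while load
        # generators are running, then recover the downed pgs and verify timestamps,
        # quorum and data consistency.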

        loop_cnt = 0
        while loop_cnt < 5:
            util.log('')
            util.log('Loop:%d' % loop_cnt)

            util.log("States (before role change)")
            util.log_server_state(self.cluster)

            target = random.choice(self.cluster['servers'])

            # bgsave
            ret = util.bgsave(target)
            self.assertTrue(ret, 'failed to bgsave. pgs:%d' % target['id'])

            # shutdown
            util.log('shutdown pgs%d(%s:%d)' %
                     (target['id'], target['ip'], target['smr_base_port']))
            ret = testbase.request_to_shutdown_smr(target)
            self.assertEqual(ret, 0, 'failed to shutdown smr')

            ret = testbase.request_to_shutdown_redis(target)
            self.assertEquals(ret, 0, 'failed to shutdown redis')

            r = ''
            expected = 'N'
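            # Wait until the leader CM reports role 'N' for the shut-down pgs
            # (assumed to mean its failure has been detected).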
            for fc_cnt in xrange(20):
                r = util.get_smr_role_of_cm(target, self.leader_cm)
                if r == expected:
                    break
                time.sleep(0.5)
            self.assertEquals(r, expected, 'failure detection error.')

            running_servers = []
            for s in self.cluster['servers']:
                if s != target:
                    running_servers.append(s)

            # Get old timestamp
            old_timestamps = {}
            for s in running_servers:
                ts = util.get_timestamp_of_pgs(s)
                old_timestamps[s['id']] = ts

            # Start load generator
            self.load_gen_list = {}
            util.log('start load generator')
            for i in range(self.max_load_generator):
                ip, port = util.get_rand_gateway(self.cluster)
                load_gen = load_generator.LoadGenerator(i, ip, port)
                load_gen.start()
                self.load_gen_list[i] = load_gen

            m, s1, s2 = util.get_mss(self.cluster)
            self.assertNotEqual(m, None, 'master is None.')
            self.assertNotEqual(s1, None, 'slave1 is None.')

            # Role change
            master_id = util.role_change(self.leader_cm,
                                         self.cluster['cluster_name'],
                                         s1['id'])
            self.assertNotEqual(master_id, -1, 'role_change failed')

            util.log("States (after role change)")
            util.log_server_state(self.cluster)

            # Check - get new timestamp
            new_timestamps = {}
            for s in running_servers:
                ts = util.get_timestamp_of_pgs(s)
                new_timestamps[s['id']] = ts

            # Check - compare old timestamps and new timestamps
            for s in running_servers:
                old_ts = old_timestamps[s['id']]
                new_ts = new_timestamps[s['id']]
                self.assertNotEqual(
                    old_ts, new_ts,
                    'Timestamp of a running server has not changed. %d->%d' %
                    (old_ts, new_ts))

            # Check quorum
            m = self.cluster['servers'][master_id]
            expected = 1
            ok = self.__check_quorum(m, expected)
            self.assertTrue(
                ok, 'unexpected quorum(after role change). expected:%s' %
                (expected))

            # recovery
            util.log('recovery pgs%d(%s:%d)' %
                     (target['id'], target['ip'], target['smr_base_port']))
            ret = testbase.request_to_start_smr(target)
            self.assertEqual(ret, 0, 'failed to start smr')
            util.log('start smr-replicator done')

            ret = testbase.request_to_start_redis(target, 60)
            self.assertEqual(ret, 0, 'failed to start redis')
            util.log('start redis-arc done')

            ret = testbase.wait_until_finished_to_set_up_role(target,
                                                              max_try=300)
            self.assertEquals(
                ret, 0, 'failed to role change. smr_id:%d' % (target['id']))

            util.log("States (after recovery)")
            util.log_server_state(self.cluster)

            # Check cluster state
            normal_state = False
            for i in xrange(20):
                normal_state = util.check_cluster(self.cluster['cluster_name'],
                                                  self.leader_cm['ip'],
                                                  self.leader_cm['cm_port'],
                                                  check_quorum=True)
                if normal_state:
                    break
                time.sleep(0.5)
            self.assertTrue(normal_state, "Unstable cluster state")

            # Check quorum
            expected = 2
            ok = self.__check_quorum(m, expected)
            self.assertTrue(
                ok,
                'unexpected quorum(after recovery). expected:%s' % (expected))

            # Check consistency
            util.log('stop load generator')
            for i in range(self.max_load_generator):
                self.load_gen_list[i].quit()
            for i in range(self.max_load_generator):
                self.load_gen_list[i].join()
                self.assertTrue(self.load_gen_list[i].isConsistent(),
                                'Inconsistent after migration')
                self.load_gen_list.pop(i, None)

            loop_cnt += 1

        return 0
Example #5
    def test_delete_smrlog_after_scaleout(self):
        util.print_frame()
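        # Scenario: scale out with a new PG, migrate part of the key space to it, then
        # bgsave on the original PG and verify that smr-replicator deletes its old
        # replication logs.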

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
            self.load_gen_thrd_list[i].start()

        time.sleep(5) # generate load for 5 sec
        util.log("started load_generator")

        # servers for scale out
        servers = [config.server4, config.server5, config.server6]
        leader_cm = self.cluster['servers'][0]

        # Scale out
        cluster = config.clusters[0]
        ret = util.pg_add(cluster, servers, leader_cm)
        self.assertEqual(True, ret, 'Scale out fail. util.pg_add returns false')

        time.sleep(5)
        # pg0 -> pg1
        cluster = config.clusters[1]
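        # Migrate key slots 8000-8191 from pg0 to pg1 (the last argument is assumed to
        # be a tps limit for the migration).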
        ret = util.migration(cluster, 0, 1, 8000, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

        # get log file
        old_logs = {}
        for s in config.clusters[0]['servers']:
            parent_dir, log_dir = util.smr_log_dir(s['id'])
            path = '%s/%s' % (parent_dir, log_dir)
            old_logs[s['id']] = util.ls(path)

        # bgsave in order to make smrlogs deleted.
        for s in config.clusters[0]['servers']:
            bgsave_ret = util.bgsave(s)
            self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % s['id'])
            util.log('bgsave pgs%d is done.' % s['id'])

        # check consistency
        ok = True
        for j in range(len(self.load_gen_thrd_list)):
            self.assertTrue(self.load_gen_thrd_list[j].isConsistent(),
                    'Inconsistent after migration')

        # does smr-replicator delete smrlogs?
        i = 0
        while i < 20:
            i += 1
            # get current log files
            cur_logs = {}
            for s in config.clusters[0]['servers']:
                parent_dir, log_dir = util.smr_log_dir(s['id'])
                path = '%s/%s' % (parent_dir, log_dir)
                cur_logs[s['id']] = util.ls(path)

            # compare old and new
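            # temp_old_logs ends up holding, per replicator, the old log files that no
            # longer exist on disk; the check below succeeds only if every replicator
            # has deleted at least one of its old smrlogs.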
            temp_old_logs = copy.deepcopy(old_logs)
            for id, nl in cur_logs.items():
                ol = temp_old_logs.get(id)
                self.assertNotEqual(ol, None, "failed to check logfiles. old logs for smr-replicator '%d' do not exist." % id)

                for log in nl:
                    if log in ol:
                        ol.remove(log)

            ok = True
            for id, ol in temp_old_logs.items():
                if len(ol) == 0:
                    ok = False

            util.log('Loop %d ---------------------------------------------------------' % i)
            util.log('deleted smrlog files: %s' % util.json_to_str(temp_old_logs))

            if ok:
                break

            time.sleep(10)

        self.assertTrue(ok, 'smr-replicator does not delete smrlogs.')
        util.log('smr-replicator deletes smrlogs.')

        # check consistency of load_generator
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].quit()
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].join()
            self.assertTrue(self.load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')
Example #6
    def test_delete_smrlog_after_scaleout(self):
        util.print_frame()

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_thrd_list[i] = load_generator.LoadGenerator(
                i, ip, port)
            self.load_gen_thrd_list[i].start()

        time.sleep(5)  # generate load for 5 sec
        util.log("started load_generator")

        # servers for scale out
        servers = [config.server4, config.server5, config.server6]
        leader_cm = self.cluster['servers'][0]

        # Scale out
        cluster = config.clusters[0]
        ret = util.pg_add(cluster, servers, leader_cm)
        self.assertEqual(True, ret,
                         'Scale out fail. util.pg_add returns false')

        time.sleep(5)
        # pg0 -> pg1
        cluster = config.clusters[1]
        ret = util.migration(cluster, 0, 1, 8000, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

        # get log file
        old_logs = {}
        for s in config.clusters[0]['servers']:
            parent_dir, log_dir = util.smr_log_dir(s['id'])
            path = '%s/%s' % (parent_dir, log_dir)
            old_logs[s['id']] = util.ls(path)

        # bgsave in order to make smrlogs deleted.
        for s in config.clusters[0]['servers']:
            bgsave_ret = util.bgsave(s)
            self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % s['id'])
            util.log('bgsave pgs%d is done.' % s['id'])

        # check consistency
        ok = True
        for j in range(len(self.load_gen_thrd_list)):
            self.assertTrue(self.load_gen_thrd_list[j].isConsistent(),
                            'Inconsistent after migration')

        # does smr-replicator delete smrlogs?
        i = 0
        while i < 20:
            i += 1
            # get current log files
            cur_logs = {}
            for s in config.clusters[0]['servers']:
                parent_dir, log_dir = util.smr_log_dir(s['id'])
                path = '%s/%s' % (parent_dir, log_dir)
                cur_logs[s['id']] = util.ls(path)

            # compare old and new
            temp_old_logs = copy.deepcopy(old_logs)
            for id, nl in cur_logs.items():
                ol = temp_old_logs.get(id)
                self.assertNotEqual(
                    ol, None,
                    "failed to check logfiles. old logs for smr-replicator '%d' do not exist."
                    % id)

                for log in nl:
                    if log in ol:
                        ol.remove(log)

            ok = True
            for id, ol in temp_old_logs.items():
                if len(ol) == 0:
                    ok = False

            util.log(
                'Loop %d ---------------------------------------------------------'
                % i)
            util.log('deleted smrlog files: %s' %
                     util.json_to_str(temp_old_logs))

            if ok:
                break

            time.sleep(10)

        self.assertTrue(ok, 'smr-replicator does not delete smrlogs.')
        util.log('smr-replicator deletes smrlogs.')

        # check consistency of load_generator
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].quit()
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].join()
            self.assertTrue(self.load_gen_thrd_list[i].isConsistent(),
                            'Inconsistent after migration')
    def test_5_transfer_pgs_to_another_machine(self):
        util.print_frame()

        self.load_gen_list = {}

        # get gateway info
        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( self.cluster['servers'][0]['id'] )
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port) )

        # increase master generation number
        util.log('failover in order to increase master generation number.')
        max = 0
        for i in range(5):
            key_base = 'key'
            for i in range(max, max+10000):
                cmd = 'set %s%d %d\r\n' % (key_base, i, i)
                gw.write( cmd )
                res = gw.read_until( '\r\n' )
                self.assertEquals( res, '+OK\r\n' )
            max = max + 10000

            m = util.get_server_by_role(self.cluster['servers'], 'master')
            util.log('failover pgs%d' %  m['id'])
            ret = util.failover(m, self.leader_cm)
            self.assertTrue(ret, 'failed to failover pgs%d' % m['id'])

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_list[i] = load_generator.LoadGenerator(i, ip, port)
            self.load_gen_list[i].start()

        time.sleep(5) # generate load for 5 sec
        util.log("started load_generator")

        m, s1, s2 = util.get_mss(self.cluster)
        servers = [m, s1, s2]

        # bgsave
        for s in servers:
            ret = util.bgsave(s)
            self.assertTrue(ret, 'failed to bgsave. pgs%d' % s['id'])

        new_servers = [config.server4, config.server5]

        # add new slaves
        for s in new_servers:
            util.log('delete pgs%d`s check point.' % s['id'])
            util.del_dumprdb(s['id'])

            ret = util.cluster_util_getdump(s['id'], m['ip'], m['redis_port'], 'dump.rdb', 0, 8191)
            self.assertEqual(True, ret,
                'failed : util.cluster_util_getdump returns false, src=%s:%d dest_pgsid=%d' % (
                m['ip'], m['redis_port'], s['id']))

            ret = util.pgs_add(self.cluster, s, self.leader_cm, 0, rm_ckpt=False)
            self.assertEqual(True, ret, 'failed : util.pgs_add returns false, pgsid=%d' % s['id'])
            util.log('succeeded : add a new slave, pgsid=%d' % s['id'])

            # check consistency
            ok = True
            for j in range(self.max_load_generator):
                if self.load_gen_list[j].isConsistent() == False:
                    ok = False
                    break
            if not ok:
                break

        for server_to_del in servers:
            for s in servers:
                util.pingpong( s['ip'], s['smr_mgmt_port'] )
            for s in new_servers:
                util.pingpong( s['ip'], s['smr_mgmt_port'] )
            self.__del_server(server_to_del)
            util.log('succeeded : delete pgs%d' % server_to_del['id'])

        new_m = util.get_server_by_role(new_servers, 'master')
        new_s = util.get_server_by_role(new_servers, 'slave')
        self.assertNotEqual( new_m, None, 'master is None.' )
        self.assertNotEqual( new_s, None, 'slave is None.' )

        for s in new_servers:
            util.pingpong( s['ip'], s['smr_mgmt_port'] )

        time.sleep(5) # generate load for 5 sec
        # check consistency of load_generator
        for i in range(self.max_load_generator):
            self.load_gen_list[i].quit()
        for i in range(self.max_load_generator):
            self.load_gen_list[i].join()
            self.assertTrue(self.load_gen_list[i].isConsistent(), 'Inconsistent after migration')
            self.load_gen_list.pop(i, None)
    def deprecated_test_5_PGS_commit_is_greater_than_PG_commit( self ):
        util.print_frame()
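        # Scenario: shut down both slaves, advance the master's commit sequence with
        # more writes, shut the master down too, recover the slaves, and finally verify
        # that the old master (whose commit-seq is ahead of the PG) cannot rejoin as a slave.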

        # get gateway info
        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( self.cluster['servers'][0]['id'] )
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port) )

        # initial data
        util.put_some_data(self.cluster)

        master, s1, s2 = util.get_mss(self.cluster)

        server_to_join = [s1, s2]
        # shutdown slaves
        for i in range(0, 2):
            ret = testbase.request_to_shutdown_smr( server_to_join[i] )
            self.assertEqual( ret, 0, 'failed to shutdown smr%d' % server_to_join[i]['id'])
            util.log('succeeded to shutdown smr%d' % server_to_join[i]['id'])

            ret = testbase.request_to_shutdown_redis( server_to_join[i] )
            self.assertEquals( ret, 0, 'failed to shutdown redis' )
            util.log('succeeded to shutdown redis%d' % server_to_join[i]['id'])

            # check state F
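            # 'F' is assumed to be the state the cluster manager reports for a pgs
            # whose failure has been detected.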
            max_try = 20
            expected = 'F'
            for j in range( 0, max_try):
                state = util.get_smr_state( server_to_join[i], self.leader_cm )
                if expected == state:
                    break
                time.sleep( 1 )
            self.assertEquals( expected , state,
                               'server%d - state:%s, expected:%s' % (server_to_join[i]['id'], state, expected) )

        # put more data
        util.put_some_data(self.cluster, 10, 256)

        # bgsave
        ret = util.bgsave(master)
        self.assertTrue(ret, 'failed to bgsave. pgs%d' % master['id'])

        # shutdown master
        ret = testbase.request_to_shutdown_smr( master )
        self.assertEqual( ret, 0, 'failed to shutdown smr' )
        util.log('succeeded to shutdown master smr, id=%d' % master['id'])
        ret = testbase.request_to_shutdown_redis( master )
        self.assertEquals( ret, 0, 'failed to shutdown redis' )
        util.log('succeeded to shutdown master redis, id=%d' % master['id'])

        # check state F
        max_try = 20
        expected = 'F'
        for i in range( 0, max_try):
            state = util.get_smr_state( master, self.leader_cm )
            if expected == state:
                break
            time.sleep( 1 )
        self.assertEquals( expected , state,
                           'server%d - state:%s, expected:%s' % (master['id'], state, expected) )

        # recovery slaves
        for i in range(0, 2):
            ret = testbase.request_to_start_smr( server_to_join[i] )
            self.assertEqual( ret, 0, 'failed to start smr' )

            ret = testbase.request_to_start_redis( server_to_join[i] )
            self.assertEqual( ret, 0, 'failed to start redis' )

            ret = testbase.wait_until_finished_to_set_up_role( server_to_join[i], 10 )
            self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (server_to_join[i]['id']) )

            # check state N
            max_try = 20
            expected = 'N'
            for j in range( 0, max_try):
                state = util.get_smr_state( server_to_join[i], self.leader_cm )
                if expected == state:
                    break
                time.sleep( 1 )
            role = util.get_role_of_server( server_to_join[i] )
            self.assertEquals( expected , state,
                               'server%d - state:%s, expected:%s, role:%s' % (server_to_join[i]['id'], state, expected, role) )

        # set value
        s = random.choice(server_to_join)
        redis = redis_mgmt.Redis( s['id'] )
        ret = redis.connect( s['ip'], s['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

        key_base = 'key_test'
        for i in range(0, 10000):
            cmd = 'set %s%d %d\r\n' % (key_base, i, i)
            redis.write( cmd )
            res = redis.read_until( '\r\n' )
            self.assertEquals( res, '+OK\r\n' )
        redis.disconnect()

        for i in range(0, 2):
            redis = redis_mgmt.Redis( server_to_join[i]['id'] )
            ret = redis.connect( server_to_join[i]['ip'], server_to_join[i]['redis_port'] )
            self.assertEquals( ret, 0, 'failed to connect to redis' )

            # check value
            for j in range(0, 10000):
                cmd = 'get %s%d\r\n' % (key_base, j)
                redis.write( cmd )
                redis.read_until( '\r\n'  )
                response = redis.read_until( '\r\n'  )
                self.assertEqual( response, '%d\r\n' % (j), 'inconsistent %s, %d' % (response[:-2], j) )

        # try to recover master, but failed
        ret = testbase.request_to_start_smr( master )
        self.assertEqual( ret, 0, 'failed to start smr' )

        ret = testbase.request_to_start_redis( master, False )
        self.assertEqual( ret, 0, 'failed to start redis' )

        max_try = 3
        expected = 'N'
        for i in range( 0, max_try):
            state = util.get_smr_state( master, self.leader_cm )
            if expected == state:
                break
            time.sleep( 1 )
        role = util.get_role_of_server( master )
        self.assertNotEqual( expected, state,
                             'server%d - state:%s, expected:not %s, role:%s' % (master['id'], state, expected, role) )
        util.log('success : the old master that has a greater commit-seq than the current master tried to join as a slave, but it is blocked successfully.')

        gw.disconnect()
        return 0
    def test_4_role_change_with_failover(self):
        util.print_frame()

        loop_cnt = 0
        while loop_cnt < 5:
            util.log('')
            util.log('Loop:%d' % loop_cnt)

            util.log("States (before role change)")
            util.log_server_state(self.cluster)

            target = random.choice(self.cluster['servers'])

            # bgsave
            ret = util.bgsave(target)
            self.assertTrue(ret, 'failed to bgsave. pgs:%d' % target['id'])

            # shutdown
            util.log('shutdown pgs%d(%s:%d)' % (target['id'], target['ip'], target['smr_base_port']))
            ret = testbase.request_to_shutdown_smr( target )
            self.assertEqual( ret, 0, 'failed to shutdown smr' )

            ret = testbase.request_to_shutdown_redis( target )
            self.assertEquals( ret, 0, 'failed to shutdown redis' )

            running_servers = []
            for s in self.cluster['servers']:
                if s != target:
                    running_servers.append(s)

            # Get old timestamp
            old_timestamps = {}
            for s in running_servers:
                ts = util.get_timestamp_of_pgs(s)
                old_timestamps[s['id']] = ts

            # Start load generator
            self.load_gen_list = {}
            util.log('start load generator')
            for i in range(self.max_load_generator):
                ip, port = util.get_rand_gateway(self.cluster)
                load_gen = load_generator.LoadGenerator(i, ip, port)
                load_gen.start()
                self.load_gen_list[i] = load_gen

            m, s1, s2 = util.get_mss(self.cluster)
            self.assertNotEqual(m, None, 'master is None.')
            self.assertNotEqual(s1, None, 'slave1 is None.')

            # Role change
            master_id = util.role_change(self.leader_cm, self.cluster['cluster_name'], s1['id'])
            self.assertNotEqual(master_id, -1, 'role_change failed')

            util.log("States (after role change)")
            util.log_server_state(self.cluster)

            # Check - get new timestamp
            new_timestamps = {}
            for s in running_servers:
                ts = util.get_timestamp_of_pgs( s )
                new_timestamps[s['id']] = ts

            # Check - compare old timestamps and new timestamps
            for s in running_servers:
                old_ts = old_timestamps[s['id']]
                new_ts = new_timestamps[s['id']]
                self.assertNotEqual(old_ts, new_ts, 'Timestamp of a running server has not changed. %d->%d' % (old_ts, new_ts))

            # Check quorum
            m = self.cluster['servers'][master_id]
            expected = 1
            ok = self.__check_quorum(m, expected)
            self.assertTrue(ok, 'unexpected quorum(after role change). expected:%s' % (expected))

            # recovery
            util.log('recovery pgs%d(%s:%d)' % (target['id'], target['ip'], target['smr_base_port']))
            ret = testbase.request_to_start_smr( target )
            self.assertEqual( ret, 0, 'failed to start smr' )
            util.log('start smr-replicator done')

            ret = testbase.request_to_start_redis( target, 60 )
            self.assertEqual( ret, 0, 'failed to start redis' )
            util.log('start redis-arc done')

            ret = testbase.wait_until_finished_to_set_up_role( target, max_try=300)
            self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (target['id']) )

            util.log("States (after recovery)")
            util.log_server_state(self.cluster)

            # Check quorum
            expected = 1
            ok = self.__check_quorum(m, expected)
            self.assertTrue(ok, 'unexpected quorum(after recovery). expected:%s' % (expected))

            # Check consistency
            util.log('stop load generator')
            for i in range(self.max_load_generator):
                self.load_gen_list[i].quit()
            for i in range(self.max_load_generator):
                self.load_gen_list[i].join()
                self.assertTrue(self.load_gen_list[i].isConsistent(), 'Inconsistent after migration')
                self.load_gen_list.pop(i, None)

            loop_cnt += 1

        return 0
    def recovery_with_local_checkpoint_and_remote_log(self, role):
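        # Scenario: take a local checkpoint, delete the local replication logs while the
        # pgs is down, write more data, then restart; recovery has to combine the local
        # checkpoint with logs fetched from the other replicators.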
        server = util.get_server_by_role(self.cluster['servers'], role)

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        # set value
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(server['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, id:%d' % server['id'])
        timestamp = {}
        key_base = 'key0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999'
        for i in range(0, 50000):
            timestamp[i] = time.time()
            k = '%s_%d' % (key_base, i)
            cmd = 'set %s %f\r\n' % (k, timestamp[i])
            gw.write(cmd)
            response = gw.read_until('\r\n')
            self.assertNotEqual(response.find('+OK'), -1,
                                'failed to set key value through gateway')

        # generate a check point
        bgsave_ret = util.bgsave(server)
        self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % server['id'])

        # shutdown
        ret = testbase.request_to_shutdown_smr(server)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        ret = testbase.request_to_shutdown_redis(server)
        self.assertEqual(ret, 0, 'failed to shutdown redis')
        util.log('succeeded : shutdown pgs%d' % (server['id']))

        # delete smr_logs
        ret = util.delete_smr_logs(server['id'])
        self.assertEqual(ret, 0,
                         'failed to delete smr log, id:%d' % server['id'])
        util.log('succeeded : delete replication logs')

        time.sleep(5)

        # set value
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0, 'failed to connect to gateway')
        for i in range(50000, 100000):
            timestamp[i] = time.time()
            k = '%s_%d' % (key_base, i)
            cmd = 'set %s %f\r\n' % (k, timestamp[i])
            gw.write(cmd)
            response = gw.read_until('\r\n')
            self.assertNotEqual(response.find('+OK'), -1,
                                'failed to set key value through gateway')

        # recovery
        ret = testbase.request_to_start_smr(server)
        self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(server)
        self.assertEqual(ret, 0, 'failed to start redis')
        time.sleep(5)

        ret = testbase.wait_until_finished_to_set_up_role(server)
        self.assertEquals(ret, 0,
                          'failed to role change. smr_id:%d' % (server['id']))
        util.log('succeeded : recover pgs%d' % server['id'])

        # check value
        recovered_redis = redis_mgmt.Redis(server['id'])
        ret = recovered_redis.connect(server['ip'], server['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        for i in range(0, 100000):
            k = '%s_%d' % (key_base, i)
            cmd = 'get %s\r\n' % (k)
            recovered_redis.write(cmd)
            recovered_redis.read_until('\r\n')
            response = recovered_redis.read_until('\r\n')
            self.assertEqual(response, '%f\r\n' % (timestamp[i]),
                             'inconsistent %s, %f' % (response, timestamp[i]))
    def test_restart_recovery_with_remote_checkpoint_and_remote_log(self):
        util.print_frame()
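        # Scenario: delete the slave's local checkpoint, take a checkpoint on the master,
        # write more data while the slave is down, then restart the slave; it has to
        # recover from the remote checkpoint and remote logs.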
        key_base = 'key'
        target = util.get_server_by_role(self.cluster['servers'], 'slave')
        master = util.get_server_by_role(self.cluster['servers'], 'master')

        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(master['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0, 'failed to connect to gateway')

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        # generate some data
        for i in range(0, 100):
            key = '%s%d' % (key_base, i)
            cmd = 'set %s %d\r\n' % (key, i)
            gw.write(cmd)
            res = gw.read_until('\r\n')
            self.assertEquals(res, '+OK\r\n')
        gw.disconnect()

        # delete a local checkpoint
        util.log('delete pgs%d`s check point.' % target['id'])
        util.del_dumprdb(target['id'])

        # generate a remote check point
        bgsave_ret = util.bgsave(master)
        self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % master['id'])

        # shutdown
        util.log('shutdown target')
        ret = testbase.request_to_shutdown_smr(target)
        self.assertEqual(ret, 0, 'failed to shutdown smr')

        time.sleep(10)

        # generate some data
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0, 'failed to connect to gateway')
        for i in range(100, 200):
            key = '%s%d' % (key_base, i)
            cmd = 'set %s %d\r\n' % (key, i)
            gw.write(cmd)
            res = gw.read_until('\r\n')
            self.assertEquals(res, '+OK\r\n')
        gw.disconnect()

        # recovery
        util.log('recovery target')
        ret = testbase.request_to_start_smr(target)
        self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(target)
        self.assertEqual(ret, 0, 'failed to start redis')
        time.sleep(5)

        ret = testbase.wait_until_finished_to_set_up_role(target)
        self.assertEquals(ret, 0,
                          'failed to role change. smr_id:%d' % (target['id']))

        # check value
        recovered_redis = redis_mgmt.Redis(target['id'])
        ret = recovered_redis.connect(target['ip'], target['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        for i in range(0, 200):
            key = '%s%d' % (key_base, i)
            cmd = 'get %s\r\n' % (key)
            recovered_redis.write(cmd)
            recovered_redis.read_until('\r\n')
            response = recovered_redis.read_until('\r\n')
            self.assertEqual(response, '%d\r\n' % i,
                             'inconsistent %s, %d' % (response, i))
    def recovery_with_local_checkpoint_and_remote_log( self, role ):
        server = util.get_server_by_role( self.cluster['servers'], role )

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        # set value
        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( server['id'] )
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway, id:%d' % server['id'] )
        timestamp = {}
        key_base = 'key0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999'
        for i in range (0, 50000):
            timestamp[i] = time.time()
            k = '%s_%d' % (key_base, i)
            cmd = 'set %s %f\r\n' % (k, timestamp[i])
            gw.write( cmd )
            response = gw.read_until( '\r\n' )
            self.assertNotEqual( response.find( '+OK' ), -1, 'failed to set key value through gateway' )

        # generate a check point
        bgsave_ret = util.bgsave( server )
        self.assertTrue( bgsave_ret, 'failed to bgsave. pgs%d' % server['id'] )

        # shutdown
        ret = testbase.request_to_shutdown_smr( server )
        self.assertEqual( ret, 0, 'failed to shutdown smr' )
        ret = testbase.request_to_shutdown_redis( server )
        self.assertEqual( ret, 0, 'failed to shutdown redis' )
        util.log('succeeded : shutdown pgs%d' % (server['id']))

        # delete smr_logs
        ret = util.delete_smr_logs( server['id'] )
        self.assertEqual( ret, 0, 'failed to delete smr log, id:%d' % server['id'] )
        util.log('succeeded : delete replication logs')

        time.sleep( 5 )

        # set value
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway' )
        for i in range (50000, 100000):
            timestamp[i] = time.time()
            k = '%s_%d' % (key_base, i)
            cmd = 'set %s %f\r\n' % (k, timestamp[i])
            gw.write( cmd )
            response = gw.read_until( '\r\n' )
            self.assertNotEqual( response.find( '+OK' ), -1, 'failed to set key value through gateway' )

        # recovery
        ret = testbase.request_to_start_smr( server )
        self.assertEqual( ret, 0, 'failed to start smr' )

        ret = testbase.request_to_start_redis( server )
        self.assertEqual( ret, 0, 'failed to start redis' )
        time.sleep( 5 )

        ret = testbase.wait_until_finished_to_set_up_role( server )
        self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (server['id']) )
        util.log('succeeded : recover pgs%d' % server['id'])

        # check value
        recovered_redis = redis_mgmt.Redis( server['id'] )
        ret = recovered_redis.connect( server['ip'], server['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

        for i in range (0, 100000):
            k = '%s_%d' % (key_base, i)
            cmd = 'get %s\r\n' % (k)
            recovered_redis.write( cmd )
            recovered_redis.read_until( '\r\n'  )
            response = recovered_redis.read_until( '\r\n'  )
            self.assertEqual( response, '%f\r\n' % (timestamp[i]), 'inconsistent %s, %f' % (response, timestamp[i]) )
    def test_restart_recovery_with_remote_checkpoint_and_remote_log( self ):
        util.print_frame()
        key_base = 'key'
        target = util.get_server_by_role( self.cluster['servers'], 'slave' )
        master = util.get_server_by_role( self.cluster['servers'], 'master' )

        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( master['id'] )
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway' )

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        # generate some data
        for i in range( 0, 100 ):
            key = '%s%d' % (key_base, i)
            cmd = 'set %s %d\r\n' % (key, i)
            gw.write( cmd )
            res = gw.read_until( '\r\n' )
            self.assertEquals( res, '+OK\r\n' )
        gw.disconnect()

        # delete a local checkpoint
        util.log('delete pgs%d`s check point.' % target['id'])
        util.del_dumprdb( target['id'] )

        # generate a remote check point
        bgsave_ret = util.bgsave( master )
        self.assertTrue( bgsave_ret, 'failed to bgsave. pgs%d' % master['id'] )

        # shutdown
        util.log('shutdown target')
        ret = testbase.request_to_shutdown_smr( target )
        self.assertEqual( ret, 0, 'failed to shutdown smr' )

        time.sleep( 10 )

        # generate some data
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway' )
        for i in range( 100, 200 ):
            key = '%s%d' % (key_base, i)
            cmd = 'set %s %d\r\n' % (key, i)
            gw.write( cmd )
            res = gw.read_until( '\r\n' )
            self.assertEquals( res, '+OK\r\n' )
        gw.disconnect()

        # recovery
        util.log('recovery target')
        ret = testbase.request_to_start_smr( target )
        self.assertEqual( ret, 0, 'failed to start smr' )

        ret = testbase.request_to_start_redis( target )
        self.assertEqual( ret, 0, 'failed to start redis' )
        time.sleep( 5 )

        ret = testbase.wait_until_finished_to_set_up_role( target)
        self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (target['id']) )

        # check value
        recovered_redis = redis_mgmt.Redis( target['id'] )
        ret = recovered_redis.connect( target['ip'], target['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

        for i in range (0, 200):
            key = '%s%d' % (key_base, i)
            cmd = 'get %s\r\n' % (key)
            recovered_redis.write( cmd )
            recovered_redis.read_until( '\r\n'  )
            response = recovered_redis.read_until( '\r\n'  )
            self.assertEqual( response, '%d\r\n' % i, 'inconsistent %s, %d' % (response, i) )
Example #14
    def deprecated_test_5_PGS_commit_is_greater_than_PG_commit(self):
        util.print_frame()

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        # initial data
        util.put_some_data(self.cluster)

        master, s1, s2 = util.get_mss(self.cluster)

        server_to_join = [s1, s2]
        # shutdown slaves
        for i in range(0, 2):
            ret = testbase.request_to_shutdown_smr(server_to_join[i])
            self.assertEqual(
                ret, 0, 'failed to shutdown smr%d' % server_to_join[i]['id'])
            util.log('succeeded to shutdown smr%d' % server_to_join[i]['id'])

            ret = testbase.request_to_shutdown_redis(server_to_join[i])
            self.assertEquals(ret, 0, 'failed to shutdown redis')
            util.log('succeeded to shutdown redis%d' % server_to_join[i]['id'])

            # check state F
            max_try = 20
            expected = 'F'
            for j in range(0, max_try):
                state = util.get_smr_state(server_to_join[i], self.leader_cm)
                if expected == state:
                    break
                time.sleep(1)
            self.assertEquals(
                expected, state, 'server%d - state:%s, expected:%s' %
                (server_to_join[i]['id'], state, expected))

        # put more data
        util.put_some_data(self.cluster, 10, 256)

        # bgsave
        ret = util.bgsave(master)
        self.assertTrue(ret, 'failed to bgsave. pgs%d' % master['id'])

        # shutdown master
        ret = testbase.request_to_shutdown_smr(master)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        util.log('succeeded to shutdown master smr, id=%d' % master['id'])
        ret = testbase.request_to_shutdown_redis(master)
        self.assertEquals(ret, 0, 'failed to shutdown redis')
        util.log('succeeded to shutdown master redis, id=%d' % master['id'])

        # check state F
        max_try = 20
        expected = 'F'
        for i in range(0, max_try):
            state = util.get_smr_state(master, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (master['id'], state, expected))

        # recovery slaves
        for i in range(0, 2):
            ret = testbase.request_to_start_smr(server_to_join[i])
            self.assertEqual(ret, 0, 'failed to start smr')

            ret = testbase.request_to_start_redis(server_to_join[i])
            self.assertEqual(ret, 0, 'failed to start redis')

            ret = testbase.wait_until_finished_to_set_up_role(
                server_to_join[i], 10)
            self.assertEquals(
                ret, 0,
                'failed to role change. smr_id:%d' % (server_to_join[i]['id']))

            # check state N
            max_try = 20
            expected = 'N'
            for j in range(0, max_try):
                state = util.get_smr_state(server_to_join[i], self.leader_cm)
                if expected == state:
                    break
                time.sleep(1)
            role = util.get_role_of_server(server_to_join[i])
            self.assertEquals(
                expected, state, 'server%d - state:%s, expected:%s, role:%s' %
                (server_to_join[i]['id'], state, expected, role))

        # set value
        s = random.choice(server_to_join)
        redis = redis_mgmt.Redis(s['id'])
        ret = redis.connect(s['ip'], s['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        key_base = 'key_test'
        for i in range(0, 10000):
            cmd = 'set %s%d %d\r\n' % (key_base, i, i)
            redis.write(cmd)
            res = redis.read_until('\r\n')
            self.assertEquals(res, '+OK\r\n')
        redis.disconnect()

        for i in range(0, 2):
            redis = redis_mgmt.Redis(server_to_join[i]['id'])
            ret = redis.connect(server_to_join[i]['ip'],
                                server_to_join[i]['redis_port'])
            self.assertEquals(ret, 0, 'failed to connect to redis')

            # check value
            for j in range(0, 10000):
                cmd = 'get %s%d\r\n' % (key_base, j)
                redis.write(cmd)
                redis.read_until('\r\n')
                response = redis.read_until('\r\n')
                self.assertEqual(response, '%d\r\n' % (j),
                                 'inconsistent %s, %d' % (response[:-2], j))

        # try to recover master, but failed
        ret = testbase.request_to_start_smr(master)
        self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(master, False)
        self.assertEqual(ret, 0, 'failed to start redis')

        max_try = 3
        expected = 'N'
        for i in range(0, max_try):
            state = util.get_smr_state(master, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        role = util.get_role_of_server(master)
        self.assertNotEqual(
            expected, state, 'server%d - state:%s, expected:not %s, role:%s' %
            (master['id'], state, expected, role))
        util.log(
            'success : the old master that has a greater commit-seq than the current master tried to join as a slave, but it is blocked successfully.'
        )

        gw.disconnect()
        return 0