Example #1
0
    def __del_server(self, server_to_del):
        # backup data
        redis = redis_mgmt.Redis( server_to_del['id'] )
        ret = redis.connect( server_to_del['ip'], server_to_del['redis_port'] )
        self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )

        # bgsave
        ret = util.bgsave(server_to_del)
        self.assertTrue(ret, 'failed to bgsave. pgs%d' % server_to_del['id'])

        # detach pgs from cluster
        cmd = 'pgs_leave %s %d\r\n' % (server_to_del['cluster_name'], server_to_del['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # check if pgs is removed
        success = False
        for try_cnt in range( 10 ):
            redis = redis_mgmt.Redis( server_to_del['id'] )
            ret = redis.connect( server_to_del['ip'], server_to_del['redis_port'] )
            self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )
            util.log( 'succeeded : connect to smr%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )

            redis.write( 'info stats\r\n' )
            for i in range( 6 ):
                redis.read_until( '\r\n' )
            res = redis.read_until( '\r\n' )
            self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port']) )
            util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (server_to_del['id'], server_to_del['ip'], server_to_del['redis_port'], res[:-2]) )
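            # The value parsed below is one numeric field of the "info stats"
            # reply; a small value (<= 100) is presumably a sign that gateways
            # have stopped routing traffic to the detached PGS.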
            no = int( res.split(':')[1] )
            if no <= 100:
                success = True
                break
            time.sleep( 1 )

        self.assertEquals( success, True, 'failed : pgs was not removed.' )
        util.log( 'succeeded : pgs is removed' )

        # change state of pgs to lconn
        cmd = 'pgs_lconn %s %d\r\n' % (server_to_del['cluster_name'], server_to_del['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # shutdown
        ret = testbase.request_to_shutdown_smr( server_to_del )
        self.assertEqual( ret, 0, 'failed : shutdown smr. id:%d' % server_to_del['id'] )
        ret = testbase.request_to_shutdown_redis( server_to_del )
        self.assertEquals( ret, 0, 'failed : shutdown redis. id:%d' % server_to_del['id'] )
        util.log('succeeded : shutdown pgs%d.' % server_to_del['id'] )

        # delete pgs from cluster
        cmd = 'pgs_del %s %d\r\n' % (server_to_del['cluster_name'], server_to_del['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
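
    # The "send a confmaster command, parse the JSON reply, assert '+OK'" sequence
    # above repeats throughout these tests. A minimal sketch of a helper that could
    # wrap it (hypothetical; not part of the original suite):
    def __cm_ok(self, cmd):
        # cmd is passed without a trailing '\r\n', as in the later examples
        reply = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)
        jobj = json.loads(reply)
        self.assertEqual(jobj['msg'], '+OK',
                         'failed : cmd="%s", reply="%s"' % (cmd, reply[:-2]))
        return jobj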
Example #2
0
    def pgs_add_and_del( self, upgrade_server, type ):
        util.print_frame()

        util.log( '[start] add and del pgs%d. type:%s' % (upgrade_server['id'], type) )
        util.log_server_state( self.cluster )

        # start load generator
        load_gen_list = {}
        for i in range( len(self.cluster['servers']) ):
            server = self.cluster['servers'][i]
            load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
            load_gen.start()
            load_gen_list[i] = load_gen

        # detach pgs from cluster
        cmd = 'pgs_leave %s %d\r\n' % (upgrade_server['cluster_name'], upgrade_server['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # set new values
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway( '0' )
        gw.connect( ip, port )
        for i in range( 0, 50 ):
            cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
            gw.write( cmd )
            res = gw.read_until( '\r\n' )
            self.assertEqual( res, '+OK\r\n', 'failed to set values to gw(%s:%d). cmd:%s, res:%s' % (ip, port, cmd[:-2], res[:-2]) )

        # attach pgs to cluster
        cmd = 'pgs_join %s %d\r\n' % (upgrade_server['cluster_name'], upgrade_server['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        time.sleep( 3 )

        # check new values
        redis = redis_mgmt.Redis( upgrade_server['id'] )
        ret = redis.connect( upgrade_server['ip'], upgrade_server['redis_port'] )
        self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (upgrade_server['id'], upgrade_server['ip'], upgrade_server['redis_port']) )

        for i in range( 0, 50 ):
            cmd = 'get %s%d\r\n' % (self.key_base, i)
            redis.write( cmd )
            redis.read_until( '\r\n' )
            res = redis.read_until( '\r\n' )
            self.assertEqual( res, '%d\r\n' % i, 'failed to get values from redis%d. %s != %d' % (upgrade_server['id'], res, i) )
        util.log( 'succeeded : check values with get operations on pgs%d.' % (upgrade_server['id']) )

        # shutdown load generators
        for i in range( len(load_gen_list) ):
            load_gen_list[i].quit()
            load_gen_list[i].join()

        util.log_server_state( self.cluster )

        return 0
    def test_7_remaining_hbc_connection( self ):
        util.print_frame()

        # check pgs
        for server in self.cluster['servers']:
            before_cnt_redis = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
            before_cnt_smr = util.get_clients_count_of_smr(server['smr_mgmt_port'])

            cmd = 'pgs_leave %s %d forced\r\npgs_del %s %d' % (self.cluster['cluster_name'], server['id'], self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

        for server in self.cluster['servers']:
            # check redis
            success = False
            for i in range(5):
                after_cnt = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
                if after_cnt <= 2:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )
            util.log( 'succeeded : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )

            # check smr
            success = False
            expected = 0
            for i in range(5):
                after_cnt = util.get_clients_count_of_smr(server['smr_mgmt_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )
            util.log( 'succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )

        # check gateway
        for server in self.cluster['servers']:
            before_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])

            cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

        for server in self.cluster['servers']:
            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals( success, True, 'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
            util.log( 'succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
Example #4
0
    def test_7_remaining_hbc_connection( self ):
        util.print_frame()

        # check pgs
        for server in self.cluster['servers']:
            before_cnt_redis = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
            before_cnt_smr = util.get_clients_count_of_smr(server['smr_mgmt_port'])

            cmd = 'pgs_leave %s %d\r\npgs_del %s %d' % (self.cluster['cluster_name'], server['id'], self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

        for server in self.cluster['servers']:
            # check redis
            success = False
            for i in range(5):
                after_cnt = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
                if after_cnt <= 2:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )
            util.log( 'succeeded : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )

            # check smr
            success = False
            expected = 0
            for i in range(5):
                after_cnt = util.get_clients_count_of_smr(server['smr_mgmt_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )
            util.log( 'succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )

        # check gateway
        for server in self.cluster['servers']:
            before_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])

            cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

        for server in self.cluster['servers']:
            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals( success, True, 'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
            util.log( 'succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
Example #5
0
def request_to_start_gateway(cluster_name,
                             server,
                             leader_cm,
                             check_state=True):
    # add gateway configuration to confmaster
    cmd = 'gw_add %s %d %s %s %d' % (cluster_name, server['id'],
                                     server['pm_name'], server['ip'],
                                     server['gateway_port'])
    result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1

    # start gateway process
    ret = util.start_gateway(server['id'], server['ip'], leader_cm['cm_port'],
                             cluster_name, server['gateway_port'])
    if ret != 0:
        util.log('failed to start_gateway. server:%s, id:%d' %
                 (server['ip'], server['id']))
        return -1

    # check gateway state
    if check_state:
        ok = False
        try_cnt = 0
        while try_cnt < 5:
            try_cnt += 1
            if util.check_gateway_state(cluster_name, leader_cm, server):
                ok = True
                break
            time.sleep(0.5)

        if not ok:
            util.log('failed to start_gateway, invalid state of gateway.')
            return -1

    # check inactive redis connections of gateway
    if check_state:
        ok = False
        try_cnt = 0
        while try_cnt < 10:
            try_cnt += 1
            inactive_conns = util.gw_info_redis_disccons(server['ip'],
                                                         server['gateway_port'])
            if inactive_conns == 0:
                ok = True
                break
            else:
                time.sleep(0.5)

        if not ok:
            util.log(
                'failed to start_gateway, invalid number of inactive redis connections. %d'
                % inactive_conns)
            return -1

    util.log('succeeded to start_gateway. server:%s, id:%d' %
             (server['ip'], server['id']))
    return 0
Example #6
0
def request_to_shutdown_gateway( cluster_name, server, leader_cm, check=False ):
    ip = server['ip']
    port = server['gateway_port']
    id = server['id']

    # delete gateway configuration from confmaster
    cmd = 'gw_del %s %d' % (cluster_name, id)
    result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1

    # check client connection
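    # One remaining client connection is expected below, presumably the connection
    # opened by this check itself once confmaster has dropped its own connections.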
    ok = False
    for i in range(10):
        client_conn_num = util.gw_info_client_cnt(ip, port)
        if client_conn_num == 1:
            ok = True
            break
        else:
            time.sleep(1)

    if ok == False:
        util.log('failed to shutdown_gateway, invalid number of client connections. %d' % (client_conn_num - 1))
        return -1

    # shutdown gateway process
    if util.shutdown_gateway( id, port ) != 0:
        util.log('failed to shutdown_gateway %d' % (id))
        return -1
    util.log('succeeded to shutdown_gateway. %d' % (id))
    return 0
Example #7
0
def request_to_shutdown_gateway(cluster_name, server, leader_cm, check=False):
    ip = server['ip']
    port = server['gateway_port']
    id = server['id']

    # delete gateway configuration from confmaster
    cmd = 'gw_del %s %d' % (cluster_name, id)
    result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1

    # check client connection
    ok = False
    for i in range(10):
        client_conn_num = util.gw_info_client_cnt(ip, port)
        if client_conn_num == 1:
            ok = True
            break
        else:
            time.sleep(1)

    if ok == False:
        util.log(
            'failed to shutdown_gateway, invalid number of client connections. %d'
            % (client_conn_num - 1))
        return -1

    # shutdown gateway process
    if util.shutdown_gateway(id, port) != 0:
        util.log('failed to shutdown_gateway %d' % (id))
        return -1
    util.log('succeeded to shutdown_gateway. %d' % (id))
    return 0
Example #8
0
def add_physical_machine_to_mgmt(mgmt_ip, mgmt_port, pm_name, pm_ip):
    cmd = 'pm_add %s %s' % (pm_name, pm_ip)
    result = util.cm_command(mgmt_ip, mgmt_port, cmd)
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1
    return 0
Example #9
0
def fi_count(fi, ip, port):
    cmd = ('fi_count %s %s' % (fi[0], fi[1])).lower()
    reply = util.cm_command(ip, port, cmd)
    try:
        return int(reply)
    except ValueError as e:
        util.log("fi_count fail. cmd: \"%s\", reply: \"%s\"" % (cmd, reply))
        return -1
Example #10
0
def finalize_info_of_cm_about_pgs( cluster, server, leader_cm ):
    cmd = 'pgs_leave %s %d' % (cluster['cluster_name'], server['id'])
    result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1
    time.sleep( 3 )

    cmd = 'pgs_del %s %d' % (cluster['cluster_name'], server['id'])
    result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1
    time.sleep( 3 )
    return 0
Example #11
0
def finalize_info_of_cm_about_pgs(cluster, server, leader_cm):
    cmd = 'pgs_leave %s %d forced' % (cluster['cluster_name'], server['id'])
    result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1
    time.sleep(3)

    cmd = 'pgs_del %s %d' % (cluster['cluster_name'], server['id'])
    result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1
    time.sleep(3)
    return 0
Example #12
0
def fi_count(fi, ip, port):
    cmd = ('fi_count %s %s' % (fi[0], fi[1])).lower()
    reply = util.cm_command(ip, port, cmd)
    try:
        return int(reply)
    except ValueError as e:
        util.log("fi_count fail. cmd: \"%s\", reply: \"%s\"" % (cmd, reply))
        return -1
Example #13
0
def add_physical_machine_to_mgmt( mgmt_ip, mgmt_port, pm_name, pm_ip ):
    cmd = 'pm_add %s %s' % (pm_name, pm_ip)
    result = util.cm_command( mgmt_ip, mgmt_port, cmd )
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1
    return 0
Example #14
0
def fi_add(fi, count, ip, port):
    cmd = ('fi_add %s %s %s %d' % (fi[0], fi[1], fi[2], count)).lower()
    reply = util.cm_command(ip, port, cmd)
    ret = json.loads(reply)
    state = ret['state']
    if 'success' == state:
        return True
    else:
        util.log("fi_add fail. cmd: \"%s\", reply: \"%s\"" % (cmd, reply))
        return False
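
# A minimal usage sketch (hypothetical fault-injection spec and confmaster address)
# combining fi_add() above with fi_count() from the earlier example:
def _fi_add_and_verify(fi, count, ip, port):
    # register the fault injection, then log the counter reported back by confmaster
    if not fi_add(fi, count, ip, port):
        return False
    util.log('fi_count after fi_add: %d' % fi_count(fi, ip, port))
    return True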
Example #15
0
def classify_cm(servers):
    cm_leaders = []
    cm_followers = []
    for s in servers:
        ret = json.loads(util.cm_command('0.0.0.0', s['cm_port'], 'cluster_ls'), encoding='ascii')['state']
        if ret == 'success':
            cm_leaders.append({'ip':'0.0.0.0', 'port':s['cm_port']})
        elif ret == 'redirect':
            cm_followers.append({'ip':'0.0.0.0', 'port':s['cm_port']})
    return {'leader':cm_leaders, 'follower':cm_followers}
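
# Hypothetical usage of classify_cm() above: pick the leader confmaster out of a
# server list (each entry is assumed to carry a 'cm_port') before sending
# management commands.
def _leader_cm_addr(servers):
    cms = classify_cm(servers)
    if len(cms['leader']) == 0:
        return None
    return cms['leader'][0]['ip'], cms['leader'][0]['port']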
Example #16
0
def fi_add(fi, count, ip, port):
    cmd = ('fi_add %s %s %s %d' % (fi[0], fi[1], fi[2], count)).lower()
    reply = util.cm_command(ip, port, cmd)
    ret = json.loads(reply)
    state = ret['state']
    if 'success' == state:
        return True
    else:
        util.log("fi_add fail. cmd: \"%s\", reply: \"%s\"" % (cmd, reply))
        return False
Example #17
0
def initialize_cluster(cluster, leader_cm=None):
    if leader_cm == None:
        leader_cm = cluster['servers'][0]
    servers = cluster['servers']

    initialize_physical_machine_znodes(leader_cm['ip'], leader_cm['cm_port'],
                                       config.machines)

    cmd = 'cluster_add %s %s' % (cluster['cluster_name'],
                                 cluster['quorum_policy'])
    result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1

    slot_no = 0
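    # cluster['slots'] is assumed to hold (begin, end) pairs, one pair per entry of
    # pg_id_list; a begin value of -1 means no slot range is mapped to that PG.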
    for pg_id in cluster['pg_id_list']:
        cmd = 'pg_add %s %d' % (cluster['cluster_name'], pg_id)
        result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
        jobj = json.loads(result)
        if jobj['state'] != 'success':
            util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
            return -1

        if cluster['slots'][slot_no] != -1:
            cmd = 'slot_set_pg %s %d:%d %d' % (
                cluster['cluster_name'], cluster['slots'][slot_no],
                cluster['slots'][slot_no + 1], pg_id)
            result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'],
                                     cmd)
            jobj = json.loads(result)
            if jobj['state'] != 'success':
                util.log('failed to execute. cmd:%s, result:%s' %
                         (cmd, result))
                return -1

        slot_no = slot_no + 2

    for server in servers:
        initialize_info_of_cm_about_pgs(cluster, server, leader_cm)

    return 0
Example #18
0
def classify_cm(servers):
    cm_leaders = []
    cm_followers = []
    for s in servers:
        ret = json.loads(util.cm_command('0.0.0.0', s['cm_port'],
                                         'cluster_ls'),
                         encoding='ascii')['state']
        if ret == 'success':
            cm_leaders.append({'ip': '0.0.0.0', 'port': s['cm_port']})
        elif ret == 'redirect':
            cm_followers.append({'ip': '0.0.0.0', 'port': s['cm_port']})
    return {'leader': cm_leaders, 'follower': cm_followers}
Example #19
0
def initialize_info_of_cm_about_pgs( cluster, server, leader_cm, pg_id=None ):
    if pg_id == None:
        pg_id = server['pg_id']

    cmd = 'pgs_add %s %d %d %s %s %d %d' % (cluster['cluster_name'], server['id'], pg_id, server['pm_name'], server['ip'], server['smr_base_port'], server['redis_port'])
    result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1

    cmd = 'pgs_join %s %d' % (cluster['cluster_name'], server['id'])
    result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
    try:
        jobj = json.loads(result)
        if jobj['state'] != 'success':
            util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
            return -1
    except ValueError:
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1
    return 0
Example #20
0
def initialize_cluster( cluster, leader_cm=None ):
    if leader_cm == None:
        leader_cm = cluster['servers'][0]
    servers = cluster['servers']

    initialize_physical_machine_znodes( leader_cm['ip'], leader_cm['cm_port'], config.machines )

    cmd = 'cluster_add %s %s' % (cluster['cluster_name'], cluster['quorum_policy'])
    result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd  )
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1

    slot_no = 0
    for pg_id in cluster['pg_id_list']:
        cmd = 'pg_add %s %d' % (cluster['cluster_name'], pg_id)
        result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
        jobj = json.loads(result)
        if jobj['state'] != 'success':
            util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
            return -1

        if cluster['slots'][slot_no] != -1:
            cmd = 'slot_set_pg %s %d:%d %d' % (cluster['cluster_name'], cluster['slots'][slot_no], cluster['slots'][slot_no+1], pg_id)
            result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
            jobj = json.loads(result)
            if jobj['state'] != 'success':
                util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
                return -1

        slot_no = slot_no + 2

    for server in servers:
        initialize_info_of_cm_about_pgs( cluster, server, leader_cm )

    return 0
Example #21
0
def initialize_physical_machine_znodes( ip, port, pm_list ):
    for pm in pm_list:
        if pm['type'] == 'virtual':
            pm_ip = pm['virtual_ip']
        else:
            pm_ip = pm['ip']

        cmd = 'pm_add %s %s' % (pm['name'], pm_ip)
        util.log('ip:%s, port:%d, cmd:%s' % (ip, port, cmd))
        result = util.cm_command( ip, port, cmd )
        jobj = json.loads(result)
        if jobj['state'] != 'success':
            util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
            return -1
    return 0
Example #22
0
def initialize_physical_machine_znodes(ip, port, pm_list):
    for pm in pm_list:
        if pm['type'] == 'virtual':
            pm_ip = pm['virtual_ip']
        else:
            pm_ip = pm['ip']

        cmd = 'pm_add %s %s' % (pm['name'], pm_ip)
        util.log('ip:%s, port:%d, cmd:%s' % (ip, port, cmd))
        result = util.cm_command(ip, port, cmd)
        jobj = json.loads(result)
        if jobj['state'] != 'success':
            util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
            return -1
    return 0
Example #23
0
def request_to_start_gateway( cluster_name, server, leader_cm, check_state=True ):
    # add gateway configuration to confmaster
    cmd = 'gw_add %s %d %s %s %d' % (cluster_name, server['id'], server['pm_name'], server['ip'], server['gateway_port'])
    result = util.cm_command( leader_cm['ip'], leader_cm['cm_port'], cmd )
    jobj = json.loads(result)
    if jobj['state'] != 'success':
        util.log('failed to execute. cmd:%s, result:%s' % (cmd, result))
        return -1

    # start gateway process
    ret = util.start_gateway( server['id'], server['ip'], leader_cm['cm_port'], cluster_name, server['gateway_port'] )
    if ret != 0:
        util.log('failed to start_gateway. server:%s, id:%d' % (server['ip'], server['id']))
        return -1

    # check gateway state
    if check_state:
        ok = False
        try_cnt = 0
        while try_cnt < 5:
            try_cnt += 1
            if util.check_gateway_state( cluster_name, leader_cm, server ):
                ok = True
                break
            time.sleep(0.5)

        if not ok:
            util.log('failed to start_gateway, invalid state of gateway.')
            return -1

    # check inactive redis connections of gateway
    if check_state:
        ok = False
        try_cnt = 0
        while try_cnt < 10:
            try_cnt += 1
            inactive_conns = util.gw_info_redis_disccons(server['ip'], server['gateway_port'])
            if inactive_conns == 0:
                ok = True
                break
            else:
                time.sleep(0.5)

        if not ok:
            util.log('failed to start_gateway, invalid number of inactive redis connections. %d' % inactive_conns)
            return -1

    util.log('succeeded to start_gateway. server:%s, id:%d' % (server['ip'], server['id']))
    return 0
Example #24
0
def finalize_cluster(cluster, leader_cm=None):
    if leader_cm == None:
        leader_cm = cluster['servers'][0]
    servers = cluster['servers']

    # PGS
    for server in servers:
        if finalize_info_of_cm_about_pgs(cluster, server, leader_cm) != 0:
            return -1

    # PG
    for pg_id in cluster['pg_id_list']:
        if util.cm_success(util.cm_command(leader_cm['ip'], leader_cm['cm_port'],
            'pg_del %s %d' % (cluster['cluster_name'], pg_id)))[0] == False:
            return -1

    # Cluster
    if util.cluster_del(leader_cm['ip'], leader_cm['cm_port'], cluster['cluster_name']) == False:
        return -1

    return 0
Example #25
0
def finalize_cluster(cluster, leader_cm=None):
    if leader_cm == None:
        leader_cm = cluster['servers'][0]
    servers = cluster['servers']

    # PGS
    for server in servers:
        if finalize_info_of_cm_about_pgs(cluster, server, leader_cm) != 0:
            return -1

    # PG
    for pg_id in cluster['pg_id_list']:
        if util.cm_success(
                util.cm_command(
                    leader_cm['ip'], leader_cm['cm_port'], 'pg_del %s %d' %
                    (cluster['cluster_name'], pg_id)))[0] == False:
            return -1

    # Cluster
    if util.cluster_del(leader_cm['ip'], leader_cm['cm_port'],
                        cluster['cluster_name']) == False:
        return -1

    return 0
Example #26
0
    def test_gateway_add_del(self):
        util.print_frame()

        api = ARC_API(ZK_ADDR, CLUSTER_NAME, logFilePrefix = self.arcci_log, so_path = self.so_path)

        # Add gateway
        gw_port = 10000
        gw_id = 10
        cmd = 'gw_add %s %d %s %s %d' % (CLUSTER_NAME, gw_id, HOST_NAME, HOST_IP, gw_port)
        ret = util.cm_command(MGMT_IP, MGMT_PORT, cmd)
        if ret != None and len(ret) >= 2:
            ret = ret[:-2]
        util.log('cmd:"%s", ret:"%s"' % (cmd, ret))
        if not ret.startswith('{"state":"success","msg":"+OK"}'):
            self.fail('failed to add gateway')

        # Deploy gateway
        server = self.cluster['servers'][0]
        ret = util.deploy_gateway(gw_id)
        self.assertTrue(ret, 'failed to deploy_gateway')

        # Start gateway
        ret = util.start_gateway( gw_id, server['ip'], MGMT_PORT, server['cluster_name'], gw_port)
        self.assertEqual( ret, 0, 'failed : start gateway%d' % gw_id )

        time.sleep(5)

        # Check if gateway is added
        added_gw = {"ip":HOST_IP,"port":gw_port}

        log_reader = LogReader(api.conf.log_file_prefix)
        found = False
        while True:
            line = log_reader.readline()
            if line == None:
                break

            if line.find(MSG_GATEWAY_ADD_ZK) == -1:
                continue

            gw = line.split('data:')[1]
            gw = ast.literal_eval(gw)

            if gw['ip'] == added_gw['ip'] and gw['port'] == added_gw['port']:
                found = True

        if not found:
            self.fail('FAIL, load gateway information, gw:%s' % util.json_to_str(added_gw))
        else:
            util.log('SUCCESS, load gateway information.')

        # Delete gateway
        cmd = 'gw_del %s %d' % (CLUSTER_NAME, gw_id)
        ret = util.cm_command(MGMT_IP, MGMT_PORT, cmd)
        if ret != None and len(ret) >= 2:
            ret = ret[:-2]
        util.log('cmd:"%s", ret:"%s"' % (cmd, ret))
        if not ret.startswith('{"state":"success","msg":"+OK"}'):
            self.fail('failed to delete gateway')

        # Check if gateway is deleted
        deleted_gw = {"ip":HOST_IP,"port":gw_port}

        found = check_gateway_deleted(deleted_gw, api)
        if not found:
            self.fail('FAIL, delete gateway information, gw:%s' % util.json_to_str(deleted_gw))
        else:
            util.log('SUCCESS, delete gateway information.')

        # Stop gateway
        ret = util.shutdown_gateway(gw_id, gw_port)
        self.assertEqual(ret, 0, 'failed : shutdown gateway%d' % gw_id)

        api.destroy()
    def test_3_heartbeat_target_connection_count( self ):
        util.print_frame()

        util.log( 'wait until all connections are established' )
        for i in range(1, 8):
            time.sleep(1)
            util.log( '%d sec' % i )

        # check pgs
        for server in self.cluster['servers']:
            before_cnt_redis = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
            before_cnt_smr = util.get_clients_count_of_smr(server['smr_mgmt_port'])

            cmd = 'pgs_leave %s %d forced' % (self.cluster['cluster_name'], server['id'])
            ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)
            jobj = json.loads(ret)
            self.assertEqual( jobj['state'], 'success', 'failed : cmd="%s", reply="%s"' % (cmd, ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd, ret[:-2]) )

            # check redis
            success = False
            for i in range(5):
                after_cnt = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
                if after_cnt <= 2:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )
            util.log( 'succeeded : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis) )

            # check smr
            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_smr(server['smr_mgmt_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )
            util.log( 'succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr) )

        # check gateway
        for server in self.cluster['servers']:
            before_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])

            cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
            ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)
            jobj = json.loads(ret)
            self.assertEqual( jobj['state'], 'success', 'failed : cmd="%s", reply="%s"' % (cmd, ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd, ret[:-2]) )

            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals( success, True, 'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
            util.log( 'succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
Example #28
0
    def test_random_migrate(self):
        util.print_frame()

        # start load generator
        load_gen_thrd_list = {}
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
            load_gen_thrd_list[i].start()

        ret = util.migration(self.cluster, 0, 1, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail')

        leader_cm = self.cluster['servers'][0]
        cluster_name = self.cluster['cluster_name']
        mapping = [-1] * 8192

        count = 50
        while count > 0:
            # get PN -> PG map
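            # PN_PG_Map is run-length encoded as "pgid count" pairs; the loop below
            # expands it into a per-slot mapping table of 8192 entries.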
            cmd = 'cluster_info %s' % cluster_name
            result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'],
                                     cmd)
            ret = json.loads(result)
            rle = ret['data']['cluster_info']['PN_PG_Map']
            print "PN_PG_MAP = %s" % rle
            sp = rle.split()
            index = 0
            for i in range(len(sp) / 2):
                for j in range(int(sp[i * 2 + 1])):
                    mapping[index] = int(sp[i * 2])
                    index += 1

            slot = random.randint(0, 8191)
            src_pgid = mapping[slot]
            dst_pgid = (src_pgid + 1) % 2
            slot_end = slot
            while random.randint(0, 5) <= 4:
                if slot_end < 8191 and mapping[slot_end + 1] == src_pgid:
                    slot_end += 1
                else:
                    break

            print "SLOT=%d, SRC_PGID=%d, DST_PGID=%d" % (slot, src_pgid,
                                                         dst_pgid)
            ret = util.migration(self.cluster, src_pgid, dst_pgid, slot,
                                 slot_end, 40000)
            self.assertEqual(True, ret, 'Migration Fail')

            ok = True
            for j in range(len(load_gen_thrd_list)):
                if load_gen_thrd_list[j].isConsistent() == False:
                    ok = False
                    break
            if not ok:
                break

            count -= 1

        # check consistency of load_generator
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].quit()
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].join()
            self.assertTrue(load_gen_thrd_list[i].isConsistent(),
                            'Inconsistent after migration')

        # Go back to initial configuration
        cinfo = util.cluster_info(leader_cm['ip'], leader_cm['cm_port'],
                                  cluster_name)
        for slot in util.get_slots(cinfo['cluster_info']['PN_PG_Map'], 1):
            self.assertTrue(
                util.migration(self.cluster, 1, 0, slot['begin'], slot['end'],
                               40000), 'failed to rollback migration')
Example #29
0
    def test_3_heartbeat_target_connection_count(self):
        util.print_frame()

        util.log('wait until all connections are established')
        for i in range(1, 8):
            time.sleep(1)
            util.log('%d sec' % i)

        # check pgs
        for server in self.cluster['servers']:
            before_cnt_redis = util.get_clients_count_of_redis(
                server['ip'], server['redis_port'])
            before_cnt_smr = util.get_clients_count_of_smr(
                server['smr_mgmt_port'])

            cmd = 'pgs_leave %s %d forced' % (self.cluster['cluster_name'],
                                              server['id'])
            ret = util.cm_command(self.leader_cm['ip'],
                                  self.leader_cm['cm_port'], cmd)
            jobj = json.loads(ret)
            self.assertEqual(jobj['state'], 'success',
                             'failed : cmd="%s", reply="%s"' % (cmd, ret[:-2]))
            util.log('succeeded : cmd="%s", reply="%s"' % (cmd, ret[:-2]))

            # check redis
            success = False
            for i in range(5):
                after_cnt = util.get_clients_count_of_redis(
                    server['ip'], server['redis_port'])
                if after_cnt <= 2:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals(
                success, True,
                'failed : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d'
                % (server['id'], server['ip'], server['redis_port'], after_cnt,
                   before_cnt_redis))
            util.log(
                'succeeded : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d'
                % (server['id'], server['ip'], server['redis_port'], after_cnt,
                   before_cnt_redis))

            # check smr
            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_smr(
                    server['smr_mgmt_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals(
                success, True,
                'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d'
                % (server['id'], server['ip'], server['smr_mgmt_port'],
                   after_cnt, expected, before_cnt_smr))
            util.log(
                'succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d'
                % (server['id'], server['ip'], server['smr_mgmt_port'],
                   after_cnt, expected, before_cnt_smr))

        # check gateway
        for server in self.cluster['servers']:
            before_cnt = util.get_clients_count_of_gw(server['ip'],
                                                      server['gateway_port'])

            cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
            ret = util.cm_command(self.leader_cm['ip'],
                                  self.leader_cm['cm_port'], cmd)
            jobj = json.loads(ret)
            self.assertEqual(jobj['state'], 'success',
                             'failed : cmd="%s", reply="%s"' % (cmd, ret[:-2]))
            util.log('succeeded : cmd="%s", reply="%s"' % (cmd, ret[:-2]))

            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_gw(
                    server['ip'], server['gateway_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals(
                success, True,
                'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.'
                % (server['id'], server['ip'], server['gateway_port'],
                   after_cnt, expected))
            util.log(
                'succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.'
                % (server['id'], server['ip'], server['gateway_port'],
                   after_cnt, expected))
    def test_7_remaining_hbc_connection(self):
        util.print_frame()

        # check pgs
        for server in self.cluster['servers']:
            before_cnt_redis = util.get_clients_count_of_redis(
                server['ip'], server['redis_port'])
            before_cnt_smr = util.get_clients_count_of_smr(
                server['smr_mgmt_port'])

            cmd = 'pgs_leave %s %d forced\r\npgs_del %s %d' % (
                self.cluster['cluster_name'], server['id'],
                self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                            cmd)

        for server in self.cluster['servers']:
            # check redis
            success = False
            for i in range(5):
                after_cnt = util.get_clients_count_of_redis(
                    server['ip'], server['redis_port'])
                if after_cnt <= 2:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals(
                success, True,
                'failed : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d'
                % (server['id'], server['ip'], server['redis_port'], after_cnt,
                   before_cnt_redis))
            util.log(
                'succeeded : the number of connections to redis%d(%s:%d) is %d, expected=n<=2, before=%d'
                % (server['id'], server['ip'], server['redis_port'], after_cnt,
                   before_cnt_redis))

            # check smr
            success = False
            expected = 0
            for i in range(5):
                after_cnt = util.get_clients_count_of_smr(
                    server['smr_mgmt_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals(
                success, True,
                'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d'
                % (server['id'], server['ip'], server['smr_mgmt_port'],
                   after_cnt, expected, before_cnt_smr))
            util.log(
                'succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d'
                % (server['id'], server['ip'], server['smr_mgmt_port'],
                   after_cnt, expected, before_cnt_smr))

        # check gateway
        for server in self.cluster['servers']:
            before_cnt = util.get_clients_count_of_gw(server['ip'],
                                                      server['gateway_port'])

            cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                            cmd)

        for server in self.cluster['servers']:
            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_gw(
                    server['ip'], server['gateway_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals(
                success, True,
                'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.'
                % (server['id'], server['ip'], server['gateway_port'],
                   after_cnt, expected))
            util.log(
                'succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.'
                % (server['id'], server['ip'], server['gateway_port'],
                   after_cnt, expected))

        # Go back to initial configuration
        # Cleanup PG
        self.assertTrue(
            util.cm_success(
                util.cm_command(
                    self.leader_cm['ip'], self.leader_cm['cm_port'],
                    'pg_del %s %d' %
                    (self.cluster['cluster_name'],
                     self.cluster['servers'][0]['pg_id'])))[0])

        # Cleanup processes of PGS and GW
        for s in self.cluster['servers']:
            self.assertEqual(0, util.shutdown_redis(s['id'], s['redis_port']),
                             'failed to kill redis %d process' % s['id'])
            self.assertEqual(
                0, util.shutdown_smr(s['id'], s['ip'], s['smr_base_port']),
                'failed to kill smr %d process' % s['id'])
            self.assertEqual(0,
                             util.shutdown_gateway(s['id'], s['gateway_port']),
                             'failed to kill gw %d process' % s['id'])

        # Recover PG
        self.assertTrue(
            util.install_pg(self.cluster,
                            self.cluster['servers'],
                            self.cluster['servers'][0],
                            start_gw=True),
            'failed to recover PGS and GW in a PM')
Example #31
0
    def test_random_migrate(self):
        util.print_frame()

        # start load generator
        load_gen_thrd_list = {}
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
            load_gen_thrd_list[i].start()

        ret = util.migration(self.cluster, 0, 1, 4096, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail')

        leader_cm = self.cluster['servers'][0]
        cluster_name = self.cluster['cluster_name']
        mapping = [-1] * 8192

        count = 50
        while count > 0:
            # get PN -> PG map
            cmd = 'cluster_info %s' % cluster_name
            result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
            ret = json.loads(result)
            rle = ret['data']['cluster_info']['PN_PG_Map']
            print "PN_PG_MAP = %s" % rle
            sp = rle.split()
            index = 0
            for i in range(len(sp)/2):
                for j in range(int(sp[i*2+1])):
                    mapping[index] = int(sp[i*2])
                    index += 1

            slot = random.randint(0, 8191)
            src_pgid = mapping[slot]
            dst_pgid = (src_pgid+1) % 2
            slot_end = slot
            while random.randint(0,5) <= 4:
                if slot_end < 8191 and mapping[slot_end+1] == src_pgid:
                    slot_end += 1
                else:
                    break

            print "SLOT=%d, SRC_PGID=%d, DST_PGID=%d" % (slot, src_pgid, dst_pgid)
            ret = util.migration(self.cluster, src_pgid, dst_pgid, slot, slot_end, 40000)
            self.assertEqual(True, ret, 'Migration Fail')

            ok = True
            for j in range(len(load_gen_thrd_list)):
                if load_gen_thrd_list[j].isConsistent() == False:
                    ok = False
                    break
            if not ok:
                break

            count -= 1

        # check consistency of load_generator
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].quit()
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].join()
            self.assertTrue(load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')

        # Go back to initial configuration
        cinfo = util.cluster_info(leader_cm['ip'], leader_cm['cm_port'], cluster_name)
        for slot in util.get_slots(cinfo['cluster_info']['PN_PG_Map'], 1):
            self.assertTrue(util.migration(self.cluster, 1, 0, slot['begin'], slot['end'], 40000),
                    'failed to rollback migration')
Example #32
0
    def __del_server(self, server_to_del):
        # backup data
        redis = redis_mgmt.Redis(server_to_del['id'])
        ret = redis.connect(server_to_del['ip'], server_to_del['redis_port'])
        self.assertEquals(
            ret, 0, 'failed : connect to smr%d(%s:%d)' %
            (server_to_del['id'], server_to_del['ip'],
             server_to_del['redis_port']))

        # bgsave
        ret = util.bgsave(server_to_del)
        self.assertTrue(ret, 'failed to bgsave. pgs%d' % server_to_del['id'])

        # detach pgs from cluster
        cmd = 'pgs_leave %s %d\r\n' % (server_to_del['cluster_name'],
                                       server_to_del['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(
            jobj['msg'], '+OK',
            'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))

        r = util.get_role_of_server(server_to_del)
        # If the quorum of the left master is larger than 1, the info command will be blocked.
        if r != c.ROLE_MASTER:
            # check if pgs is removed
            success = False
            for try_cnt in range(10):
                redis = redis_mgmt.Redis(server_to_del['id'])
                ret = redis.connect(server_to_del['ip'],
                                    server_to_del['redis_port'])
                self.assertEquals(
                    ret, 0, 'failed : connect to smr%d(%s:%d)' %
                    (server_to_del['id'], server_to_del['ip'],
                     server_to_del['redis_port']))
                util.log('succeeded : connect to smr%d(%s:%d)' %
                         (server_to_del['id'], server_to_del['ip'],
                          server_to_del['redis_port']))

                redis.write('info stats\r\n')
                for i in range(6):
                    redis.read_until('\r\n')
                res = redis.read_until('\r\n')
                self.assertNotEqual(
                    res, '',
                    'failed : get reply of "info stats" from redis%d(%s:%d)' %
                    (server_to_del['id'], server_to_del['ip'],
                     server_to_del['redis_port']))
                util.log(
                    'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"'
                    % (server_to_del['id'], server_to_del['ip'],
                       server_to_del['redis_port'], res[:-2]))
                no = int(res.split(':')[1])
                if no <= 100:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals(success, True, 'failed : pgs was not removed.')
        util.log('pgs is removed')

        # change state of pgs to lconn
        cmd = 'pgs_lconn %s %d\r\n' % (server_to_del['cluster_name'],
                                       server_to_del['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(
            jobj['msg'], '+OK',
            'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))

        # shutdown
        ret = testbase.request_to_shutdown_smr(server_to_del)
        self.assertEqual(ret, 0,
                         'failed : shutdown smr. id:%d' % server_to_del['id'])
        ret = testbase.request_to_shutdown_redis(server_to_del)
        self.assertEquals(
            ret, 0, 'failed : shutdown redis. id:%d' % server_to_del['id'])
        util.log('succeeded : shutdown pgs%d.' % server_to_del['id'])

        # delete pgs from cluster
        cmd = 'pgs_del %s %d\r\n' % (server_to_del['cluster_name'],
                                     server_to_del['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(
            jobj['msg'], '+OK',
            'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
Example #33
0
    def test_migration_with_expire_command(self):
        util.print_frame()

        util.log("start load_generator")
        load_gen_thrd_list = {}
        for i in range(1):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
            load_gen_thrd_list[i].start()

        time.sleep(5)  # generate load for 5 sec
        tps = 20000
        src_pg_id = 0
        dst_pg_id = 1
        leader_cm = self.cluster['servers'][0]
        src_master = util.get_server_by_role_and_pg(self.cluster['servers'],
                                                    'master', src_pg_id)
        dst_master = util.get_server_by_role_and_pg(self.cluster['servers'],
                                                    'master', dst_pg_id)

        smr = smr_mgmt.SMR(src_master['id'])
        ret = smr.connect(src_master['ip'], src_master['smr_mgmt_port'])
        if ret != 0:
            util.log('failed to connect to smr(source master)')
            return False

        src_redis = redis_mgmt.Redis(src_master['id'])
        ret = src_redis.connect(src_master['ip'], src_master['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        dst_redis = redis_mgmt.Redis(dst_master['id'])
        ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        ts = time.time()
        self.setExpireKey(src_redis,
                          'beforeCheckpoint~beforeCheckpoint:expired', 10)
        self.setExpireKey(src_redis,
                          'beforeCheckpoint~beforeCheckpoint:persist', 20)
        self.setExpireS3Key(src_redis,
                            'S3:beforeCheckpoint~beforeCheckpoint:expired', 10)
        self.setExpireS3Key(src_redis,
                            'S3:beforeCheckpoint~beforeCheckpoint:persist', 20)

        self.setExpireS3Key(src_redis, 'S3:PermanentKey', 0)
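        # Key naming convention used throughout this test:
        # '<phaseWhenSet>~<phaseWhenChecked>:expired|persist'.  Keys with a 10s
        # TTL are expected to expire before the 15-second check below, keys
        # with a 20s TTL (and S3:PermanentKey, which gets no TTL) must still be
        # alive when PERSIST is issued.  The assertFalse guard before each
        # sleep verifies that setup has not already consumed the timing window.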

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis,
                              'beforeCheckpoint~beforeCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis,
                              'beforeCheckpoint~beforeCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(
            src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(
            src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")

        util.log(">>> migrate test with expire command start(%s), ts:%d" %
                 (time.asctime(), ts))

        ts = time.time()
        self.setExpireKey(src_redis,
                          'beforeCheckpoint~afterCheckpoint:expired', 10)
        self.setExpireKey(src_redis,
                          'beforeCheckpoint~afterCheckpoint:persist', 20)
        self.setExpireS3Key(src_redis,
                            'S3:beforeCheckpoint~afterCheckpoint:expired', 10)
        self.setExpireS3Key(src_redis,
                            'S3:beforeCheckpoint~afterCheckpoint:persist', 20)

        # notify dst_redis of migration start
        util.log(">>> notify dst_redis of migration start (%s)" %
                 time.asctime())

        cmd = 'migconf migstart %d-%d\r\n' % (0, 8191)
        dst_redis.write(cmd)
        res = dst_redis.read_until('\r\n')
        self.assertEquals(res, '+OK\r\n')
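        # 'migconf migstart <range>' presumably tells the destination redis to
        # start accepting migrated data for slots 0-8191 even though the
        # cluster map still assigns that range to the source PG.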

        # remote partial checkpoint
        util.log(">>> start remote checkpoint and load (%s)" % time.asctime())
        cmd = "./cluster-util --getandplay %s %d %s %d %d-%d %d" % (
            src_master['ip'], src_master['redis_port'], dst_master['ip'],
            dst_master['redis_port'], 0, 8191, tps)
        p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd,
                                 True, None, subprocess.PIPE, None)

        ret = p.wait()
        for line in p.stdout:
            if line.find("Checkpoint Sequence Number:") != -1:
                util.log("seqnumber : " + line[line.rfind(":") + 1:])
                seq = int(line[line.rfind(":") + 1:])
            util.log(">>>" + str(line.rstrip()))

        self.assertEqual(0, ret)
        util.log(">>> end remote checkpoint and load (%s)" % time.asctime())

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis,
                              'beforeCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis,
                              'beforeCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:beforeCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:beforeCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")

        # bgsave for testing later about recovery during migration
        util.log(
            ">>> bgsave for testing later about recovery during migration (%s)"
            % time.asctime())
        cmd = 'bgsave\r\n'
        dst_redis.write(cmd)
        res = dst_redis.read_until('\r\n')
        self.assertEquals(res, '+Background saving started\r\n')

        ts = time.time()
        self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:expired',
                          10)
        self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:persist',
                          20)
        self.setExpireS3Key(src_redis,
                            'S3:afterCheckpoint~afterCheckpoint:expired', 10)
        self.setExpireS3Key(src_redis,
                            'S3:afterCheckpoint~afterCheckpoint:persist', 20)

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis,
                              'afterCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis,
                              'afterCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:afterCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:afterCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:expired',
                          10)
        self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:persist',
                          100)
        self.setExpireS3Key(src_redis,
                            'S3:afterCheckpoint~duringCatchup:expired', 10)
        self.setExpireS3Key(src_redis,
                            'S3:afterCheckpoint~duringCatchup:persist', 100)

        # remote catchup (smr log migration)
        util.log(">>> start remote catchup (%s)" % time.asctime())

        dst_host = dst_master['ip']
        dst_smr_port = dst_master['smr_base_port']
        rle = '1 8192'
        num_part = 8192
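        # 'migrate start <host> <port> <seq> <tps> <num_part> <rle>' starts
        # shipping SMR log entries from the checkpoint sequence <seq> to the
        # destination SMR, rate-limited to <tps>.  The rle value '1 8192' is
        # presumably a run-length encoding of the migrated partition bitmap:
        # one run covering all 8192 partitions.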

        smr.write('migrate start %s %d %d %d %d %s\r\n' %
                  (dst_host, dst_smr_port, seq, tps, num_part, rle))
        response = smr.read_until('\r\n')
        if response[:3] != '+OK':
            util.log('failed to execute migrate start command, response:%s' %
                     response)
            return False
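        # Poll 'migrate info' until the gap between the local log sequence and
        # the already-migrated sequence drops below 500000, i.e. catch-up is
        # close enough that the ownership switch (mig2pc) can finish quickly.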

        while True:
            smr.write('migrate info\r\n')
            response = smr.read_until('\r\n')
            seqs = response.split()
            logseq = int(seqs[1].split(':')[1])
            mig = int(seqs[2].split(':')[1])
            util.log('migrate info: %s' % response)
            if (logseq - mig < 500000):
                util.log('Remote catchup almost done. try mig2pc')
                break
            time.sleep(1)

        util.log(">>> sleep until 90 sec pass")
        self.assertFalse(time.time() - ts >= 90)
        time.sleep(90 - (time.time() - ts))

        res = self.persistKey(src_redis,
                              'afterCheckpoint~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis,
                              'afterCheckpoint~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:afterCheckpoint~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:afterCheckpoint~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:expired', 10)
        self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:persist', 20)
        self.setExpireS3Key(src_redis,
                            'S3:duringCatchup~duringCatchup:expired', 10)
        self.setExpireS3Key(src_redis,
                            'S3:duringCatchup~duringCatchup:persist', 20)

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:duringCatchup~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis,
                                'S3:duringCatchup~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:expired', 10)
        self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:persist', 20)
        self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:expired',
                            10)
        self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:persist',
                            20)

        util.log(">>> remote catchup phase almost done (%s)" % time.asctime())

        # mig2pc
        util.log(">>> start mig2pc (%s)" % time.asctime())

        cmd = 'mig2pc %s %d %d %d %d' % (self.cluster['cluster_name'],
                                         src_pg_id, dst_pg_id, 0, 8191)
        result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
        util.log('mig2pc result : ' + result)
        if not result.startswith('{"state":"success","msg":"+OK"}\r\n'):
            util.log('failed to execute mig2pc command, result:%s' % result)
            return False
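        # mig2pc presumably runs the two-phase commit that atomically moves
        # ownership of slots 0-8191 from the source PG to the destination PG in
        # the cluster map; from here on, new keys in the range are written to
        # dst_redis, which is why the next batch of keys targets it.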

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(dst_redis,
                                'S3:duringCatchup~afterMig2pc:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(dst_redis,
                                'S3:duringCatchup~afterMig2pc:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:expired', 10)
        self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:persist', 20)
        self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired', 10)
        self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist', 20)

        # finish migration
        smr.write('migrate interrupt\r\n')
        response = smr.read_until('\r\n')
        util.log('migrate interrupt: %s' % response)
        smr.disconnect()

        # notify dst_redis of migration end
        util.log(">>> notify dst_redis of migration end (%s)" % time.asctime())

        cmd = 'migconf migend\r\n'
        dst_redis.write(cmd)
        res = dst_redis.read_until('\r\n')
        self.assertEquals(res, '+OK\r\n')

        cmd = 'migconf clearstart %d-%d\r\n' % (0, 8191)
        src_redis.write(cmd)
        res = src_redis.read_until('\r\n')
        self.assertEquals(res, '+OK\r\n')
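        # 'migconf clearstart <range>' on the source presumably marks the
        # migrated slot range for cleanup; the actual deletion happens in the
        # rangedel step below, closed by 'migconf clearend'.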

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        self.assertTrue(
            self.isExist(dst_redis,
                         'beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis,
                         'beforeCheckpoint~beforeCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~beforeCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis,
                         'beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis,
                         'beforeCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~duringCatchup:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:duringCatchup~duringCatchup:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:duringCatchup~duringCatchup:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired'))

        # delete the migrated key range from the source
        util.log(">>> start rangedel (%s)" % time.asctime())
        cmd = "./cluster-util --rangedel %s %d %d-%d %d" % (
            src_master['ip'], src_master['redis_port'], 0, 8191, tps)
        p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd,
                                 True, None, subprocess.PIPE, None)
        ret = p.wait()

        for line in p.stdout:
            util.log(">>>" + str(line.rstrip()))

        cmd = 'migconf clearend\r\n'
        src_redis.write(cmd)
        res = src_redis.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n')

        time.sleep(5)  # generate load for 5 sec
        # check consistency of load_generator
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].quit()
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].join()
            self.assertTrue(load_gen_thrd_list[i].isConsistent(),
                            'Inconsistent after migration')

        # kill dst_redis and recover from bgsave
        util.log(">>> kill dst_redis and recover from bgsave (%s)" %
                 time.asctime())

        dst_redis.disconnect()
        ret = testbase.request_to_shutdown_redis(dst_master)
        self.assertEquals(ret, 0, 'failed to shutdown redis')
        ret = testbase.request_to_shutdown_smr(dst_master)
        self.assertEquals(ret, 0, 'failed to shutdown smr')
        time.sleep(5)

        ret = testbase.request_to_start_smr(dst_master)
        self.assertEqual(ret, 0,
                         'failed to start smr, server:%d' % dst_master['id'])

        ret = testbase.request_to_start_redis(dst_master)
        self.assertEqual(ret, 0,
                         'failed to start redis, server:%d' % dst_master['id'])

        ret = testbase.wait_until_finished_to_set_up_role(dst_master)
        self.assertEquals(
            ret, 0, 'failed to role change. server:%d' % (dst_master['id']))

        dst_redis = redis_mgmt.Redis(dst_master['id'])
        ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')
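        # After recovery from the bgsave dump taken mid-migration (plus SMR log
        # replay), the destination master must expose exactly the same
        # persist/expired key states as before the restart.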

        self.assertTrue(
            self.isExist(dst_redis,
                         'beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis,
                         'beforeCheckpoint~beforeCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~beforeCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis,
                         'beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis,
                         'beforeCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:beforeCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:afterCheckpoint~duringCatchup:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis,
                           'S3:duringCatchup~duringCatchup:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis,
                           'S3:duringCatchup~duringCatchup:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired'))

        self.assertTrue(
            self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist'))
        self.assertFalse(
            self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired'))

        self.getS3TTL(dst_redis, 'S3:PermanentKey')

        # kill dst_slave redis and recover without dump file
        util.log(">>> kill dst_redis and recover without dump file (%s)" %
                 time.asctime())
        dst_slave = util.get_server_by_role_and_pg(self.cluster['servers'],
                                                   'slave', dst_pg_id)

        ret = testbase.request_to_shutdown_redis(dst_slave)
        self.assertEquals(ret, 0, 'failed to shutdown redis')
        ret = testbase.request_to_shutdown_smr(dst_slave)
        self.assertEquals(ret, 0, 'failed to shutdown smr')
        time.sleep(5)

        ret = testbase.request_to_start_smr(dst_slave)
        self.assertEqual(ret, 0,
                         'failed to start smr, server:%d' % dst_slave['id'])

        ret = testbase.request_to_start_redis(dst_slave)
        self.assertEqual(ret, 0,
                         'failed to start redis, server:%d' % dst_slave['id'])

        ret = testbase.wait_until_finished_to_set_up_role(dst_slave)
        self.assertEquals(
            ret, 0, 'failed to role change. server:%d' % (dst_slave['id']))

        dst_redis_slave = redis_mgmt.Redis(dst_slave['id'])
        ret = dst_redis_slave.connect(dst_slave['ip'], dst_slave['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')
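        # The slave was restarted without a dump file, so it presumably
        # recovers by replaying the log / syncing from its master; the same
        # invariants must still hold on it.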

        self.assertTrue(
            self.isExist(dst_redis_slave,
                         'beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis_slave,
                         'beforeCheckpoint~beforeCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis_slave,
                           'S3:beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis_slave,
                           'S3:beforeCheckpoint~beforeCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis_slave,
                         'beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis_slave,
                         'beforeCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis_slave,
                           'S3:beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis_slave,
                           'S3:beforeCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis_slave,
                         'afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isExist(dst_redis_slave,
                         'afterCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis_slave,
                           'S3:afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis_slave,
                           'S3:afterCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(
            self.isExist(dst_redis_slave,
                         'afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(
            self.isExist(dst_redis_slave,
                         'afterCheckpoint~duringCatchup:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis_slave,
                           'S3:afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis_slave,
                           'S3:afterCheckpoint~duringCatchup:expired'))

        self.assertTrue(
            self.isExist(dst_redis_slave,
                         'duringCatchup~duringCatchup:persist'))
        self.assertFalse(
            self.isExist(dst_redis_slave,
                         'duringCatchup~duringCatchup:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis_slave,
                           'S3:duringCatchup~duringCatchup:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis_slave,
                           'S3:duringCatchup~duringCatchup:expired'))

        self.assertTrue(
            self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:persist'))
        self.assertFalse(
            self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis_slave,
                           'S3:duringCatchup~afterMig2pc:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis_slave,
                           'S3:duringCatchup~afterMig2pc:expired'))

        self.assertTrue(
            self.isExist(dst_redis_slave, 'afterMig2pc~migrateEnd:persist'))
        self.assertFalse(
            self.isExist(dst_redis_slave, 'afterMig2pc~migrateEnd:expired'))
        self.assertTrue(
            self.isS3Exist(dst_redis_slave,
                           'S3:afterMig2pc~migrateEnd:persist'))
        self.assertFalse(
            self.isS3Exist(dst_redis_slave,
                           'S3:afterMig2pc~migrateEnd:expired'))

        self.getS3TTL(dst_redis_slave, 'S3:PermanentKey')

        # Go back to initial configuration
        self.assertTrue(
            util.migration(self.cluster, dst_pg_id, src_pg_id, 0, 8191, 40000),
            'failed to rollback migration')
Example #34
0
    def pgs_add_and_del(self, upgrade_server, type):
        util.print_frame()

        util.log('[start] add and del pgs%d. type:%s' %
                 (upgrade_server['id'], type))
        util.log_server_state(self.cluster)

        # start load generator
        load_gen_list = {}
        for i in range(len(self.cluster['servers'])):
            server = self.cluster['servers'][i]
            load_gen = load_generator.LoadGenerator(server['id'], server['ip'],
                                                    server['gateway_port'])
            load_gen.start()
            load_gen_list[i] = load_gen

        # detach pgs from cluster
        cmd = 'pgs_leave %s %d\r\n' % (upgrade_server['cluster_name'],
                                       upgrade_server['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(
            jobj['msg'], '+OK',
            'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
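        # pgs_leave detaches the PGS from the cluster so gateways stop routing
        # requests to it; its data stays intact, and it either keeps
        # replicating or catches up after pgs_join below.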

        # set new values
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway('0')
        gw.connect(ip, port)
        for i in range(0, 50):
            cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
            gw.write(cmd)
            res = gw.read_until('\r\n')
            self.assertEqual(
                res, '+OK\r\n',
                'failed to set values to gw(%s:%d). cmd:%s, res:%s' %
                (ip, port, cmd[:-2], res[:-2]))

        # attach pgs to cluster
        cmd = 'pgs_join %s %d\r\n' % (upgrade_server['cluster_name'],
                                      upgrade_server['id'])
        ret = util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'],
                              cmd)
        jobj = json.loads(ret)
        self.assertEqual(jobj['msg'], '+OK',
                         'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret))
        util.log('succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]))
        time.sleep(3)
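        # The values written through the gateway while the PGS was detached
        # must now be visible on it, i.e. the rejoined PGS has caught up before
        # the GET checks below.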

        # check new values
        redis = redis_mgmt.Redis(upgrade_server['id'])
        ret = redis.connect(upgrade_server['ip'], upgrade_server['redis_port'])
        self.assertEquals(
            ret, 0, 'failed : connect to smr%d(%s:%d)' %
            (upgrade_server['id'], upgrade_server['ip'],
             upgrade_server['redis_port']))

        for i in range(0, 50):
            cmd = 'get %s%d\r\n' % (self.key_base, i)
            redis.write(cmd)
            redis.read_until('\r\n')
            res = redis.read_until('\r\n')
            self.assertEqual(
                res, '%d\r\n' % i,
                'failed to get values from redis%d. %s != %d' %
                (upgrade_server['id'], res, i))
        util.log('succeeded : check values with get operations on pgs%d.' %
                 (upgrade_server['id']))

        # shutdown load generators
        for i in range(len(load_gen_list)):
            load_gen_list[i].quit()
            load_gen_list[i].join()

        util.log_server_state(self.cluster)

        return 0
Example #35
0
    def test_quorum_with_left_pgs( self ):
        util.print_frame()

        # start load generators
        load_gen_list = {}
        for i in range( len(self.cluster['servers']) ):
            server = self.cluster['servers'][i]
            load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
            load_gen.start()
            load_gen_list[i] = load_gen

        # get master, slave1, slave2
        m, s1, s2 = util.get_mss( self.cluster )
        self.assertNotEqual( m, None, 'master is None.' )
        self.assertNotEqual( s1, None, 'slave1 is None.' )
        self.assertNotEqual( s2, None, 'slave2 is None.' )

        # detach pgs from cluster
        cmd = 'pgs_leave %s %d\r\n' % (m['cluster_name'], m['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # check if pgs is removed
        success = False
        for try_cnt in range( 10 ):
            redis = redis_mgmt.Redis( m['id'] )
            ret = redis.connect( m['ip'], m['redis_port'] )
            self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
            util.log( 'succeeded : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )

            redis.write( 'info stats\r\n' )
            for i in range( 6 ):
                redis.read_until( '\r\n' )
            res = redis.read_until( '\r\n' )
            self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
            util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (m['id'], m['ip'], m['redis_port'], res[:-2]) )
            no = int( res.split(':')[1] )
            if no <= 100:
                success = True
                break
            time.sleep( 1 )

        self.assertEquals( success, True, 'failed : pgs is not removed.' )
        util.log( 'succeeded : pgs is removed' )

        # check states of all pgs in pg
        for s in self.cluster['servers']:
            real_role = util.get_role_of_server( s )
            real_role = util.roleNumberToChar( real_role )
            smr_info = util.get_smr_info( s, self.leader_cm )
            cc_role = smr_info['smr_Role']
            cc_hb = smr_info['hb']
            if cc_hb == 'N':
                continue
            self.assertEqual( real_role, cc_role,
                              'failed : roles are different, real=%s, cc=%s' % (real_role, cc_role) )
            util.log( 'succeeded : the role of the real pgs is the same as the role in cc, real=%s, cc=%s' % (real_role, cc_role) )

        # check quorum policy
        quorum_of_hanging_master = util.get_quorum( m )
        self.assertEqual( self.quorum_policy[1], quorum_of_hanging_master,
                          'invalid quorum of left master, expected:%d, but:%d' % (self.quorum_policy[1], quorum_of_hanging_master) )
        util.log( 'succeeded : quorum of left master=%d' % quorum_of_hanging_master )
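        # self.quorum_policy presumably maps the number of healthy PGS in the
        # PG to the required commit quorum; with one PGS detached, index 1 is
        # the value expected for the remaining master.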

        # 'role lconn' to master
        cmd = 'role lconn\r\n'
        ret = util.cmd_to_smr( m, cmd )
        self.assertEqual( ret, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
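        # Forcing the current master into lconn drops it out of the
        # master/slave topology, which should trigger election of a new master
        # among the remaining members (verified below).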

        # wait for master election
        success = False
        new_master = None
        for i in range( 10 ):
            role = util.get_role_of_server( s1 )
            if role == c.ROLE_MASTER:
                success = True
                new_master = s1
                break
            role = util.get_role_of_server( s2 )
            if role == c.ROLE_MASTER:
                success = True
                new_master = s2
                break
            time.sleep( 1 )
        self.assertEqual( success, True, 'failed to elect new master' )
        util.log( 'succeeded : elect new master, master_id=%d' % new_master['id'] )

        time.sleep( 1 )
        # check the numbers of master, slave, and lconn
        cnt_master = 0
        cnt_slave = 0
        cnt_lconn = 0
        for s in self.cluster['servers']:
            role = util.get_role_of_server( s )
            if role == c.ROLE_MASTER:
                cnt_master = cnt_master + 1
            elif role == c.ROLE_SLAVE:
                cnt_slave = cnt_slave + 1
            elif role == c.ROLE_LCONN:
                cnt_lconn = cnt_lconn + 1
        self.assertEqual( cnt_master, 1, 'failed : the number of master is %s, expected 1' % cnt_master )
        self.assertEqual( cnt_slave, 1, 'failed : the number of slave is %s, expected 1' % cnt_slave )
        self.assertEqual( cnt_lconn, 1, 'failed : the number of lconn is %s, expected 1' % cnt_lconn )

        # check states of all pgs in pg
        for s in self.cluster['servers']:
            real_role = util.get_role_of_server( s )
            real_role = util.roleNumberToChar( real_role )
            smr_info = util.get_smr_info( s, self.leader_cm )
            cc_role = smr_info['smr_Role']
            cc_hb = smr_info['hb']
            if cc_hb == 'N':
                continue
            self.assertEqual( real_role, cc_role,
                              'failed : roles are different, real=%s, cc=%s' % (real_role, cc_role) )
            util.log( 'succeeded : the role of the real pgs is the same as the role in cc, real=%s, cc=%s' % (real_role, cc_role) )

        # check quorum policy
        quorum_of_new_master = util.get_quorum( new_master )
        self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
        self.assertEqual( self.quorum_policy[1], quorum_of_new_master ,
                          'invalid quorum of new master, expected:%d, but:%d' % (self.quorum_policy[1], quorum_of_new_master) )
        util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )

        # shutdown load generators
        for i in range( len(load_gen_list) ):
            load_gen_list[i].quit()
            load_gen_list[i].join()

        return 0
Example #36
0
    def test_migration_with_expire_command(self):
        util.print_frame()

        util.log("start load_generator")
        load_gen_thrd_list = {}
        for i in range(1):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
            load_gen_thrd_list[i].start()

        time.sleep(5) # generate load for 5 sec
        tps = 20000
        src_pg_id = 0
        dst_pg_id = 1
        leader_cm = self.cluster['servers'][0]
        src_master = util.get_server_by_role_and_pg(self.cluster['servers'], 'master', src_pg_id)
        dst_master = util.get_server_by_role_and_pg(self.cluster['servers'], 'master', dst_pg_id)

        smr = smr_mgmt.SMR(src_master['id'])
        ret = smr.connect(src_master['ip'], src_master['smr_mgmt_port'])
        if ret != 0:
            util.log('failed to connect to smr(source master)')
            return False

        src_redis = redis_mgmt.Redis(src_master['id'])
        ret = src_redis.connect(src_master['ip'], src_master['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

        dst_redis = redis_mgmt.Redis(dst_master['id'])
        ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

        ts = time.time()
        self.setExpireKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:expired', 10)
        self.setExpireKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:persist', 20)
        self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired', 10)
        self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist', 20)

        self.setExpireS3Key(src_redis, 'S3:PermanentKey', 0)

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis, 'beforeCheckpoint~beforeCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")

        util.log(">>> migrate test with expire command start(%s), ts:%d" % (time.asctime(), ts))

        ts = time.time()
        self.setExpireKey(src_redis, 'beforeCheckpoint~afterCheckpoint:expired', 10)
        self.setExpireKey(src_redis, 'beforeCheckpoint~afterCheckpoint:persist', 20)
        self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired', 10)
        self.setExpireS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist', 20)

        # notify dst_redis of migration start
        util.log(">>> notify dst_redis of migration start (%s)" % time.asctime())

        cmd = 'migconf migstart %d-%d\r\n' % (0, 8191)
        dst_redis.write(cmd)
        res = dst_redis.read_until('\r\n')
        self.assertEquals( res, '+OK\r\n' )

        # remote partial checkpoint
        util.log(">>> start remote checkpoint and load (%s)" % time.asctime())
        cmd = "./cluster-util --getandplay %s %d %s %d %d-%d %d" % (
                    src_master['ip'], src_master['redis_port'],
                    dst_master['ip'], dst_master['redis_port'],
                    0, 8191, tps)
        p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd, True, None, subprocess.PIPE, None)

        ret = p.wait()
        for line in p.stdout:
            if line.find("Checkpoint Sequence Number:") != -1:
                util.log("seqnumber : " + line[line.rfind(":")+1:])
                seq = int(line[line.rfind(":")+1:])
            util.log(">>>" + str(line.rstrip()))

        self.assertEqual(0, ret)
        util.log(">>> end remote checkpoint and load (%s)" % time.asctime())

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis, 'beforeCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis, 'beforeCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")

        # bgsave for testing later about recovery during migration
        util.log(">>> bgsave for testing later about recovery during migration (%s)" % time.asctime())
        cmd = 'bgsave\r\n'
        dst_redis.write(cmd)
        res = dst_redis.read_until('\r\n')
        self.assertEquals( res, '+Background saving started\r\n' )

        ts = time.time()
        self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:expired', 10)
        self.setExpireKey(src_redis, 'afterCheckpoint~afterCheckpoint:persist', 20)
        self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:expired', 10)
        self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:persist', 20)

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis, 'afterCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis, 'afterCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~afterCheckpoint:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:expired', 10)
        self.setExpireKey(src_redis, 'afterCheckpoint~duringCatchup:persist', 100)
        self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:expired', 10)
        self.setExpireS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:persist', 100)

        # remote catchup (smr log migration)
        util.log(">>> start remote catchup (%s)" % time.asctime())

        dst_host = dst_master['ip']
        dst_smr_port = dst_master['smr_base_port']
        rle = '1 8192'
        num_part = 8192

        smr.write('migrate start %s %d %d %d %d %s\r\n' % (dst_host, dst_smr_port,
                                                     seq, tps, num_part, rle))
        response = smr.read_until('\r\n')
        if response[:3] != '+OK':
            util.log('failed to execute migrate start command, response:%s' % response)
            return False

        while True:
            smr.write('migrate info\r\n')
            response = smr.read_until('\r\n')
            seqs = response.split()
            logseq = int(seqs[1].split(':')[1])
            mig = int(seqs[2].split(':')[1])
            util.log('migrate info: %s' % response)
            if (logseq-mig < 500000):
                util.log('Remote catchup almost done. try mig2pc')
                break
            time.sleep(1)

        util.log(">>> sleep until 90 sec pass")
        self.assertFalse(time.time() - ts >= 90)
        time.sleep(90 - (time.time() - ts))

        res = self.persistKey(src_redis, 'afterCheckpoint~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis, 'afterCheckpoint~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis, 'S3:afterCheckpoint~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:expired', 10)
        self.setExpireKey(src_redis, 'duringCatchup~duringCatchup:persist', 20)
        self.setExpireS3Key(src_redis, 'S3:duringCatchup~duringCatchup:expired', 10)
        self.setExpireS3Key(src_redis, 'S3:duringCatchup~duringCatchup:persist', 20)

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(src_redis, 'duringCatchup~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(src_redis, 'S3:duringCatchup~duringCatchup:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(src_redis, 'S3:duringCatchup~duringCatchup:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:expired', 10)
        self.setExpireKey(src_redis, 'duringCatchup~afterMig2pc:persist', 20)
        self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:expired', 10)
        self.setExpireS3Key(src_redis, 'S3:duringCatchup~afterMig2pc:persist', 20)

        util.log(">>> remote catchup phase almost done (%s)" % time.asctime())

        # mig2pc
        util.log(">>> start mig2pc (%s)" % time.asctime())

        cmd = 'mig2pc %s %d %d %d %d' % (self.cluster['cluster_name'], src_pg_id, dst_pg_id,
                                         0, 8191)
        result = util.cm_command(leader_cm['ip'], leader_cm['cm_port'], cmd)
        util.log('mig2pc result : ' + result)
        if not result.startswith('{"state":"success","msg":"+OK"}\r\n'):
            util.log('failed to execute mig2pc command, result:%s' % result)
            return False

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(dst_redis, 'duringCatchup~afterMig2pc:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(dst_redis, 'S3:duringCatchup~afterMig2pc:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(dst_redis, 'S3:duringCatchup~afterMig2pc:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:expired', 10)
        self.setExpireKey(dst_redis, 'afterMig2pc~migrateEnd:persist', 20)
        self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired', 10)
        self.setExpireS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist', 20)

        # finish migration
        smr.write('migrate interrupt\r\n')
        response = smr.read_until('\r\n')
        util.log('migrate interrupt: %s' % response)
        smr.disconnect()

        # notify dst_redis of migration end
        util.log(">>> notify dst_redis of migration end (%s)" % time.asctime())

        cmd = 'migconf migend\r\n'
        dst_redis.write(cmd)
        res = dst_redis.read_until('\r\n')
        self.assertEquals( res, '+OK\r\n' )

        cmd = 'migconf clearstart %d-%d\r\n' % (0, 8191)
        src_redis.write(cmd)
        res = src_redis.read_until('\r\n')
        self.assertEquals( res, '+OK\r\n' )

        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistKey(dst_redis, 'afterMig2pc~migrateEnd:expired')
        self.assertEquals(res, ":0\r\n")
        res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:persist')
        self.assertEquals(res, ":1\r\n")
        res = self.persistS3Key(dst_redis, 'S3:afterMig2pc~migrateEnd:expired')
        self.assertEquals(res, ":0\r\n")

        ts = time.time()
        util.log(">>> sleep until 15 sec pass")
        self.assertFalse(time.time() - ts >= 15)
        time.sleep(15 - (time.time() - ts))

        self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:expired'))

        self.assertTrue(self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist'))
        self.assertFalse(self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:expired'))

        self.assertTrue(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist'))
        self.assertFalse(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired'))

        self.assertTrue(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist'))
        self.assertFalse(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired'))

        # delete the migrated key range from the source
        util.log(">>> start rangedel (%s)" % time.asctime())
        cmd = "./cluster-util --rangedel %s %d %d-%d %d" % (
                    src_master['ip'], src_master['redis_port'],
                    0, 8191, tps)
        p = util.exec_proc_async(util.cluster_util_dir(src_master['id']), cmd, True, None, subprocess.PIPE, None)
        ret = p.wait()

        for line in p.stdout:
            util.log(">>>" + str(line.rstrip()))

        cmd = 'migconf clearend\r\n'
        src_redis.write(cmd)
        res = src_redis.read_until('\r\n')
        self.assertEqual(res, '+OK\r\n')

        time.sleep(5) # generate load for 5 sec
        # check consistency of load_generator
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].quit()
        for i in range(len(load_gen_thrd_list)):
            load_gen_thrd_list[i].join()
            self.assertTrue(load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')

        # kill dst_redis and recover from bgsave
        util.log(">>> kill dst_redis and recover from bgsave (%s)" % time.asctime())

        dst_redis.disconnect()
        ret = testbase.request_to_shutdown_redis(dst_master)
        self.assertEquals( ret, 0, 'failed to shutdown redis' )
        ret = testbase.request_to_shutdown_smr(dst_master)
        self.assertEquals(ret, 0, 'failed to shutdown smr')
        time.sleep(5)

        ret = testbase.request_to_start_smr(dst_master)
        self.assertEqual( ret, 0, 'failed to start smr, server:%d' % dst_master['id'] )

        ret = testbase.request_to_start_redis(dst_master)
        self.assertEqual( ret, 0, 'failed to start redis, server:%d' % dst_master['id']  )

        ret = testbase.wait_until_finished_to_set_up_role(dst_master)
        self.assertEquals( ret, 0, 'failed to role change. server:%d' % (dst_master['id']) )

        dst_redis = redis_mgmt.Redis(dst_master['id'])
        ret = dst_redis.connect(dst_master['ip'], dst_master['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

        self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~beforeCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~beforeCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis, 'beforeCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:beforeCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(self.isExist(dst_redis, 'afterCheckpoint~duringCatchup:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterCheckpoint~duringCatchup:expired'))

        self.assertTrue(self.isExist(dst_redis, 'duringCatchup~duringCatchup:persist'))
        self.assertFalse(self.isExist(dst_redis, 'duringCatchup~duringCatchup:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~duringCatchup:expired'))

        self.assertTrue(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:persist'))
        self.assertFalse(self.isExist(dst_redis, 'duringCatchup~afterMig2pc:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:duringCatchup~afterMig2pc:expired'))

        self.assertTrue(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:persist'))
        self.assertFalse(self.isExist(dst_redis, 'afterMig2pc~migrateEnd:expired'))
        self.assertTrue(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:persist'))
        self.assertFalse(self.isS3Exist(dst_redis, 'S3:afterMig2pc~migrateEnd:expired'))

        self.getS3TTL(dst_redis, 'S3:PermanentKey')

        # kill dst_slave redis and recover without dump file
        util.log(">>> kill dst_redis and recover without dump file (%s)" % time.asctime())
        dst_slave = util.get_server_by_role_and_pg(self.cluster['servers'], 'slave', dst_pg_id)

        ret = testbase.request_to_shutdown_redis(dst_slave)
        self.assertEquals( ret, 0, 'failed to shutdown redis' )
        ret = testbase.request_to_shutdown_smr(dst_slave)
        self.assertEquals(ret, 0, 'failed to shutdown smr')
        time.sleep(5)

        ret = testbase.request_to_start_smr(dst_slave)
        self.assertEqual( ret, 0, 'failed to start smr, server:%d' % dst_slave['id'] )

        ret = testbase.request_to_start_redis(dst_slave)
        self.assertEqual( ret, 0, 'failed to start redis, server:%d' % dst_slave['id']  )

        ret = testbase.wait_until_finished_to_set_up_role(dst_slave)
        self.assertEquals( ret, 0, 'failed to role change. server:%d' % (dst_slave['id']) )

        dst_redis_slave = redis_mgmt.Redis(dst_slave['id'])
        ret = dst_redis_slave.connect(dst_slave['ip'], dst_slave['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

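        # The slave was restarted without its dump file, so it has to rebuild its
        # dataset from the master; repeat the same persist/expired checks against it.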
        self.assertTrue(self.isExist(dst_redis_slave, 'beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis_slave, 'beforeCheckpoint~beforeCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~beforeCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~beforeCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis_slave, 'beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis_slave, 'beforeCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:beforeCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis_slave, 'afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isExist(dst_redis_slave, 'afterCheckpoint~afterCheckpoint:expired'))
        self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~afterCheckpoint:persist'))
        self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~afterCheckpoint:expired'))

        self.assertTrue(self.isExist(dst_redis_slave, 'afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(self.isExist(dst_redis_slave, 'afterCheckpoint~duringCatchup:expired'))
        self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~duringCatchup:persist'))
        self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:afterCheckpoint~duringCatchup:expired'))

        self.assertTrue(self.isExist(dst_redis_slave, 'duringCatchup~duringCatchup:persist'))
        self.assertFalse(self.isExist(dst_redis_slave, 'duringCatchup~duringCatchup:expired'))
        self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~duringCatchup:persist'))
        self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~duringCatchup:expired'))

        self.assertTrue(self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:persist'))
        self.assertFalse(self.isExist(dst_redis_slave, 'duringCatchup~afterMig2pc:expired'))
        self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~afterMig2pc:persist'))
        self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:duringCatchup~afterMig2pc:expired'))

        self.assertTrue(self.isExist(dst_redis_slave, 'afterMig2pc~migrateEnd:persist'))
        self.assertFalse(self.isExist(dst_redis_slave, 'afterMig2pc~migrateEnd:expired'))
        self.assertTrue(self.isS3Exist(dst_redis_slave, 'S3:afterMig2pc~migrateEnd:persist'))
        self.assertFalse(self.isS3Exist(dst_redis_slave, 'S3:afterMig2pc~migrateEnd:expired'))

        self.getS3TTL(dst_redis_slave, 'S3:PermanentKey')

        # Go back to initial configuration
        self.assertTrue(util.migration(self.cluster, dst_pg_id, src_pg_id, 0, 8191, 40000),
                'failed to rollback migration')

    def test_7_remaining_hbc_connection( self ):
        util.print_frame()

        # check pgs
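        # Record the current connection counts, then force every pgs out of the cluster
        # (pgs_leave forced + pgs_del). Once a pgs is no longer managed, the heartbeat
        # checker of the cluster manager is expected to close its connections to redis
        # and smr; the loops below verify that the remaining connection counts drop.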
        before_cnt_redis = {}
        before_cnt_smr = {}
        for server in self.cluster['servers']:
            before_cnt_redis[server['id']] = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
            before_cnt_smr[server['id']] = util.get_clients_count_of_smr(server['smr_mgmt_port'])

            cmd = 'pgs_leave %s %d forced\r\npgs_del %s %d' % (self.cluster['cluster_name'], server['id'], self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

        for server in self.cluster['servers']:
            # check redis
            success = False
            for i in range(5):
                after_cnt = util.get_clients_count_of_redis(server['ip'], server['redis_port'])
                if after_cnt <= 2:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to redis%d(%s:%d) is %d, expected<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis[server['id']]) )
            util.log( 'succeeded : the number of connections to redis%d(%s:%d) is %d, expected<=2, before=%d' % (server['id'], server['ip'], server['redis_port'], after_cnt, before_cnt_redis[server['id']]) )

            # check smr
            success = False
            expected = 0
            for i in range(5):
                after_cnt = util.get_clients_count_of_smr(server['smr_mgmt_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)
            self.assertEquals( success, True, 'failed : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr[server['id']]) )
            util.log( 'succeeded : the number of connections to smr%d(%s:%d) is %d, expected=%d, before=%d' % (server['id'], server['ip'], server['smr_mgmt_port'], after_cnt, expected, before_cnt_smr[server['id']]) )

        # check gateway
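        # After gw_del the cluster manager should drop its connections to the gateway;
        # exactly one connection is expected to remain (presumably the one used by the
        # test itself to query the count).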
        for server in self.cluster['servers']:
            before_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])

            cmd = 'gw_del %s %d' % (self.cluster['cluster_name'], server['id'])
            util.cm_command(self.leader_cm['ip'], self.leader_cm['cm_port'], cmd)

        for server in self.cluster['servers']:
            success = False
            expected = 1
            for i in range(5):
                after_cnt = util.get_clients_count_of_gw(server['ip'], server['gateway_port'])
                if after_cnt == expected:
                    success = True
                    break
                time.sleep(1)

            self.assertEquals( success, True, 'failed : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )
            util.log( 'succeeded : the number of connections to gateway%d(%s:%d) is %d, expected=%d.' % (server['id'], server['ip'], server['gateway_port'], after_cnt, expected) )

        # Go back to initial configuration
        # Cleanup PG
        self.assertTrue(util.cm_success(util.cm_command(
            self.leader_cm['ip'], self.leader_cm['cm_port'], 
            'pg_del %s %d' % (self.cluster['cluster_name'], self.cluster['servers'][0]['pg_id'])))[0])

        # Cleanup processes of PGS and GW
        for s in self.cluster['servers']:
            self.assertEqual(0, util.shutdown_redis(s['id'], s['redis_port']), 
                'failed to kill redis %d process' % s['id'])
            self.assertEqual(0, util.shutdown_smr(s['id'], s['ip'], s['smr_base_port']), 
                'failed to kill smr %d process' % s['id'])
            self.assertEqual(0, util.shutdown_gateway(s['id'], s['gateway_port']),
                'failed to kill gw %d process' % s['id'])

        # Recover PG
        self.assertTrue(
                util.install_pg(self.cluster, self.cluster['servers'], self.cluster['servers'][0], start_gw=True),
                'failed to recover PGS and GW in a PM')

    def test_quorum_with_left_pgs( self ):
        util.print_frame()

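        # Scenario: run load, force the current master to leave the cluster
        # (pgs_leave forced), check that the quorum reported for the hanging master
        # stays at 2, then push it to lconn so a new master is elected and verify
        # that the new master's quorum drops to 1.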
        # start load generators
        load_gen_list = {}
        for i in range( len(self.cluster['servers']) ):
            server = self.cluster['servers'][i]
            load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
            load_gen.start()
            load_gen_list[i] = load_gen

        # get master, slave1, slave2
        m, s1, s2 = util.get_mss( self.cluster )
        self.assertNotEqual( m, None, 'master is None.' )
        self.assertNotEqual( s1, None, 'slave1 is None.' )
        self.assertNotEqual( s2, None, 'slave2 is None.' )

        # detach pgs from cluster
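        # 'pgs_leave ... forced' only detaches the pgs from the cluster manager; the
        # smr/redis processes keep running, so the detached ("hanging") master can
        # still be queried for its role and quorum below.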
        cmd = 'pgs_leave %s %d forced\r\n' % (m['cluster_name'], m['id'])
        ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
        jobj = json.loads(ret)
        self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # check quorum policy
        quorum_of_hanging_master = util.get_quorum( m )
        self.assertEqual(2, quorum_of_hanging_master,
                          'invalid quorum of left master, expected:%d, but:%d' % (2, quorum_of_hanging_master) )
        util.log( 'succeeded : quorum of left master=%d' % quorum_of_hanging_master )

        # check if pgs is removed
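        # If the detached pgs is no longer master, wait until client traffic to it
        # drains: the value parsed from the 7th line of the 'info stats' reply
        # (presumably a connection/ops counter) dropping to <= 100 is taken as the
        # signal that the gateways have stopped sending requests to this pgs.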
        r = util.get_role_of_server(m)
        if r != c.ROLE_MASTER:
            success = False
            for try_cnt in range( 10 ):
                redis = redis_mgmt.Redis( m['id'] )
                ret = redis.connect( m['ip'], m['redis_port'] )
                self.assertEquals( ret, 0, 'failed : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                util.log( 'succeeded : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )

                redis.write( 'info stats\r\n' )
                for i in range( 6 ):
                    redis.read_until( '\r\n' )
                res = redis.read_until( '\r\n' )
                self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (m['id'], m['ip'], m['redis_port'], res[:-2]) )
                no = int( res.split(':')[1] )
                if no <= 100:
                    success = True
                    break

                time.sleep( 1 )

            self.assertEquals( success, True, 'failed : pgs is not removed.' )
        util.log( 'pgs is removed' )

        # check states of all pgs in pg
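        # First give the cluster controller up to 10 passes (0.5s sleep on each
        # mismatch) to converge, then assert that the real role of every pgs matches
        # the role recorded in the cluster controller, skipping servers whose
        # heartbeat is off.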
        for i in xrange(10):
            for s in self.cluster['servers']:
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue

                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                if real_role != cc_role:
                    time.sleep(0.5)
                    continue

        for s in self.cluster['servers']:
            smr_info = util.get_smr_info( s, self.leader_cm )
            cc_role = smr_info['smr_Role']
            cc_hb = smr_info['hb']
            if cc_hb == 'N':
                continue

            real_role = util.get_role_of_server( s )
            real_role = util.roleNumberToChar( real_role )
            self.assertEqual( real_role, cc_role,
                              'failed : roles differ, real=%s, cc=%s' % (real_role, cc_role) )
            util.log( 'succeeded : the role of the real pgs matches the role in cc, real=%s, cc=%s' % (real_role, cc_role) )

        # check quorum policy
        quorum_of_hanging_master = util.get_quorum( m )
        self.assertEqual(2, quorum_of_hanging_master,
                          'invalid quorum of left master, expected:%d, but:%d' % (2, quorum_of_hanging_master) )
        util.log( 'succeeded : quorum of left master=%d' % quorum_of_hanging_master )

        # 'role lconn' to master
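        # Putting the master into lconn gives up mastership, so one of the two slaves
        # should be elected as the new master; the loop below waits up to 10 seconds.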
        cmd = 'role lconn\r\n'
        ret = util.cmd_to_smr( m, cmd )
        self.assertEqual( ret, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
        util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

        # wait for master election
        success = False
        new_master = None
        for i in range( 10 ):
            role = util.get_role_of_server( s1 )
            if role == c.ROLE_MASTER:
                success = True
                new_master = s1
                break
            role = util.get_role_of_server( s2 )
            if role == c.ROLE_MASTER:
                success = True
                new_master = s2
                break
            time.sleep( 1 )
        self.assertEqual( success, True, 'failed to elect new master' )
        util.log( 'succeeded : elect new master, master_id=%d' % new_master['id'] )

        time.sleep( 1 )
        # check the numbers of master, slave, and lconn
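        # Expected distribution after failover: the newly elected master, one remaining
        # slave, and the old master still in lconn.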
        cnt_master = 0
        cnt_slave = 0
        cnt_lconn = 0
        for s in self.cluster['servers']:
            role = util.get_role_of_server( s )
            if role == c.ROLE_MASTER:
                cnt_master = cnt_master + 1
            elif role == c.ROLE_SLAVE:
                cnt_slave = cnt_slave + 1
            elif role == c.ROLE_LCONN:
                cnt_lconn = cnt_lconn + 1
        self.assertEqual( cnt_master, 1, 'failed : the number of master is %s, expected 1' % cnt_master )
        self.assertEqual( cnt_slave, 1, 'failed : the number of slave is %s, expected 1' % cnt_slave )
        self.assertEqual( cnt_lconn, 1, 'failed : the number of lconn is %s, expected 1' % cnt_lconn )

        # check states of all pgs in pg
        for s in self.cluster['servers']:
            real_role = util.get_role_of_server( s )
            real_role = util.roleNumberToChar( real_role )
            smr_info = util.get_smr_info( s, self.leader_cm )
            cc_role = smr_info['smr_Role']
            cc_hb = smr_info['hb']
            if cc_hb == 'N':
                continue
            self.assertEqual( real_role, cc_role,
                              'failed : roles differ, real=%s, cc=%s' % (real_role, cc_role) )
            util.log( 'succeeded : the role of the real pgs matches the role in cc, real=%s, cc=%s' % (real_role, cc_role) )

        # check quorum policy
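        # With the old master out of the replication group only one slave remains,
        # so the new master's quorum is expected to be 1 (down from 2).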
        quorum_of_new_master = util.get_quorum( new_master )
        self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
        self.assertEqual( 1, quorum_of_new_master ,
                          'invalid quorum of new master, expected:%d, but:%d' % (1, quorum_of_new_master) )
        util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )

        # shutdown load generators
        for i in range( len(load_gen_list) ):
            load_gen_list[i].quit()
            load_gen_list[i].join()

        # Go back to initial configuration
        self.assertTrue(util.pgs_join(self.leader_cm['ip'], self.leader_cm['cm_port'], m['cluster_name'], m['id']),
                'failed to recover pgs, (pgs_join)')

        return 0