def test_quorum_with_left_pgs( self ):
        """Check quorum values and role changes after the master PGS is forced to leave.

        Steps, in order:
          1. Start one load generator per entry of self.cluster['servers'].
          2. Send 'pgs_leave <cluster> <id> forced' for the current master to the leader CM.
          3. Assert that util.get_quorum() of the left master is 2.
          4. If util.get_role_of_server() no longer reports the left master as master,
             poll its redis 'info stats' until the 7th reply line carries a value <= 100.
          5. Wait for, then assert, agreement between each server's real role and the
             role recorded by the CM (servers whose 'hb' is 'N' are skipped).
          6. Send 'role lconn' to the left master, wait for one of the former slaves
             to become master, and assert 1 master / 1 slave / 1 lconn.
          7. Assert that the quorum of the new master is 1.
          8. Stop the load generators and re-join the PGS with util.pgs_join().

        Returns 0 on success; failures surface as unittest assertion errors.
        """
        util.print_frame()

        load_gens = []
        try:
            # start load generators
            for server in self.cluster['servers']:
                load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
                load_gen.start()
                load_gens.append( load_gen )

            # get master, slave1, slave2
            m, s1, s2 = util.get_mss( self.cluster )
            self.assertNotEqual( m, None, 'master is None.' )
            self.assertNotEqual( s1, None, 'slave1 is None.' )
            self.assertNotEqual( s2, None, 'slave2 is None.' )

            # detach pgs from cluster
            cmd = 'pgs_leave %s %d forced\r\n' % (m['cluster_name'], m['id'])
            ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
            jobj = json.loads(ret)
            self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # check quorum policy
            quorum_of_left_master = util.get_quorum( m )
            self.assertEqual(2, quorum_of_left_master,
                              'invalid quorum of left master, expected:%d, but:%d' % (2, quorum_of_left_master) )
            util.log( 'succeeded : quorum of left master=%d' % quorum_of_left_master )

            # check if pgs is removed
            r = util.get_role_of_server(m)
            if r != c.ROLE_MASTER:
                success = False
                for try_cnt in range( 10 ):
                    redis = redis_mgmt.Redis( m['id'] )
                    ret = redis.connect( m['ip'], m['redis_port'] )
                    self.assertEqual( ret, 0, 'failed : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                    util.log( 'succeeded : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )

                    redis.write( 'info stats\r\n' )
                    # discard the first six reply lines; the seventh one is the value inspected below
                    for _ in range( 6 ):
                        redis.read_until( '\r\n' )
                    res = redis.read_until( '\r\n' )
                    self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                    util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (m['id'], m['ip'], m['redis_port'], res[:-2]) )
                    no = int( res.split(':')[1] )
                    if no <= 100:
                        success = True
                        break

                    time.sleep( 1 )

                self.assertEqual( success, True, 'failed : pgs does not removed.' )
            util.log( 'pgs is removed' )

            # wait until the states of all pgs in pg agree with the CM:
            # poll (at most 10 passes) and stop as soon as one full pass finds no mismatch
            for _ in range( 10 ):
                all_roles_match = True
                for s in self.cluster['servers']:
                    smr_info = util.get_smr_info( s, self.leader_cm )
                    if smr_info['hb'] == 'N':
                        continue

                    real_role = util.roleNumberToChar( util.get_role_of_server( s ) )
                    if real_role != smr_info['smr_Role']:
                        all_roles_match = False
                        break

                if all_roles_match:
                    break
                time.sleep( 0.5 )

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue

                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_left_master = util.get_quorum( m )
            self.assertEqual(2, quorum_of_left_master,
                              'invalid quorum of left master, expected:%d, but:%d' % (2, quorum_of_left_master) )
            util.log( 'succeeded : quorum of left master=%d' % quorum_of_left_master )

            # 'role lconn' to master
            cmd = 'role lconn\r\n'
            ret = util.cmd_to_smr( m, cmd )
            self.assertEqual( ret, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # wait for master election
            success = False
            new_master = None
            for _ in range( 10 ):
                role = util.get_role_of_server( s1 )
                if role == c.ROLE_MASTER:
                    success = True
                    new_master = s1
                    break
                role = util.get_role_of_server( s2 )
                if role == c.ROLE_MASTER:
                    success = True
                    new_master = s2
                    break
                time.sleep( 1 )
            self.assertEqual( success, True, 'failed to elect new master' )
            util.log( 'succeeded : elect new master, master_id=%d' % new_master['id'] )

            time.sleep( 1 )
            # check the numbers of master, slave, and lconn
            cnt_master = 0
            cnt_slave = 0
            cnt_lconn = 0
            for s in self.cluster['servers']:
                role = util.get_role_of_server( s )
                if role == c.ROLE_MASTER:
                    cnt_master = cnt_master + 1
                elif role == c.ROLE_SLAVE:
                    cnt_slave = cnt_slave + 1
                elif role == c.ROLE_LCONN:
                    cnt_lconn = cnt_lconn + 1
            self.assertEqual( cnt_master, 1, 'failed : the number of master is %s, expected 1' % cnt_master )
            self.assertEqual( cnt_slave, 1, 'failed : the number of slave is %s, expected 1' % cnt_slave )
            self.assertEqual( cnt_lconn, 1, 'failed : the number of lconn is %s, expected 1' % cnt_lconn )

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_new_master = util.get_quorum( new_master )
            self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
            self.assertEqual( 1, quorum_of_new_master ,
                              'invalid quorum of new master, expected:%d, but:%d' % (1, quorum_of_new_master) )
            util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )
        finally:
            # shutdown load generators, also when an assertion above failed,
            # so that a failing test does not leave them running
            for load_gen in load_gens:
                load_gen.quit()
                load_gen.join()

        # Go back to initial configuration
        self.assertTrue(util.pgs_join(self.leader_cm['ip'], self.leader_cm['cm_port'], m['cluster_name'], m['id']),
                'failed to recover pgs, (pgs_join)')

        return 0
    def elect_master_randomly( self ):
        """Demote the current master repeatedly until every server has been seen as master.

        First writes 1000 keys ('<self.key_base><i>' -> i) through a randomly
        chosen gateway. Then, for at most 30 rounds:
          - looks up the current master and two slaves with util.get_mss(),
          - from the second round on, removes the current master's id from the
            list of ids that still have to become master,
          - sends 'role lconn' to the master over its smr management port,
          - waits (up to 5 polls, 1 second apart) for 1 master and 2 slaves,
          - checks (up to 3 passes) that every server has hb == 'Y' in the CM
            and that its real role equals the role recorded by the CM.

        Returns 0 once the id list is empty; otherwise the final assertion
        fails and reports the remaining ids.
        """
        # set data
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway( '0' )
        gw.connect( ip, port )
        for i in range( 0, 1000 ):
            cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
            gw.write( cmd )
            res = gw.read_until( '\r\n' )
            self.assertEqual( res, '+OK\r\n', 'failed to set values to gw(%s:%d). cmd:%s, res:%s' % (ip, port, cmd[:-2], res[:-2]) )

        # ids of the servers that have not been observed as master yet
        server_ids = [server['id'] for server in self.cluster['servers']]

        for try_cnt in range( 30 ):
            # get master, slave1, slave2
            m, s1, s2 = util.get_mss( self.cluster )
            self.assertNotEqual( m, None, 'master is None.' )
            self.assertNotEqual( s1, None, 'slave1 is None.' )
            self.assertNotEqual( s2, None, 'slave2 is None.' )
            util.log( 'master id : %d' % m['id'] )

            # The master of the very first round is not counted
            # (presumably because it was not elected by this test - confirm).
            if try_cnt != 0 and m['id'] in server_ids:
                server_ids.remove( m['id'] )

            smr = smr_mgmt.SMR( m['id'] )
            ret = smr.connect( m['ip'], m['smr_mgmt_port'] )
            self.assertEqual( ret, 0, 'failed to connect to master. %s:%d' % (m['ip'], m['smr_mgmt_port']) )
            cmd = 'role lconn\r\n'
            smr.write( cmd )
            reply = smr.read_until( '\r\n' )
            self.assertEqual( reply, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], reply[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], reply[:-2]) )

            # wait until role-change is finished
            for _ in range( 5 ):
                count_master = 0
                count_slave = 0
                for server in self.cluster['servers']:
                    real_role = util.get_role_of_server( server )
                    real_role = util.roleNumberToChar( real_role )
                    if real_role == 'M':
                        count_master = count_master + 1
                    elif real_role == 'S':
                        count_slave = count_slave + 1
                if count_master == 1 and count_slave == 2:
                    break
                time.sleep( 1 )

            # check the number of master and slave
            self.assertEqual( count_master, 1, 'failed : the number of master is not 1, count_master=%d, count_slave=%d' % (count_master, count_slave) )
            self.assertEqual( count_slave, 2, 'failed : the number of slave is not 2, count_master=%d, count_slave=%d' % (count_master, count_slave) )
            util.log( 'succeeded : the number of master is 1 and the number of slave is 2' )

            # check states of all pgs in pg
            # (uses its own counter name so the round counter 'try_cnt' is not shadowed)
            for _ in range( 3 ):
                ok = True
                for s in self.cluster['servers']:
                    real_role = util.get_role_of_server( s )
                    real_role = util.roleNumberToChar( real_role )
                    smr_info = util.get_smr_info( s, self.leader_cm )
                    cc_role = smr_info['smr_Role']
                    cc_hb = smr_info['hb']

                    # judge each server on its own so that the log line reflects
                    # this server, not a mismatch of a previously checked one
                    server_ok = cc_hb == 'Y' and real_role == cc_role
                    if server_ok:
                        util.log( 'succeeded : a role of real pgs is the same with a role in cc, id=%d, real=%s, cc=%s, hb=%s' % (s['id'], real_role, cc_role, cc_hb) )
                    else:
                        ok = False
                        util.log( '\n\n**********************************************************\n\nretry: a role of real pgs is not the same with a role in cc, id=%d, real=%s, cc=%s, hb=%s' % (s['id'], real_role, cc_role, cc_hb) )

                if ok:
                    break
                time.sleep( 0.5 )

            self.assertTrue( ok, 'failed : role check' )

            if len( server_ids ) == 0:
                util.log( 'succeeded : all smrs have been as a master' )
                return 0

        self.assertEqual( 0, len( server_ids ) , 'failed : remains server ids=[%s]' % (','.join('%d' % server_id for server_id in server_ids))  )
        return 0
Exemple #3
0
    def elect_master_randomly(self):
        """Demote the current master repeatedly until every server has been seen as master.

        First writes 1000 keys ('<self.key_base><i>' -> i) through a randomly
        chosen gateway. Then, for at most 30 rounds:
          - looks up the current master and two slaves with util.get_mss(),
          - from the second round on, removes the current master's id from the
            list of ids that still have to become master,
          - sends 'role lconn' to the master over its smr management port,
          - waits (up to 5 polls, 1 second apart) for 1 master and 2 slaves,
          - checks (up to 3 passes) that every server has hb == 'Y' in the CM
            and that its real role equals the role recorded by the CM.

        Returns 0 once the id list is empty; otherwise the final assertion
        fails and reports the remaining ids.
        """
        # set data
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway('0')
        gw.connect(ip, port)
        for i in range(0, 1000):
            cmd = 'set %s%d %d\r\n' % (self.key_base, i, i)
            gw.write(cmd)
            res = gw.read_until('\r\n')
            self.assertEqual(
                res, '+OK\r\n',
                'failed to set values to gw(%s:%d). cmd:%s, res:%s' %
                (ip, port, cmd[:-2], res[:-2]))

        # ids of the servers that have not been observed as master yet
        server_ids = [server['id'] for server in self.cluster['servers']]

        for try_cnt in range(30):
            # get master, slave1, slave2
            m, s1, s2 = util.get_mss(self.cluster)
            self.assertNotEqual(m, None, 'master is None.')
            self.assertNotEqual(s1, None, 'slave1 is None.')
            self.assertNotEqual(s2, None, 'slave2 is None.')
            util.log('master id : %d' % m['id'])

            # The master of the very first round is not counted
            # (presumably because it was not elected by this test - confirm).
            if try_cnt != 0 and m['id'] in server_ids:
                server_ids.remove(m['id'])

            smr = smr_mgmt.SMR(m['id'])
            ret = smr.connect(m['ip'], m['smr_mgmt_port'])
            self.assertEqual(
                ret, 0, 'failed to connect to master. %s:%d' %
                (m['ip'], m['smr_mgmt_port']))
            cmd = 'role lconn\r\n'
            smr.write(cmd)
            reply = smr.read_until('\r\n')
            self.assertEqual(
                reply, '+OK\r\n',
                'failed : cmd="%s", reply="%s"' % (cmd[:-2], reply[:-2]))
            util.log('succeeded : cmd="%s", reply="%s"' %
                     (cmd[:-2], reply[:-2]))

            # wait until role-change is finished
            for _ in range(5):
                count_master = 0
                count_slave = 0
                for server in self.cluster['servers']:
                    real_role = util.get_role_of_server(server)
                    real_role = util.roleNumberToChar(real_role)
                    if real_role == 'M':
                        count_master = count_master + 1
                    elif real_role == 'S':
                        count_slave = count_slave + 1
                if count_master == 1 and count_slave == 2:
                    break
                time.sleep(1)

            # check the number of master and slave
            self.assertEqual(
                count_master, 1,
                'failed : the number of master is not 1, count_master=%d, count_slave=%d'
                % (count_master, count_slave))
            self.assertEqual(
                count_slave, 2,
                'failed : the number of slave is not 2, count_master=%d, count_slave=%d'
                % (count_master, count_slave))
            util.log(
                'succeeded : the number of master is 1 and the number of slave is 2'
            )

            # check states of all pgs in pg
            # (uses its own counter name so the round counter 'try_cnt' is not shadowed)
            for _ in range(3):
                ok = True
                for s in self.cluster['servers']:
                    real_role = util.get_role_of_server(s)
                    real_role = util.roleNumberToChar(real_role)
                    smr_info = util.get_smr_info(s, self.leader_cm)
                    cc_role = smr_info['smr_Role']
                    cc_hb = smr_info['hb']

                    # judge each server on its own so that the log line reflects
                    # this server, not a mismatch of a previously checked one
                    server_ok = cc_hb == 'Y' and real_role == cc_role
                    if server_ok:
                        util.log(
                            'succeeded : a role of real pgs is the same with a role in cc, id=%d, real=%s, cc=%s, hb=%s'
                            % (s['id'], real_role, cc_role, cc_hb))
                    else:
                        ok = False
                        util.log(
                            '\n\n**********************************************************\n\nretry: a role of real pgs is not the same with a role in cc, id=%d, real=%s, cc=%s, hb=%s'
                            % (s['id'], real_role, cc_role, cc_hb))

                if ok:
                    break
                time.sleep(0.5)

            self.assertTrue(ok, 'failed : role check')

            if len(server_ids) == 0:
                util.log('succeeded : all smrs have been as a master')
                return 0

        self.assertEqual(
            0, len(server_ids), 'failed : remains server ids=[%s]' %
            (','.join('%d' % server_id for server_id in server_ids)))
        return 0
    def test_quorum_with_left_pgs( self ):
        """Check quorum values and role changes after the master PGS leaves the cluster.

        Steps, in order:
          1. Start one load generator per entry of self.cluster['servers'].
          2. Send 'pgs_leave <cluster> <id>' for the current master to the leader CM.
          3. Poll the left master's redis 'info stats' (up to 10 times, 1 second
             apart) until the 7th reply line carries a value <= 100.
          4. Assert that each server's real role equals the role recorded by the
             CM (servers whose 'hb' is 'N' are skipped).
          5. Assert that the quorum of the left master equals self.quorum_policy[1].
          6. Send 'role lconn' to the left master, wait for one of the former
             slaves to become master, and assert 1 master / 1 slave / 1 lconn.
          7. Assert that the quorum of the new master equals self.quorum_policy[1].
          8. Stop the load generators.

        Returns 0 on success; failures surface as unittest assertion errors.
        """
        util.print_frame()

        load_gens = []
        try:
            # start load generators
            for server in self.cluster['servers']:
                load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
                load_gen.start()
                load_gens.append( load_gen )

            # get master, slave1, slave2
            m, s1, s2 = util.get_mss( self.cluster )
            self.assertNotEqual( m, None, 'master is None.' )
            self.assertNotEqual( s1, None, 'slave1 is None.' )
            self.assertNotEqual( s2, None, 'slave2 is None.' )

            # detach pgs from cluster
            cmd = 'pgs_leave %s %d\r\n' % (m['cluster_name'], m['id'])
            ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
            jobj = json.loads(ret)
            self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # check if pgs is removed
            success = False
            for try_cnt in range( 10 ):
                redis = redis_mgmt.Redis( m['id'] )
                ret = redis.connect( m['ip'], m['redis_port'] )
                self.assertEqual( ret, 0, 'failed : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                util.log( 'succeeded : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )

                redis.write( 'info stats\r\n' )
                # discard the first six reply lines; the seventh one is the value inspected below
                for _ in range( 6 ):
                    redis.read_until( '\r\n' )
                res = redis.read_until( '\r\n' )
                self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (m['id'], m['ip'], m['redis_port'], res[:-2]) )
                no = int( res.split(':')[1] )
                if no <= 100:
                    success = True
                    break
                time.sleep( 1 )

            self.assertEqual( success, True, 'failed : pgs does not removed.' )
            util.log( 'succeeded : pgs is removed' )

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_left_master = util.get_quorum( m )
            self.assertEqual( self.quorum_policy[1], quorum_of_left_master,
                              'invalid quorum of left master, expected:%d, but:%d' %( self.quorum_policy[1], quorum_of_left_master) )
            util.log( 'succeeded : quorum of left master=%d' % quorum_of_left_master )

            # 'role lconn' to master
            cmd = 'role lconn\r\n'
            ret = util.cmd_to_smr( m, cmd )
            self.assertEqual( ret, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # wait for master election
            success = False
            new_master = None
            for _ in range( 10 ):
                role = util.get_role_of_server( s1 )
                if role == c.ROLE_MASTER:
                    success = True
                    new_master = s1
                    break
                role = util.get_role_of_server( s2 )
                if role == c.ROLE_MASTER:
                    success = True
                    new_master = s2
                    break
                time.sleep( 1 )
            self.assertEqual( success, True, 'failed to elect new master' )
            util.log( 'succeeded : elect new master, master_id=%d' % new_master['id'] )

            time.sleep( 1 )
            # check the numbers of master, slave, and lconn
            cnt_master = 0
            cnt_slave = 0
            cnt_lconn = 0
            for s in self.cluster['servers']:
                role = util.get_role_of_server( s )
                if role == c.ROLE_MASTER:
                    cnt_master = cnt_master + 1
                elif role == c.ROLE_SLAVE:
                    cnt_slave = cnt_slave + 1
                elif role == c.ROLE_LCONN:
                    cnt_lconn = cnt_lconn + 1
            self.assertEqual( cnt_master, 1, 'failed : the number of master is %s, expected 1' % cnt_master )
            self.assertEqual( cnt_slave, 1, 'failed : the number of slave is %s, expected 1' % cnt_slave )
            self.assertEqual( cnt_lconn, 1, 'failed : the number of lconn is %s, expected 1' % cnt_lconn )

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_new_master = util.get_quorum( new_master )
            self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
            self.assertEqual( self.quorum_policy[1], quorum_of_new_master ,
                              'invalid quorum of new master, expected:%d, but:%d' % (self.quorum_policy[1], quorum_of_new_master) )
            util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )
        finally:
            # shutdown load generators, also when an assertion above failed,
            # so that a failing test does not leave them running
            for load_gen in load_gens:
                load_gen.quit()
                load_gen.join()

        return 0