def test_quorum_policy_of_hanging_master( self ):
    """Check quorum values after the master hangs and a new master is elected.

    The test stalls the current master through its SMR management port,
    waits for one of the two slaves to report the master role, shuts down
    confmaster for every server in the cluster and then asserts that the
    stalled master still reports quorum 2 while the new master reports
    quorum 1.  Confmaster is recovered afterwards, including when one of
    the quorum assertions fails, so the cluster is not left without it.

    Returns:
        0 on success; failures are reported through unittest assertions.
    """
    util.print_frame()

    # get master, slave1, slave2
    m, s1, s2 = util.get_mss( self.cluster )
    self.assertNotEqual( m, None, 'master is None.' )
    self.assertNotEqual( s1, None, 'slave1 is None.' )
    self.assertNotEqual( s2, None, 'slave2 is None.' )

    # hang: send a fault-injection delay command to the master
    # (presumably a 15000 ms stall -- TODO confirm the unit)
    smr = smr_mgmt.SMR( m['id'] )
    ret = smr.connect( m['ip'], m['smr_mgmt_port'] )
    self.assertEqual( ret, 0, 'failed to connect to master. %s:%d' % (m['ip'], m['smr_mgmt_port']) )
    smr.write( 'fi delay sleep 1 15000\r\n' )
    time.sleep( 5 )

    # wait for forced master election: poll s1 then s2, once per second
    new_master = None
    for _ in range( 7 ):
        for candidate in ( s1, s2 ):
            if util.get_role_of_server( candidate ) == c.ROLE_MASTER:
                new_master = candidate
                break
        if new_master is not None:
            break
        time.sleep( 1 )
    self.assertEqual( new_master is not None, True, 'failed to forced master election' )

    # shutdown confmaster
    for server in self.cluster['servers']:
        util.shutdown_cm( server['id'] )

    recovered = False
    try:
        # wait until hanging master wake up
        time.sleep( 5 )

        # check quorum policy of the master that was hanging
        quorum_of_haning_master = util.get_quorum( m )
        self.assertEqual( 2, quorum_of_haning_master,
                          'invalid quorum of haning master, expected:%d, but:%d' %(2, quorum_of_haning_master) )
        util.log( 'succeeded : quorum of haning master=%d' % quorum_of_haning_master )

        # check quorum policy of the newly elected master
        quorum_of_new_master = util.get_quorum( new_master )
        self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
        self.assertEqual( 1, quorum_of_new_master ,
                          'invalid quorum of new master, expected:%d, but:%d' % (1, quorum_of_new_master) )
        util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )
    finally:
        # Go back to initial configuration
        # Recover Confmaster even if a check above failed; otherwise the
        # cluster would stay without confmaster for whatever runs next.
        recovered = util.recover_confmaster(self.cluster, [0,1,2], 0)

    self.assertTrue(recovered, 'failed to recover confmaster')

    return 0
Esempio n. 2
0
    def test_quorum_policy_of_hanging_master( self ):
        """Check quorum values after the master hangs and a new master is elected.

        The master is stalled through its SMR management port, the test
        waits for one of the slaves to report the master role, confmaster is
        shut down for every server, and finally both the stalled master and
        the new master are expected to report ``self.quorum_policy[1]``.

        Returns:
            0 on success; failures are reported through unittest assertions.
        """
        util.print_frame()

        # Resolve the current master and its two slaves.
        m, s1, s2 = util.get_mss( self.cluster )
        for node, complaint in ( (m, 'master is None.'),
                                 (s1, 'slave1 is None.'),
                                 (s2, 'slave2 is None.') ):
            self.assertNotEqual( node, None, complaint )

        # Stall the master with a fault-injection delay command.
        smr = smr_mgmt.SMR( m['id'] )
        connect_result = smr.connect( m['ip'], m['smr_mgmt_port'] )
        self.assertEqual( connect_result, 0,
                          'failed to connect to master. %s:%d' % (m['ip'], m['smr_mgmt_port']) )
        smr.write( 'fi delay sleep 1 15000\r\n' )
        time.sleep( 5 )

        # Poll s1 and then s2, once per second, until one of them is master.
        new_master = None
        attempts_left = 7
        while attempts_left > 0 and new_master is None:
            attempts_left -= 1
            for candidate in ( s1, s2 ):
                if util.get_role_of_server( candidate ) == c.ROLE_MASTER:
                    new_master = candidate
                    break
            else:
                time.sleep( 1 )
        elected = new_master is not None
        self.assertEqual( elected, True, 'failed to forced master election' )

        # Take confmaster down on every server.
        for node in self.cluster['servers']:
            util.shutdown_cm( node['id'] )

        # Give the stalled master time to wake up.
        time.sleep( 5 )

        # Quorum reported by the master that was stalled.
        old_quorum = util.get_quorum( m )
        old_message = ( 'invalid quorum of haning master, expected:%d, but:%d'
                        % (self.quorum_policy[1], old_quorum) )
        self.assertEqual( self.quorum_policy[1], old_quorum, old_message )
        util.log( 'succeeded : quorum of haning master=%d' % old_quorum )

        # Quorum reported by the newly elected master.
        new_quorum = util.get_quorum( new_master )
        self.assertNotEqual( None, new_quorum, 'failed : find new master' )
        new_message = ( 'invalid quorum of new master, expected:%d, but:%d'
                        % (self.quorum_policy[1], new_quorum) )
        self.assertEqual( self.quorum_policy[1], new_quorum, new_message )
        util.log( 'succeeded : quorum of new master=%d' % new_quorum )

        return 0
Esempio n. 3
0
 def __check_quorum(self, m, expected):
     """Poll the quorum of server ``m`` until it equals ``expected``.

     Sleeps one second, then reads ``util.get_quorum(m)`` up to ten times
     with 0.5 seconds between readings, logging every reading.

     Returns:
         True as soon as a reading equals ``expected``; False when none of
         the ten readings does.
     """
     time.sleep(1)
     attempt = 0
     while attempt < 10:
         current = util.get_quorum(m)
         if current == expected:
             util.log('quorum: %d, master: %d, try_cnt: %d, OK' % (current, m['id'], attempt))
             return True
         util.log('quorum: %d, master: %d, try_cnt: %d' % (current, m['id'], attempt))
         time.sleep(0.5)
         attempt += 1
     return False
Esempio n. 4
0
 def __check_quorum(self, m, expected):
     """Wait for server ``m`` to report the quorum value ``expected``.

     After an initial one-second pause the quorum is read up to ten times,
     half a second apart; each reading is written to the log.

     Returns:
         True if a reading matched ``expected``, otherwise False.
     """
     time.sleep(1)
     remaining = 10
     while remaining:
         try_cnt = 10 - remaining
         observed = util.get_quorum(m)
         if observed == expected:
             util.log('quorum: %d, master: %d, try_cnt: %d, OK' % (observed, m['id'], try_cnt))
             return True
         util.log('quorum: %d, master: %d, try_cnt: %d' % (observed, m['id'], try_cnt))
         time.sleep(0.5)
         remaining -= 1
     return False
    def test_quorum( self ):
        """Check that the master's quorum follows the number of live slaves.

        Expected sequence of quorum values: 2 with both slaves running, 1
        after slave1's SMR is shut down, 0 after slave2's SMR is shut down,
        then 1 and 2 again as slave1 and slave2 are restarted.  Each value
        is polled for up to 20 seconds before the assertion is made.
        """
        util.print_frame()

        def wait_for_quorum( server, expected, refresh_master=False, max_try=20 ):
            # Poll the quorum of `server` once per second until it equals
            # `expected` or `max_try` readings were taken, then assert on the
            # last reading.  With refresh_master the master is looked up again
            # before every reading.  Returns the server that was read last.
            quorum = None
            for _ in range( max_try ):
                if refresh_master:
                    server = util.get_server_by_role( self.cluster['servers'], 'master' )
                quorum = util.get_quorum( server )
                if quorum == expected:
                    break
                time.sleep( 1 )
            self.assertEqual( quorum, expected,
                              'quorum:%d, expected:%d' % (quorum, expected) )
            return server

        def shutdown_smr( server ):
            # Stop the SMR of `server` and give the cluster a second to react.
            ret = testbase.request_to_shutdown_smr( server )
            self.assertEqual( ret, 0, 'failed to shutdown smr, server:%d' % server['id'] )
            time.sleep( 1 )

        def recover( server ):
            # Restart SMR and redis of `server` and wait for its role setup.
            ret = testbase.request_to_start_smr( server )
            self.assertEqual( ret, 0, 'failed to start smr' )

            ret = testbase.request_to_start_redis( server )
            self.assertEqual( ret, 0, 'failed to start redis' )

            ret = testbase.wait_until_finished_to_set_up_role( server )
            self.assertEqual( ret, 0, 'failed to role change. smr_id:%d' % (server['id']) )
            time.sleep( 1 )

        master, slave1, slave2 = util.get_mss(self.cluster)

        wait_for_quorum( master, 2 )

        shutdown_smr( slave1 )
        master = wait_for_quorum( master, 1, refresh_master=True )

        shutdown_smr( slave2 )
        master = wait_for_quorum( master, 0, refresh_master=True )

        # recovery: the master found by the last lookup is reused from here on
        recover( slave1 )
        wait_for_quorum( master, 1 )

        # recovery
        recover( slave2 )
        wait_for_quorum( master, 2 )
    def test_quorum_with_left_pgs( self ):
        """Check quorum values when the master PGS is forced to leave the cluster.

        With load generators running against every server, the master is
        detached with ``pgs_leave ... forced`` and is expected to keep
        quorum 2.  It is then switched to lconn with ``role lconn``; one of
        the slaves must become master and report quorum 1, and the roles
        known to confmaster must match the real roles.  The load generators
        are always stopped, and on success the detached PGS is joined back.

        Returns:
            0 on success; failures are reported through unittest assertions.
        """
        util.print_frame()

        load_gens = []
        try:
            # start load generators
            for server in self.cluster['servers']:
                load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
                load_gen.start()
                load_gens.append( load_gen )

            # get master, slave1, slave2
            m, s1, s2 = util.get_mss( self.cluster )
            self.assertNotEqual( m, None, 'master is None.' )
            self.assertNotEqual( s1, None, 'slave1 is None.' )
            self.assertNotEqual( s2, None, 'slave2 is None.' )

            # detach pgs from cluster
            cmd = 'pgs_leave %s %d forced\r\n' % (m['cluster_name'], m['id'])
            ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
            jobj = json.loads(ret)
            self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # check quorum policy
            quorum_of_haning_master = util.get_quorum( m )
            self.assertEqual(2, quorum_of_haning_master,
                              'invalid quorum of left master, expected:%d, but:%d' % (2, quorum_of_haning_master) )
            util.log( 'succeeded : quorum of left master=%d' % quorum_of_haning_master )

            # check if pgs is removed
            r = util.get_role_of_server(m)
            if r != c.ROLE_MASTER:
                success = False
                for try_cnt in range( 10 ):
                    redis = redis_mgmt.Redis( m['id'] )
                    ret = redis.connect( m['ip'], m['redis_port'] )
                    self.assertEqual( ret, 0, 'failed : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                    util.log( 'succeeded : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )

                    # the value of interest is on the 7th line of the reply
                    redis.write( 'info stats\r\n' )
                    for i in range( 6 ):
                        redis.read_until( '\r\n' )
                    res = redis.read_until( '\r\n' )
                    self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                    util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (m['id'], m['ip'], m['redis_port'], res[:-2]) )
                    no = int( res.split(':')[1] )
                    if no <= 100:
                        success = True
                        break

                    time.sleep( 1 )

                self.assertEqual( success, True, 'failed : pgs does not removed.' )
            util.log( 'pgs is removed' )

            # wait until the roles known to confmaster agree with the real roles.
            # The original loop always ran all passes and could sleep 0.5 s per
            # mismatching server (up to 15 s); this one stops as soon as every
            # server agrees and keeps the same 15 s upper bound.
            for i in range( 30 ):
                consistent = True
                for s in self.cluster['servers']:
                    smr_info = util.get_smr_info( s, self.leader_cm )
                    cc_role = smr_info['smr_Role']
                    cc_hb = smr_info['hb']
                    if cc_hb == 'N':
                        continue

                    real_role = util.get_role_of_server( s )
                    real_role = util.roleNumberToChar( real_role )
                    if real_role != cc_role:
                        consistent = False
                        break
                if consistent:
                    break
                time.sleep(0.5)

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue

                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_haning_master = util.get_quorum( m )
            self.assertEqual(2, quorum_of_haning_master,
                              'invalid quorum of left master, expected:%d, but:%d' % (2, quorum_of_haning_master) )
            util.log( 'succeeded : quorum of left master=%d' % quorum_of_haning_master )

            # 'role lconn' to master
            cmd = 'role lconn\r\n'
            ret = util.cmd_to_smr( m, cmd )
            self.assertEqual( ret, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # wait for master election: poll s1 then s2, once per second
            new_master = None
            for i in range( 10 ):
                for candidate in ( s1, s2 ):
                    if util.get_role_of_server( candidate ) == c.ROLE_MASTER:
                        new_master = candidate
                        break
                if new_master is not None:
                    break
                time.sleep( 1 )
            self.assertEqual( new_master is not None, True, 'failed to elect new master' )
            util.log( 'succeeded : elect new master, master_id=%d' % new_master['id'] )

            time.sleep( 1 )
            # check the numbers of master, slave, and lconn
            cnt_master = 0
            cnt_slave = 0
            cnt_lconn = 0
            for s in self.cluster['servers']:
                role = util.get_role_of_server( s )
                if role == c.ROLE_MASTER:
                    cnt_master = cnt_master + 1
                elif role == c.ROLE_SLAVE:
                    cnt_slave = cnt_slave + 1
                elif role == c.ROLE_LCONN:
                    cnt_lconn = cnt_lconn + 1
            self.assertEqual( cnt_master, 1, 'failed : the number of master is %s, expected 1' % cnt_master )
            self.assertEqual( cnt_slave, 1, 'failed : the number of slave is %s, expected 1' % cnt_slave )
            self.assertEqual( cnt_lconn, 1, 'failed : the number of lconn is %s, expected 1' % cnt_lconn )

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_new_master = util.get_quorum( new_master )
            self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
            self.assertEqual( 1, quorum_of_new_master ,
                              'invalid quorum of new master, expected:%d, but:%d' % (1, quorum_of_new_master) )
            util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )
        finally:
            # shutdown load generators, also when a check above failed, so
            # that no generator thread is left running after the test
            for load_gen in load_gens:
                load_gen.quit()
                load_gen.join()

        # Go back to initial configuration
        self.assertTrue(util.pgs_join(self.leader_cm['ip'], self.leader_cm['cm_port'], m['cluster_name'], m['id']),
                'failed to recover pgs, (pgs_join)')

        return 0
Esempio n. 7
0
    def test_quorum( self ):
        """Check the master's quorum against ``self.quorum_policy``.

        The master is expected to report ``self.quorum_policy[1]`` with both
        slaves running and after slave1's SMR is shut down,
        ``self.quorum_policy[0]`` after slave2's SMR is shut down as well,
        and ``self.quorum_policy[1]`` again after each slave is restarted.
        Each value is polled for up to 20 seconds before the assertion.
        """
        util.print_frame()

        def wait_for_quorum( server, expected, max_try=20 ):
            # Poll the quorum of `server` once per second until it equals
            # `expected` or `max_try` readings were taken, then assert on the
            # last reading.
            quorum = None
            for _ in range( max_try ):
                quorum = util.get_quorum( server )
                if quorum == expected:
                    break
                time.sleep( 1 )
            self.assertEqual( quorum, expected,
                              'quorum:%d, expected:%d' % (quorum, expected) )

        def shutdown_smr( server ):
            # Stop the SMR of `server` and give the cluster a second to react.
            ret = testbase.request_to_shutdown_smr( server )
            self.assertEqual( ret, 0, 'failed to shutdown smr, server:%d' % server['id'] )
            time.sleep( 1 )

        def recover( server ):
            # Restart SMR and redis of `server` and wait for its role setup.
            ret = testbase.request_to_start_smr( server )
            self.assertEqual( ret, 0, 'failed to start smr' )

            ret = testbase.request_to_start_redis( server )
            self.assertEqual( ret, 0, 'failed to start redis' )

            ret = testbase.wait_until_finished_to_set_up_role( server )
            self.assertEqual( ret, 0, 'failed to role change. smr_id:%d' % (server['id']) )
            time.sleep( 1 )

        master, slave1, slave2 = self.get_mss()

        wait_for_quorum( master, self.quorum_policy[1] )

        shutdown_smr( slave1 )
        wait_for_quorum( master, self.quorum_policy[1] )

        shutdown_smr( slave2 )
        wait_for_quorum( master, self.quorum_policy[0] )

        # recovery
        recover( slave1 )
        wait_for_quorum( master, self.quorum_policy[1] )

        # recovery
        recover( slave2 )
        wait_for_quorum( master, self.quorum_policy[1] )
Esempio n. 8
0
    def test_quorum_with_left_pgs( self ):
        """Check quorum values when the master PGS leaves the cluster.

        With load generators running against every server, the master is
        detached with ``pgs_leave`` and is expected to report
        ``self.quorum_policy[1]``.  It is then switched to lconn with
        ``role lconn``; one of the slaves must become master and also report
        ``self.quorum_policy[1]``, and the roles known to confmaster must
        match the real roles.  The load generators are always stopped, also
        when an assertion fails.

        Returns:
            0 on success; failures are reported through unittest assertions.
        """
        util.print_frame()

        load_gens = []
        try:
            # start load generators
            for server in self.cluster['servers']:
                load_gen = load_generator.LoadGenerator(server['id'], server['ip'], server['gateway_port'])
                load_gen.start()
                load_gens.append( load_gen )

            # get master, slave1, slave2
            m, s1, s2 = util.get_mss( self.cluster )
            self.assertNotEqual( m, None, 'master is None.' )
            self.assertNotEqual( s1, None, 'slave1 is None.' )
            self.assertNotEqual( s2, None, 'slave2 is None.' )

            # detach pgs from cluster
            cmd = 'pgs_leave %s %d\r\n' % (m['cluster_name'], m['id'])
            ret = util.cm_command( self.leader_cm['ip'], self.leader_cm['cm_port'], cmd )
            jobj = json.loads(ret)
            self.assertEqual( jobj['msg'], '+OK', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # check if pgs is removed
            success = False
            for try_cnt in range( 10 ):
                redis = redis_mgmt.Redis( m['id'] )
                ret = redis.connect( m['ip'], m['redis_port'] )
                self.assertEqual( ret, 0, 'failed : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                util.log( 'succeeded : connect to smr%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )

                # the value of interest is on the 7th line of the reply
                redis.write( 'info stats\r\n' )
                for i in range( 6 ):
                    redis.read_until( '\r\n' )
                res = redis.read_until( '\r\n' )
                self.assertNotEqual( res, '', 'failed : get reply of "info stats" from redis%d(%s:%d)' % (m['id'], m['ip'], m['redis_port']) )
                util.log( 'succeeded : get reply of "info stats" from redis%d(%s:%d), reply="%s"' % (m['id'], m['ip'], m['redis_port'], res[:-2]) )
                no = int( res.split(':')[1] )
                if no <= 100:
                    success = True
                    break
                time.sleep( 1 )

            self.assertEqual( success, True, 'failed : pgs does not removed.' )
            util.log( 'succeeded : pgs is removed' )

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_haning_master = util.get_quorum( m )
            self.assertEqual( self.quorum_policy[1], quorum_of_haning_master,
                              'invalid quorum of left master, expected:%d, but:%d' %( self.quorum_policy[1], quorum_of_haning_master) )
            util.log( 'succeeded : quorum of left master=%d' % quorum_of_haning_master )

            # 'role lconn' to master
            cmd = 'role lconn\r\n'
            ret = util.cmd_to_smr( m, cmd )
            self.assertEqual( ret, '+OK\r\n', 'failed : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )
            util.log( 'succeeded : cmd="%s", reply="%s"' % (cmd[:-2], ret[:-2]) )

            # wait for master election: poll s1 then s2, once per second
            new_master = None
            for i in range( 10 ):
                for candidate in ( s1, s2 ):
                    if util.get_role_of_server( candidate ) == c.ROLE_MASTER:
                        new_master = candidate
                        break
                if new_master is not None:
                    break
                time.sleep( 1 )
            self.assertEqual( new_master is not None, True, 'failed to elect new master' )
            util.log( 'succeeded : elect new master, master_id=%d' % new_master['id'] )

            time.sleep( 1 )
            # check the numbers of master, slave, and lconn
            cnt_master = 0
            cnt_slave = 0
            cnt_lconn = 0
            for s in self.cluster['servers']:
                role = util.get_role_of_server( s )
                if role == c.ROLE_MASTER:
                    cnt_master = cnt_master + 1
                elif role == c.ROLE_SLAVE:
                    cnt_slave = cnt_slave + 1
                elif role == c.ROLE_LCONN:
                    cnt_lconn = cnt_lconn + 1
            self.assertEqual( cnt_master, 1, 'failed : the number of master is %s, expected 1' % cnt_master )
            self.assertEqual( cnt_slave, 1, 'failed : the number of slave is %s, expected 1' % cnt_slave )
            self.assertEqual( cnt_lconn, 1, 'failed : the number of lconn is %s, expected 1' % cnt_lconn )

            # check states of all pgs in pg
            for s in self.cluster['servers']:
                real_role = util.get_role_of_server( s )
                real_role = util.roleNumberToChar( real_role )
                smr_info = util.get_smr_info( s, self.leader_cm )
                cc_role = smr_info['smr_Role']
                cc_hb = smr_info['hb']
                if cc_hb == 'N':
                    continue
                self.assertEqual( real_role, cc_role,
                                  'failed : each role is difference, real=%s, cc=%s' % (real_role, cc_role) )
                util.log( 'succeeded : a role of real pgs is the same with a role in cc, real=%s, cc=%s' % (real_role, cc_role) )

            # check quorum policy
            quorum_of_new_master = util.get_quorum( new_master )
            self.assertNotEqual( None, quorum_of_new_master, 'failed : find new master' )
            self.assertEqual( self.quorum_policy[1], quorum_of_new_master ,
                              'invalid quorum of new master, expected:%d, but:%d' % (self.quorum_policy[1], quorum_of_new_master) )
            util.log( 'succeeded : quorum of new master=%d' % quorum_of_new_master )
        finally:
            # shutdown load generators, also when a check above failed, so
            # that no generator thread is left running after the test
            for load_gen in load_gens:
                load_gen.quit()
                load_gen.join()

        return 0
    def test_7_dirty_network_fi(self):
        """Run confmaster fault injections while the network is blocked and unblocked.

        The test installs two iptables NAT rules that redirect 127.0.0.100
        to 127.0.0.1, starts the cluster 'network_isolation_cluster_1' with
        the fault-injection confmaster context and starts one crc16 client
        per server.  For every item produced by
        ``fi_confmaster.ConfmasterWfFi`` it then:

        1. blocks the network and waits until every server with ip
           127.0.0.100 shows active_role '?' and mgmt_role 'N';
        2. registers the fault injection;
        3. unblocks the network and waits for a stable cluster;
        4. for some injections, performs an extra action (role lconn,
           SMR restart, setquorum, delay) and waits for stability again;
        5. checks that the injection count dropped to 0.

        Finally the cluster is shut down, the NAT rules are deleted and the
        clients are checked for data consistency.  The clients are always
        stopped in the ``finally`` clause.
        """
        util.print_frame()
        clnts = []

        try:
            out = util.sudo('iptables -L')
            util.log('====================================================================')
            util.log('out : %s' % out)
            util.log('out.return_code : %d' % out.return_code)
            util.log('out.stderr : %s' % out.stderr)
            util.log('out.succeeded : %s' % out.succeeded)

            # Add forwarding role
            out = util.sudo('iptables -t nat -A OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)
            out = util.sudo('iptables -t nat -A PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

            cluster_name = 'network_isolation_cluster_1'
            # A list comprehension is indexable on Python 2 and 3 alike,
            # unlike the result of filter().
            cluster = [x for x in config.clusters if x['cluster_name'] == cluster_name][0]
            util.log(util.json_to_str(cluster))

            self.leader_cm = cluster['servers'][0]

            # MGMT
            mgmt_ip = cluster['servers'][0]['real_ip']
            mgmt_port = cluster['servers'][0]['cm_port']

            # Create cluster
            ret = default_cluster.initialize_starting_up_smr_before_redis( cluster,
                    conf={'cm_context':'applicationContext-fi.xml'})
            self.assertEqual(0, ret, 'failed to TestMaintenance.initialize')

            # Print initial state of cluster
            util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
            initial_state = []
            self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

            # Start crc16 client
            for s in cluster['servers']:
                clnt = load_generator_crc16.Crc16Client(s['id'], s['ip'], s['gateway_port'], 3000, verbose=False)
                clnt.start()
                clnts.append(clnt)

            # Network isolation test
            cmfi = fi_confmaster.ConfmasterWfFi(['ra', 'qa', 'me', 'yj', 'bj', 'mg'],
                                                ['lconn', 'slave', 'master', 'setquorum'], [True, False], 1)

            for fi in cmfi:
                # Block network
                util.log('\n\n\n ### BLOCK NETWORK, %s ### ' % str(fi))
                ret = block_network(cluster, mgmt_ip, mgmt_port)
                self.assertTrue(ret, '[%s] failed to block network.' % str(fi))

                for i in xrange(4):
                    util.log('waiting... %d' % (i + 1))
                    time.sleep(1)

                # Check cluster state: every server behind 127.0.0.100 must
                # have lost both its active role and its mgmt role
                ok = False
                for i in xrange(10):
                    isolated_states = []
                    util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)

                    state_transition_done = True
                    for s in isolated_states:
                        if s['ip'] != '127.0.0.100':
                            continue

                        if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                            state_transition_done = False

                    if state_transition_done:
                        ok = True
                        break
                    time.sleep(1)
                self.assertTrue(ok, 'Fail, state transition')

                # Fault injection
                try:
                    self.assertTrue(fi_confmaster.fi_add(fi, 1, mgmt_ip, mgmt_port),
                            "Confmaster command fail. fi: %s" % str(fi))
                except ValueError as e:
                    # The original message formatted the undefined names
                    # `cmd` and `reply`, which raised NameError here.
                    self.fail("Confmaster command error. fi: \"%s\", error: \"%s\"" % (str(fi), str(e)))

                # Unblock network
                util.log('\n\n\n ### UNBLOCK NETWORK, %s ### ' % str(fi))
                ret = unblock_network(cluster, mgmt_ip, mgmt_port, None)
                self.assertTrue(ret, '[%s] failed to unblock network.' % str(fi))

                for i in xrange(4):
                    util.log('waiting... %d' % (i + 1))
                    time.sleep(1)

                # Check cluster state
                ok = False
                for i in xrange(10):
                    isolated_states = []
                    ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                    if ok:
                        break
                    time.sleep(1)
                self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

                # Set when an extra action was performed that requires the
                # cluster to be checked for stability once more.
                need_recheck = False

                # 'bj', 'slave'
                if fi[0] == 'bj' and fi[1] == 'slave':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.role_lconn(s1)
                    self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                    need_recheck = True
                # 'me', 'lconn'
                elif fi[0] == 'me' and fi[1] == 'lconn':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.role_lconn(m)
                    self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                    need_recheck = True
                # 'qa', 'setquorum'
                elif fi[0] == 'qa' and fi[1] == 'setquorum':
                    m, s1, s2 = util.get_mss(cluster)

                    # shutdown
                    ret = testbase.request_to_shutdown_smr(s1)
                    self.assertEqual(0, ret, '[%s] failed to shutdown smr%d' % (str(fi), s1['id']))
                    ret = testbase.request_to_shutdown_redis(s1)
                    self.assertEqual(0, ret, '[%s] failed to shutdown redis%d' % (str(fi), s1['id']))

                    # Check quorum
                    q = -1
                    for q_cnt in xrange(20):
                        q = util.get_quorum(m)
                        if q == 1:
                            break
                        time.sleep(1)
                    self.assertEqual(1, q, "[%s] check quorum fail." % str(fi))

                    # Modify quorum
                    ret = util.cmd_to_smr_addr(m['ip'], m['smr_mgmt_port'], 'setquorum 0\r\n')
                    self.assertEqual("+OK\r\n", ret, '[%s] "setquorum 0" fail.' % str(fi))

                    # Check quorum: the test expects 1 again even though 0
                    # was just set (presumably confmaster restores it --
                    # TODO confirm)
                    q = -1
                    for q_cnt in xrange(20):
                        q = util.get_quorum(m)
                        if q == 1:
                            break
                        time.sleep(1)
                    self.assertEqual(1, q, "[%s] check quorum fail." % str(fi))

                    # recovery
                    ret = testbase.request_to_start_smr(s1)
                    self.assertEqual(0, ret, '[%s] failed to start smr' % str(fi))
                    ret = testbase.request_to_start_redis(s1, max_try=120)
                    self.assertEqual(0, ret, '[%s] failed to start redis' % str(fi))
                    ret = testbase.wait_until_finished_to_set_up_role(s1, 11)
                    self.assertEqual(0, ret, '[%s] failed to role change. smr_id:%d' % (str(fi), s1['id']))

                    need_recheck = True

                # 'setquorum'
                elif fi[1] == 'setquorum':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.cmd_to_smr_addr(s1['ip'], s1['smr_mgmt_port'], 'fi delay sleep 1 8000\r\n', timeout=20)
                    self.assertEqual("+OK\r\n", ret, '[%s] "fi delay sleep 1 8000" fail. ret: "%s"' % (str(fi), ret))
                    need_recheck = True

                if need_recheck:
                    # Check cluster state
                    ok = False
                    for i in xrange(20):
                        isolated_states = []
                        ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                        if ok:
                            break
                        time.sleep(1)
                    self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

                # Check fault injection: a count of 0 is taken to mean that
                # the injection has been triggered
                ok = False
                for i in xrange(10):
                    count = fi_confmaster.fi_count(fi, mgmt_ip, mgmt_port)
                    if count == 0:
                        ok = True
                        break
                    time.sleep(0.5)
                self.assertTrue(ok, "[%s] fail. failt injection had not been triggered." % str(fi))

            # Shutdown cluster
            ret = default_cluster.finalize( cluster )
            self.assertEqual(ret, 0, '[%s] failed to TestMaintenance.finalize' % str(fi))

            # Delete forwarding role
            # NOTE(review): the cluster shutdown above and these rule
            # deletions only run on the success path; a failed assertion
            # earlier leaves the cluster running and the NAT rules installed.
            out = util.sudo('iptables -t nat -D OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
            out = util.sudo('iptables -t nat -D PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)

            for clnt in clnts:
                self.assertTrue(clnt.is_consistency(), '[%s] data consistency error!' % str(fi))

        finally:
            # Ask every client to stop first, then wait for each of them.
            for clnt in clnts:
                clnt.quit()
            for clnt in clnts:
                clnt.join()