예제 #1
0
 def _validate(self, master_id, validated_inst_id):
     utils.check_server_status(validated_inst_id,expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.READ_REPLI,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False,timeout=300)
     
     utils.check_mysql_adminuser(validated_inst_id)
     utils.check_vip(master_id)
     vip_id = utils.get_vip_id(validated_inst_id)
     utils.check_vip(validated_inst_id,vip_id=vip_id)
 
     utils.check_rpl_delay(validated_inst_id)
     
     master_ip = utils.check_allocate_ip(utils.get_builtin_instance(master_id).server)
     rr_ip = utils.check_allocate_ip(utils.get_builtin_instance(validated_inst_id).server)
     
     utils.check_rpl_consist(master_id, [validated_inst_id],master_ip,[rr_ip])
     utils.check_rpl_topo_rr(self.group_id)
예제 #2
0
 def validate(self):
     utils.check_server_status(self.inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.MASTER,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=600)
     
     utils.check_server_status(self.dbslave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.STANDBY,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=600)
     
     rr_items = InstanceGroupItem.get_by_gid_type(utils.get_context(), self.group_id, DBInstanceType.READ_REPLI, deleted = False)
     slave_ids = []
     for rr in rr_items:
         slave_ids.append(rr.instance_id)
     slave_ids.append(self.dbslave_id)
     
     utils.check_mysql_adminuser(self.inst_id)
     utils.check_mysql_adminuser(self.dbslave_id)
     
     for _id in [self.inst_id, self.dbslave_id]:
         utils.check_mysql_is_running(self.inst_id)
     
     utils.check_vip(self.inst_id, vip_id=self.vip_id)
     self.backup_id = utils.check_backup(self.group_id)
     utils.check_backup_status(self.backup_id) 
     utils.check_backup_path(self.backup_id)
     
     for slave_id in slave_ids:
         utils.check_rpl_delay(slave_id)
         
     master_inst = utils.get_builtin_instance(self.inst_id)
     slave_inst = utils.get_builtin_instance(self.dbslave_id)
     master_ip = utils.check_allocate_ip(master_inst.server)
     slave_ip = utils.check_allocate_ip(slave_inst.server)
     LOG.info("master_ip:%s  slave_ip:%s" % (master_ip, slave_ip))
     utils.check_rpl_consist(self.inst_id, slave_ids, master_ip, [slave_ip])
     utils.check_rpl_topo_ha(self.group_id)
예제 #3
0
 def _failover(self, stop_mysqld = False, rm_mysql_data = False):
     self.inst_id = self._get_rid()
     utils.check_server_status(self.inst_id,expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.READ_REPLI,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False,timeout=10)
     self.vip_id = utils.get_vip_id(self.inst_id)
     _rr_server = utils.get_builtin_instance(self.inst_id)
     nova_instance = _rr_server.server
     self.group_id = _rr_server.db_info.group_id
     instance_id = self.inst_id
     
     ran_count = 56
     utils.generate_databases(self.master_id, count = ran_count)
     
     ip = utils.check_allocate_ip(nova_instance)
     if stop_mysqld:
         utils.stop_mysqld(ip)
     
     if rm_mysql_data:
         utils.mysql_data_lost(ip)
     
     utils.check_server_status(self.inst_id,expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.READ_REPLI,
                               expected_svr_status=utils.ServiceStatuses.SHUTDOWN,
                               deleted=False,timeout=120)
     _ret = rpc.call(utils.get_context(),"taskmanager",
                     {
                      "method": "failover",
                      "args": {'instance_id':instance_id}
                      }
                     )
     
     utils.check_server_status(self.inst_id,expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.READ_REPLI,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False,timeout=120)
     
     self.master_id = utils.get_instance_id(self.group_id,DBInstanceType.MASTER)
     self.inst_id = self.inst_id
     
     utils.check_generated_databases(self.inst_id, count = ran_count)
     self.validate()
예제 #4
0
 def __trigger_mysqld_crash(cls, inst_id):
     inst = test_utils.get_builtin_instance( inst_id)
     nova_inst = inst.server
     ip = test_utils.check_allocate_ip(nova_inst)
     test_utils.stop_mysqld(ip, stop_ga=True)
예제 #5
0
    def _failover_test(self, group_id, trigger_inst_id, 
                          do_workload = False, do_prepare = False,
                          mysqld_killed = False,
                          host_rebooted = False,
                          remove_tmp_initsql = False, 
                          mysql_data_lost = False,
                          check_vip = False, 
                          check_rpl_consist = True, 
                          check_binlog_range = False):
                
        LOG.info("Doing Failover Test, group_id:%s, instance_id:%s, do_workload:%s, do_prepare:%s." % 
                 (group_id, trigger_inst_id, do_workload, do_prepare))
        before_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted = False)
        before_items = set(map(lambda x: x.type + "_" + x.instance_id, before_group_items))
        
        before_instance = test_utils.get_builtin_instance( trigger_inst_id)
        before_rip = test_utils.check_allocate_ip(before_instance.server)
        before_origin_instid = before_instance.id
        
        
        rt_before = rt_after = None
        if check_binlog_range:
            rt_before = test_utils.get_restorable_time(trigger_inst_id)
        
        if do_workload and before_instance.type == DBInstanceType.MASTER:
            FAILOVERInstance.__run_workload(do_prepare = do_prepare)

        if remove_tmp_initsql:
            FAILOVERInstance.__trigger_vm_remove_tmp_sql_file(trigger_inst_id)
            
        if mysqld_killed:
            FAILOVERInstance.__trigger_mysqld_crash(trigger_inst_id)
            test_utils.check_server_status(trigger_inst_id, expected_task=tasks.InstanceTasks.NONE, 
                                           type=before_instance.type, expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN, 
                                           deleted=False, timeout=120)

        if host_rebooted:
            FAILOVERInstance.__trigger_host_reboot(trigger_inst_id)
            # when host-machine rebooted, no guestagent  update service's status. 
#             test_utils.check_server_status(trigger_inst_id, expected_task=tasks.InstanceTasks.NONE, 
#                                            type=before_instance.type, expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN, 
#                                            deleted=False, timeout=120)
        
        if mysql_data_lost:
            FAILOVERInstance.__trigger_mysql_data_lost(trigger_inst_id)

        rpc.call(test_utils.get_context(), "taskmanager", 
                        {"method": "failover", "args": {'instance_id':before_origin_instid}}, timeout = 3600)

    
        ## check vip <--> rip mapping.
        ## vip should be changed in 10 seconds.
        if before_instance.type == DBInstanceType.MASTER or before_instance.type == DBInstanceType.READ_REPLI:
            after_instance = test_utils.get_builtin_instance( trigger_inst_id)
            after_nova_inst = after_instance.server
            after_rip = test_utils.check_allocate_ip(after_nova_inst)
            assert  after_instance.vip == before_instance.vip and before_rip != after_rip
        
        if before_instance.type == DBInstanceType.MASTER:
            test_utils.check_server_status(before_instance.id, 
                                           expected_task = tasks.InstanceTasks.NONE, 
                                           type=DBInstanceType.MASTER, 
                                           expected_svr_status = test_utils.ServiceStatuses.RUNNING, 
                                           deleted=False, timeout=120)
        
        ## check replication topo
        after_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted = False)
        after_items = set(map(lambda x: x.type + "_" + x.instance_id, after_group_items))
        LOG.info("before " + str(before_items))
        LOG.info("after " + str(after_items))
        
        if check_rpl_consist:
            diff_items = (before_items - after_items)
#             assert len(diff_items) == 0
            assert len(before_group_items) == len(after_group_items), "size of mysql cluster should be the same."
            
            for group_item in after_group_items:
                
                if group_item.type == DBInstanceType.STANDBY and group_item.instance_id == before_instance.id:
                    item = InstanceGroupItem.get_by_instance_id(test_utils.get_context(), group_item.instance_id, deleted = False)
                    assert item != None
                    continue
                
                test_utils.check_server_status(group_item.instance_id, 
                                               expected_task = tasks.InstanceTasks.NONE, 
                                               type = group_item.type, 
                                               expected_svr_status = test_utils.ServiceStatuses.RUNNING, 
                                               deleted = False, timeout = 120)
                
        if check_binlog_range:
            rt_after = test_utils.get_restorable_time(trigger_inst_id)
            assert  rt_after.end > rt_before.end, (rt_after.end, rt_before.end)
            time.sleep(60)
            rt_after2 = test_utils.get_restorable_time(trigger_inst_id)
            assert  rt_after2.end > rt_after.end, (rt_after2.end, rt_after.end)
예제 #6
0
 def __trigger_mysql_data_lost(cls, inst_id):
     inst = test_utils.get_builtin_instance( inst_id)
     nova_inst = inst.server
     ip = test_utils.check_allocate_ip(nova_inst)
     
     test_utils.mysql_data_lost(ip)
예제 #7
0
 def __trigger_vm_remove_tmp_sql_file(cls, inst_id):
     inst = test_utils.get_builtin_instance( inst_id)
     nova_inst = inst.server
     ip = test_utils.check_allocate_ip(nova_inst)
     test_utils.remove_tmp_initsql(ip)
예제 #8
0
 def __trigger_vm_oom(cls, inst_id):
     inst = test_utils.get_builtin_instance( inst_id)
     nova_inst = inst.server
     ip = test_utils.check_allocate_ip(nova_inst)
     test_utils.reboot_host(ip)
예제 #9
0
    def failover(self, instance_id=None, _strategy=None):
        self.inst_id = instance_id
        if not self.inst_id:
            self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
        _inst = utils.get_builtin_instance(self.inst_id)
        self.group_id = _inst.group_id
        self.dbslave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY)
        
        utils.check_server_status(self.inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=DBInstanceType.MASTER,
                                  expected_svr_status=utils.ServiceStatuses.RUNNING,
                                  deleted=False, timeout=120)
        
        utils.check_server_status(self.dbslave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=DBInstanceType.STANDBY,
                                  expected_svr_status=utils.ServiceStatuses.RUNNING,
                                  deleted=False, timeout=120)
        self.vip_id = utils.get_vip_id(self.inst_id)
        
        strategy = CONF.ha_failover_strategy
        virtual_instance_id = None
        if strategy == 'master':
            _ret = utils.get_builtin_instance(self.inst_id)
            nova_instance = _ret.server
            instance_id = _ret.id
            type = DBInstanceType.MASTER
            virtual_instance_id = _ret.virtual_instance_id
        elif strategy == 'standby':
            _ret = utils.get_builtin_instance(self.dbslave_id)
            nova_instance = _ret.server
            instance_id = _ret.id
            type = DBInstanceType.STANDBY
        else:
            raise Exception("not found ha_failover_strategy %s" % strategy)
            
        
        rancount = random.randint(50, 100)
        utils.generate_databases(self.inst_id, count = rancount)
        
        ip = utils.check_allocate_ip(nova_instance)
        utils.stop_mysqld(ip, stop_ga=True)
        utils.check_server_status(instance_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=type,
                                  expected_svr_status=utils.ServiceStatuses.SHUTDOWN,
                                  deleted=False, timeout=120)
        rpc.call(utils.get_context(), "taskmanager",
                     {"method": "failover",
                       "args": {'instance_id':instance_id}})
        
        if strategy == 'master':
            origin_inst_id = inst_utils.virtual_instid_2_origin_instid(virtual_instance_id)
            self.inst_id = origin_inst_id
            
            utils.check_server_status(origin_inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                      type=DBInstanceType.MASTER,
                                      expected_svr_status=utils.ServiceStatuses.RUNNING,
                                      deleted=False, timeout=120)

        new_slave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY) 
        utils.check_server_status(new_slave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=DBInstanceType.STANDBY,
                                  expected_svr_status=utils.ServiceStatuses.RUNNING,
                                  deleted=False, timeout=120)

        self.dbslave_id = new_slave_id
        
        utils.check_generated_databases(self.inst_id, count = rancount)
        utils.clear_generated_databases(self.dbslave_id, count = rancount)
        self.validate()