def __check_config_is_right(self):
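        """Cross-check the configuration that was just created against what is stored in the db:
        name, description, config_type, value overrides and datastore version must all match."""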
        config = KSC_Configuration.load(utils.get_context(), self.config.id)
        if not config:
            raise Exception("create config, not write in db")
        
        if self.name != config.name or self.description != config.description:
            raise Exception("create config, name or description bad value")
        
        if self.config.config_type != '1':
            raise Exception("create config, config_type bad value")
        
        overrides = KSC_Configuration.get_configuration_overrides(utils.get_context(), self.config.id)
        
        diffs = {}
        for k, v in self.values.iteritems():
            # values match if they are equal either literally or when compared numerically
            if overrides[k] != v and float(overrides[k]) != float(v):
                diffs[k] = (v, overrides[k])
        if diffs:
            raise Exception("create config: values do not match the stored overrides, diffs = %s" % diffs)
            
#         
#         if overrides != self.values:
#             raise Exception("create config, values error")
        
        datastore_version = DatastoreVersion.load_by_uuid(self.config.datastore_version_id)
        if not datastore_version:
            raise Exception("create config, bad value of datastore_version_id")
        
        if datastore_version.name != self.datastore_version:
            raise Exception("create config, bad value of datastore_version_id")
    def resize_ha(self, instance_id=None):
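        """Resize an HA group's master to the next flavor, then verify the group comes back with a
        healthy master/standby pair under the same virtual instance id and replication caught up."""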
        self.inst_id = instance_id
        if not self.inst_id:
            self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
        _inst = utils.get_instance(self.inst_id, deleted=0)
        self.group_id = _inst.group_id

        master = InstanceGroupItem.get_by_gid_type(utils.get_context(), self.group_id, DBInstanceType.MASTER)
        standby = InstanceGroupItem.get_by_gid_type(utils.get_context(), self.group_id, DBInstanceType.STANDBY)
        if not master or not standby:
            raise Exception("resize_ha: master or standby instance not found in group %s" % self.group_id)
        old_master_inst_id = master.instance_id
        old_standby_inst_id = standby.instance_id

        old_master_inst = utils.get_builtin_instance(old_master_inst_id)
        old_flavor_id = old_master_inst.flavor_id
        old_virtual_instance_id = old_master_inst.virtual_instance_id
        self.inst_id = old_master_inst.id
        
        if old_flavor_id not in ['1', '2', '3', '4']:
            raise Exception("It is not support to do resizing based on flavor id: %s, supported flavor_ids should be in (1,2,3,4)" % (old_flavor_id))
        
        flavor = str(int(old_flavor_id) + 1)
        LOG.info("old flavor : %s, new flavor : %s" % (old_flavor_id, flavor))
        
        utils.resize_flavor_byclient(self.inst_id, flavor)
        inst_ids = utils.check_resize_status(self.group_id)

        if len(inst_ids) >= 2:
            utils.check_rpl_delay(inst_ids[0])
            utils.check_rpl_delay(inst_ids[1])

            import time
            # give the cluster a minute to settle after the resize before re-reading the group
            time.sleep(60)

            new_master = utils.get_builtin_instance(old_virtual_instance_id)
            new_standby = InstanceGroupItem.get_by_gid_type(utils.get_context(), new_master.group_id, DBInstanceType.STANDBY)

            if new_master.virtual_instance_id == old_virtual_instance_id and \
                            new_master.id != old_master_inst.id:
                self.inst_id = new_master.id
                self.dbslave_id = new_standby.instance_id
                self.vip_id = utils.get_vip_id(self.inst_id)
            else:
                raise Exception("resize for ha failed,new_master.virtual_instance_id %s,"
                                " old_virtual_instance_id %s, new_master.id %s,"
                                " old_master_inst.id %s, new_standby_id %s,"
                                " old_standy_inst_id %s" % (
                                    new_master.virtual_instance_id, old_virtual_instance_id,
                                    new_master.id, old_master_inst.id,
                                    new_standby.instance_id, old_standy_inst_id))
            
            self.validate()
            
        else:
            raise Exception("the num of instance_id should be equal or greater than two after resize HA ")
 def update(self):
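     """Create a configuration, update its values, name and description through the client,
     and verify the stored configuration and overrides reflect the update."""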
     self.create()
     configuration_id = self.config.id
     values = {"myisam_sort_buffer_size":115343360,"join_buffer_size":307200,'sort_buffer_size':327680}
     valuesJson = json.dumps(values)
     name = 'update_%s' % self.config.name
     description = 'update_%s' % self.config.description
     utils.configuration_update_byclient(configuration_id, valuesJson, name, description)
     config = KSC_Configuration.load(utils.get_context(), configuration_id)
     if config == self.config:
         raise Exception("update config, something is not update, config = %s" % config)
     
     override = KSC_Configuration.get_configuration_overrides(utils.get_context(), configuration_id)
     for k, v in values.iteritems():
         if str(override.get(k)) != str(v):
             raise Exception("update config: value of %s was not updated, override = %s" % (k, override))
 def migrate(self, instance_id=None):
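     """Migrate a SINGLE instance via the taskmanager and verify it lands on a new nova server
     with its generated test databases intact."""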
     self.inst_id = instance_id
     if not self.inst_id:
         self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.SINGLE)
     
     utils.check_server_status(self.inst_id, 
                               expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.SINGLE,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False)
     
     inst_db_info = utils.get_instance(self.inst_id, deleted= False)
     old_nova_server_id = inst_db_info.compute_instance_id
     
     ran_count = 45
     utils.generate_databases(self.inst_id, count = ran_count)
     _ret = rpc.call(utils.get_context(),
                     "taskmanager",
                     {"method": "migrate",
                      "args": {'migrate_id': inst_db_info.id}})
     
     utils.check_server_status(self.inst_id, 
                               expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.SINGLE,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False)
     
     new_nova_server_id = utils.get_instance(self.inst_id, deleted= False).compute_instance_id
     assert old_nova_server_id != new_nova_server_id
     
     utils.check_generated_databases(self.inst_id, count = ran_count)
     self.validate()
 def patch(self):
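     """Patch a configuration's values through the client and verify the stored overrides
     reflect the patched values."""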
     if not self.config:
         self.create()
     configuration_id = self.config.id
     values = json.dumps({"myisam_sort_buffer_size":116391936,"join_buffer_size":409600})
     utils.configuration_patch_byclient(configuration_id, values)
     override = KSC_Configuration.get_configuration_overrides(utils.get_context(), configuration_id)
     for k, v in json.loads(values).iteritems():
         if str(override.get(k)) != str(v):
             raise Exception("patch config: value of %s was not updated, override = %s" % (k, override))
 def migrate_when_backup_fail(self):
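     """Create a backup on a read replica, force its state to FAILED in the db, then run the
     migrate flow to make sure migration still succeeds with a failed backup present."""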
     self.inst_id = self._get_rid()
     utils.check_server_status(self.inst_id, type = DBInstanceType.READ_REPLI)
     
     backup = utils.create_backup_byclient(self.inst_id)
     utils.check_backup_status(backup.id)
     bk_info = DBBackup.find_by(utils.get_context(), id = backup.id)
     bk_info.state = 'FAILED'
     bk_info.save()
     
     self.migrate()
 def delete(self):
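     """Delete a configuration through the client and verify that loading it and fetching its
     overrides both fail afterwards."""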
     self.create()
     #config_type = "1"
     #config = utils.get_config_bytenant(config_type)
     config = self.config
     utils.configuration_delete_byclient(config.id)
     deleted_checks = 0
     # both the load and the override lookup should fail once the configuration is deleted
     try:
         KSC_Configuration.load(utils.get_context(), config.id)
     except Exception:
         deleted_checks += 1

     try:
         KSC_Configuration.get_configuration_overrides(utils.get_context(), config.id)
     except Exception:
         deleted_checks += 1

     if deleted_checks != 2:
         raise Exception("delete config: configuration %s was not deleted" % config.id)
    def patch_new(self):
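        """Patch a configuration through the client and verify each patched key is reflected
        in the stored overrides."""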
        if not self.config:
            self.create_new()
        configuration_id = self.config.id
        values = {"myisam_sort_buffer_size":'xxx',"join_buffer_size":'xxx'}
#         values = json.dumps({"myisam_sort_buffer_size":116391936,"join_buffer_size":409600})
        utils.configuration_patch_byclient(configuration_id, json.dumps(values))
        override = KSC_Configuration.get_configuration_overrides(utils.get_context(), configuration_id)
        for k, v in values.iteritems():
            if override.get(k) != v:
                raise Exception("patch config: value of %s was not updated, override = %s" % (k, override))
    def migrate(self):
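        """Migrate a read replica via the taskmanager, verify a new instance comes up under the
        same virtual instance id, then validate it against the master."""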
        
        rr_inst_id = self._get_rid()
        old_rr_inst = utils.get_builtin_instance(rr_inst_id)
        old_rr_id = old_rr_inst.id
        
        rr_vid = old_rr_inst.virtual_instance_id
        
        _ret = rpc.call(utils.get_context(), "taskmanager",
                       {"method": "migrate",
                        "args": {'migrate_id': old_rr_inst.id}})

        utils.check_server_status(rr_vid, DBInstanceType.READ_REPLI, InstanceTasks.NONE, utils.ServiceStatuses.RUNNING, timeout = 600)
        new_rr_inst = utils.get_builtin_instance(rr_vid)
        assert new_rr_inst.virtual_instance_id == rr_vid and new_rr_inst.id != old_rr_id, (rr_vid, new_rr_inst.id)
        
        master_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
        self._validate(master_id, rr_vid)
 def force_migrate(self):
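     """Migrate a SINGLE instance to an explicitly requested host and log an error if the
     instance does not end up on that hypervisor."""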
     force_host = "rds_zone_1:rds-control-18-220.ksc.com"
     
     self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.SINGLE)
     _ret = rpc.call(utils.get_context(),
                     "taskmanager",
                     {"method": "migrate",
                      "args": {'migrate_id': self.inst_id, 'host' : force_host }})
     
     self.inst_id = _ret['id']
     self.validate()
     
     new_inst = utils.get_instance(self.inst_id)
     nova_id = new_inst.compute_instance_id
     info = utils.get_nova_server_info(nova_id)
     
     if info['OS-EXT-SRV-ATTR:hypervisor_hostname'] != force_host:
         LOG.error("expected host is %s, finally host is %s" % (force_host, info['OS-EXT-SRV-ATTR:hypervisor_hostname']))
 def _failover(self, stop_mysqld = False, rm_mysql_data = False):
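     """Break a read replica (optionally stop mysqld and/or remove its mysql data), trigger a
     taskmanager failover and verify the replica comes back RUNNING with the master's data."""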
     self.inst_id = self._get_rid()
     utils.check_server_status(self.inst_id,expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.READ_REPLI,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False,timeout=10)
     self.vip_id = utils.get_vip_id(self.inst_id)
     _rr_server = utils.get_builtin_instance(self.inst_id)
     nova_instance = _rr_server.server
     self.group_id = _rr_server.db_info.group_id
     instance_id = self.inst_id
     
     ran_count = 56
     utils.generate_databases(self.master_id, count = ran_count)
     
     ip = utils.check_allocate_ip(nova_instance)
     if stop_mysqld:
         utils.stop_mysqld(ip)
     
     if rm_mysql_data:
         utils.mysql_data_lost(ip)
     
     utils.check_server_status(self.inst_id,expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.READ_REPLI,
                               expected_svr_status=utils.ServiceStatuses.SHUTDOWN,
                               deleted=False,timeout=120)
     _ret = rpc.call(utils.get_context(),"taskmanager",
                     {
                      "method": "failover",
                      "args": {'instance_id':instance_id}
                      }
                     )
     
     utils.check_server_status(self.inst_id,expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.READ_REPLI,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False,timeout=120)
     
     self.master_id = utils.get_instance_id(self.group_id, DBInstanceType.MASTER)
     
     utils.check_generated_databases(self.inst_id, count = ran_count)
     self.validate()
 def validate(self):
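     """Common health check for an HA group: master/standby status, admin users, running mysqld,
     vip binding, backups, replication delay, replication consistency and topology."""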
     utils.check_server_status(self.inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.MASTER,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=600)
     
     utils.check_server_status(self.dbslave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.STANDBY,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=600)
     
     rr_items = InstanceGroupItem.get_by_gid_type(utils.get_context(), self.group_id, DBInstanceType.READ_REPLI, deleted = False)
     slave_ids = []
     for rr in rr_items:
         slave_ids.append(rr.instance_id)
     slave_ids.append(self.dbslave_id)
     
     utils.check_mysql_adminuser(self.inst_id)
     utils.check_mysql_adminuser(self.dbslave_id)
     
     for _id in [self.inst_id, self.dbslave_id]:
         utils.check_mysql_is_running(_id)
     
     utils.check_vip(self.inst_id, vip_id=self.vip_id)
     self.backup_id = utils.check_backup(self.group_id)
     utils.check_backup_status(self.backup_id) 
     utils.check_backup_path(self.backup_id)
     
     for slave_id in slave_ids:
         utils.check_rpl_delay(slave_id)
         
     master_inst = utils.get_builtin_instance(self.inst_id)
     slave_inst = utils.get_builtin_instance(self.dbslave_id)
     master_ip = utils.check_allocate_ip(master_inst.server)
     slave_ip = utils.check_allocate_ip(slave_inst.server)
     LOG.info("master_ip:%s  slave_ip:%s" % (master_ip, slave_ip))
     utils.check_rpl_consist(self.inst_id, slave_ids, master_ip, [slave_ip])
     utils.check_rpl_topo_ha(self.group_id)
    def _failover_test(self, group_id, trigger_inst_id, 
                          do_workload = False, do_prepare = False,
                          mysqld_killed = False,
                          host_rebooted = False,
                          remove_tmp_initsql = False, 
                          mysql_data_lost = False,
                          check_vip = False, 
                          check_rpl_consist = True, 
                          check_binlog_range = False):
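        """Generic failover scenario: optionally break the trigger instance (kill mysqld, reboot
        the host, drop mysql data, remove the tmp init sql), call the taskmanager failover, then
        verify vip/rip mapping, group membership, instance status and the restorable binlog range."""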
                
        LOG.info("Doing Failover Test, group_id:%s, instance_id:%s, do_workload:%s, do_prepare:%s." % 
                 (group_id, trigger_inst_id, do_workload, do_prepare))
        before_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted = False)
        before_items = set(map(lambda x: x.type + "_" + x.instance_id, before_group_items))
        
        before_instance = test_utils.get_builtin_instance( trigger_inst_id)
        before_rip = test_utils.check_allocate_ip(before_instance.server)
        before_origin_instid = before_instance.id
        
        
        rt_before = rt_after = None
        if check_binlog_range:
            rt_before = test_utils.get_restorable_time(trigger_inst_id)
        
        if do_workload and before_instance.type == DBInstanceType.MASTER:
            FAILOVERInstance.__run_workload(do_prepare = do_prepare)

        if remove_tmp_initsql:
            FAILOVERInstance.__trigger_vm_remove_tmp_sql_file(trigger_inst_id)
            
        if mysqld_killed:
            FAILOVERInstance.__trigger_mysqld_crash(trigger_inst_id)
            test_utils.check_server_status(trigger_inst_id, expected_task=tasks.InstanceTasks.NONE, 
                                           type=before_instance.type, expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN, 
                                           deleted=False, timeout=120)

        if host_rebooted:
            FAILOVERInstance.__trigger_host_reboot(trigger_inst_id)
            # when the host machine is rebooted there is no guest agent running to update the service status.
#             test_utils.check_server_status(trigger_inst_id, expected_task=tasks.InstanceTasks.NONE, 
#                                            type=before_instance.type, expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN, 
#                                            deleted=False, timeout=120)
        
        if mysql_data_lost:
            FAILOVERInstance.__trigger_mysql_data_lost(trigger_inst_id)

        rpc.call(test_utils.get_context(), "taskmanager", 
                        {"method": "failover", "args": {'instance_id':before_origin_instid}}, timeout = 3600)

    
        ## check vip <--> rip mapping:
        ## the vip must stay with the instance while the rip behind it is remapped within about 10 seconds.
        if before_instance.type in (DBInstanceType.MASTER, DBInstanceType.READ_REPLI):
            after_instance = test_utils.get_builtin_instance( trigger_inst_id)
            after_nova_inst = after_instance.server
            after_rip = test_utils.check_allocate_ip(after_nova_inst)
            assert  after_instance.vip == before_instance.vip and before_rip != after_rip
        
        if before_instance.type == DBInstanceType.MASTER:
            test_utils.check_server_status(before_instance.id, 
                                           expected_task = tasks.InstanceTasks.NONE, 
                                           type=DBInstanceType.MASTER, 
                                           expected_svr_status = test_utils.ServiceStatuses.RUNNING, 
                                           deleted=False, timeout=120)
        
        ## check replication topo
        after_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted = False)
        after_items = set(map(lambda x: x.type + "_" + x.instance_id, after_group_items))
        LOG.info("before " + str(before_items))
        LOG.info("after " + str(after_items))
        
        if check_rpl_consist:
            diff_items = (before_items - after_items)
#             assert len(diff_items) == 0
            assert len(before_group_items) == len(after_group_items), "size of mysql cluster should be the same."
            
            for group_item in after_group_items:
                
                if group_item.type == DBInstanceType.STANDBY and group_item.instance_id == before_instance.id:
                    item = InstanceGroupItem.get_by_instance_id(test_utils.get_context(), group_item.instance_id, deleted = False)
                    assert item != None
                    continue
                
                test_utils.check_server_status(group_item.instance_id, 
                                               expected_task = tasks.InstanceTasks.NONE, 
                                               type = group_item.type, 
                                               expected_svr_status = test_utils.ServiceStatuses.RUNNING, 
                                               deleted = False, timeout = 120)
                
        if check_binlog_range:
            rt_after = test_utils.get_restorable_time(trigger_inst_id)
            assert  rt_after.end > rt_before.end, (rt_after.end, rt_before.end)
            time.sleep(60)
            rt_after2 = test_utils.get_restorable_time(trigger_inst_id)
            assert  rt_after2.end > rt_after.end, (rt_after2.end, rt_after.end)
 def failover(id):
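     """Fire a taskmanager failover RPC for the given instance id."""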
     rpc.call(utils.get_context(), "taskmanager",
          {"method": "failover",
            "args": {'instance_id':id}})
    def failover(self, instance_id=None, _strategy=None):
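        """Stop mysqld on the master or the standby (per CONF.ha_failover_strategy), trigger a
        taskmanager failover and verify the group recovers with the generated test databases
        present on the new master."""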
        self.inst_id = instance_id
        if not self.inst_id:
            self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
        _inst = utils.get_builtin_instance(self.inst_id)
        self.group_id = _inst.group_id
        self.dbslave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY)
        
        utils.check_server_status(self.inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=DBInstanceType.MASTER,
                                  expected_svr_status=utils.ServiceStatuses.RUNNING,
                                  deleted=False, timeout=120)
        
        utils.check_server_status(self.dbslave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=DBInstanceType.STANDBY,
                                  expected_svr_status=utils.ServiceStatuses.RUNNING,
                                  deleted=False, timeout=120)
        self.vip_id = utils.get_vip_id(self.inst_id)
        
        strategy = CONF.ha_failover_strategy
        virtual_instance_id = None
        if strategy == 'master':
            _ret = utils.get_builtin_instance(self.inst_id)
            nova_instance = _ret.server
            instance_id = _ret.id
            type = DBInstanceType.MASTER
            virtual_instance_id = _ret.virtual_instance_id
        elif strategy == 'standby':
            _ret = utils.get_builtin_instance(self.dbslave_id)
            nova_instance = _ret.server
            instance_id = _ret.id
            type = DBInstanceType.STANDBY
        else:
            raise Exception("not found ha_failover_strategy %s" % strategy)
            
        
        rancount = random.randint(50, 100)
        utils.generate_databases(self.inst_id, count = rancount)
        
        ip = utils.check_allocate_ip(nova_instance)
        utils.stop_mysqld(ip, stop_ga=True)
        utils.check_server_status(instance_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=type,
                                  expected_svr_status=utils.ServiceStatuses.SHUTDOWN,
                                  deleted=False, timeout=120)
        rpc.call(utils.get_context(), "taskmanager",
                     {"method": "failover",
                       "args": {'instance_id':instance_id}})
        
        if strategy == 'master':
            origin_inst_id = inst_utils.virtual_instid_2_origin_instid(virtual_instance_id)
            self.inst_id = origin_inst_id
            
            utils.check_server_status(origin_inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                      type=DBInstanceType.MASTER,
                                      expected_svr_status=utils.ServiceStatuses.RUNNING,
                                      deleted=False, timeout=120)

        new_slave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY) 
        utils.check_server_status(new_slave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                  type=DBInstanceType.STANDBY,
                                  expected_svr_status=utils.ServiceStatuses.RUNNING,
                                  deleted=False, timeout=120)

        self.dbslave_id = new_slave_id
        
        utils.check_generated_databases(self.inst_id, count = rancount)
        utils.clear_generated_databases(self.dbslave_id, count = rancount)
        self.validate()
 def migrate(self, instance_id=None, _strategy=None):
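     """Migrate the master or the standby of an HA group (per CONF.migrate_strategy) via the
     taskmanager and verify the group comes back healthy with the generated test databases intact."""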
     self.inst_id = instance_id
     if not self.inst_id:
         self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
     _master_inst = utils.get_instance(id = self.inst_id, deleted = 0)
     self.group_id = _master_inst.group_id
     self.dbslave_id = utils.get_instance_id(self.group_id, DBInstanceType.STANDBY)
     
     utils.check_server_status(self.inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.MASTER,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=120)
     
     utils.check_server_status(self.dbslave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                               type=DBInstanceType.STANDBY,
                               expected_svr_status=utils.ServiceStatuses.RUNNING,
                               deleted=False, timeout=120)
     
     self.vip_id = utils.get_vip_id(_master_inst.id)
     
     virtual_instance_id = None 
     strategy = CONF.migrate_strategy
     if strategy == 'master':
         _ret = utils.get_builtin_instance(_master_inst.id)
         nova_instance = _ret.server
         instance_id = _master_inst.id
         type = DBInstanceType.MASTER
         virtual_instance_id = _ret.virtual_instance_id
         
     elif strategy == 'standby':
         _ret = utils.get_builtin_instance(self.dbslave_id)
         nova_instance = _ret.server
         instance_id = self.dbslave_id
         type = DBInstanceType.STANDBY
     else:
         raise Exception("not found migrate_strategy ss%s" % strategy)
     
     ran_count = random.randint(50, 100)
     utils.generate_databases(self.inst_id, count = ran_count)    
     
     _ret = rpc.call(utils.get_context(), "taskmanager",
                  {"method": "migrate",
                   "args": {'migrate_id':instance_id}})
     
     if strategy == 'master':
         raw_instance_id = inst_utils.virtual_instid_2_origin_instid(virtual_instance_id) 
         new_server_id = utils.get_builtin_instance(raw_instance_id).server_id
         utils.check_server_status(raw_instance_id, expected_task=utils.tasks.InstanceTasks.NONE,
                                   type=DBInstanceType.MASTER,
                                   expected_svr_status=utils.ServiceStatuses.RUNNING,
                                   deleted=False, timeout=120)
         assert new_server_id != nova_instance.id
         self.inst_id = raw_instance_id
         
     elif strategy == "standby":
         utils.check_server_status(_ret['id'], expected_task=utils.tasks.InstanceTasks.NONE, 
                                   type = DBInstanceType.STANDBY, 
                                   expected_svr_status=utils.ServiceStatuses.RUNNING,
                                   deleted=False, timeout = 120)
         self.dbslave_id = _ret['id']
         
     utils.check_generated_databases(self.inst_id, count = ran_count)
     utils.check_generated_databases(self.dbslave_id, count = ran_count)
     utils.clear_generated_databases(self.inst_id, count = ran_count)
     self.validate()