def resize_ha(self, instance_id=None):
    """Resize (upgrade flavor of) an HA master/standby pair and validate the result.

    :param instance_id: origin instance id of the HA master; when None, one is
        looked up for the current tenant.
    :raises Exception: when the group has no master/standby row, the current
        flavor is outside the supported range, the resize leaves fewer than two
        instances, or no master/standby switch is observed after the resize.
    """
    import time  # hoisted from mid-function; stdlib, cheap

    self.inst_id = instance_id
    if not self.inst_id:
        self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    _inst = utils.get_instance(self.inst_id, deleted=0)
    self.group_id = _inst.group_id
    master = InstanceGroupItem.get_by_gid_type(utils.get_context(), self.group_id, DBInstanceType.MASTER)
    standby = InstanceGroupItem.get_by_gid_type(utils.get_context(), self.group_id, DBInstanceType.STANDBY)
    # BUGFIX: the original left old_master_inst_id/old_standy_inst_id unbound
    # (NameError) when the group item was missing; fail fast / default instead.
    if not master:
        raise Exception("no MASTER group item found for group_id %s" % self.group_id)
    old_master_inst_id = master.instance_id
    old_standy_inst_id = standby.instance_id if standby else None
    old_master_inst = utils.get_builtin_instance(old_master_inst_id)
    old_flavor_id = old_master_inst.flavor_id
    old_virtual_instance_id = old_master_inst.virtual_instance_id
    self.inst_id = old_master_inst.id
    if old_flavor_id not in ['1', '2', '3', '4']:
        raise Exception("It is not support to do resizing based on flavor id: %s, supported flavor_ids should be in (1,2,3,4)" % (old_flavor_id))
    # Resize is always a one-step flavor upgrade (id + 1).
    flavor = str(int(old_flavor_id) + 1)
    LOG.info("old flavor : %s, new flavor : %s" % (old_flavor_id, flavor))
    utils.resize_flavor_byclient(self.inst_id, flavor)
    inst_ids = utils.check_resize_status(self.group_id)
    if len(inst_ids) >= 2:
        utils.check_rpl_delay(inst_ids[0])
        utils.check_rpl_delay(inst_ids[1])
        # Give the HA switch time to settle before re-reading topology.
        time.sleep(60)
        new_master = utils.get_builtin_instance(old_virtual_instance_id)
        new_standby = InstanceGroupItem.get_by_gid_type(utils.get_context(), new_master.group_id, DBInstanceType.STANDBY)
        # A successful HA resize keeps the virtual id but swaps the backing
        # instance (master/standby switch-over).
        if new_master.virtual_instance_id == old_virtual_instance_id and \
                new_master.id != old_master_inst.id:
            self.inst_id = new_master.id
            self.dbslave_id = new_standby.instance_id
            self.vip_id = utils.get_vip_id(self.inst_id)
        else:
            raise Exception("resize for ha failed,new_master.virtual_instance_id %s,"
                            " old_virtual_instance_id %s, new_master.id %s,"
                            " old_master_inst.id %s, new_standby_id %s,"
                            " old_standy_inst_id %s" % (
                                new_master.virtual_instance_id, old_virtual_instance_id,
                                new_master.id, old_master_inst.id, new_standby.instance_id,
                                old_standy_inst_id))
        self.validate()
    else:
        raise Exception("the num of instance_id should be equal or greater than two after resize HA ")
def _get_ignore_hosts(self, group_id):
    """Return the physical hostnames of this group's standby and master.

    Used to keep the scheduler from placing a new instance on a host that
    already carries one side of the HA pair.  A missing role is logged as an
    error and simply skipped.
    """
    ignore_hosts = []
    role_items = (
        ("standby", InstanceGroupItem.get_by_gid_type(self.context, group_id, DBInstanceType.STANDBY)),
        ("master", InstanceGroupItem.get_by_gid_type(self.context, group_id, DBInstanceType.MASTER)),
    )
    for role, item in role_items:
        if item:
            phyhost = inst_models.Instance.get_pyhhostname(self.context, item.instance_id)
            ignore_hosts.append(phyhost)
        else:
            LOG.error("%s is not exist in group_id %s." % (role, group_id))
    return ignore_hosts
def delete_async(self):
    """Asynchronously delete this instance and its HA/group bookkeeping.

    For a MASTER, the standby's DB records are deleted alongside.  When this
    is the last item in its group, the security group, monitor notification
    and the group row itself are cleaned up too.  Finally the instance's own
    rows are marked deleted.
    """
    LOG.debug("prepare delete instance %s" % self.id)
    deleted_at = utils.utcnow()
    # Delete guest queue.
    _item = InstanceGroupItem.get_by_instance_id(self.context, self.id)
    group_id = _item.group_id
    if _item.type == DBInstanceType.MASTER:
        standby = None
        try:
            standby = InstanceGroupItem.get_by_gid_type(self.context, group_id, DBInstanceType.STANDBY)
        except exception.ModelNotFoundError:
            # No standby registered for this master: nothing extra to delete.
            pass
        if standby is not None:
            standby.delete()
            standby_inst_id = standby.instance_id
            standby_tasks = self.load(self.context, standby_inst_id)
            standby_tasks.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.NONE)
            standby_tasks.set_servicestatus_deleted()
            standby_tasks._delete_instance_config()
    item_list = InstanceGroupItem.list_by_gid(self.context, group_id)
    if len(item_list) == 1:
        # This is the last member: tear down group-scoped resources.
        # Delete associated security group
        if CONF.trove_security_groups_support:
            try:
                # NOTE(review): uses self.group_id while the rest of this
                # method uses the local group_id — confirm they always match.
                SecurityGroup.delete_for_group(self.group_id, self.context)
            except Exception as e:
                LOG.error(utils.get_traceback_stack())
        LOG.info("send notify to monitor when delete instance %s" % self.id)
        group_rpcapi.API(self.context).group_update(group_id, notify_when_delete=True)
        LOG.info("Delete group %s" % group_id)
        InstanceGroup.delete(self.context, group_id)
    if _item.type in [DBInstanceType.MASTER, DBInstanceType.SINGLE]:
        try:
            LOG.info("Delete autobackup_setting of group_id %s" % group_id)
            AutoBackup.delete(self.context, group_id)
        except Exception:
            # BUGFIX: was a bare except; best-effort cleanup, keep going.
            LOG.error(utils.get_traceback_stack())
    _item.delete()
    self.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.NONE)
    self.set_servicestatus_deleted()
def validate(self):
    """Post-resize health validation of the HA pair.

    Checks server/task status for master and standby, mysql admin users,
    running mysqld on both nodes, the VIP, the latest backup, replication
    delay on every slave (standby + read replicas), replication consistency
    and the HA replication topology.

    :raises: whatever the underlying utils.check_* helpers raise on failure.
    """
    utils.check_server_status(self.inst_id, expected_task=utils.tasks.InstanceTasks.NONE,
                              type=DBInstanceType.MASTER,
                              expected_svr_status=utils.ServiceStatuses.RUNNING,
                              deleted=False, timeout=600)
    utils.check_server_status(self.dbslave_id, expected_task=utils.tasks.InstanceTasks.NONE,
                              type=DBInstanceType.STANDBY,
                              expected_svr_status=utils.ServiceStatuses.RUNNING,
                              deleted=False, timeout=600)
    rr_items = InstanceGroupItem.get_by_gid_type(utils.get_context(), self.group_id,
                                                 DBInstanceType.READ_REPLI, deleted=False)
    # All replication slaves: every read replica plus the standby.
    slave_ids = [rr.instance_id for rr in rr_items]
    slave_ids.append(self.dbslave_id)
    utils.check_mysql_adminuser(self.inst_id)
    utils.check_mysql_adminuser(self.dbslave_id)
    for _id in [self.inst_id, self.dbslave_id]:
        # BUGFIX: original passed self.inst_id here, so the standby's mysqld
        # was never actually checked.
        utils.check_mysql_is_running(_id)
    # VIP is bound to the master only; check it once outside the loop.
    utils.check_vip(self.inst_id, vip_id=self.vip_id)
    self.backup_id = utils.check_backup(self.group_id)
    utils.check_backup_status(self.backup_id)
    utils.check_backup_path(self.backup_id)
    for slave_id in slave_ids:
        utils.check_rpl_delay(slave_id)
    master_inst = utils.get_builtin_instance(self.inst_id)
    slave_inst = utils.get_builtin_instance(self.dbslave_id)
    master_ip = utils.check_allocate_ip(master_inst.server)
    slave_ip = utils.check_allocate_ip(slave_inst.server)
    LOG.info("master_ip:%s slave_ip:%s" % (master_ip, slave_ip))
    utils.check_rpl_consist(self.inst_id, slave_ids, master_ip, [slave_ip])
    utils.check_rpl_topo_ha(self.group_id)
def _delete_resources(self, fake):
    """Tear down compute/VIP/backup resources for this instance.

    :param fake: when True and this is a MASTER, mysqld is stopped but the
        compute server and VIP are kept (a "fake" delete for later rollback).
    Errors in each stage are logged and swallowed so the remaining stages
    still run.
    """
    group_item = InstanceGroupItem.get_by_instance_id(self.context, self.id)
    group_id = group_item.group_id
    inst_type = group_item.type
    instance_id = self.db_info.id
    if self.server and self.db_info.server_status == "ACTIVE":
        # set instance to read only model
        LOG.info("Set readonly for instance %s" % self.id)
        self._set_instance_readonly(instance_id=self.id)
    else:
        LOG.info("vm_status is not ACTIVE for %s" % self.id)
    if inst_type == DBInstanceType.MASTER:
        rrinsts = []
        # BUGFIX: standby_inst_id was unbound (NameError) further down when
        # this lookup failed; initialize and guard on it instead.
        standby_inst_id = None
        try:
            standby = InstanceGroupItem.get_by_gid_type(self.context, group_id, DBInstanceType.STANDBY)
            rrinsts = InstanceGroupItem.get_by_gid_type(self.context, group_id, DBInstanceType.READ_REPLI)
            standby_inst_id = standby.instance_id
            # BUGFIX: args were passed as one tuple to three %s placeholders,
            # which raises at format time; pass them individually.
            LOG.info("MASTER %s,it hava STANDBY %s,RRS %s",
                     self.id, standby_inst_id,
                     [_inst.instance_id for _inst in rrinsts])
            InstanceGroupItem.delete(self.context, standby_inst_id)
        except Exception as e:
            LOG.error(utils.get_traceback_stack())
        # waite replication group db sysnc
        if len(rrinsts) > 0:
            self.guest.ksc_set_read_only(True)
            for _inst in rrinsts:
                try:
                    rr_instance = self.load(self.context, _inst.instance_id)
                    rr_instance.waite_rpl_synchronize(time_out=CONF.delete_waite_rplg_sync)
                except Exception as e:
                    LOG.error(utils.get_traceback_stack())
        # delete standby instance
        if standby_inst_id is not None:
            try:
                try:
                    standby_instance = self.load(self.context, standby_inst_id)
                except exception.UnprocessableEntity:
                    # Fall back to a fresh-task load when the instance is in an
                    # unprocessable state.
                    standby_instance = FreshInstanceTasks.load(self.context, standby_inst_id)
                standby_instance.update_db(deleted=True, deleted_at=utils.utcnow(),
                                           task_status=InstanceTasks.NONE)
                standby_instance.set_servicestatus_deleted()
                standby_instance._delete_instance_config()
                if standby_instance.server:
                    LOG.info("Delete STANDBY compute server %s" % standby_instance.server.id)
                    standby_instance.get_guest().delete_queue()
                    standby_instance.server.delete()
                    poll_until(standby_instance.server_is_finished, sleep_time=1,
                               time_out=CONF.server_delete_time_out)
                else:
                    LOG.info("standby instance vm_status is not ACTIVE for %s" % standby_inst_id)
            except Exception as e:
                LOG.error(utils.get_traceback_stack())
    if fake is True and self.type == DBInstanceType.MASTER:
        try:
            LOG.debug("fake is True, %s is MASTER, stop mysqld", self.id)
            self.guest.ksc_stop_db(do_not_start_on_reboot=True)
        except Exception as e:
            msg = "fake_delete, instance: %s, stop mysqld error, exception: %s " % (self.id, str(e))
            LOG.error("%s, %s", msg, utils.get_traceback_stack())
            AlarmRpc(self.context).alarm(self.tenant_id,
                                         level=AlarmRpc.LEVEL_ERROR,
                                         _type=AlarmRpc.TYPE_TASKMANAGER,
                                         message=msg)
    if self.server:
        if fake is True and self.type == DBInstanceType.MASTER:
            LOG.debug("fake is True, %s is MASTER, skip delete server", self.id)
        else:
            try:
                LOG.info("Delete compute server %s" % self.server.id)
                guest = self.get_guest()
                guest.delete_queue()
                self.server.delete()
                poll_until(self.server_is_finished, sleep_time=1,
                           time_out=CONF.server_delete_time_out)
            except Exception as e:
                LOG.error(utils.get_traceback_stack())
    # delete group_item/autobackup_setting/group
    if self.type in [DBInstanceType.MASTER, DBInstanceType.SINGLE]:
        try:
            LOG.info("Delete autobackup_setting of group_id %s" % group_id)
            AutoBackup.delete(self.context, group_id)
        except Exception:
            # BUGFIX: was a bare except; best-effort cleanup, keep going.
            LOG.error(utils.get_traceback_stack())
    # remove vip.
    if CONF.trove_vip_support and \
            self.type in [DBInstanceType.MASTER, DBInstanceType.SINGLE, DBInstanceType.READ_REPLI]:
        if fake is True and self.type == DBInstanceType.MASTER:
            LOG.debug("fake is True, %s is MASTER, skip release vip", self.id)
        else:
            try:
                self.update_db(task_status=InstanceTasks.RELEASE_VIP)
                LOG.info("release vip for instance %s" % instance_id)
                if inst_type in [DBInstanceType.MASTER, DBInstanceType.SINGLE]:
                    cur_vip = vipService.InstanceVip.get_by_instance_id(self.context, instance_id)
                    vipService.InstanceVip.release_vip(self.context, cur_vip)
                elif inst_type in [DBInstanceType.READ_REPLI]:
                    vipService.InstanceVip.deallocate(self.context, instance_id,
                                                      deleted=False, purge=True)
            except Exception as e:
                LOG.error(utils.get_traceback_stack())
def create(self, req, body, tenant_id):
    """Create a backup for a tenant.

    Accepts either a ``group`` or an ``instance`` in the request body; for an
    HA group the backup is taken on the STANDBY node when one exists.
    Returns 202 with a BackupView of the created record.

    :raises exception.BadRequest: when neither group nor instance is given.
    """
    LOG.debug("Creating a Backup for tenant '%s'" % tenant_id)
    context = req.environ[wsgi.CONTEXT_KEY]
    data = body['backup']
    instance = data.get('instance', None)
    group = data.get('group', None)
    name = data['name']
    type = data.get("type", "snapshot")
    desc = data.get('description')
    parent_id = data.get('parent_id')
    LOG.info("parent_id:%s", parent_id)
    if group is None and instance is None:
        raise exception.BadRequest("you must specify group or instance")
    instance_id = None
    if group is not None:
        # Prefer backing up the standby; fall back to a single instance.
        try:
            instance_id = InstanceGroupItem.get_by_gid_type(context, group, DBInstanceType.STANDBY).instance_id
        except Exception:
            # BUGFIX: was a bare except.
            instance_id = InstanceGroupItem.get_by_gid_type(context, group, DBInstanceType.SINGLE).instance_id
    if instance_id is None and instance is not None:
        instance_id = inst_utils.virtual_instid_2_origin_instid(instance)
    _instance = DBInstance.find_by(context, id=instance_id)
    _type = _instance.service_type
    ds, ds_version = ds_models.get_datastore_version(_type)
    service_image_id = ds_version.image_id
    grp_item = InstanceGroupItem.get_by_instance_id(context, _instance.id)
    group_id = grp_item.group_id
    # get this group's autobackup config and set the expire_after default
    _autobackup = AutoBackup.get_by_gid(context, group_id)
    expire_after = data.get("expire_after", _autobackup.expire_after)
    duration = _autobackup.duration
    expire_at = AutoBackup.calculate_expire_at(expire_after, duration)
    LOG.info("group_id %s, expire_at :%s", group_id, time.ctime(expire_at))
    if grp_item.type == DBInstanceType.MASTER:
        # Master given directly: switch the backup target to its standby.
        try:
            instance_id = InstanceGroupItem.get_by_gid_type(context, group_id, DBInstanceType.STANDBY).instance_id
        except Exception as e:
            LOG.error(e)
    backup = Backup.create(context, instance_id, name, description=desc, group_id=group_id,
                           backup_type=type, expire_at=expire_at,
                           service_image_id=service_image_id, parent_id=parent_id)
    try:
        ds, ds_version = ds_patch_models.find_datastore_by_image_id(backup.service_image_id)
        backup.db_type = ds.name
    except Exception as ex:
        backup.db_type = ""
        LOG.warn("Failed get db type information of backup %s, %s", backup.id, ex)
    # BUGFIX: original passed the *builtin* `id` function here; the new
    # backup's id is the only sensible chain anchor in scope.
    # TODO(review): confirm _get_chain_ids expects a backup id, not group_id.
    chain = self._get_chain_ids(context, backup.id)
    LOG.info(_("chain : '%s'") % chain)
    return wsgi.Result(views.BackupView(backup).data(), 202)
def create(cls, context, instance_id, name, description=None, group_id=None,
           backup_type=None, expire_at=None, init=False, service_image_id=None,
           parent_id=None):
    """Create a backup record (within the tenant's backup quota) and kick off
    the backup task.

    For a MASTER instance the backup is redirected to its STANDBY.  For
    autobackups, ``parent_id`` selects incremental-vs-full: '0' forces full,
    a concrete id is validated against existing backups, and None lets the
    last backup chain decide (full when the chain is empty, too long, or the
    group switched instances since the last backup).

    :raises exception.TroveError: missing group_id or invalid backup_type.
    :raises exception.NotFound: parent_id does not name an existing backup.
    :raises exception.UnprocessableEntity: instance not ACTIVE (init mode).
    :raises exception.BackupCreationError: DB record creation failed.
    """
    if parent_id is not None:
        # Normalize: blank / whitespace-only parent ids mean "no parent".
        parent_id = str(parent_id)
        LOG.info("parent_id:%s, parent_id.len:%s", parent_id, len(parent_id.strip()))
        if len(parent_id.strip()) == 0:
            parent_id = None
    _parent_id = parent_id
    from trove.instance.models import Instance
    instance_id = utils.get_id_from_href(instance_id)
    instance_model = Instance.load(context, instance_id)
    if init:
        if instance_model.db_info.server_status != 'ACTIVE':
            # BUGFIX: original built a (format, arg) tuple instead of a
            # formatted string; format it properly.
            msg = ("Instance is not currently available for an action to be "
                   "performed (server_status was %s)."
                   % instance_model.db_info.server_status)
            LOG.error(msg)
            raise exception.UnprocessableEntity(msg)
    else:
        instance_model.validate_can_perform_action()
        if instance_model.type == DBInstanceType.MASTER:
            # Back up the standby instead of the master.
            try:
                standby_id = InstanceGroupItem.get_by_gid_type(
                    context, instance_model.group_id, DBInstanceType.STANDBY).instance_id
                instance_model = Instance.load(context, standby_id)
                instance_model.validate_can_perform_action()
                instance_id = standby_id
            except Exception as e:
                LOG.error(e)
                raise e
    if group_id is None:
        raise exception.TroveError("group_id can't None")
    if backup_type is None or backup_type not in [Type.SNAPSHOT, Type.AUTOBACKUP]:
        raise exception.TroveError("instType can't None, only accept value: snapshot or autobackup ")
    if backup_type == Type.SNAPSHOT:
        expire_time = 0
        _parent_id = None  # force full
    elif backup_type == Type.AUTOBACKUP:
        expire_time = int(expire_at)
        if parent_id and parent_id == '0':
            _parent_id = None  # force full
        elif parent_id and parent_id != '0':
            try:
                backup_parent = cls.get_by_id(context, parent_id)
                LOG.debug("backup_parent:%s", backup_parent)
            except Exception:
                # BUGFIX: was a bare except.
                raise exception.NotFound("not found backup with parent_id: %s" % parent_id)
            if not backup_parent:
                raise exception.NotFound("not found backup with parent_id: %s" % parent_id)
        elif parent_id is None:
            LOG.debug("parent_id is None:%s", parent_id)
            last_backup_chain = cls.get_last_backup_chain(group_id)
            backup_incremental_chain_size = CONF.backup_incremental_chain_size
            LOG.info("last_backup_chain: %s, backup_incremental_chain_size: %s",
                     last_backup_chain, backup_incremental_chain_size)
            if len(last_backup_chain) == 0 \
                    or len(last_backup_chain) >= int(backup_incremental_chain_size):
                _parent_id = None  # create full
            else:
                # Find which instance the chain should be compared against:
                # standby when the group has one, else the single instance.
                compare_instance = None
                try:
                    compare_instance = InstanceGroupItem.get_by_gid_type(
                        context, group_id, DBInstanceType.STANDBY)
                except exception.NotFound:
                    # not has standby
                    try:
                        compare_instance = InstanceGroupItem.get_by_gid_type(
                            context, group_id, DBInstanceType.SINGLE)
                    except exception.NotFound:
                        # not has single
                        pass
                if compare_instance:
                    compare_id = compare_instance.instance_id
                    switched = False
                    for b in last_backup_chain:
                        # A chain entry from another instance means the group
                        # switched roles since the last backup -> go full.
                        if b["instance_id"] != compare_id:
                            switched = True  # create full
                            LOG.debug("last_backup_chain: %s, switched: %s, backup_instance_id: %s, b.instance_id: %s",
                                      last_backup_chain, switched, compare_id, b["instance_id"])
                            break
                    if not switched:
                        parent = last_backup_chain.pop()  # create incremental
                        _parent_id = parent["id"]
                else:
                    # not found standby and single
                    _parent_id = None  # create full
    LOG.debug("create backup use parent_id: %s", _parent_id)

    def _create_resources():
        # Runs under the quota reservation; creates the DB row then enqueues
        # the actual backup task.
        try:
            db_info = models.DBBackup.create(name=name,
                                             description=description,
                                             tenant_id=context.tenant,
                                             state=models.BackupState.NEW,
                                             instance_id=instance_id,
                                             deleted=False,
                                             group_id=group_id,
                                             type=backup_type,
                                             expire_at=expire_time,
                                             service_image_id=service_image_id,
                                             parent_id=_parent_id)
        except exception.InvalidModelError as ex:
            LOG.exception("Unable to create Backup record:")
            # BUGFIX: original referenced undefined _instance_id (NameError
            # in this error path); use the resolved instance_id.
            msg = "Unable to create Backup record, group_id %s, instance_id %s, parent_id %s " \
                  % (group_id, instance_id, _parent_id)
            AlarmRpc(context).alarm(context.tenant,
                                    level=AlarmRpc.LEVEL_ERROR,
                                    _type=AlarmRpc.TYPE_TASKMANAGER,
                                    message=msg + str(ex))
            raise exception.BackupCreationError(str(ex))
        api.API(context).create_backup(db_info.id, instance_id)
        return db_info

    return run_with_quotas(context.tenant, {'backups': 1}, _create_resources)