def decorated_function(self, *args, **kwargs): try: ret = function(self, *args, **kwargs) return ret except Exception as e: LOG.error(_("%s" % utils.get_traceback_stack())) raise e except PollTimeOut as e: LOG.error(_("timeout %s" % utils.get_traceback_stack())) raise e
def delete_async(self): LOG.debug("prepare delete instance %s" % self.id) deleted_at = utils.utcnow() # Delete guest queue. _item = InstanceGroupItem.get_by_instance_id(self.context,self.id) group_id = _item.group_id if _item.type == DBInstanceType.MASTER: standby = None try: standby = InstanceGroupItem.get_by_gid_type(self.context, group_id,DBInstanceType.STANDBY) except exception.ModelNotFoundError: pass if standby is not None: standby.delete() standby_inst_id = standby.instance_id standby_tasks = self.load(self.context,standby_inst_id) standby_tasks.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.NONE) standby_tasks.set_servicestatus_deleted() standby_tasks._delete_instance_config() item_list = InstanceGroupItem.list_by_gid(self.context,group_id) if len(item_list)==1: # Delete associated security group if CONF.trove_security_groups_support: try: SecurityGroup.delete_for_group(self.group_id, self.context) except Exception as e: LOG.error(utils.get_traceback_stack()) LOG.info("send notify to monitor when delete instance %s" % self.id) group_rpcapi.API(self.context).group_update(group_id,notify_when_delete=True) LOG.info("Delete group %s" % group_id) InstanceGroup.delete(self.context,group_id) if _item.type in [DBInstanceType.MASTER,DBInstanceType.SINGLE]: try: LOG.info("Delete autobackup_setting of group_id %s" % group_id) AutoBackup.delete(self.context,group_id) except: LOG.error(utils.get_traceback_stack()) _item.delete() self.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.NONE) self.set_servicestatus_deleted()
def run(self): retry = 3 while retry > 0: try: LOG.info("begin makdirs:%s, left retry count: %s" % (self.backup_dir, retry)) if (not os.path.exists(self.backup_dir)): os.makedirs(self.backup_dir) break except OSError, e: LOG.error(e) utils.get_traceback_stack() retry = retry -1 time.sleep(5)
def release_vip(cls, context,vip_address): lb_id = None vip_info = models.DBVips.find_by(context,vip=vip_address,deleted=False) try: lb_id = vip_info.lb_id vip_id = vip_info.id inst_list = models.DBInstanceVip.find_all(vip_id = vip_id,deleted=False) for _i in inst_list: cls.deallocate(context, _i.instance_id, purge = True) except Exception as e: raise exception.TroveError(str(e)) if not lb_id: raise exception.TroveError("release vip %(vip_address)s fail,we can't get lb_id" % locals()) try: lb_vip = find_vip(context, lb_id) pool_id = lb_vip['vip']['pool_id'] delete_vip(context, lb_id) delete_pool(context, pool_id) except Exception as e: LOG.error("delete vip failed or delete pool failed! vip:%s" % (vip_address)) LOG.error(utils.get_traceback_stack()) raise DeleteVipFailed(str(e)) finally: vip_info.delete()
def _get_actual_db_status(self): global MYSQLD_ARGS try: out, err = utils.execute_with_timeout( "/usr/bin/mysqladmin", "ping", run_as_root=True, root_helper="sudo") LOG.info("Service Status is RUNNING.") return rd_models.ServiceStatuses.RUNNING except exception.ProcessExecutionError as e: LOG.error("Process execution %s " % utils.get_traceback_stack()) try: out, err = utils.execute_with_timeout("/bin/ps", "-C", "mysqld", "h") pid = out.split()[0] # TODO(rnirmal): Need to create new statuses for instances # where the mysql service is up, but unresponsive LOG.info("Service Status is BLOCKED.") return rd_models.ServiceStatuses.BLOCKED except exception.ProcessExecutionError as e: if not MYSQLD_ARGS: MYSQLD_ARGS = load_mysqld_options() pid_file = MYSQLD_ARGS.get('pid_file', '/var/run/mysqld/mysqld.pid') if os.path.exists(pid_file): LOG.info("Service Status is CRASHED.") return rd_models.ServiceStatuses.CRASHED else: LOG.info("Service Status is SHUTDOWN.") return rd_models.ServiceStatuses.SHUTDOWN
def _set_instance_readonly(self, instance_id, read_only=True): try: instance = KSC_FreshInstanceTasks.load(self.context, instance_id) instance._change_variable({"read_only":"ON" if read_only else "OFF"}) #instance = self.load(self.context, instance_id) #guest = instance.get_guest() #guest.ksc_set_read_only(read_only) except Exception as e: LOG.error(utils.get_traceback_stack())
def execute_restore(self, context, backup_id, restore_location): try: LOG.debug("Finding backup %s to restore", backup_id) #replace to conductor backup = conductor_api.API(context).get_backup(backup_id) if not backup: raise Exception("get backup occur error, backup_id:%s" % backup_id) if not backup.backup_type: raise Exception("get backup_type occur error, backup_id:%s" % backup_id) LOG.debug("Getting Restore Runner of type %s", backup.backup_type) restore_runner = self._get_restore_runner(backup.backup_type) backup_chain = [backup] if backup.parent_id: backup_chain = conductor_api.API(context).get_chain_before_backup(backup_id) LOG.info('backup_chain: %s', backup_chain) if len(backup_chain) < 1 or backup_chain[0]['parent_id'] is not None: raise BackupError("get_backup_chain occur error! backup_id:%s, backup_chain:%s" % (backup_id, backup_chain)) for bk in backup_chain: LOG.info('backup: %s', bk) if not bk['extend']: raise Exception("get extend occur error, backup_id:%s" % bk['id']) backup = backup_chain[0] location = backup['location'] binlog = json.loads(backup['extend']).get("binlog", 0) offset = json.loads(backup['extend']).get("offset", 0) LOG.info("backup_location:%s, binlog:%s, offset:%s", (location, binlog, offset)) if (binlog == 0 and offset == 0) or location is None: raise BackupError("get binlog,offset,location occur error for backup_id:%s" % backup_id) with restore_runner(restore_stream=None, restore_location=restore_location, backup_location=location, binlog=binlog, offset=offset, filename=backup_id, backup_chain=backup_chain) as runner: runner.restore() LOG.info("Restoring instance from backup %s to %s", backup_id, restore_location) LOG.info("Restore from backup %s completed successfully to %s", backup_id, restore_location) except Exception as e: LOG.error(utils.get_traceback_stack()) LOG.error("Error restoring backup %s", backup_id) raise else: LOG.info("Restored Backup %s", backup_id)
def _call(self, method_name, timeout_sec=None, **kwargs): LOG.debug("Calling %s with timeout %s" % (method_name, timeout_sec)) if timeout_sec is None: timeout_sec = RPC_DEFAULT_TIME_OUT try: result = self.call(self.context, self.make_msg(method_name, **kwargs), timeout=timeout_sec) LOG.debug("Result is %s" % result) return result except Exception as e: LOG.error(utils.get_traceback_stack()) raise exception.GuestError(original_message=str(e)) except Timeout as t: raise p_exception.ConductorTimeout()
def delete_async(self, fake): LOG.debug("prepare delete instance %s, fake: %s " % (self.id, fake)) modified_group_id = self.group_id self._delete_resources(fake) # Delete guest queue. _item = InstanceGroupItem.get_by_instance_id(self.context, self.id) group_id = _item.group_id del_instance_type = _item.type # if size of item_list equal 1,then we will delete last instance in group item_list = InstanceGroupItem.list_by_gid(self.context, modified_group_id) if len(item_list) == 1: if CONF.trove_security_groups_support: if fake is True: LOG.debug("fake is True, %s skip delete secgroup rules", self.group_id) else: # Delete associated security group self.update_db(task_status=InstanceTasks.DELETEING_SECURITY_GROUP) try: SecurityGroup.delete_for_group(modified_group_id, self.context) except Exception as e: LOG.error(utils.get_traceback_stack()) self.set_servicestatus_deleted() # zs: configuration is needed for restore deleted instance, DO NOT DELETE! # self._delete_instance_config() LOG.info("Delete instance_group_item for instance %s" % self.id) _type = self.type InstanceGroupItem.delete(self.context, self.id) deleted_at = utils.utcnow() if fake is True and _type == DBInstanceType.MASTER: LOG.debug("fake is True, %s is MASTER, set task_status :%s ", self.id, InstanceTasks.FAKE_DELETED) self.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.FAKE_DELETED) else: self.update_db(deleted=True, deleted_at=deleted_at, task_status=InstanceTasks.NONE) LOG.info("send notify to monitor when delete instance %s" % self.id) group_rpcapi.API(self.context).group_update(group_id, notify_when_delete=True) if len(item_list) == 1: LOG.info("Delete group %s" % group_id) InstanceGroup.delete(self.context, group_id) self._send_usage_event(self.server, utils.utcnow())
def _call(self, method_name, timeout_sec, **kwargs): LOG.debug("Calling %s with timeout %s" % (method_name, timeout_sec)) try: result = self.call(self.context, self.make_msg(method_name, **kwargs), timeout=timeout_sec) LOG.debug("Result is %s" % result) return result except Exception as e: LOG.error(utils.get_traceback_stack()) raise exception.GuestError(original_message=str(e)) except Timeout as t: if t is not timeout: raise else: raise exception.GuestTimeout()
def _create_server_volume_individually(self, flavor, image_id, security_groups, service_type, ignore_hosts=None, availability_zone=None): server = None volume_info = { 'block_device': None, 'device_path': CONF.device_path, 'mount_point': CONF.mount_point, 'volumes': None, } block_device_mapping = None try: server = self._create_server(flavor['id'], flavor['disk'],image_id, security_groups, service_type, block_device_mapping, ignore_hosts, availability_zone) server_id = server.id # Save server ID. self.update_db(compute_instance_id=server_id) except Exception as e: msg = "Error creating server for instance. %s %s" % (e,utils.get_traceback_stack()) raise Exception(msg) return server, volume_info
def _delete_resources(self, fake): group_item = InstanceGroupItem.get_by_instance_id(self.context, self.id) group_id = group_item.group_id inst_type = group_item.type instance_id = self.db_info.id if self.server and self.db_info.server_status == "ACTIVE": # set instance to read only model LOG.info("Set readonly for instance %s" % self.id) self._set_instance_readonly(instance_id=self.id) else: LOG.info("vm_status is not ACTIVE for %s" % self.id) if inst_type == DBInstanceType.MASTER: rrinsts = [] try: standby = InstanceGroupItem.get_by_gid_type(self.context, group_id, DBInstanceType.STANDBY) rrinsts = InstanceGroupItem.get_by_gid_type(self.context, group_id, DBInstanceType.READ_REPLI) standby_inst_id = standby.instance_id LOG.info("MASTER %s,it hava STANDBY %s,RRS %s", (self.id, standby_inst_id, [_inst.instance_id for _inst in rrinsts])) InstanceGroupItem.delete(self.context, standby_inst_id) except Exception as e: LOG.error(utils.get_traceback_stack()) # waite replication group db sysnc if len(rrinsts) > 0: self.guest.ksc_set_read_only(True) for _inst in rrinsts: try: rr_instance = self.load(self.context, _inst.instance_id) rr_instance.waite_rpl_synchronize(time_out=CONF.delete_waite_rplg_sync) except Exception as e: LOG.error(utils.get_traceback_stack()) # delete standby instance try: try: standby_instance = self.load(self.context, standby_inst_id) except exception.UnprocessableEntity: standby_instance = FreshInstanceTasks.load(self.context, standby_inst_id) standby_instance.update_db(deleted=True, deleted_at=utils.utcnow(), task_status=InstanceTasks.NONE) standby_instance.set_servicestatus_deleted() standby_instance._delete_instance_config() if standby_instance.server: LOG.info("Delete STANDBY compute server %s" % standby_instance.server.id) standby_instance.get_guest().delete_queue() standby_instance.server.delete() poll_until(standby_instance.server_is_finished, sleep_time=1, time_out=CONF.server_delete_time_out) else: LOG.info("standby instance vm_status is not ACTIVE for %s" % standby_inst_id) except Exception as e: LOG.error(utils.get_traceback_stack()) if fake is True and self.type == DBInstanceType.MASTER: try: LOG.debug("fake is True, %s is MASTER, stop mysqld", self.id) self.guest.ksc_stop_db(do_not_start_on_reboot=True) except Exception as e: msg = "fake_delete, instance: %s, stop mysqld error, exception: %s " % (self.id, str(e)) LOG.error("%s, %s", msg, utils.get_traceback_stack()) AlarmRpc(self.context).alarm(self.tenant_id, level=AlarmRpc.LEVEL_ERROR, _type=AlarmRpc.TYPE_TASKMANAGER, message=msg) if self.server: if fake is True and self.type == DBInstanceType.MASTER: LOG.debug("fake is True, %s is MASTER, skip delete server", self.id) else: try: LOG.info("Delete compute server %s" % self.server.id) guest = self.get_guest() guest.delete_queue() self.server.delete() poll_until(self.server_is_finished, sleep_time=1, time_out=CONF.server_delete_time_out) except Exception as e: LOG.error(utils.get_traceback_stack()) # delete group_item/autobackup_setting/group if self.type in [DBInstanceType.MASTER, DBInstanceType.SINGLE]: try: LOG.info("Delete autobackup_setting of group_id %s" % group_id) AutoBackup.delete(self.context, group_id) except: LOG.error(utils.get_traceback_stack()) # remove vip. if CONF.trove_vip_support and \ self.type in [DBInstanceType.MASTER, DBInstanceType.SINGLE, DBInstanceType.READ_REPLI]: if fake is True and self.type == DBInstanceType.MASTER: LOG.debug("fake is True, %s is MASTER, skip release vip", self.id) else: try: self.update_db(task_status=InstanceTasks.RELEASE_VIP) LOG.info("release vip for instance %s" % instance_id) if inst_type in [DBInstanceType.MASTER, DBInstanceType.SINGLE]: cur_vip = vipService.InstanceVip.get_by_instance_id(self.context, instance_id) vipService.InstanceVip.release_vip(self.context, cur_vip) elif inst_type in [DBInstanceType.READ_REPLI]: vipService.InstanceVip.deallocate(self.context, instance_id, deleted=False, purge=True) except Exception as e: LOG.error(utils.get_traceback_stack())
def fake_deleted_instance_delete(cls, context, instance_id): base_msg = " instance_id: %s " % instance_id success = True msg = " fake_deleted_instance_delete %s " % base_msg deleted_at = utils.utcnow() db_info = None try: db_info = DBInstance.find_by(context=context, id=instance_id, task_id=InstanceTasks.FAKE_DELETED.code, deleted=True) db_info.update(task_status=InstanceTasks.DELETING) LOG.debug("fake_deleted_instance_delete, load instance ok, %s " % base_msg) except Exception: LOG.debug("fake_deleted_instance_delete failed, deleted instance not found, %s " % base_msg) if db_info is None: success = False msg = " fake_deleted_instance_delete failed, load instance error %s " % base_msg return success, msg try: server = load_server(context, db_info.id, db_info.compute_instance_id) LOG.debug("fake_deleted_instance_delete, load server: %s ok, %s ", db_info.compute_instance_id, base_msg) nova_client = create_nova_client(context) def server_is_finished(): try: server_id = db_info.compute_instance_id _server = nova_client.servers.get(server_id) if _server.status not in ['SHUTDOWN', 'ACTIVE']: _msg = "Server %s got into %s status during delete " \ "of instance %s!" % (server.id, server.status, instance_id) LOG.error(_msg) return False except nova_exceptions.NotFound: return True try: LOG.debug("Delete compute server %s" % server.id) server.delete() poll_until(server_is_finished, sleep_time=1, time_out=CONF.server_delete_time_out) guest = create_guest_client(context, db_info.id) guest.delete_queue() LOG.debug("fake_deleted_instance_delete, delete server: %s ok, %s ", db_info.compute_instance_id, base_msg) except Exception as ex: LOG.error(utils.get_traceback_stack()) success = False msg += " ,deleted server error, compute_instance_id: %s, ex:%s, %s " \ % (db_info.compute_instance_id, str(ex), base_msg) except Exception as ex: LOG.error("COMPUTE ID = %s" % db_info.compute_instance_id) success = False msg += " ,load server error, compute_instance_id: %s, %s " % (db_info.compute_instance_id, base_msg) if CONF.trove_vip_support: try: db_info.update(task_status=InstanceTasks.RELEASE_VIP) instance_vip = DBInstanceVip.find_by(context, instance_id=instance_id, deleted=False) vip_info = DBVips.find_by(context, id=instance_vip.vip_id, deleted=False) InstanceVip.release_vip(context, vip_info.vip) LOG.debug("fake_deleted_instance_delete, release_vip: %s ok, %s " % (vip_info.vip, base_msg)) except Exception as ex: LOG.error(utils.get_traceback_stack()) success = False msg += " ,release_vip error, ex:%s, %s " % (str(ex), base_msg) if CONF.trove_security_groups_support: db_info.update(task_status=InstanceTasks.DELETEING_SECURITY_GROUP) try: SecurityGroup.delete_for_group(db_info.group_id, context) LOG.debug( "fake_deleted_instance_delete, delete SecurityGroup: %s ok, %s " % (db_info.group_id, base_msg)) except Exception as ex: LOG.error(utils.get_traceback_stack()) success = False msg += " ,delete SecurityGroup error, ex:%s, %s " % (str(ex), base_msg) db_info.update(deleted_at=deleted_at, task_status=InstanceTasks.NONE) if success is True: msg = "fake_deleted_instance_delete finished, %s " % base_msg return success, msg
def create_instance(self, flavor, image_id, databases, users, service_type, volume_size, security_groups, backup_id, instance_type, ignore_hosts=None, master_id=None, extend=None): if instance_type==DBInstanceType.STANDBY or instance_type==DBInstanceType.READ_REPLI: if master_id is None: raise Exception("when instance_type is STANDBY or RR, The master_id can't none") availability_zone=None overrides = {} if extend != None: availability_zone=extend.get('availability_zone', None) ds_version_id = extend.get('datastore_version_id', None) overrides = extend.get('overrides', {}) self.update_db(task_status=InstanceTasks.BUILDING_SERVER) try: server, volume_info = self._create_server_volume_individually( flavor, image_id, security_groups, service_type, ignore_hosts, availability_zone) except Exception as e: self.set_servicestatus(ServiceStatuses.UNKNOWN) raise e try: configuration_id =self.db_info.configuration_id LOG.debug("Prepare task instance id = %s, configuration id =%s" % (self.id, configuration_id)) overrides = KSC_Configuration.get_configuration_overrides(self.context, configuration_id) except Exception: pass self.update_db(task_status=InstanceTasks.GUEST_PREPARE) LOG.info("======= > groupid %s" % self.group_id) group = InstanceGroup.get_by_groupid(self.context, self.group_id) sys_variables = {"port" : group.db_port, "read_only" : "OFF" if instance_type in (DBInstanceType.MASTER, DBInstanceType.SINGLE) else "ON"} overrides.update(sys_variables) config = self._render_config(flavor) overrides_config = self._render_override_config(flavor, overrides) self._guest_prepare(server, flavor['ram'], volume_info, databases, users, backup_id, config.config_contents, overrides_contents = overrides_config.config_contents) try: utils.poll_until(self._service_is_active, sleep_time=USAGE_SLEEP_TIME, time_out=USAGE_TIMEOUT) except Exception as e: self.set_servicestatus(ServiceStatuses.UNKNOWN) raise e group_item = InstanceGroupItem.get_by_instance_id(self.context, self.id) group_id = group_item.group_id self.update_db(task_status=InstanceTasks.CONFIG_MYSQL) if instance_type==DBInstanceType.STANDBY or instance_type==DBInstanceType.READ_REPLI: if instance_type==DBInstanceType.STANDBY: self._update_instance_type(master_id, DBInstanceType.MASTER) self._update_instance_type(self.id, instance_type) self._relocate_master(master_id,self.id,backup_id) #when upgrade single to ha if instance_type == DBInstanceType.STANDBY: master_group_item = InstanceGroupItem.get_by_instance_id(self.context,master_id) InstanceGroupItem.update_type(self.context, item_id=master_group_item.id, type=DBInstanceType.MASTER) if instance_type==DBInstanceType.SINGLE or instance_type==DBInstanceType.MASTER: self._create_master_user(instance_id=self.id, user=extend.get('admin_user'), \ password=extend.get('admin_password')) self._update_instance_type(self.id, instance_type) if instance_type==DBInstanceType.SINGLE or instance_type==DBInstanceType.MASTER: try: self.update_db(task_status=InstanceTasks.BACKUPING) expiretime = AutoBackup.get_autobackup_expiretime(self.context, group_id) backup_name = self._backup_name(instance_id=self.id) # Fore. 2014/07/02 Get Service Image ID from datastore version # _type = self.service_type # _image = ServiceImage.find_by(self.context,service_name=_type) #ds, ds_version = ds_models.get_datastore_version(type = None, version = ds_version_id) #service_image_id = image_id desc = 'Init backup for new instance' Backup.create(context=self.context, instance=self.id, name=backup_name, description=desc, group_id=group_id, backup_type=BackupType.AUTOBACKUP, expire_at=expiretime,init=True,service_image_id=image_id) except Exception as e: msg = "Error creating backup for instance %s %s" % (self.id,utils.get_traceback_stack()) LOG.error(msg) self.update_db(task_status=InstanceTasks.SETIOTUNE) LOG.debug("Set block iotune for instance %s ." % self.id) self._set_blkiotune(flavor) if instance_type!=DBInstanceType.STANDBY: if CONF.trove_vip_support: self.update_db(task_status=InstanceTasks.ALLOCATE_VIP) try: rip = self._get_instance_ip(self.id) vip = InstanceVip.allocate(self.context,instance_id=self.id,rip = rip) LOG.debug("Allocated vip %s for instance %s"%(vip, self.id)) except Exception as e: self.set_servicestatus(ServiceStatuses.UNKNOWN) raise e self.update_db(deleted=False,task_status=InstanceTasks.NONE) LOG.info("create instance_id:%s,notify monitor,autobackup" % self.id) group_rpcapi.API(self.context).group_update(group_id) self.send_usage_event('create', instance_size=flavor['ram'])