def gpu_umount(args):
    api = GPUAPI()
    vm_api = VmAPI()
    gpu = api.get_gpu_by_id(args['gpu_id'])
    vm = vm_api.get_vm_by_uuid(gpu.vm)
    if not vm.can_operate_by(args['req_user']):
        return {'res': False, 'err': ERR_AUTH_PERM}
    if vm.is_running():
        return {'res': False, 'err': ERR_UMOUNT_RUNNING}
    res = api.umount(args['gpu_id'])
    if res:
        return {'res': True}
    return {'res': False, 'err': ERR_GPU_UMOUNT}

def gpu_mount(args):
    api = GPUAPI()
    vm_api = VmAPI()
    vm = vm_api.get_vm_by_uuid(args['vm_id'])
    if not vm.can_operate_by(args['req_user']):
        return {'res': False, 'err': ERR_AUTH_PERM}
    if vm.is_running():
        return {'res': False, 'err': ERR_MOUNT_RUNNING}
    gpu = api.get_gpu_by_id(args['gpu_id'])
    if vm.group_id != gpu.group_id:
        return {'res': False, 'err': ERR_GPU_MOUNT}
    res = api.mount(args['vm_id'], args['gpu_id'])
    if res:
        return {'res': True}
    return {'res': False, 'err': ERR_GPU_MOUNT}

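# Usage sketch (illustrative, not part of the module): these views take a
# plain dict and answer with {'res': True} or {'res': False, 'err': <code>}.
# The 'req_user' value below is a placeholder for whatever user object the
# API dispatcher injects, and ERROR_CN is assumed to map error codes to
# human-readable messages (it is imported that way in the monitoring module).
#
#   ret = gpu_mount({'vm_id': vm_uuid, 'gpu_id': gpu_id, 'req_user': req_user})
#   if not ret['res']:
#       print(ERROR_CN.get(ret['err'], 'unknown error'))
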
def delete_vm(self, vm_uuid, force=False):
    vm = self.manager.get_vm_by_uuid(vm_uuid)
    image_id = vm.image_id
    diskname = vm.disk
    host_id = vm.host_id
    vcpu = vm.vcpu
    mem = vm.mem
    mac = vm.mac
    ceph_pool_id = vm.ceph_id
    from device.api import GPUAPI
    from volume.api import VolumeAPI
    # initialized before the try block so a forced deletion still has them
    # defined when an early check raises
    deletion_permitted = False
    archive_disk_name = ''
    try:
        # refuse to delete while GPUs or volumes are still mounted
        gpuapi = GPUAPI()
        if len(gpuapi.get_gpu_list_by_vm_uuid(vm_uuid)) > 0:
            raise Error(ERR_VM_DEL_GPU_MOUNTED)
        volumeapi = VolumeAPI()
        if len(volumeapi.get_volume_list_by_vm_uuid(vm_uuid)) > 0:
            raise Error(ERR_VM_DEL_VOL_MOUNTED)
        if self.image_api.disk_exists(image_id, diskname, cephpool_id=ceph_pool_id):
            archive_disk_name = self.image_api.archive_disk(
                image_id, diskname, cephpool_id=ceph_pool_id)
            if archive_disk_name != False:
                deletion_permitted = True
        else:
            deletion_permitted = True
    except Exception as e:
        if not force:
            # not a forced deletion: propagate the error
            raise e
        else:
            # forced deletion: remove the VM record regardless of whether
            # the preceding steps succeeded
            deletion_permitted = True
    if deletion_permitted:
        if vm.delete(archive_disk_name, force=force):
            if not self.host_api.host_release(host_id, vcpu, mem):
                print('[delete_vm]', 'failed to release host resources')
            if not self.network_api.mac_release(mac, vm_uuid):
                print('[delete_vm]', 'failed to release the IP address')
            return True
    return False

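# Usage sketch (illustrative): the force flag decides whether failures in the
# pre-delete checks abort the deletion. The VmAPI attribution is an assumption
# based on the views above; only the method body is shown in this section.
#
#   VmAPI().delete_vm(vm_uuid)              # raises Error while GPUs/volumes are mounted
#   VmAPI().delete_vm(vm_uuid, force=True)  # deletes the record even if checks fail
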
def get_gpu(args):
    api = GPUAPI()
    gpu = api.get_gpu_by_id(args['gpu_id'])
    if gpu.managed_by(args['req_user']):
        return {'res': True, 'info': {
            'id': gpu.id,
            'host_ipv4': gpu.host_ipv4,
            'host_id': gpu.host_id,
            'address': gpu.address,
            'vm': gpu.vm,
            'attach_time': gpu.attach_time,
            'enable': gpu.enable,
            'remarks': gpu.remarks
        }}
    else:
        return {'res': False, 'err': ERR_AUTH_PERM}

def get_gpu_list(args):
    group_api = GroupAPI()
    group_list = group_api.get_group_list_in_perm(args['req_user'])
    gpu_api = GPUAPI()
    ret_list = []
    for g in group_list:
        gpu_list = gpu_api.get_gpu_list_by_group_id(g.id)
        for gpu in gpu_list:
            ret_list.append({
                'id': gpu.id,
                'host_ipv4': gpu.host_ipv4,
                'address': gpu.address,
                'vm': gpu.vm,
                'attach_time': gpu.attach_time,
                'enable': gpu.enable,
                'remarks': gpu.remarks})
    return {'res': True, 'list': ret_list}

def migrate(args):
    # validate the VM being migrated
    api = VmAPI()
    host_api = HostAPI()
    gpu_api = GPUAPI()
    volume_api = VolumeAPI()
    vm = api.get_vm_by_uuid(args['uuid'])
    if not vm:
        return {'res': False, 'err': ERR_VM_UUID}
    if not vm.can_operate_by(args['req_user']):
        return {'res': False, 'err': ERR_AUTH_PERM}
    # validate the target host
    host = host_api.get_host_by_id(args['host_id'])
    if not host.managed_by(args['req_user']):
        return {'res': False, 'err': ERR_AUTH_PERM}
    # the VM and the target host must belong to the same center
    if vm.center_id != host.center_id:
        return {'res': False, 'err': ERR_VM_MIGRATE_DIFF_CEPH}
    # the target host must not be the VM's current host
    if vm.host_id == host.id:
        return {'res': False, 'err': ERR_VM_MIGRATE_SAME_HOST}
    # a VM with a GPU mounted cannot be migrated
    gpu_list = gpu_api.get_gpu_list_by_vm_uuid(args['uuid'])
    if len(gpu_list) > 0:
        return {'res': False, 'err': ERR_VM_MIGRATE_WITHGPU}
    # mounted volumes must be in the same group (cluster) as the target host
    volume_list = volume_api.get_volume_list_by_vm_uuid(args['uuid'])
    if len(volume_list) > 0 and vm.group_id != host.group_id:
        return {'res': False, 'err': ERR_VM_MIGRATE_WITHVOL}
    res = api.migrate_vm(args['uuid'], args['host_id'])
    if res:
        return {'res': True}
    return {'res': False, 'err': ERR_VM_MIGRATE}

def delete_vm(self, vm_uuid):
    vm = self.manager.get_vm_by_uuid(vm_uuid)
    image_id = vm.image_id
    diskname = vm.disk
    host_id = vm.host_id
    vcpu = vm.vcpu
    mem = vm.mem
    mac = vm.mac
    from device.api import GPUAPI
    from volume.api import CephVolumeAPI
    # refuse to delete while GPUs or volumes are still mounted
    gpuapi = GPUAPI()
    if len(gpuapi.get_gpu_list_by_vm_uuid(vm_uuid)) > 0:
        raise Error(ERR_VM_DEL_GPU_MOUNTED)
    volumeapi = CephVolumeAPI()
    if len(volumeapi.get_volume_list_by_vm_uuid(vm_uuid)) > 0:
        raise Error(ERR_VM_DEL_VOL_MOUNTED)
    deletion_permitted = False
    archive_disk_name = ''
    if self.image_api.disk_exists(image_id, diskname):
        archive_disk_name = self.image_api.archive_disk(image_id, diskname)
        if archive_disk_name != False:
            deletion_permitted = True
    else:
        deletion_permitted = True
    if deletion_permitted:
        if vm.delete(archive_disk_name):
            if not self.host_api.host_release(host_id, vcpu, mem):
                print('[delete_vm]', 'failed to release host resources')
            if not self.network_api.mac_release(mac, vm_uuid):
                print('[delete_vm]', 'failed to release the IP address')
            return True
    return False

def reset_vm(self, vm_uuid, new_image_id=None):
    vm = self.manager.get_vm_by_uuid(vm_uuid)
    if new_image_id is None:
        new_image_id = vm.image_id
    host = vm.host
    old_image_id = vm.image_id
    old_diskname = vm.disk
    old_ceph_pool_id = vm.ceph_id
    if vm.is_running():
        raise Error(ERR_VM_RESET_LIVING)
    new_image_info = self.image_api.get_image_info_by_id(new_image_id)
    if not new_image_info:
        raise Error(ERR_IMAGE_INFO)
    from device.api import GPUAPI
    from volume.api import VolumeAPI
    gpuapi = GPUAPI()
    volumeapi = VolumeAPI()
    gpu_list = gpuapi.get_gpu_list_by_vm_uuid(vm_uuid)
    volume_list = volumeapi.get_volume_list_by_vm_uuid(vm_uuid)
    archive_disk_name = ''
    # snapshot of the old image fields, kept for rollback
    old_image_dict = {
        'image_id': old_image_id,
        'image_snap': vm.image_snap,
        'image_name': vm.image,
        'ceph_id': vm.ceph_id,
        'ceph_host': vm.ceph_host,
        'ceph_pool': vm.ceph_pool,
        'ceph_uuid': vm.ceph_uuid,
        'ceph_port': vm.ceph_port,
        'ceph_username': vm.ceph_username
    }
    old_xml_desc = vm.xml_desc
    res = False
    try:
        # detach devices (GPUs and volumes) first
        for gpu1 in gpu_list:
            r1 = vm.detach_device(gpu1.xml_desc)
            if settings.DEBUG:
                print('[reset_vm]', 'detach gpu ', gpu1.id, r1)
        for volume1 in volume_list:
            r1 = vm.detach_device(volume1.xml_desc)
            if settings.DEBUG:
                print('[reset_vm]', 'detach volume ', volume1.id, r1)
        # destroy and undefine the libvirt domain
        dom_undefine_res = False
        if self.manager.domain_exists(vm.host_ipv4, vm.uuid):
            dom = self.manager.get_domain(vm.host_ipv4, vm.uuid)
            try:
                dom.destroy()
            except:
                pass  # the domain may already be shut off
            dom.undefine()
            if not self.manager.domain_exists(vm.host_ipv4, vm.uuid):
                dom_undefine_res = True
        if dom_undefine_res:
            archive_disk_name = self.image_api.archive_disk(
                old_image_id, vm.disk)
        if archive_disk_name:
            init_disk_success = False
            if dom_undefine_res:
                init_disk_success = self.image_api.init_disk(
                    new_image_id, vm.disk)
            if init_disk_success:
                # update the image-related fields on the VM DB record
                vm.db_obj.image_id = new_image_id
                vm.db_obj.image_snap = new_image_info['image_snap']
                vm.db_obj.image = new_image_info['image_name']
                vm.db_obj.ceph_id = new_image_info['ceph_id']
                vm.db_obj.ceph_host = new_image_info['ceph_host']
                vm.db_obj.ceph_pool = new_image_info['ceph_pool']
                vm.db_obj.ceph_uuid = new_image_info['ceph_uuid']
                vm.db_obj.ceph_port = new_image_info['ceph_port']
                vm.db_obj.ceph_username = new_image_info['ceph_username']
                vm.db_obj.save()
                # re-define the domain from the new image's XML template
                xml_tpl = self.image_api.get_xml_tpl(new_image_id)
                xml_desc = xml_tpl % {
                    'name': vm.uuid,
                    'uuid': vm.uuid,
                    'mem': vm.mem,
                    'vcpu': vm.vcpu,
                    'ceph_uuid': new_image_info['ceph_uuid'],
                    'ceph_pool': new_image_info['ceph_pool'],
                    'diskname': vm.disk,
                    'ceph_host': new_image_info['ceph_host'],
                    'ceph_port': new_image_info['ceph_port'],
                    'ceph_username': new_image_info['ceph_username'],
                    'ceph_hosts_xml': new_image_info['ceph_hosts_xml'],
                    'mac': vm.mac,
                    'bridge': vm.br
                }
                dom = self.manager.define(host.ipv4, xml_desc)
                res = True
    except Exception as e:
        if settings.DEBUG:
            print('[reset_vm]', 'failed to reset the image', e)
        res = False
        if archive_disk_name:
            # the old disk was already archived; restore it and roll the DB
            # record back (restore under old_image_id, not vm.image_id,
            # which may already have been overwritten above)
            self.image_api.restore_disk(old_image_id, archive_disk_name)
            vm.db_obj.image_id = old_image_dict['image_id']
            vm.db_obj.image_snap = old_image_dict['image_snap']
            vm.db_obj.image = old_image_dict['image_name']
            vm.db_obj.ceph_id = old_image_dict['ceph_id']
            vm.db_obj.ceph_host = old_image_dict['ceph_host']
            vm.db_obj.ceph_pool = old_image_dict['ceph_pool']
            vm.db_obj.ceph_uuid = old_image_dict['ceph_uuid']
            vm.db_obj.ceph_port = old_image_dict['ceph_port']
            vm.db_obj.ceph_username = old_image_dict['ceph_username']
            vm.db_obj.save()
            dom = self.manager.define(host.ipv4, old_xml_desc)
            if settings.DEBUG:
                print('[reset_vm]', 'rollback succeeded')
    finally:
        # re-attach the devices that were detached above
        for gpu1 in gpu_list:
            r1 = vm.attach_device(gpu1.xml_desc)
            if settings.DEBUG:
                print('[reset_vm]', 'attach gpu ', gpu1.id, r1)
        for volume1 in volume_list:
            r1 = vm.attach_device(volume1.xml_desc)
            if settings.DEBUG:
                print('[reset_vm]', 'attach volume ', volume1.id, r1)
    return res

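# Usage sketch (illustrative): reset_vm archives the old system disk before
# initializing the new one, so a failure after archiving can be rolled back.
# Assuming the method is exposed on VmAPI like migrate_vm:
#
#   VmAPI().reset_vm(vm_uuid)                         # re-initialize from the current image
#   VmAPI().reset_vm(vm_uuid, new_image_id=image_id)  # rebuild from a different image
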
def migrate_vm(self, vm_uuid, host_id):
    # parameter validation
    vm = self.manager.get_vm_by_uuid(vm_uuid)
    host = self.host_api.get_host_by_id(host_id)
    src_host_alive = vm.is_host_connected
    from device.api import GPUAPI
    from volume.api import VolumeAPI
    gpuapi = GPUAPI()
    volumeapi = VolumeAPI()
    gpu_list = gpuapi.get_gpu_list_by_vm_uuid(vm_uuid)
    volume_list = volumeapi.get_volume_list_by_vm_uuid(vm_uuid)
    # source and target must be in the same center
    if vm.center_id != host.center_id:
        raise Error(ERR_VM_MIGRATE_DIFF_CEPH)
    # the VM must be shut off
    if vm.is_running():
        raise Error(ERR_VM_MIGRATE_LIVING)
    # define the domain on the new host
    image_info = self.image_api.get_image_info_by_id(vm.image_id)
    xml_tpl = self.image_api.get_xml_tpl(vm.image_id)
    xml_desc = xml_tpl % {
        'name': vm_uuid,
        'uuid': vm_uuid,
        'mem': vm.mem,
        'vcpu': vm.vcpu,
        'ceph_uuid': image_info['ceph_uuid'],
        'ceph_pool': image_info['ceph_pool'],
        'diskname': vm.disk,
        'ceph_host': image_info['ceph_host'],
        'ceph_port': image_info['ceph_port'],
        'ceph_username': image_info['ceph_username'],
        'ceph_hosts_xml': image_info['ceph_hosts_xml'],
        'mac': vm.mac,
        'bridge': vm.br
    }
    migrate_res = False
    if self.host_api.host_claim(host_id, vm.vcpu, vm.mem, 1):
        try:
            if src_host_alive:
                for gpu1 in gpu_list:
                    r1 = vm.detach_device(gpu1.xml_desc)
                    if settings.DEBUG:
                        print('[migrate_vm]', 'detach gpu ', gpu1.id, r1)
                for volume1 in volume_list:
                    r1 = vm.detach_device(volume1.xml_desc)
                    if settings.DEBUG:
                        print('[migrate_vm]', 'detach volume ', volume1.id, r1)
            old_host_id = vm.host_id
            if self.manager.migrate(vm_uuid, host_id, host.ipv4, xml_desc,
                                    old_host_alive=src_host_alive):
                migrate_res = True
                old_res = self.host_api.host_release(
                    old_host_id, vm.vcpu, vm.mem, 1)
                if settings.DEBUG:
                    print('[migrate_vm]', 'released resources on the old host ', old_res)
                # re-attach devices (volumes only; GPUs are bound to the old host)
                for volume1 in volume_list:
                    r1 = vm.attach_device(volume1.xml_desc)
                    if settings.DEBUG:
                        print('[migrate_vm]', 'attach volume ', volume1.id, r1)
        except Exception as e:
            if type(e) == Error:
                raise e
        finally:
            if not migrate_res:
                new_res = self.host_api.host_release(
                    host_id, vm.vcpu, vm.mem, 1)
                if settings.DEBUG:
                    print('[migrate_vm]', 'migration failed, released resources on the new host ', new_res)
    return migrate_res

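# Resource-accounting note (illustrative): host_claim reserves vcpu/mem and one
# VM slot on the target host up front, and the finally block releases that
# claim whenever migrate_res stays False, so a failed migration leaves the
# host quotas unchanged. This method is the backend of the migrate() view above:
#
#   ok = VmAPI().migrate_vm(vm_uuid, new_host_id)
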
def set_gpu_remarks(args):
    api = GPUAPI()
    if api.set_remarks(args['gpu_id'], args['remarks']):
        return {'res': True}
    return {'res': False, 'err': ERR_GPU_EDIT_REMARKS}

from compute.vm.vm import VIR_DOMAIN_HOST_DOWN
from device.api import GPUAPI
from volume.api import VolumeAPI
from api.error import Error
from api.error import ERROR_CN
from .api import MonitoringAPI

api = MonitoringAPI()
center_api = CenterAPI()
group_api = GroupAPI()
vm_api = VmAPI()
host_api = HostAPI()
gpuapi = GPUAPI()
volumeapi = VolumeAPI()

def run_ha_monitoring():
    """
    Periodic high-availability monitoring of virtual machines.
    lzx: 2018-09-25
    """
    global center_api, group_api, vm_api, gpuapi, volumeapi
    group_list = group_api.get_group_list()
    vm_list = []
    vm_dic = {}
    for group in group_list: