def delete_snapshot(guest=None, msg=None):
    """Delete the snapshot named by ``msg['snapshot_id']`` from ``guest``.

    A success response is emitted once the snapshot has been looked up and
    deleted.  If anything raises (including the argument assertions or the
    success emit itself), the traceback is logged and a failure response is
    emitted instead.

    :param guest: the libvirt domain owning the snapshot.
    :param msg: instruction dict; reads ``snapshot_id``, ``_object``,
        ``action``, ``uuid`` and the optional ``passback_parameters``.
    """
    extend_data = {}

    try:
        assert isinstance(guest, libvirt.virDomain)
        assert isinstance(msg, dict)

        guest.snapshotLookupByName(name=msg['snapshot_id']).delete()

        response_emit.success(
            _object=msg['_object'],
            action=msg['action'],
            uuid=msg['uuid'],
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))

    except:
        # Deliberately broad: every failure must be reported back upstream.
        logger.error(traceback.format_exc())
        log_emit.error(traceback.format_exc())
        response_emit.failure(
            _object=msg['_object'],
            action=msg.get('action'),
            uuid=msg.get('uuid'),
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))
def revert_snapshot(guest=None, msg=None): extend_data = dict() try: assert isinstance(guest, libvirt.virDomain) assert isinstance(msg, dict) snap_flags = 0 snap_flags |= libvirt.VIR_DOMAIN_SNAPSHOT_REVERT_FORCE snapshot = guest.snapshotLookupByName(name=msg['snapshot_id']) try: guest.revertToSnapshot(snap=snapshot, flags=0) except libvirt.libvirtError, e: # 给予一次重新恢复的机会 if e.get_error_code() == libvirt.VIR_ERR_SYSTEM_ERROR: guest.revertToSnapshot(snap=snapshot, flags=snap_flags) # 如果恢复后的 Guest 为 Running 状态,则同步其系统时间。 if guest.isActive(): # https://qemu.weilnetz.de/doc/qemu-ga-ref.html#index-guest_002dset_002dtime try: libvirt_qemu.qemuAgentCommand(guest, json.dumps({ 'execute': 'guest-set-time', 'arguments': { 'time': int(ji.Common.ts() * (10**9)) } }), 3, libvirt_qemu.VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT) except libvirt.libvirtError, e: logger.error(e.message) log_emit.error(e.message)
def host_performance_collection_engine(self):
    """Thread body that periodically emits host performance reports.

    Loops until ``Utils.exit_flag`` is set.  Every cycle it records a
    heartbeat timestamp in ``threads_status``, sleeps for
    ``config['engine_cycle_interval']`` and stores the current timestamp in
    ``self.ts``.  When that timestamp is a multiple of ``self.interval`` the
    interface/disk inventories are refreshed and the CPU/memory, traffic and
    disk reports are produced.  Errors are logged and the loop continues.
    """
    thread_name = 'host_performance_collection_engine'
    self.init_conn()

    while not Utils.exit_flag:
        threads_status[thread_name] = {'timestamp': ji.Common.ts()}
        time.sleep(config['engine_cycle_interval'])
        self.ts = ji.Common.ts()

        # noinspection PyBroadException
        try:
            if self.ts % self.interval == 0:
                self.update_interfaces()
                self.update_disks()
                self.host_cpu_memory_performance_report()
                self.host_traffic_performance_report()
                self.host_disk_usage_io_performance_report()

        except:
            logger.error(traceback.format_exc())
            log_emit.error(traceback.format_exc())

    msg = 'Thread host_performance_collection_engine say bye-bye'
    print(msg)
    logger.info(msg=msg)
def guest_state_report(guest):
    """Emit a guest event matching the domain's current libvirt state.

    The state is read from ``guest.info()``; the matching
    ``guest_event_emit`` method is called with the domain UUID and a log
    line describing the new state is written.  Unknown states are reported
    through ``guest_event_emit.no_state``.  Any exception is logged via its
    ``message`` attribute and swallowed.

    State reference:
    http://libvirt.org/docs/libvirt-appdev-guide-python/en-US/html/libvirt_application_development_guide_using_python-Guest_Domains-Information-State.html
    http://stackoverflow.com/questions/4986076/alternative-to-virsh-libvirt
    """
    # (libvirt constant name, text appended to the log, guest_event_emit method)
    # Constants are resolved lazily, in this order, while searching for a match.
    known_states = (
        ('VIR_DOMAIN_RUNNING', u' Running。', 'running'),
        ('VIR_DOMAIN_BLOCKED', u' Blocked。', 'blocked'),
        ('VIR_DOMAIN_PAUSED', u' Paused。', 'paused'),
        ('VIR_DOMAIN_SHUTDOWN', u' Shutdown。', 'shutdown'),
        ('VIR_DOMAIN_SHUTOFF', u' Shutoff。', 'shutoff'),
        ('VIR_DOMAIN_CRASHED', u' Crashed。', 'crashed'),
        ('VIR_DOMAIN_PMSUSPENDED', u' PM_Suspended。', 'pm_suspended'),
    )

    try:
        _uuid = guest.UUIDString()
        state, _maxmem, _mem, _ncpu, _cputime = guest.info()

        log = u' '.join([u'域', guest.name(), u', UUID', _uuid, u'的状态改变为'])

        suffix, emitter_name = u' NO_State。', 'no_state'
        for constant_name, state_suffix, state_emitter in known_states:
            if state == getattr(libvirt, constant_name):
                suffix, emitter_name = state_suffix, state_emitter
                break

        log += suffix
        getattr(guest_event_emit, emitter_name)(uuid=_uuid)

        logger.info(log)
        log_emit.info(log)

    except Exception as e:
        logger.error(e.message)
        log_emit.error(e.message)
def disk_info_by_local(image_path):
    """Return the ``qemu-img info`` description of a local qcow2 image.

    :param image_path: path of the qcow2 image on the local filesystem.
    :return: the parsed JSON document printed by ``qemu-img info``.
    :raises CommandExecFailed: when the command exits with a non-zero status.
    """
    # stderr is discarded so that only the JSON document reaches json.loads().
    cmd = ' '.join(['/usr/bin/qemu-img', 'info', '--output=json', '-f', 'qcow2', image_path, '2>/dev/null'])
    exit_status, output = Utils.shell_cmd(cmd)

    if exit_status != 0:
        # The message used to say "磁盘扩容时" (while resizing the disk), which
        # was copied from the resize helpers; this command only queries info.
        log = u' '.join([u'路径', image_path, u'获取磁盘信息时,命令执行退出异常:', str(output)])
        logger.error(msg=log)
        log_emit.error(msg=log)
        raise CommandExecFailed(log)

    return json.loads(output)
def refresh_guest_state(self):
    """Re-report the state of every known guest.

    Ensures the connection is initialised, refreshes the guest mapping and
    passes each domain in ``self.guest_mapping_by_uuid`` to
    ``Guest.guest_state_report``.  Any failure is logged with its traceback
    and swallowed.
    """
    try:
        self.init_conn()
        self.refresh_guest_mapping()

        for domain in self.guest_mapping_by_uuid.values():
            Guest.guest_state_report(domain)

    except:
        failure = traceback.format_exc()
        logger.error(failure)
        log_emit.error(traceback.format_exc())
def resize_qemu_image_by_local(image_path, size):
    """Resize a local qcow2 image to ``size`` GiB with ``qemu-img resize``.

    :param image_path: path of the qcow2 image on the local filesystem.
    :param size: new size; its string form is suffixed with ``G``.
    :return: ``True`` when the command succeeds.
    :raises CommandExecFailed: when the command exits with a non-zero status.
    """
    target_size = str(size) + 'G'
    arguments = ['/usr/bin/qemu-img', 'resize', '-f', 'qcow2', image_path, target_size]
    exit_status, output = Utils.shell_cmd(' '.join(arguments))

    if exit_status == 0:
        return True

    log = u' '.join([u'路径', image_path, u'磁盘扩容时,命令执行退出异常:', str(output)])
    logger.error(msg=log)
    log_emit.error(msg=log)
    raise CommandExecFailed(log)
def keepalived_redis(cls):
    """Keep the redis connection alive; never returns.

    Every five seconds ``cls.r`` is pinged.  A redis connection error is
    logged and answered with ``cls.init_conn_redis()``; any other failure is
    logged with its traceback and the loop carries on.
    """
    while True:
        try:
            time.sleep(5)
            cls.r.ping()

        except redis.exceptions.ConnectionError as connection_error:
            logger.error(connection_error.message)
            # The connection is gone: build a fresh one before the next ping.
            cls.init_conn_redis()

        except:
            logger.error(traceback.format_exc())
def resize_qemu_image_by_glusterfs(dfs_volume, image_path, size):
    """Resize a qcow2 image stored on GlusterFS to ``size`` GiB.

    The image is addressed through a ``gluster://127.0.0.1/<volume>/<path>``
    URL handed to ``qemu-img resize``.

    :param dfs_volume: name of the GlusterFS volume.
    :param image_path: path of the image inside the volume.
    :param size: new size; its string form is suffixed with ``G``.
    :return: ``True`` when the command succeeds.
    :raises CommandExecFailed: when the command exits with a non-zero status.
    """
    image_path = '/'.join(['gluster://127.0.0.1', dfs_volume, image_path])
    target_size = str(size) + 'G'
    arguments = ['/usr/bin/qemu-img', 'resize', '-f', 'qcow2', image_path, target_size]
    exit_status, output = Utils.shell_cmd(' '.join(arguments))

    if exit_status == 0:
        return True

    log = u' '.join([u'路径', image_path, u'磁盘扩容时,命令执行退出异常:', str(output)])
    logger.error(msg=log)
    log_emit.error(msg=log)
    raise CommandExecFailed(log)
def ping(label='', _cnxpool=None):
    """Ping MySQL through one connection borrowed from ``_cnxpool``.

    The borrowed connection is always handed back to the pool, including
    when the ping fails.  Previously it was only closed on success, so each
    failed ping removed one connection from the pool.

    :param label: text appended to the log line written when no pool is given.
    :param _cnxpool: the connection pool to borrow from.
    :return: ``None``; failures are logged, not raised.
    """
    if _cnxpool is None:
        logger.critical(''.join(['cnxpool must not None by ', label]))
        return

    _cnx = None

    try:
        _cnx = _cnxpool.get_connection()
        _cnx.ping(attempts=1, delay=0)

    except mysql.connector.errors.InterfaceError as err:
        logger.critical(err.msg)

    except mysql.connector.Error as err:
        logger.error(err)

    finally:
        # _cnx stays None when get_connection() itself failed.
        if _cnx is not None:
            try:
                _cnx.close()
            except mysql.connector.Error as err:
                # Closing a broken connection may fail as well; log it only.
                logger.error(err)
def start_by_uuid(self, conn=None):
    """Start the domain whose UUID is ``self.uuid``.

    :param conn: libvirt connection used to look the domain up.
    :return: ``True`` when the domain was started, ``False`` when libvirt
        raised an error (the error message is logged).
    """
    try:
        conn.lookupByUUIDString(uuidstr=self.uuid).create()

        log = u' '.join([u'域', self.name, u', UUID', self.uuid, u'启动成功.'])
        logger.info(msg=log)
        log_emit.info(msg=log)
        return True

    except libvirt.libvirtError as e:
        logger.error(e.message)
        log_emit.error(e.message)
        return False
def make_qemu_image_by_local(image_path, size):
    """Create a local qcow2 image of ``size`` GiB with ``qemu-img create``.

    The parent directory of ``image_path`` is created (mode 0755) when it is
    missing.

    :param image_path: path of the image to create.
    :param size: image size; its string form is suffixed with ``G``.
    :return: ``True`` when the command succeeds.
    :raises CommandExecFailed: when the command exits with a non-zero status.
    """
    parent_dir = os.path.dirname(image_path)
    if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir, 0o755)

    target_size = str(size) + 'G'
    arguments = ['/usr/bin/qemu-img', 'create', '-f', 'qcow2', image_path, target_size]
    exit_status, output = Utils.shell_cmd(' '.join(arguments))

    if exit_status == 0:
        return True

    log = u' '.join([u'路径', image_path, u'创建磁盘时,命令执行退出异常:', str(output)])
    logger.error(msg=log)
    log_emit.error(msg=log)
    raise CommandExecFailed(log)
def emit(self, _kind=None, _type=None, message=None):
    """Push one event onto the upstream redis queue.

    The event is serialised as JSON carrying ``kind``, ``type``, a
    timestamp, this host's name and node id, and ``message``.

    :param _kind: event kind.
    :param _type: event type.
    :param message: event payload.
    :return: the result of ``rpush`` on success; ``False`` when any of the
        three parameters is ``None``; ``None`` when the redis connection is
        down (after logging and a five second pause).
    """
    from initialize import logger

    # The warning states that none of the parameters may be None, but the
    # guard used all(), which only rejected calls where every one was None.
    if any(value is None for value in (_kind, _type, message)):
        logger.warning(u'参数 _kind, _type, message 均不能为None.')
        return False

    msg = json.dumps({'kind': _kind, 'type': _type, 'timestamp': ji.Common.ts(), 'host': self.hostname,
                      'node_id': self.node_id, 'message': message}, ensure_ascii=False)

    try:
        return self.r.rpush(self.upstream_queue, msg)

    except redis.exceptions.ConnectionError:
        logger.error(traceback.format_exc())
        # Keep looping threads from flooding the log while redis is down.
        time.sleep(5)
def make_qemu_image_by_glusterfs(gf, dfs_volume, image_path, size):
    """Create a qcow2 image of ``size`` GiB on a GlusterFS volume.

    The parent directory is created through ``gf`` (mode 0755) when missing,
    then ``qemu-img create`` is run against the
    ``gluster://127.0.0.1/<volume>/<path>`` URL.

    :param gf: GlusterFS handle offering ``isdir`` and ``makedirs``.
    :param dfs_volume: name of the GlusterFS volume.
    :param image_path: path of the image inside the volume.
    :param size: image size; its string form is suffixed with ``G``.
    :return: ``True`` when the command succeeds.
    :raises CommandExecFailed: when the command exits with a non-zero status.
    """
    parent_dir = os.path.dirname(image_path)
    if not gf.isdir(parent_dir):
        gf.makedirs(parent_dir, 0o755)

    image_path = '/'.join(['gluster://127.0.0.1', dfs_volume, image_path])
    target_size = str(size) + 'G'
    arguments = ['/usr/bin/qemu-img', 'create', '-f', 'qcow2', image_path, target_size]
    exit_status, output = Utils.shell_cmd(' '.join(arguments))

    if exit_status == 0:
        return True

    log = u' '.join([u'路径', image_path, u'创建磁盘时,命令执行退出异常:', str(output)])
    logger.error(msg=log)
    log_emit.error(msg=log)
    raise CommandExecFailed(log)
def is_running(_guest):
    """Tell whether the guest agent inside ``_guest`` answers ``guest-ping``.

    :param _guest: the libvirt domain to probe.
    :return: ``True`` when the agent command completes without raising,
        ``False`` otherwise (the traceback is logged).
    """
    try:
        ping_command = json.dumps({'execute': 'guest-ping', 'arguments': {}})
        libvirt_qemu.qemuAgentCommand(
            _guest, ping_command, 3, libvirt_qemu.VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT)

    except:
        logger.error(traceback.format_exc())
        return False

    return True
def define_by_xml(self, conn=None):
    """Define the domain described by ``self.xml`` on ``conn``.

    :param conn: libvirt connection on which the domain is defined.
    :return: ``True`` when ``defineXML`` returned a truthy value, ``False``
        when it returned a falsy value or raised a libvirt error.
    """
    try:
        defined = bool(conn.defineXML(xml=self.xml))
        outcome = u'定义成功.' if defined else u'定义时未预期返回.'

        log = u' '.join([u'域', self.name, u', UUID', self.uuid, outcome])
        logger.info(msg=log)
        log_emit.info(msg=log)

    except libvirt.libvirtError as e:
        logger.error(e.message)
        log_emit.error(e.message)
        return False

    return defined
def guest_performance_collection_engine(self):
    """Thread body that periodically emits guest performance reports.

    Loops until ``Utils.exit_flag`` is set.  Every cycle it records a
    heartbeat timestamp in ``threads_status``, sleeps for
    ``config['engine_cycle_interval']`` and stores the current timestamp in
    ``self.ts``.  When that timestamp is a multiple of ``self.interval`` the
    guest mapping is refreshed and the CPU/memory, traffic and disk I/O
    reports are produced.  When it is also a multiple of 3600, samples older
    than two intervals are purged from the ``last_guest_*`` caches first.
    Errors are logged and the loop continues.
    """
    thread_name = 'guest_performance_collection_engine'
    sample_caches = ('last_guest_cpu_time', 'last_guest_traffic', 'last_guest_disk_io')

    self.init_conn()

    while not Utils.exit_flag:
        threads_status[thread_name] = {'timestamp': ji.Common.ts()}
        time.sleep(config['engine_cycle_interval'])
        self.ts = ji.Common.ts()

        # noinspection PyBroadException
        try:
            if self.ts % self.interval != 0:
                continue

            if self.ts % 3600 == 0:
                # Hourly garbage collection of stale samples.
                for cache_name in sample_caches:
                    cache = getattr(self, cache_name)
                    # Walk a copy of the items because entries are deleted on the way.
                    for key, sample in list(cache.items()):
                        if (self.ts - sample['timestamp']) > self.interval * 2:
                            del cache[key]

            self.refresh_guest_mapping()
            self.guest_cpu_memory_performance_report()
            self.guest_traffic_performance_report()
            self.guest_disk_io_performance_report()

        except:
            logger.error(traceback.format_exc())
            log_emit.error(traceback.format_exc())

    msg = 'Thread guest_performance_collection_engine say bye-bye'
    print(msg)
    logger.info(msg=msg)
def init_conn_mysql(cls):
    """Create the MySQL connection pool and store it on ``cls.cnxpool``.

    Pool settings are read from ``app.config``.  When the connector raises
    an error, a message is logged (a fixed text for access-denied and
    unknown-database errors, the connector's own message otherwise) and the
    process exits with the error number as its status.
    """
    try:
        pool_settings = dict(
            host=app.config["db_host"],
            user=app.config["db_user"],
            password=app.config["db_password"],
            port=app.config["db_port"],
            database=app.config["db_name"],
            raise_on_warnings=app.config["DEBUG"],
            pool_size=app.config["db_pool_size"],
            charset=app.config["db_charset"])
        cls.cnxpool = mysql.connector.pooling.MySQLConnectionPool(**pool_settings)

    except mysql.connector.Error as err:
        friendly_messages = {
            errorcode.ER_ACCESS_DENIED_ERROR: u'用户名或密码错误',
            errorcode.ER_BAD_DB_ERROR: u'数据库不存在',
        }
        e_msg = friendly_messages.get(err.errno)
        if e_msg is None:
            e_msg = err.msg

        logger.error(e_msg)
        exit(err.errno)
def host_state_report_engine(self):
    """Compute node state reporting engine (thread body).

    Initialises the connection and the interface/disk inventories once, then
    loops until ``Utils.exit_flag`` is set.  Every cycle it records a
    heartbeat timestamp in ``threads_status``, sleeps for
    ``config['engine_cycle_interval']``, refreshes the inventories when the
    current timestamp is a multiple of 60, and emits a host heartbeat event.
    Errors are logged and the loop continues.
    """
    thread_name = 'host_state_report_engine'

    self.init_conn()

    # Initialise the inventories on first start.
    self.update_interfaces()
    self.update_disks()

    boot_time = ji.Common.ts()

    while not Utils.exit_flag:
        threads_status[thread_name] = {'timestamp': ji.Common.ts()}

        # noinspection PyBroadException
        try:
            time.sleep(config['engine_cycle_interval'])

            # Refresh the inventories once a minute.
            if ji.Common.ts() % 60 == 0:
                self.update_interfaces()
                self.update_disks()

            heartbeat = {
                'node_id': self.node_id,
                'cpu': self.cpu,
                'cpuinfo': self.cpuinfo,
                'memory': self.memory,
                'dmidecode': self.dmidecode,
                'interfaces': self.interfaces,
                'disks': self.disks,
                'system_load': os.getloadavg(),
                'boot_time': boot_time,
                'memory_available': psutil.virtual_memory().available,
                'threads_status': threads_status,
            }
            host_event_emit.heartbeat(message=heartbeat)

        except:
            logger.error(traceback.format_exc())
            log_emit.error(traceback.format_exc())

    msg = 'Thread host_state_report_engine say bye-bye'
    print(msg)
    logger.info(msg=msg)
def allocate_bandwidth(guest=None, msg=None):
    """Apply ``msg['bandwidth']`` to the first network interface of ``guest``.

    The value is divided by 1000 and then by 8 and written to both
    ``inbound.average`` and ``outbound.average`` of the interface
    parameters.  The persistent configuration is always updated; the live
    domain is updated as well when the guest is active.  A success or
    failure response is emitted accordingly.

    Modification flags:
    https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainModificationImpact

    :param guest: the libvirt domain to reconfigure.
    :param msg: instruction dict; reads ``bandwidth``, ``_object``,
        ``action``, ``uuid`` and the optional ``passback_parameters``.
    """
    assert isinstance(guest, libvirt.virDomain)
    assert isinstance(msg, dict)

    extend_data = {}

    try:
        # NOTE(review): presumably converts bit/s into the KB/s unit libvirt expects - confirm.
        bandwidth = msg['bandwidth'] / 1000 / 8

        first_interface = ET.fromstring(guest.XMLDesc()).findall('devices/interface')[0]
        mac = first_interface.find('mac').attrib['address']

        interface_bandwidth = guest.interfaceParameters(mac, 0)
        for direction in ('inbound.average', 'outbound.average'):
            interface_bandwidth[direction] = bandwidth

        guest.setInterfaceParameters(mac, interface_bandwidth, libvirt.VIR_DOMAIN_AFFECT_CONFIG)

        if guest.isActive():
            guest.setInterfaceParameters(mac, interface_bandwidth, libvirt.VIR_DOMAIN_AFFECT_LIVE)

        response_emit.success(
            _object=msg['_object'],
            action=msg['action'],
            uuid=msg['uuid'],
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))

    except:
        logger.error(traceback.format_exc())
        log_emit.error(traceback.format_exc())
        response_emit.failure(
            _object=msg['_object'],
            action=msg.get('action'),
            uuid=msg.get('uuid'),
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))
def keepalived_mysql(cls):
    """Keep the MySQL connection pool alive; never returns.

    Every five seconds one connection is borrowed from ``cls.cnxpool`` and
    pinged.  Failures are logged and the loop carries on.
    """

    def ping(label='', _cnxpool=None):
        """Ping MySQL through one connection borrowed from ``_cnxpool``.

        The borrowed connection is always handed back to the pool, including
        when the ping fails.  Previously it was only closed on success, so
        each failed ping removed one connection from the pool.
        """
        if _cnxpool is None:
            logger.critical(''.join(['cnxpool must not None by ', label]))
            return

        _cnx = None

        try:
            _cnx = _cnxpool.get_connection()
            _cnx.ping(attempts=1, delay=0)

        except mysql.connector.errors.InterfaceError as err:
            logger.critical(err.msg)

        except mysql.connector.Error as err:
            logger.error(err)

        finally:
            # _cnx stays None when get_connection() itself failed.
            if _cnx is not None:
                try:
                    _cnx.close()
                except mysql.connector.Error as err:
                    # Closing a broken connection may fail as well; log it only.
                    logger.error(err)

    while True:
        try:
            time.sleep(5)
            ping(label='', _cnxpool=cls.cnxpool)

        except:
            # Deliberately broad: the keep-alive loop must survive any failure.
            logger.error(traceback.format_exc())
def generate_system_image(self):
    """Copy the template image to ``self.system_image_path``.

    * glusterfs: the template must exist as a file on the volume; the target
      directory is created when missing and the template is copied through
      ``self.gf``.
    * ceph: nothing is done.
    * local / shared_mount: the template must exist, be a regular file and
      be readable.  The target directory is created when missing; if a
      non-directory occupies that path it is renamed to ``<path>.bak``
      first.  The template is then copied with ``shutil.copyfile``.

    :return: ``True`` on success, ``False`` when the template is missing or
        unreadable (the reason is logged).
    :raises ValueError: for an unknown ``self.storage_mode``.
    """

    def template_problem(reason):
        # Log why the template cannot be used and signal failure to the caller.
        log = u' '.join([
            u'域', self.name, u', UUID', self.uuid, u'所依赖的模板', self.template_path, reason
        ])
        logger.error(msg=log)
        log_emit.error(msg=log)
        return False

    if self.storage_mode in [StorageMode.ceph.value, StorageMode.glusterfs.value]:
        if self.storage_mode == StorageMode.glusterfs.value:
            if not self.gf.isfile(self.template_path):
                return template_problem(u'不存在.')

            image_dir = os.path.dirname(self.system_image_path)
            if not self.gf.isdir(image_dir):
                self.gf.makedirs(image_dir, 0o755)

            self.gf.copyfile(self.template_path, self.system_image_path)

        return True

    if self.storage_mode not in [StorageMode.local.value, StorageMode.shared_mount.value]:
        raise ValueError('Unknown value of storage_mode.')

    if not (os.path.exists(self.template_path) and os.path.isfile(self.template_path)):
        return template_problem(u'不存在.')

    if not os.access(self.template_path, os.R_OK):
        return template_problem(u'无权访问.')

    system_image_path_dir = os.path.dirname(self.system_image_path)

    if os.path.exists(system_image_path_dir):
        if not os.path.isdir(system_image_path_dir):
            # Something that is not a directory is in the way: move it aside.
            os.rename(system_image_path_dir, system_image_path_dir + '.bak')
            os.makedirs(system_image_path_dir, 0o755)
    else:
        os.makedirs(system_image_path_dir, 0o755)

    shutil.copyfile(self.template_path, self.system_image_path)
    return True
def emit2(self, _type=None, message=None):
    """Write ``message`` to the local logger and emit it upstream as a log event.

    The logger method is chosen by comparing ``_type`` with the
    ``LogLevel`` values; an unrecognised ``_type`` is logged at debug level.

    :param _type: log level value.
    :param message: text to log and emit.
    :return: whatever ``self.emit`` returns.
    """
    from initialize import logger

    # LogLevel member names double as the logger method names.
    method_name = 'debug'
    for level_name in ('debug', 'info', 'warn', 'error', 'critical'):
        if _type == getattr(LogLevel, level_name).value:
            method_name = level_name
            break

    getattr(logger, method_name)(msg=message)

    return self.emit(_kind=EmitKind.log.value, _type=_type, message=message)
def create(cls, conn, msg):
    """Create, define, initialise and start a guest described by ``msg``.

    Steps, in order: configure the ``Guest`` class-level storage settings,
    queue the guest for creation-progress reporting, generate the system
    image from the template, define the domain from its XML, collect the
    disk information, run the OS template initialisation operations, start
    the domain and apply its quota.  A success response carrying
    ``disk_info`` is emitted at the end; any exception is logged and
    answered with a failure response.

    :param conn: libvirt connection.
    :param msg: instruction dict describing the guest.
    """
    try:
        Guest.storage_mode = msg['storage_mode']

        guest = Guest(
            uuid=msg['uuid'],
            name=msg['name'],
            template_path=msg['template_path'],
            disk=msg['disks'][0],
            xml=msg['xml'])

        if Guest.storage_mode == StorageMode.glusterfs.value:
            Guest.dfs_volume = msg['dfs_volume']
            Guest.init_gfapi()

        guest.system_image_path = guest.disk['path']

        progress_payload = {
            'storage_mode': Guest.storage_mode,
            'dfs_volume': Guest.dfs_volume,
            'uuid': guest.uuid,
            'template_path': guest.template_path,
            'system_image_path': guest.system_image_path,
        }
        q_creating_guest.put(progress_payload)

        if not guest.generate_system_image():
            raise RuntimeError('System image generate failure.')

        if not guest.define_by_xml(conn=conn):
            raise RuntimeError('Define the instance of virtual machine by xml failure.')

        guest_event_emit.creating(uuid=guest.uuid, progress=92)

        disk_info = {}

        if Guest.storage_mode == StorageMode.glusterfs.value:
            disk_info = Disk.disk_info_by_glusterfs(
                dfs_volume=guest.dfs_volume, image_path=guest.system_image_path)

        elif Guest.storage_mode in [StorageMode.local.value, StorageMode.shared_mount.value]:
            disk_info = Disk.disk_info_by_local(image_path=guest.system_image_path)

        # Exceptions raised here are handled by this thread's outermost handler below.
        guest.execute_os_template_initialize_operates(
            guest=conn.lookupByUUIDString(uuidstr=guest.uuid),
            os_template_initialize_operates=msg['os_template_initialize_operates'],
            os_type=msg['os_type'])

        extend_data = {'disk_info': disk_info}

        guest_event_emit.creating(uuid=guest.uuid, progress=97)

        if not guest.start_by_uuid(conn=conn):
            raise RuntimeError('Start the instance of virtual machine by uuid failure.')

        cls.quota(guest=conn.lookupByUUIDString(uuidstr=guest.uuid), msg=msg)

        response_emit.success(
            _object=msg['_object'],
            action=msg['action'],
            uuid=msg['uuid'],
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))

    except:
        logger.error(traceback.format_exc())
        log_emit.error(traceback.format_exc())
        response_emit.failure(
            _object=msg['_object'],
            action=msg.get('action'),
            uuid=msg.get('uuid'),
            passback_parameters=msg.get('passback_parameters'))
extend_data.update({ 'snapshot_id': ret.getName(), 'parent_id': parent_id, 'xml': ret.getXMLDesc() }) response_emit.success( _object=msg['_object'], action=msg['action'], uuid=msg['uuid'], data=extend_data, passback_parameters=msg.get('passback_parameters')) except: logger.error(traceback.format_exc()) log_emit.error(traceback.format_exc()) response_emit.failure( _object=msg['_object'], action=msg.get('action'), uuid=msg.get('uuid'), data=extend_data, passback_parameters=msg.get('passback_parameters')) @staticmethod def delete_snapshot(guest=None, msg=None): extend_data = dict() try: assert isinstance(guest, libvirt.virDomain) assert isinstance(msg, dict)
def convert_snapshot(msg=None):
    """Convert a snapshot into a template image with ``qemu-img convert``.

    For glusterfs storage the template directory is created when missing and
    both paths are rewritten as ``gluster://127.0.0.1/<volume>/<path>``
    URLs.  While the conversion runs, the process is sent ``SIGUSR1`` every
    half second and progress lines of the form ``(NN.NN/100%)`` read from
    its output are forwarded as ``snapshot_converting`` events.  A success
    or failure response is emitted at the end.

    :param msg: instruction dict; reads ``snapshot_path``,
        ``template_path``, ``storage_mode``, ``dfs_volume`` (glusterfs
        only), ``snapshot_id``, ``uuid``, ``os_template_image_id``,
        ``_object``, ``action`` and the optional ``passback_parameters``.
    """
    pattern_progress = re.compile(r'\((\d+(\.\d+)?)/100%\)')
    extend_data = {}

    try:
        assert isinstance(msg, dict)

        snapshot_path = msg['snapshot_path']
        template_path = msg['template_path']

        if msg['storage_mode'] == StorageMode.glusterfs.value:
            Guest.dfs_volume = msg['dfs_volume']
            Guest.init_gfapi()

            template_dir = os.path.dirname(template_path)
            if not Guest.gf.isdir(template_dir):
                Guest.gf.makedirs(template_dir, 0o755)

            snapshot_path = '/'.join(['gluster://127.0.0.1', msg['dfs_volume'], snapshot_path])
            template_path = '/'.join(['gluster://127.0.0.1', msg['dfs_volume'], template_path])

        elif msg['storage_mode'] not in [StorageMode.local.value, StorageMode.shared_mount.value]:
            raise ValueError('Unknown value of storage_mode.')

        cmd = ' '.join([
            '/usr/bin/qemu-img', 'convert', '--force-share', '-O', 'qcow2', '-s', msg['snapshot_id'],
            snapshot_path, template_path
        ])

        qemu_img_convert = subprocess.Popen(
            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        # Make stdout non-blocking so the polling loop never stalls on readline().
        current_flags = fcntl.fcntl(qemu_img_convert.stdout, fcntl.F_GETFL)
        fcntl.fcntl(qemu_img_convert.stdout, fcntl.F_SETFL, current_flags | os.O_NONBLOCK)

        while qemu_img_convert.returncode is None:
            line = None

            try:
                line = qemu_img_convert.stdout.readline()
            except IOError:
                # Nothing to read right now on the non-blocking pipe.
                pass

            if line is not None:
                progress_match = pattern_progress.match(line.strip())

                if progress_match is not None:
                    whole_percent = progress_match.group(1).split('.')[0]
                    guest_event_emit.snapshot_converting(
                        uuid=msg['uuid'],
                        os_template_image_id=msg['os_template_image_id'],
                        progress=int(whole_percent))

            time.sleep(0.5)
            # Signal the converter each cycle so a progress line is available to read.
            qemu_img_convert.send_signal(signal.SIGUSR1)
            qemu_img_convert.poll()

        if qemu_img_convert.returncode != 0:
            log = u'创建自定义模板失败,命令执行退出异常。'
            logger.error(msg=log)
            log_emit.error(msg=log)
            raise CommandExecFailed(log)

        response_emit.success(
            _object=msg['_object'],
            action=msg['action'],
            uuid=msg['uuid'],
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))

    except:
        logger.error(traceback.format_exc())
        log_emit.error(traceback.format_exc())
        response_emit.failure(
            _object=msg['_object'],
            action=msg.get('action'),
            uuid=msg.get('uuid'),
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))
def guest_booting2running_report_engine(self):
    """
    Report guests moving from the booting state to the running state.

    Thread body: loops until ``Utils.exit_flag`` is set.  UUIDs taken from
    ``q_booting_guest`` are collected in a local list; for each collected
    UUID the domain is looked up and, when it is active and its guest agent
    answers ``guest-ping``, a ``running`` event is emitted.  Otherwise the
    engine sleeps one cycle and reports the domain's current state through
    ``Guest.guest_state_report``.  Errors are logged and the loop continues.
    """
    self.init_conn()
    # UUIDs of guests waiting to be reported.
    list_booting_guest = list()

    def is_running(_guest):
        # True when the guest agent command completes without raising.
        running = False

        try:
            exec_ret = libvirt_qemu.qemuAgentCommand(_guest, json.dumps({
                'execute': 'guest-ping',
                'arguments': {
                }
            }), 3, libvirt_qemu.VIR_DOMAIN_QEMU_AGENT_COMMAND_NOWAIT)

            running = True

        except:
            logger.error(traceback.format_exc())

        return running

    while True:
        if Utils.exit_flag:
            msg = 'Thread guest_booting2running_report_engine say bye-bye'
            print msg
            logger.info(msg=msg)
            return

        # noinspection PyBroadException
        try:
            try:
                # Wait up to one cycle for a newly queued guest.
                payload = q_booting_guest.get(timeout=config['engine_cycle_interval'])
                list_booting_guest.append(payload)
                q_booting_guest.task_done()

            except Queue.Empty as e:
                time.sleep(config['engine_cycle_interval'])

            # Heartbeat so the host report can show this thread is alive.
            threads_status['guest_booting2running_report_engine'] = dict()
            threads_status['guest_booting2running_report_engine']['timestamp'] = ji.Common.ts()

            for i, uuid in enumerate(list_booting_guest):
                guest = self.conn.lookupByUUIDString(uuidstr=uuid)
                log = u' '.join([u'域', guest.name(), u', UUID', uuid, u'的状态改变为'])

                if guest is not None and guest.isActive() and is_running(_guest=guest):
                    log += u' Running。'
                    guest_event_emit.running(uuid=uuid)
                    logger.info(log)
                    log_emit.info(log)

                else:
                    time.sleep(config['engine_cycle_interval'])
                    Guest.guest_state_report(guest=guest)

                # NOTE(review): the original nesting of this statement could not be
                # recovered; it is placed at loop level here - confirm.  Deleting by
                # index while enumerate() walks the same list means the entry that
                # slides into slot i is not visited during this pass.
                del list_booting_guest[i]

        except:
            logger.error(traceback.format_exc())
            log_emit.error(traceback.format_exc())
def guest_creating_progress_report_engine():
    """
    Guest creation progress reporting engine (thread body).

    Loops until ``Utils.exit_flag`` is set.  Guests taken from
    ``q_creating_guest`` are collected in a local list.  On every pass the
    size of each guest's system image is compared with the size of its
    template and a ``creating`` event is emitted with the ratio scaled to
    0-90.  A guest leaves the list once its image is at least as large as
    the template, or when its storage mode is not supported.  Template sizes
    are cached per template path.  Errors are logged and the loop continues.
    """
    list_creating_guest = list()
    template_size = dict()

    while True:
        if Utils.exit_flag:
            msg = 'Thread guest_creating_progress_report_engine say bye-bye'
            print(msg)
            logger.info(msg=msg)
            return

        # noinspection PyBroadException
        try:
            try:
                payload = q_creating_guest.get(timeout=config['engine_cycle_interval'])
                list_creating_guest.append(payload)
                q_creating_guest.task_done()

            except Queue.Empty:
                pass

            threads_status['guest_creating_progress_report_engine'] = dict()
            threads_status['guest_creating_progress_report_engine']['timestamp'] = ji.Common.ts()

            # Give a freshly queued guest a moment: if the template copy has not
            # started yet, the image path does not exist and getsize() raises.
            time.sleep(1)

            # Walk a snapshot of the list.  Entries used to be deleted by index
            # while enumerate() was iterating the same list, which made the loop
            # skip the entry following every removed one.
            for guest in list(list_creating_guest):
                template_path = guest['template_path']
                progress = 0

                if guest['storage_mode'] in [StorageMode.ceph.value, StorageMode.glusterfs.value]:
                    if guest['storage_mode'] == StorageMode.glusterfs.value:
                        if template_path not in template_size:
                            Guest.dfs_volume = guest['dfs_volume']
                            Guest.init_gfapi()
                            template_size[template_path] = float(Guest.gf.getsize(template_path))

                        system_image_size = Guest.gf.getsize(guest['system_image_path'])
                        progress = system_image_size / template_size[template_path]

                elif guest['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                    if template_path not in template_size:
                        template_size[template_path] = float(os.path.getsize(template_path))

                    system_image_size = os.path.getsize(guest['system_image_path'])
                    progress = system_image_size / template_size[template_path]

                else:
                    list_creating_guest.remove(guest)
                    log = u' '.join([u'UUID: ', guest['uuid'], u'未支持的存储模式: ', str(guest['storage_mode'])])
                    logger.error(log)
                    log_emit.error(log)

                guest_event_emit.creating(uuid=guest['uuid'], progress=int(progress * 90))

                if progress >= 1:
                    list_creating_guest.remove(guest)

        except:
            logger.error(traceback.format_exc())
            log_emit.error(traceback.format_exc())
def adjust_ability(conn=None, guest=None, msg=None):
    """Change the vCPU count and memory size of a shut-off guest.

    The domain XML is rewritten with ``msg['cpu']`` vCPUs and
    ``msg['memory']`` GiB of (current) memory and redefined on ``conn``.
    The change is refused while the guest is active.  A success or failure
    response is emitted accordingly.

    :param conn: libvirt connection used to redefine the domain.
    :param guest: the libvirt domain to reconfigure.
    :param msg: instruction dict; reads ``cpu``, ``memory``, ``_object``,
        ``action``, ``uuid`` and the optional ``passback_parameters``.
    """
    assert isinstance(conn, libvirt.virConnect)
    assert isinstance(guest, libvirt.virDomain)
    assert isinstance(msg, dict)

    extend_data = dict()

    try:
        cpu = str(msg['cpu'])
        memory = str(msg['memory'])

        xml = ET.fromstring(guest.XMLDesc())

        # All text fragments are unicode.  Mixing UTF-8 byte strings such as
        # '核' into the u' '.join() below made Python 2 decode them as ASCII
        # and raise UnicodeDecodeError.
        # NOTE(review): assumes the XML reports <memory> in KiB - confirm.
        origin_memory = int(xml.find('memory').text) // 1024 ** 2
        origin_ability = u''.join([xml.find('vcpu').text, u'核', str(origin_memory), u'GiB'])
        new_ability = u''.join([cpu, u'核', memory, u'GiB'])

        xml.find('vcpu').text = cpu

        xml.find('memory').set('unit', 'GiB')
        xml.find('memory').text = memory

        xml.find('currentMemory').set('unit', 'GiB')
        xml.find('currentMemory').text = memory

        xml_str = ET.tostring(xml, encoding='utf8', method='xml')

        if guest.isActive():
            log = u'虚拟机非关闭状态。'
            raise RuntimeError(log)

        if conn.defineXML(xml=xml_str):
            log = u' '.join([
                u'域', guest.name(), u', UUID', guest.UUIDString(), u'配置从', origin_ability,
                u'变更为', new_ability
            ])
            logger.info(msg=log)
            log_emit.info(msg=log)

        else:
            log = u'变更配置失败。'
            raise RuntimeError(log)

        response_emit.success(
            _object=msg['_object'],
            action=msg['action'],
            uuid=msg['uuid'],
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))

    except:
        logger.error(traceback.format_exc())
        log_emit.error(traceback.format_exc())
        response_emit.failure(
            _object=msg['_object'],
            action=msg.get('action'),
            uuid=msg.get('uuid'),
            data=extend_data,
            passback_parameters=msg.get('passback_parameters'))
def instruction_process_engine(self):
    """
    Instruction processing engine.

    Subscribes to ``config['instruction_channel']`` and loops until
    ``Utils.exit_flag`` is set. Every received message is expected to be a
    JSON object; messages addressed to another ``node_id`` or lacking
    ``_object``/``action`` are ignored. The remaining ones are dispatched on
    ``msg['_object']`` (``guest``, ``disk``, ``snapshot``,
    ``os_template_image``, ``global``) and ``msg['action']``.

    Long-running actions are handed to a new thread and produce no response
    from this loop. Every other instruction is answered with
    ``response_emit.success`` once it completes, or with
    ``response_emit.failure`` when an exception is raised while handling it.
    Exceptions never leave the loop.
    """
    self.init_conn()

    ps = r.pubsub(ignore_subscribe_messages=False)
    ps.subscribe(config['instruction_channel'])

    while True:
        if Utils.exit_flag:
            msg = 'Thread instruction_process_engine say bye-bye'
            print(msg)
            logger.info(msg=msg)
            return

        threads_status['instruction_process_engine'] = dict()
        threads_status['instruction_process_engine']['timestamp'] = ji.Common.ts()

        # Reset on every cycle so the error handler at the bottom never reports a failure against
        # the instruction of a previous cycle.
        msg = None

        # noinspection PyBroadException
        try:
            msg = ps.get_message(timeout=config['engine_cycle_interval'])

            if msg is None or 'data' not in msg or not isinstance(msg['data'], basestring):
                continue

            try:
                msg = json.loads(msg['data'])

                if msg['action'] == 'pong':
                    continue

                if msg['action'] == 'ping':
                    # Keep the subscription alive with ping/pong: testing showed that a subscriber
                    # is dropped automatically when the channel stays silent for a long time.
                    r.publish(config['instruction_channel'], message=json.dumps({'action': 'pong'}))
                    continue

            except ValueError as e:
                logger.error(e.message)
                log_emit.error(e.message)
                continue

            if 'node_id' in msg and int(msg['node_id']) != self.node_id:
                continue

            # Short form of: if '_object' not in msg or 'action' not in msg
            if not all(key in msg for key in ('_object', 'action')):
                continue

            extend_data = dict()

            if msg['_object'] == 'guest':

                self.refresh_guest_mapping()
                if msg['action'] != 'create':

                    if msg['uuid'] not in self.guest_mapping_by_uuid:

                        if config['DEBUG']:
                            _log = u' '.join([u'uuid', msg['uuid'], u'在计算节点', self.hostname, u'中未找到.'])
                            logger.debug(_log)
                            log_emit.debug(_log)

                        raise RuntimeError('The uuid ' + msg['uuid'] + ' not found in current domains list.')

                    self.guest = self.guest_mapping_by_uuid[msg['uuid']]
                    if not isinstance(self.guest, libvirt.virDomain):
                        raise RuntimeError('Guest ' + msg['uuid'] + ' is not a domain.')

                if msg['action'] == 'create':
                    t = threading.Thread(target=Guest.create, args=(self.conn, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'reboot':
                    if self.guest.reboot() != 0:
                        raise RuntimeError('Guest reboot failure.')

                elif msg['action'] == 'force_reboot':
                    self.guest.destroy()
                    self.guest.create()
                    Guest.quota(guest=self.guest, msg=msg)

                elif msg['action'] == 'shutdown':
                    if self.guest.shutdown() != 0:
                        raise RuntimeError('Guest shutdown failure.')

                elif msg['action'] == 'force_shutdown':
                    if self.guest.destroy() != 0:
                        raise RuntimeError('Guest force shutdown failure.')

                elif msg['action'] == 'boot':
                    if not self.guest.isActive():

                        if self.guest.create() != 0:
                            raise RuntimeError('Guest boot failure.')

                        # NOTE(review): nesting reconstructed - quota is applied only when the guest
                        # was actually started here; confirm against the original layout.
                        Guest.quota(guest=self.guest, msg=msg)

                elif msg['action'] == 'suspend':
                    if self.guest.suspend() != 0:
                        raise RuntimeError('Guest suspend failure.')

                elif msg['action'] == 'resume':
                    if self.guest.resume() != 0:
                        raise RuntimeError('Guest resume failure.')

                elif msg['action'] == 'delete':
                    # Read the definition before the domain is undefined; it names the system disk.
                    root = ET.fromstring(self.guest.XMLDesc())

                    if self.guest.isActive():
                        self.guest.destroy()

                    self.guest.undefine()

                    system_disk = None

                    for _disk in root.findall('devices/disk'):
                        if 'vda' == _disk.find('target').get('dev'):
                            system_disk = _disk

                    if msg['storage_mode'] in [StorageMode.ceph.value, StorageMode.glusterfs.value]:
                        # Extract the system image path
                        path_list = system_disk.find('source').attrib['name'].split('/')

                        if msg['storage_mode'] == StorageMode.glusterfs.value:
                            Guest.dfs_volume = path_list[0]
                            Guest.init_gfapi()

                            try:
                                Guest.gf.remove('/'.join(path_list[1:]))
                            except OSError:
                                # Best effort: a failed image removal does not fail the deletion.
                                pass

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        file_path = system_disk.find('source').attrib['file']
                        try:
                            os.remove(file_path)
                        except OSError:
                            # Best effort: a failed image removal does not fail the deletion.
                            pass

                elif msg['action'] == 'reset_password':
                    if self.guest.setUserPassword(msg['user'], msg['password']) != 0:
                        raise RuntimeError('Guest reset password failure.')

                elif msg['action'] == 'attach_disk':

                    if 'xml' not in msg:
                        _log = u'添加磁盘缺少 xml 参数'
                        raise KeyError(_log)

                    flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
                    if self.guest.isActive():
                        flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE

                    # attachDeviceFlags returns 0 on success, like a shell exit status.
                    if self.guest.attachDeviceFlags(xml=msg['xml'], flags=flags) != 0:
                        raise RuntimeError('Attach disk failure.')

                    Guest.quota(guest=self.guest, msg=msg)

                elif msg['action'] == 'detach_disk':

                    if 'xml' not in msg:
                        _log = u'分离磁盘缺少 xml 参数'
                        raise KeyError(_log)

                    flags = libvirt.VIR_DOMAIN_AFFECT_CONFIG
                    if self.guest.isActive():
                        flags |= libvirt.VIR_DOMAIN_AFFECT_LIVE

                    if self.guest.detachDeviceFlags(xml=msg['xml'], flags=flags) != 0:
                        raise RuntimeError('Detach disk failure.')

                elif msg['action'] == 'update_ssh_key':
                    if not self.guest.isActive():
                        _log = u'欲更新 SSH-KEY 的目标虚拟机未处于活动状态。'
                        logger.warning(_log)
                        log_emit.warn(_log)
                        continue

                    ret = Guest.update_ssh_key(guest=self.guest, msg=msg)

                    logger.info(json.dumps(ret, ensure_ascii=False))

                elif msg['action'] == 'allocate_bandwidth':
                    t = threading.Thread(target=Guest.allocate_bandwidth, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'adjust_ability':
                    t = threading.Thread(target=Guest.adjust_ability, args=(self.conn, self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'migrate':

                    # duri like qemu+ssh://destination_host/system
                    if 'duri' not in msg:
                        _log = u'迁移操作缺少 duri 参数'
                        raise KeyError(_log)

                    # https://rk4n.github.io/2016/08/10/qemu-post-copy-and-auto-converge-features/
                    flags = libvirt.VIR_MIGRATE_PERSIST_DEST | \
                        libvirt.VIR_MIGRATE_UNDEFINE_SOURCE | \
                        libvirt.VIR_MIGRATE_COMPRESSED | \
                        libvirt.VIR_MIGRATE_PEER2PEER | \
                        libvirt.VIR_MIGRATE_AUTO_CONVERGE

                    root = ET.fromstring(self.guest.XMLDesc())

                    if msg['storage_mode'] == StorageMode.local.value:
                        # The disk directory has to be part of a storage pool on both hosts, otherwise
                        # libvirt reports: no storage pool with matching target path '/opt/Images'
                        flags |= libvirt.VIR_MIGRATE_NON_SHARED_DISK
                        flags |= libvirt.VIR_MIGRATE_LIVE

                        if not self.guest.isActive():
                            _log = u'非共享存储不支持离线迁移。'
                            logger.error(_log)
                            log_emit.error(_log)
                            raise RuntimeError('Nonsupport offline migrate with storage of non sharing mode.')

                        if self.init_ssh_client(hostname=msg['duri'].split('/')[2], user='******'):
                            # Create an empty image of the same virtual size, at the same path, on
                            # the destination host for every disk of the guest.
                            for _disk in root.findall('devices/disk'):
                                _file_path = _disk.find('source').get('file')
                                disk_info = Disk.disk_info_by_local(image_path=_file_path)
                                disk_size = disk_info['virtual-size']
                                stdin, stdout, stderr = self.ssh_client.exec_command(
                                    ' '.join(['qemu-img', 'create', '-f', 'qcow2', _file_path, str(disk_size)]))

                                for line in stdout:
                                    logger.info(line)
                                    log_emit.info(line)

                                for line in stderr:
                                    logger.error(line)
                                    log_emit.error(line)

                    elif msg['storage_mode'] in [StorageMode.shared_mount.value, StorageMode.ceph.value,
                                                 StorageMode.glusterfs.value]:
                        if self.guest.isActive():
                            flags |= libvirt.VIR_MIGRATE_LIVE
                            flags |= libvirt.VIR_MIGRATE_TUNNELLED
                        else:
                            flags |= libvirt.VIR_MIGRATE_OFFLINE

                    if self.guest.migrateToURI(duri=msg['duri'], flags=flags) == 0:
                        # With local storage the source images are no longer needed after the move.
                        if msg['storage_mode'] == StorageMode.local.value:
                            for _disk in root.findall('devices/disk'):
                                _file_path = _disk.find('source').get('file')
                                if _file_path is not None:
                                    os.remove(_file_path)

                    else:
                        # NOTE(review): nesting reconstructed - this branch is taken when
                        # migrateToURI does not return 0, so the message looks misleading; confirm.
                        raise RuntimeError('Unknown storage mode.')

            elif msg['_object'] == 'disk':
                if msg['action'] == 'create':

                    if msg['storage_mode'] == StorageMode.glusterfs.value:
                        Guest.dfs_volume = msg['dfs_volume']
                        Guest.init_gfapi()

                        if not Disk.make_qemu_image_by_glusterfs(gf=Guest.gf, dfs_volume=msg['dfs_volume'],
                                                                 image_path=msg['image_path'], size=msg['size']):
                            raise RuntimeError('Create disk failure with glusterfs.')

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        if not Disk.make_qemu_image_by_local(image_path=msg['image_path'], size=msg['size']):
                            raise RuntimeError('Create disk failure with local storage mode.')

                elif msg['action'] == 'delete':

                    if msg['storage_mode'] == StorageMode.glusterfs.value:
                        Guest.dfs_volume = msg['dfs_volume']
                        Guest.init_gfapi()

                        if Disk.delete_qemu_image_by_glusterfs(gf=Guest.gf, image_path=msg['image_path']) \
                                is not None:
                            raise RuntimeError('Delete disk failure with glusterfs.')

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        if Disk.delete_qemu_image_by_local(image_path=msg['image_path']) is not None:
                            raise RuntimeError('Delete disk failure with local storage mode.')

                elif msg['action'] == 'resize':

                    if 'size' not in msg:
                        _log = u'添加磁盘缺少 disk 或 disk["size"] 参数'
                        raise KeyError(_log)

                    # A 36-character guest_uuid means the disk is attached to a guest.
                    used = len(msg['guest_uuid']) == 36

                    if used:
                        self.refresh_guest_mapping()

                        if msg['guest_uuid'] not in self.guest_mapping_by_uuid:

                            if config['DEBUG']:
                                _log = u' '.join([u'uuid', msg['guest_uuid'], u'在计算节点', self.hostname,
                                                  u'中未找到.'])
                                logger.debug(_log)
                                log_emit.debug(_log)

                            raise RuntimeError('Resize disk failure, because the uuid ' + msg['guest_uuid'] +
                                               ' not found in current domains.')

                        self.guest = self.guest_mapping_by_uuid[msg['guest_uuid']]
                        if not isinstance(self.guest, libvirt.virDomain):
                            raise RuntimeError('Resize disk failure, because the guest is not a domain.')

                    # Online resize
                    if used and self.guest.isActive():
                        if 'device_node' not in msg:
                            _log = u'添加磁盘缺少 disk 或 disk["device_node|size"] 参数'
                            raise KeyError(_log)

                        # The size unit defaults to KB; multiplying by 1024 twice expresses it in GB.
                        msg['size'] = int(msg['size']) * 1024 * 1024

                        if self.guest.blockResize(disk=msg['device_node'], size=msg['size']) != 0:
                            raise RuntimeError('Online resize disk failure in blockResize method.')

                        Guest.quota(guest=self.guest, msg=msg)

                    # Offline resize
                    else:
                        if not all(key in msg for key in ('storage_mode', 'dfs_volume', 'image_path')):
                            _log = u'添加磁盘缺少 disk 或 disk["storage_mode|dfs_volume|image_path|size"] 参数'
                            raise KeyError(_log)

                        if msg['storage_mode'] == StorageMode.glusterfs.value:
                            if not Disk.resize_qemu_image_by_glusterfs(dfs_volume=msg['dfs_volume'],
                                                                       image_path=msg['image_path'],
                                                                       size=msg['size']):
                                raise RuntimeError('Offline resize disk failure with glusterfs.')

                        elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                            if not Disk.resize_qemu_image_by_local(image_path=msg['image_path'], size=msg['size']):
                                raise RuntimeError('Offline resize disk failure with local storage mode.')

                elif msg['action'] == 'quota':
                    self.refresh_guest_mapping()
                    if msg['guest_uuid'] not in self.guest_mapping_by_uuid:

                        if config['DEBUG']:
                            _log = u' '.join([u'uuid', msg['guest_uuid'], u'在计算节点', self.hostname, u'中未找到.'])
                            logger.debug(_log)
                            log_emit.debug(_log)

                        raise RuntimeError('Disk quota failure, because the uuid ' + msg['guest_uuid'] +
                                           ' not found in current domains.')

                    self.guest = self.guest_mapping_by_uuid[msg['guest_uuid']]
                    if not isinstance(self.guest, libvirt.virDomain):
                        raise RuntimeError('Disk quota failure, because the guest is not a domain.')

                    if not self.guest.isActive():
                        _log = u'磁盘 ' + msg['uuid'] + u' 所属虚拟机未处于活动状态。'
                        logger.warning(_log)
                        log_emit.warn(_log)
                        continue

                    Guest.quota(guest=self.guest, msg=msg)

            elif msg['_object'] == 'snapshot':

                self.refresh_guest_mapping()
                if msg['uuid'] not in self.guest_mapping_by_uuid:

                    if config['DEBUG']:
                        _log = u' '.join([u'uuid', msg['uuid'], u'在计算节点', self.hostname, u'中未找到.'])
                        logger.debug(_log)
                        log_emit.debug(_log)

                    raise RuntimeError('Snapshot ' + msg['action'] + ' failure, because the uuid ' + msg['uuid'] +
                                       ' not found in current domains.')

                self.guest = self.guest_mapping_by_uuid[msg['uuid']]

                if not isinstance(self.guest, libvirt.virDomain):
                    raise RuntimeError('Snapshot ' + msg['action'] + ' failure, because the guest is not a domain.')

                if msg['action'] == 'create':
                    t = threading.Thread(target=Guest.create_snapshot, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'delete':
                    t = threading.Thread(target=Guest.delete_snapshot, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'revert':
                    t = threading.Thread(target=Guest.revert_snapshot, args=(self.guest, msg))
                    t.setDaemon(False)
                    t.start()
                    continue

                elif msg['action'] == 'convert':
                    t = threading.Thread(target=Guest.convert_snapshot, args=(msg,))
                    t.setDaemon(False)
                    t.start()
                    continue

            elif msg['_object'] == 'os_template_image':
                if msg['action'] == 'delete':
                    if msg['storage_mode'] == StorageMode.glusterfs.value:
                        Guest.dfs_volume = msg['dfs_volume']
                        Guest.init_gfapi()

                        try:
                            Guest.gf.remove(msg['template_path'])
                        except OSError:
                            # Best effort: a failed removal is not reported as an error.
                            pass

                    elif msg['storage_mode'] in [StorageMode.local.value, StorageMode.shared_mount.value]:
                        try:
                            os.remove(msg['template_path'])
                        except OSError:
                            # Best effort: a failed removal is not reported as an error.
                            pass

            elif msg['_object'] == 'global':
                if msg['action'] == 'refresh_guest_state':
                    host_use_for_refresh_guest_state = Host()
                    t = threading.Thread(target=host_use_for_refresh_guest_state.refresh_guest_state, args=())
                    t.setDaemon(False)
                    t.start()
                    continue

            else:
                _log = u'未支持的 _object:' + msg['_object']
                logger.error(_log)
                log_emit.error(_log)

            response_emit.success(_object=msg['_object'], action=msg['action'], uuid=msg['uuid'],
                                  data=extend_data, passback_parameters=msg.get('passback_parameters'))

        except redis.exceptions.ConnectionError:
            logger.error(traceback.format_exc())
            # Throttle the loop so a broken redis connection does not flood the log.
            time.sleep(5)

        except Exception:
            # Throttle the loop so a broken redis connection does not flood the log.
            time.sleep(5)
            logger.error(traceback.format_exc())
            log_emit.error(traceback.format_exc())

            # Only answer when a parsed instruction naming its object is at hand. At this point msg
            # may still be None, the raw pub/sub message, or a JSON value that is not an instruction;
            # subscripting it blindly would raise inside this handler and end the thread.
            if isinstance(msg, dict) and '_object' in msg:
                response_emit.failure(_object=msg['_object'], action=msg.get('action'), uuid=msg.get('uuid'),
                                      passback_parameters=msg.get('passback_parameters'))