def action(self, options):
    """Report whether the monitored bridge exists and has a ports entry.

    Fetches host capabilities over the VDSM JSON-RPC connection and
    publishes the outcome through ``update_result``:

    * ``True``  - ``self._bridge`` is listed and its entry has a
      ``'ports'`` key
    * ``False`` - the bridge is missing, or its entry has no ``'ports'``
      key
    * ``None``  - the RPC failed or the reply has no ``'bridges'`` key

    :param options: not used by this method
    """
    rpc = util.connect_vdsm_json_rpc(logger=self._log)
    try:
        capabilities = rpc.Host.getCapabilities()
    except ServerError as err:
        self._log.error(err)
        self.update_result(None)
        return

    if 'bridges' not in capabilities:
        self._log.error(
            "Failed to getVdsCapabilities: No 'bridges' in result")
        self.update_result(None)
        return

    bridges = capabilities['bridges']
    if self._bridge not in bridges:
        self._log.info("Bridge %s not found", self._bridge,
                       extra=log_filter.lf_args('status', 60))
        self.update_result(False)
        return

    # NOTE(review): only the presence of the 'ports' key is tested, so an
    # empty port list still counts as "with ports" - confirm intended.
    has_ports = 'ports' in bridges[self._bridge]
    if has_ports:
        message = "Found bridge %s with ports"
    else:
        message = "Found bridge %s with no ports"
    self._log.info(message, self._bridge,
                   extra=log_filter.lf_args('status', 60))
    self.update_result(has_ports)
def calculate_load(self):
    """Compute the host CPU load excluding the engine VM's share.

    The total load is the busy/total tick delta between the ``'prev'``
    and ``'cur'`` samples in ``self.system``. The engine VM share is
    derived from the ``cpuUser`` and ``cpuSys`` values VDSM reports for
    ``self._vm_uuid``, divided by the CPU count and by 100. When those
    stats cannot be obtained the engine share is treated as 0.0.

    The result, clamped to be non-negative, is stored in ``self.load``.
    """
    dtotal = self.system['cur'].total - self.system['prev'].total
    dbusy = self.system['cur'].busy - self.system['prev'].busy
    # Two samples with the same total tick count would otherwise raise
    # ZeroDivisionError; no elapsed ticks is reported as no load.
    load = dbusy / float(dtotal) if dtotal else 0.0

    cli = util.connect_vdsm_json_rpc(logger=self._log)

    engine_load = 0.0
    try:
        stats = cli.VM.getStats(vmID=self._vm_uuid)[0]
        vm_cpu_total = float(stats["cpuUser"]) + float(stats["cpuSys"])
        cpu_count = multiprocessing.cpu_count()
        engine_load = (vm_cpu_total / cpu_count) / 100.0
    except ServerError as e:
        if e.code == vdsm_exception.NoSuchVM.code:
            self._log.info("VM not on this host",
                           extra=log_filter.lf_args('vm', 60))
        else:
            self._log.error(e, extra=log_filter.lf_args('vm', 60))
    except KeyError:
        self._log.info(
            "VM stats do not contain cpu usage. VM might be down.",
            extra=log_filter.lf_args('vm', 60))
    except ValueError as e:
        self._log.error("Error getting cpuUser: %s", str(e))

    # The engine share can exceed the measured total; never go negative.
    load_no_engine = max(load - engine_load, 0.0)

    self._log.info(
        "System load"
        " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}".format(
            load, engine_load, load_no_engine))
    self.load = load_no_engine
def action(self, options):
    """Report whether the monitored bridge is in the ``'up'`` state.

    Fetches host statistics over the VDSM JSON-RPC connection and
    publishes through ``update_result``: ``True`` when the entry for
    ``self._bridge`` under ``'network'`` has ``'state' == 'up'``,
    ``False`` when the bridge is missing or in any other state, and
    ``None`` when the RPC fails or the reply has no ``'network'`` key.

    :param options: not used by this method
    """
    rpc = util.connect_vdsm_json_rpc(logger=self._log)
    try:
        host_stats = rpc.Host.getStats()
    except ServerError as err:
        self._log.error(err)
        self.update_result(None)
        return

    if 'network' not in host_stats:
        self._log.error("Failed to getVdsStats: No 'network' in result")
        self.update_result(None)
        return

    networks = host_stats['network']
    if self._bridge not in networks:
        self._log.info("Bridge %s not found", self._bridge,
                       extra=log_filter.lf_args('status', 60))
        self.update_result(False)
        return

    entry = networks[self._bridge]
    bridge_up = 'state' in entry and entry['state'] == 'up'
    if bridge_up:
        message = "Found bridge %s in up state"
    else:
        message = "Found bridge %s not in up state"
    self._log.info(message, self._bridge,
                   extra=log_filter.lf_args('status', 60))
    self.update_result(bridge_up)
def getEngineVMOVF(self):
    """Extract the engine VM OVF from the OVF_STORE volume.

    Reads ``<self._HEVMID>.ovf`` from the volume at the class-level
    ``OVFStore._ovf_store_path`` through ``heconflib.extractConfFile``.

    :return: the value returned by ``extractConfFile``; a ``None``
        result is logged as an error and returned unchanged
    """
    self._log.debug('Extracting Engine VM OVF from the OVF_STORE')

    store_path = OVFStore._ovf_store_path
    path_filter = log_filter.lf_args(LF_OVF_STORE_PATH, LF_OVF_LOG_DELAY)
    self._log.info('OVF_STORE volume path: %s ' % store_path,
                   extra=path_filter)

    ovf_name = self._HEVMID + '.ovf'
    content = heconflib.extractConfFile(self._log, store_path, ovf_name)
    # Logged before the None check, so a failed extraction shows "None".
    self._log.debug('HEVM OVF: \n%s\n' % content)

    if content is None:
        self._log.error(
            'Unable to extract HEVM OVF',
            extra=log_filter.lf_args(LF_EXTRACTION_FAILED,
                                     LF_OVF_LOG_DELAY))
    return content
def _get_vm_conf_content_from_ovf_store(self):
    """Try to build the vm.conf content from the engine VM OVF.

    Scans for the OVF_STORE when no store info is cached, extracts the
    engine VM OVF and converts it with ``ovf2VmParams.confFromOvf``.

    ``self._logger`` is optional: when it is unset nothing is logged.
    (Previously only the first message was guarded, so every later log
    call raised ``AttributeError`` without a logger.)

    :return: the converted configuration, or ``None`` when the store
        cannot be identified, the OVF cannot be extracted or the
        conversion fails (callers fall back to the initial vm.conf,
        per the log messages)
    """
    def log(level, msg, *args, **kwargs):
        # Forward to the optional logger; silently skip when unset.
        if self._logger:
            getattr(self._logger, level)(msg, *args, **kwargs)

    log('debug',
        "Trying to get a fresher copy of vm configuration "
        "from the OVF_STORE")

    ovfs = ovf_store.OVFStore()
    if not ovfs.have_store_info():
        try:
            ovfs.scan()
        except Exception as err:
            # Best effort: the check below reports the missing store.
            log('error', "Failed scanning for OVF_STORE due to %s", err)

    if not ovfs.have_store_info():
        log('error',
            'Unable to identify the OVF_STORE volume, '
            'falling back to initial vm.conf. Please '
            'ensure you already added your first data '
            'domain for regular VMs',
            extra=log_filter.lf_args(LF_OVF_NOT_THERE,
                                     LF_OVF_LOG_DELAY))
        return None

    heovf = ovfs.getEngineVMOVF()
    if heovf is None:
        log('error',
            'Failed extracting VM OVF from the OVF_STORE '
            'volume, falling back to initial vm.conf',
            extra=log_filter.lf_args(LF_OVF_EXTRACTION_FAILED,
                                     LF_OVF_LOG_DELAY))
        # This error might indicate the OVF location changed
        # and clearing the cache will trigger a rescan
        # next time we access the OVF.
        ovfs.clear_store_info()
        return None

    log('debug', "Found an OVF for HE VM, trying to convert")
    conf = ovf2VmParams.confFromOvf(heovf)
    if conf is None:
        log('error',
            'Failed converting vm.conf from the VM OVF, '
            'falling back to initial vm.conf',
            extra=log_filter.lf_args(LF_OVF_CONVERSION_FAILED,
                                     LF_OVF_LOG_DELAY))
        return None

    log('debug', 'Got vm.conf from OVF_STORE')
    return conf
def action(self, options):
    """Send one ping to ``self._addr`` and publish whether it answered.

    Runs ``ping -c 1 -W <self._timeout> <self._addr>`` with its output
    discarded and calls ``update_result(True)`` on a zero exit status,
    ``update_result(False)`` otherwise.

    :param options: not used by this method
    """
    with open(os.devnull, "w") as sink:
        # NOTE(review): Popen needs string arguments, so this presumably
        # relies on self._timeout being a str - confirm.
        command = ["ping", "-c", "1", "-W", self._timeout, self._addr]
        ping = subprocess.Popen(command, stdout=sink, stderr=sink)
        reachable = ping.wait() == 0
        if reachable:
            self._log.info("Successfully pinged %s", self._addr,
                           extra=log_filter.lf_args("status", 60))
        else:
            self._log.warning("Failed to ping %s", self._addr)
        self.update_result(reachable)
def getEngineVMOVF(self):
    """Return the engine VM OVF read from the OVF_STORE volume.

    The file ``<self._HEVMID>.ovf`` is pulled out of the volume found at
    ``OVFStore._ovf_store_path`` using ``heconflib.extractConfFile``.

    :return: the extracted OVF, or ``None`` (after logging an error)
        when ``extractConfFile`` returned ``None``
    """
    log = self._log
    log.debug('Extracting Engine VM OVF from the OVF_STORE')

    volume = OVFStore._ovf_store_path
    log.info(
        'OVF_STORE volume path: %s ' % volume,
        extra=log_filter.lf_args(LF_OVF_STORE_PATH, LF_OVF_LOG_DELAY),
    )

    extracted = heconflib.extractConfFile(
        log, volume, self._HEVMID + '.ovf')
    log.debug('HEVM OVF: \n%s\n' % extracted)

    if extracted is not None:
        return extracted

    log.error(
        'Unable to extract HEVM OVF',
        extra=log_filter.lf_args(LF_EXTRACTION_FAILED, LF_OVF_LOG_DELAY),
    )
    return extracted
def update_stat_file(self):
    """Refresh ``self.proc_stat`` from the engine VM's ``/proc`` stat line.

    If a pid is already known, its stat line is re-read and accepted only
    when field index 21 still equals ``self.engine_pid_start_time``, i.e.
    the pid was not reused by another process. Otherwise the pid is looked
    up through the ``getVmStats`` VDSM command and its stat line is read.

    ``self.proc_stat``, ``self.engine_pid`` and
    ``self.engine_pid_start_time`` are only set together from a stat line
    that parsed completely. On any failure ``self.proc_stat`` stays
    ``None`` so the next call retries.
    """
    if self.engine_pid:
        # Try the known pid and verify it's the same process
        fname = '/proc/{0}/stat'.format(self.engine_pid)
        try:
            with open(fname, 'r') as f:
                proc_stat = f.readline().split()
            # Parsed inside the try: a truncated or malformed line must
            # count as "not our process" instead of raising.
            same_process = (
                int(proc_stat[21]) == self.engine_pid_start_time)
        except Exception:
            self.proc_stat = None
        else:
            if same_process:
                self.proc_stat = proc_stat
                self._log.debug("VM on this host, pid %d", self.engine_pid,
                                extra=log_filter.lf_args('vm', 60))
            else:
                # This isn't the engine qemu process...
                self.proc_stat = None

    if self.proc_stat is None:
        # Look for the engine vm pid and try to get the stats
        self.engine_pid = None
        self.engine_pid_start_time = None
        try:
            stats = vdsc.run_vds_client_cmd(self._address, self._use_ssl,
                                            'getVmStats', self._vm_uuid)
            pid = int(stats['statsList'][0]['pid'])
        except Exception as e:
            if isinstance(e, exceptions.DetailedError) \
                    and e.detail == "Virtual machine does not exist":
                self._log.info("VM not on this host",
                               extra=log_filter.lf_args('vm', 60))
            else:
                self._log.error("Failed to getVmStats: %s", str(e),
                                extra=log_filter.lf_args('vm', 60))
        else:
            fname = '/proc/{0}/stat'.format(pid)
            try:
                with open(fname, 'r') as f:
                    proc_stat = f.readline().split()
                start_time = int(proc_stat[21])
            except Exception as e:
                # Try again next time: nothing was committed, so
                # self.proc_stat is still None and the lookup is retried.
                self._log.error("Failed to read vm stats: %s", str(e),
                                extra=log_filter.lf_args('vm', 60))
            else:
                self.proc_stat = proc_stat
                self.engine_pid_start_time = start_time
                self.engine_pid = pid
def action(self, options):
    """Publish the host's free memory as reported by ``getVdsStats``.

    The ``memFree`` value from the reply's ``'info'`` section is
    converted to ``str`` and passed to ``update_result``. ``None`` is
    published when the VDSM command raises.

    :param options: not used by this method
    """
    try:
        reply = vdsc.run_vds_client_cmd(
            self._address, self._use_ssl, "getVdsStats")
    except Exception as err:
        self._log.error("Failed to getVdsStats: %s", str(err))
        self.update_result(None)
    else:
        free_memory = str(reply["info"]["memFree"])
        self._log.info("memFree: %s", free_memory,
                       extra=log_filter.lf_args("status", 60))
        self.update_result(free_memory)
def action(self, options):
    """Report whether the monitored bridge exists and has ports.

    Runs the ``getVdsCapabilities`` VDSM command and publishes through
    ``update_result``: ``True`` when ``self._bridge`` is listed with a
    non-empty ``'ports'`` value, ``False`` when it is missing or has no
    ports, and ``None`` when the command raises.

    :param options: not used by this method
    """
    try:
        reply = vdsc.run_vds_client_cmd(
            self._address, self._use_ssl, 'getVdsCapabilities')
    except Exception as err:
        self._log.error("Failed to getVdsCapabilities: %s", str(err))
        self.update_result(None)
        return

    info = reply['info']
    if 'bridges' not in info or self._bridge not in info['bridges']:
        self._log.info("Bridge %s not found", self._bridge,
                       extra=log_filter.lf_args('status', 60))
        self.update_result(False)
        return

    # A missing 'ports' key raises KeyError here, as before.
    has_ports = bool(info['bridges'][self._bridge]['ports'])
    if has_ports:
        message = "Found bridge %s with ports"
    else:
        message = "Found bridge %s with no ports"
    self._log.info(message, self._bridge,
                   extra=log_filter.lf_args('status', 60))
    self.update_result(has_ports)
def action(self, options):
    """Publish the host's free memory as reported by VDSM host stats.

    The ``memFree`` value is converted to ``str`` and passed to
    ``update_result``. ``None`` is published when the RPC fails or when
    the reply has no ``'memFree'`` key, instead of raising ``KeyError``.

    :param options: not used by this method
    """
    cli = util.connect_vdsm_json_rpc(
        logger=self._log
    )

    try:
        stats = cli.Host.getStats()
    except ServerError as e:
        self._log.error(e)
        self.update_result(None)
        return

    # Same handling the sibling monitors apply to their expected keys.
    if 'memFree' not in stats:
        self._log.error("Failed to getVdsStats: "
                        "No 'memFree' in result")
        self.update_result(None)
        return

    mem_free = str(stats['memFree'])
    self._log.info("memFree: %s", mem_free,
                   extra=log_filter.lf_args('status', 60))
    self.update_result(mem_free)
def calculate_load(self):
    """Store in ``self.load`` the CPU load not caused by the engine VM.

    Total load is the busy/total tick delta between the ``'prev'`` and
    ``'cur'`` entries of ``self.system``. The engine VM share comes from
    the ``cpuUser`` + ``cpuSys`` stats VDSM returns for ``self._vm_uuid``,
    divided by the CPU count and by 100; it stays 0.0 when the stats
    cannot be fetched or parsed. The difference is clamped at 0.0.
    """
    current = self.system['cur']
    previous = self.system['prev']
    total_ticks = current.total - previous.total
    busy_ticks = current.busy - previous.busy
    # NOTE(review): raises ZeroDivisionError if both samples have the
    # same total - confirm callers guarantee distinct samples.
    load = busy_ticks / float(total_ticks)

    rpc = util.connect_vdsm_json_rpc(logger=self._log)

    engine_load = 0.0
    try:
        vm_stats = rpc.VM.getStats(vmID=self._vm_uuid)[0]
        vm_cpu = float(vm_stats["cpuUser"]) + float(vm_stats["cpuSys"])
        engine_load = (vm_cpu / multiprocessing.cpu_count()) / 100.0
    except ServerError as err:
        if err.code != vdsm_exception.NoSuchVM.code:
            self._log.error(err, extra=log_filter.lf_args('vm', 60))
        else:
            self._log.info("VM not on this host",
                           extra=log_filter.lf_args('vm', 60))
    except KeyError:
        self._log.info(
            "VM stats do not contain cpu usage. VM might be down.",
            extra=log_filter.lf_args('vm', 60))
    except ValueError as err:
        self._log.error("Error getting cpuUser: %s", str(err))

    non_engine_load = max(load - engine_load, 0.0)
    summary = (
        "System load total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}"
    ).format(load, engine_load, non_engine_load)
    self._log.info(summary)
    self.load = non_engine_load
def action(self, options):
    """Publish the host memory load (used / total) as a string.

    ``memUsed`` is taken from the VDSM host stats and ``memSize`` from
    the host capabilities; their ratio is passed to ``update_result`` as
    ``str``. ``None`` is published when an RPC fails or when the values
    are missing, not numeric, or ``memSize`` is zero.

    :param options: not used by this method
    """
    cli = util.connect_vdsm_json_rpc(
        logger=self._log
    )

    try:
        stats = cli.Host.getStats()
        caps = cli.Host.getCapabilities()
    except ServerError as e:
        self._log.error(e)
        self.update_result(None)
        return

    try:
        mem_size = int(caps['memSize'])
        mem_used = int(stats['memUsed'])
        mem_load = float(mem_used) / mem_size
    except (KeyError, TypeError, ValueError, ZeroDivisionError) as e:
        # Bad data from VDSM is reported like a failed RPC rather than
        # letting the exception escape the monitor.
        self._log.error("Failed to compute memory load: %r", e)
        self.update_result(None)
        return

    self._log.info("memSize: %d, memUsed: %d, Load: %f",
                   mem_size, mem_used, mem_load,
                   extra=log_filter.lf_args('status', 60))
    self.update_result(str(mem_load))
def action(self, options):
    """Ping ``self._total`` times and publish the success ratio.

    Calls ``self._ping()`` once per attempt, sleeping ``self._delay``
    seconds between attempts (not after the last one), then passes
    ``successes / total`` as a float to ``update_result``.

    :param options: not used by this method
    """
    attempts = self._total
    final_attempt = attempts - 1
    successes = 0

    for attempt in range(attempts):
        if self._ping():
            successes += 1
        # No need to wait once the last ping has been sent.
        if attempt != final_attempt:
            time.sleep(self._delay)

    if successes != attempts:
        self._log.warning("Failed to ping %s, (%s out of %s)",
                          self._addr, successes, attempts)
    else:
        self._log.info("Successfully pinged %s", self._addr,
                       extra=log_filter.lf_args('status', 60))

    self.update_result(float(successes) / float(attempts))
def action(self, options):
    """Poll VDSM for the engine VM stats and publish its health.

    On success the stats are handed to ``self._update_stats`` together
    with the VDSM ``statusTime`` (as ``str``) and the local timestamp.
    On ``ServerError`` a JSON status is published directly: "vm not
    running on this host" for the NoSuchVM error code, "failed to
    getVmStats" for any other.

    :param options: not used by this method
    """
    # First, see if vdsm tells us it's up
    rpc = util.connect_vdsm_json_rpc(logger=self._log)

    # Sample the clock before the RPC so that any later status-change
    # event carries a newer timestamp than this poll.
    sampled_at = monotonic.time()

    try:
        vm_stats = rpc.VM.getStats(vmID=self._vm_uuid)[0]
    except ServerError as err:
        if err.code != vdsm_exception.NoSuchVM.code:
            self._log.error(err)
            report = {
                'vm': 'unknown',
                'health': 'unknown',
                'detail': 'unknown',
                'reason': 'failed to getVmStats'
            }
        else:
            self._log.info("VM not on this host",
                           extra=log_filter.lf_args('status', 60))
            # A VM that was up and has now vanished is "missing".
            if self._vm_state == engine.VMState.UP:
                self._vm_state = engine.VMState.DOWN_MISSING
            report = {
                'vm': self._vm_state,
                'health': engine.Health.BAD,
                'detail': 'unknown',
                'reason': 'vm not running on this host'
            }

        with self._lock:
            self._stats_local_timestamp = sampled_at
            self._stats_vdsm_timestamp = None
            self.update_result(json.dumps(report))
        return

    # statusTime may be an int; normalise it to a string.
    self._update_stats(vm_stats, str(vm_stats.get("statusTime")),
                       sampled_at)
def action(self, options):
    """Run the configured network test repeatedly; publish the pass ratio.

    The test callable is looked up in ``self._tests`` by
    ``self._network_test`` and invoked ``self._total`` times with
    ``self._delay`` seconds between runs. ``passed / total`` is passed to
    ``update_result`` as a float.

    :param options: not used by this method
    """
    probe = self._tests[self._network_test]
    runs = self._total
    passed = 0

    for run in range(runs):
        if probe():
            passed += 1
        # Pause only between tests, not after the final one.
        if run + 1 < runs:
            time.sleep(self._delay)

    if passed == runs:
        self._log.info("Successfully verified network status",
                       extra=log_filter.lf_args('status', 60))
    else:
        self._log.warning(
            "Failed to verify network status, (%s out of %s)",
            passed, runs)

    self.update_result(float(passed) / float(runs))
def action(self, options):
    """Publish the fraction of successful network test runs.

    ``self._tests[self._network_test]`` is called ``self._total`` times,
    waiting ``self._delay`` seconds between consecutive calls, and the
    ratio of truthy results is handed to ``update_result``.

    :param options: not used by this method
    """
    check = self._tests[self._network_test]
    total = self._total
    remaining = total
    successes = 0

    while remaining > 0:
        remaining -= 1
        if check():
            successes += 1
        # wait between tests
        if remaining:
            time.sleep(self._delay)

    if successes != total:
        self._log.warning(
            "Failed to verify network status, (%s out of %s)",
            successes, total
        )
    else:
        self._log.info("Successfully verified network status",
                       extra=log_filter.lf_args('status', 60))

    self.update_result(float(successes) / float(total))
def action(self, options):
    """Fetch the engine VM stats from VDSM and update the health result.

    A successful call forwards the stats to ``self._update_stats``. A
    ``ServerError`` publishes a JSON status instead and clears the VDSM
    timestamp; the NoSuchVM error code is reported as the VM not running
    on this host, everything else as an unknown state.

    :param options: not used by this method
    """
    # First, see if vdsm tells us it's up
    cli = util.connect_vdsm_json_rpc(logger=self._log)

    # Taken before the RPC call, so any future event with a status
    # change will have a newer timestamp.
    local_ts = monotonic.time()

    try:
        stats = cli.VM.getStats(vmID=self._vm_uuid)[0]
    except ServerError as e:
        vm_missing = e.code == vdsm_exception.NoSuchVM.code
        if vm_missing:
            self._log.info("VM not on this host",
                           extra=log_filter.lf_args('status', 60))
            if self._vm_state == engine.VMState.UP:
                self._vm_state = engine.VMState.DOWN_MISSING
            status = {'vm': self._vm_state,
                      'health': engine.Health.BAD,
                      'detail': 'unknown',
                      'reason': 'vm not running on this host'}
        else:
            self._log.error(e)
            status = {'vm': 'unknown',
                      'health': 'unknown',
                      'detail': 'unknown',
                      'reason': 'failed to getVmStats'}

        with self._lock:
            self._stats_local_timestamp = local_ts
            self._stats_vdsm_timestamp = None
            self.update_result(json.dumps(status))
    else:
        # Convert timestamp to string in case it is an int
        vdsm_ts = str(stats.get("statusTime"))
        self._update_stats(stats, vdsm_ts, local_ts)
def calculate_load(self):
    """Update ``self.load`` with the CPU load not caused by the engine VM.

    Total load is the busy/total tick delta between the ``'prev'`` and
    ``'cur'`` samples in ``self.system``. The engine VM share is computed
    from the ``cpuUser`` and ``cpuSys`` stats VDSM reports for
    ``self._vm_uuid``.

    The result is stored only when the VM stats look real (``cpuUsage``
    is not the ``'0.00'`` init value) or the stats call did not succeed.
    Otherwise the previous ``self.load`` is kept until no real data has
    been seen for 300 seconds, tracked in ``self.latest_real_stats_ts``.
    """
    dtotal = self.system['cur'].total - self.system['prev'].total
    dbusy = self.system['cur'].busy - self.system['prev'].busy
    # NOTE(review): divides by zero if both samples have the same total.
    load = dbusy / float(dtotal)

    cli = util.connect_vdsm_json_rpc(logger=self._log)

    engine_load = 0.0
    cpu_data_is_real = False
    vm_on_this_host = False
    try:
        stats = cli.VM.getStats(vmID=self._vm_uuid)[0]
        # Set as soon as the RPC succeeds, before the values are parsed.
        vm_on_this_host = True
        vm_cpu_total = float(stats["cpuUser"]) + float(stats["cpuSys"])
        cpu_count = multiprocessing.cpu_count()
        engine_load = (vm_cpu_total / cpu_count) / 100.0
        # This is a hack. vdsm initializes cpuUsage to 0.00, and when it
        # gets a result from libvirt (as 'cpu.user', 'cpu.system'), sets
        # it to libvirt's value. cpuUser and cpuSystem are also initialized
        # to '0.00', but can also have '0.00' as a legit value afterwards.
        # But cpuUsage, if it has a value from libvirt, is always an
        # integer. Actually, AFAICT, initializing it to '0.00' might be
        # considered a bug. Anyway, rely on this for deciding whether
        # cpuUser/cpuSystem are real or init values.
        # TODO: Extend VDSM's API to include this information explicitly,
        # e.g. by adding a new field, say 'stats_from_libvirt' which is
        # True or False, and base the decision on this.
        cpu_data_is_real = stats['cpuUsage'] != '0.00'
    except ServerError as e:
        if e.code == vdsm_exception.NoSuchVM.code:
            self._log.info("VM not on this host",
                           extra=log_filter.lf_args('vm', 60))
            # Restart the "no real data" timer once the VM is gone.
            self.latest_real_stats_ts = None
        else:
            self._log.error(e, extra=log_filter.lf_args('vm', 60))
    except KeyError:
        self._log.info(
            "VM stats do not contain cpu usage. VM might be down.",
            extra=log_filter.lf_args('vm', 60))
    except ValueError as e:
        self._log.error("Error getting cpuUser: %s", str(e))

    load_no_engine = load - engine_load
    # The engine share may exceed the measured total; clamp at zero.
    load_no_engine = max(load_no_engine, 0.0)

    if cpu_data_is_real or not vm_on_this_host:
        self._log.info(
            "System load"
            " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}".format(
                load, engine_load, load_no_engine))
        self.load = load_no_engine
        self.latest_real_stats_ts = time.time()
    else:
        # In certain cases, we got cpuUser=0.00 for up to around
        # 90 seconds after a VM was up, causing what seems like
        # a "general" high cpu load unrelated to that VM.
        # This caused problems with hosted-engine HA daemons,
        # which lower the score of that host due to that load.
        # Rely on cpuUsage value instead. See also:
        # https://lists.ovirt.org/archives/list/[email protected]/thread/\
        # 7HNIFCW4NENG4ADZ5ROT43TCDXDURRJB/
        if self.latest_real_stats_ts is None:
            # Just ignore, but start counting
            self.latest_real_stats_ts = time.time()
        elif not util.has_elapsed(self.latest_real_stats_ts, 300):
            self._log.info("Ignoring cpuUser/cpuSys, init values")
        else:
            # No real data, and for more than 5 minutes.
            # It's probably bad enough that we should just
            # not ignore - so if cpu load is high, just report
            # that, and if as a result the score will be low
            # and the VM will be shut down - so be it.
            self._log.info(
                "System load"
                " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}".
                format(load, engine_load, load_no_engine))
            self._log.info("engine VM cpu usage is not up-to-date")
            self.load = load_no_engine
def _result_from_stats(self, stats):
    """Translate VDSM VM stats into an engine health result dict.

    Updates ``self._vm_state`` as a side effect. The checks run in order:
    storage lock held elsewhere, successful migration away, definitely
    down states, not-really-up states, and finally the
    ``--check-liveliness`` probe of the hosted-engine binary.

    :param stats: VM stats mapping; ``'status'`` is required,
        ``'exitMessage'`` and ``'exitReason'`` are optional
    :return: dict with ``'vm'``, ``'health'`` and ``'detail'`` keys, plus
        ``'reason'`` whenever the health is bad
    """
    vm_status = stats['status']

    def verdict(health, reason=None):
        # Built on demand so it picks up the freshly assigned _vm_state.
        outcome = {'vm': self._vm_state,
                   'health': health,
                   'detail': vm_status}
        if reason is not None:
            outcome['reason'] = reason
        return outcome

    # Check if another host was faster in acquiring the storage lock
    exit_message = stats.get('exitMessage', "")
    lock_failures = (
        'Failed to acquire lock: error -243',
        'Failed to acquire lock: Lease is held by another host',
    )
    is_down = vm_status == vmstatus.DOWN

    if is_down and exit_message.endswith(lock_failures):
        self._log.info("VM storage is already locked.",
                       extra=log_filter.lf_args('status', 60))
        self._vm_state = engine.VMState.DOWN
        return verdict(
            engine.Health.BAD,
            'Storage of VM is locked. '
            'Is another host already starting the VM?')

    # Check if VM migration succeeded
    if is_down and (stats.get('exitReason', 0)
                    == vmexitreason.MIGRATION_SUCCEEDED):
        self._log.info("VM successfully migrated away from this host.",
                       extra=log_filter.lf_args('status', 60))
        self._vm_state = engine.VMState.DOWN
        return verdict(engine.Health.BAD, 'VM migrated away successfully')

    # Check for states that are definitely down
    if is_down or vm_status == vmstatus.MIGRATION_DESTINATION:
        self._log.info("VM not running on this host, status %s",
                       vm_status,
                       extra=log_filter.lf_args('status', 60))
        # An already recorded DOWN state is kept as it is.
        if self._vm_state != engine.VMState.DOWN:
            self._vm_state = engine.VMState.DOWN_UNEXPECTED
        return verdict(engine.Health.BAD, 'bad vm status')

    # Report states that are not really Up, but should be
    # reported as such
    transitional = (vmstatus.PAUSED,
                    vmstatus.WAIT_FOR_LAUNCH,
                    vmstatus.RESTORING_STATE,
                    vmstatus.POWERING_UP)
    if vm_status in transitional:
        self._log.info("VM status: %s", vm_status,
                       extra=log_filter.lf_args('status', 60))
        self._vm_state = engine.VMState.UP
        return verdict(engine.Health.BAD, 'bad vm status')

    # VM is probably up, let's see if engine is up by polling
    # health status page
    probe = subprocess.Popen(
        [constants.HOSTED_ENGINE_BINARY, '--check-liveliness'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    output = probe.communicate()
    self._vm_state = engine.VMState.UP

    if probe.returncode == 0:
        self._log.info("VM is up on this host with healthy engine",
                       extra=log_filter.lf_args('status', 60))
        return verdict(engine.Health.GOOD)

    self._log.warning("bad health status: %s", output[0])
    return verdict(engine.Health.BAD, 'failed liveliness check')
def scan(self):
    """Locate the OVF_STORE volume on the storage domain.

    Clears the cached store info, then walks every volume of every image
    on ``self._sdUUID``. A volume whose JSON description has
    ``'Disk Description' == 'OVF_STORE'`` is prepared through VDSM and
    its path is stored in ``OVFStore._ovf_store_path``. Scanning does not
    stop at the first match, so the last one found wins.

    :return: ``True`` when a store path is set afterwards, else ``False``
    :raises RuntimeError: when a VDSM call fails with ``ServerError``
    """
    self.clear_store_info()

    rpc = util.connect_vdsm_json_rpc(
        logger=self._log, timeout=constants.VDSCLI_SSL_TIMEOUT)
    images = image.Image(self._type, self._sdUUID).get_images_list(rpc)

    for img_uuid in images:
        try:
            volumes = rpc.StorageDomain.getVolumes(
                imageID=img_uuid,
                storagepoolID=self._spUUID,
                storagedomainID=self._sdUUID,
            )
            self._log.debug(volumes)
        except ServerError as err:
            raise RuntimeError(str(err))

        for vol_uuid in volumes:
            try:
                info = rpc.Volume.getInfo(
                    volumeID=vol_uuid,
                    imageID=img_uuid,
                    storagepoolID=self._spUUID,
                    storagedomainID=self._sdUUID,
                )
                self._log.debug(info)
            except ServerError as err:
                raise RuntimeError(str(err))

            text = info['description']
            # Only descriptions that look like a JSON object are parsed.
            looks_like_json = (
                'Disk Description' in text
                and text.startswith('{')
                and text.endswith('}')
            )
            if not looks_like_json:
                continue

            parsed = json.loads(text)
            self._log.debug(parsed)
            if parsed['Disk Description'] != 'OVF_STORE':
                continue

            self._log.info(
                'Found OVF_STORE: imgUUID:{img}, volUUID:{vol}'.format(
                    img=img_uuid, vol=vol_uuid))

            # Prepare symlinks for the OVF store
            try:
                prepared = rpc.Image.prepare(
                    storagepoolID=self._spUUID,
                    storagedomainID=self._sdUUID,
                    imageID=img_uuid,
                    volumeID=vol_uuid)
                OVFStore._ovf_store_path = prepared["path"]
            except ServerError as err:
                raise RuntimeError(str(err))

    if self._ovf_store_path is not None:
        return True

    self._log.warning(
        'Unable to find OVF_STORE',
        extra=log_filter.lf_args(LF_OVF_STORE_NOT_FOUND,
                                 LF_OVF_LOG_DELAY))
    return False
def scan(self):
    """Locate the OVF_STORE volume on the storage domain.

    Clears the cached store info, then inspects every volume of every
    image on ``self._sdUUID``. A volume whose JSON description has
    ``'Disk Description' == 'OVF_STORE'`` is prepared through VDSM and
    its path is stored in ``OVFStore._ovf_store_path``. Scanning does not
    stop at the first match, so the last one found wins.

    A volume whose description is missing, is not valid JSON or lacks
    the ``'Disk Description'`` key is skipped instead of aborting the
    whole scan.

    :return: ``True`` when a store path is set afterwards, else ``False``
    :raises RuntimeError: when a VDSM call fails with ``ServerError``
    """
    self.clear_store_info()

    cli = util.connect_vdsm_json_rpc(
        logger=self._log,
        timeout=constants.VDSCLI_SSL_TIMEOUT
    )

    imgs = image.Image(self._type, self._sdUUID)
    imageslist = imgs.get_images_list(cli)

    for img_uuid in imageslist:
        try:
            volumeslist = cli.StorageDomain.getVolumes(
                imageID=img_uuid,
                storagepoolID=self._spUUID,
                storagedomainID=self._sdUUID,
            )
            self._log.debug(volumeslist)
        except ServerError as e:
            raise RuntimeError(str(e))

        for vol_uuid in volumeslist:
            try:
                volumeinfo = cli.Volume.getInfo(
                    volumeID=vol_uuid,
                    imageID=img_uuid,
                    storagepoolID=self._spUUID,
                    storagedomainID=self._sdUUID,
                )
                self._log.debug(volumeinfo)
            except ServerError as e:
                raise RuntimeError(str(e))

            # A volume without a description cannot be the OVF_STORE.
            description = volumeinfo.get('description') or ''
            if not (
                'Disk Description' in description and
                description[0] == '{' and
                description[-1] == '}'
            ):
                continue

            try:
                description_dict = json.loads(description)
            except ValueError as e:
                # The brace check above is only a heuristic; free-text
                # descriptions can pass it without being JSON.
                self._log.debug(
                    'Skipping volume %s: description is not JSON (%s)',
                    vol_uuid, e)
                continue
            self._log.debug(description_dict)

            # .get(): the marker text may appear in a value only.
            if description_dict.get('Disk Description') != 'OVF_STORE':
                continue

            self._log.info(
                'Found OVF_STORE: '
                'imgUUID:{img}, volUUID:{vol}'.format(
                    img=img_uuid,
                    vol=vol_uuid,
                )
            )

            # Prepare symlinks for the OVF store
            try:
                image_info = cli.Image.prepare(
                    storagepoolID=self._spUUID,
                    storagedomainID=self._sdUUID,
                    imageID=img_uuid,
                    volumeID=vol_uuid
                )
                OVFStore._ovf_store_path = image_info["path"]
            except ServerError as e:
                raise RuntimeError(str(e))

    if self._ovf_store_path is None:
        self._log.warning('Unable to find OVF_STORE',
                          extra=log_filter.lf_args(
                              LF_OVF_STORE_NOT_FOUND,
                              LF_OVF_LOG_DELAY
                          ))
        return False
    return True
def action(self, options):
    """Determine the engine VM/engine health and publish it as JSON.

    Runs the ``getVmStats`` VDSM command for ``self._vm_uuid``. Failures
    and VM states that are not fully up are published immediately;
    otherwise the ``--check-liveliness`` probe of the hosted-engine
    binary decides between good and bad health.

    :param options: not used by this method
    """
    def publish(status):
        self.update_result(json.dumps(status))

    # First, see if vdsm tells us it's up
    try:
        stats = vdsc.run_vds_client_cmd(
            self._address, self._use_ssl, 'getVmStats', self._vm_uuid)
    except Exception as err:
        vm_absent = (isinstance(err, exceptions.DetailedError)
                     and err.detail == "Virtual machine does not exist")
        if vm_absent:
            # Not on this host
            self._log.info("VM not on this host",
                           extra=log_filter.lf_args('status', 60))
            publish({'vm': 'down', 'health': 'bad', 'detail': 'unknown',
                     'reason': 'vm not running on this host'})
        else:
            self._log.error("Failed to getVmStats: %s", str(err))
            publish({'vm': 'unknown', 'health': 'unknown',
                     'detail': 'unknown',
                     'reason': 'failed to getVmStats'})
        return

    vm_status = stats['statsList'][0]['status'].lower()

    # Report states that are not really Up, but should be
    # reported as such
    not_quite_up = ('paused', 'waitforlaunch', 'restoringstate',
                    'powering up')
    if vm_status in not_quite_up:
        self._log.info("VM status: %s", vm_status,
                       extra=log_filter.lf_args('status', 60))
        publish({'vm': engine.VMState.UP, 'health': engine.Health.BAD,
                 'detail': vm_status, 'reason': 'bad vm status'})
        return

    # Check for states that are definitely down
    if vm_status in ('down', 'migration destination'):
        self._log.info("VM not running on this host, status %s",
                       vm_status,
                       extra=log_filter.lf_args('status', 60))
        publish({'vm': engine.VMState.DOWN, 'health': engine.Health.BAD,
                 'detail': vm_status, 'reason': 'bad vm status'})
        return

    # VM is probably up, let's see if engine is up by polling
    # health status page
    probe = subprocess.Popen(
        [constants.HOSTED_ENGINE_BINARY, '--check-liveliness'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    output = probe.communicate()

    if probe.returncode != 0:
        self._log.warning("bad health status: %s", output[0])
        publish({'vm': engine.VMState.UP, 'health': engine.Health.BAD,
                 'detail': vm_status,
                 'reason': 'failed liveliness check'})
        return

    self._log.info("VM is up on this host with healthy engine",
                   extra=log_filter.lf_args('status', 60))
    publish({'vm': engine.VMState.UP, 'health': engine.Health.GOOD,
             'detail': vm_status})
def _get_vm_conf_content_from_ovf_store(self):
    """Return vm.conf content converted from the engine VM OVF, if any.

    Scans for the OVF_STORE when no store info is cached, extracts the
    engine VM OVF and converts it with ``ovf2VmParams.confFromOvf``.

    ``self._logger`` may be unset, in which case nothing is logged. The
    original guarded only the first message, so any later log call
    raised ``AttributeError`` without a logger.

    :return: the converted configuration, or ``None`` when the store
        cannot be identified, the OVF cannot be extracted or the
        conversion fails
    """
    def _log(level, msg, *args, **kwargs):
        # Forward to the optional logger; do nothing when it is unset.
        if self._logger:
            getattr(self._logger, level)(msg, *args, **kwargs)

    _log(
        'debug',
        "Trying to get a fresher copy of vm configuration "
        "from the OVF_STORE"
    )

    ovfs = ovf_store.OVFStore()

    if not ovfs.have_store_info():
        try:
            ovfs.scan()
        except Exception as err:
            # Exception already covers EnvironmentError. The failure is
            # only logged; the check below handles the missing store.
            _log(
                'error',
                "Failed scanning for OVF_STORE due to %s",
                err
            )

    if ovfs.have_store_info():
        heovf = ovfs.getEngineVMOVF()
        if heovf is not None:
            _log(
                'debug',
                "Found an OVF for HE VM, "
                "trying to convert"
            )
            conf = ovf2VmParams.confFromOvf(heovf)
            if conf is not None:
                _log('debug', 'Got vm.conf from OVF_STORE')
                return conf
            else:
                _log(
                    'error',
                    'Failed converting vm.conf from the VM OVF, '
                    'falling back to initial vm.conf',
                    extra=log_filter.lf_args(
                        LF_OVF_CONVERSION_FAILED,
                        LF_OVF_LOG_DELAY)
                )
        else:
            _log(
                'error',
                'Failed extracting VM OVF from the OVF_STORE '
                'volume, falling back to initial vm.conf',
                extra=log_filter.lf_args(
                    LF_OVF_EXTRACTION_FAILED,
                    LF_OVF_LOG_DELAY)
            )
            # This error might indicate the OVF location changed
            # and clearing the cache will trigger a rescan
            # next time we access the OVF.
            ovfs.clear_store_info()
    else:
        _log(
            'error',
            'Unable to identify the OVF_STORE volume, '
            'falling back to initial vm.conf. Please '
            'ensure you already added your first data '
            'domain for regular VMs',
            extra=log_filter.lf_args(
                LF_OVF_NOT_THERE,
                LF_OVF_LOG_DELAY
            )
        )
    return None