Exemple #1
0
    def action(self, options):
        """Publish whether the monitored bridge is known to VDSM with ports.

        Asks VDSM for the host capabilities and reports through
        update_result():
          * None  - the call failed or the reply has no 'bridges' entry,
          * True  - the bridge is listed and its entry has a 'ports' key,
          * False - the bridge is missing or its entry has no 'ports' key.

        Only the presence of the 'ports' key is checked, not whether the
        port list is non-empty.
        """
        client = util.connect_vdsm_json_rpc(logger=self._log)
        try:
            caps = client.Host.getCapabilities()
        except ServerError as err:
            self._log.error(err)
            self.update_result(None)
            return

        if 'bridges' not in caps:
            self._log.error(
                "Failed to getVdsCapabilities: No 'bridges' in result")
            self.update_result(None)
            return

        # Pick the message and the verdict first, then log and publish once.
        bridges = caps['bridges']
        if self._bridge not in bridges:
            message, verdict = "Bridge %s not found", False
        elif 'ports' in bridges[self._bridge]:
            message, verdict = "Found bridge %s with ports", True
        else:
            message, verdict = "Found bridge %s with no ports", False

        self._log.info(message, self._bridge,
                       extra=log_filter.lf_args('status', 60))
        self.update_result(verdict)
    def calculate_load(self):
        """Store the host CPU load with the engine VM's share removed.

        The overall load is the busy delta divided by the total delta between
        the 'prev' and 'cur' samples in self.system.  The engine VM's share
        is (cpuUser + cpuSys) from VDSM, spread over all CPUs and scaled from
        percent to a fraction.  When the VM stats cannot be obtained or
        parsed, the share stays 0.0.  The result is clamped at 0.0 and stored
        in self.load.

        Raises ZeroDivisionError if both samples carry the same total.
        """
        current = self.system['cur']
        previous = self.system['prev']
        total_delta = current.total - previous.total
        busy_delta = current.busy - previous.busy
        load = busy_delta / float(total_delta)

        client = util.connect_vdsm_json_rpc(logger=self._log)

        engine_load = 0.0
        try:
            vm_stats = client.VM.getStats(vmID=self._vm_uuid)[0]
            vm_cpu_percent = (float(vm_stats["cpuUser"]) +
                              float(vm_stats["cpuSys"]))
            # Per-VM percentage -> fraction of the whole host.
            engine_load = (
                vm_cpu_percent / multiprocessing.cpu_count() / 100.0)
        except ServerError as err:
            if err.code == vdsm_exception.NoSuchVM.code:
                self._log.info("VM not on this host",
                               extra=log_filter.lf_args('vm', 60))
            else:
                self._log.error(err, extra=log_filter.lf_args('vm', 60))
        except KeyError:
            self._log.info(
                "VM stats do not contain cpu usage. VM might be down.",
                extra=log_filter.lf_args('vm', 60))
        except ValueError as err:
            self._log.error("Error getting cpuUser: %s", str(err))

        load_no_engine = max(load - engine_load, 0.0)

        summary = ("System load"
                   " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}")
        self._log.info(summary.format(load, engine_load, load_no_engine))
        self.load = load_no_engine
Exemple #3
0
    def action(self, options):
        """Publish whether the monitored bridge is reported in 'up' state.

        Reads the host statistics from VDSM and reports through
        update_result():
          * None  - the call failed or the reply has no 'network' entry,
          * True  - the bridge is listed with state == 'up',
          * False - the bridge is missing, has no 'state' or is not 'up'.
        """
        client = util.connect_vdsm_json_rpc(logger=self._log)
        try:
            stats = client.Host.getStats()
        except ServerError as err:
            self._log.error(err)
            self.update_result(None)
            return

        if 'network' not in stats:
            self._log.error("Failed to getVdsStats: No 'network' in result")
            self.update_result(None)
            return

        networks = stats['network']
        if self._bridge not in networks:
            message, verdict = "Bridge %s not found", False
        else:
            bridge = networks[self._bridge]
            if 'state' in bridge and bridge['state'] == 'up':
                message, verdict = "Found bridge %s in up state", True
            else:
                message, verdict = "Found bridge %s not in up state", False

        self._log.info(message,
                       self._bridge,
                       extra=log_filter.lf_args('status', 60))
        self.update_result(verdict)
 def getEngineVMOVF(self):
     """Extract the engine VM OVF from the cached OVF_STORE volume.

     Looks for '<HEVMID>.ovf' inside the volume at
     OVFStore._ovf_store_path using heconflib.extractConfFile and returns
     its result unchanged.  A None result is logged as an error.
     """
     self._log.debug('Extracting Engine VM OVF from the OVF_STORE')
     store_path = OVFStore._ovf_store_path
     path_message = 'OVF_STORE volume path: %s ' % store_path
     self._log.info(
         path_message,
         extra=log_filter.lf_args(LF_OVF_STORE_PATH, LF_OVF_LOG_DELAY),
     )

     engine_ovf = heconflib.extractConfFile(
         self._log, store_path, self._HEVMID + '.ovf')
     self._log.debug('HEVM OVF: \n%s\n' % engine_ovf)

     if engine_ovf is None:
         self._log.error(
             'Unable to extract HEVM OVF',
             extra=log_filter.lf_args(LF_EXTRACTION_FAILED,
                                      LF_OVF_LOG_DELAY),
         )
     return engine_ovf
    def _get_vm_conf_content_from_ovf_store(self):
        """Build vm.conf content from the engine VM OVF in the OVF_STORE.

        Scans for the OVF_STORE when its location is not cached yet, extracts
        the engine VM OVF and converts it with ovf2VmParams.confFromOvf.

        Returns the converted configuration, or None when the OVF_STORE
        cannot be located, the OVF cannot be extracted, or the conversion
        fails.  A failing scan is logged and not re-raised.

        self._logger is optional.  With no logger set, every message is
        dropped; previously only the first message was guarded and all later
        ones raised AttributeError.
        """
        def log(level, msg, *args, **kwargs):
            # Single place that honours the "logger may be unset" contract.
            if self._logger:
                getattr(self._logger, level)(msg, *args, **kwargs)

        log('debug',
            "Trying to get a fresher copy of vm configuration "
            "from the OVF_STORE")

        ovfs = ovf_store.OVFStore()

        if not ovfs.have_store_info():
            try:
                ovfs.scan()
            except Exception as err:
                # Best effort: the have_store_info() check below decides
                # whether the scan produced something usable.
                log('error', "Failed scanning for OVF_STORE due to %s", err)

        if not ovfs.have_store_info():
            log('error',
                'Unable to identify the OVF_STORE volume, '
                'falling back to initial vm.conf. Please '
                'ensure you already added your first data '
                'domain for regular VMs',
                extra=log_filter.lf_args(LF_OVF_NOT_THERE, LF_OVF_LOG_DELAY))
            return None

        heovf = ovfs.getEngineVMOVF()
        if heovf is None:
            log('error',
                'Failed extracting VM OVF from the OVF_STORE '
                'volume, falling back to initial vm.conf',
                extra=log_filter.lf_args(LF_OVF_EXTRACTION_FAILED,
                                         LF_OVF_LOG_DELAY))
            # This error might indicate the OVF location changed
            # and clearing the cache will trigger a rescan
            # next time we access the OVF.
            ovfs.clear_store_info()
            return None

        log('debug', "Found an OVF for HE VM, trying to convert")
        conf = ovf2VmParams.confFromOvf(heovf)
        if conf is None:
            log('error',
                'Failed converting vm.conf from the VM OVF, '
                'falling back to initial vm.conf',
                extra=log_filter.lf_args(LF_OVF_CONVERSION_FAILED,
                                         LF_OVF_LOG_DELAY))
            return None

        log('debug', 'Got vm.conf from OVF_STORE')
        return conf
 def action(self, options):
     """Send one ping to the configured address and publish True/False.

     Runs `ping -c 1 -W <timeout> <addr>` with its output discarded.  A
     zero exit status is reported as True, anything else as False.
     """
     command = ["ping", "-c", "1", "-W", self._timeout, self._addr]
     with open(os.devnull, "w") as sink:
         exit_status = subprocess.Popen(
             command, stdout=sink, stderr=sink).wait()
         reachable = exit_status == 0
         if reachable:
             self._log.info("Successfully pinged %s", self._addr,
                            extra=log_filter.lf_args("status", 60))
         else:
             self._log.warning("Failed to ping %s", self._addr)
         self.update_result(reachable)
 def getEngineVMOVF(self):
     """Return the engine VM OVF read from the OVF_STORE volume.

     The file '<HEVMID>.ovf' is extracted from the volume at
     OVFStore._ovf_store_path via heconflib.extractConfFile.  Its result
     is returned as-is, and a None result is reported as an error in the
     log.
     """
     log = self._log
     log.debug('Extracting Engine VM OVF from the OVF_STORE')

     volume = OVFStore._ovf_store_path
     log.info('OVF_STORE volume path: %s ' % volume,
              extra=log_filter.lf_args(LF_OVF_STORE_PATH, LF_OVF_LOG_DELAY))

     wanted = self._HEVMID + '.ovf'
     content = heconflib.extractConfFile(log, volume, wanted)
     log.debug('HEVM OVF: \n%s\n' % content)

     if content is None:
         log.error('Unable to extract HEVM OVF',
                   extra=log_filter.lf_args(LF_EXTRACTION_FAILED,
                                            LF_OVF_LOG_DELAY))
     return content
    def update_stat_file(self):
        """Refresh self.proc_stat from the engine VM's /proc/<pid>/stat.

        First the cached pid is retried.  Its stat line is accepted only if
        field index 21 still equals the remembered start time, which guards
        against the pid having been reused by another process.  If there is
        no cached pid or the check fails, the pid is looked up again through
        VDSM ('getVmStats'), and pid and start time are cached once the stat
        line has been read.

        On failure self.proc_stat stays None and the cached pid and start
        time are cleared; the next call tries again.
        """
        if self.engine_pid:
            # Try the known pid and verify it's the same process
            fname = '/proc/{0}/stat'.format(self.engine_pid)
            try:
                with open(fname, 'r') as f:
                    self.proc_stat = f.readline().split()
                # Parsed inside the try: a process that exits mid-read can
                # leave an empty or short line, which must lead to a fresh
                # lookup rather than an uncaught IndexError/ValueError.
                same_process = (
                    int(self.proc_stat[21]) == self.engine_pid_start_time)
            except Exception:
                self.proc_stat = None
            else:
                if same_process:
                    self._log.debug("VM on this host, pid %d", self.engine_pid,
                                    extra=log_filter.lf_args('vm', 60))
                else:
                    # This isn't the engine qemu process...
                    self.proc_stat = None

        if self.proc_stat is None:
            # Look for the engine vm pid and try to get the stats
            self.engine_pid = None
            self.engine_pid_start_time = None
            try:
                stats = vdsc.run_vds_client_cmd(self._address, self._use_ssl,
                                                'getVmStats', self._vm_uuid)
                pid = int(stats['statsList'][0]['pid'])
            except Exception as e:
                if isinstance(e, exceptions.DetailedError) \
                        and e.detail == "Virtual machine does not exist":
                    self._log.info("VM not on this host",
                                   extra=log_filter.lf_args('vm', 60))
                else:
                    self._log.error("Failed to getVmStats: %s", str(e),
                                    extra=log_filter.lf_args('vm', 60))
            else:
                fname = '/proc/{0}/stat'.format(pid)
                try:
                    with open(fname, 'r') as f:
                        self.proc_stat = f.readline().split()
                    self.engine_pid_start_time = int(self.proc_stat[21])
                    self.engine_pid = pid
                except Exception as e:
                    # Try again next time
                    self._log.error("Failed to read vm stats: %s", str(e),
                                    extra=log_filter.lf_args('vm', 60))
    def action(self, options):
        """Publish the host's free memory as reported by 'getVdsStats'.

        The 'memFree' value from the reply's 'info' section is published as
        a string.  None is published when the VDSM call fails.
        """
        try:
            reply = vdsc.run_vds_client_cmd(
                self._address, self._use_ssl, "getVdsStats")
        except Exception as err:
            self._log.error("Failed to getVdsStats: %s", str(err))
            self.update_result(None)
        else:
            free_memory = str(reply["info"]["memFree"])
            self._log.info("memFree: %s", free_memory,
                           extra=log_filter.lf_args("status", 60))
            self.update_result(free_memory)
    def action(self, options):
        """Publish whether the monitored bridge exists and has ports.

        Uses 'getVdsCapabilities' and reports through update_result():
          * None  - the VDSM call failed,
          * True  - the bridge is listed with a non-empty 'ports' value,
          * False - the bridge is missing, or it has no or empty 'ports'.
        """
        try:
            response = vdsc.run_vds_client_cmd(self._address, self._use_ssl,
                                               'getVdsCapabilities')
        except Exception as e:
            self._log.error("Failed to getVdsCapabilities: %s", str(e))
            self.update_result(None)
            return

        info = response['info']
        if 'bridges' in info and self._bridge in info['bridges']:
            # A bridge entry without a 'ports' key counts as "no ports"
            # instead of raising KeyError out of the monitor.
            if info['bridges'][self._bridge].get('ports'):
                self._log.info("Found bridge %s with ports", self._bridge,
                               extra=log_filter.lf_args('status', 60))
                self.update_result(True)
            else:
                self._log.info("Found bridge %s with no ports", self._bridge,
                               extra=log_filter.lf_args('status', 60))
                self.update_result(False)
        else:
            self._log.info("Bridge %s not found", self._bridge,
                           extra=log_filter.lf_args('status', 60))
            self.update_result(False)
    def action(self, options):
        """Publish the host's free memory taken from VDSM host statistics.

        The 'memFree' value is published as a string.  None is published
        when the JSON-RPC call raises ServerError.
        """
        client = util.connect_vdsm_json_rpc(logger=self._log)
        try:
            host_stats = client.Host.getStats()
        except ServerError as err:
            self._log.error(err)
            self.update_result(None)
        else:
            free_memory = str(host_stats['memFree'])
            self._log.info("memFree: %s", free_memory,
                           extra=log_filter.lf_args('status', 60))
            self.update_result(free_memory)
    def calculate_load(self):
        """Compute the non-engine CPU load of the host into self.load.

        Total load is the busy/total tick ratio between the 'prev' and 'cur'
        entries of self.system.  The engine VM's contribution (cpuUser +
        cpuSys from VDSM, divided by the CPU count and by 100) is
        subtracted, and the result is floored at 0.0.  If the VM stats are
        unavailable or unparsable, the contribution is taken as 0.0.

        Raises ZeroDivisionError if the total did not change between the
        two samples.
        """
        sample_now = self.system['cur']
        sample_before = self.system['prev']
        elapsed = sample_now.total - sample_before.total
        busy = sample_now.busy - sample_before.busy
        load = busy / float(elapsed)

        rpc = util.connect_vdsm_json_rpc(logger=self._log)

        engine_load = 0.0
        try:
            engine_stats = rpc.VM.getStats(vmID=self._vm_uuid)[0]
            user_pct = float(engine_stats["cpuUser"])
            sys_pct = float(engine_stats["cpuSys"])
            cpus = multiprocessing.cpu_count()
            engine_load = ((user_pct + sys_pct) / cpus) / 100.0
        except ServerError as exc:
            if exc.code == vdsm_exception.NoSuchVM.code:
                self._log.info("VM not on this host",
                               extra=log_filter.lf_args('vm', 60))
            else:
                self._log.error(exc, extra=log_filter.lf_args('vm', 60))
        except KeyError:
            self._log.info(
                "VM stats do not contain cpu usage. VM might be down.",
                extra=log_filter.lf_args('vm', 60)
            )
        except ValueError as exc:
            self._log.error("Error getting cpuUser: %s", str(exc))

        # The engine share can exceed the measured total; never go negative.
        load_no_engine = max(load - engine_load, 0.0)

        self._log.info(
            "System load"
            " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}".format(
                load, engine_load, load_no_engine))
        self.load = load_no_engine
Exemple #13
0
    def action(self, options):
        """Publish the host memory load, memUsed / memSize, as a string.

        'memUsed' comes from the VDSM host statistics and 'memSize' from the
        host capabilities.  None is published when either JSON-RPC call
        raises ServerError.
        """
        client = util.connect_vdsm_json_rpc(logger=self._log)
        try:
            host_stats = client.Host.getStats()
            host_caps = client.Host.getCapabilities()
        except ServerError as err:
            self._log.error(err)
            self.update_result(None)
        else:
            total = int(host_caps['memSize'])
            used = int(host_stats['memUsed'])
            ratio = float(used) / total
            self._log.info("memSize: %d, memUsed: %d, Load: %f",
                           total, used, ratio,
                           extra=log_filter.lf_args('status', 60))
            self.update_result(str(ratio))
    def action(self, options):
        """Ping self._total times and publish the fraction that succeeded.

        Consecutive pings are separated by a sleep of self._delay; there is
        no sleep before the first or after the last one.  The published
        value is successes / total as a float.
        """
        attempts = self._total
        successes = 0
        for attempt in range(attempts):
            # wait between pings (never before the first one)
            if attempt:
                time.sleep(self._delay)
            if self._ping():
                successes += 1

        if successes != attempts:
            self._log.warning("Failed to ping %s, (%s out of %s)", self._addr,
                              successes, attempts)
        else:
            self._log.info("Successfully pinged %s",
                           self._addr,
                           extra=log_filter.lf_args('status', 60))

        self.update_result(float(successes) / float(attempts))
Exemple #15
0
    def action(self, options):
        """Query VDSM for the engine VM stats and publish its status.

        On success the stats are handed to self._update_stats together with
        the VDSM 'statusTime' (as a string) and the local timestamp.  On
        ServerError a JSON status document is published under self._lock,
        the local timestamp is recorded and the VDSM timestamp is reset.
        """
        # First, see if vdsm tells us it's up
        client = util.connect_vdsm_json_rpc(logger=self._log)

        # Get timestamp before RPC call, so any future event with
        # status change will have a newer timestamp
        sampled_at = monotonic.time()

        try:
            vm_stats = client.VM.getStats(vmID=self._vm_uuid)[0]
        except ServerError as err:
            vm_missing = err.code == vdsm_exception.NoSuchVM.code
            if vm_missing:
                self._log.info("VM not on this host",
                               extra=log_filter.lf_args('status', 60))

                # A VM that was up and has now vanished is a distinct state.
                if self._vm_state == engine.VMState.UP:
                    self._vm_state = engine.VMState.DOWN_MISSING

                report = {
                    'vm': self._vm_state,
                    'health': engine.Health.BAD,
                    'detail': 'unknown',
                    'reason': 'vm not running on this host',
                }
            else:
                self._log.error(err)
                report = {
                    'vm': 'unknown',
                    'health': 'unknown',
                    'detail': 'unknown',
                    'reason': 'failed to getVmStats',
                }

            with self._lock:
                self._stats_local_timestamp = sampled_at
                self._stats_vdsm_timestamp = None
                self.update_result(json.dumps(report))
        else:
            # Convert timestamp to string in case it is an int
            reported_at = str(vm_stats.get("statusTime"))
            self._update_stats(vm_stats, reported_at, sampled_at)
    def action(self, options):
        """Run the selected network test repeatedly and publish the pass rate.

        The test callable is looked up in self._tests by self._network_test
        and invoked self._total times, sleeping self._delay between
        consecutive runs.  The published value is passes / total as a float.
        """
        check = self._tests[self._network_test]
        runs = self._total
        passed = 0
        for run in range(runs):
            # wait between tests (never before the first one)
            if run:
                time.sleep(self._delay)
            if check():
                passed += 1

        if passed != runs:
            self._log.warning(
                "Failed to verify network status, (%s out of %s)", passed,
                runs)
        else:
            self._log.info("Successfully verified network status",
                           extra=log_filter.lf_args('status', 60))

        self.update_result(float(passed) / float(runs))
    def action(self, options):
        """Publish the success ratio of the configured network test.

        self._tests[self._network_test] is called self._total times with a
        pause of self._delay after every run except the last.  The ratio
        passes / total is published as a float.
        """
        probe = self._tests[self._network_test]
        total = self._total
        good = 0
        # Count down so "more runs to come" is simply a non-zero remainder.
        for remaining in range(total - 1, -1, -1):
            if probe():
                good += 1

            # wait between tests
            if remaining:
                time.sleep(self._delay)

        all_good = good == total
        if all_good:
            self._log.info("Successfully verified network status",
                           extra=log_filter.lf_args('status', 60))
        else:
            self._log.warning(
                "Failed to verify network status, (%s out of %s)",
                good, total
            )

        self.update_result(float(good) / float(total))
    def action(self, options):
        """Fetch the engine VM stats from VDSM and publish the VM status.

        When VDSM answers, processing is delegated to self._update_stats
        with the stats, the stringified 'statusTime' and the local
        timestamp.  When VDSM raises ServerError, a JSON status document is
        published while holding self._lock, and the stored VDSM timestamp
        is cleared.
        """
        # First, see if vdsm tells us it's up
        rpc = util.connect_vdsm_json_rpc(logger=self._log)

        # Get timestamp before RPC call, so any future event with
        # status change will have a newer timestamp
        local_ts = monotonic.time()

        try:
            stats = rpc.VM.getStats(vmID=self._vm_uuid)[0]
        except ServerError as exc:
            if exc.code != vdsm_exception.NoSuchVM.code:
                # Any other failure: nothing is known about the VM.
                self._log.error(exc)
                status = {'vm': 'unknown',
                          'health': 'unknown',
                          'detail': 'unknown',
                          'reason': 'failed to getVmStats'}
            else:
                self._log.info("VM not on this host",
                               extra=log_filter.lf_args('status', 60))

                if self._vm_state == engine.VMState.UP:
                    self._vm_state = engine.VMState.DOWN_MISSING

                status = {'vm': self._vm_state,
                          'health': engine.Health.BAD,
                          'detail': 'unknown',
                          'reason': 'vm not running on this host'}

            with self._lock:
                self._stats_local_timestamp = local_ts
                self._stats_vdsm_timestamp = None
                self.update_result(json.dumps(status))

            return

        # Convert timestamp to string in case it is an int
        self._update_stats(stats, str(stats.get("statusTime")), local_ts)
Exemple #19
0
    def calculate_load(self):
        """Update self.load with the host CPU load minus the engine VM share.

        The total load is the busy/total delta ratio between the 'prev' and
        'cur' samples in self.system. The engine VM share is derived from
        the VDSM VM stats (cpuUser + cpuSys, divided by the CPU count and by
        100).

        self.load is only updated when the VM cpu figures look real
        (cpuUsage differs from the '0.00' init value), when the VM stats
        could not be fetched at all, or when no real figures have been seen
        for the period checked by util.has_elapsed(..., 300). Otherwise the
        previous self.load is kept.

        Raises ZeroDivisionError if both samples carry the same total.
        """
        dtotal = self.system['cur'].total - self.system['prev'].total
        dbusy = self.system['cur'].busy - self.system['prev'].busy
        load = dbusy / float(dtotal)

        cli = util.connect_vdsm_json_rpc(logger=self._log)

        engine_load = 0.0
        cpu_data_is_real = False
        vm_on_this_host = False
        try:
            stats = cli.VM.getStats(vmID=self._vm_uuid)[0]
            # Set as soon as the stats were fetched: a KeyError/ValueError
            # raised further down must still count as "VM is here".
            vm_on_this_host = True
            vm_cpu_total = float(stats["cpuUser"]) + float(stats["cpuSys"])
            cpu_count = multiprocessing.cpu_count()
            engine_load = (vm_cpu_total / cpu_count) / 100.0
            # This is a hack. vdsm initializes cpuUsage to 0.00, and when it
            # gets a result from libvirt (as 'cpu.user', 'cpu.system'), sets
            # it to libvirt's value. cpuUser and cpuSystem are also initialized
            # to '0.00', but can also have '0.00' as a legit value afterwards.
            # But cpuUsage, if it has a value from libvirt, is always an
            # integer. Actually, AFAICT, initializing it to '0.00' might be
            # considered a bug. Anyway, rely on this for deciding whether
            # cpuUser/cpuSystem are real or init values.
            # TODO: Extend VDSM's API to include this information explicitly,
            # e.g. by adding a new field, say 'stats_from_libvirt' which is
            # True or False, and base the decision on this.
            cpu_data_is_real = stats['cpuUsage'] != '0.00'
        except ServerError as e:
            if e.code == vdsm_exception.NoSuchVM.code:
                self._log.info("VM not on this host",
                               extra=log_filter.lf_args('vm', 60))
                self.latest_real_stats_ts = None
            else:
                self._log.error(e, extra=log_filter.lf_args('vm', 60))
        except KeyError:
            self._log.info(
                "VM stats do not contain cpu usage. VM might be down.",
                extra=log_filter.lf_args('vm', 60))
        except ValueError as e:
            self._log.error("Error getting cpuUser: %s", str(e))

        # Never report a negative load when the engine share exceeds the
        # measured total.
        load_no_engine = load - engine_load
        load_no_engine = max(load_no_engine, 0.0)

        if cpu_data_is_real or not vm_on_this_host:
            self._log.info(
                "System load"
                " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}".format(
                    load, engine_load, load_no_engine))
            self.load = load_no_engine
            self.latest_real_stats_ts = time.time()
        else:
            # In certain cases, we got cpuUser=0.00 for up to around
            # 90 seconds after a VM was up, causing what seems like
            # a "general" high cpu load unrelated to that VM.
            # This caused problems with hosted-engine HA daemons,
            # which lower the score of that host due to that load.
            # Rely on cpuUsage value instead. See also:
            # https://lists.ovirt.org/archives/list/[email protected]/thread/\
            # 7HNIFCW4NENG4ADZ5ROT43TCDXDURRJB/
            if self.latest_real_stats_ts is None:
                # Just ignore, but start counting
                self.latest_real_stats_ts = time.time()
            elif not util.has_elapsed(self.latest_real_stats_ts, 300):
                self._log.info("Ignoring cpuUser/cpuSys, init values")
            else:
                # No real data, and for more than 5 minutes.
                # It's probably bad enough that we should just
                # not ignore - so if cpu load is high, just report
                # that, and if as a result the score will be low
                # and the VM will be shut down - so be it.
                self._log.info(
                    "System load"
                    " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}".
                    format(load, engine_load, load_no_engine))
                self._log.info("engine VM cpu usage is not up-to-date")
                self.load = load_no_engine
    def _result_from_stats(self, stats):
        """Translate VDSM VM stats into the engine status report.

        Updates self._vm_state as a side effect and returns a dict with the
        keys 'vm', 'health' and 'detail', plus 'reason' for every outcome
        except a healthy engine.  The checks run in this order: storage
        lock held elsewhere, successful migration away, definitely-down
        states, not-quite-up states, and finally the engine liveliness
        check run through the hosted-engine binary.
        """
        vm_status = stats['status']

        def report(health, reason=None):
            # Snapshot of the current state; 'reason' only when given.
            result = {'vm': self._vm_state,
                      'health': health,
                      'detail': vm_status}
            if reason is not None:
                result['reason'] = reason
            return result

        # Check if another host was faster in acquiring the storage lock
        exit_message = stats.get('exitMessage', "")
        lock_failures = (
            'Failed to acquire lock: error -243',
            'Failed to acquire lock: Lease is held by another host',
        )
        if (vm_status == vmstatus.DOWN and
                exit_message.endswith(lock_failures)):
            self._log.info("VM storage is already locked.",
                           extra=log_filter.lf_args('status', 60))
            self._vm_state = engine.VMState.DOWN
            return report(engine.Health.BAD,
                          'Storage of VM is locked. '
                          'Is another host already starting the VM?')

        # Check if VM migration succeeded
        migrated_away = (
            vm_status == vmstatus.DOWN and
            stats.get('exitReason', 0) == vmexitreason.MIGRATION_SUCCEEDED
        )
        if migrated_away:
            self._log.info("VM successfully migrated away from this host.",
                           extra=log_filter.lf_args('status', 60))
            self._vm_state = engine.VMState.DOWN
            return report(engine.Health.BAD, 'VM migrated away successfully')

        # Check for states that are definitely down
        if vm_status in (vmstatus.DOWN, vmstatus.MIGRATION_DESTINATION):
            self._log.info("VM not running on this host, status %s", vm_status,
                           extra=log_filter.lf_args('status', 60))

            if self._vm_state != engine.VMState.DOWN:
                self._vm_state = engine.VMState.DOWN_UNEXPECTED

            return report(engine.Health.BAD, 'bad vm status')

        # Report states that are not really Up, but should be
        # reported as such
        not_quite_up = (vmstatus.PAUSED,
                        vmstatus.WAIT_FOR_LAUNCH,
                        vmstatus.RESTORING_STATE,
                        vmstatus.POWERING_UP)
        if vm_status in not_quite_up:
            self._log.info("VM status: %s", vm_status,
                           extra=log_filter.lf_args('status', 60))
            self._vm_state = engine.VMState.UP
            return report(engine.Health.BAD, 'bad vm status')

        # VM is probably up, let's see if engine is up by polling
        # health status page
        liveliness = subprocess.Popen(
            [constants.HOSTED_ENGINE_BINARY, '--check-liveliness'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout_data, _ = liveliness.communicate()
        if liveliness.returncode != 0:
            self._log.warning("bad health status: %s", stdout_data)
            self._vm_state = engine.VMState.UP
            return report(engine.Health.BAD, 'failed liveliness check')

        self._log.info("VM is up on this host with healthy engine",
                       extra=log_filter.lf_args('status', 60))
        self._vm_state = engine.VMState.UP
        return report(engine.Health.GOOD)
    def scan(self):
        """Search the storage domain for the OVF_STORE volume and prepare it.

        Clears the cached store info, then walks every volume of every image
        in the domain.  A volume whose description is a JSON object with
        'Disk Description' equal to 'OVF_STORE' is prepared through VDSM and
        its path stored in OVFStore._ovf_store_path.  The walk does not stop
        at the first match, so the last matching volume wins.

        Returns True when a path is set afterwards, False otherwise.
        Raises RuntimeError when VDSM reports a ServerError.
        """
        self.clear_store_info()

        client = util.connect_vdsm_json_rpc(
            logger=self._log, timeout=constants.VDSCLI_SSL_TIMEOUT)

        images = image.Image(self._type, self._sdUUID).get_images_list(client)

        for img_uuid in images:
            try:
                volumes = client.StorageDomain.getVolumes(
                    imageID=img_uuid,
                    storagepoolID=self._spUUID,
                    storagedomainID=self._sdUUID,
                )
                self._log.debug(volumes)
            except ServerError as err:
                raise RuntimeError(str(err))

            for vol_uuid in volumes:
                try:
                    info = client.Volume.getInfo(
                        volumeID=vol_uuid,
                        imageID=img_uuid,
                        storagepoolID=self._spUUID,
                        storagedomainID=self._sdUUID,
                    )
                    self._log.debug(info)
                except ServerError as err:
                    raise RuntimeError(str(err))

                # Only descriptions that look like a JSON object mentioning
                # 'Disk Description' are worth parsing.
                description = info['description']
                if 'Disk Description' not in description:
                    continue
                if description[0] != '{' or description[-1] != '}':
                    continue

                parsed = json.loads(description)
                self._log.debug(parsed)
                if parsed['Disk Description'] != 'OVF_STORE':
                    continue

                self._log.info(
                    'Found OVF_STORE: imgUUID:{img}, volUUID:{vol}'.format(
                        img=img_uuid, vol=vol_uuid))

                # Prepare symlinks for the OVF store
                try:
                    prepared = client.Image.prepare(
                        storagepoolID=self._spUUID,
                        storagedomainID=self._sdUUID,
                        imageID=img_uuid,
                        volumeID=vol_uuid)
                    OVFStore._ovf_store_path = prepared["path"]
                except ServerError as err:
                    raise RuntimeError(str(err))

        found = self._ovf_store_path is not None
        if not found:
            self._log.warning('Unable to find OVF_STORE',
                              extra=log_filter.lf_args(LF_OVF_STORE_NOT_FOUND,
                                                       LF_OVF_LOG_DELAY))
        return found
    def scan(self):
        """Locate the OVF_STORE volume in the storage domain and prepare it.

        After clearing the cached store info, every volume of every image is
        inspected.  When a volume description is a JSON object whose
        'Disk Description' is 'OVF_STORE', the image is prepared through
        VDSM and the resulting path is stored in OVFStore._ovf_store_path.
        Scanning continues after a match, so a later match overrides an
        earlier one.

        Returns True if a path is set at the end, False otherwise.
        Raises RuntimeError for any ServerError reported by VDSM.
        """
        def is_json_object(text):
            # Cheap pre-check before handing the text to json.loads.
            return text[0] == '{' and text[-1] == '}'

        self.clear_store_info()

        rpc = util.connect_vdsm_json_rpc(
            logger=self._log,
            timeout=constants.VDSCLI_SSL_TIMEOUT,
        )

        domain_images = image.Image(self._type, self._sdUUID)
        for img_uuid in domain_images.get_images_list(rpc):
            try:
                vol_uuids = rpc.StorageDomain.getVolumes(
                    imageID=img_uuid,
                    storagepoolID=self._spUUID,
                    storagedomainID=self._sdUUID,
                )
                self._log.debug(vol_uuids)
            except ServerError as exc:
                raise RuntimeError(str(exc))

            for vol_uuid in vol_uuids:
                try:
                    vol_info = rpc.Volume.getInfo(
                        volumeID=vol_uuid,
                        imageID=img_uuid,
                        storagepoolID=self._spUUID,
                        storagedomainID=self._sdUUID,
                    )
                    self._log.debug(vol_info)
                except ServerError as exc:
                    raise RuntimeError(str(exc))

                text = vol_info['description']
                if not ('Disk Description' in text and is_json_object(text)):
                    continue

                fields = json.loads(text)
                self._log.debug(fields)
                if fields['Disk Description'] == 'OVF_STORE':
                    message = 'Found OVF_STORE: imgUUID:{img}, volUUID:{vol}'
                    self._log.info(
                        message.format(img=img_uuid, vol=vol_uuid))

                    # Prepare symlinks for the OVF store
                    try:
                        image_info = rpc.Image.prepare(
                            storagepoolID=self._spUUID,
                            storagedomainID=self._sdUUID,
                            imageID=img_uuid,
                            volumeID=vol_uuid,
                        )
                        OVFStore._ovf_store_path = image_info["path"]
                    except ServerError as exc:
                        raise RuntimeError(str(exc))

        if self._ovf_store_path is not None:
            return True

        self._log.warning(
            'Unable to find OVF_STORE',
            extra=log_filter.lf_args(LF_OVF_STORE_NOT_FOUND,
                                     LF_OVF_LOG_DELAY),
        )
        return False
    def action(self, options):
        """
        Probe the engine VM and publish its state through update_result.

        The published value is always a JSON object holding the 'vm',
        'health' and 'detail' keys, plus a 'reason' key in every case
        except the healthy-engine one. `options` is not used here.
        """
        def publish(vm, health, detail, reason=None):
            # Serialize a single status report; 'reason' is only part of
            # the report when the caller supplies one.
            report = {'vm': vm, 'health': health, 'detail': detail}
            if reason is not None:
                report['reason'] = reason
            self.update_result(json.dumps(report))

        # Step 1: ask vdsm for the VM stats
        try:
            stats = vdsc.run_vds_client_cmd(
                self._address, self._use_ssl, 'getVmStats', self._vm_uuid
            )
        except Exception as e:
            vm_missing = (
                isinstance(e, exceptions.DetailedError) and
                e.detail == "Virtual machine does not exist"
            )
            if vm_missing:
                # The VM is simply not on this host
                self._log.info("VM not on this host",
                               extra=log_filter.lf_args('status', 60))
                publish('down', 'bad', 'unknown',
                        'vm not running on this host')
            else:
                self._log.error("Failed to getVmStats: %s", str(e))
                publish('unknown', 'unknown', 'unknown',
                        'failed to getVmStats')
            return

        vm_status = stats['statsList'][0]['status'].lower()

        # Step 2: states that are not really Up but are reported as Up
        # (with bad health)
        reported_as_up = (
            'paused',
            'waitforlaunch',
            'restoringstate',
            'powering up',
        )
        if vm_status in reported_as_up:
            self._log.info("VM status: %s", vm_status,
                           extra=log_filter.lf_args('status', 60))
            publish(engine.VMState.UP, engine.Health.BAD,
                    vm_status, 'bad vm status')
            return

        # Step 3: states that definitely mean the VM is down here
        definitely_down = ('down', 'migration destination')
        if vm_status in definitely_down:
            self._log.info("VM not running on this host, status %s", vm_status,
                           extra=log_filter.lf_args('status', 60))
            publish(engine.VMState.DOWN, engine.Health.BAD,
                    vm_status, 'bad vm status')
            return

        # Step 4: the VM is probably up, so run the liveliness check to
        # learn whether the engine itself is up
        checker = subprocess.Popen(
            [constants.HOSTED_ENGINE_BINARY, '--check-liveliness'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        checker_stdout, _ = checker.communicate()
        if checker.returncode != 0:
            self._log.warning("bad health status: %s", checker_stdout)
            publish(engine.VMState.UP, engine.Health.BAD,
                    vm_status, 'failed liveliness check')
            return

        self._log.info("VM is up on this host with healthy engine",
                       extra=log_filter.lf_args('status', 60))
        publish(engine.VMState.UP, engine.Health.GOOD, vm_status)
    def _get_vm_conf_content_from_ovf_store(self):
        """
        Try to obtain the vm configuration from the engine VM OVF kept
        in the OVF_STORE.

        An OVFStore is created and, when it holds no store info yet,
        scanned; an exception raised by the scan is logged and not
        propagated. If store info is available the engine VM OVF is
        fetched and converted with ovf2VmParams.confFromOvf.

        All messages go through self._logger; when it is falsy the
        messages are skipped instead of raising AttributeError.

        :return: the converted configuration, or None when the
                 OVF_STORE could not be identified, the engine VM OVF
                 could not be extracted, or the conversion returned None.
        """
        logger = self._logger

        def log(level, msg, *args, **kwargs):
            # self._logger may be unset: every message is guarded the
            # same way so a missing logger never aborts the lookup.
            if logger:
                getattr(logger, level)(msg, *args, **kwargs)

        log(
            'debug',
            "Trying to get a fresher copy of vm configuration "
            "from the OVF_STORE"
        )

        ovfs = ovf_store.OVFStore()

        if not ovfs.have_store_info():
            try:
                ovfs.scan()
            except Exception as err:
                # Best effort: a failed scan is reported below through
                # the "unable to identify" branch.
                log(
                    'error',
                    "Failed scanning for OVF_STORE due to %s",
                    err
                )

        if not ovfs.have_store_info():
            log(
                'error',
                'Unable to identify the OVF_STORE volume, '
                'falling back to initial vm.conf. Please '
                'ensure you already added your first data '
                'domain for regular VMs',
                extra=log_filter.lf_args(
                    LF_OVF_NOT_THERE,
                    LF_OVF_LOG_DELAY
                )
            )
            return None

        heovf = ovfs.getEngineVMOVF()
        if heovf is None:
            log(
                'error',
                'Failed extracting VM OVF from the OVF_STORE '
                'volume, falling back to initial vm.conf',
                extra=log_filter.lf_args(
                    LF_OVF_EXTRACTION_FAILED,
                    LF_OVF_LOG_DELAY)
            )
            # This error might indicate the OVF location changed
            # and clearing the cache will trigger a rescan
            # next time we access the OVF.
            ovfs.clear_store_info()
            return None

        log(
            'debug',
            "Found an OVF for HE VM, "
            "trying to convert"
        )
        conf = ovf2VmParams.confFromOvf(heovf)
        if conf is None:
            log(
                'error',
                'Failed converting vm.conf from the VM OVF, '
                'falling back to initial vm.conf',
                extra=log_filter.lf_args(
                    LF_OVF_CONVERSION_FAILED,
                    LF_OVF_LOG_DELAY)
            )
            return None

        log('debug', 'Got vm.conf from OVF_STORE')
        return conf