Beispiel #1
0
def start(cif, scheduler):
    """
    Start every known periodic Operation.

    All the operations share a single executor.Executor instance, which
    is created here and kept in the module-level `_executor`. The
    operations themselves are kept in the module-level `_operations`.

    The order in which the operations are started is not guaranteed;
    the only guarantee is that a start is attempted for each of them.
    An operation failing to start with Error is logged as a warning and
    does not prevent the remaining ones from being started.
    """
    global _operations
    global _executor

    _executor = executor.Executor(
        name="periodic",
        workers_count=_WORKERS,
        max_tasks=_TASKS,
        scheduler=scheduler,
        max_workers=_MAX_WORKERS)
    _executor.start()

    _operations = _create(cif, scheduler)

    sampling_enabled = config.getboolean('sampling', 'enable')
    if sampling_enabled:
        host.stats.start()

    for operation in _operations:
        try:
            operation.start()
        except Error as exc:
            logging.warning('Operation not started: %s', exc)
Beispiel #2
0
 def __init__(self, cif, log, scheduler):
     """
     Set up the poller state: the "qgapoller" executor, the (initially
     empty) operation list, the per-topic caches with their locks, and
     the function used to fetch the guest information.
     """
     self._cif = cif
     self.log = log
     self._scheduler = scheduler
     self._executor = executor.Executor(
         name="qgapoller",
         workers_count=_WORKERS,
         max_tasks=_TASKS,
         scheduler=scheduler,
         max_workers=_MAX_WORKERS)
     self._operations = []

     # Each cache below is paired with its own lock.
     self._capabilities_lock = threading.Lock()
     self._capabilities = {}

     self._guest_info_lock = threading.Lock()
     self._guest_info = defaultdict(dict)

     self._last_failure_lock = threading.Lock()
     self._last_failure = {}

     self._last_check_lock = threading.Lock()
     # Keyed by (vm_id, command) tuples; unknown keys default to 0.
     self._last_check = defaultdict(lambda: 0)

     self._initial_interval = config.getint(
         'guest_agent', 'qga_initial_info_interval')

     # Pick the backend used to query the guest agent.
     if not _USE_LIBVIRT:
         self._get_guest_info = self._qga_get_all_info
         self.log.info('Using direct messages for querying QEMU-GA')
     else:
         self._get_guest_info = self._libvirt_get_guest_info
         self.log.info('Using libvirt for querying QEMU-GA')
Beispiel #3
0
    def test_dump_executor_state_on_resource_exhausted(self):
        # Fill the task queue of an executor that has no workers, then
        # dispatch once more and check that the last logged message is
        # the executor state dump.
        period = 0.1
        max_tasks = 20  # random value

        fake_log = fakelib.FakeLogger()

        # No workers on purpose: we just want to clog the queue.
        clogged = executor.Executor(
            name="test.Executor",
            workers_count=0,
            max_tasks=max_tasks,
            scheduler=self.sched,  # unused
            max_workers=0,
            log=fake_log)
        clogged.start()

        operation = periodic.Operation(
            lambda: None,
            period=period,
            scheduler=self.sched,
            executor=clogged,
            timeout=None,
            exclusive=False)

        patches = [
            (throttledlog, '_logger', fake_log),
        ]
        with MonkeyPatchScope(patches):
            # start() performs the first dispatch
            operation.start()
            for _ in range(max_tasks - 1):
                operation._dispatch()
            # this one triggers the exception, and thus the dump
            operation._dispatch()

        _, message, _ = fake_log.messages[-1]
        self.assertTrue(message.startswith('executor state:'))
Beispiel #4
0
    def test_report_blocked_workers(self):
        # Dispatch slow tasks with a short timeout and check that at
        # least one 'Worker blocked' message shows up in the log.
        report_period = 1.0  # seconds
        wait = 10.0  # seconds
        workers = 3
        fake_log = FakeLogger(level=logging.DEBUG)

        self.executor = executor.Executor(
            'test',
            workers_count=10,
            max_tasks=self.max_tasks,
            scheduler=self.scheduler,
            max_workers=self.max_workers,
            log=fake_log)
        self.executor.start()
        time.sleep(0.1)  # Give time to start all threads

        # make sure we have plenty of slow tasks
        slow_tasks = [Task(wait=wait) for _ in range(workers * 2)]
        for slow in slow_tasks:
            # NOTE(review): dispatched with a 1.0 timeout and
            # discard=False; presumably this keeps the timed out worker
            # around so it gets reported as blocked - confirm against
            # executor.Executor.dispatch.
            self.executor.dispatch(slow, 1.0, discard=False)

        # wait long enough to catch at least one report
        time.sleep(report_period * 2)

        print(fake_log.messages)  # troubleshooting aid when test fails
        blocked_reported = any(
            text.startswith('Worker blocked')
            for (_level, text, _args) in fake_log.messages)
        self.assertTrue(blocked_reported)
Beispiel #5
0
def start(cif, scheduler):
    """
    Create and start the "periodic" executor, then build and start all
    the periodic operations.

    The executor is kept in the module-level `_executor`, the operations
    in the module-level `_operations`. The sampling operations are
    added, and host.stats is started, only if the 'enable' option of the
    'sampling' configuration section is true.
    """
    global _executor
    global _operations

    _executor = executor.Executor(
        name="periodic",
        workers_count=_WORKERS,
        max_tasks=_TASKS,
        scheduler=scheduler,
        max_workers=_MAX_WORKERS)
    _executor.start()

    def dispatched(func, period):
        # Wrap func in a VmDispatcher running on the module executor,
        # with a timeout derived from the period.
        dispatcher = VmDispatcher(
            cif.getVMs, _executor, func, _timeout_from(period))
        return Operation(dispatcher, period, scheduler)

    _operations = [
        # Updating the volume stats touches the storage and thus may
        # block: needs dispatching.
        dispatched(
            UpdateVolumes,
            config.getint('irs', 'vol_size_sample_interval')),

        # Monitoring the jobs requires access to the QEMU monitor.
        dispatched(
            BlockjobMonitor,
            config.getint('vars', 'vm_sample_jobs_interval')),

        # Done only until QEMU provides high water mark notifications.
        # Touches the storage and/or the QEMU monitor and thus may
        # block: needs dispatching.
        dispatched(
            DriveWatermarkMonitor,
            config.getint('vars', 'vm_watermark_interval')),

        Operation(
            lambda: recovery.lookup_external_vms(cif),
            config.getint('sampling', 'external_vm_lookup_interval'),
            scheduler,
            exclusive=True,
            discard=False),

        Operation(
            containersconnection.monitor,
            config.getint('vars', 'vm_sample_interval'),
            scheduler),
    ]

    if config.getboolean('sampling', 'enable'):
        # Bulk stats sampling through libvirt may block, but for
        # performance reasons the unresponsive domains are dealt with
        # inside VMBulkstatsMonitor: no dispatching needed.
        bulk_stats = Operation(
            sampling.VMBulkstatsMonitor(
                libvirtconnection.get(cif),
                cif.getVMs,
                sampling.stats_cache),
            config.getint('vars', 'vm_sample_interval'),
            scheduler)

        host_monitor = Operation(
            sampling.HostMonitor(cif=cif),
            config.getint('vars', 'host_sample_stats_interval'),
            scheduler,
            timeout=config.getint('vars', 'host_sample_stats_interval'),
            exclusive=True,
            discard=False)

        _operations.extend([bulk_stats, host_monitor])
        host.stats.start()

    for operation in _operations:
        operation.start()
Beispiel #6
0
 def test_repr_defaults(self):
     # The executor is built with keyword arguments, leaving out the
     # ones which have a default value, so the defaults are in use.
     exc = executor.Executor(
         'test',
         workers_count=10,
         max_tasks=self.max_tasks,
         scheduler=self.scheduler)
     text = repr(exc)
     self.assertTrue(text)
Beispiel #7
0
    def test_multiple_executors(self):
        # Run two executors side by side and collect the system thread
        # name seen by every dispatched task.
        workers = 2
        collected = []
        # Every task of both executors, plus this thread, meets here.
        done = concurrent.Barrier(2 * workers + 1)

        def get_worker_name():
            collected.append(pthread.getname())
            done.wait()

        foo = executor.Executor('foo', workers, workers, None)
        bar = executor.Executor('bar', workers, workers, None)
        with utils.running(foo):
            with utils.running(bar):
                for _ in range(workers):
                    foo.dispatch(get_worker_name)
                    bar.dispatch(get_worker_name)
                done.wait()

        expected = ["bar/0", "bar/1", "foo/0", "foo/1"]
        self.assertEqual(sorted(collected), expected)
Beispiel #8
0
    def __init__(self, bridge, subs, timeout, scheduler):
        """
        Create the "jsonrpc.Executor" executor, the JSON-RPC server which
        dispatches through it, and the STOMP reactor; then start the
        reactor.
        """
        self._executor = executor.Executor(
            name="jsonrpc.Executor",
            workers_count=_THREADS,
            max_tasks=_TASKS,
            scheduler=scheduler)

        # The server hands its work over to the executor.
        dispatch = self._executor.dispatch
        self._server = JsonRpcServer(bridge, timeout, dispatch)
        self._reactor = StompReactor(subs)
        self.startReactor()
Beispiel #9
0
 def setUp(self):
     # A started scheduler and a started executor for every test.
     self.scheduler = schedule.Scheduler()
     self.scheduler.start()

     self.executor = executor.Executor(
         'test',
         workers_count=10,
         max_tasks=20,
         scheduler=self.scheduler)
     self.executor.start()

     time.sleep(0.1)  # Give time to start all threads
Beispiel #10
0
    def setUp(self):
        # A started scheduler, using the monotonic clock, and a started
        # single-worker executor for every test.
        self.sched = schedule.Scheduler(
            name="test.Scheduler",
            clock=monotonic_time)
        self.sched.start()

        self.exc = executor.Executor(
            name="test.Executor",
            workers_count=1,
            max_tasks=100,
            scheduler=self.sched)
        self.exc.start()
Beispiel #11
0
def start(cif, scheduler):
    """
    Create and start the "periodic" executor, build all the periodic
    operations, start host.stats and then start every operation.

    The executor is kept in the module-level `_executor`, the operations
    in the module-level `_operations`.
    """
    global _executor
    global _operations

    _executor = executor.Executor(
        name="periodic",
        workers_count=_WORKERS,
        max_tasks=_TASKS,
        scheduler=scheduler,
        max_workers=_MAX_WORKERS)
    _executor.start()

    def dispatched(func, period):
        # Wrap func in a VmDispatcher running on the module executor,
        # with a timeout derived from the period.
        dispatcher = VmDispatcher(
            cif.getVMs, _executor, func, _timeout_from(period))
        return Operation(dispatcher, period, scheduler)

    _operations = [
        # Updating the volume stats touches the storage and thus may
        # block: needs dispatching.
        dispatched(
            UpdateVolumes,
            config.getint('irs', 'vol_size_sample_interval')),

        # Reads FS and libvirt data: needs dispatching.
        # The new engine ignores it; kept for backward compatibility.
        dispatched(
            NumaInfoMonitor,
            config.getint('vars', 'vm_sample_numa_interval')),

        # Monitoring the jobs requires access to the QEMU monitor.
        dispatched(
            BlockjobMonitor,
            config.getint('vars', 'vm_sample_jobs_interval')),

        # Bulk stats sampling through libvirt may block, but for
        # performance reasons the unresponsive domains are dealt with
        # inside VMBulkSampler: no dispatching needed.
        Operation(
            sampling.VMBulkSampler(
                libvirtconnection.get(cif),
                cif.getVMs,
                sampling.stats_cache),
            config.getint('vars', 'vm_sample_interval'),
            scheduler),

        # Done only until QEMU provides high water mark notifications.
        # Touches the storage and/or the QEMU monitor and thus may
        # block: needs dispatching.
        dispatched(
            DriveWatermarkMonitor,
            config.getint('vars', 'vm_watermark_interval')),

        Operation(
            sampling.HostMonitor(cif=cif),
            config.getint('vars', 'host_sample_stats_interval'),
            scheduler),

        Operation(
            containersconnection.monitor,
            config.getint('vars', 'vm_sample_interval'),
            scheduler),
    ]

    host.stats.start()

    for operation in _operations:
        operation.start()
Beispiel #12
0
 def __init__(self, bridge, subs, timeout, scheduler, cif):
     """
     Create the "jsonrpc" executor, the JSON-RPC server which dispatches
     through it, and the STOMP reactor; then start the reactor.
     """
     self._executor = executor.Executor(
         name="jsonrpc",
         workers_count=_THREADS,
         max_tasks=_TASKS,
         scheduler=scheduler)
     self._bridge = bridge

     # Every dispatch done by the server uses the same timeout and
     # discard settings.
     dispatch = functools.partial(
         self._executor.dispatch,
         timeout=_TIMEOUT,
         discard=False)
     self._server = JsonRpcServer(bridge, timeout, cif, dispatch)

     self._reactor = StompReactor(subs)
     self.startReactor()
Beispiel #13
0
    def test_worker_thread_system_name(self):
        # Collect the system thread name seen by every dispatched task
        # and compare it with the expected "<executor name>/<n>" names.
        workers = 2
        collected = []
        # Every task, plus this thread, meets here.
        done = concurrent.Barrier(workers + 1)

        def get_worker_name():
            collected.append(pthread.getname())
            done.wait()

        foo = executor.Executor('foo', workers, workers, None)
        with utils.running(foo):
            for _ in range(workers):
                foo.dispatch(get_worker_name)
            done.wait()

        expected = ["foo/0", "foo/1"]
        self.assertEqual(sorted(collected), expected)
Beispiel #14
0
 def __init__(self, cif, log, scheduler):
     """
     Set up the poller state: the "qgapoller" executor, the (initially
     empty) operation list and the per-topic caches with their locks.
     """
     self._cif = cif
     self.log = log
     self._scheduler = scheduler
     self._executor = executor.Executor(
         name="qgapoller",
         workers_count=_WORKERS,
         max_tasks=_TASKS,
         scheduler=scheduler,
         max_workers=_MAX_WORKERS)
     self._operations = []

     # Each cache below is paired with its own lock.
     self._capabilities_lock = threading.Lock()
     self._capabilities = {}

     self._guest_info_lock = threading.Lock()
     self._guest_info = defaultdict(dict)

     self._last_failure_lock = threading.Lock()
     self._last_failure = {}
Beispiel #15
0
from vdsm import executor
from vdsm import schedule
from vdsm.config import config
from vdsm.utils import monotonic_time

# The worker count is just a made up number. Maybe it should be equal to
# the number of cores?
# TODO: make them tunable through private, unsupported configuration items
_WORKERS = config.getint('sampling', 'periodic_workers')
_TASK_PER_WORKER = config.getint('sampling', 'periodic_task_per_worker')
# The task limit of the executor scales with the number of workers.
_TASKS = _TASK_PER_WORKER * _WORKERS

# Scheduler and executor are created at import time and shared by the
# whole module.
_scheduler = schedule.Scheduler(
    name="periodic.Scheduler",
    clock=monotonic_time)

_executor = executor.Executor(
    name="periodic.Executor",
    workers_count=_WORKERS,
    max_tasks=_TASKS,
    scheduler=_scheduler)

# The periodic operations known to the module; empty at import time.
_operations = []


def _timeout_from(interval):
    """
    Estimate a sensible timeout given a periodic interval.
    """
    return interval / 2.


def _dispatched_operation(get_vms, func, period):
    """
    Build an Operation with the given period around a VmDispatcher which
    uses the module executor and a timeout derived from the period.
    """
    timeout = _timeout_from(period)
    dispatcher = VmDispatcher(get_vms, _executor, func, timeout)
    return Operation(dispatcher, period)