コード例 #1
0
def start(cif, scheduler):
    """Create the periodic executor and start all periodic operations.

    Initializes the module-level ``_executor`` and ``_operations``,
    then starts every scheduled operation.
    """
    global _operations
    global _executor

    _executor = executor.Executor(name="periodic",
                                  workers_count=_WORKERS,
                                  max_tasks=_TASKS,
                                  scheduler=scheduler,
                                  max_workers=_MAX_WORKERS)
    _executor.start()

    def dispatched(func, period):
        # Route func through the executor so that a blocking call
        # cannot stall the scheduling machinery itself.
        dispatcher = VmDispatcher(
            cif.getVMs, _executor, func, _timeout_from(period))
        return Operation(dispatcher, period, scheduler)

    _operations = [
        # Updating the volume stats requires storage access, thus can
        # block - hence the dispatching.
        dispatched(UpdateVolumes,
                   config.getint('irs', 'vol_size_sample_interval')),

        # Job monitoring needs QEMU monitor access.
        dispatched(BlockjobMonitor,
                   config.getint('vars', 'vm_sample_jobs_interval')),

        # Interim solution until we get high water mark notifications
        # from QEMU. Accesses storage and/or the QEMU monitor, so it
        # can block and must be dispatched.
        dispatched(DriveWatermarkMonitor,
                   config.getint('vars', 'vm_watermark_interval')),

        Operation(lambda: recovery.lookup_external_vms(cif),
                  config.getint('sampling', 'external_vm_lookup_interval'),
                  scheduler,
                  exclusive=True,
                  discard=False),

        Operation(containersconnection.monitor,
                  config.getint('vars', 'vm_sample_interval'), scheduler),
    ]

    if config.getboolean('sampling', 'enable'):
        # libvirt sampling using bulk stats can block, but unresponsive
        # domains are handled inside VMBulkstatsMonitor for performance
        # reasons, so no dispatching is needed here.
        _operations.append(Operation(
            sampling.VMBulkstatsMonitor(libvirtconnection.get(cif),
                                        cif.getVMs, sampling.stats_cache),
            config.getint('vars', 'vm_sample_interval'), scheduler))
        _operations.append(Operation(
            sampling.HostMonitor(cif=cif),
            config.getint('vars', 'host_sample_stats_interval'),
            scheduler,
            timeout=config.getint('vars', 'host_sample_stats_interval'),
            exclusive=True,
            discard=False))
        host.stats.start()

    for op in _operations:
        op.start()
コード例 #2
0
def _create(cif, scheduler):
    """Build and return the list of periodic operations (not started)."""

    def dispatched(func, period):
        # Route func through the shared executor so that blocking work
        # cannot stall the scheduling machinery itself.
        dispatcher = VmDispatcher(
            cif.getVMs, _executor, func, _timeout_from(period))
        return Operation(dispatcher, period, scheduler)

    ops = [
        # Updating the volume stats requires storage access, thus can
        # block - hence the dispatching.
        dispatched(UpdateVolumes,
                   config.getint('irs', 'vol_size_sample_interval')),

        # Job monitoring needs QEMU monitor access.
        dispatched(BlockjobMonitor,
                   config.getint('vars', 'vm_sample_jobs_interval')),

        # Interim solution until we get high water mark notifications
        # from QEMU. Accesses storage and/or the QEMU monitor, so it
        # can block and must be dispatched.
        dispatched(DriveWatermarkMonitor,
                   config.getint('vars', 'vm_watermark_interval')),

        dispatched(NvramDataMonitor,
                   config.getint('sampling', 'nvram_data_update_interval')),

        dispatched(TpmDataMonitor,
                   config.getint('sampling', 'tpm_data_update_interval')),

        Operation(lambda: recovery.lookup_external_vms(cif),
                  config.getint('sampling', 'external_vm_lookup_interval'),
                  scheduler,
                  exclusive=True,
                  discard=False),

        # Check twice per kill-threshold so a paused VM is never left
        # hanging for much longer than the configured limit.
        Operation(lambda: _kill_long_paused_vms(cif),
                  config.getint('vars', 'vm_kill_paused_time') // 2,
                  scheduler,
                  exclusive=True,
                  discard=False),
    ]

    if config.getboolean('sampling', 'enable'):
        # libvirt sampling using bulk stats can block, but unresponsive
        # domains are handled inside VMBulkstatsMonitor for performance
        # reasons, so no dispatching is needed here.
        ops.append(Operation(
            sampling.VMBulkstatsMonitor(libvirtconnection.get(cif),
                                        cif.getVMs, sampling.stats_cache),
            config.getint('vars', 'vm_sample_interval'), scheduler))
        ops.append(Operation(
            sampling.HostMonitor(cif=cif),
            config.getint('vars', 'host_sample_stats_interval'),
            scheduler,
            timeout=config.getint('vars', 'host_sample_stats_interval'),
            exclusive=True,
            discard=False))

    return ops
コード例 #3
0
ファイル: bulk_sampling_test.py プロジェクト: dong-df/vdsm
    def test_collect_fast_path_as_default(self):
        """A fresh sampler takes the bulk (fast) getAllDomainStats path."""
        fake_vms = make_vms(num=3)
        fake_conn = FakeConnection(vms=fake_vms)
        fake_cache = FakeStatsCache()

        monitor = sampling.VMBulkstatsMonitor(
            fake_conn, fake_conn.getVMs, fake_cache)

        with fake_cache.await_completion(self.CALL_TIMEOUT):
            self.exc.dispatch(monitor, self.CALL_TIMEOUT)

        self.assertCallSequence(fake_conn.__calls__, ['getAllDomainStats'])
コード例 #4
0
ファイル: bulk_sampling_test.py プロジェクト: dong-df/vdsm
    def test_collect_slow_path_after_blocked(self):
        """After a blocked bulk call the sampler falls back to the slow path."""
        fake_vms = make_vms(num=3)
        fake_conn = FakeConnection(vms=fake_vms)
        fake_cache = FakeStatsCache()

        monitor = sampling.VMBulkstatsMonitor(
            fake_conn, fake_conn.getVMs, fake_cache)

        with fake_conn.stuck(self.TIMEOUT * 2):
            with fake_cache.await_completion(self.TIMEOUT):
                # While the connection is stuck only call #2 (slow) is
                # expected to _complete_. Note that every call is always
                # recorded, completed or not.
                self.exc.dispatch(monitor, self.CALL_TIMEOUT)
                self.exc.dispatch(monitor, self.CALL_TIMEOUT)

        self.assertCallSequence(fake_conn.__calls__,
                                ['getAllDomainStats', 'domainListGetStats'])
コード例 #5
0
ファイル: bulk_sampling_test.py プロジェクト: dong-df/vdsm
    def test_collect_vm_unresponsive(self):
        """Unresponsive VMs keep the sampler on the slow per-list path."""
        fake_vms = make_vms(num=3)
        fake_conn = FakeConnection(vms=fake_vms)
        fake_cache = FakeStatsCache()

        monitor = sampling.VMBulkstatsMonitor(
            fake_conn, fake_conn.getVMs, fake_cache)

        with fake_conn.stuck(self.TIMEOUT * 2):
            with fake_cache.await_completion(self.TIMEOUT, expected=2):
                # Only call #2 (slow) and call #3 (slow) are expected
                # to _complete_, hence expected=2.
                self.exc.dispatch(monitor, self.CALL_TIMEOUT)
                fake_vms['1'].ready = False
                self.exc.dispatch(monitor, self.CALL_TIMEOUT)
                self.exc.dispatch(monitor, self.CALL_TIMEOUT)

        self.assertCallSequence(
            fake_conn.__calls__,
            ['getAllDomainStats', 'domainListGetStats', 'domainListGetStats'])
コード例 #6
0
ファイル: bulk_sampling_test.py プロジェクト: dong-df/vdsm
    def test_slow_collect_while_vm_unresponsive(self):
        """The sampler recovers the fast path once the VM is responsive again."""
        fake_vms = make_vms(num=3)
        fake_conn = FakeConnection(vms=fake_vms)
        fake_cache = FakeStatsCache()

        monitor = sampling.VMBulkstatsMonitor(
            fake_conn, fake_conn.getVMs, fake_cache)

        with fake_conn.stuck(self.TIMEOUT * 2):
            with fake_cache.await_completion(self.TIMEOUT):
                self.exc.dispatch(monitor, self.CALL_TIMEOUT)
                fake_vms['1'].ready = False
                self.exc.dispatch(monitor, self.CALL_TIMEOUT)
            # At this point we successfully waited for the second (slow)
            # call:
            #   call #1 (fast) recorded, not yet completed
            #   call #2 (slow) recorded, completed, waited
            # We still need to be able to wait for the pending call,
            # hence we re-arm the cache.
            fake_cache.clear()

        # Verify that exactly the right calls were recorded. Call #1
        # (fast) may complete at any moment, asynchronously.
        expected = ['getAllDomainStats', 'domainListGetStats']
        self.assertCallSequence(fake_conn.__calls__, expected)
        # Make sure call #1 (fast) is completed. No wait is expected
        # here; the timeout is just a safety net.
        assert fake_cache.sync.wait(self.TIMEOUT)

        # Reset the fake environment to its pristine state.
        fake_vms['1'].ready = True
        monitor._skip_doms.clear()

        expected.append('getAllDomainStats')
        with fake_cache.await_completion(self.TIMEOUT):
            self.exc.dispatch(monitor, self.CALL_TIMEOUT)

        self.assertCallSequence(fake_conn.__calls__, expected)