Beispiel #1
0
def test_assert_metric(got_metrics, expected_metric_name,
                       expected_metric_labels, expected_metric_value, exception_message):
    """Call assert_metric and expect either success or a matching AssertionError.

    When ``exception_message`` is None, assert_metric has to pass without raising.
    Otherwise the call has to raise an AssertionError matching ``exception_message``.
    """
    call_args = (got_metrics, expected_metric_name,
                 expected_metric_labels, expected_metric_value)

    if exception_message is None:
        # Positive case: no exception is expected.
        assert_metric(*call_args)
        return

    # Negative case: the assertion helper has to fail with the given message.
    with pytest.raises(AssertionError, match=exception_message):
        assert_metric(*call_args)
def test_collect_platform_information(*mocks):
    """Verify the platform, metrics and labels returned by collect_platform_information.

    The function is called with optional labels enabled and its three results are
    compared against fixed expected values.
    NOTE(review): the expected values presumably come from patches applied outside of
    this block (received here as ``*mocks``) - confirm against the decorators.
    """
    got_platform, got_metrics, got_labels = collect_platform_information(
        include_optional_labels=True)

    # Expected platform description.
    expected_measurements = {
        MetricName.PLATFORM_CPU_USAGE: {0: 100, 1: 200},
        MetricName.PLATFORM_MEM_USAGE_BYTES: 1337,
        MetricName.PLATFORM_MEM_NUMA_FREE_BYTES: {0: 1},
        MetricName.PLATFORM_MEM_NUMA_USED_BYTES: {0: 2},
        MetricName.PLATFORM_VMSTAT_NUMA_PAGES_MIGRATED: 5,
    }
    expected_rdt_information = RDTInformation(True, True, True, True, 'fffff', '2', 8, 10, 20)
    expected_platform = Platform(
        sockets=1,
        cores=1,
        cpus=2,
        numa_nodes=1,
        topology={},
        cpu_model='intel xeon',
        cpu_model_number=0x5E,
        cpu_codename=CPUCodeName.SKYLAKE,
        timestamp=1536071557.123456,
        node_cpus={},
        node_distances={},
        rdt_information=expected_rdt_information,
        measurements=expected_measurements,
        swap_enabled=False,
    )
    assert got_platform == expected_platform

    # Selected platform metrics.
    assert_metric(got_metrics, MetricName.PLATFORM_MEM_USAGE_BYTES,
                  expected_metric_value=1337)
    assert_metric(got_metrics, MetricName.PLATFORM_CPU_USAGE, {'cpu': '0'},
                  expected_metric_value=100)
    assert_metric(got_metrics, MetricName.PLATFORM_TOPOLOGY_CORES,
                  expected_metric_value=1)
    assert_metric(got_metrics, MetricName.PLATFORM_VMSTAT_NUMA_PAGES_MIGRATED,
                  expected_metric_value=5)

    # Labels describing the platform (all values are strings).
    expected_labels = {
        "sockets": "1",
        "cores": "1",
        "cpus": "2",
        "host": "test_host",
        "wca_version": "0.1",
        "cpu_model": "intel xeon",
    }
    assert got_labels == expected_labels
Beispiel #3
0
def test_measurements_runner(subcgroups):
    """Run one MeasurementRunner iteration and verify the metrics passed to storage.

    Two tasks are returned by a mocked node; the metrics handed to the mocked
    metrics storage are checked for internal wca metrics and per-task cpu usage.
    """
    # Tasks returned by the mocked node.
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    node_mock = Mock(spec=MesosNode, get_tasks=Mock(return_value=[t1, t2]))
    metrics_storage_mock = Mock(spec=storage.Storage, store=Mock())

    runner = MeasurementRunner(
        node=node_mock,
        metrics_storage=metrics_storage_mock,
        rdt_enabled=False,
        gather_hw_mm_topology=False,
        extra_labels={'extra_label': 'extra_value'},
    )
    # Replace waiting with a mock and drive exactly one iteration by hand.
    runner._wait = Mock()
    runner._initialize()
    runner._iterate()

    # Metrics passed to the storage in the last store() call.
    got_metrics = runner._metrics_storage.store.call_args[0][0]

    # Internal wca metrics: wca is running, number of tasks under control
    # and memory usage.
    assert_metric(got_metrics, MetricName.WCA_UP, {'extra_label': 'extra_value'})
    assert_metric(got_metrics, MetricName.WCA_TASKS, expected_metric_value=2)
    # Memory usage of wca and its children (in bytes).
    assert_metric(got_metrics, MetricName.WCA_MEM_USAGE_BYTES,
                  expected_metric_value=WCA_MEMORY_USAGE * 2 * 1024)

    # Task measurements, based on get_measurements mocks.
    if subcgroups:
        subcgroups_count = len(subcgroups)
    else:
        subcgroups_count = 1
    cpu_usage = TASK_CPU_USAGE * subcgroups_count
    for task in (t1, t2):
        assert_metric(got_metrics,
                      MetricName.TASK_CPU_USAGE_SECONDS,
                      {'task_id': task.task_id},
                      expected_metric_value=cpu_usage)
Beispiel #4
0
def test_profiler():
    """Verify duration metrics produced by profiling.Profiler.

    One function is profiled through the profile_duration decorator under the name
    'new_name' with time.time patched, and another one ('other_function') has its
    durations registered directly through register_duration.
    """
    fake_clock = 0

    def advance_clock():
        # Every call of the patched time.time moves the clock forward by 2 seconds.
        nonlocal fake_clock
        fake_clock += 2
        return fake_clock

    profiler = profiling.Profiler()

    @profiler.profile_duration(name='new_name')
    def some_function():
        pass

    # No metrics are available before the profiled function is called.
    assert profiler.get_metrics() == []

    with patch('time.time', side_effect=advance_clock):
        for _ in range(5):
            some_function()

    # Two registrations with different durations - the average should be 2.
    for duration in (1, 3):
        profiler.register_duration('other_function', duration)

    metrics = profiler.get_metrics()

    # (metric name, value of 'function' label, expected value)
    expectations = (
        ('wca_duration_seconds', 'new_name', 2.),
        ('wca_duration_seconds_avg', 'new_name', 2.),
        ('wca_duration_seconds', 'other_function', 3.),
        ('wca_duration_seconds_avg', 'other_function', 2.),
    )
    for metric_name, function_name, expected_value in expectations:
        assert_metric(metrics, metric_name, {'function': function_name},
                      expected_metric_value=expected_value)
def test_measurements_runner(subcgroups):
    """Run one MeasurementRunner iteration and verify the metrics passed to storage.

    Besides internal wca metrics and per-task cpu usage, the test also checks the
    labels attached to task metrics (application, application_version_name and
    initial_task_cpu_assignment).
    """
    # Tasks returned by the mocked node.
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    node_mock = Mock(spec=MesosNode, get_tasks=Mock(return_value=[t1, t2]))
    metrics_storage_mock = Mock(spec=storage.Storage, store=Mock())
    # Extra label with some extra value.
    extra_labels = {'extra_label': 'extra_value'}

    runner = MeasurementRunner(
        node=node_mock,
        metrics_storage=metrics_storage_mock,
        rdt_enabled=False,
        extra_labels=extra_labels,
    )
    # Replace waiting with a mock and drive exactly one iteration by hand.
    runner._wait = Mock()
    runner._initialize()
    runner._iterate()

    # Metrics passed to the storage in the last store() call.
    got_metrics = runner._metrics_storage.store.call_args[0][0]

    # Internal wca metrics: wca is running, number of tasks under control
    # and memory usage.
    assert_metric(got_metrics, 'wca_up', {'extra_label': 'extra_value'})
    assert_metric(got_metrics, 'wca_tasks', expected_metric_value=2)
    # Memory usage of wca and its children (in bytes).
    assert_metric(got_metrics, 'wca_memory_usage_bytes',
                  expected_metric_value=WCA_MEMORY_USAGE * 2 * 1024)

    # Task measurements, based on get_measurements mocks.
    if subcgroups:
        subcgroups_count = len(subcgroups)
    else:
        subcgroups_count = 1
    cpu_usage = TASK_CPU_USAGE * subcgroups_count
    for task in (t1, t2):
        assert_metric(got_metrics,
                      'cpu_usage',
                      {'task_id': task.task_id},
                      expected_metric_value=cpu_usage)

    # application and application_version_name have to be generated by the default
    # runner._task_label_generators defined in the constructor of MeasurementsRunner.
    application_labels = {
        'application': t1.name,
        'application_version_name': '',
    }
    assert_metric(got_metrics, 'cpu_usage', application_labels)

    # The `initial_task_cpu_assignment` label has to be attached to task metrics.
    assert_metric(got_metrics, 'cpu_usage',
                  {'initial_task_cpu_assignment': '8.0'})
Beispiel #6
0
def test_detection_runner(reset_counters_mock, subcgroups):
    """Run one DetectionRunner iteration and verify anomaly metrics and detector input.

    A mocked detector reports one anomaly (t1 contended by t2) with a related metric
    and one extra metric. The test checks the metrics passed to the anomalies storage
    and the arguments the detector's ``detect`` method was called with.
    """
    # Tasks mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    # Detector mock - simulate returning one anomaly and additional metric
    detector_mock = Mock(
        spec=AnomalyDetector,
        detect=Mock(return_value=(
            [
                anomaly(t1.task_id, [t2.task_id],
                        metrics=[metric('contention_related_metric')])
            ],  # one anomaly + related metric
            [metric('extra_metric_from_detector')]  # one extra metric
        )))

    measurement_runner = MeasurementRunner(
        node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[t1, t2])),
        metrics_storage=Mock(spec=storage.Storage, store=Mock()),
        rdt_enabled=False,
        extra_labels=dict(extra_label='extra_value'),
    )
    runner = DetectionRunner(
        measurement_runner=measurement_runner,
        anomalies_storage=Mock(spec=storage.Storage, store=Mock()),
        detector=detector_mock,
    )

    # Replace waiting with a mock and drive exactly one iteration by hand.
    runner._measurement_runner._wait = Mock()
    runner._measurement_runner._initialize()
    runner._measurement_runner._iterate()

    # Metrics passed as the first positional argument of the first store() call.
    got_anomalies_metrics = runner._anomalies_storage.store.mock_calls[0][1][0]

    # Check anomaly based metrics.
    assert_metric(got_anomalies_metrics,
                  'anomaly',
                  expected_metric_some_labels={
                      LABEL_WORKLOAD_INSTANCE:
                      t1.labels[LABEL_WORKLOAD_INSTANCE],
                      LABEL_CONTENDED_TASK_ID:
                      t1.task_id,
                      LABEL_CONTENDING_WORKLOAD_INSTANCE:
                      t2.labels[LABEL_WORKLOAD_INSTANCE]
                  })
    assert_metric(got_anomalies_metrics,
                  'contention_related_metric',
                  expected_metric_some_labels=dict(extra_label='extra_value'))
    assert_metric(got_anomalies_metrics, 'extra_metric_from_detector')
    assert_metric(got_anomalies_metrics,
                  'anomaly_count',
                  expected_metric_value=1)
    assert_metric(got_anomalies_metrics, 'anomaly_last_occurrence')

    # Check that detector was called with proper arguments.
    (platform, tasks_data) = detector_mock.detect.mock_calls[0][1]
    # Make sure that proper values are propagated to detect method for t1.
    assert platform == platform_mock
    # Measurements have to match get_measurements mock from measurements_patch decorator.
    # Labels should have extra LABEL_WORKLOAD_INSTANCE based on redis_task_with_default_labels
    # and sanitized version of other labels for mesos (without prefix).
    # Resources should match resources from redis_task_with_default_labels
    cpu_usage = TASK_CPU_USAGE * (len(subcgroups) if subcgroups else 1)

    assert_subdict(tasks_data[t1.task_id].measurements,
                   {MetricName.TASK_CPU_USAGE_SECONDS: cpu_usage})
    assert_subdict(tasks_data[t1.task_id].labels, {
        LABEL_WORKLOAD_INSTANCE: 'redis_6792_t1',
        'load_generator': 'rpc-perf-t1'
    })

    assert_subdict(tasks_data[t1.task_id].resources, t1.resources)

    # Check any metrics for t2 (previously t1 was checked twice and t2 never).
    assert_subdict(tasks_data[t2.task_id].measurements,
                   {MetricName.TASK_CPU_USAGE_SECONDS: cpu_usage})
def test_allocation_runner(_get_allocations_mock, _get_allocations_mock_,
                           platform_mock, reset_counter_mock, subcgroups):
    """Run three AllocationRunner iterations and verify the allocation metrics.

    Low level system calls are not mocked - but higher level objects and functions:
    Cgroup, Resgroup, Platform, etc. Thus the test does not cover the full usage
    scenario (such tests would be much harder to write).

    Scenario:
    1. one task (t1), one allocation,
    2. two tasks (t1, t2), the same single allocation for t1,
    3. two tasks, two allocations sharing one RDT group.
    """
    # Tasks mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    # Allocator mock (lower the quota and number of cache ways in dedicated group).
    # Patch some of the functions of AllocationRunner.
    measurement_runner = MeasurementRunner(
        node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[])),
        metrics_storage=Mock(spec=storage.Storage, store=Mock()),
        rdt_enabled=True,
        gather_hw_mm_topology=False,
        extra_labels=dict(extra_labels='extra_value'),
    )
    runner = AllocationRunner(
        measurement_runner=measurement_runner,
        anomalies_storage=Mock(spec=storage.Storage, store=Mock()),
        allocations_storage=Mock(spec=storage.Storage, store=Mock()),
        rdt_mb_control_required=True,
        rdt_cache_control_required=True,
        allocator=Mock(spec=Allocator,
                       allocate=Mock(return_value=({}, [], []))),
    )

    # Replace waiting with a mock; iterations are driven by hand below.
    runner._measurement_runner._wait = Mock()
    runner._measurement_runner._initialize()

    ############
    # First run (one task, one allocation).
    runner._measurement_runner._node.get_tasks.return_value = [t1]
    runner._allocator.allocate.return_value = ({
        t1.task_id: {
            AllocationType.QUOTA: .5,
            AllocationType.RDT: RDTAllocation(name=None, l3='L3:0=0000f')
        }
    }, [], [])
    runner._measurement_runner._iterate()

    # Check that allocator.allocate was called with proper arguments.
    assert runner._allocator.allocate.call_count == 1
    (_, tasks_data) = runner._allocator.allocate.mock_calls[0][1]
    assert_subdict(tasks_data[t1.task_id].allocations, _os_tasks_allocations)

    # Check allocation metrics ...
    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]
    # ... generic allocation metrics ...
    assert_metric(got_allocations_metrics,
                  'allocations_count',
                  dict(extra_labels='extra_value'),
                  expected_metric_value=1)
    assert_metric(got_allocations_metrics,
                  'allocations_errors',
                  dict(extra_labels='extra_value'),
                  expected_metric_value=0)
    assert_metric(got_allocations_metrics, 'allocation_duration',
                  dict(extra_labels='extra_value'))
    # ... and allocation metrics for task t1.
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id, extra_labels='extra_value'), 0.5)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id, extra_labels='extra_value'), 4)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_mask',
                  dict(task=t1.task_id, extra_labels='extra_value'), 15)

    ############################
    # Second run (two tasks, one allocation)
    runner._measurement_runner._node.get_tasks.return_value = [t1, t2]
    first_run_t1_task_allocations = {
        t1.task_id: {
            AllocationType.QUOTA: .5,
            AllocationType.RDT: RDTAllocation(name=None, l3='L3:0=0000f')
        }
    }
    runner._allocator.allocate.return_value = (first_run_t1_task_allocations,
                                               [], [])
    runner._measurement_runner._iterate()

    # Check allocation metrics...
    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]
    # ... generic allocation metrics ...
    assert_metric(got_allocations_metrics,
                  'allocations_count',
                  expected_metric_value=2)
    assert_metric(got_allocations_metrics,
                  'allocations_errors',
                  expected_metric_value=0)
    assert_metric(got_allocations_metrics, 'allocation_duration')
    # ... and metrics for task t1 ...
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id), 0.5)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id), 4)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_mask',
                  dict(task=t1.task_id), 15)

    # Check allocate call.
    (_, tasks_data) = runner._allocator.allocate.mock_calls[1][1]
    # (note: tasks_allocations are always read from filesystem)
    assert_subdict(tasks_data[t1.task_id].allocations, _os_tasks_allocations)
    assert_subdict(tasks_data[t2.task_id].allocations, _os_tasks_allocations)

    ############
    # Third run (two tasks, two allocations) - modify L3 cache and put in the same group
    runner._measurement_runner._node.get_tasks.return_value = [t1, t2]
    runner._allocator.allocate.return_value = (
        {
            t1.task_id: {
                AllocationType.QUOTA: 0.7,
                AllocationType.RDT: RDTAllocation(name='one_group', l3='L3:0=00fff')
            },
            t2.task_id: {
                AllocationType.QUOTA: 0.8,
                AllocationType.RDT: RDTAllocation(name='one_group', l3='L3:0=00fff')
            }
        },
        [],
        [],
    )
    runner._measurement_runner._iterate()

    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]

    assert_metric(got_allocations_metrics,
                  'allocations_count',
                  expected_metric_value=4)
    # ... and metrics for tasks t1 and t2 ...
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id), 0.7)
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t2.task_id), 0.8)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id, group_name='one_group'),
                  12)  # 00fff=12
    # Both tasks share 'one_group', so t2 has to report the same number of cache ways
    # (previously t1 was asserted twice and t2 was never checked).
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t2.task_id, group_name='one_group'),
                  12)  # 00fff=12
 def check(self, metrics):
     """Assert that ``metrics`` contain a metric described by this object.

     Delegates to assert_metric using this object's name, labels and value.
     """
     name, labels, value = self.name, self.labels, self.value
     assert_metric(metrics, name, labels, value)