Example #1
0
def test_measurements_runner_init_and_checks(rdt_enabled, resctrl_available,
                                             monitoring_available, access_ok,
                                             ok):
    """Verify what MeasurementRunner._initialize() returns under patched checks.

    The resctrl check, the privileges check and the platform information
    collection are patched to return the values given by the arguments.
    When ``ok`` is true, ``_initialize()`` must return ``None``; otherwise it
    must return ``1``.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not visible in this chunk - confirm.
    """
    runner = MeasurementRunner(
        node=Mock(spec=MesosNode),
        metrics_storage=Mock(spec=storage.Storage),
        rdt_enabled=rdt_enabled,
    )

    # Platform whose rdt_information reports the requested monitoring state.
    rdt_information = Mock(
        is_monitoring_enabled=Mock(return_value=monitoring_available))
    platform_mock = Mock(rdt_information=rdt_information)

    # Patches are only activated once the ``with`` statement is entered.
    resctrl_patch = patch('wca.resctrl.check_resctrl',
                          return_value=resctrl_available)
    privileges_patch = patch('wca.security.are_privileges_sufficient',
                             return_value=access_ok)
    platform_patch = patch('wca.platforms.collect_platform_information',
                           return_value=(platform_mock, None, None))

    with resctrl_patch, privileges_patch, platform_patch:
        result = runner._initialize()
        if ok:
            # Successful initialization returns nothing.
            assert result is None
        else:
            # Failed initialization is reported as 1.
            assert result == 1
Example #2
0
def test_detection_runner(reset_counters_mock, subcgroups):
    """Run one DetectionRunner iteration and check stored metrics and detector input.

    Two redis tasks (t1, t2) are returned by the mocked node. The mocked
    detector reports one anomaly (t1 contended by t2) with a related metric,
    plus one extra metric. After a single iteration the test checks:

    * the metrics passed to the anomalies storage,
    * the ``platform`` and ``tasks_data`` arguments passed to
      ``detector.detect`` (for both t1 and t2).

    NOTE(review): ``reset_counters_mock`` and ``subcgroups`` are presumably
    injected by decorators that are not visible in this chunk - confirm.
    """
    # Tasks mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    # Detector mock - simulate returning one anomaly and additional metric
    detected_anomalies = [
        anomaly(t1.task_id, [t2.task_id],
                metrics=[metric('contention_related_metric')])
    ]  # one anomaly + related metric
    detector_extra_metrics = [metric('extra_metric_from_detector')]  # one extra metric
    detector_mock = Mock(
        spec=AnomalyDetector,
        detect=Mock(return_value=(detected_anomalies, detector_extra_metrics)))

    runner = DetectionRunner(
        measurement_runner=MeasurementRunner(
            node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[t1, t2])),
            metrics_storage=Mock(spec=storage.Storage, store=Mock()),
            rdt_enabled=False,
            extra_labels=dict(extra_label='extra_value'),
        ),
        anomalies_storage=Mock(spec=storage.Storage, store=Mock()),
        detector=detector_mock,
    )

    # Replace the wait step with a mock before driving the runner by hand.
    runner._measurement_runner._wait = Mock()
    runner._measurement_runner._initialize()

    # Execute exactly one iteration.
    runner._measurement_runner._iterate()

    # First positional argument of the first store() call on anomalies storage.
    got_anomalies_metrics = runner._anomalies_storage.store.mock_calls[0][1][0]

    # Check that anomaly based metrics were stored with the expected labels.
    assert_metric(got_anomalies_metrics,
                  'anomaly',
                  expected_metric_some_labels={
                      LABEL_WORKLOAD_INSTANCE:
                      t1.labels[LABEL_WORKLOAD_INSTANCE],
                      LABEL_CONTENDED_TASK_ID:
                      t1.task_id,
                      LABEL_CONTENDING_WORKLOAD_INSTANCE:
                      t2.labels[LABEL_WORKLOAD_INSTANCE]
                  })
    assert_metric(got_anomalies_metrics,
                  'contention_related_metric',
                  expected_metric_some_labels=dict(extra_label='extra_value'))
    assert_metric(got_anomalies_metrics, 'extra_metric_from_detector')
    assert_metric(got_anomalies_metrics,
                  'anomaly_count',
                  expected_metric_value=1)
    assert_metric(got_anomalies_metrics, 'anomaly_last_occurrence')

    # Check that detector was called with proper arguments.
    (platform, tasks_data) = detector_mock.detect.mock_calls[0][1]
    # Make sure that proper values are propagated to detect method for t1.
    assert platform == platform_mock
    # Measurements have to match get_measurements mock from measurements_patch decorator.
    # Labels should have extra LABEL_WORKLOAD_INSTANCE based on redis_task_with_default_labels
    # and sanitized version of other labels for mesos (without prefix).
    # Resources should match resources from redis_task_with_default_labels
    # Expected CPU usage is scaled by the number of subcgroups (at least one).
    cpu_usage = TASK_CPU_USAGE * (len(subcgroups) if subcgroups else 1)

    assert_subdict(tasks_data[t1.task_id].measurements,
                   {MetricName.TASK_CPU_USAGE_SECONDS: cpu_usage})
    assert_subdict(tasks_data[t1.task_id].labels, {
        LABEL_WORKLOAD_INSTANCE: 'redis_6792_t1',
        'load_generator': 'rpc-perf-t1'
    })

    assert_subdict(tasks_data[t1.task_id].resources, t1.resources)

    # Check any metrics for t2 (previously this re-checked t1 by mistake).
    assert_subdict(tasks_data[t2.task_id].measurements,
                   {MetricName.TASK_CPU_USAGE_SECONDS: cpu_usage})
def test_allocation_runner(_get_allocations_mock, _get_allocations_mock_,
                           platform_mock, reset_counter_mock, subcgroups):
    """Drive AllocationRunner through three iterations and check allocation metrics.

    Low-level system calls are not mocked directly; higher-level objects and
    functions (Cgroup, Resgroup, Platform, etc.) are mocked instead. Thus the
    test does not cover the full usage scenario (such tests would be much
    harder to write).

    Scenario:

    1. one task (t1), one allocation,
    2. two tasks (t1, t2), the allocator returns the same allocation for t1,
    3. two tasks, two allocations sharing one RDT group ('one_group').

    NOTE(review): the mock parameters are presumably injected by patch
    decorators not visible in this chunk - confirm.
    """
    # Tasks mock
    t1 = redis_task_with_default_labels('t1', subcgroups)
    t2 = redis_task_with_default_labels('t2', subcgroups)

    # Allocator mock (lower the quota and number of cache ways in dedicated group).
    # Patch some of the functions of AllocationRunner.
    runner = AllocationRunner(
        measurement_runner=MeasurementRunner(
            node=Mock(spec=MesosNode, get_tasks=Mock(return_value=[])),
            metrics_storage=Mock(spec=storage.Storage, store=Mock()),
            rdt_enabled=True,
            gather_hw_mm_topology=False,
            extra_labels=dict(extra_labels='extra_value'),
        ),
        anomalies_storage=Mock(spec=storage.Storage, store=Mock()),
        allocations_storage=Mock(spec=storage.Storage, store=Mock()),
        rdt_mb_control_required=True,
        rdt_cache_control_required=True,
        allocator=Mock(spec=Allocator,
                       allocate=Mock(return_value=({}, [], []))),
    )

    # Replace the wait step with a mock before driving the runner by hand.
    runner._measurement_runner._wait = Mock()
    runner._measurement_runner._initialize()

    ############
    # First run (one task, one allocation).
    runner._measurement_runner._node.get_tasks.return_value = [t1]
    runner._allocator.allocate.return_value = ({
        t1.task_id: {
            AllocationType.QUOTA: .5,
            AllocationType.RDT: RDTAllocation(name=None, l3='L3:0=0000f')
        }
    }, [], [])
    runner._measurement_runner._iterate()

    # Check that allocator.allocate was called with proper arguments.
    assert runner._allocator.allocate.call_count == 1
    (_, tasks_data) = runner._allocator.allocate.mock_calls[0][1]
    assert_subdict(tasks_data[t1.task_id].allocations, _os_tasks_allocations)

    # Check allocation metrics ...
    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]
    # ... generic allocation metrics ...
    assert_metric(got_allocations_metrics,
                  'allocations_count',
                  dict(extra_labels='extra_value'),
                  expected_metric_value=1)
    assert_metric(got_allocations_metrics,
                  'allocations_errors',
                  dict(extra_labels='extra_value'),
                  expected_metric_value=0)
    assert_metric(got_allocations_metrics, 'allocation_duration',
                  dict(extra_labels='extra_value'))
    # ... and allocation metrics for task t1 (mask 0000f = 15, i.e. 4 bits set).
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id, extra_labels='extra_value'), 0.5)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id, extra_labels='extra_value'), 4)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_mask',
                  dict(task=t1.task_id, extra_labels='extra_value'), 15)

    ############################
    # Second run (two tasks, one allocation)
    runner._measurement_runner._node.get_tasks.return_value = [t1, t2]
    first_run_t1_task_allocations = {
        t1.task_id: {
            AllocationType.QUOTA: .5,
            AllocationType.RDT: RDTAllocation(name=None, l3='L3:0=0000f')
        }
    }
    runner._allocator.allocate.return_value = (first_run_t1_task_allocations,
                                               [], [])
    runner._measurement_runner._iterate()

    # Check allocation metrics...
    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]
    # ... generic allocation metrics ...
    assert_metric(got_allocations_metrics,
                  'allocations_count',
                  expected_metric_value=2)
    assert_metric(got_allocations_metrics,
                  'allocations_errors',
                  expected_metric_value=0)
    assert_metric(got_allocations_metrics, 'allocation_duration')
    # ... and metrics for task t1 ...
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id), 0.5)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id), 4)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_mask',
                  dict(task=t1.task_id), 15)

    # Check allocate call.
    (_, tasks_data) = runner._allocator.allocate.mock_calls[1][1]
    # (note: tasks_allocations are always read from filesystem)
    assert_subdict(tasks_data[t1.task_id].allocations, _os_tasks_allocations)
    assert_subdict(tasks_data[t2.task_id].allocations, _os_tasks_allocations)

    ############
    # Third run (two tasks, two allocations) - modify L3 cache and put in the same group
    runner._measurement_runner._node.get_tasks.return_value = [t1, t2]
    runner._allocator.allocate.return_value = ({
        t1.task_id: {
            AllocationType.QUOTA: 0.7,
            AllocationType.RDT: RDTAllocation(name='one_group', l3='L3:0=00fff')
        },
        t2.task_id: {
            AllocationType.QUOTA: 0.8,
            AllocationType.RDT: RDTAllocation(name='one_group', l3='L3:0=00fff')
        }
    }, [], [])
    runner._measurement_runner._iterate()

    got_allocations_metrics = runner._allocations_storage.store.call_args[0][0]

    assert_metric(got_allocations_metrics,
                  'allocations_count',
                  expected_metric_value=4)
    # ... and metrics for tasks t1 and t2 ...
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t1.task_id), 0.7)
    assert_metric(got_allocations_metrics, 'allocation_cpu_quota',
                  dict(task=t2.task_id), 0.8)
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t1.task_id, group_name='one_group'),
                  12)  # 00fff=12
    # Both tasks share 'one_group', so t2 must report the same number of ways
    # (previously this assertion repeated the t1 check).
    assert_metric(got_allocations_metrics, 'allocation_rdt_l3_cache_ways',
                  dict(task=t2.task_id, group_name='one_group'),
                  12)  # 00fff=12