예제 #1
0
def test_service_check_ok(monkeypatch):
    """Exercise ``_perform_service_check`` for a healthy and a failing endpoint.

    ``requests.get`` is patched twice: first to return a response with status
    code 200, then to return a response whose ``raise_for_status`` raises
    ``requests.HTTPError``. The recorded ``service_check`` calls must contain
    an OK submission followed by a CRITICAL one carrying the error message.
    """
    instance = {'prometheus_url': 'http://localhost:10252/metrics'}
    instance_tags = []

    check = KubeControllerManagerCheck(CHECK_NAME, {}, [instance])
    # Replace the reporter with a mock so each submission can be inspected.
    monkeypatch.setattr(check, 'service_check', mock.Mock())

    # Healthy endpoint: the response reports HTTP 200.
    healthy_response = mock.MagicMock(status_code=200)
    with mock.patch("requests.get", return_value=healthy_response):
        check._perform_service_check(instance)

    # Failing endpoint: raise_for_status() raises an HTTPError.
    failing_response = mock.MagicMock(
        raise_for_status=mock.Mock(
            side_effect=requests.HTTPError('health check failed')),
    )
    with mock.patch("requests.get", return_value=failing_response):
        check._perform_service_check(instance)

    ok_call = mock.call('kube_controller_manager.up',
                        AgentCheck.OK,
                        tags=instance_tags)
    critical_call = mock.call('kube_controller_manager.up',
                              AgentCheck.CRITICAL,
                              tags=instance_tags,
                              message='health check failed')
    check.service_check.assert_has_calls([ok_call, critical_call])
def test_check_metrics(aggregator, mock_metrics, mock_leader):
    """Run the check once and assert every metric it is expected to submit.

    Covers process-level metrics, node and rate-limiter metrics, work-queue
    metrics (including the extra queue/limiter names), and the leader
    election metrics and service check. Finishes by asserting that no
    submitted metric was left unchecked.

    ``mock_metrics`` and ``mock_leader`` are fixtures defined outside this
    function; presumably they stub the scraped payload and the leader
    record -- confirm against their definitions.
    """
    # NOTE(review): this four-argument constructor form differs from the
    # three-argument form used by the other tests in this file -- confirm
    # which one the installed base class expects.
    c = KubeControllerManagerCheck(CHECK_NAME, None, {}, [instance])
    c.check(instance)

    monotonic = aggregator.MONOTONIC_COUNT
    gauge = aggregator.GAUGE

    # Each entry is (metric suffix, keyword arguments for assert_metric).
    expectations = [
        ('.goroutines', {}),
        ('.threads', {}),
        ('.open_fds', {}),
        ('.client.http.requests', {}),
        ('.max_fds', {}),

        ('.nodes.evictions',
         dict(metric_type=monotonic, value=33, tags=["zone:test"])),
        ('.nodes.count', dict(value=5, tags=["zone:test"])),
        ('.nodes.unhealthy', dict(value=1, tags=["zone:test"])),

        ('.rate_limiter.use',
         dict(value=1, tags=["limiter:job_controller"])),
        ('.rate_limiter.use',
         dict(value=0, tags=["limiter:daemon_controller"])),

        ('.queue.adds',
         dict(metric_type=monotonic, value=29, tags=["queue:replicaset"])),
        ('.queue.depth',
         dict(metric_type=gauge, value=3, tags=["queue:service"])),
        ('.queue.retries',
         dict(metric_type=monotonic, value=13, tags=["queue:deployment"])),

        ('.queue.work_duration.sum',
         dict(value=255667, tags=["queue:replicaset"])),
        ('.queue.work_duration.count',
         dict(value=29, tags=["queue:replicaset"])),
        ('.queue.work_duration.quantile',
         dict(value=110, tags=["queue:replicaset", "quantile:0.5"])),

        ('.queue.latency.sum',
         dict(value=423889, tags=["queue:deployment"])),
        ('.queue.latency.count',
         dict(value=29, tags=["queue:deployment"])),
        ('.queue.latency.quantile',
         dict(value=1005, tags=["queue:deployment", "quantile:0.9"])),

        # Extra names coming from the instance configuration.
        ('.rate_limiter.use',
         dict(value=0, tags=["limiter:extra_controller"])),
        ('.queue.adds',
         dict(metric_type=monotonic, value=13, tags=["queue:extra"])),
        ('.queue.depth',
         dict(metric_type=gauge, value=2, tags=["queue:extra"])),
        ('.queue.retries',
         dict(metric_type=monotonic, value=55, tags=["queue:extra"])),
        ('.queue.work_duration.sum',
         dict(value=45171, tags=["queue:extra"])),
        ('.queue.work_duration.count',
         dict(value=13, tags=["queue:extra"])),
        ('.queue.work_duration.quantile',
         dict(value=6, tags=["queue:extra", "quantile:0.5"])),
        ('.queue.latency.sum', dict(value=9309, tags=["queue:extra"])),
        ('.queue.latency.count', dict(value=13, tags=["queue:extra"])),
        ('.queue.latency.quantile',
         dict(value=10, tags=["queue:extra", "quantile:0.9"])),
    ]
    for suffix, kwargs in expectations:
        aggregator.assert_metric(NAMESPACE + suffix, **kwargs)

    # Leader election mixin
    expected_le_tags = [
        "record_kind:endpoints",
        "record_name:kube-controller-manager",
        "record_namespace:kube-system",
    ]
    leader_metrics = (
        ('.leader_election.transitions', 3),
        ('.leader_election.lease_duration', 15),
    )
    for suffix, expected_value in leader_metrics:
        aggregator.assert_metric(NAMESPACE + suffix,
                                 value=expected_value,
                                 tags=expected_le_tags)
    aggregator.assert_service_check(NAMESPACE + ".leader_election.status",
                                    tags=expected_le_tags)

    aggregator.assert_all_metrics_covered()
예제 #3
0
def test_check_metrics_without_deprecated(aggregator, mock_metrics,
                                          mock_leader):
    """Run the check with ``instance2`` and validate the submitted metrics.

    The check is constructed from the same instance that is passed to
    ``check()``, so any configuration read at construction time matches the
    instance actually being exercised.

    ``generic_check_metrics`` is called with ``False``; presumably that flag
    means deprecated metrics are not expected -- confirm against the helper.
    """
    # Construct with instance2 (not instance) so the configured instance and
    # the one passed to check() are the same.
    check = KubeControllerManagerCheck(CHECK_NAME, {}, [instance2])
    check.check(instance2)

    generic_check_metrics(aggregator, False)