Example #1
0
    d.Graph(
        title="coredns: # running",
        targets=[
            d.Target(
                expr=
                'count(container_memory_usage_bytes{namespace="kube-system", container="coredns"}) by (container, namespace)'
            )
        ],
        nullPointMode="null",
    ),
    d.Graph(
        title="coredns: memory usage",
        targets=d.min_max_avg(
            base=
            'process_resident_memory_bytes{namespace="kube-system", job="kube-dns"}',
            by=["job", "namespace"],
            legend="{{job}}",
        ),
        nullPointMode="null",
        yAxes=g.single_y_axis(format=g.BYTES_FORMAT),
    ),
]

# "DNS" dashboard: one row per (title, panel list) pair, in the order listed.
# auto_panel_ids() is called on the constructed dashboard before binding.
dashboard = d.Dashboard(
    title="DNS",
    rows=[
        d.Row(title=row_title, panels=row_panels)
        for row_title, row_panels in (
            ("In-cluster DNS latency", DNS_LATENCY_PANEL),
            ("CoreDNS", COREDNS_PANELS),
        )
    ],
).auto_panel_ids()
Example #2
0
            g.Target(
                expr=
                "sum(rate(node_netstat_Tcp_RetransSegs[1m])) by (instance)",
                legendFormat="RetransSegs {{instance}}",
            ),
        ],
        yAxes=g.single_y_axis(format=g.SHORT_FORMAT, logBase=10),
    ),
]

# The final dashboard must be named 'dashboard' so that grafanalib will find it.
dashboard = d.Dashboard(
    title="Master dashboard",
    refresh="",
    rows=[
        d.Row(title="Clusterloader", panels=CLUSTERLOADER_PANELS),
        d.Row(title="Overall cluster health",
              panels=HEALTH_PANELS,
              collapse=True),
        d.Row(title="etcd", panels=ETCD_PANELS, collapse=True),
        d.Row(title="kube-apiserver", panels=APISERVER_PANELS, collapse=True),
        d.Row(
            title="kube-controller-manager",
            panels=[
                d.simple_graph(
                    "Workqueue depths",
                    'workqueue_depth{endpoint="kube-controller-manager"}',
                    legend="{{name}}",
                )
            ],
            collapse=True,
    d.Graph(
        title="Service: # running",
        targets=[
            d.Target(
                expr=
                'count(process_resident_memory_bytes{namespace="kube-system", job="kube-dns"}) by (job, namespace)'
            )
        ],
        nullPointMode="null",
    ),
    d.Graph(
        title="Service: memory usage",
        targets=d.min_max_avg(
            base=
            'process_resident_memory_bytes{namespace="kube-system", job="kube-dns"}',
            by=["job", "namespace"],
            legend="{{job}}",
        ),
        nullPointMode="null",
        yAxes=g.single_y_axis(format=g.BYTES_FORMAT),
    ),
]

# "DNS" dashboard: the prober row comes first, followed by the service row.
# auto_panel_ids() is called on the constructed dashboard before binding.
dashboard = d.Dashboard(
    title="DNS",
    rows=[
        d.Row(title=row_title, panels=row_panels)
        for row_title, row_panels in (
            ("In-cluster DNS prober", PROBER_PANEL),
            ("In-cluster DNS service", SERVICE_PANELS),
        )
    ],
).auto_panel_ids()
            ),
            g.Target(
                expr=
                "sum(rate(node_netstat_Tcp_RetransSegs[1m])) by (instance)",
                legendFormat="RetransSegs {{instance}}",
            ),
        ],
        yAxes=g.single_y_axis(format=g.SHORT_FORMAT, logBase=10),
    ),
]

# The final dashboard must be named 'dashboard' so that grafanalib will find it.
dashboard = d.Dashboard(
    title="Master dashboard",
    rows=[
        d.Row(title="Clusterloader", panels=CLUSTERLOADER_PANELS),
        d.Row(title="Overall cluster health", panels=HEALTH_PANELS),
        d.Row(title="etcd", panels=ETCD_PANELS),
        d.Row(title="kube-apiserver", panels=APISERVER_PANELS),
        d.Row(
            title="kube-controller-manager",
            panels=[
                d.simple_graph(
                    "Workqueue depths",
                    'workqueue_depth{endpoint="kube-controller-manager"}',
                    legend="{{name}}",
                )
            ],
        ),
        d.Row(title="Master VM", panels=VM_PANELS),
        d.Row(
        # secondary panel
        # same criteria, different data source and starting point
        panel.title = "[SECONDARY] " + panel.title
        panel.dataSource = "$secondary_source"
        panel.timeShift = "$timeshift"
        extended_panels.append(panel)

    return extended_panels


dashboard = d.Dashboard(
    title="Comparison Master dashboard",
    refresh="",
    rows=[
        d.Row(title="API call latency", panels=extended_copy(API_CALL_LATENCY_PANELS)),
        d.Row(title="API call latency aggregated with quantile", panels=extended_copy(QUANTILE_API_CALL_LATENCY_PANELS), collapse=True),
        d.Row(title="P&F metrics", panels=extended_copy(PAF_PANELS), collapse=True),
        d.Row(title="Overall cluster health", panels=extended_copy(HEALTH_PANELS), collapse=True),
        d.Row(title="etcd", panels=extended_copy(ETCD_PANELS), collapse=True),
        d.Row(title="kube-apiserver", panels=extended_copy(APISERVER_PANELS), collapse=True),
        d.Row(title="kube-controller-manager", panels=extended_copy(CONTROLLER_MANAGER_PANELS), collapse=True),
        d.Row(title="Master VM", panels=extended_copy(VM_PANELS), collapse=True),
    ],
    templating=g.Templating(
        list=[
            d.SOURCE_TEMPLATE,
            g.Template(
                name="secondary_source",
                type="datasource",
                query="prometheus",
Example #6
0
            g.Target(
                expr=
                "sum(rate(node_netstat_Tcp_RetransSegs[1m])) by (instance)",
                legendFormat="RetransSegs {{instance}}",
            ),
        ],
        yAxes=g.single_y_axis(format=g.SHORT_FORMAT, logBase=10),
    ),
]

# The final dashboard must be named 'dashboard' so that grafanalib will find it.
dashboard = d.Dashboard(
    title="Master dashboard",
    refresh="",
    rows=[
        d.Row(title="API call latency", panels=API_CALL_LATENCY_PANELS),
        d.Row(title="API call latency aggregated with quantile",
              panels=QUANTILE_API_CALL_LATENCY_PANELS,
              collapse=True),
        d.Row(title="Overall cluster health",
              panels=HEALTH_PANELS,
              collapse=True),
        d.Row(title="etcd", panels=ETCD_PANELS, collapse=True),
        d.Row(title="kube-apiserver", panels=APISERVER_PANELS, collapse=True),
        d.Row(
            title="kube-controller-manager",
            panels=[
                d.simple_graph(
                    "Workqueue depths",
                    'workqueue_depth{endpoint="kube-controller-manager"}',
                    legend="{{name}}",
Example #7
0
        api_call_latency(
            title="Read-only API call latency (scope=cluster, threshold=30s)",
            metric=metric,
            verb="LIST",
            scope="cluster",
            threshold=30,
        ),
        api_call_latency(
            title="Mutating API call latency (threshold=1s)",
            metric=metric,
            verb=d.any_of("CREATE", "DELETE", "PATCH", "POST", "PUT"),
            scope=d.any_of("namespace", "cluster"),
            threshold=1,
        ),
    ]


# The final dashboard must be named 'dashboard' so that grafanalib will find it.
# "SLO" dashboard: the first row uses create_slo_panel() with its defaults,
# the second passes an explicit metric name for the 1m-window variant.
dashboard = d.Dashboard(
    title="SLO",
    rows=[
        d.Row(title=row_title, panels=create_slo_panel(**panel_kwargs))
        for row_title, panel_kwargs in (
            ("SLO", {}),
            (
                "Experimental: SLO (window 1m)",
                {
                    "metric": "apiserver:apiserver_request_latency_1m:histogram_quantile"
                },
            ),
        )
    ],
).auto_panel_ids()
Example #8
0
        nullPointMode="null",
    ),
    d.Graph(
        title="probes: memory usage",
        targets=[
            d.Target(
                expr='min(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
                legendFormat="min {{container}}",
            ),
            d.Target(
                expr='avg(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
                legendFormat="avg {{container}}",
            ),
            d.Target(
                expr='max(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
                legendFormat="max {{container}}",
            ),
        ],
        nullPointMode="null",
    ),
]


# "Network" dashboard: one row for network programming latency panels and one
# for in-cluster network latency panels. auto_panel_ids() is called on the
# constructed dashboard before it is bound to `dashboard`.
dashboard = d.Dashboard(
    title="Network",
    rows=[
        d.Row(title="Network programming latency", panels=NETWORK_PROGRAMMING_PANEL),
        d.Row(title="In-cluster network latency", panels=NETWORK_LATENCY_PANEL),
    ],
).auto_panel_ids()