def run(self):
    """Build a Grafana dashboard from ``metrics`` and print it as JSON.

    Reads ``self.options.title`` for the dashboard title and iterates the
    ``metrics`` sequence (resolved from the enclosing scope, not a parameter).
    Each entry is handled by the keys it contains:

    * ``section`` -- starts a new row titled with the section name.
    * ``row`` -- starts a new untitled row.
    * otherwise -- a graph panel built from ``title`` and the list of
      query expressions under ``expr``, appended to the most recent row.

    The serialised dashboard is written to stdout wrapped in a
    ``{"dashboard": ...}`` envelope.
    """
    templates = [
        G.Template(default="",
                   dataSource="default",
                   name="serverid",
                   label="ServerID",
                   query="label_values(serverid)"),
    ]

    dashboard = G.Dashboard(title=self.options.title,
                            templating=G.Templating(list=templates))

    # Simple table processing - could be enhanced to use GridPos etc.
    for metric in metrics:
        if 'section' in metric:
            dashboard.rows.append(
                G.Row(title=metric['section'], showTitle=True))
            continue
        if 'row' in metric:
            dashboard.rows.append(G.Row(title='', showTitle=False))
            continue

        graph = G.Graph(title=metric['title'],
                        dataSource='default',
                        maxDataPoints=1000,
                        legend=G.Legend(show=True, alignAsTable=True,
                                        min=True, max=True, avg=True,
                                        current=True, total=True,
                                        sort='max', sortDesc=True),
                        yAxes=G.single_y_axis())

        # Targets are lettered 'A', 'B', 'C', ... in the order listed.
        for offset, texp in enumerate(metric['expr']):
            graph.targets.append(
                G.Target(expr=texp, refId=chr(ord('A') + offset)))

        # A graph listed before any 'section'/'row' entry has no row to
        # live in; give it an untitled one instead of failing on rows[-1].
        if not dashboard.rows:
            dashboard.rows.append(G.Row(title='', showTitle=False))
        dashboard.rows[-1].panels.append(graph)

    # Auto-number panels - returns new dashboard
    dashboard = dashboard.auto_panel_ids()

    s = io.StringIO()
    write_dashboard(dashboard, s)
    print("""{ "dashboard": %s } """ % s.getvalue())
def StatusQPSGraph(data_source, title, expression, **kwargs):
    """Build a stacked QPS graph with one series per status code.

    :param data_source: Forwarded to ``prometheus.PromGraph`` as
        ``data_source``.
    :param title: Title of the graph.
    :param expression: PromQL expression. It is wrapped in
        ``sum by (status_code)(...)``, so its series need a ``status_code``
        label (an HTTP code like 404, or "success" and "error").
    :param kwargs: Forwarded unchanged to ``prometheus.PromGraph``.
    :return: The result of ``W.stacked`` applied to the constructed graph.
    """
    # Aggregate the caller's expression per status code; the legend entry
    # for each series is the value of its status_code label.
    per_status_query = 'sum by (status_code)(%s)' % expression
    series = [('{{status_code}}', per_status_query)]

    axes = [
        G.YAxis(format=G.OPS_FORMAT),
        G.YAxis(format=G.SHORT_FORMAT),
    ]

    graph = prometheus.PromGraph(
        data_source=data_source,
        title=title,
        expressions=series,
        seriesOverrides=QPS_SERIES_OVERRIDES,
        legend=G.Legend(hideZero=True),
        yAxes=axes,
        **kwargs
    )
    return W.stacked(graph)
def make(prefix, title):
    """Assemble the Isaac Ranks dashboard for one deployment prefix.

    Every query selects on a ``service`` label of the form
    ``<prefix>-isaacranks-web`` or ``<prefix>-isaacranks-rebuild``.

    The dashboard has five rows, each holding two panels:

    1. pods-up single stats (web, rebuild)
    2. HTTP request rate by status class, and HTTP latency quantiles
    3. ballot rate by version, and ballot latency quantiles
    4. vote rate by version, and vote latency quantiles
    5. time since last rebuild, and last rebuild duration

    :param prefix: Value substituted into the ``service`` label of every
        query expression.
    :param title: Dashboard title.
    :return: The dashboard returned by ``auto_panel_ids()``.
    """
    # (recording-rule suffix, legend text, refId) for each latency series.
    quantiles = (
        ('50p', '0.5q', 'A'),
        ('90p', '0.9q', 'B'),
        ('99p', '0.99q', 'C'),
    )

    def query(template, **options):
        # Fill the deployment prefix into the template's service label.
        return G.Target(expr=template.format(prefix), **options)

    def axes(unit):
        # Left axis in the given unit, plus a hidden right axis.
        return [
            G.YAxis(format=unit),
            G.YAxis(format=G.SHORT_FORMAT, show=False),
        ]

    def pods_up(stat_title, template):
        # Single-stat panel showing the current value with a sparkline.
        return G.SingleStat(
            title=stat_title,
            dataSource='prometheus',
            valueName='current',
            sparkline=G.SparkLine(show=True),
            targets=[query(template)],
        )

    def rate_graph(graph_title, queries, **extra):
        # Stacked, filled ops/sec graph with per-series tooltip values.
        return G.Graph(
            title=graph_title,
            dataSource='prometheus',
            targets=queries,
            yAxes=axes(G.OPS_FORMAT),
            nullPointMode=G.NULL_AS_ZERO,
            stack=True,
            lineWidth=0,
            fill=10,
            tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
            **extra
        )

    def version_rate(metric):
        # One series per version label for the given irate recording rule.
        return [
            query(
                'service_version:' + metric
                + ':irate{{service="{}-isaacranks-web"}}',
                legendFormat='{{version}}',
                refId='A'),
        ]

    def latency_graph(graph_title, metric, **extra):
        # 50th/90th/99th percentile series, scaled by 1000 for the
        # milliseconds axis.
        return G.Graph(
            title=graph_title,
            dataSource='prometheus',
            targets=[
                query(
                    'service:' + metric + ':' + suffix
                    + '{{service="{}-isaacranks-web"}} * 1000',
                    legendFormat=legend,
                    refId=ref)
                for suffix, legend, ref in quantiles
            ],
            yAxes=axes(G.MILLISECONDS_FORMAT),
            **extra
        )

    def rebuild_graph(graph_title, template, series_name):
        # Seconds-valued graph whose legend shows the current value.
        return G.Graph(
            title=graph_title,
            dataSource='prometheus',
            targets=[query(template, legendFormat=series_name)],
            legend=G.Legend(current=True),
            yAxes=axes(G.SECONDS_FORMAT),
        )

    # '%s' is filled with the leading status digit first; the remaining
    # '{}' is filled with the prefix by query().
    http_rate = (
        'service_status:http_request_duration_seconds_count:irate'
        '{{service="{}-isaacranks-web",status_code=~"%s.."}}'
    )
    http_rate_queries = [
        query(http_rate % digit, legendFormat=digit + 'xx', refId=ref)
        for digit, ref in zip('12345', 'ABCDE')
    ]

    pods_row = G.Row(panels=[
        pods_up(
            'Pods up (web)',
            'count by(service) (up{{service="{}-isaacranks-web"}} == 1)'),
        pods_up(
            'Pods up (rebuild)',
            'count by(service) (up{{service="{}-isaacranks-rebuild"}} == 1)'),
    ])

    http_row = G.Row(panels=[
        rate_graph('HTTP RPS', http_rate_queries, aliasColors=ALIAS_COLORS),
        latency_graph(
            'HTTP latency',
            'http_request_duration_seconds',
            aliasColors=ALIAS_COLORS),
    ])

    ballot_row = G.Row(panels=[
        rate_graph(
            'Ballots',
            version_rate('isaacranks_ballot_generation_seconds_count')),
        latency_graph(
            'Ballot latency', 'isaacranks_ballot_generation_seconds'),
    ])

    vote_row = G.Row(panels=[
        rate_graph(
            'Votes',
            version_rate('isaacranks_vote_casting_seconds_count')),
        latency_graph('Vote latency', 'isaacranks_vote_casting_seconds'),
    ])

    rebuild_row = G.Row(panels=[
        rebuild_graph(
            'Time since last rebuild',
            'time() - (isaacranks_last_rebuild_timestamp'
            '{{service="{}-isaacranks-rebuild"}} != 0)',
            'Age'),
        rebuild_graph(
            'Rebuild duration',
            'isaacranks_last_rebuild_duration_seconds'
            '{{service="{}-isaacranks-rebuild"}} != 0',
            'Duration'),
    ])

    dashboard = G.Dashboard(
        title=title,
        rows=[pods_row, http_row, ballot_row, vote_row, rebuild_row],
    )
    return dashboard.auto_panel_ids()
yAxes=common.LATENCY_AXES, ), ], ), G.Row( title="Samples", collapse=True, panels=[ common.PromGraph( title="Push sample ingest rate by instance (>1%)", expressions=[ ('{{user}}', 'sum by (user)(rate(cortex_distributor_received_samples_total{job="cortex/distributor"}[1m])) > ignoring(user) group_left() (sum(rate(cortex_distributor_received_samples_total{job="cortex/distributor"}[1m]))/100)' ), ], legend=G.Legend(show=False), yAxes=common.OPS_AXIS, ), common.PromGraph( title="Rule sample ingest rate by instance", expressions=[ ( '{{user}}', # '> 1' is to exclude instances which are not connected and simply alerting on absent metrics 'sum by (user)(rate(cortex_distributor_received_samples_total{job="cortex/ruler"}[1m])) > 1' ), ], legend=G.Legend(show=False), yAxes=common.OPS_AXIS, ), common.PromGraph(
def run(self):
    """Build a Grafana dashboard from ``metrics`` and print it as JSON.

    Options read from ``self.options``:

    * ``title`` -- dashboard title.
    * ``use_sdp`` -- when true, adds an ``sdpinst`` template variable; when
      false, the ``sdpinst="$sdpinst",`` matcher is stripped from every
      query expression.
    * ``customer`` -- when true, adds a ``customer`` template variable and
      inserts a ``customer="$customer",`` matcher after every ``{`` in
      each query expression.

    ``metrics`` is resolved from the enclosing scope, not passed in. Each
    entry is handled by the keys it contains:

    * ``section`` -- starts a new row titled with the section name.
    * ``row`` -- starts a new untitled row.
    * ``type == 'gauge'`` -- skipped; no panel is produced.
    * otherwise -- a graph panel built from ``title``, optional
      ``yformat`` (defaults to ``"short"``) and the ``target`` list, where
      each target has an ``expr`` and optionally a ``legend`` suffix.

    The serialised dashboard is written to stdout wrapped in a
    ``{"dashboard": ...}`` envelope.
    """
    templates = []
    if self.options.use_sdp:
        templates.append(
            G.Template(default="1",
                       dataSource="default",
                       name="sdpinst",
                       label="SDPInstance",
                       query="label_values(sdpinst)"))
    if self.options.customer:
        templates.append(
            G.Template(default="1",
                       dataSource="default",
                       name="customer",
                       label="Customer",
                       query="label_values(customer)"))
    templates.append(
        G.Template(default="",
                   dataSource="default",
                   name="serverid",
                   label="ServerID",
                   query="label_values(serverid)"))

    dashboard = G.Dashboard(title=self.options.title,
                            templating=G.Templating(list=templates))

    for metric in metrics:
        if 'section' in metric:
            dashboard.rows.append(
                G.Row(title=metric['section'], showTitle=True))
            continue
        if 'row' in metric:
            dashboard.rows.append(G.Row(title='', showTitle=False))
            continue
        if metric.get('type') == 'gauge':
            # Gauge metrics have no panel representation here; skip them.
            continue

        graph = G.Graph(title=metric['title'],
                        dataSource='default',
                        maxDataPoints=1000,
                        legend=G.Legend(show=True, alignAsTable=True,
                                        min=True, max=True, avg=True,
                                        current=True, total=True,
                                        sort='max', sortDesc=True),
                        yAxes=G.single_y_axis(
                            format=metric.get('yformat', 'short')))

        # Targets are lettered 'A', 'B', 'C', ... in the order listed.
        for offset, targ in enumerate(metric['target']):
            texp = targ['expr']
            legend = "instance {{instance}}, serverid {{serverid}}"
            if 'legend' in targ:
                legend += ' %s' % targ['legend']
            # Remove SDP
            if not self.options.use_sdp:
                texp = texp.replace('sdpinst="$sdpinst",', '')
            # Scope every label selector to the chosen customer.
            if self.options.customer:
                texp = texp.replace('{', '{customer="$customer",')
            graph.targets.append(
                G.Target(expr=texp,
                         legendFormat=legend,
                         refId=chr(ord('A') + offset)))

        # A graph listed before any 'section'/'row' entry has no row to
        # live in; give it an untitled one instead of failing on rows[-1].
        if not dashboard.rows:
            dashboard.rows.append(G.Row(title='', showTitle=False))
        dashboard.rows[-1].panels.append(graph)

    # Auto-number panels - returns new dashboard
    dashboard = dashboard.auto_panel_ids()

    s = io.StringIO()
    write_dashboard(dashboard, s)
    print("""{ "dashboard": %s } """ % s.getvalue())