def PromGraph(*args, **kwargs):
    """Build a graph panel that reads from our Prometheus data source.

    All positional and keyword arguments are forwarded to
    ``prometheus.PromGraph``.  Two keyword defaults are supplied when the
    caller does not set them: ``tooltip`` (sorted with ``G.SORT_DESC``) and
    ``nullPointMode`` (``G.NULL_AS_NULL``).
    """
    defaults = {
        'tooltip': G.Tooltip(sort=G.SORT_DESC),
        'nullPointMode': G.NULL_AS_NULL,
    }
    # Caller-supplied keywords take precedence over the defaults.
    merged = dict(defaults, **kwargs)
    return prometheus.PromGraph(*args, data_source=PROMETHEUS, **merged)
def stacked(graph):
    """Turn a graph into a stacked graph.

    Args:
        graph: an attrs-based graph panel accepting the ``lineWidth``,
            ``nullPointMode``, ``stack``, ``fill`` and ``tooltip`` arguments.

    Returns:
        A new instance built from ``graph`` with those five settings
        replaced; ``graph`` itself is not modified.
    """
    # attr.assoc is deprecated; attr.evolve is its documented replacement and
    # builds the new instance through the class's __init__.
    return attr.evolve(
        graph,
        lineWidth=0,
        nullPointMode=G.NULL_AS_ZERO,
        stack=True,
        fill=10,
        tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
    )
def stacked(graph):
    """Return a stacked version of ``graph``.

    The result is produced by ``attr.evolve``, with the line width, null
    point mode, stacking flag, fill and tooltip replaced by the values below.
    """
    overrides = {
        'lineWidth': 0,
        'nullPointMode': G.NULL_AS_ZERO,
        'stack': True,
        'fill': 10,
        'tooltip': G.Tooltip(
            sort=G.SORT_DESC,
            valueType=G.INDIVIDUAL,
        ),
    }
    return attr.evolve(graph, **overrides)
def stacked(graph):
    """Return a stacked copy of a graph given in dictionary form.

    ``graph`` is copied shallowly and the stacking settings are written onto
    the copy, so the mapping passed in is left untouched.
    """
    stacked_graph = dict(graph)  # Shallow copy.

    # Bit of a gotcha here. `graph` is a Python dictionary form of the
    # Grafana JSON object (i.e. an AST). In the Python DSL, we use
    # consistent camelCase naming, but Grafana itself is inconsistent.
    # Thus, what was specified as `lineWidth` when passed to Graph is
    # overridden as `linewidth` here.
    stacked_graph['linewidth'] = 0
    stacked_graph['nullPointMode'] = G.NULL_AS_ZERO
    stacked_graph['stack'] = True
    stacked_graph['fill'] = 10
    stacked_graph['tooltip'] = G.Tooltip(valueType=G.INDIVIDUAL)
    return stacked_graph
# (percentile suffix of the series name, legend text, refId) for the three
# latency series drawn on every latency graph.
_LATENCY_SERIES = (
    ('50p', '0.5q', 'A'),
    ('90p', '0.9q', 'B'),
    ('99p', '0.99q', 'C'),
)


def _y_axes(left_format):
    """Return a left axis in ``left_format`` and a hidden short-format right axis."""
    return [
        G.YAxis(format=left_format),
        G.YAxis(format=G.SHORT_FORMAT, show=False),
    ]


def _pods_up_stat(target, role):
    """Return the 'Pods up' single-stat for the ``<prefix>-isaacranks-<role>`` service."""
    return G.SingleStat(
        title='Pods up (' + role + ')',
        dataSource='prometheus',
        valueName='current',
        sparkline=G.SparkLine(show=True),
        targets=[
            target(
                expr='count by(service) (up{{service="{}-isaacranks-'
                + role + '"}} == 1)'),
        ],
    )


def _stacked_rate_graph(title, targets, **extra):
    """Return an ops-format graph whose series are stacked and filled."""
    return G.Graph(
        title=title,
        dataSource='prometheus',
        targets=targets,
        yAxes=_y_axes(G.OPS_FORMAT),
        nullPointMode=G.NULL_AS_ZERO,
        stack=True,
        lineWidth=0,
        fill=10,
        tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
        **extra)


def _version_rate_target(target, metric):
    """Return the per-version ``<metric>_count:irate`` target for the web service."""
    return target(
        expr='service_version:' + metric
        + '_count:irate{{service="{}-isaacranks-web"}}',
        legendFormat='{{version}}',
        refId='A')


def _latency_graph(target, title, metric, **extra):
    """Return a millisecond-format graph of the 50p/90p/99p series of ``metric``."""
    return G.Graph(
        title=title,
        dataSource='prometheus',
        targets=[
            target(
                expr='service:' + metric + ':' + pct
                + '{{service="{}-isaacranks-web"}} * 1000',
                legendFormat=legend,
                refId=ref_id)
            for pct, legend, ref_id in _LATENCY_SERIES
        ],
        yAxes=_y_axes(G.MILLISECONDS_FORMAT),
        **extra)


def _rebuild_graph(target, title, expr, legend):
    """Return a seconds-format graph of one rebuild series, with the current value in the legend."""
    return G.Graph(
        title=title,
        dataSource='prometheus',
        targets=[target(expr=expr, legendFormat=legend)],
        legend=G.Legend(current=True),
        yAxes=_y_axes(G.SECONDS_FORMAT),
    )


def make(prefix, title):
    """Build the isaacranks dashboard for one deployment.

    Args:
        prefix: substituted into every query's service selector, giving
            ``<prefix>-isaacranks-web`` and ``<prefix>-isaacranks-rebuild``.
        title: title of the resulting dashboard.

    Returns:
        The result of ``G.Dashboard(...).auto_panel_ids()``: five rows
        covering pod counts, HTTP traffic, ballots, votes and rebuilds.
    """
    def target(expr, **kw):
        # Only ``expr`` is a str.format template (hence the doubled braces in
        # the queries); other keywords such as legendFormat='{{version}}' are
        # passed through verbatim.
        return G.Target(expr=expr.format(prefix), **kw)

    # One series per HTTP status class: 1xx..5xx with refIds A..E.
    http_rps_targets = [
        target(
            expr='service_status:http_request_duration_seconds_count:irate'
            '{{service="{}-isaacranks-web",status_code=~"' + digit + '.."}}',
            legendFormat=digit + 'xx',
            refId=ref_id)
        for digit, ref_id in zip('12345', 'ABCDE')
    ]

    return G.Dashboard(
        title=title,
        rows=[
            G.Row(panels=[
                _pods_up_stat(target, 'web'),
                _pods_up_stat(target, 'rebuild'),
            ]),
            G.Row(panels=[
                _stacked_rate_graph(
                    'HTTP RPS', http_rps_targets, aliasColors=ALIAS_COLORS),
                _latency_graph(
                    target, 'HTTP latency', 'http_request_duration_seconds',
                    aliasColors=ALIAS_COLORS),
            ]),
            G.Row(panels=[
                _stacked_rate_graph('Ballots', [
                    _version_rate_target(
                        target, 'isaacranks_ballot_generation_seconds'),
                ]),
                _latency_graph(
                    target, 'Ballot latency',
                    'isaacranks_ballot_generation_seconds'),
            ]),
            G.Row(panels=[
                _stacked_rate_graph('Votes', [
                    _version_rate_target(
                        target, 'isaacranks_vote_casting_seconds'),
                ]),
                _latency_graph(
                    target, 'Vote latency',
                    'isaacranks_vote_casting_seconds'),
            ]),
            G.Row(panels=[
                _rebuild_graph(
                    target, 'Time since last rebuild',
                    'time() - (isaacranks_last_rebuild_timestamp{{service="{}-isaacranks-rebuild"}} != 0)',
                    'Age'),
                _rebuild_graph(
                    target, 'Rebuild duration',
                    'isaacranks_last_rebuild_duration_seconds{{service="{}-isaacranks-rebuild"}} != 0',
                    'Duration'),
            ]),
        ]).auto_panel_ids()
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

import attr
from grafanalib import core as g

DECREASING_ORDER_TOOLTIP = g.Tooltip(sort=g.SORT_DESC)
PANEL_HEIGHT = g.Pixels(300)
QUANTILES = [0.99, 0.9, 0.5]

SOURCE_TEMPLATE = g.Template(name="source", type="datasource", query="prometheus")


@attr.s
class Dashboard(g.Dashboard):
    """A ``g.Dashboard`` with project defaults for ``time`` and ``templating``.

    ``time`` defaults to ``g.Time("now-30d", "now")`` and ``templating``
    defaults to a ``g.Templating`` containing ``SOURCE_TEMPLATE``.
    """

    # The defaults are built by attr.Factory so that every Dashboard gets its
    # own Time/Templating object; a plain ``default=<instance>`` would share
    # one object (and its template list) between all dashboards.
    time = attr.ib(default=attr.Factory(lambda: g.Time("now-30d", "now")))
    # Make it possible to use $source as a source.
    templating = attr.ib(
        default=attr.Factory(lambda: g.Templating(list=[SOURCE_TEMPLATE])))
def dashboard():
    """Assemble the "S4" dashboard and assign its panel ids.

    The dashboard has four rows: signups/usage/last convergence, two
    cluster rows, and a final row of transfer-rate, deployment and
    unhandled-error panels.  Every panel uses the "prometheus" data source.
    """
    datasource = "prometheus"

    def count_axes():
        # Both y-axes are unformatted and labelled "Count".
        return [
            G.YAxis(format="none", label="Count"),
            G.YAxis(format="none", label="Count"),
        ]

    # Filter down to just the signup pod since that's the only one where
    # this metric value is meaningful.  Some other pods report a 0 value
    # for this metric because they happen to import the Python code that
    # defines the object representing it.
    #
    # Also, sum over the selected series to account for pod replacement.
    signup_series = [
        ('sum(wormhole_signup_started{pod=~"s4-signup.*"})',
         "Wormhole Signups Started", "A"),
        ('sum(wormhole_signup_success{pod=~"s4-signup.*"})',
         "Wormhole Signups Completed", "B"),
        ('sum(wormhole_signup_failure{pod=~"s4-signup.*"})',
         "Wormhole Signups Failed", "C"),
    ]
    signups = G.Graph(
        title="Signups",
        dataSource=datasource,
        xAxis=X_TIME,
        yAxes=count_axes(),
        targets=[
            G.Target(expr=query, legendFormat=legend, refId=ref_id)
            for query, legend, ref_id in signup_series
        ],
    )

    # Stack the connection graphs on each other, revealing both a total
    # and a distribution across different grid router instances.
    usage = G.Graph(
        title="Usage",
        dataSource=datasource,
        stack=True,
        tooltip=G.Tooltip(valueType=G.INDIVIDUAL),
        xAxis=X_TIME,
        yAxes=count_axes(),
        targets=[
            G.Target(
                expr="grid_router_connections",
                legendFormat="Tahoe-LAFS Connections",
                refId="D",
            ),
        ],
    )

    overview_row = G.Row(panels=[
        signups,
        usage,
        last_convergence(datasource),
    ])
    cluster_row = G.Row(
        title="Cluster",
        panels=[
            cpu_usage(datasource, ["1m", "5m", "10m"]),
            memory_usage(datasource),
            network_usage(datasource),
            filesystem_usage(datasource),
        ],
    )
    cluster2_row = G.Row(
        title="Cluster2",
        panels=[process_open_fds(datasource)],
    )
    service_row = G.Row(panels=[
        tahoe_lafs_transfer_rate(datasource),
        s4_customer_deployments(datasource),
        unhandled_errors(datasource),
    ])

    board = G.Dashboard(
        title="S4",
        rows=[overview_row, cluster_row, cluster2_row, service_row],
    )
    return board.auto_panel_ids()
def dashboard():
    """Assemble the "S4" dashboard and assign its panel ids.

    Returns:
        The result of ``G.Dashboard(...).auto_panel_ids()``: a row of
        signup and usage graphs, a "Cluster" row of resource panels, and a
        row of two single-stat panels.
    """
    # Single place that names the data source; every panel below uses it.
    PROMETHEUS = "prometheus"
    return G.Dashboard(
        title="S4",
        rows=[
            G.Row(panels=[
                G.Graph(
                    title="Signups",
                    dataSource=PROMETHEUS,
                    xAxis=X_TIME,
                    yAxes=[
                        G.YAxis(
                            format="none",
                            label="Count",
                        ),
                        G.YAxis(
                            format="none",
                            label="Count",
                        ),
                    ],
                    targets=[
                        G.Target(
                            expr='wormhole_signup_started{pod=~"s4-signup.*"}',
                            legendFormat="Wormhole Signups Started",
                            refId="A",
                        ),
                        G.Target(
                            expr='wormhole_signup_success{pod=~"s4-signup.*"}',
                            legendFormat="Wormhole Signups Completed",
                            refId="B",
                        ),
                        G.Target(
                            expr='wormhole_signup_failure{pod=~"s4-signup.*"}',
                            legendFormat="Wormhole Signups Failed",
                            refId="C",
                        ),
                    ],
                ),
                G.Graph(
                    title="Usage",
                    dataSource=PROMETHEUS,
                    # Stack the connection graphs on each other, revealing
                    # both a total and a distribution across different grid
                    # router instances.
                    stack=True,
                    tooltip=G.Tooltip(
                        valueType=G.INDIVIDUAL,
                    ),
                    xAxis=X_TIME,
                    yAxes=[
                        G.YAxis(
                            format="none",
                            label="Count",
                        ),
                        G.YAxis(
                            format="none",
                            label="Count",
                        ),
                    ],
                    targets=[
                        G.Target(
                            expr="grid_router_connections",
                            legendFormat="Tahoe-LAFS Connections",
                            refId="D",
                        ),
                    ],
                ),
            ]),
            G.Row(
                title="Cluster",
                panels=[
                    cpu_usage(PROMETHEUS, ["1m", "5m", "10m"]),
                    memory_usage(PROMETHEUS),
                    network_usage(PROMETHEUS),
                    filesystem_usage(PROMETHEUS),
                ],
            ),
            G.Row(panels=[
                G.SingleStat(
                    title='Current Customer Deployments',
                    # Use the shared constant rather than repeating the
                    # literal, so all panels follow one data source name.
                    dataSource=PROMETHEUS,
                    valueName='current',
                    sparkline=G.SparkLine(show=True),
                    targets=[
                        G.Target(
                            expr='s4_deployment_gauge',
                            refId="E",
                        ),
                    ],
                ),
                G.SingleStat(
                    title='Unhandled Errors',
                    dataSource=PROMETHEUS,
                    valueName='current',
                    sparkline=G.SparkLine(show=True),
                    targets=[
                        G.Target(
                            expr='s4_unhandled_error_counter',
                            refId="F",
                        ),
                    ],
                ),
            ]),
        ],
    ).auto_panel_ids()
G.Target( expr= 'service_status:http_request_duration_seconds_count:irate{service="ucdapi",status_code=~"5.."}', legendFormat='5xx', refId='E'), ], aliasColors=ALIAS_COLORS, yAxes=[ G.YAxis(format=G.OPS_FORMAT), G.YAxis(format=G.SHORT_FORMAT, show=False) ], nullPointMode=G.NULL_AS_ZERO, stack=True, lineWidth=0, fill=10, tooltip=G.Tooltip(valueType=G.INDIVIDUAL)), G.Graph( title='RPS', dataSource='prometheus', targets=[ G.Target( expr= 'sum(irate(http_request_duration_seconds_count{service="ucdapi"}[1m])) by (status_code, method)', ), ], yAxes=[ G.YAxis(format=G.OPS_FORMAT), G.YAxis(format=G.SHORT_FORMAT), ]), ]), G.Row(panels=[