def generate_sfn_execution_duration_graph(name: str, cloudwatch_data_source: str, *args, **kwargs):
    """Build the "Step function execution duration" graph panel.

    The panel has one CloudWatch query per statistic (minimum, average,
    maximum) of the ``ExecutionTime`` metric, all filtered on the same
    ``StateMachineArn`` dimension.

    Args:
        name: Value used for the ``StateMachineArn`` metric dimension.
        cloudwatch_data_source: Passed to the graph as its ``dataSource``.
        *args: Accepted but not used by this function.
        **kwargs: Accepted but not used by this function.

    Returns:
        Whatever ``auto_ref_ids()`` returns for the constructed ``Graph``.
    """
    # (alias, CloudWatch statistic) pairs, in the order the queries are added.
    alias_and_statistic = (
        (DURATION_MINIMUM_ALIAS, "Minimum"),
        (DURATION_AVERAGE_ALIAS, "Average"),
        (DURATION_MAXIMUM_ALIAS, "Maximum"),
    )
    duration_targets = [
        CloudwatchMetricsTarget(
            alias=series_alias,
            namespace=NAMESPACE,
            metricName="ExecutionTime",
            statistics=[statistic],
            dimensions={"StateMachineArn": name},
        )
        for series_alias, statistic in alias_and_statistic
    ]

    # Both axes use the same millisecond format; each gets its own YAxis object.
    duration_axes = YAxes(
        *[YAxis(format=MILLISECONDS_FORMAT, decimals=2) for _ in range(2)]
    )

    # Minimum and maximum are drawn without lines; the maximum series is
    # filled down to the minimum series, and the average has no fill.
    minimum_override = {
        "alias": DURATION_MINIMUM_ALIAS,
        "color": "#C8F2C2",
        "lines": False,
    }
    average_override = {
        "alias": DURATION_AVERAGE_ALIAS,
        "color": "#FADE2A",
        "fill": 0,
    }
    maximum_override = {
        "alias": DURATION_MAXIMUM_ALIAS,
        "color": "rgb(77, 159, 179)",
        "fillBelowTo": DURATION_MINIMUM_ALIAS,
        "lines": False,
    }

    panel = Graph(
        title="Step function execution duration",
        dataSource=cloudwatch_data_source,
        targets=duration_targets,
        seriesOverrides=[minimum_override, average_override, maximum_override],
        yAxes=duration_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        # No alert is attached to this panel.
        alert=None,
        alertThreshold=ALERT_THRESHOLD,
    )
    return panel.auto_ref_ids()
dashboard = Dashboard( title="{}s Dashboard".format(GRR_COMPONENT).title(), rows=[ Row(panels=[panel(GRR_COMPONENT) for panel in row]) for row in GENERAL_PANELS ] + [ Row(panels=[ Graph( title="QPS", targets=[ Target( expr='sum(rate(frontend_request_count_total[1m]))', legendFormat="Requests", ), ], yAxes=YAxes(left=YAxis(format="reqps")), ), Graph( title="Request Latency", targets=[ Target( expr= 'sum(rate(frontend_request_latency_sum[10m])) / sum(rate(frontend_request_latency_count[10m]))', legendFormat="Latency", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title="Well Known Flows Requests Rate by Flow", targets=[
), Target( expr= 'aws_ec_hash_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="hash-{{dimension_cache_cluster_id}}", refId='G', ), Target( expr= 'aws_ec_list_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="list-{{dimension_cache_cluster_id}}", refId='H', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_set_type_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="set_type-{{dimension_cache_cluster_id}}", refId='A', ), Target(
), Graph( title="Threadpool Latency vs. Queuing Time", targets=[ Target( expr= 'sum(rate(threadpool_working_time_sum{job="grr_worker"}[10m])) / sum(rate(threadpool_working_time_count{job="grr_worker"}[10m]))', legendFormat="Latency", ), Target( expr= 'sum(rate(threadpool_queueing_time_sum{job="grr_worker"}[10m])) / sum(rate(threadpool_queueing_time_count{job="grr_worker"}[10m]))', legendFormat="Queueing Time", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title= "Rate of Flow States a GRR Worker has moved through by GRR Worker Instance", targets=[ Target( expr= 'sum by (instance) (rate(grr_worker_states_run_total{job="grr_worker"}[10m]))', legendFormat="{{instance}}", ), ], ), ]), ], ).auto_panel_ids()
from grr_grafanalib_dashboards.config import CLIENT_LOAD_STATS_DATA_SOURCE client_id_variable = Template(name="ClientID", query="", type="textbox") dashboard = Dashboard( title="Client Load Stats Dashboard", templating=Templating([client_id_variable]), rows=[ Row(panels=[ Graph( title="User CPU Usage", targets=[ Target(target='Mean User CPU Rate', ), Target(target='Max User CPU Rate', ), ], yAxes=YAxes(left=YAxis(max=105, format="percent")), ), Graph( title="System CPU Usage", targets=[ Target(target='Mean System CPU Rate', ), Target(target='Max System CPU Rate', ), ], yAxes=YAxes(left=YAxis(max=105, format="percent")), ), ]), Row(panels=[ Graph( title="Resident Memory", targets=[ Target(target='Mean Resident Memory MB', ),
Target( expr= 'sum(process_resident_memory_bytes{job="fleetspeak"})', legendFormat="Resident Memory", ), ]), Graph( title="CPU Usage", targets=[ Target( expr= 'avg(rate(process_cpu_seconds_total{job="fleetspeak"}[30s])) * 100', legendFormat="Average Process CPU Usage", ), ], yAxes=YAxes(left=YAxis(max=105, format="percent")), ), ]), Row(panels=[ Graph( title="Datastore Latency per Operation", targets=[ Target( expr= 'sum by (operation) (rate(fleetspeak_server_datastore_operations_completed_latency_sum[10m]) / rate(fleetspeak_server_datastore_operations_completed_latency_count[10m]))', legendFormat="{{operation}}", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Heatmap(
OPS_FORMAT, RowPanel, SHORT_FORMAT, SqlTarget, YAxes, YAxis, )

# Dashboard titled "Random stats from SQL DB" with two panels: a row header
# and a single graph fed by a raw SQL query. `dashboard` is bound to the
# value returned by auto_panel_ids() on the constructed Dashboard.
dashboard = Dashboard(
    title="Random stats from SQL DB",
    panels=[
        # Row header: 1 unit high, 24 wide, positioned at y=8.
        RowPanel(title="New row", gridPos=GridPos(h=1, w=24, x=0, y=8)),
        Graph(
            title="Some SQL Queries",
            # Placeholder-looking data source name.
            # NOTE(review): presumably must match a data source configured in
            # the target Grafana instance - confirm before deploying.
            dataSource="Your SQL Source",
            targets=[
                SqlTarget(
                    # Selects `date` aliased as "time" plus `metric` from the
                    # `example` table, filtered with $__timeFilter("time").
                    rawSql=
                    'SELECT date as "time", metric FROM example WHERE $__timeFilter("time")',
                    refId="A",
                ),
            ],
            # Two axes: the first formatted with OPS_FORMAT, the second with
            # SHORT_FORMAT.
            yAxes=YAxes(
                YAxis(format=OPS_FORMAT),
                YAxis(format=SHORT_FORMAT),
            ),
            # Graph sits directly below the row header (y=9), 8 high, 24 wide.
            gridPos=GridPos(h=8, w=24, x=0, y=9),
        ),
    ],
).auto_panel_ids()
def workflow_stats(collapse: bool) -> Row:
    """Build the "Workflow Stats" dashboard row.

    The row contains, in order:

    * four rate panels (accepted, successful, failed and aborted workflows),
      each plotting ``sum(rate(<metric>{...}[5m]))``;
    * three quantile panels (successful workflow execution time, failed
      workflow execution time, node queuing latency), each plotting
      ``sum(<metric>{...}) by (quantile)``.

    Every query is filtered by the ``$project``, ``$domain`` and
    ``$workflow`` dashboard variables.

    Args:
        collapse: Passed to the row as its ``collapse`` setting.

    Returns:
        The populated ``Row``.
    """
    # Label selector shared by every query in this row.
    selector = '{project=~"$project", domain=~"$domain", wf=~"$workflow"}'

    def rate_graph(title: str, metric: str) -> Graph:
        # Panel showing the summed 5-minute rate of `metric`.
        return Graph(
            title=title,
            dataSource=DATASOURCE,
            targets=[
                Target(
                    expr="sum(rate(" + metric + selector + "[5m]))",
                    refId='A',
                ),
            ],
            yAxes=YAxes(
                YAxis(format=OPS_FORMAT),
                YAxis(format=SHORT_FORMAT),
            ),
        )

    def quantile_graph(title: str, metric: str) -> Graph:
        # Panel showing `metric` summed per quantile, on a millisecond axis.
        return Graph(
            title=title,
            dataSource=DATASOURCE,
            targets=[
                Target(
                    expr="sum(" + metric + selector + ") by (quantile)",
                    refId='A',
                ),
            ],
            yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
        )

    return Row(
        title="Workflow Stats",
        collapse=collapse,
        panels=[
            rate_graph(
                "Accepted Workflow",
                "flyte:propeller:all:workflow:accepted",
            ),
            rate_graph(
                "Successful Workflow",
                "flyte:propeller:all:workflow:success_duration_ms_count",
            ),
            rate_graph(
                "Failed Workflow",
                "flyte:propeller:all:workflow:failure_duration_ms_count",
            ),
            rate_graph(
                "Aborted Workflow",
                "flyte:propeller:all:workflow:workflow_aborted",
            ),
            quantile_graph(
                "Successful workflow execution time by Quantile",
                "flyte:propeller:all:workflow:success_duration_ms",
            ),
            # Must be the same base metric as the "Failed Workflow" panel's
            # failure_duration_ms_count; this panel previously queried
            # "failed_duration_ms", which does not match that series.
            quantile_graph(
                "Failed workflow execution time by Quantile",
                "flyte:propeller:all:workflow:failure_duration_ms",
            ),
            quantile_graph(
                "Node queuing latency by Quantile",
                "flyte:propeller:all:node:queueing_latency_ms",
            ),
        ],
    )