def export(self, output_file=''):
    from baskerville.models.metrics.registry import metrics_registry

    panels = []
    for metric_name, value in metrics_registry.registry.items():
        if 'timer' in metric_name:
            # Timers are rendered as single-stat gauges showing the mean duration.
            g = Gauge()
            g.minValue = 0
            g.maxValue = 100
            g.show = True
            g.thresholdMarkers = True
            panels.append(
                SingleStat(
                    title=metric_name,
                    dataSource=self.ds,
                    gauge=g,
                    targets=[
                        Target(
                            expr=f'({metric_name}_sum / {metric_name}_count)',
                            target=metric_name,
                            refId='A',
                            metric=metric_name,
                            datasource=self.ds,
                        )
                    ]))
        else:
            # Counters and other metrics are rendered as graphs.
            panels.append(
                Graph(
                    title=metric_name,
                    dataSource=self.ds,
                    targets=[
                        Target(
                            expr=f'{metric_name}_total'
                            if 'total' in metric_name else metric_name,
                            target=metric_name,
                            refId='A',
                            metric=metric_name,
                            datasource=self.ds)
                    ]))
        # Group panels into rows of four.
        if len(panels) == 4:
            self.rows.append(Row(panels=panels))
            panels = []
    if panels:
        self.rows.append(Row(panels=panels))

    self.dashboard = Dashboard(
        title=self.dash_title, rows=self.rows).auto_panel_ids()
    with open(output_file, 'w') as f:
        write_dashboard(self.dashboard, f)
def create_dashboard( title: str, datasource_name: str, queries: List[RawInfluxDbQuery], start: datetime, end: datetime, timezone: str, yaxe_types: List[YAxis], thresholds: List[Threshold], grafana_graph_params: Dict[str, any], ) -> Dashboard: """ Create a dashboard object that can be serialized to JSON and sent to Grafana. """ targets = [] series_overrides = [] for query in queries: targets.append(InfluxDBTarget(query=query.query, alias=query.alias)) if query.yaxis == "right": series_overrides.append(SeriesOverride(alias=query.alias, yaxis=2)) left = yaxe_types[0] right = yaxe_types[1] if len(yaxe_types) > 1 else None yaxes = YAxes(left, right) if right else YAxes(left=left) return Dashboard( title=title, time=Time(start.isoformat(), end.isoformat()), timezone=timezone, rows=[ Row(panels=[ CustomGraph( title=title, dataSource=datasource_name, targets=targets, thresholds=thresholds, seriesOverrides=series_overrides, yAxes=yaxes, transparent=True, **grafana_graph_params, ), ], ), ], ).auto_panel_ids()
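A minimal usage sketch for create_dashboard, not part of the original source: the RawInfluxDbQuery constructor, the datasource name, and the query text below are assumptions; only the query/alias/yaxis fields read by the function above are relied on.

```python
# Hypothetical invocation of create_dashboard; RawInfluxDbQuery's constructor and
# the datasource/query values are placeholders, not from the original source.
import json
from datetime import datetime, timedelta

from grafanalib._gen import DashboardEncoder
from grafanalib.core import YAxis

dashboard = create_dashboard(
    title="Checkout latency",
    datasource_name="influxdb",
    queries=[
        RawInfluxDbQuery(
            query='SELECT mean("value") FROM "request_latency" WHERE $timeFilter GROUP BY time($__interval)',
            alias="mean latency",
            yaxis="left",
        ),
    ],
    start=datetime.utcnow() - timedelta(hours=6),
    end=datetime.utcnow(),
    timezone="utc",
    yaxe_types=[YAxis(format="s")],
    thresholds=[],
    grafana_graph_params={},
)
# Dashboard and the nested grafanalib objects serialize via DashboardEncoder.
print(json.dumps(dashboard.to_json_data(), cls=DashboardEncoder, indent=2))
```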
def create_lambda_only_dashboard( tags: List[str], name: str, cloudwatch_data_source: str, lambda_insights_namespace: str, notifications: List[str], environment: str, *args, **kwargs, ): """Create a dashboard with just the lambda""" return Dashboard( title="{}{}".format(LAMBDA_DASHBOARD_PREFIX, name), editable=EDITABLE, tags=tags + ["lambda", environment], timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, rows=[ Row(panels=[ lambda_generate_invocations_graph( name, cloudwatch_data_source, notifications=notifications), lambda_generate_duration_graph(name, cloudwatch_data_source), ]), Row(panels=[ lambda_generate_memory_utilization_percentage_graph( name, cloudwatch_data_source, lambda_insights_namespace, notifications=notifications, ), lambda_generate_memory_utilization_graph( name, cloudwatch_data_source, lambda_insights_namespace), ]), Row(panels=[ lambda_generate_logs_panel(name, cloudwatch_data_source), ]), ], ).auto_panel_ids()
def generate_firehose_dashboard( influxdb_data_source: str, environment: str, *args, **kwargs ) -> Dashboard: """Generate Firehose dashboard""" tags = ["firehose", environment] rows = [ Row( panels=[generate_firehose_graph(influxdb_data_source=influxdb_data_source)], editable=EDITABLE, repeat="firehose", title="$firehose", ) ] return Dashboard( title="Firehose", editable=EDITABLE, tags=tags, timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, rows=rows, ).auto_panel_ids()
dashboard = Dashboard( title="{}s Dashboard".format(GRR_COMPONENT).title(), rows=[ Row(panels=[panel(GRR_COMPONENT) for panel in row]) for row in GENERAL_PANELS ] + [ Row(panels=[ Graph( title="QPS", targets=[ Target( expr='sum(rate(frontend_request_count_total[1m]))', legendFormat="Requests", ), ], ), Graph( title="Request Latency Rate", targets=[ Target( expr= 'sum(rate(frontend_request_latency_sum[10m])) / sum(rate(frontend_request_latency_count[10m]))', legendFormat="Latency", ), ], ), Graph( title="Well Known Flows Requests Rate", targets=[ Target( expr='rate(well_known_flow_requests_total[10m])', legendFormat="Flow: {{flow}}", ), ], ), Graph( title="GRR Client Crashes", targets=[ Target( expr= 'sum(rate(grr_client_crashes_total{job="grr_frontend"}[10m]))', legendFormat="Rate of Client crashes", ), ], ) ]), ]).auto_panel_ids()
dashboard = Dashboard( title="Fleetspeak Servers Dashboard", rows=[ Row(panels=[ Graph( title="Number of Active Processes", targets=[ Target( expr='sum(up{job="fleetspeak"})', legendFormat="Active Processes", ), ], alert=Alert( name="Number of Active Processes alert", message= "The number of active Fleetspeak Server processes is below {}" .format(ACTIVE_PROCESSES_ALERTING_CONDITION), alertConditions=[ AlertCondition( Target( expr='sum(up{job="fleetspeak"})', legendFormat="Active Processes", ), timeRange=TimeRange("10s", "now"), evaluator=LowerThan( ACTIVE_PROCESSES_ALERTING_CONDITION), operator=OP_AND, reducerType=RTYPE_SUM) ], )), Graph( title="Sum of Process Memory Bytes (across all instances)", targets=[ Target( expr= 'sum(process_resident_memory_bytes{job="fleetspeak"})', legendFormat="Resident Memory", ), ]), Graph( title="CPU Usage", targets=[ Target( expr= 'avg(rate(process_cpu_seconds_total{job="fleetspeak"}[30s])) * 100', legendFormat="Average Process CPU Usage", ), ], yAxes=YAxes(left=YAxis(max=105, format="percent")), ), ]), Row(panels=[ Graph( title="Datastore Latency per Operation", targets=[ Target( expr= 'sum by (operation) (rate(fleetspeak_server_datastore_operations_completed_latency_sum[10m]) / rate(fleetspeak_server_datastore_operations_completed_latency_count[10m]))', legendFormat="{{operation}}", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Heatmap( title="Datastore Latency Distribution", targets=[ Target( expr= 'sum(rate(fleetspeak_server_datastore_operations_completed_latency_bucket[10m])) by (le)', ), ], yAxis=YAxis(format=SECONDS_FORMAT), legend={'show': True}, ), Graph( title="Datastore Latency Quantiles", targets=[ Target( expr= 'histogram_quantile(0.5, sum by (le) (rate(fleetspeak_server_datastore_operations_completed_latency_bucket[10m])))', legendFormat="50th percentile", ), Target( expr= 'histogram_quantile(0.9, sum by (le) (rate(fleetspeak_server_datastore_operations_completed_latency_bucket[10m])))', legendFormat="90th percentile", ), Target( expr= 'histogram_quantile(0.99, sum by (le) (rate(fleetspeak_server_datastore_operations_completed_latency_bucket[10m])))', legendFormat="99th percentile", ), ]), Graph( title="Datastore Errors Rate per Operation", targets=[ Target( expr= 'sum by (operation) (rate(fleetspeak_server_datastore_operations_completed_latency_count{errored="true"}[10m]))', legendFormat="{{operation}}", ), ]), ]), Row(panels=[ Graph( title="Successful Datastore Operations Rate per Operation", targets=[ Target( expr= 'sum by (operation) (rate(fleetspeak_server_datastore_operations_completed_latency_count{errored="false"}[10m]))', legendFormat="{{operation}}", ), ]), Graph( title="Client Polls Rate per Type", targets=[ Target( expr= 'sum by (poll_type) (rate(fleetspeak_server_client_polls_total[10m]))', legendFormat="{{poll_type}}", ), ]), Graph( title="Client Polls Errors Rate per Status", targets=[ Target( expr= 'sum by (http_status_code) (rate(fleetspeak_server_client_polls_total{http_status_code="^(4|5).*"}[10m]))', legendFormat="{{http_status_code}}", ), ]), Heatmap( title="Client Poll Latency Distribution", targets=[ Target( expr= 'sum(rate(fleetspeak_server_client_polls_operation_time_latency_bucket[10m])) by (le)', ), ], legend={'show': True}, yAxis=YAxis(format=SECONDS_FORMAT), ), ]), Row(panels=[ Graph( title="Client Poll Latency Quantiles", targets=[ Target( expr= 'histogram_quantile(0.5, sum by (le) (rate(fleetspeak_server_client_polls_operation_time_latency_bucket[10m])))', legendFormat="50th percentile", ), Target( 
expr= 'histogram_quantile(0.9, sum by (le) (rate(fleetspeak_server_client_polls_operation_time_latency_bucket[10m])))', legendFormat="90th percentile", ), Target( expr= 'histogram_quantile(0.99, sum by (le) (rate(fleetspeak_server_client_polls_operation_time_latency_bucket[10m])))', legendFormat="99th percentile", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title="Client Poll Operation Time Latency per Poll Type", targets=[ Target( expr= 'sum by (poll_type) (rate(fleetspeak_server_client_polls_operation_time_latency_sum[10m]) / rate(fleetspeak_server_client_polls_operation_time_latency_count[10m]))', legendFormat="{{poll_type}}", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title="Client Polls Rate per Cache Status", targets=[ Target( expr= 'sum by (cache_hit) (rate(fleetspeak_server_client_polls_total[10m]))', legendFormat="Hit: {{cache_hit}}", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title="Messages Ingested per Destination Service", targets=[ Target( expr= 'sum by (destination_service) (rate(fleetspeak_messages_ingested_total[10m]))', legendFormat="{{destination_service}}", ), ]), ]), Row(panels=[ Heatmap( title="Message Processing Latency Distribution", targets=[ Target( expr= 'sum(rate(fleetspeak_server_messages_processed_latency_bucket[10m])) by (le)', ), ], yAxis=YAxis(format=SECONDS_FORMAT), legend={'show': True}, ), Graph( title="Message Processing Latency Quantiles per Service", targets=[ Target( expr= 'histogram_quantile(0.5, sum by (le, service) (rate(fleetspeak_server_messages_processed_latency_bucket[10m])))', legendFormat="50th percentile - {{service}}", ), Target( expr= 'histogram_quantile(0.9, sum by (le, service) (rate(fleetspeak_server_messages_processed_latency_bucket[10m])))', legendFormat="90th percentile - {{service}}", ), Target( expr= 'histogram_quantile(0.99, sum by (le, service) (rate(fleetspeak_server_messages_processed_latency_bucket[10m])))', legendFormat="99th percentile - {{service}}", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title="Payload Bytes Saved per Service", targets=[ Target( expr= 'sum by (service) (rate(fleetspeak_messages_saved_payload_bytes_size[10m]))', legendFormat="{{service}}", ), ], yAxes=YAxes(left=YAxis(format=BYTES_FORMAT)), ), Graph( title="Payload Bytes Ingested per Destination Service", targets=[ Target( expr= 'sum by (destination_service) (rate(fleetspeak_messages_ingested_payload_bytes_size[10m]))', legendFormat="{{destination_service}}", ), ], yAxes=YAxes(left=YAxis(format=BYTES_FORMAT)), ), ]) ]).auto_panel_ids()
dashboard = Dashboard(
    title="{}s Dashboard".format(GRR_COMPONENT).title(),
    rows=[
        Row(panels=[panel(GRR_COMPONENT) for panel in row])
        for row in GENERAL_PANELS
    ] + [
        Row(panels=[
            Graph(
                title="Successful Flows Rate vs. Failed Flows Rate",
                targets=[
                    Target(
                        expr='sum(rate(flow_completions_total{job="grr_worker"}[10m]))',
                        legendFormat="Successes",
                    ),
                    Target(
                        expr='sum(rate(flow_errors_total{job="grr_worker"}[10m]))',
                        legendFormat="Failures",
                    ),
                ],
            ),
            Graph(
                title="Threadpool Latency Rate vs. Queuing Time Rate",
                targets=[
                    Target(
                        expr='sum(rate(threadpool_working_time_sum{job="grr_worker"}[10m])) / sum(rate(threadpool_working_time_count{job="grr_worker"}[10m]))',
                        legendFormat="Latency",
                    ),
                    Target(
                        expr='sum(rate(threadpool_queueing_time_sum{job="grr_worker"}[10m])) / sum(rate(threadpool_queueing_time_count{job="grr_worker"}[10m]))',
                        legendFormat="Queueing Time",
                    ),
                ],
            ),
            Graph(
                title="Rate of Flow States a GRR Worker has moved through",
                targets=[
                    Target(
                        expr='rate(grr_worker_states_run_total{job="grr_worker"}[10m])',
                        legendFormat="Rate of Flow States moved through",
                    ),
                ],
            ),
        ]),
    ],
).auto_panel_ids()
"yaxis": 1, "color": "#629E51" }, { "alias": clt_err_label, "bars": True, "lines": False, "stack": "A", "yaxis": 1, "color": "#E5AC0E" }, { "alias": resptime_label, "lines": True, "fill": 0, "nullPointMode": "connected", "steppedLine": True, "yaxis": 2, "color": "#447EBC" }, ], yAxes=YAxes( YAxis(label="Count", format=SHORT_FORMAT, decimals=0), YAxis(label="Response Time", format=SECONDS_FORMAT, decimals=2), ), transparent=True, span=12, ) dashboard = Dashboard(title="HTTP dashboard", rows=[Row(panels=[g])])
dashboard = Dashboard( title="{}s Dashboard".format(GRR_COMPONENT).title().replace("_", " "), rows=[ Row(panels=[panel(GRR_COMPONENT) for panel in row]) for row in GENERAL_PANELS ] + [ Row(panels=[ Graph( title="API Method Latency Rate by Method", targets=[ Target( expr= 'sum by (method_name) (rate(api_method_latency_sum[10m]) / rate(api_method_latency_count[10m]))', legendFormat="{{method_name}}", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title="API Calls Count Rate with Status SUCCESS", targets=[ Target( expr= 'sum(rate(api_method_latency_count{status="SUCCESS"}[10m]))', legendFormat="Successful Calls Rate", ), ], yAxes=YAxes(left=YAxis(format=OPS_FORMAT)), ), Graph( title= "API Calls Count Rate with other statuses (not SUCCESS) by Method", targets=[ Target( expr= 'sum by (method_name) (rate(api_method_latency_count{status!="SUCCESS"}[10m]))', legendFormat="{{method_name}}", ), ], yAxes=YAxes(left=YAxis(format=OPS_FORMAT)), ), Graph( title="API Access Probe Latency by Method", targets=[ Target( expr= 'sum by (method_name) (rate(api_access_probe_latency_sum[10m]) / rate(api_access_probe_latency_count[10m]))', legendFormat="{{method_name}}", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), ]), ], ).auto_panel_ids()
dashboard = Dashboard( title="{}s Dashboard".format(GRR_COMPONENT).title(), rows=[ Row(panels=[panel(GRR_COMPONENT) for panel in row]) for row in GENERAL_PANELS ] + [ Row(panels=[ Graph( title="QPS", targets=[ Target( expr='sum(rate(frontend_request_count_total[1m]))', legendFormat="Requests", ), ], yAxes=YAxes(left=YAxis(format="reqps")), ), Graph( title="Request Latency", targets=[ Target( expr= 'sum(rate(frontend_request_latency_sum[10m])) / sum(rate(frontend_request_latency_count[10m]))', legendFormat="Latency", ), ], yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)), ), Graph( title="Well Known Flows Requests Rate by Flow", targets=[ Target( expr= 'sum by (flow) (rate(well_known_flow_requests_total[10m]))', legendFormat="{{flow}}", ), ], ), Graph( title="GRR Client Crashes", targets=[ Target( expr= 'sum(rate(grr_client_crashes_total{job="grr_frontend"}[10m]))', legendFormat="Rate of Client crashes", ), ], ) ]), Row(panels=[ Graph( title="RSA Operations Rate (across all instances)", targets=[ Target( expr='sum(rate(grr_rsa_operations_total[10m]))', legendFormat="Rate of Operations", ), ], yAxes=YAxes(left=YAxis(format=OPS_FORMAT)), ), Graph( title="GRR Unknown Client Rate (across all instances)", targets=[ Target( expr='sum(rate(grr_client_unknown_total[10m]))', legendFormat="Rate of Unknown Clients", ), ], ), Graph( title="Decoding Errors Rate (across all instances)", targets=[ Target( expr='sum(rate(grr_decoding_error_total[10m]))', legendFormat="Rate of Decoding Errors", ), ], ), Graph( title="Decryption Errors Rate (across all instances)", targets=[ Target( expr='sum(rate(grr_decryption_error_total[10m]))', legendFormat="Rate of Decryption Errors", ), ], ) ]), Row(panels=[ Graph( title="Authenticated Messages Rate (across all instances)", targets=[ Target( expr='sum(rate(grr_authenticated_messages_total[10m]))', legendFormat="Rate of Authenticated Messages", ), ], ), Graph( title="Unauthenticated Messages Rate (across all instances)", targets=[ Target( expr= 'sum(rate(grr_unauthenticated_messages_total[10m]))', legendFormat="Rate of Unauthenticated Messages", ), ], ), ]), ]).auto_panel_ids()
def generate_ecs_alb_service_dashboard( name: str, cluster_name: str, cloudwatch_data_source: str, notifications: List[str], environment: str, loadbalancer: str, target_group: str, elasticsearch_data_source: str, lucene_query: str, max: int, *args, **kwargs, ): """Generate ECS Service dashboard""" tags = ["ecs", "ecs-service", "containers", "service", environment] panels = [ RowPanel( title="Summary", gridPos=GridPos(1, 24, 0, 0), ), generate_running_count_stats_panel( name=name, cluster_name=cluster_name, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 12, 0, 1), ), generate_deployment_graph( name=name, cluster_name=cluster_name, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 12, 12, 1), ), RowPanel(title="Capacity", gridPos=GridPos(1, 24, 0, 9)), generate_running_count_graph( name=name, cluster_name=cluster_name, max=max, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 8, 0, 10), notifications=notifications, ), generate_desired_count_graph( name=name, cluster_name=cluster_name, max=max, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 8, 8, 10), notifications=notifications, ), generate_pending_count_graph( name=name, cluster_name=cluster_name, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 8, 16, 10), notifications=notifications, ), RowPanel(title="Utilization", gridPos=GridPos(1, 24, 0, 18)), generate_cpu_utilization_graph( name=name, cluster_name=cluster_name, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 8, 0, 19), ), generate_mem_utilization_graph( name=name, cluster_name=cluster_name, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 8, 8, 19), ), generate_mem_utilization_percentage_graph( name=name, cluster_name=cluster_name, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 8, 16, 19), notifications=notifications, ), RowPanel(title="Requests and Responses", gridPos=GridPos(1, 24, 0, 27)), generate_req_count_graph( loadbalancer=loadbalancer, target_group=target_group, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 12, 0, 28), ), generate_res_count_graph( name=name, loadbalancer=loadbalancer, target_group=target_group, cloudwatch_data_source=cloudwatch_data_source, grid_pos=GridPos(8, 12, 12, 28), notifications=notifications, ), ] if elasticsearch_data_source and lucene_query: panels += [ RowPanel(title="Logs", gridPos=GridPos(1, 24, 0, 36)), generate_helpful_resources_panel(lucene_query=lucene_query, grid_pos=GridPos(8, 24, 0, 37)), generate_error_logs_panel( grid_pos=GridPos(24, 24, 0, 45), elasticsearch_data_source=elasticsearch_data_source, lucene_query=lucene_query, ), ] return Dashboard( title="{} {}".format("ECS Service:", name), editable=EDITABLE, tags=tags, timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, panels=panels, refresh=DEFAULT_REFRESH, ).auto_panel_ids()
def generate_elasticache_redis_dashboard( name: str, cache_cluster_id: str, influxdb_data_source: str, cloudwatch_data_source: str, environment: str, notifications: List[str], *args, **kwargs, ): """Generate ElastiCache Redis dashboard""" tags = ["elasticache", "redis", environment] rows = [ Row( panels=[ generate_elasticache_redis_cpu_usage_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), generate_elasticache_redis_cpu_credit_usage_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ), generate_elasticache_redis_swap_and_memory_usage_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), ], editable=EDITABLE, ), Row( panels=[ generate_elasticache_redis_network_in_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), generate_elasticache_redis_connections_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), generate_elasticache_redis_db_memory_usage_and_evicitons_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), ], editable=EDITABLE, ), Row( panels=[ generate_elasticache_redis_network_out_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), generate_elasticache_redis_replication_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), generate_elasticache_redis_latency_graph( cache_cluster_id=cache_cluster_id, cloudwatch_data_source=cloudwatch_data_source, ), ], editable=EDITABLE, ), ] return Dashboard( title="ElastiCache Redis: {}".format(name), editable=EDITABLE, tags=tags, timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, rows=rows, links=[DOCUMENTATION_LINK], refresh=DEFAULT_REFRESH, ).auto_panel_ids()
dashboard = Dashboard(
    title="Python generated example dashboard",
    description="Example dashboard using the Random Walk and default Prometheus datasource",
    tags=['example'],
    timezone="browser",
    panels=[
        TimeSeries(
            title="Random Walk",
            dataSource='default',
            targets=[
                Target(
                    datasource='grafana',
                    expr='example',
                ),
            ],
            gridPos=GridPos(h=8, w=16, x=0, y=0),
        ),
        GaugePanel(
            title="Random Walk",
            dataSource='default',
            targets=[
                Target(
                    datasource='grafana',
                    expr='example',
                ),
            ],
            gridPos=GridPos(h=4, w=4, x=17, y=0),
        ),
        TimeSeries(
            title="Prometheus http requests",
            dataSource='prometheus',
            targets=[
                Target(
                    expr='rate(prometheus_http_requests_total[5m])',
                    legendFormat="{{ handler }}",
                    refId='A',
                ),
            ],
            unit=OPS_FORMAT,
            gridPos=GridPos(h=8, w=16, x=0, y=10),
        ),
    ],
).auto_panel_ids()
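A short follow-up sketch, not from the original example, showing one way to write the dashboard above to disk with grafanalib's write_dashboard helper (the same helper used in other snippets here); the output file name is arbitrary.

```python
# Render the example dashboard above to a JSON file that can be imported
# into Grafana or pushed via its HTTP API.
from grafanalib._gen import write_dashboard

with open("example_dashboard.json", "w") as stream:
    write_dashboard(dashboard, stream)
```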
def generate_rds_dashboard( name: str, environment: str, influxdb_data_source: str, cloudwatch_data_source: str, engine: str, notifications: List[str], **kwargs, ): tags = [environment, engine, "rds", "database"] cpu_graph = generate_rds_cpu_graph( name=name, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ) burst_graph = generate_rds_burst_balance_graph( name=name, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ) connections_graph = generate_rds_database_connections_graph( name=name, cloudwatch_data_source=cloudwatch_data_source) freeable_memory_graph = generate_rds_freeable_memory_graph( name=name, cloudwatch_data_source=cloudwatch_data_source) free_storage_graph = generate_rds_free_storage_space_graph( name=name, cloudwatch_data_source=cloudwatch_data_source) rows = [ Row(panels=[cpu_graph, burst_graph]), Row(panels=[ connections_graph, freeable_memory_graph, free_storage_graph ]), Row(panels=[ generate_rds_disk_latency_graph( name=name, cloudwatch_data_source=cloudwatch_data_source), generate_rds_disk_ops_graph( name=name, cloudwatch_data_source=cloudwatch_data_source), generate_rds_network_throughput_graph( name=name, cloudwatch_data_source=cloudwatch_data_source), ]), ] if engine == "postgres": rows += [ Row(panels=[ generate_rds_transaction_id_graph( name=name, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ) ]) ] return Dashboard( title="RDS: {}".format(name), editable=EDITABLE, tags=tags, timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, rows=rows, links=[ get_documentation_link( "https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MonitoringOverview.html" ) ], refresh=DEFAULT_REFRESH, ).auto_panel_ids()
dashboard = Dashboard( title="Test Resolver dashboard", rows=[ Row(panels=[ Graph( title="gRPC Rate", dataSource='Prometheus', targets=[ Target( expr= 'rate(grpc_server_handled_total{grpc_service="ResolverService"}[1m])', legendFormat="Total-{{pod}}", refId='A', ), Target( expr= 'rate(grpc_server_handled_total{grpc_method="Resolve", grpc_service="ResolverService"}[1m])', legendFormat="Resolve-{{pod}}", refId='B', ) ], xAxis=XAxis(mode="time"), yAxes=single_y_axis(format=OPS_FORMAT, min=None), ), ]), Row(panels=[ Graph( title="gRPC latency", dataSource='Prometheus', targets=[ Target( expr= 'histogram_quantile(0.95, sum(rate(grpc_server_handled_latency_seconds_bucket{service="monitoring-compute-resolver-primary", grpc_service="ResolverService", grpc_method="Resolve"}[1m])) by (le))', legendFormat="Resolve p95", refId='A', ), ], xAxis=XAxis(mode="time"), yAxes=single_y_axis(format=SECONDS_FORMAT, min=None), ), ]), Row(panels=[ Graph( title="Memory usage", dataSource='Prometheus', targets=[ Target( expr= 'process_resident_memory_bytes{service="monitoring-compute-resolver-primary"}', legendFormat="{{pod}}", refId='A', ), ], xAxis=XAxis(mode="time"), yAxes=single_y_axis(format=BYTES_FORMAT, min=None), ), ]) ], ).auto_panel_ids()
def lambda_sns_sqs_dashboard( name: str, cloudwatch_data_source: str, lambda_insights_namespace: str, notifications: List[str], environment: str, topics: List[str], fifo: bool, *args, **kwargs, ): """Create a dashboard with Lambda, the SNS topics it is invoked from and its SQS dead letter queue""" # noqa: E501 tags = ["lambda", "sqs", environment, "sns"] if fifo: tags += ["fifo"] sqs_graph = create_lambda_sqs_graph( name=name, cloudwatch_data_source=cloudwatch_data_source, fifo=fifo) dead_letter_sqs_graph = create_lambda_sqs_dlq_graph( name=name + "-dlq", cloudwatch_data_source=cloudwatch_data_source, fifo=fifo, notifications=notifications, ) sns_topic_panels = [ create_sns_graph( name=topic, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ) for topic in topics ] return Dashboard( title="{}{}".format(LAMBDA_DASHBOARD_PREFIX, name), editable=EDITABLE, tags=tags, timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, rows=[ Row( title="SNS Topics", showTitle=True, collapse=False, panels=sns_topic_panels, ), Row( title="Invocations", showTitle=True, panels=[ lambda_generate_invocations_graph(name, cloudwatch_data_source, notifications=[]), lambda_generate_duration_graph(name, cloudwatch_data_source), ], ), Row( title="Memory Utilization", showTitle=True, panels=[ lambda_generate_memory_utilization_percentage_graph( name, cloudwatch_data_source, lambda_insights_namespace, notifications=notifications, ), lambda_generate_memory_utilization_graph( name, cloudwatch_data_source, lambda_insights_namespace), ], ), Row( title="Logs", showTitle=True, collapse=True, panels=[ lambda_generate_logs_panel(name, cloudwatch_data_source), ], ), Row(title="Queues", showTitle=True, panels=[sqs_graph]), Row( title="Dead Letter Queues", showTitle=True, panels=[dead_letter_sqs_graph], ), ], refresh=DEFAULT_REFRESH, ).auto_panel_ids()
dashboard = Dashboard( title="SRE Elasticache", templating=Templating(list=[Environment, Cluster]), rows=[ Row(panels=[ Graph( title="Redis Commands Ops(avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_set_type_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="set_type-{{dimension_cache_cluster_id}}", refId='A', ), Target( expr= 'aws_ec_get_type_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="get-{{dimension_cache_cluster_id}}", refId='B', ), Target( expr= 'aws_ec_string_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="string-{{dimension_cache_cluster_id}}", refId='C', ), Target( expr= 'aws_ec_key_based_commands_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="key-{{dimension_cache_cluster_id}}", refId='D', ), Target( expr= 'aws_ec_sorted_set_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat= "sorted_set-{{dimension_cache_cluster_id}}", refId='E', ), Target( expr= 'aws_ec_set_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="set_based-{{dimension_cache_cluster_id}}", refId='F', ), Target( expr= 'aws_ec_hash_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="hash-{{dimension_cache_cluster_id}}", refId='G', ), Target( expr= 'aws_ec_list_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="list-{{dimension_cache_cluster_id}}", refId='H', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_set_type_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="set_type-{{dimension_cache_cluster_id}}", refId='A', ), Target( expr= 'aws_ec_get_type_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="get-{{dimension_cache_cluster_id}}", refId='B', ), Target( expr= 'aws_ec_string_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="string-{{dimension_cache_cluster_id}}", refId='C', ), Target( expr= 'aws_ec_key_type_based_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="key-{{dimension_cache_cluster_id}}", refId='D', ), Target( expr= 'aws_ec_sorted_set_based_latency_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat= "sorted_set-{{dimension_cache_cluster_id}}", refId='E', ), Target( expr= 'aws_ec_set_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="set_based-{{dimension_cache_cluster_id}}", refId='F', ), Target( expr= 'aws_ec_hash_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="hash-{{dimension_cache_cluster_id}}", refId='G', ), Target( expr= 'aws_ec_list_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="list-{{dimension_cache_cluster_id}}", refId='H', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ) ]), 
Row(panels=[ Graph( title="Redis Database Memory Usage (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_database_memory_usage_percentage_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=PERCENT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), GaugePanel( title="Redis Swap (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_swap_usage_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], ), ]), Row(panels=[ Graph( title="Memory Fragmentation Ratio (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_memory_fragmentation_ratio_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ) ], yAxes=YAxes( YAxis(format=SHORT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Current Connections (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_curr_connections_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ) ], yAxes=YAxes( YAxis(format=SHORT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="New Connections (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_new_connections_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ) ], yAxes=YAxes( YAxis(format=SHORT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Hit Rate (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_cache_hit_rate_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ) ], yAxes=YAxes( YAxis(format=PERCENT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Misses (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_cache_misses_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ) ], yAxes=YAxes( YAxis(format=SHORT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Current Items Count (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_curr_items_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ) ], yAxes=YAxes( YAxis(format=SHORT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Evictions (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_evictions_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ) ], yAxes=YAxes( YAxis(format=SHORT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), ], ).auto_panel_ids()
dashboard = Dashboard( title="simple-test", time=Time("2015-01-01", "2019-01-01"), timezone="CET", rows=[ Row( panels=[ Graph( title="NEDU Aardgas 2015-2018", dataSource="InfluxDB", targets=[ InfluxDBTarget( query= 'SELECT mean("G1A") FROM "autogen"."nedu_aardgas_2015-2018" WHERE $timeFilter GROUP BY time($__interval) fill(null)', refId="A", ), InfluxDBTarget( query= 'SELECT mean("G2A") FROM "autogen"."nedu_aardgas_2015-2018" WHERE $timeFilter GROUP BY time($__interval) fill(null)', refId="B", ), InfluxDBTarget( query= 'SELECT mean("G2A") FROM "autogen"."nedu_aardgas_2015-2018" WHERE $timeFilter GROUP BY time($__interval) fill(null)', refId="C", ), InfluxDBTarget( query= 'SELECT mean("SPT") FROM "autogen"."nedu_aardgas_2015-2018" WHERE $timeFilter GROUP BY time($__interval) fill(null)', refId="D", ), ], yAxes=single_y_axis(format=SECONDS_FORMAT), # yAxes=YAxes( # YAxis(format=SECONDS_FORMAT), YAxis(format=SECONDS_FORMAT), # ), ), ], ), ], ).auto_panel_ids()
dashboard = Dashboard( title="{}s Dashboard".format(GRR_COMPONENT).title().replace("_", " "), rows=[ Row(panels=[panel(GRR_COMPONENT) for panel in row]) for row in GENERAL_PANELS ] + [ Row(panels=[ Graph( title="API Method Latency Rate", targets=[ Target( expr= 'rate(api_method_latency_sum[10m]) / rate(api_method_latency_count[10m])', legendFormat="Latency - Method: {{method_name}}", ), ], ), Graph( title="API Calls Count Rate by Status SUCCESS", targets=[ Target( expr= 'sum(rate(api_method_latency_count{status="SUCCESS"}[10m]))', legendFormat="Successful Calls Rate", ), ], ), Graph( title="API Calls Count Rate by other statuses (not SUCCESS)", targets=[ Target( expr= 'rate(api_method_latency_count{status!="SUCCESS"}[10m])', legendFormat="Unsuccessful Calls Rate", ), ], ), Graph( title="API Access Probe Latency", targets=[ Target( expr= 'rate(api_access_probe_latency_sum[10m]) / rate(api_access_probe_latency_count[10m])', legendFormat="Latency - Method: {{method_name}}", ), ], ), ]), ], ).auto_panel_ids()
def generate_elasticsearch_dashboard( name: str, client_id: str, influxdb_data_source: str, cloudwatch_data_source: str, environment: str, notifications: List[str], *args, **kwargs, ): """Generate Elasticsearch dashboard""" tags = ["elasticsearch", environment] rows = [ Row( panels=[ generate_elasticsearch_cpu_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, ), generate_elasticsearch_jvm_memory_pressure_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ), ], editable=EDITABLE, ), Row( panels=[ generate_elasticsearch_documents_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, ) ], editable=EDITABLE, ), Row( panels=[ generate_elasticsearch_storage_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ) ], editable=EDITABLE, ), Row( panels=[ generate_elasticsearch_requests_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, ) ], editable=EDITABLE, ), Row( panels=[ generate_elasticsearch_status_red_alert_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ), generate_elasticsearch_nodes_alert_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ), generate_elasticsearch_writes_blocked_alert_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ), generate_elasticsearch_automated_snapshot_failure_alert_graph( name=name, client_id=client_id, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ), ], editable=EDITABLE, ), ] return Dashboard( title="Elasticsearch: {}".format(name), editable=EDITABLE, tags=tags, timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, rows=rows, links=[DOCUMENTATION_LINK], refresh=DEFAULT_REFRESH, ).auto_panel_ids()
query="label_values(instance)"), chip_template, ] dashboard = Dashboard( title="Temperature", templating=Templating(template_list), panels=[ RowPanel( title="New Row", gridPos=GridPos(h=1, w=24, x=0, y=8), ), Graph( title="$chip", dataSource="Prometheus", targets=[ Target(expr=("sensors_temp_input{" + 'instance="$instance",chip="$chip"}'), legendFormat="{{feature}}", refId="A"), ], repeat=Repeat("v", "chip"), yAxes=YAxes( YAxis(format=CELSIUS_FORMAT), YAxis(format=SHORT_FORMAT), ), gridPos=GridPos(h=10, w=24, x=0, y=9), ), ], ).auto_panel_ids()
@staticmethod def create_all_rows(interval: int = 5) -> typing.List[Row]: return [ FlytePropeller.core_metrics(interval, False), FlytePropeller.metastore_metrics(interval, True), FlytePropeller.metastore_latencies(True), FlytePropeller.node_metrics(True), ] dashboard = Dashboard( tags=["flyte", "prometheus", "flytepropeller", "flyte-dataplane"], inputs=[ DataSourceInput( name=DATASOURCE_NAME, label="Prometheus", description="Prometheus server that connects to Flyte", pluginId="prometheus", pluginName="Prometheus", ), ], editable=False, title="Flyte Propeller Dashboard (via Prometheus)", rows=FlytePropeller.create_all_rows(interval=5), description="Flyte Propeller Dashboard. This is great for monitoring FlytePropeller / Flyte data plane deployments. This is mostly useful for the Flyte deployment maintainer", ).auto_panel_ids() if __name__ == "__main__": print(dashboard.to_json_data())
cls=DashboardEncoder) def upload_to_grafana(json, server, api_key, verify=True): ''' upload_to_grafana tries to upload dashboard to grafana and prints response :param json - dashboard json generated by grafanalib :param server - grafana server name :param api_key - grafana api key with read and write privileges ''' headers = { 'Authorization': f"Bearer {api_key}", 'Content-Type': 'application/json' } r = requests.post(f"https://{server}/api/dashboards/db", data=json, headers=headers, verify=verify) # TODO: add error handling print(f"{r.status_code} - {r.content}") grafana_api_key = getenv("GRAFANA_API_KEY") grafana_server = getenv("GRAFANA_SERVER") my_dashboard = Dashboard(title="My awesome dashboard", uid='abifsd') my_dashboard_json = get_dashboard_json(my_dashboard, overwrite=True) upload_to_grafana(my_dashboard_json, grafana_server, grafana_api_key)
from grafanalib.core import (Alert, AlertCondition, Dashboard, Graph, GaugePanel,
                             GreaterThan, OP_AND, OPS_FORMAT, Row, RTYPE_SUM,
                             SECONDS_FORMAT, SHORT_FORMAT, single_y_axis, Target,
                             TimeRange, YAxes, YAxis)

dashboard = Dashboard(
    title="Frontend Stats",
    rows=[
        Row(panels=[
            GaugePanel(
                title="Power Consumption",
                dataSource='prometheus',
                targets=[
                    Target(
                        expr='_mqtt:instantaneous_power_house',
                        legendFormat="Consumption Now",
                        refId='A',
                    ),
                ])
        ])
    ]).auto_panel_ids()
dashboard = Dashboard( title="SRE Elasticache Drilldowns", templating=Templating(list=[Environment, Cluster]), rows=[ Row(panels=[ Graph( title="Freeable Memory", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_freeable_memory_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=BYTES_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Bytes Used for Cache", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_bytes_used_for_cache_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=BYTES_FORMAT), YAxis(format=SHORT_FORMAT), ), ) ]), Row(panels=[ Graph( title="Swap Usage", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_swap_usage_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=BYTES_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Database Memory Usage", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_database_memory_usage_percentage_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=PERCENT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Freeable Memory", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_freeable_memory_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=BYTES_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Fragmentation Ratio", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_memory_fragmentation_ratio_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=PERCENT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Current Connections", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_curr_connections_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="New Connections", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_new_connections_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Cache Hits", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_cache_hits_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Cache Misses", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_cache_misses_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis SET Type Commands Latency (avg)", 
dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_set_type_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis GET Type Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_get_type_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis KEY Based Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_key_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis SORTED SET Based Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_sorted_set_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis HASH Based Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_hash_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis String Based Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_string_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis SET Based Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_set_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis LIST Based Commands Latency (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_list_based_cmds_latency_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format="µs"), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis SET Type Commands (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_set_type_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis GET Type Commands (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_get_type_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis KEY Based Commands (avg)", 
dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_key_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis SORTED SET Based Commands (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_sorted_set_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis HASH Based Commands (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_hash_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis String Based Commands (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_string_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Redis SET Based Commands (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_set_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis LIST Based Commands (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_list_based_cmds_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Reclaimed Items (key expiration events)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_reclaimed_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Current Items", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_curr_items_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Evicted Items", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_evictions_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Host CPU Utilization", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_cpuutilization_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=PERCENT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Redis CPU Utilization", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_engine_cpuutilization_average{environment="${environment}", 
dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=PERCENT_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Network Bytes In (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_network_bytes_in_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Network Bytes Out (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_network_bytes_out_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), Row(panels=[ Graph( title="Network Packets In (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_network_packets_in_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), Graph( title="Network Packets Out (avg)", dataSource=DATASOURCE, targets=[ Target( expr= 'aws_ec_network_packet_out_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}', legendFormat="{{dimension_cache_cluster_id}}", refId='A', ), ], yAxes=YAxes( YAxis(format=OPS_FORMAT), YAxis(format=SHORT_FORMAT), ), ), ]), ], ).auto_panel_ids()
def generate_sfn_dashboard( name: str, cloudwatch_data_source: str, lambda_insights_namespace: str, notifications: List[str], environment: str, lambdas: List[str], *args, **kwargs, ): """Create a dashboard for the step function""" tags = ["step-function", environment] if not name.startswith("arn:aws:states"): raise Exception("Statemachine ARN should be provided") sfn_name = name.split(":")[-1] sfn_execution_duration_graph = generate_sfn_execution_duration_graph( name=name, cloudwatch_data_source=cloudwatch_data_source, ) sfn_execution_metrics_graph = generate_sfn_execution_metrics_graph( name=name, cloudwatch_data_source=cloudwatch_data_source, notifications=notifications, ) rows = [ Row( title="Step Function Execution Metrics", showTitle=True, panels=[sfn_execution_duration_graph, sfn_execution_metrics_graph], ) ] if lambdas: tags = tags + ["lambda"] for lambda_fn in lambdas: lambda_metrics_row = Row( title="{} Lambda Metrics".format(lambda_fn), showTitle=True, collapse=False, panels=[ lambda_generate_invocations_graph(lambda_fn, cloudwatch_data_source, notifications=[]), lambda_generate_duration_graph(lambda_fn, cloudwatch_data_source), lambda_generate_memory_utilization_percentage_graph( lambda_fn, cloudwatch_data_source, lambda_insights_namespace, notifications=notifications, ), lambda_generate_memory_utilization_graph( lambda_fn, cloudwatch_data_source, lambda_insights_namespace), ], ) lambda_logs_row = Row( title="{} Lambda Logs".format(lambda_fn), showTitle=True, collapse=True, panels=[ lambda_generate_logs_panel(lambda_fn, cloudwatch_data_source), ], ) rows.append(lambda_metrics_row) rows.append(lambda_logs_row) return Dashboard( title="{}{}".format(SFN_DASHBOARD_PREFIX, sfn_name), editable=EDITABLE, tags=tags, timezone=TIMEZONE, sharedCrosshair=SHARED_CROSSHAIR, rows=rows, refresh=DEFAULT_REFRESH, ).auto_panel_ids()
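A hypothetical call sketch, mainly to illustrate the full state-machine ARN the function requires before it derives the short name; every argument value below is a placeholder, not from the original source.

```python
# Placeholder values throughout; the name must be the full state-machine ARN,
# since generate_sfn_dashboard raises unless it starts with "arn:aws:states".
dashboard = generate_sfn_dashboard(
    name="arn:aws:states:eu-west-1:123456789012:stateMachine:order-pipeline",
    cloudwatch_data_source="cloudwatch",
    lambda_insights_namespace="LambdaInsights",
    notifications=["notifier-slack"],
    environment="prod",
    lambdas=["order-pipeline-validate", "order-pipeline-ship"],
)
```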
] ) @staticmethod def create_all_apis(interval: int = 5) -> typing.List[Row]: rows = [] for api in FlyteAdmin.APIS: rows.append(FlyteAdmin.create_api_row(api, collapse=True, interval=interval)) return rows dashboard = Dashboard( tags=["flyte", "prometheus", "flyteadmin", "flyte-controlplane"], inputs=[ DataSourceInput( name=DATASOURCE_NAME, label="Prometheus", description="Prometheus server that connects to Flyte", pluginId="prometheus", pluginName="Prometheus", ), ], editable=False, title="Flyte Admin Dashboard (via Prometheus)", rows=FlyteAdmin.create_all_apis(interval=5), description="Flyte Admin/Control Plane Dashboard. This is great for monitoring FlyteAdmin and the Service API.", ).auto_panel_ids() if __name__ == "__main__": print(dashboard.to_json_data())
import io

# `target` and `GRAFANA_API_URL` are defined earlier in the script.
panel = Graph(
    title='mysql-maindb disk consumption',
    dataSource='default',
    targets=[target],
    yAxes=[
        YAxis(format=BYTES_FORMAT),
        YAxis(format=SHORT_FORMAT),
    ],
)
row = Row(panels=[panel])
db = Dashboard(
    title='Autogenerated MySQL Disk Consumption',
    rows=[row],
    time=Time('now-6M', 'now'),
)

# Serialize the dashboard to JSON and POST it to the Grafana HTTP API.
s = io.StringIO()
write_dashboard(db, s)
dashboard_json = s.getvalue()
print(dashboard_json)

payload = {
    "dashboard": json.loads(dashboard_json),
    "overwrite": True
    # "folderId": 0,
}
r = requests.post(GRAFANA_API_URL, json=payload)
def generate_api_gateways_dashboard(
    name: str,
    cloudwatch_data_source: str,
    lambda_insights_namespace: str,
    notifications: List[str],
    environment: str,
    lambdas: List[str],
    *args,
    **kwargs,
):
    """Generate API Gateway dashboard"""
    tags = ["api-gateway", environment]
    if lambdas:
        tags = tags + ["lambda"]

    api_gateway_graph = generate_api_gateway_requests_graph(
        name, cloudwatch_data_source, notifications)

    rows = [
        Row(title="API Gateway Metrics",
            showTitle=True,
            panels=[api_gateway_graph])
    ]

    if lambdas:
        for lambda_fn in lambdas:
            # One metrics row and one collapsed logs row per backing lambda.
            lambda_metrics_row = Row(
                title="{} Lambda Metrics".format(lambda_fn),
                showTitle=True,
                collapse=False,
                panels=[
                    lambda_generate_invocations_graph(lambda_fn,
                                                      cloudwatch_data_source,
                                                      notifications=[]),
                    lambda_generate_duration_graph(lambda_fn,
                                                   cloudwatch_data_source),
                    lambda_generate_memory_utilization_percentage_graph(
                        lambda_fn,
                        cloudwatch_data_source,
                        lambda_insights_namespace,
                        notifications=notifications,
                    ),
                    lambda_generate_memory_utilization_graph(
                        lambda_fn, cloudwatch_data_source,
                        lambda_insights_namespace),
                ],
            )
            lambda_logs_row = Row(
                title="{} Lambda Logs".format(lambda_fn),
                showTitle=True,
                collapse=True,
                panels=[
                    lambda_generate_logs_panel(lambda_fn,
                                               cloudwatch_data_source),
                ],
            )
            rows.append(lambda_metrics_row)
            rows.append(lambda_logs_row)

    return Dashboard(
        title="{} {}".format("API Gateway:", name),
        editable=EDITABLE,
        tags=tags,
        timezone=TIMEZONE,
        sharedCrosshair=SHARED_CROSSHAIR,
        refresh=DEFAULT_REFRESH,
        rows=rows,
    ).auto_panel_ids()
indent=2, cls=DashboardEncoder) def upload_to_grafana(json, server, api_key): ''' upload_to_grafana tries to upload dashboard to grafana and prints response :param json - dashboard json generated by grafanalib :param server - grafana server name :param api_key - grafana api key with read and write privileges ''' headers = { 'Authorization': f"Bearer {api_key}", 'Content-Type': 'application/json' } r = requests.post(f"https://{server}/api/dashboards/db", data=json, headers=headers) # TODO: add error handling print(f"{r.status_code} - {r.content}") grafana_api_key = getenv("GRAFANA_API_KEY") grafana_server = getenv("GRAFANA_SERVER") my_dashboard = Dashboard(title="My awesome dashboard") my_dashboard_json = get_dashboard_json(my_dashboard) upload_to_grafana(my_dashboard_json, grafana_server, grafana_api_key)