def generate_running_count_stats_panel(
    name: str,
    cloudwatch_data_source: str,
    cluster_name: str,
    grid_pos: GridPos,
) -> Stat:
    """Build the ECS task-count stat panel (Desired / Pending / Running)."""

    def task_count_target(alias: str, metric: str, **extra) -> CloudwatchMetricsTarget:
        # All three series read Container Insights maxima for this service.
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace=CONTAINER_INSIGHTS_NAMESPACE,
            statistics=["Maximum"],
            metricName=metric,
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
            **extra,
        )

    targets = [
        task_count_target("Desired", "DesiredTaskCount"),
        task_count_target("Pending", "PendingTaskCount"),
        # Running carries the alert ref id so alerts can reference it.
        task_count_target("Running", "RunningTaskCount", refId=ALERT_REF_ID),
    ]
    return Stat(
        title="Task Count",
        dataSource=cloudwatch_data_source,
        targets=targets,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        colorMode="background",
        alignment="center",
        reduceCalc="lastNotNull",
        thresholds=[{"color": "blue"}],
        gridPos=grid_pos,
    )
def lambda_generate_memory_utilization_graph(
    name: str,
    cloudwatch_data_source: str,
    lambda_insights_namespace: str,
    *args,
    **kwargs,
) -> Graph:
    """
    Generate a Lambda memory utilization graph.

    Plots the maximum used memory against the allocated (total) memory
    reported by Lambda Insights for the given function. No alert is
    attached to this panel; extra positional/keyword arguments are
    accepted (and ignored) for call-site compatibility.
    """
    targets = [
        CloudwatchMetricsTarget(
            alias="used_memory_max",
            namespace=lambda_insights_namespace,
            statistics=["Maximum"],
            metricName="used_memory_max",
            dimensions={"function_name": name},
        ),
        CloudwatchMetricsTarget(
            alias="allocated_memory",
            namespace=lambda_insights_namespace,
            statistics=["Maximum"],
            metricName="total_memory",
            dimensions={"function_name": name},
        ),
    ]
    yAxes = YAxes(YAxis(format="decmbytes"))
    seriesOverrides = [
        {
            "alias": "used_memory_max",
            "points": False,
            "color": colors.GREEN,
        },
        {
            # Allocated memory is the ceiling; drawn unfilled in red.
            "alias": "allocated_memory",
            "points": False,
            "color": colors.RED,
            "fill": 0,
        },
    ]
    return Graph(
        title="Lambda Memory Utilization",
        dataSource=cloudwatch_data_source,
        targets=targets,
        seriesOverrides=seriesOverrides,
        yAxes=yAxes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        alert=None,
        alertThreshold=ALERT_THRESHOLD,
    ).auto_ref_ids()
def generate_req_count_graph(
    cloudwatch_data_source: str,
    loadbalancer: str,
    target_group: str,
    grid_pos: GridPos,
) -> Graph:
    """Build the ALB request-count graph (per service and per container)."""
    alias_per_service = "RequestCount Per Service"
    alias_per_container = "RequestCount Per Container"

    def sum_target(alias: str, metric: str) -> CloudwatchMetricsTarget:
        # Both series are Sum statistics over the ALB target-group metrics.
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace="AWS/ApplicationELB",
            statistics=["Sum"],
            metricName=metric,
            dimensions={"LoadBalancer": loadbalancer, "TargetGroup": target_group},
        )

    targets = [
        sum_target(alias_per_service, "RequestCount"),
        sum_target(alias_per_container, "RequestCountPerTarget"),
    ]
    series_overrides = [
        {"alias": alias_per_service, "color": colors.YELLOW, "fill": 0},
        {"alias": alias_per_container, "color": colors.GREEN, "fill": 0},
    ]
    return Graph(
        title="Requests",
        dataSource=cloudwatch_data_source,
        targets=targets,
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        gridPos=grid_pos,
    ).auto_ref_ids()
def generate_elasticache_redis_swap_and_memory_usage_graph(
        cache_cluster_id: str, cloudwatch_data_source: str) -> Graph:
    """Build the ElastiCache Redis memory and swap usage bar graph."""
    bytes_alias = "Bytes used for cache"
    swap_alias = "Swap Usage"

    def maximum_target(alias: str, metric: str) -> CloudwatchMetricsTarget:
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={"CacheClusterId": cache_cluster_id},
            metricName=metric,
        )

    targets = [
        maximum_target(bytes_alias, "BytesUsedForCache"),
        maximum_target(swap_alias, "SwapUsage"),
    ]
    # Both series drawn as lines on top of the bar-styled graph defaults.
    series_overrides = [
        {"alias": swap_alias, "color": colors.BLUE, "lines": True, "bars": False},
        {"alias": bytes_alias, "color": colors.GREEN, "lines": True, "bars": False},
    ]
    return Graph(
        title="Memory and Swap usage",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=single_y_axis(format=BYTES),
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
def generate_rds_network_throughput_graph(name: str, cloudwatch_data_source: str):
    """Build the RDS network throughput graph (RX above, TX mirrored below)."""

    def throughput_target(alias: str, metric: str) -> CloudwatchMetricsTarget:
        return CloudwatchMetricsTarget(
            alias=alias,
            metricName=metric,
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        )

    targets = [
        throughput_target("RX", "NetworkReceiveThroughput"),
        throughput_target("TX", "NetworkTransmitThroughput"),
    ]
    series_overrides = [
        {
            # TX is flipped to negative-Y so in/out traffic mirror each other.
            "alias": "TX",
            "color": colors.GREEN,
            "transform": "negative-Y",
            "fillGradient": 10,
        },
        {"alias": "RX", "color": colors.YELLOW, "fillGradient": 10},
    ]
    return Graph(
        title="Network throughput",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=single_y_axis(format=BYTES_SEC, min=None),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        seriesOverrides=series_overrides,
    ).auto_ref_ids()
def generate_rds_database_connections_graph(name: str, cloudwatch_data_source: str):
    """Build the RDS database-connections graph (min / mean / max series)."""
    min_alias = "min"
    max_alias = "max"
    mean_alias = "mean"

    def connections_target(alias: str, statistic: str, **extra) -> CloudwatchMetricsTarget:
        return CloudwatchMetricsTarget(
            alias=alias,
            metricName="DatabaseConnections",
            statistics=[statistic],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
            **extra,
        )

    targets = [
        # The max series carries the alert ref id.
        connections_target(max_alias, "Maximum", refId=ALERT_REF_ID),
        connections_target(mean_alias, "Average"),
        connections_target(min_alias, "Minimum"),
    ]
    return Graph(
        title="Database connections",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=single_y_axis(format=SHORT_FORMAT),
        seriesOverrides=get_series_overrides(min_alias, mean_alias, max_alias),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
    ).auto_ref_ids()
def test_should_generate_deployment_graph(self):
    """Deployment panel: hidden-axis TimeSeries with one DeploymentCount target."""
    name = "service-1"
    cloudwatch_data_source = "prod"
    cluster_name = "cluster-1"
    grid_pos = GridPos(1, 2, 3, 4)
    panel = generate_deployment_graph(
        name=name,
        cloudwatch_data_source=cloudwatch_data_source,
        cluster_name=cluster_name,
        grid_pos=grid_pos,
    )
    panel.should.be.a(TimeSeries)
    panel.title.should.eql("Deployments")
    panel.dataSource.should.eql(cloudwatch_data_source)
    panel.targets.should.have.length_of(1)
    # The single target must read Container Insights DeploymentCount maxima.
    panel.targets[0].should.eql(
        CloudwatchMetricsTarget(
            alias="Deployment",
            namespace="ECS/ContainerInsights",
            statistics=["Maximum"],
            metricName="DeploymentCount",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
        )
    )
    panel.gridPos.should.eql(grid_pos)
def test_should_generate_pending_count_graph(self):
    """Pending-tasks panel: Graph with one PendingTaskCount target (refId A)."""
    name = "service-1"
    cloudwatch_data_source = "prod"
    cluster_name = "cluster-1"
    grid_pos = GridPos(1, 2, 3, 4)
    # Empty notifications: no alert expected on the generated panel.
    notifications = []
    panel = generate_pending_count_graph(
        name=name,
        cloudwatch_data_source=cloudwatch_data_source,
        cluster_name=cluster_name,
        grid_pos=grid_pos,
        notifications=notifications,
    )
    panel.should.be.a(Graph)
    panel.title.should.eql("Pending Tasks")
    panel.dataSource.should.eql(cloudwatch_data_source)
    panel.targets.should.have.length_of(1)
    panel.targets[0].should.eql(
        CloudwatchMetricsTarget(
            alias="Containers",
            namespace="ECS/ContainerInsights",
            statistics=["Maximum"],
            metricName="PendingTaskCount",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
            refId="A",
        )
    )
    panel.gridPos.should.eql(grid_pos)
def generate_deployment_graph(
    name: str,
    cloudwatch_data_source: str,
    cluster_name: str,
    grid_pos: GridPos,
) -> TimeSeries:
    """Build a minimal time-series panel that marks ECS deployments."""
    deployment_target = CloudwatchMetricsTarget(
        alias="Deployment",
        namespace=CONTAINER_INSIGHTS_NAMESPACE,
        statistics=["Maximum"],
        metricName="DeploymentCount",
        dimensions={"ServiceName": name, "ClusterName": cluster_name},
    )
    # Axis and tooltip are hidden: the panel only shows when deployments occur.
    return TimeSeries(
        title="Deployments",
        dataSource=cloudwatch_data_source,
        targets=[deployment_target],
        transparent=TRANSPARENT,
        editable=EDITABLE,
        axisPlacement="hidden",
        tooltipMode="none",
        gridPos=grid_pos,
    )
def test_should_generate_lambda_memory_utilization_percentage_graph_with_alert_notifications(
    self,
):
    """With notifications set, the graph must carry an Alert targeting refId A."""
    lambda_name = "lambda-1"
    cloudwatch_data_source = "cloudwatch"
    lambda_insights_namespace = "insights"
    notifications = ["lorem", "ipsum"]
    # The alerting query: average memory_utilization for the function.
    expected_alert_query = CloudwatchMetricsTarget(
        alias="Avg",
        namespace=lambda_insights_namespace,
        statistics=["Average"],
        metricName="memory_utilization",
        dimensions={"function_name": lambda_name},
        refId="A",
    )
    generated_lambda_graph = lambda_generate_memory_utilization_percentage_graph(
        name=lambda_name,
        cloudwatch_data_source=cloudwatch_data_source,
        lambda_insights_namespace=lambda_insights_namespace,
        notifications=notifications,
    )
    generated_lambda_graph.should.have.property("alert").be.a(Alert)
    generated_lambda_graph.alert.executionErrorState.should.eql("alerting")
    generated_lambda_graph.alert.noDataState.should.eql("no_data")
    generated_lambda_graph.alert.alertConditions.should.have.length_of(1)
    generated_lambda_graph.alert.alertConditions[0].should.be.a(
        AlertCondition)
    generated_lambda_graph.alert.alertConditions[0].target.should.eql(
        Target(refId="A"))
    generated_lambda_graph.targets.should.contain(expected_alert_query)
def test_should_create_lambda_sqs_dlq_fifo_graph(self):
    """FIFO DLQ graph: '.fifo' suffix applied to queue name and title, alert set."""
    lambda_name = "lambda-1"
    sqs_dlq_name = lambda_name + "-dlq"
    cloudwatch_data_source = "influxdb"
    notifications = ["lorem"]
    # fifo=True should append ".fifo" to the queue dimension and title.
    expected_alert_query = CloudwatchMetricsTarget(
        alias="Approximate number of messages available",
        namespace="AWS/SQS",
        statistics=["Maximum"],
        metricName="ApproximateNumberOfMessagesVisible",
        dimensions={"QueueName": sqs_dlq_name + ".fifo"},
        refId="A",
    )
    generated_lambda_graph = create_lambda_sqs_dlq_graph(
        name=sqs_dlq_name,
        cloudwatch_data_source=cloudwatch_data_source,
        notifications=notifications,
        fifo=True,
    )
    generated_lambda_graph.should.be.a(Graph)
    generated_lambda_graph.should.have.property("title").with_value.equal(
        "SQS Dead Letter Queue: {}.fifo".format(sqs_dlq_name))
    generated_lambda_graph.should.have.property(
        "dataSource").with_value.equal(cloudwatch_data_source)
    generated_lambda_graph.should.have.property("targets")
    generated_lambda_graph.targets.should.have.length_of(1)
    generated_lambda_graph.targets[0].should.eql(expected_alert_query)
    # Notifications were provided, so an alert must be attached.
    generated_lambda_graph.should.have.property("alert").be.a(Alert)
    generated_lambda_graph.alert.executionErrorState.should.eql("alerting")
    generated_lambda_graph.alert.noDataState.should.eql("no_data")
def create_lambda_sqs_graph(name: str, cloudwatch_data_source: str, fifo: bool):
    """Create SQS graph"""
    # FIFO queues carry a mandatory ".fifo" suffix in their CloudWatch name.
    queue_name = name + ".fifo" if fifo else name
    sent_messages_target = CloudwatchMetricsTarget(
        alias="Number of messages sent to the queue",
        namespace="AWS/SQS",
        statistics=["Sum"],
        metricName="NumberOfMessagesSent",
        dimensions={"QueueName": queue_name},
    )
    return Graph(
        title="SQS: {}".format(queue_name),
        dataSource=cloudwatch_data_source,
        targets=[sent_messages_target],
        yAxes=single_y_axis(format=SHORT_FORMAT),
        transparent=TRANSPARENT,
        editable=EDITABLE,
    ).auto_ref_ids()
def test_should_create_lambda_sqs_fifo_graph(self):
    """FIFO SQS graph: queue name and title gain the '.fifo' suffix."""
    lambda_name = "lambda-1"
    sqs_name = lambda_name + ".fifo"
    cloudwatch_data_source = "cloudwatch"
    expected_query = CloudwatchMetricsTarget(
        alias="Number of messages sent to the queue",
        namespace="AWS/SQS",
        statistics=["Sum"],
        metricName="NumberOfMessagesSent",
        dimensions={"QueueName": sqs_name},
        refId="A",
    )
    generated_lambda_graph = create_lambda_sqs_graph(
        name=lambda_name,
        cloudwatch_data_source=cloudwatch_data_source,
        fifo=True)
    generated_lambda_graph.should.be.a(Graph)
    generated_lambda_graph.should.have.property("title").with_value.equal(
        "SQS: {}".format(sqs_name))
    generated_lambda_graph.should.have.property(
        "dataSource").with_value.equal(cloudwatch_data_source)
    generated_lambda_graph.should.have.property("targets")
    generated_lambda_graph.targets.should.have.length_of(1)
    generated_lambda_graph.targets[0].should.eql(expected_query)
def generate_rds_free_storage_space_graph(name: str, cloudwatch_data_source: str):
    """Build the RDS free-storage graph; its single target is alert-capable."""
    free_storage_target = CloudwatchMetricsTarget(
        alias="Free storage",
        metricName="FreeStorageSpace",
        # Minimum is the pessimistic reading: the lowest free space seen.
        statistics=["Minimum"],
        namespace=NAMESPACE,
        dimensions={"DBInstanceIdentifier": name},
        period="1m",
        refId=ALERT_REF_ID,
    )
    return Graph(
        title="Free storage",
        dataSource=cloudwatch_data_source,
        targets=[free_storage_target],
        yAxes=single_y_axis(format=BYTES),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
    ).auto_ref_ids()
def test_should_generate_proper_api_gateway_requests_graph_with_notifications(self):
    """API Gateway requests graph with notifications: alert on the 5xx target."""
    apig_name = "apig-1"
    cloudwatch_data_source = "prod"
    lambda_insights_namespace = "insights"
    notifications = ["foo-1", "foo-2"]
    targets = [
        CloudwatchMetricsTarget(
            alias="5xx",
            namespace="AWS/ApiGateway",
            statistics=["Sum"],
            metricName="5XXError",
            dimensions={"ApiName": apig_name},
            refId="A",
        ),
        CloudwatchMetricsTarget(
            alias="requests",
            namespace="AWS/ApiGateway",
            dimensions={"ApiName": apig_name},
            statistics=["Sum"],
            metricName="Count",
            refId="B",
        ),
        CloudwatchMetricsTarget(
            # NOTE(review): alias "requests" duplicates the Count target's
            # alias although this queries 4XXError — looks like a copy-paste
            # slip, but only targets[0] is asserted below; confirm intent.
            alias="requests",
            namespace="AWS/ApiGateway",
            dimensions={"ApiName": apig_name},
            statistics=["Sum"],
            metricName="4XXError",
            refId="C",
        ),
    ]
    generated_graph = generate_api_gateway_requests_graph(
        name=apig_name,
        cloudwatch_data_source=cloudwatch_data_source,
        lambda_insights_namespace=lambda_insights_namespace,
        notifications=notifications,
    )
    generated_graph.should.be.a(Graph)
    generated_graph.title.should.match(r"API Gateway Requests")
    generated_graph.dataSource.should.eql(cloudwatch_data_source)
    generated_graph.alert.should.be.a(Alert)
    generated_graph.alert.alertConditions.should.have.length_of(1)
    generated_graph.alert.alertConditions[0].target.should.equal(Target(refId="A"))
    generated_graph.targets[0].should.eql(targets[0])
def test_should_generate_lambda_duration_graph(self):
    """Duration graph: Min/Avg/Max targets, no alert when notifications empty."""
    lambda_name = "lambda-1"
    cloudwatch_data_source = "cloudwatch"
    lambda_insights_namespace = "insights"
    expected_targets = [
        CloudwatchMetricsTarget(
            alias="Min",
            namespace="AWS/Lambda",
            statistics=["Minimum"],
            metricName="Duration",
            dimensions={"FunctionName": lambda_name},
            refId="A",
        ),
        CloudwatchMetricsTarget(
            alias="Avg",
            namespace="AWS/Lambda",
            statistics=["Average"],
            metricName="Duration",
            dimensions={"FunctionName": lambda_name},
            refId="B",
        ),
        CloudwatchMetricsTarget(
            alias="Max",
            namespace="AWS/Lambda",
            statistics=["Maximum"],
            metricName="Duration",
            dimensions={"FunctionName": lambda_name},
            refId="C",
        ),
    ]
    generated_lambda_graph = lambda_generate_duration_graph(
        name=lambda_name,
        cloudwatch_data_source=cloudwatch_data_source,
        lambda_insights_namespace=lambda_insights_namespace,
        notifications=[],
    )
    generated_lambda_graph.should.be.a(Graph)
    generated_lambda_graph.should.have.property("title").with_value.equal(
        "Lambda Invocation Duration")
    generated_lambda_graph.should.have.property(
        "dataSource").with_value.equal(cloudwatch_data_source)
    # No notifications passed in, so the panel must carry no alert.
    generated_lambda_graph.should.have.property("alert").with_value.equal(
        None)
    generated_lambda_graph.should.have.property("targets")
    generated_lambda_graph.targets.should.have.length_of(3)
    generated_lambda_graph.targets.should.equal(expected_targets)
def generate_elasticsearch_status_red_alert_graph(
        name: str, client_id: str, cloudwatch_data_source: str,
        notifications: List[str]) -> Graph:
    """Build the Elasticsearch 'status red' graph, alerting when notified."""
    red_status_target = CloudwatchMetricsTarget(
        alias="Red status",
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions={"DomainName": name, "ClientId": client_id},
        metricName="ClusterStatus.red",
    )

    if notifications:
        # Any non-zero red-status sample within the window fires the alert.
        alert = Alert(
            name="Elasticsearch is in status red",
            message="Elasticsearch is in status red",
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(0),
                    reducerType=RTYPE_MAX,
                    operator=OP_OR,
                ),
            ],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )
    else:
        alert = None

    return Graph(
        title="Status RED alerts",
        dataSource=cloudwatch_data_source,
        targets=[red_status_target],
        yAxes=YAxes(YAxis(format=SHORT_FORMAT), YAxis(format=SHORT_FORMAT)),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=alert,
    ).auto_ref_ids()
def test_should_generate_running_count_stats_panel(self):
    """Task-count Stat panel: Desired/Pending/Running, Running holds refId A."""
    name = "service-1"
    cloudwatch_data_source = "prod"
    cluster_name = "cluster-1"
    grid_pos = GridPos(1, 2, 3, 4)
    expected_targets = [
        CloudwatchMetricsTarget(
            alias="Desired",
            namespace="ECS/ContainerInsights",
            statistics=["Maximum"],
            metricName="DesiredTaskCount",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
        ),
        CloudwatchMetricsTarget(
            alias="Pending",
            namespace="ECS/ContainerInsights",
            statistics=["Maximum"],
            metricName="PendingTaskCount",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
        ),
        CloudwatchMetricsTarget(
            alias="Running",
            namespace="ECS/ContainerInsights",
            statistics=["Maximum"],
            metricName="RunningTaskCount",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
            refId="A",
        ),
    ]
    panel = generate_running_count_stats_panel(
        name=name,
        cloudwatch_data_source=cloudwatch_data_source,
        cluster_name=cluster_name,
        grid_pos=grid_pos,
    )
    panel.should.be.a(Stat)
    panel.title.should.eql("Task Count")
    panel.dataSource.should.eql(cloudwatch_data_source)
    panel.gridPos.should.eql(grid_pos)
    panel.colorMode.should.eql("background")
    panel.alignment.should.eql("center")
    panel.targets.should.have.length_of(3)
    panel.targets.should.eql(expected_targets)
def generate_rds_disk_ops_graph(name: str, cloudwatch_data_source: str):
    """Build the RDS disk-operations graph (write/read IOPS and queue depth)."""

    def disk_target(alias: str, metric: str) -> CloudwatchMetricsTarget:
        return CloudwatchMetricsTarget(
            alias=alias,
            metricName=metric,
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        )

    targets = [
        disk_target("write iops", "WriteIOPS"),
        disk_target("read iops", "ReadIOPS"),
        disk_target("disk queue depth", "DiskQueueDepth"),
    ]
    return Graph(
        title="Disk iops",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=single_y_axis(format=SHORT_FORMAT, min=None),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
    ).auto_ref_ids()
def test_should_generate_sfn_execution_duration_graph(self):
    """Step Function duration graph: Min/Avg/Max targets, no alert."""
    sfn_name = "arn:aws:states:eu-west-1:1234567890:stateMachine:sfn-1"
    cloudwatch_data_source = "prod"
    notifications = []
    expected_targets = [
        CloudwatchMetricsTarget(
            alias="Min",
            namespace="AWS/States",
            metricName="ExecutionTime",
            statistics=["Minimum"],
            dimensions={"StateMachineArn": sfn_name},
            refId="A",
        ),
        CloudwatchMetricsTarget(
            alias="Avg",
            namespace="AWS/States",
            metricName="ExecutionTime",
            statistics=["Average"],
            dimensions={"StateMachineArn": sfn_name},
            refId="B",
        ),
        CloudwatchMetricsTarget(
            alias="Max",
            namespace="AWS/States",
            metricName="ExecutionTime",
            statistics=["Maximum"],
            dimensions={"StateMachineArn": sfn_name},
            refId="C",
        ),
    ]
    generated_graph = generate_sfn_execution_duration_graph(
        name=sfn_name,
        cloudwatch_data_source=cloudwatch_data_source,
        notifications=notifications,
    )
    generated_graph.should.be.a(Graph)
    generated_graph.title.should.match(r"Step function execution duration")
    generated_graph.dataSource.should.equal(cloudwatch_data_source)
    generated_graph.targets.should.have.length_of(3)
    generated_graph.targets.should.equal(expected_targets)
    # Empty notifications must leave the graph alert-free.
    generated_graph.alert.should.be(None)
def test_should_generate_cpu_utilization_graph(self):
    """ECS CPU utilization graph: Min/Avg/Max CPUUtilization targets."""
    name = "service-1"
    cloudwatch_data_source = "prod"
    cluster_name = "cluster-1"
    grid_pos = GridPos(1, 2, 3, 4)
    panel = generate_cpu_utilization_graph(
        name=name,
        cloudwatch_data_source=cloudwatch_data_source,
        cluster_name=cluster_name,
        grid_pos=grid_pos,
    )
    expected_targets = [
        CloudwatchMetricsTarget(
            alias="Min",
            namespace="AWS/ECS",
            statistics=["Minimum"],
            metricName="CPUUtilization",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
        ),
        CloudwatchMetricsTarget(
            alias="Avg",
            namespace="AWS/ECS",
            statistics=["Average"],
            metricName="CPUUtilization",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
        ),
        CloudwatchMetricsTarget(
            alias="Max",
            namespace="AWS/ECS",
            statistics=["Maximum"],
            metricName="CPUUtilization",
            dimensions={"ServiceName": name, "ClusterName": cluster_name},
        ),
    ]
    panel.should.be.a(Graph)
    panel.title.should.eql("CPU Utilization Percentage")
    panel.dataSource.should.eql(cloudwatch_data_source)
    panel.targets.should.have.length_of(3)
    panel.targets.should.eql(expected_targets)
    panel.gridPos.should.eql(grid_pos)
def generate_desired_count_graph(
    name: str,
    cluster_name: str,
    max: int,
    cloudwatch_data_source: str,
    notifications: List[str],
    grid_pos: GridPos,
):
    """Build the ECS desired-task-count graph, alerting near the configured max."""
    desired_count_target = CloudwatchMetricsTarget(
        alias="Containers",
        namespace=CONTAINER_INSIGHTS_NAMESPACE,
        statistics=["Maximum"],
        metricName="DesiredTaskCount",
        dimensions={"ServiceName": name, "ClusterName": cluster_name},
        refId=ALERT_REF_ID,
    )

    alert = None
    # Alerting only makes sense when there are recipients and headroom (>1).
    if notifications and max > 1:
        threshold = 0.9 * max  # fire at 90% of the configured maximum
        alert = Alert(
            name="{} Desired count of containers nearing the max".format(name),
            message="{} is having Desired count of containers nearing the max".
            format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("15m", "now"),
                    evaluator=GreaterThan(threshold),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                )
            ],
            gracePeriod="1m",
            notifications=notifications,
        )

    return Graph(
        title="Desired Tasks",
        dataSource=cloudwatch_data_source,
        targets=[desired_count_target],
        transparent=TRANSPARENT,
        editable=EDITABLE,
        alert=alert,
        gridPos=grid_pos,
        alertThreshold=ALERT_THRESHOLD,
    ).auto_ref_ids()
def generate_rds_transaction_id_graph(name: str, cloudwatch_data_source: str,
                                      notifications: List[str]):
    """Build the RDS transaction-id usage graph, alerting on wraparound risk."""
    txid_target = CloudwatchMetricsTarget(
        alias="Transaction ids used",
        metricName="MaximumUsedTransactionIDs",
        statistics=["Maximum"],
        namespace=NAMESPACE,
        dimensions={"DBInstanceIdentifier": name},
        period="1m",
        refId=ALERT_REF_ID,
    )

    alert = None
    if notifications:
        # One billion used transaction ids signals approaching XID wraparound.
        alert = Alert(
            name="{} transaction ids used Errors".format(name),
            message="{} is having transaction ids used errors".format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(1000000000),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                )
            ],
            gracePeriod="2m",
            frequency="2m",
            notifications=notifications,
        )

    return Graph(
        title="Transaction ids used",
        dataSource=cloudwatch_data_source,
        targets=[txid_target],
        yAxes=single_y_axis(format=SHORT_FORMAT),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    ).auto_ref_ids()
def create_lambda_sqs_dlq_graph(name: str, cloudwatch_data_source: str,
                                fifo: bool, notifications: List[str]):
    """Create SQS Deadletter graph"""
    # FIFO queues carry a mandatory ".fifo" suffix in their CloudWatch name.
    if fifo:
        name += ".fifo"

    visible_messages_target = CloudwatchMetricsTarget(
        alias="Approximate number of messages available",
        namespace="AWS/SQS",
        statistics=["Maximum"],
        metricName="ApproximateNumberOfMessagesVisible",
        dimensions={"QueueName": name},
        # Only wire up the alert ref id when an alert will actually exist.
        refId=ALERT_REF_ID if notifications else None,
    )

    alert = None
    # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-monitoring-using-cloudwatch.html
    # https://aws.amazon.com/about-aws/whats-new/2019/12/amazon-sqs-now-supports-1-minute-cloudwatch-metrics/
    if notifications:
        # Any message in a DLQ indicates a processing failure, so alert on > 0.
        alert = Alert(
            name="{} messages".format(name),
            message="{} is having messages".format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(0),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                ),
            ],
            gracePeriod="5m",
            notifications=notifications,
        )

    return Graph(
        title="SQS Dead Letter Queue: {}".format(name),
        dataSource=cloudwatch_data_source,
        targets=[visible_messages_target],
        yAxes=single_y_axis(format=SHORT_FORMAT),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        alert=alert,
        alertThreshold=ALERT_THRESHOLD,
    ).auto_ref_ids()
def test_should_generate_lambda_memory_utilization_percentage_graph(self):
    """Memory utilization graph: used vs. allocated targets, no alert."""
    lambda_name = "lambda-1"
    cloudwatch_data_source = "cloudwatch"
    lambda_insights_namespace = "insights"
    expected_targets = [
        CloudwatchMetricsTarget(
            alias="used_memory_max",
            namespace=lambda_insights_namespace,
            statistics=["Maximum"],
            metricName="used_memory_max",
            dimensions={"function_name": lambda_name},
            refId="A",
        ),
        CloudwatchMetricsTarget(
            alias="allocated_memory",
            namespace=lambda_insights_namespace,
            statistics=["Maximum"],
            metricName="total_memory",
            dimensions={"function_name": lambda_name},
            refId="B",
        ),
    ]
    generated_lambda_graph = lambda_generate_memory_utilization_graph(
        name=lambda_name,
        cloudwatch_data_source=cloudwatch_data_source,
        lambda_insights_namespace=lambda_insights_namespace,
        notifications=[],
    )
    generated_lambda_graph.should.be.a(Graph)
    generated_lambda_graph.should.have.property("title").with_value.equal(
        "Lambda Memory Utilization")
    generated_lambda_graph.should.have.property(
        "dataSource").with_value.equal(cloudwatch_data_source)
    # This panel never carries an alert.
    generated_lambda_graph.should.have.property("alert").with_value.equal(
        None)
    generated_lambda_graph.should.have.property("targets")
    generated_lambda_graph.targets.should.have.length_of(2)
    generated_lambda_graph.targets.should.equal(expected_targets)
def generate_rds_disk_latency_graph(name: str, cloudwatch_data_source: str):
    """Build the RDS disk-latency graph (read and write latency in seconds)."""

    def latency_target(alias: str, metric: str) -> CloudwatchMetricsTarget:
        return CloudwatchMetricsTarget(
            alias=alias,
            metricName=metric,
            statistics=["Maximum"],
            namespace=NAMESPACE,
            dimensions={"DBInstanceIdentifier": name},
            period="1m",
        )

    targets = [
        latency_target("read latency", "ReadLatency"),
        latency_target("write latency", "WriteLatency"),
    ]
    return Graph(
        title="Disk latency",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=single_y_axis(format=SECONDS),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
    ).auto_ref_ids()
def generate_elasticache_redis_connections_graph(
        cache_cluster_id: str, cloudwatch_data_source: str) -> Graph:
    """Build the ElastiCache Redis current-connections graph."""
    connections_alias = "Current connections"
    connections_target = CloudwatchMetricsTarget(
        alias=connections_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions={"CacheClusterId": cache_cluster_id},
        metricName="CurrConnections",
        # Alert-capable: the single series carries the alert ref id.
        refId=ALERT_REF_ID,
    )
    series_overrides = [
        {
            "alias": connections_alias,
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
    ]
    return Graph(
        title="Current connections",
        dataSource=cloudwatch_data_source,
        targets=[connections_target],
        yAxes=single_y_axis(format=SHORT_FORMAT),
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
def generate_elasticache_redis_latency_graph(
        cache_cluster_id: str, cloudwatch_data_source: str) -> Graph:
    """Build the ElastiCache Redis string-command latency graph."""
    latency_alias = "String based CMDs latency"
    latency_target = CloudwatchMetricsTarget(
        alias=latency_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions={"CacheClusterId": cache_cluster_id},
        metricName="StringBasedCmdsLatency",
    )
    series_overrides = [
        {
            "alias": latency_alias,
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
    ]
    return Graph(
        title="Latency",
        dataSource=cloudwatch_data_source,
        targets=[latency_target],
        yAxes=single_y_axis(format=MILLISECONDS_FORMAT),
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
def generate_elasticache_redis_network_out_graph(
        cache_cluster_id: str, cloudwatch_data_source: str) -> Graph:
    """Build the ElastiCache Redis outbound-network bytes graph."""
    network_out_alias = "Network bytes out"
    network_out_target = CloudwatchMetricsTarget(
        alias=network_out_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions={"CacheClusterId": cache_cluster_id},
        metricName="NetworkBytesOut",
    )
    series_overrides = [
        {
            "alias": network_out_alias,
            "color": colors.RED,
            "lines": False,
            "bars": True,
        },
    ]
    return Graph(
        title="Network out",
        dataSource=cloudwatch_data_source,
        targets=[network_out_target],
        yAxes=single_y_axis(format=BYTES),
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
def generate_elasticsearch_cpu_graph(name: str, client_id: str,
                                     cloudwatch_data_source: str) -> Graph:
    """Build the Elasticsearch CPU utilization graph."""
    # The alias doubles as the panel title.
    alias = "CPU utilization"
    cpu_target = CloudwatchMetricsTarget(
        alias=alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions={"DomainName": name, "ClientId": client_id},
        metricName="CPUUtilization",
    )
    series_overrides = [
        {
            "alias": alias,
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
    ]
    return Graph(
        title=alias,
        dataSource=cloudwatch_data_source,
        targets=[cpu_target],
        yAxes=single_y_axis(format=PERCENT_FORMAT),
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()