def number_of_active_processes_graph(grr_component):
    """Build the "Number of Active Processes" panel for a GRR component.

    The panel plots ``sum(up{job="grr_<component>"})`` and attaches an alert
    that fires when that sum drops below the configured threshold.

    Args:
      grr_component: Name of the GRR component (used in the job label and
        in the alert message).

    Returns:
      A grafanalib Graph with the alert attached.
    """
    # Same expression drives both the panel target and the alert condition.
    up_expr = 'sum(up{{job="grr_{}"}})'.format(grr_component)
    threshold = config.ACTIVE_PROCESSES_ALERTING_CONDITION

    alert = Alert(
        name="Number of Active Processes alert",
        message="The number of active {} processes is below {}".format(
            grr_component.capitalize(), threshold),
        alertConditions=[
            AlertCondition(
                Target(
                    expr=up_expr,
                    legendFormat="Active Processes",
                ),
                timeRange=TimeRange("10s", "now"),
                evaluator=LowerThan(threshold),
                operator=OP_AND,
                reducerType=RTYPE_SUM,
            ),
        ],
    )

    return Graph(
        title="Number of Active Processes",
        targets=[
            Target(
                expr=up_expr,
                legendFormat="Active Processes",
            ),
        ],
        alert=alert,
    )
def generate_rds_burst_balance_graph(name: str, cloudwatch_data_source: str,
                                     notifications: List[str]):
    """Generate rds graph.

    Plots the CloudWatch ``BurstBalance`` metric (minimum over 1m) for the
    given RDS instance.  When *notifications* is non-empty, an alert is
    attached that fires if the balance stays below 40% over the last 5m.

    Args:
      name: RDS DB instance identifier.
      cloudwatch_data_source: Grafana data source name for CloudWatch.
      notifications: Alert notification channel ids; empty disables alerting.

    Returns:
      A grafanalib Graph (with ref ids auto-assigned).
    """
    burst_target = CloudwatchMetricsTarget(
        alias="Burst balance",
        metricName="BurstBalance",
        statistics=["Minimum"],
        namespace=NAMESPACE,
        dimensions={"DBInstanceIdentifier": name},
        period="1m",
        refId=ALERT_REF_ID,
    )

    # Alerting is opt-in: only wired up when notification channels exist.
    alert = None
    if notifications:
        alert = Alert(
            name="{} Burst Balance Errors".format(name),
            message="{} is having Burst Balance errors".format(name),
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=LowerThan(40),
                    reducerType=RTYPE_MAX,
                    operator=OP_AND,
                )
            ],
            gracePeriod="2m",
            frequency="2m",
            notifications=notifications,
        )

    graph = Graph(
        title="Burst Balance",
        dataSource=cloudwatch_data_source,
        targets=[burst_target],
        yAxes=single_y_axis(format=PERCENT_FORMAT),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    )
    return graph.auto_ref_ids()
def test_should_generate_elasticache_cpu_credit_usage_graph_with_notifications(
    self,
):
    """When notifications are given, the CPU credit graph carries an alert.

    Verifies the alert cadence ("2m" frequency and grace period), the
    notification channels, and the alert condition (LowerThan(250),
    RTYPE_MAX reducer, OP_OR operator).
    """
    cluster_id = "1234567890"
    data_source = "cw"
    channels = ["foo", "bar"]

    graph = generate_elasticache_redis_cpu_credit_usage_graph(
        cache_cluster_id=cluster_id,
        notifications=channels,
        cloudwatch_data_source=data_source,
    )

    graph.targets[0].refId.should.equal("A")

    alert = graph.alert
    alert.should.be.a(Alert)
    alert.frequency.should.equal("2m")
    alert.gracePeriod.should.equal("2m")
    alert.notifications.should.equal(channels)

    condition = alert.alertConditions[0]
    condition.evaluator.should.be.equal(LowerThan(250))
    condition.reducerType.should.be.equal(RTYPE_MAX)
    condition.operator.should.be.equal(OP_OR)
def generate_elasticsearch_nodes_alert_graph(name: str, client_id: str,
                                             cloudwatch_data_source: str,
                                             notifications: List[str]):
    """Generate Elasticsearch graph.

    Plots the min/max node count for an Elasticsearch domain.  When
    *notifications* is non-empty, an alert fires if the minimum node count
    over the last 5m drops below 1 (i.e. the cluster may have no nodes).

    Args:
      name: Elasticsearch domain name.
      client_id: AWS client id dimension for the domain.
      cloudwatch_data_source: Grafana data source name for CloudWatch.
      notifications: Alert notification channel ids; empty disables alerting.

    Returns:
      A grafanalib Graph (with ref ids auto-assigned).
    """
    # Both targets share the same domain/client dimensions.
    dimensions = {
        "DomainName": name,
        "ClientId": client_id,
    }

    min_nodes_target = CloudwatchMetricsTarget(
        alias="Minimum number of nodes",
        namespace=NAMESPACE,
        period="1m",
        statistics=["Minimum"],
        dimensions=dimensions,
        metricName="Nodes",
        refId=ALERT_REF_ID,
    )
    max_nodes_target = CloudwatchMetricsTarget(
        alias="Maximum number of nodes",
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions=dimensions,
        metricName="Nodes",
    )

    alert = None
    if notifications:
        alert = Alert(
            name="Elasticsearch nodes alert",
            message="Elasticsearch might have no nodes",
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=LowerThan(1),
                    reducerType=RTYPE_MAX,
                    operator=OP_OR,
                ),
            ],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    graph = Graph(
        title="Elasticsearch node alerts",
        dataSource=cloudwatch_data_source,
        targets=[min_nodes_target, max_nodes_target],
        yAxes=YAxes(
            YAxis(format=SHORT_FORMAT),
            YAxis(format=SHORT_FORMAT),
        ),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    )
    return graph.auto_ref_ids()
def generate_elasticsearch_storage_graph(name: str, client_id: str,
                                         cloudwatch_data_source: str,
                                         notifications: List[str]) -> Graph:
    """Generate Elasticsearch graph.

    Plots free storage (minimum) and used cluster space (maximum, on the
    right axis) for an Elasticsearch domain.  When *notifications* is
    non-empty, an alert fires if free storage over the last 5m drops below
    10240 MB.

    Args:
      name: Elasticsearch domain name.
      client_id: AWS client id dimension for the domain.
      cloudwatch_data_source: Grafana data source name for CloudWatch.
      notifications: Alert notification channel ids; empty disables alerting.

    Returns:
      A grafanalib Graph (with ref ids auto-assigned).
    """
    free_alias = "Free storage"
    used_alias = "Used space"
    dimensions = {
        "DomainName": name,
        "ClientId": client_id,
    }

    free_target = CloudwatchMetricsTarget(
        alias=free_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Minimum"],
        dimensions=dimensions,
        metricName="FreeStorageSpace",
        refId=ALERT_REF_ID,
    )
    used_target = CloudwatchMetricsTarget(
        alias=used_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions=dimensions,
        metricName="ClusterUsedSpace",
    )

    alert = None
    if notifications:
        alert = Alert(
            name="Elasticsearch storage alert",
            message="Elasticsearch might be low on storage",
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=LowerThan(10240),
                    reducerType=RTYPE_MAX,
                    operator=OP_OR,
                ),
            ],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    # Used space is rendered as a line on the secondary axis, over bars.
    overrides = [
        {
            "alias": free_alias,
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
        {
            "alias": used_alias,
            "color": colors.ORANGE,
            "lines": True,
            "bars": False,
            "yaxis": 2,
        },
    ]

    graph = Graph(
        title="Storage",
        dataSource=cloudwatch_data_source,
        targets=[free_target, used_target],
        yAxes=YAxes(
            YAxis(format=MEGA_BYTES),
            YAxis(format=MEGA_BYTES),
        ),
        seriesOverrides=overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=alert,
    )
    return graph.auto_ref_ids()
legendFormat="Active Processes", ), ], alert=Alert( name="Number of Active Processes alert", message= "The number of active Fleetspeak Server processes is below {}" .format(ACTIVE_PROCESSES_ALERTING_CONDITION), alertConditions=[ AlertCondition( Target( expr='sum(up{job="fleetspeak"})', legendFormat="Active Processes", ), timeRange=TimeRange("10s", "now"), evaluator=LowerThan( ACTIVE_PROCESSES_ALERTING_CONDITION), operator=OP_AND, reducerType=RTYPE_SUM) ], )), Graph( title="Sum of Process Memory Bytes (across all instances)", targets=[ Target( expr= 'sum(process_resident_memory_bytes{job="fleetspeak"})', legendFormat="Resident Memory", ), ]), Graph( title="CPU Usage",
def generate_elasticache_redis_cpu_credit_usage_graph(
        cache_cluster_id: str, cloudwatch_data_source: str,
        notifications: List[str]) -> Graph:
    """Generate ElastiCache Redis graph.

    Plots CPU credit balance (minimum) and CPU credit usage (maximum) for a
    Redis cache cluster.  When *notifications* is non-empty, an alert fires
    if the credit balance over the last 5m drops below 250.

    Args:
      cache_cluster_id: ElastiCache cluster identifier.
      cloudwatch_data_source: Grafana data source name for CloudWatch.
      notifications: Alert notification channel ids; empty disables alerting.

    Returns:
      A grafanalib Graph (with ref ids auto-assigned).
    """
    balance_alias = "CPU credit balance"
    usage_alias = "CPU credit usage"
    dimensions = {"CacheClusterId": cache_cluster_id}

    balance_target = CloudwatchMetricsTarget(
        alias=balance_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Minimum"],
        dimensions=dimensions,
        metricName="CPUCreditBalance",
        refId=ALERT_REF_ID,
    )
    usage_target = CloudwatchMetricsTarget(
        alias=usage_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions=dimensions,
        metricName="CPUCreditUsage",
    )

    alert = None
    if notifications:
        alert = Alert(
            name="ElastiCache Redis CPU credit balance alert",
            message="ElastiCache Redis CPU credit balance alert",
            executionErrorState="alerting",
            alertConditions=[
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=LowerThan(250),
                    reducerType=RTYPE_MAX,
                    operator=OP_OR,
                ),
            ],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    overrides = [
        {
            "alias": balance_alias,
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
        {
            "alias": usage_alias,
            "color": colors.YELLOW,
            "lines": True,
            "bars": False,
        },
    ]

    graph = Graph(
        title="CPU credit utilization",
        dataSource=cloudwatch_data_source,
        targets=[balance_target, usage_target],
        yAxes=single_y_axis(format=SHORT_FORMAT),
        seriesOverrides=overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=alert,
    )
    return graph.auto_ref_ids()