Exemple #1
0
def number_of_active_processes_graph(grr_component):
    """Build the "Number of Active Processes" graph for one GRR component.

    The panel plots a single ``sum(up{job="grr_<component>"})`` series and
    carries an alert whose one condition evaluates that same expression over
    the ``"10s"`` to ``"now"`` range with the ``RTYPE_SUM`` reducer, compared
    via ``LowerThan(config.ACTIVE_PROCESSES_ALERTING_CONDITION)``.

    Args:
        grr_component: Component name. It is substituted into the
            ``grr_<component>`` job label and, capitalized, into the alert
            message.

    Returns:
        The configured ``Graph``.
    """
    series_legend = "Active Processes"
    # Doubled braces survive ``str.format`` as the literal label-selector
    # braces of the query.
    up_query = 'sum(up{{job="grr_{}"}})'.format(grr_component)
    threshold = config.ACTIVE_PROCESSES_ALERTING_CONDITION

    panel_target = Target(expr=up_query, legendFormat=series_legend)

    # The condition gets its own Target object rather than sharing the one
    # used by the panel.
    below_threshold = AlertCondition(
        Target(expr=up_query, legendFormat=series_legend),
        timeRange=TimeRange("10s", "now"),
        evaluator=LowerThan(threshold),
        operator=OP_AND,
        reducerType=RTYPE_SUM,
    )
    alert_message = "The number of active {} processes is below {}".format(
        grr_component.capitalize(), threshold)

    return Graph(
        title="Number of Active Processes",
        targets=[panel_target],
        alert=Alert(
            name="Number of Active Processes alert",
            message=alert_message,
            alertConditions=[below_threshold],
        ),
    )
Exemple #2
0
def generate_rds_burst_balance_graph(name: str, cloudwatch_data_source: str,
                                     notifications: List[str]):
    """
    Build the "Burst Balance" graph for one RDS instance.

    The graph has a single CloudWatch target: the ``Minimum`` statistic of
    the ``BurstBalance`` metric with a ``"1m"`` period, for the
    ``DBInstanceIdentifier`` given by ``name``.

    When ``notifications`` is truthy an alert is attached. Its one condition
    refers to that target through ``ALERT_REF_ID``, uses the ``"5m"`` to
    ``"now"`` time range with the ``RTYPE_MAX`` reducer and the
    ``LowerThan(40)`` evaluator. When ``notifications`` is empty or ``None``
    the graph is created with ``alert=None``.

    ``name`` is also interpolated into the alert name and message;
    ``cloudwatch_data_source`` is passed through as the graph's data source.
    Returns the result of ``Graph(...).auto_ref_ids()``.
    """

    burst_balance_target = CloudwatchMetricsTarget(
        alias="Burst balance",
        metricName="BurstBalance",
        statistics=["Minimum"],
        namespace=NAMESPACE,
        dimensions={"DBInstanceIdentifier": name},
        period="1m",
        refId=ALERT_REF_ID,
    )

    if notifications:
        # The condition points at the target above by its reference id.
        low_balance = AlertCondition(
            Target(refId=ALERT_REF_ID),
            timeRange=TimeRange("5m", "now"),
            evaluator=LowerThan(40),
            reducerType=RTYPE_MAX,
            operator=OP_AND,
        )
        alert = Alert(
            name="{} Burst Balance Errors".format(name),
            message="{} is having Burst Balance errors".format(name),
            executionErrorState="alerting",
            alertConditions=[low_balance],
            gracePeriod="2m",
            frequency="2m",
            notifications=notifications,
        )
    else:
        alert = None

    graph = Graph(
        title="Burst Balance",
        dataSource=cloudwatch_data_source,
        targets=[burst_balance_target],
        yAxes=single_y_axis(format=PERCENT_FORMAT),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    )
    return graph.auto_ref_ids()
Exemple #3
0
    def test_should_generate_elasticache_cpu_credit_usage_graph_with_notifications(
            self):
        # With a non-empty notifications list the generated graph must carry
        # an alert; check its schedule, its recipients and its first
        # condition.
        expected_notifications = ["foo", "bar"]

        graph = generate_elasticache_redis_cpu_credit_usage_graph(
            cache_cluster_id="1234567890",
            cloudwatch_data_source="cw",
            notifications=expected_notifications,
        )

        graph.targets[0].refId.should.equal("A")

        alert = graph.alert
        alert.should.be.a(Alert)
        alert.frequency.should.equal("2m")
        alert.gracePeriod.should.equal("2m")
        alert.notifications.should.equal(expected_notifications)

        # Looked up only after the alert itself has been checked.
        condition = alert.alertConditions[0]
        condition.evaluator.should.be.equal(LowerThan(250))
        condition.reducerType.should.be.equal(RTYPE_MAX)
        condition.operator.should.be.equal(OP_OR)
Exemple #4
0
def generate_elasticsearch_nodes_alert_graph(name: str, client_id: str,
                                             cloudwatch_data_source: str,
                                             notifications: List[str]):
    """
    Build the "Elasticsearch node alerts" graph for one domain.

    Two CloudWatch targets are plotted, both on the ``Nodes`` metric with a
    ``"1m"`` period and the dimensions ``DomainName=name`` and
    ``ClientId=client_id``: the ``Minimum`` statistic (explicitly given
    ``ALERT_REF_ID``) and the ``Maximum`` statistic (no explicit ref id).

    When ``notifications`` is truthy an alert is attached whose single
    condition refers to the ``ALERT_REF_ID`` target, over the ``"5m"`` to
    ``"now"`` range, with the ``RTYPE_MAX`` reducer and the ``LowerThan(1)``
    evaluator. Otherwise the graph is created with ``alert=None``.

    Returns the result of ``Graph(...).auto_ref_ids()``.
    """

    def nodes_target(alias, statistic, **extra):
        # Every call builds a fresh dimensions dict, so the two targets never
        # share one mapping.
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace=NAMESPACE,
            period="1m",
            statistics=[statistic],
            dimensions={
                "DomainName": name,
                "ClientId": client_id
            },
            metricName="Nodes",
            **extra
        )

    left_axis = YAxis(format=SHORT_FORMAT)
    right_axis = YAxis(format=SHORT_FORMAT)

    node_targets = [
        nodes_target("Minimum number of nodes", "Minimum",
                     refId=ALERT_REF_ID),
        nodes_target("Maximum number of nodes", "Maximum"),
    ]

    if notifications:
        no_nodes = AlertCondition(
            Target(refId=ALERT_REF_ID),
            timeRange=TimeRange("5m", "now"),
            evaluator=LowerThan(1),
            reducerType=RTYPE_MAX,
            operator=OP_OR,
        )
        alert = Alert(
            name="Elasticsearch nodes alert",
            message="Elasticsearch might have no nodes",
            executionErrorState="alerting",
            alertConditions=[no_nodes],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )
    else:
        alert = None

    graph = Graph(
        title="Elasticsearch node alerts",
        dataSource=cloudwatch_data_source,
        targets=node_targets,
        yAxes=YAxes(left_axis, right_axis),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=False,
        lines=True,
        alert=alert,
    )
    return graph.auto_ref_ids()
Exemple #5
0
def generate_elasticsearch_storage_graph(name: str, client_id: str,
                                         cloudwatch_data_source: str,
                                         notifications: List[str]) -> Graph:
    """
    Build the "Storage" graph for one Elasticsearch domain.

    Two CloudWatch targets are plotted with a ``"1m"`` period and the
    dimensions ``DomainName=name`` and ``ClientId=client_id``:

    * "Free storage": ``Minimum`` of ``FreeStorageSpace``, explicitly given
      ``ALERT_REF_ID``;
    * "Used space": ``Maximum`` of ``ClusterUsedSpace``, no explicit ref id.

    When ``notifications`` is truthy an alert is attached whose single
    condition refers to the ``ALERT_REF_ID`` target, over the ``"5m"`` to
    ``"now"`` range, with the ``RTYPE_MAX`` reducer and the
    ``LowerThan(10240)`` evaluator. Otherwise the graph is created with
    ``alert=None``.

    Returns the result of ``Graph(...).auto_ref_ids()``.
    """

    free_label = "Free storage"
    used_label = "Used space"

    def storage_target(alias, metric, statistic, **extra):
        # Every call builds a fresh dimensions dict, so the two targets never
        # share one mapping.
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace=NAMESPACE,
            period="1m",
            statistics=[statistic],
            dimensions={
                "DomainName": name,
                "ClientId": client_id
            },
            metricName=metric,
            **extra
        )

    left_axis = YAxis(format=MEGA_BYTES)
    right_axis = YAxis(format=MEGA_BYTES)

    storage_targets = [
        storage_target(free_label, "FreeStorageSpace", "Minimum",
                       refId=ALERT_REF_ID),
        storage_target(used_label, "ClusterUsedSpace", "Maximum"),
    ]

    if notifications:
        low_storage = AlertCondition(
            Target(refId=ALERT_REF_ID),
            timeRange=TimeRange("5m", "now"),
            evaluator=LowerThan(10240),
            reducerType=RTYPE_MAX,
            operator=OP_OR,
        )
        alert = Alert(
            name="Elasticsearch storage alert",
            message="Elasticsearch might be low on storage",
            executionErrorState="alerting",
            alertConditions=[low_storage],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )
    else:
        alert = None

    # Per-series overrides, keyed by alias: both series ask for lines rather
    # than bars, and the used-space series is additionally given "yaxis": 2.
    free_override = {
        "alias": free_label,
        "color": colors.GREEN,
        "lines": True,
        "bars": False,
    }
    used_override = {
        "alias": used_label,
        "color": colors.ORANGE,
        "lines": True,
        "bars": False,
        "yaxis": 2,
    }

    graph = Graph(
        title="Storage",
        dataSource=cloudwatch_data_source,
        targets=storage_targets,
        yAxes=YAxes(left_axis, right_axis),
        seriesOverrides=[free_override, used_override],
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=alert,
    )
    return graph.auto_ref_ids()
Exemple #6
0
             legendFormat="Active Processes",
         ),
     ],
     alert=Alert(
         name="Number of Active Processes alert",
         message=
         "The number of active Fleetspeak Server processes is below {}"
         .format(ACTIVE_PROCESSES_ALERTING_CONDITION),
         alertConditions=[
             AlertCondition(
                 Target(
                     expr='sum(up{job="fleetspeak"})',
                     legendFormat="Active Processes",
                 ),
                 timeRange=TimeRange("10s", "now"),
                 evaluator=LowerThan(
                     ACTIVE_PROCESSES_ALERTING_CONDITION),
                 operator=OP_AND,
                 reducerType=RTYPE_SUM)
         ],
     )),
 Graph(
     title="Sum of Process Memory Bytes (across all instances)",
     targets=[
         Target(
             expr=
             'sum(process_resident_memory_bytes{job="fleetspeak"})',
             legendFormat="Resident Memory",
         ),
     ]),
 Graph(
     title="CPU Usage",
Exemple #7
0
def generate_elasticache_redis_cpu_credit_usage_graph(
        cache_cluster_id: str, cloudwatch_data_source: str,
        notifications: List[str]) -> Graph:
    """
    Build the "CPU credit utilization" graph for one ElastiCache cluster.

    Two CloudWatch targets are plotted with a ``"1m"`` period for the
    ``CacheClusterId`` given by ``cache_cluster_id``:

    * "CPU credit balance": ``Minimum`` of ``CPUCreditBalance``, explicitly
      given ``ALERT_REF_ID``;
    * "CPU credit usage": ``Maximum`` of ``CPUCreditUsage``, no explicit
      ref id.

    When ``notifications`` is truthy an alert is attached whose single
    condition refers to the ``ALERT_REF_ID`` target, over the ``"5m"`` to
    ``"now"`` range, with the ``RTYPE_MAX`` reducer and the
    ``LowerThan(250)`` evaluator. Otherwise the graph is created with
    ``alert=None``.

    Returns the result of ``Graph(...).auto_ref_ids()``.
    """

    aliases = {
        "credit balance": "CPU credit balance",
        "credit usage": "CPU credit usage",
    }
    balance_alias = aliases["credit balance"]
    usage_alias = aliases["credit usage"]

    def credit_target(alias, metric, statistic, **extra):
        # Every call builds a fresh dimensions dict, so the two targets never
        # share one mapping.
        return CloudwatchMetricsTarget(
            alias=alias,
            namespace=NAMESPACE,
            period="1m",
            statistics=[statistic],
            dimensions={"CacheClusterId": cache_cluster_id},
            metricName=metric,
            **extra
        )

    credit_targets = [
        credit_target(balance_alias, "CPUCreditBalance", "Minimum",
                      refId=ALERT_REF_ID),
        credit_target(usage_alias, "CPUCreditUsage", "Maximum"),
    ]

    if notifications:
        low_credit = AlertCondition(
            Target(refId=ALERT_REF_ID),
            timeRange=TimeRange("5m", "now"),
            evaluator=LowerThan(250),
            reducerType=RTYPE_MAX,
            operator=OP_OR,
        )
        alert = Alert(
            name="ElastiCache Redis CPU credit balance alert",
            message="ElastiCache Redis CPU credit balance alert",
            executionErrorState="alerting",
            alertConditions=[low_credit],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )
    else:
        alert = None

    # Per-series overrides, keyed by alias: both series ask for lines rather
    # than bars and differ only in colour.
    balance_override = {
        "alias": balance_alias,
        "color": colors.GREEN,
        "lines": True,
        "bars": False,
    }
    usage_override = {
        "alias": usage_alias,
        "color": colors.YELLOW,
        "lines": True,
        "bars": False,
    }

    graph = Graph(
        title="CPU credit utilization",
        dataSource=cloudwatch_data_source,
        targets=credit_targets,
        yAxes=single_y_axis(format=SHORT_FORMAT),
        seriesOverrides=[balance_override, usage_override],
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=alert,
    )
    return graph.auto_ref_ids()