def error_codes(api: str, interval: int = 1) -> Graph:
    """Build the "<api> return codes" graph for a flyte admin API.

    One series is plotted per return code (OK, InvalidArgument,
    AlreadyExists, FailedPrecondition); each query sums the ``irate`` of the
    matching ``flyte:admin:<api>:codes:<code>`` metric.

    Args:
        api: API name interpolated into the metric name and the panel title.
        interval: Range-vector window, in minutes, used by every query.

    Returns:
        A Graph with four targets (refIds A-D) and ops/short y-axes.
    """
    # (metric suffix, legend label, refId) in display order.
    series = (
        ("OK", "ok", "A"),
        ("InvalidArgument", "invalid-args", "B"),
        ("AlreadyExists", "already-exists", "C"),
        ("FailedPrecondition", "failed-precondition", "D"),
    )
    queries = [
        Target(
            expr=f"sum(irate(flyte:admin:{api}:codes:{code}[{interval}m]))",
            legendFormat=legend,
            refId=ref_id,
        )
        for code, legend, ref_id in series
    ]
    axes = YAxes(
        YAxis(format=OPS_FORMAT),
        YAxis(format=SHORT_FORMAT),
    )
    return Graph(
        title=f"{api} return codes",
        dataSource=DATASOURCE,
        targets=queries,
        yAxes=axes,
    )
Beispiel #2
0
def generate_elasticsearch_status_red_alert_graph(
        name: str, client_id: str, cloudwatch_data_source: str,
        notifications: List[str]) -> Graph:
    """
    Generate the Elasticsearch "Status RED alerts" graph.

    Plots the per-minute maximum of the ``ClusterStatus.red`` CloudWatch
    metric as bars and, when ``notifications`` is non-empty, attaches an
    alert on that series.

    Args:
        name: Value used for the ``DomainName`` dimension.
        client_id: Value used for the ``ClientId`` dimension.
        cloudwatch_data_source: Data source the panel queries.
        notifications: Passed to the alert as its notifications; when empty
            (or None) no alert is attached.

    Returns:
        The configured Graph, after ``auto_ref_ids()``.
    """

    y_axes = YAxes(
        YAxis(format=SHORT_FORMAT),
        YAxis(format=SHORT_FORMAT),
    )

    targets = [
        CloudwatchMetricsTarget(
            alias="Red status",
            namespace=NAMESPACE,
            period="1m",
            statistics=["Maximum"],
            dimensions={
                "DomainName": name,
                "ClientId": client_id
            },
            metricName="ClusterStatus.red",
            # Pin the ref id the alert condition below points at, instead of
            # relying on auto_ref_ids() happening to assign the same value.
            refId=ALERT_REF_ID,
        ),
    ]

    alert = None

    if notifications:
        alert = Alert(
            name="Elasticsearch is in status red",
            message="Elasticsearch is in status red",
            executionErrorState="alerting",
            alertConditions=[
                # Max reducer over the 5m -> now range, compared against
                # GreaterThan(0).
                AlertCondition(
                    Target(refId=ALERT_REF_ID),
                    timeRange=TimeRange("5m", "now"),
                    evaluator=GreaterThan(0),
                    reducerType=RTYPE_MAX,
                    operator=OP_OR,
                ),
            ],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    return Graph(
        title="Status RED alerts",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=alert,
    ).auto_ref_ids()
Beispiel #3
0
 def quota_stats(collapse: bool) -> Row:
     """Build the "Kubernetes Quota Usage stats" row.

     The row holds two graphs, CPU then memory. Each plots the ``hard`` and
     ``used`` series of ``kube_resourcequota`` for the ``$project-$domain``
     namespace.

     Args:
         collapse: Passed through as the row's ``collapse`` setting.

     Returns:
         A Row containing the CPU graph followed by the memory graph.
     """
     cpu_hard = 'kube_resourcequota{resource="limits.cpu", namespace="$project-$domain", type="hard"}'
     cpu_used = 'kube_resourcequota{resource="limits.cpu", namespace="$project-$domain", type="used"}'
     mem_hard = 'kube_resourcequota{resource="limits.memory", namespace="$project-$domain", type="hard"}'
     mem_used = 'kube_resourcequota{resource="limits.memory", namespace="$project-$domain", type="used"}'

     cpu_graph = Graph(
         title="CPU Limits vs usage",
         dataSource=DATASOURCE,
         targets=[
             Target(expr=cpu_hard, refId='A', legendFormat="max cpu"),
             Target(expr=cpu_used, refId='B', legendFormat="used cpu"),
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )
     mem_graph = Graph(
         title="Mem Limits vs usage",
         dataSource=DATASOURCE,
         targets=[
             Target(expr=mem_hard, refId='A', legendFormat="max mem"),
             Target(expr=mem_used, refId='B', legendFormat="used mem"),
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )

     return Row(
         title="Kubernetes Quota Usage stats",
         collapse=collapse,
         panels=[cpu_graph, mem_graph],
     )
Beispiel #4
0
 def system_errors() -> Graph:
     """Build the "System errors" graph.

     Plots the summed 5-minute ``deriv`` of the propeller
     ``system_error_unlabeled`` metric, multiplied by 300.
     """
     query = Target(
         expr='sum(deriv(flyte:propeller:all:round:system_error_unlabeled[5m]))*300',
         refId='A',
     )
     axes = YAxes(
         YAxis(format=OPS_FORMAT),
         YAxis(format=SHORT_FORMAT),
     )
     return Graph(
         title="System errors",
         dataSource=DATASOURCE,
         targets=[query],
         yAxes=axes,
     )
Beispiel #5
0
 def workflows_per_project() -> Graph:
     """Build the "Running Workflows per project" graph.

     Plots ``flyte:propeller:all:collector:flyteworkflow`` summed by the
     ``project`` label.

     Returns:
         A Graph with a single target (refId A) and ops/short y-axes.
     """
     # Plain string literals: the originals were f-strings with no
     # placeholders, which add nothing and trip linters.
     return Graph(
         title="Running Workflows per project",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr='sum(flyte:propeller:all:collector:flyteworkflow) by (project)',
                 refId='A',
             ),
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )
Beispiel #6
0
 def create_free_workers() -> Graph:
     """Build the "Free workers count" graph.

     Plots ``flyte:propeller:all:free_workers_count`` summed by the
     ``kubernetes_pod_name`` label.
     """
     free_workers = Target(
         expr='sum(flyte:propeller:all:free_workers_count) by (kubernetes_pod_name)',
         refId='A',
     )
     axes = YAxes(
         YAxis(format=OPS_FORMAT),
         YAxis(format=SHORT_FORMAT),
     )
     return Graph(
         title="Free workers count",
         dataSource=DATASOURCE,
         targets=[free_workers],
         yAxes=axes,
     )
Beispiel #7
0
 def round_panic() -> Graph:
     """Build the "Round panic" graph.

     Plots the summed 5-minute ``rate`` of the propeller
     ``round:panic_unlabeled`` metric.
     """
     panics = Target(
         expr='sum(rate(flyte:propeller:all:round:panic_unlabeled[5m]))',
         refId='A',
     )
     axes = YAxes(
         YAxis(format=OPS_FORMAT),
         YAxis(format=SHORT_FORMAT),
     )
     return Graph(
         title="Round panic",
         dataSource=DATASOURCE,
         targets=[panics],
         yAxes=axes,
     )
 def abort_errors() -> Graph:
     """Build the "Abort errors" graph.

     Plots the summed 5-minute ``rate`` of the propeller
     ``round:abort_error`` metric.

     Returns:
         A Graph with a single target (refId A) and ops/short y-axes.
     """
     return Graph(
         # Was "System errors", a copy of the system_errors() title, which
         # left two different panels with the same title.
         title="Abort errors",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr='sum(rate(flyte:propeller:all:round:abort_error[5m]))',
                 refId='A',
             ),
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )
Beispiel #9
0
 def streak_length() -> Graph:
     """Build the "Avg streak length" graph.

     Plots the average of the propeller ``round:streak_length_unlabeled``
     metric.
     """
     avg_streak = Target(
         expr='avg(flyte:propeller:all:round:streak_length_unlabeled)',
         refId='A',
     )
     axes = YAxes(
         YAxis(format=OPS_FORMAT),
         YAxis(format=SHORT_FORMAT),
     )
     return Graph(
         title="Avg streak length",
         dataSource=DATASOURCE,
         targets=[avg_streak],
         yAxes=axes,
     )
 def db_count(entity: str, op: str, interval: int = 1) -> Graph:
     """Build the "<op> Count Ops" graph for a postgres repository entity.

     Args:
         entity: Repository entity name interpolated into the metric name.
         op: Operation name; used in the metric name (``<op>_ms_count``) and
             in the panel title.
         interval: Range-vector window, in minutes, for the ``rate`` call.

     Returns:
         A Graph with a single target (refId A) and ops/short y-axes.
     """
     metric = f'flyte:admin:database:postgres:repositories:{entity}:{op}_ms_count'
     op_rate = Target(
         expr=f'sum(rate({metric}[{interval}m]))',
         refId='A',
     )
     axes = YAxes(
         YAxis(format=OPS_FORMAT),
         YAxis(format=SHORT_FORMAT),
     )
     return Graph(
         title=f"{op} Count Ops",
         dataSource=DATASOURCE,
         targets=[op_rate],
         yAxes=axes,
     )
Beispiel #11
0
def lambda_generate_memory_utilization_graph(
    name: str,
    cloudwatch_data_source: str,
    lambda_insights_namespace: str,
    *args,
    **kwargs,
) -> Graph:
    """
    Generate the "Lambda Memory Utilization" graph.

    Plots the maximum of the ``used_memory_max`` and ``total_memory``
    metrics from ``lambda_insights_namespace`` for the function ``name``.
    No alert is attached. Extra positional and keyword arguments are
    accepted and ignored.
    """
    used_alias = "used_memory_max"
    allocated_alias = "allocated_memory"

    # (alias, metric name) per plotted series, in display order.
    metrics = [
        (used_alias, "used_memory_max"),
        (allocated_alias, "total_memory"),
    ]
    memory_targets = [
        CloudwatchMetricsTarget(
            alias=alias,
            namespace=lambda_insights_namespace,
            statistics=["Maximum"],
            metricName=metric_name,
            dimensions={"function_name": name},
        )
        for alias, metric_name in metrics
    ]

    overrides = [
        {"alias": used_alias, "points": False, "color": colors.GREEN},
        {
            "alias": allocated_alias,
            "points": False,
            "color": colors.RED,
            "fill": 0,
        },
    ]

    return Graph(
        title="Lambda Memory Utilization",
        dataSource=cloudwatch_data_source,
        targets=memory_targets,
        seriesOverrides=overrides,
        yAxes=YAxes(YAxis(format="decmbytes")),
        transparent=TRANSPARENT,
        editable=EDITABLE,
        alert=None,
        alertThreshold=ALERT_THRESHOLD,
    ).auto_ref_ids()
Beispiel #12
0
 def metastore_cache_hit_percentage(interval: int) -> Graph:
     """Build the metastore "cache hit percentage" graph.

     The query is ``hits * 100 / (misses + hits)``, where both terms are the
     summed ``rate`` of the propeller metastore counters over ``interval``
     minutes.

     TODO replace with metric math maybe?
     """
     hits = f'sum(rate(flyte:propeller:all:metastore:cache_hit[{interval}m]))'
     misses = f'sum(rate(flyte:propeller:all:metastore:cache_miss[{interval}m]))'
     hit_pct = Target(
         expr=f'({hits} * 100) / ({misses} + {hits})',
         refId='A',
     )
     axes = YAxes(
         YAxis(format=OPS_FORMAT),
         YAxis(format=SHORT_FORMAT),
     )
     return Graph(
         title="cache hit percentage",
         dataSource=DATASOURCE,
         targets=[hit_pct],
         yAxes=axes,
     )
 def metastore_failures() -> Graph:
     """Build the "Failures from metastore" graph.

     Plots one series per metastore failure counter (head failure, bad
     container, bad key, read failure, write failure), each the summed
     5-minute ``rate`` of ``flyte:propeller:all:metastore:<name>_unlabeled``.

     Returns:
         A Graph with five targets (refIds A-E) and ops/short y-axes.
     """
     # Copy counts sum(rate(flyte:propeller:all:metastore:copy:overall_unlabeled_ms_count[5m]))

     # (metric name, legend label, refId). Each target gets its own refId;
     # previously all five shared 'A'.
     failures = (
         ("head_failure", "head-failure", 'A'),
         ("bad_container", "bad-container", 'B'),
         ("bad_key", "bad-key", 'C'),
         ("read_failure", "read-failure", 'D'),
         ("write_failure", "write-failure", 'E'),
     )
     return Graph(
         title="Failures from metastore",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr=f'sum(rate(flyte:propeller:all:metastore:{metric}_unlabeled[5m]))',
                 legendFormat=legend,
                 refId=ref_id,
             )
             for metric, legend, ref_id in failures
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )
 def error_vs_success(api: str, interval: int = 1) -> Graph:
     """Build the "<api> success vs errors" graph for a flyte admin API.

     Args:
         api: API name interpolated into the metric names and panel title.
         interval: Range-vector window, in minutes, for both ``irate`` calls.

     Returns:
         A Graph with an "errors" target (refId A) and a "success" target
         (refId B).
     """
     # (metric suffix / legend label, refId) in display order.
     outcomes = (("errors", 'A'), ("success", 'B'))
     queries = [
         Target(
             expr=f'sum(irate(flyte:admin:{api}:{outcome}[{interval}m]))',
             legendFormat=outcome,
             refId=ref_id,
         )
         for outcome, ref_id in outcomes
     ]
     axes = YAxes(
         YAxis(format=OPS_FORMAT),
         YAxis(format=SHORT_FORMAT),
     )
     return Graph(
         title=f"{api} success vs errors",
         dataSource=DATASOURCE,
         targets=queries,
         yAxes=axes,
     )
Beispiel #15
0
def sum_process_memory_bytes(grr_component):
    """Build the resident-memory graph for one GRR component.

    The query sums ``process_resident_memory_bytes`` over the job named
    ``grr_<grr_component>``.
    """
    query = 'sum(process_resident_memory_bytes{{job="grr_{}"}})'.format(
        grr_component)
    resident_memory = Target(expr=query, legendFormat="Resident Memory")
    return Graph(
        title="Sum of Process Memory Bytes (across all instances)",
        targets=[resident_memory],
        yAxes=YAxes(left=YAxis(format=BYTES_FORMAT)),
    )
Beispiel #16
0
def db_operations_errors(grr_component):
    """Build the database error-rate graph for one GRR component.

    The query is the 10-minute ``rate`` of ``db_request_errors_total`` for
    the job ``grr_<grr_component>``, summed by the ``call`` label; the legend
    shows that label.
    """
    query = (
        'sum by (call) (rate(db_request_errors_total{{job="grr_{0}"}}[10m]))'
    ).format(grr_component)
    errors_by_call = Target(expr=query, legendFormat="{{call}}")
    return Graph(
        title="Database Operations Errors Rate by Call",
        targets=[errors_by_call],
        yAxes=YAxes(left=YAxis(format=OPS_FORMAT)),
    )
Beispiel #17
0
 def plugin_success_vs_failures() -> Graph:
     """Build the "Plugin Failures" graph.

     Selects every series whose metric name matches the propeller node
     plugin ``*_failure_unlabeled`` and ``*_success_unlabeled`` patterns.

     TODO We need to convert the plugin names to be labels, so that prometheus can perform queries correctly

     Returns:
         A Graph with a failure target (refId A) and a success target
         (refId B).
     """
     return Graph(
         title="Plugin Failures",
         dataSource=DATASOURCE,
         targets=[
             Target(
                 expr='{__name__=~"flyte:propeller:all:node:plugin:.*_failure_unlabeled"}',
                 refId='A',
             ),
             Target(
                 expr='{__name__=~"flyte:propeller:all:node:plugin:.*_success_unlabeled"}',
                 # Distinct refId; both targets previously used 'A'.
                 refId='B',
             ),
         ],
         yAxes=YAxes(
             YAxis(format=OPS_FORMAT),
             YAxis(format=SHORT_FORMAT),
         ),
     )
Beispiel #18
0
def db_operations_latency(grr_component):
    """Build the database latency graph for one GRR component.

    The query divides the 10-minute ``rate`` of ``db_request_latency_sum`` by
    that of ``db_request_latency_count`` for the job ``grr_<grr_component>``
    and sums the result by the ``call`` label.
    """
    query = (
        'sum by (call) (rate(db_request_latency_sum{{job="grr_{0}"}}[10m]) / rate(db_request_latency_count{{job="grr_{0}"}}[10m]))'
    ).format(grr_component)
    latency_by_call = Target(expr=query, legendFormat="{{call}}")
    return Graph(
        title="Database Operations Latency by Call",
        targets=[latency_by_call],
        yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)),
    )
Beispiel #19
0
def avg_cpu_usage_percentage(grr_component):
    """Build the "CPU Usage" graph for one GRR component.

    The query averages the 30-second ``rate`` of
    ``process_cpu_seconds_total`` for the job ``grr_<grr_component>`` and
    multiplies it by 100. The left axis maximum is set to 105.
    """
    query = (
        'avg(rate(process_cpu_seconds_total{{job="grr_{}"}}[30s])) * 100'
    ).format(grr_component)
    cpu_usage = Target(
        expr=query,
        legendFormat="Average Process CPU Usage in %",
    )
    return Graph(
        title="CPU Usage",
        targets=[cpu_usage],
        yAxes=YAxes(left=YAxis(max=105)),
    )
Beispiel #20
0
def threadpool_cpu_usage(grr_component):
    """Build the "Threadpool Average CPU Usage" graph for one GRR component.

    The query averages the 30-second ``rate`` of ``threadpool_cpu_use`` for
    the job ``grr_<grr_component>`` and multiplies it by 100. The left axis
    maximum is set to 105.
    """
    query = (
        'avg(rate(threadpool_cpu_use{{job="grr_{}"}}[30s])) * 100'
    ).format(grr_component)
    pool_usage = Target(
        expr=query,
        legendFormat="Average Process CPU Usage in % (over all jobs & pools)",
    )
    return Graph(
        title="Threadpool Average CPU Usage",
        targets=[pool_usage],
        yAxes=YAxes(left=YAxis(max=105)),
    )
Beispiel #21
0
# List the EC2 instances visible to the default boto3 configuration.
client = boto3.client('ec2')
instances = client.describe_instances()
# Call form of print: a SyntaxError-free equivalent of the Python 2
# `print instances` statement that produces the same output on Python 2
# (single argument) and Python 3.
print(instances)

GRAFANA_API_URL = 'http://statsd:3000/api/dashboards/db/'

# Single series: data-disk usage of the prod mysql-maindb servers.
target = Target(target='servers.prod-mysql-maindb*.df-data.df_complex-used',
                datasource='default')

panel = Graph(
    title='mysql-maindb disk consumption',
    dataSource='default',
    targets=[target],
    yAxes=[
        YAxis(format=BYTES_FORMAT),
        YAxis(format=SHORT_FORMAT),
    ],
)

row = Row(panels=[panel])

db = Dashboard(
    title='Autogenerated MySQL Disk Consumption',
    rows=[row],
    time=Time('now-6M', 'now'),
)

# Serialize the dashboard into an in-memory buffer and keep the resulting text.
# NOTE(review): `StringIO.StringIO` is the Python 2 module spelling; on
# Python 3 this would need `io.StringIO` - confirm the target interpreter.
s = StringIO.StringIO()
write_dashboard(db, s)
dashboard_json = s.getvalue()
Beispiel #22
0
 title="{}s Dashboard".format(GRR_COMPONENT).title().replace("_", " "),
 rows=[
     Row(panels=[panel(GRR_COMPONENT) for panel in row])
     for row in GENERAL_PANELS
 ] + [
     Row(panels=[
         Graph(
             title="API Method Latency Rate by Method",
             targets=[
                 Target(
                     expr=
                     'sum by (method_name) (rate(api_method_latency_sum[10m]) / rate(api_method_latency_count[10m]))',
                     legendFormat="{{method_name}}",
                 ),
             ],
             yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)),
         ),
         Graph(
             title="API Calls Count Rate with Status SUCCESS",
             targets=[
                 Target(
                     expr=
                     'sum(rate(api_method_latency_count{status="SUCCESS"}[10m]))',
                     legendFormat="Successful Calls Rate",
                 ),
             ],
             yAxes=YAxes(left=YAxis(format=OPS_FORMAT)),
         ),
         Graph(
             title=
             "API Calls Count Rate with other statuses (not SUCCESS) by Method",
Beispiel #23
0
             Target(
                 expr=
                 'sum(process_resident_memory_bytes{job="fleetspeak"})',
                 legendFormat="Resident Memory",
             ),
         ]),
     Graph(
         title="CPU Usage",
         targets=[
             Target(
                 expr=
                 'avg(rate(process_cpu_seconds_total{job="fleetspeak"}[30s])) * 100',
                 legendFormat="Average Process CPU Usage",
             ),
         ],
         yAxes=YAxes(left=YAxis(max=105, format="percent")),
     ),
 ]),
 Row(panels=[
     Graph(
         title="Datastore Latency per Operation",
         targets=[
             Target(
                 expr=
                 'sum by (operation) (rate(fleetspeak_server_datastore_operations_completed_latency_sum[10m]) / rate(fleetspeak_server_datastore_operations_completed_latency_count[10m]))',
                 legendFormat="{{operation}}",
             ),
         ],
         yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)),
     ),
     Heatmap(
Beispiel #24
0
def generate_elasticsearch_requests_graph(
        name: str, client_id: str, cloudwatch_data_source: str) -> Graph:
    """
    Generate the Elasticsearch "Requests" graph.

    Plots the per-minute sum of the ``2xx``, ``3xx``, ``4xx`` and ``5xx``
    CloudWatch metrics for the given domain, one line per status class.

    Args:
        name: Value used for the ``DomainName`` dimension.
        client_id: Value used for the ``ClientId`` dimension.
        cloudwatch_data_source: Data source the panel queries.

    Returns:
        The configured Graph, after ``auto_ref_ids()``.
    """

    y_axes = YAxes(
        YAxis(format=SHORT_FORMAT),
        YAxis(format=SHORT_FORMAT),
    )

    # (alias, which is also the CloudWatch metric name, and series colour) in
    # display order. 5xx is red rather than orange so it can be told apart
    # from 4xx; it previously shared 4xx's colour.
    status_classes = [
        ("2xx", colors.GREEN),
        ("3xx", colors.YELLOW),
        ("4xx", colors.ORANGE),
        ("5xx", colors.RED),
    ]

    targets = [
        CloudwatchMetricsTarget(
            alias=status_class,
            namespace=NAMESPACE,
            period="1m",
            statistics=["Sum"],
            dimensions={
                "DomainName": name,
                "ClientId": client_id
            },
            metricName=status_class,
        )
        for status_class, _ in status_classes
    ]

    # The panel defaults to bars (see below); every series is overridden to
    # be drawn as a line in its own colour.
    series_overrides = [
        {
            "alias": status_class,
            "color": color,
            "lines": True,
            "bars": False,
        }
        for status_class, color in status_classes
    ]

    return Graph(
        title="Requests",
        dataSource=cloudwatch_data_source,
        targets=targets,
        yAxes=y_axes,
        seriesOverrides=series_overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
Beispiel #25
0
def generate_elasticsearch_storage_graph(name: str, client_id: str,
                                         cloudwatch_data_source: str,
                                         notifications: List[str]) -> Graph:
    """
    Generate the Elasticsearch "Storage" graph.

    Plots the per-minute minimum of ``FreeStorageSpace`` and the per-minute
    maximum of ``ClusterUsedSpace`` for the given domain. When
    ``notifications`` is non-empty, an alert on the free-storage series
    (ref id ``ALERT_REF_ID``) is attached, using a ``LowerThan(10240)``
    evaluator.
    """
    free_alias = "Free storage"
    used_alias = "Used space"
    domain_dimensions = {"DomainName": name, "ClientId": client_id}

    free_storage = CloudwatchMetricsTarget(
        alias=free_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Minimum"],
        dimensions=dict(domain_dimensions),
        metricName="FreeStorageSpace",
        refId=ALERT_REF_ID,
    )
    used_space = CloudwatchMetricsTarget(
        alias=used_alias,
        namespace=NAMESPACE,
        period="1m",
        statistics=["Maximum"],
        dimensions=dict(domain_dimensions),
        metricName="ClusterUsedSpace",
    )

    storage_alert = None
    if notifications:
        low_storage = AlertCondition(
            Target(refId=ALERT_REF_ID),
            timeRange=TimeRange("5m", "now"),
            evaluator=LowerThan(10240),
            reducerType=RTYPE_MAX,
            operator=OP_OR,
        )
        storage_alert = Alert(
            name="Elasticsearch storage alert",
            message="Elasticsearch might be low on storage",
            executionErrorState="alerting",
            alertConditions=[low_storage],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    # Both series are drawn as lines; used space goes on the second y-axis.
    overrides = [
        {
            "alias": free_alias,
            "color": colors.GREEN,
            "lines": True,
            "bars": False,
        },
        {
            "alias": used_alias,
            "color": colors.ORANGE,
            "lines": True,
            "bars": False,
            "yaxis": 2,
        },
    ]

    return Graph(
        title="Storage",
        dataSource=cloudwatch_data_source,
        targets=[free_storage, used_space],
        yAxes=YAxes(
            YAxis(format=MEGA_BYTES),
            YAxis(format=MEGA_BYTES),
        ),
        seriesOverrides=overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
        alert=storage_alert,
    ).auto_ref_ids()
Beispiel #26
0
def generate_elasticsearch_documents_graph(
        name: str, client_id: str, cloudwatch_data_source: str) -> Graph:
    """
    Generate the Elasticsearch "Documents" graph.

    Plots the per-minute maximum of ``SearchableDocuments`` and
    ``DeletedDocuments`` for the given domain; the deleted-documents series
    is placed on the second y-axis.
    """
    # (alias, CloudWatch metric name, series colour, extra override keys)
    # in display order.
    series = [
        ("Searchable documents", "SearchableDocuments", colors.GREEN, {}),
        ("Deleted documents", "DeletedDocuments", colors.ORANGE,
         {"yaxis": 2}),
    ]

    document_targets = []
    overrides = []
    for alias, metric_name, color, extra in series:
        document_targets.append(
            CloudwatchMetricsTarget(
                alias=alias,
                namespace=NAMESPACE,
                period="1m",
                statistics=["Maximum"],
                dimensions={"DomainName": name, "ClientId": client_id},
                metricName=metric_name,
            ))
        override = {
            "alias": alias,
            "color": color,
            "lines": True,
            "bars": False,
        }
        override.update(extra)
        overrides.append(override)

    return Graph(
        title="Documents",
        dataSource=cloudwatch_data_source,
        targets=document_targets,
        yAxes=YAxes(
            YAxis(format=SHORT_FORMAT),
            YAxis(format=SHORT_FORMAT),
        ),
        seriesOverrides=overrides,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        bars=True,
        lines=False,
    ).auto_ref_ids()
             query="label_values(instance)"),
    chip_template,
]

# "Temperature" dashboard: a row header followed by a graph of
# sensors_temp_input, repeated over the `chip` template variable.
dashboard = Dashboard(
    title="Temperature",
    templating=Templating(template_list),
    panels=[
        RowPanel(title="New Row", gridPos=GridPos(h=1, w=24, x=0, y=8)),
        Graph(
            title="$chip",
            dataSource="Prometheus",
            targets=[
                Target(
                    expr='sensors_temp_input{instance="$instance",chip="$chip"}',
                    legendFormat="{{feature}}",
                    refId="A",
                ),
            ],
            repeat=Repeat("v", "chip"),
            yAxes=YAxes(
                YAxis(format=CELSIUS_FORMAT),
                YAxis(format=SHORT_FORMAT),
            ),
            gridPos=GridPos(h=10, w=24, x=0, y=9),
        ),
    ],
).auto_panel_ids()
Beispiel #28
0
dashboard = Dashboard(
    title="{}s Dashboard".format(GRR_COMPONENT).title(),
    rows=[
        Row(panels=[panel(GRR_COMPONENT) for panel in row])
        for row in GENERAL_PANELS
    ] + [
        Row(panels=[
            Graph(
                title="QPS",
                targets=[
                    Target(
                        expr='sum(rate(frontend_request_count_total[1m]))',
                        legendFormat="Requests",
                    ),
                ],
                yAxes=YAxes(left=YAxis(format="reqps")),
            ),
            Graph(
                title="Request Latency",
                targets=[
                    Target(
                        expr=
                        'sum(rate(frontend_request_latency_sum[10m])) / sum(rate(frontend_request_latency_count[10m]))',
                        legendFormat="Latency",
                    ),
                ],
                yAxes=YAxes(left=YAxis(format=SECONDS_FORMAT)),
            ),
            Graph(
                title="Well Known Flows Requests Rate by Flow",
                targets=[
Beispiel #29
0
def generate_api_gateway_requests_graph(name: str, cloudwatch_data_source: str,
                                        notifications: List[str], *args,
                                        **kwargs):
    """
    Generate the "API Gateway Requests: <name>" graph.

    Plots the sum of the ``5XXError``, ``Count`` and ``4XXError`` metrics
    for the API ``name``. When ``notifications`` is non-empty, an alert on
    the 5XX series (ref id ``ALERT_REF_ID``) is attached. Extra positional
    and keyword arguments are accepted and ignored.
    """
    # (alias, CloudWatch metric name, refId); the 5XX series comes first and
    # carries the ref id the alert condition points at.
    metrics = [
        (API_GATEWAY_5XX_ALIAS, "5XXError", ALERT_REF_ID),
        (API_GATEWAY_REQUESTS_ALIAS, "Count", API_GATEWAY_REQUESTS_REF_ID),
        (API_GATEWAY_4XX_ALIAS, "4XXError", API_GATEWAY_4XX_REF_ID),
    ]
    gateway_targets = [
        CloudwatchMetricsTarget(
            alias=alias,
            namespace=NAMESPACE,
            statistics=["Sum"],
            metricName=metric_name,
            dimensions={"ApiName": name},
            refId=ref_id,
        )
        for alias, metric_name, ref_id in metrics
    ]

    axes = YAxes(
        YAxis(format=SHORT_FORMAT),
        YAxis(format=SHORT_FORMAT),
    )

    overrides = [
        {
            "alias": API_GATEWAY_REQUESTS_ALIAS,
            "points": False,
            "color": colors.GREEN,
        },
        {"alias": API_GATEWAY_4XX_ALIAS, "color": colors.YELLOW},
        {"alias": API_GATEWAY_5XX_ALIAS, "color": colors.RED},
    ]

    errors_alert = None

    # https://docs.aws.amazon.com/lambda/latest/dg/monitoring-metrics.html
    if notifications:
        any_5xx = AlertCondition(
            Target(refId=ALERT_REF_ID),
            timeRange=TimeRange("15m", "now"),
            evaluator=GreaterThan(0),
            reducerType=RTYPE_MAX,
            operator=OP_AND,
        )
        errors_alert = Alert(
            name="{} API Gateway 5XX Errors".format(name),
            message="{} is having 5XX errors".format(name),
            executionErrorState="alerting",
            alertConditions=[any_5xx],
            frequency="2m",
            gracePeriod="2m",
            notifications=notifications,
        )

    return Graph(
        title="API Gateway Requests: {}".format(name),
        dataSource=cloudwatch_data_source,
        targets=gateway_targets,
        seriesOverrides=overrides,
        yAxes=axes,
        transparent=TRANSPARENT,
        editable=EDITABLE,
        alert=errors_alert,
        alertThreshold=ALERT_THRESHOLD,
    ).auto_ref_ids()
 templating=Templating(list=[Environment, Cluster]),
 rows=[
     Row(panels=[
         Graph(
             title="Freeable Memory",
             dataSource=DATASOURCE,
             targets=[
                 Target(
                     expr=
                     'aws_ec_freeable_memory_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}',
                     legendFormat="{{dimension_cache_cluster_id}}",
                     refId='A',
                 ),
             ],
             yAxes=YAxes(
                 YAxis(format=BYTES_FORMAT),
                 YAxis(format=SHORT_FORMAT),
             ),
         ),
         Graph(
             title="Bytes Used for Cache",
             dataSource=DATASOURCE,
             targets=[
                 Target(
                     expr=
                     'aws_ec_bytes_used_for_cache_average{environment="${environment}", dimension_cache_cluster_id=~"${cluster}.*"}',
                     legendFormat="{{dimension_cache_cluster_id}}",
                     refId='A',
                 ),
             ],
             yAxes=YAxes(