Exemple #1
0
    def setUp(self):
        """Start a leader charm that owns a rules provider, related to "prom/0".

        The provider is pointed at an "alerts" directory under a fresh sandbox
        root. This method only computes that path; it writes no files into it.
        """
        self.sandbox = TempFolderSandbox()
        rules_dir = os.path.join(self.sandbox.root, "alerts")
        self.alert_rules_path = rules_dir

        class ConsumerCharm(CharmBase):
            metadata_yaml = textwrap.dedent(
                """
                provides:
                  metrics-endpoint:
                    interface: prometheus_scrape
                """
            )

            def __init__(self, *args, **kwargs):
                super().__init__(*args)
                # The directory is captured from the enclosing setUp scope.
                self.rules_provider = PrometheusRulesProvider(self, dir_path=rules_dir)

        harness = Harness(ConsumerCharm, meta=ConsumerCharm.metadata_yaml)
        self.harness = harness
        self.addCleanup(harness.cleanup)
        harness.begin_with_initial_hooks()
        harness.set_leader(True)

        # Relate to a remote "prom" application and give it a single unit.
        relation_id = harness.add_relation("metrics-endpoint", "prom")
        harness.add_relation_unit(relation_id, "prom/0")
    def test_duplicated_alert_names_within_alert_rules_list_are_silently_accepted(
            self):
        """Official format: two rules sharing one alert name in a group are both kept."""
        # Each rule comes from its own gen_rule call, so the YAML dump contains
        # two independent entries rather than an anchor plus an alias.
        source = {
            "groups": [{
                "name": "my_group",
                "rules": [self.gen_rule("same") for _ in range(2)],
            }]
        }
        sandbox = TempFolderSandbox()
        sandbox.put_file("rules/file.rule", yaml.safe_dump(source))

        loader = AlertRules(topology=self.topology)
        loader.add_path(os.path.join(sandbox.root, "rules"), recursive=False)
        loaded = loader.as_dict()

        group_name = f"{self.topology.identifier}_my_group_alerts"
        labelled_rules = [
            self.gen_rule("same", labels=self.topology.as_promql_label_dict())
            for _ in range(2)
        ]
        expected = {"groups": [{"name": group_name, "rules": labelled_rules}]}
        self.assertDictEqual(expected, loaded)
    def test_load_multiple_rules_per_file(self):
        """Official format: one file holding two groups, each with several rules."""
        source = {"groups": [self.gen_group(1), self.gen_group(2)]}
        sandbox = TempFolderSandbox()
        sandbox.put_file("rules/file.rule", yaml.safe_dump(source))

        loader = AlertRules(topology=self.topology)
        loader.add_path(os.path.join(sandbox.root, "rules"), recursive=False)
        loaded = loader.as_dict()

        def labelled_rules():
            # Rules 1 and 2, each carrying the topology labels.
            return [
                self.gen_rule(number, labels=self.topology.as_promql_label_dict())
                for number in (1, 2)
            ]

        first_group = {
            "name": f"{self.topology.identifier}_group_1_alerts",
            "rules": labelled_rules(),
        }
        second_group = {
            "name": f"{self.topology.identifier}_group_2_alerts",
            "rules": labelled_rules(),
        }
        expected = {"groups": [first_group, second_group]}
        self.assertDictEqual(expected, loaded)
    def setUp(self) -> None:
        """Populate a sandbox with rule files in both formats and build the topology.

        Files written below "rules/prom":
          - mixed_format/: one bare-rule file and one official-format file
          - lma_format/: one bare free-standing rule
          - prom_format/: one official-format file
        """
        cpu_rule = {
            "alert": "CPUOverUse",
            "expr": "process_cpu_seconds_total{%%juju_topology%%} > 0.12",
        }
        standalone_rule = {
            "alert": "free_standing",
            "expr": "avg(some_vector[5m]) > 5",
        }

        # The same official-format document is written to two locations.
        official_yaml = yaml.safe_dump(
            {"groups": [{"name": "group1", "rules": [cpu_rule]}]}
        )

        fixture_files = [
            ("rules/prom/mixed_format/lma_rule.rule", yaml.safe_dump(cpu_rule)),
            ("rules/prom/mixed_format/standard_rule.rule", official_yaml),
            ("rules/prom/lma_format/free_standing_rule.rule",
             yaml.safe_dump(standalone_rule)),
            ("rules/prom/prom_format/standard_rule.rule", official_yaml),
        ]

        self.sandbox = TempFolderSandbox()
        self.sandbox.put_files(*fixture_files)

        self.topology = ProviderTopology("MyModel", "MyUUID", "MyApp",
                                         "MyCharm")
    def test_deeply_nested(self):
        """Recursive loading picks up single-rule files at three directory depths."""
        relative_paths = (
            "rules/file.rule",
            "rules/a/file.rule",
            "rules/a/b/file.rule",
        )
        sandbox = TempFolderSandbox()
        # File number N (0, 1, 2) holds rule number N.
        sandbox.put_files(*[
            (path, yaml.safe_dump(self.gen_rule(number)))
            for number, path in enumerate(relative_paths)
        ])

        loader = AlertRules(topology=self.topology)
        loader.add_path(os.path.join(sandbox.root, "rules"), recursive=True)
        loaded = loader.as_dict()

        # Expected group names, in the same order as the files above.
        group_names = (
            f"{self.topology.identifier}_file_alerts",
            f"{self.topology.identifier}_a_file_alerts",
            f"{self.topology.identifier}_a_b_file_alerts",
        )
        expected = {
            "groups": [
                {
                    "name": group_name,
                    "rules": [
                        self.gen_rule(
                            number, labels=self.topology.as_promql_label_dict())
                    ],
                }
                for number, group_name in enumerate(group_names)
            ]
        }
        self.assertDictEqual(expected, loaded)
    def test_duplicated_group_names_within_a_file_are_silently_accepted(self):
        """Official format: two groups with the same name in one file are both kept."""
        source = {"groups": [self.gen_group("same") for _ in range(2)]}
        sandbox = TempFolderSandbox()
        sandbox.put_file("rules/file.rule", yaml.safe_dump(source))

        loader = AlertRules(topology=self.topology)
        loader.add_path(os.path.join(sandbox.root, "rules"), recursive=False)
        loaded = loader.as_dict()

        def expected_group():
            # Both groups are expected to look exactly alike, name included.
            return {
                "name": f"{self.topology.identifier}_group_same_alerts",
                "rules": [
                    self.gen_rule(
                        number, labels=self.topology.as_promql_label_dict())
                    for number in (1, 2)
                ],
            }

        expected = {"groups": [expected_group(), expected_group()]}
        self.assertDictEqual(expected, loaded)
Exemple #7
0
class TestReloadAlertRules(unittest.TestCase):
    """Feature: Provider charm can manually invoke reloading of alerts.

    Background: In use cases such as cos-configuration-k8s-operator, the last hook can fire before
    the alert files show up on disk. In that case relation data would remain empty of alerts. To
    circumvent that, a public method for reloading alert rules is offered.
    """

    # Relation data representation for the case of "no alerts".
    NO_ALERTS = json.dumps({})

    # A short-form free-standing alert, used for brevity.
    ALERT = yaml.safe_dump({"alert": "free_standing", "expr": "avg(some_vector[5m]) > 5"})

    def setUp(self):
        """Start a leader charm with a rules provider, related to "prom/0"."""
        self.sandbox = TempFolderSandbox()
        rules_dir = os.path.join(self.sandbox.root, "alerts")
        self.alert_rules_path = rules_dir

        class ConsumerCharm(CharmBase):
            metadata_yaml = textwrap.dedent(
                """
                provides:
                  metrics-endpoint:
                    interface: prometheus_scrape
                """
            )

            def __init__(self, *args, **kwargs):
                super().__init__(*args)
                # The directory is captured from the enclosing setUp scope.
                self.rules_provider = PrometheusRulesProvider(self, dir_path=rules_dir)

        harness = Harness(ConsumerCharm, meta=ConsumerCharm.metadata_yaml)
        self.harness = harness
        self.addCleanup(harness.cleanup)
        harness.begin_with_initial_hooks()
        harness.set_leader(True)
        relation_id = harness.add_relation("metrics-endpoint", "prom")
        harness.add_relation_unit(relation_id, "prom/0")

    def _published_alert_rules(self):
        """Return the raw "alert_rules" value from the app's relation data."""
        relation = self.harness.charm.model.get_relation("metrics-endpoint")
        return relation.data[self.harness.charm.app].get("alert_rules")

    def _reload(self):
        """Ask the rules provider to re-read the alerts dir."""
        self.harness.charm.rules_provider._reinitialize_alert_rules()

    def _write_alert_file(self):
        """Put the sample alert into the alerts dir and return the file's path."""
        alert_path = os.path.join(self.alert_rules_path, "alert.rule")
        self.sandbox.put_file(alert_path, self.ALERT)
        return alert_path

    def test_reload_when_dir_is_still_empty_changes_nothing(self):
        """Scenario: The reload method is called when the alerts dir is still empty."""
        # GIVEN relation data contains no alerts
        self.assertEqual(self._published_alert_rules(), self.NO_ALERTS)

        # WHEN no rule files are present

        # AND the reload method is called
        self._reload()

        # THEN relation data is unchanged
        self.assertEqual(self._published_alert_rules(), self.NO_ALERTS)

    def test_reload_after_dir_is_populated_updates_relation_data(self):
        """Scenario: The reload method is called after some alert files are added."""
        # GIVEN relation data contains no alerts
        self.assertEqual(self._published_alert_rules(), self.NO_ALERTS)

        # WHEN some rule files are added to the alerts dir
        self._write_alert_file()

        # AND the reload method is called
        self._reload()

        # THEN relation data is updated
        self.assertNotEqual(self._published_alert_rules(), self.NO_ALERTS)

    def test_reload_after_dir_is_emptied_updates_relation_data(self):
        """Scenario: The reload method is called after all the loaded alert files are removed."""
        # GIVEN alert files are present and relation data contains respective alerts
        alert_path = self._write_alert_file()
        self._reload()
        self.assertNotEqual(self._published_alert_rules(), self.NO_ALERTS)

        # WHEN all rule files are deleted from the alerts dir
        self.sandbox.remove(alert_path)

        # AND the reload method is called
        self._reload()

        # THEN relation data is empty again
        self.assertEqual(self._published_alert_rules(), self.NO_ALERTS)

    def test_reload_after_dir_itself_removed_updates_relation_data(self):
        """Scenario: The reload method is called after the alerts dir doesn't exist anymore."""
        # GIVEN alert files are present and relation data contains respective alerts
        alert_path = self._write_alert_file()
        self._reload()
        self.assertNotEqual(self._published_alert_rules(), self.NO_ALERTS)

        # WHEN the alerts dir itself is deleted
        self.sandbox.remove(alert_path)
        self.sandbox.rmdir(self.alert_rules_path)

        # AND the reload method is called
        self._reload()

        # THEN relation data is empty again
        self.assertEqual(self._published_alert_rules(), self.NO_ALERTS)

    def test_only_files_with_rule_or_rules_suffixes_are_loaded(self):
        """Scenario: User has both short-form rules (*.rule) and long-form rules (*.rules)."""
        # GIVEN various tricky combinations of files present; each file's alert
        # is named after the file so loaded files can be identified afterwards
        for filename in ("alert.rule", "alert.rules", "alert.ruless", "alertrule", "alertrules"):
            self.sandbox.put_file(
                os.path.join(self.alert_rules_path, filename),
                yaml.safe_dump({"alert": filename, "expr": "avg(some_vector[5m]) > 5"}),
            )

        # AND the reload method is called
        self._reload()

        # THEN only the *.rule and *.rules files are loaded
        published = json.loads(self._published_alert_rules())
        loaded_names = {group["rules"][0]["alert"] for group in published["groups"]}
        self.assertEqual(loaded_names, {"alert.rule", "alert.rules"})
class TestAlertRulesWithOneRulePerFile(unittest.TestCase):
    """Load files that each hold a single rule, in bare and official formats."""

    def setUp(self) -> None:
        """Populate a sandbox with rule files in both formats and build the topology.

        Files written below "rules/prom":
          - mixed_format/: one bare-rule file and one official-format file
          - lma_format/: one bare free-standing rule
          - prom_format/: one official-format file
        """
        cpu_rule = {
            "alert": "CPUOverUse",
            "expr": "process_cpu_seconds_total{%%juju_topology%%} > 0.12",
        }
        standalone_rule = {
            "alert": "free_standing",
            "expr": "avg(some_vector[5m]) > 5",
        }

        # The same official-format document is written to two locations.
        official_yaml = yaml.safe_dump(
            {"groups": [{"name": "group1", "rules": [cpu_rule]}]}
        )

        fixture_files = [
            ("rules/prom/mixed_format/lma_rule.rule", yaml.safe_dump(cpu_rule)),
            ("rules/prom/mixed_format/standard_rule.rule", official_yaml),
            ("rules/prom/lma_format/free_standing_rule.rule",
             yaml.safe_dump(standalone_rule)),
            ("rules/prom/prom_format/standard_rule.rule", official_yaml),
        ]

        self.sandbox = TempFolderSandbox()
        self.sandbox.put_files(*fixture_files)

        self.topology = ProviderTopology("MyModel", "MyUUID", "MyApp",
                                         "MyCharm")

    def _load(self, *subdirs, **add_path_options):
        """Load "rules/prom[/subdirs...]" from the sandbox and return the rules dict.

        Keyword options are forwarded to ``add_path`` untouched, so omitting
        them exercises that method's own defaults.
        """
        loader = AlertRules(topology=self.topology)
        loader.add_path(
            os.path.join(self.sandbox.root, "rules", "prom", *subdirs),
            **add_path_options,
        )
        return loader.as_dict()

    def _expected_cpu_rule(self):
        """Return the CPUOverUse rule with the topology placeholder filled in."""
        return {
            "alert": "CPUOverUse",
            "expr":
            f"process_cpu_seconds_total{{{self.topology.promql_labels}}} > 0.12",
            "labels": self.topology.as_promql_label_dict(),
        }

    def _expected_free_standing_rule(self):
        """Return the free-standing rule with topology labels attached."""
        return {
            "alert": "free_standing",
            "expr": "avg(some_vector[5m]) > 5",
            "labels": self.topology.as_promql_label_dict(),
        }

    def test_non_recursive_is_default(self):
        """Loading "rules/prom" with default options yields an empty dict."""
        # setUp writes rule files only into subdirectories of "rules/prom".
        loaded = self._load()
        self.assertEqual({}, loaded)

    def test_non_recursive_lma_format_loading_from_root_dir(self):
        """A bare rule file is wrapped in a group named after the file."""
        loaded = self._load("lma_format")

        expected = {
            "groups": [
                {
                    "name":
                    f"{self.topology.identifier}_free_standing_rule_alerts",
                    "rules": [self._expected_free_standing_rule()],
                },
            ]
        }

        self.assertEqual(expected, loaded)

    def test_non_recursive_official_format_loading_from_root_dir(self):
        """An official-format file keeps the group name declared in the file."""
        loaded = self._load("prom_format")

        expected = {
            "groups": [
                {
                    "name": f"{self.topology.identifier}_group1_alerts",
                    "rules": [self._expected_cpu_rule()],
                },
            ]
        }

        self.assertEqual(expected, loaded)

    def test_alerts_in_both_formats_are_recursively_aggregated(self):
        """This test covers several aspects of the rules format.

        - Group name:
          - For rules in lma format, core group name is the filename
          - For rules in official format, core group name is the group name in the file
        """
        loaded = self._load(recursive=True)

        cpu_rule = self._expected_cpu_rule()
        free_standing_rule = self._expected_free_standing_rule()

        expected = {
            "groups": [
                {
                    "name":
                    f"{self.topology.identifier}_mixed_format_group1_alerts",
                    "rules": [cpu_rule],
                },
                {
                    "name":
                    f"{self.topology.identifier}_mixed_format_lma_rule_alerts",
                    "rules": [cpu_rule],
                },
                {
                    "name":
                    f"{self.topology.identifier}_lma_format_free_standing_rule_alerts",
                    "rules": [free_standing_rule],
                },
                {
                    "name":
                    f"{self.topology.identifier}_prom_format_group1_alerts",
                    "rules": [cpu_rule],
                },
            ]
        }

        # Group order is not part of the contract here, hence the
        # order-insensitive comparison.
        difference = DeepDiff(expected, loaded, ignore_order=True)
        self.assertEqual({}, difference)