Beispiel #1
0
    def test_remove_all_whitelisted_outliers(self):
        """Check that ``es.remove_all_whitelisted_outliers`` strips the outlier.

        A document generated with ``create_outlier`` enabled is indexed, the
        clean-up is run with the dummy analyzer, and the first scanned
        document must no longer contain an ``outliers`` entry.
        """
        self.test_settings.change_configuration_path(
            test_file_whitelist_path_config)

        doc_generate = DummyDocumentsGenerate()
        outlier_doc_fields = {
            "create_outlier": True,
            "outlier_observation": "dummy observation",
            "outlier.model_name": "dummy_test",
            "outlier.model_type": "analyzer",
            "command_query":
            "osquery_get_all_processes_with_listening_conns.log",
        }
        self.test_es.add_doc(doc_generate.generate_document(outlier_doc_fields))

        # Sanity check: the outlier is present before the clean-up.
        result = list(es._scan())[0]
        self.assertIn("outliers", result["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        # Remove whitelisted outlier
        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        # The outlier must have been removed from the stored document.
        result = list(es._scan())[0]
        self.assertNotIn("outliers", result["_source"])
    def test_sudden_extra_outlier_infos_all_present(self):
        """Check that the first outlier only carries known outlier keys.

        Every key of the first outlier found after running the analyzer must
        belong to ``EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS``.
        """
        # Generate documents
        dummy_doc_generate = DummyDocumentsGenerate()
        list_delta_hour = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
        field_1_name = "user_id"
        list_field_1_value = [1] * len(list_delta_hour)
        field_2_name = "hostname"
        list_field_2_value = ["host1"] * len(list_delta_hour)

        generated_docs = dummy_doc_generate.generate_doc_time_variable_witt_custom_fields(
            list_delta_hour, field_1_name, list_field_1_value, field_2_name,
            list_field_2_value)
        self.test_es.add_multiple_docs(generated_docs)

        self.test_settings.change_configuration_path(test_conf_file_01)
        analyzer = AnalyzerFactory.create(
            root_test_use_case_files + "sudden_appearance_dummy_test_03.conf")
        set_new_current_date(analyzer)
        analyzer.evaluate_model()

        list_outlier = [
            elem for elem in es._scan() if "outliers" in elem["_source"]
        ]

        # Assert key by key so that a failure names the unexpected key.
        allowed_keys = EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
        for key in list_outlier[0]['_source']['outliers']:
            self.assertIn(key, allowed_keys)
    def test_sudden_appearance_detect_no_outlier_es_check(self):
        """Run the analyzer on the generated documents and expect no outlier."""
        # Generate documents
        delta_hours = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
        user_ids = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
        hostnames = ["host1"] * len(delta_hours)

        doc_factory = DummyDocumentsGenerate()
        docs = doc_factory.generate_doc_time_variable_witt_custom_fields(
            delta_hours, "user_id", user_ids, "hostname", hostnames)
        self.test_es.add_multiple_docs(docs)

        # Run analyzer
        self.test_settings.change_configuration_path(test_conf_file_01)
        use_case_path = (root_test_use_case_files +
                         "sudden_appearance_dummy_test_02.conf")
        analyzer = AnalyzerFactory.create(use_case_path)
        set_new_current_date(analyzer)
        analyzer.evaluate_model()

        # Count the stored documents that were flagged as outliers.
        outlier_count = sum(
            1 for stored in es._scan() if "outliers" in stored["_source"])
        self.assertEqual(outlier_count, 0)
Beispiel #4
0
    def test_non_outliers_present_in_metrics(self):
        """Check ``non_outlier_values_sample`` on the third scanned document.

        Three documents with ``user_id`` 11, 8 and 12 are indexed; after the
        analyzer has run, the third scanned document must report ``["8.0"]``
        as its non-outlier sample.
        """
        doc_factory = DummyDocumentsGenerate()

        # Generate documents; the original comments mark 11 and 12 as
        # outliers and 8 as the non-outlier.
        for user_id in (11, 8, 12):
            self.test_es.add_doc(
                doc_factory.generate_document({"user_id": user_id}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        third_doc = list(es._scan())[2]
        sample = third_doc["_source"]["outliers"]["non_outlier_values_sample"]
        self.assertEqual(sample, ["8.0"])
Beispiel #5
0
    def test_metrics_whitelist_work_test_es_result(self):
        """Expect one outlier per generated document, the whitelisted one excluded."""
        doc_factory = DummyDocumentsGenerate()
        # Per the original note: must be bigger than the trigger value (here 3)
        command_query = "SELECT * FROM dummy_table"
        nbr_generated_documents = 5

        # Generate documents that match the outlier
        for _ in range(nbr_generated_documents):
            matching_doc = doc_factory.generate_document(
                {"command_query": command_query})
            self.test_es.add_doc(matching_doc)

        # Generate whitelist document
        whitelist_doc = doc_factory.generate_document({
            "hostname": "whitelist_hostname",
            "command_query": command_query
        })
        self.test_es.add_doc(whitelist_doc)

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_length_dummy_test.conf"
        )
        analyzer.evaluate_model()

        outlier_count = sum(
            1 for stored in es._scan() if "outliers" in stored["_source"])
        self.assertEqual(outlier_count, nbr_generated_documents)
    def test_simplequery_not_use_derived_fields_in_doc(self):
        """Check that ``timestamp_year`` is not added to the stored document.

        Uses the "not derived" simplequery use case.
        """
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            config_file_simplequery_test_02)
        analyzer = AnalyzerFactory.create(
            use_case_simplequery_dummy_test_not_derived)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertNotIn reports the container content when the check fails.
        self.assertNotIn("timestamp_year", result['_source'])
    def test_simplequry_use_matched_values_in_outlier(self):
        """Check that the outlier contains a ``matched_values`` entry.

        Uses the use case with highlight match activated.
        """
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            config_file_simplequery_test_02)
        analyzer = AnalyzerFactory.create(
            use_case_simplequery_dummy_test_highlight_match_activated)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn reports the container content when the check fails.
        self.assertIn("matched_values", result['_source']['outliers'])
    def test_sudden_appearance_derived_fields_in_doc(self):
        """Check that ``timestamp_year`` is added to the stored document.

        Uses the ``sudden_appearance_derived_fields_01.conf`` use case.
        """
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(test_conf_file_01)
        analyzer = AnalyzerFactory.create(
            root_test_use_case_files +
            "sudden_appearance_derived_fields_01.conf")
        set_new_current_date(analyzer)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn reports the container content when the check fails.
        self.assertIn("timestamp_year", result['_source'])
Beispiel #9
0
    def test_terms_not_use_derived_fields_in_doc(self):
        """Check that ``timestamp_year`` is not added to the stored document.

        Uses the "not derived" terms use case.
        """
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertNotIn reports the container content when the check fails.
        self.assertNotIn("timestamp_year", result['_source'])
Beispiel #10
0
    def test_non_outlier_values_not_present_in_terms_within(self):
        """Check ``non_outlier_values_sample`` on the fifth scanned document.

        Five documents sharing hostname ``"one"`` are indexed with the
        deployment names listed below; after the analyzer has run, the fifth
        scanned document must report ``["two"]`` as its non-outlier sample.
        """
        doc_factory = DummyDocumentsGenerate()

        # Generate documents (insertion indexes 0 to 4, in this order).
        deployment_names = ("one", "two", "two", "two", "three")
        for deployment_name in deployment_names:
            self.test_es.add_doc(
                doc_factory.generate_document({
                    "hostname": "one",
                    "deployment_name": deployment_name
                }))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        fifth_doc = list(es._scan())[4]
        sample = fifth_doc["_source"]["outliers"]["non_outlier_values_sample"]
        self.assertEqual(sample, ["two"])
Beispiel #11
0
    def test_simplequery_not_use_derived_fields_but_present_in_outlier(self):
        """Check that the outlier still carries ``derived_timestamp_year``.

        Uses the "not derived" simplequery use case.
        """
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/simplequery_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn reports the container content when the check fails.
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])
Beispiel #12
0
    def test_batch_whitelist_work_doent_match_outlier_in_across(self):
        """Expect only the ``agg2`` documents to be flagged as outliers.

        One ``agg1`` document is given the whitelisted deployment name.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_across_float.conf"
        )

        # (aggregator, target value, is_whitelist)
        # agg1 (0, 1, 2) -> 3 but with whitelist: (0, 2) -> 2
        # agg2 (0, 3, 4) -> 3
        doc_specs = [
            ("agg1", 0, False),
            ("agg1", 1, True),
            ("agg2", 0, False),
            ("agg2", 0, False),
            ("agg1", 2, False),
            ("agg2", 3, False),
            ("agg2", 4, False)
        ]

        doc_factory = DummyDocumentsGenerate()
        for hostname, user_id, is_whitelist in doc_specs:
            # Only whitelisted documents get a deployment name.
            deployment_name = "whitelist-deployment" if is_whitelist else None
            self.test_es.add_doc(
                doc_factory.generate_document({
                    "deployment_name": deployment_name,
                    "user_id": user_id,
                    "hostname": hostname
                }))

        analyzer.evaluate_model()

        # Collect (aggregator, term) for every document flagged as outlier.
        detected = [
            (stored["_source"]["outliers"]["aggregator"][0],
             stored["_source"]["outliers"]["term"][0])
            for stored in es._scan()
            if "outliers" in stored["_source"]
        ]

        # We detect agg2 but not agg1
        expected = [("agg2", "0"), ("agg2", "0"), ("agg2", "3"), ("agg2", "4")]
        self.assertEqual(detected, expected)
    def test_arbitrary_key_config_present_in_outlier(self):
        """Check that the outlier contains the arbitrary key from the use case.

        The outlier's ``test_arbitrary_key`` entry must equal
        ``["arbitrary_value"]``.
        """
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        analyzer = AnalyzerFactory.create(
            use_case_simplequery_arbitrary_dummy_test)

        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(result["_source"]["outliers"]["test_arbitrary_key"],
                         ["arbitrary_value"])
    def test_one_doc_outlier_correctly_add(self):
        """Compare the stored document with the expected outlier fixture."""
        expected_doc = copy.deepcopy(doc_with_outlier_test_file_01)
        source_doc = copy.deepcopy(doc_without_outlier_test_file)

        # Insert value
        self.test_es.add_doc(source_doc)

        # Make test (supposed all doc work)
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        AnalyzerFactory.create(use_case_simplequery_dummy_test).evaluate_model()

        # Fetch result to check if it is correct
        stored_docs = list(es._scan())
        self.assertEqual(stored_docs[0], expected_doc)
Beispiel #15
0
    def test_flush_bulk_actions_using_one_save_outlier(self):
        """Save one outlier via ``es.save_outlier`` and compare the stored document."""
        expected_doc = copy.deepcopy(
            doc_with_outlier_with_derived_timestamp_test_file)
        source_doc = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(source_doc)

        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=source_doc,
        )
        outlier.outlier_dict["observation"] = "dummy observation"
        es.save_outlier(outlier)

        stored_docs = list(es._scan())
        self.assertEqual(stored_docs[0], expected_doc)
    def test_arbitrary_key_config_not_present_int_other_model(self):
        """Check that ``test_arbitrary_key`` is absent from this model's outlier.

        Dictionaries and lists could be shared between different instances;
        this test checks that no residual value is present in the outlier
        dictionary.
        """
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        analyzer = AnalyzerFactory.create(use_case_simplequery_dummy_test)

        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertNotIn reports the container content when the check fails.
        self.assertNotIn("test_arbitrary_key", result["_source"]["outliers"])
    def test_bulk_update_do_not_remove_values(self):
        """Check that a bulk update without ``outliers`` leaves the stored document unchanged."""
        original_doc = DummyDocumentsGenerate().generate_document(
            {"create_outlier": True})
        self.test_es.add_doc(original_doc)

        # Work on a copy from which the outlier has been removed.
        stripped_doc = copy.deepcopy(original_doc)
        stripped_doc["_source"].pop("outliers")

        # Update the document (without outliers)
        es.add_update_bulk_action(stripped_doc)

        # Result in ES is the same as the original document (outliers
        # weren't removed)
        stored_docs = list(es._scan())
        self.assertEqual(original_doc, stored_docs[0])
    def test_whitelist_regex_per_model_match_whitelist(self):
        """Check that hostname ``AAA-WHITELISTED`` produces no outlier.

        Uses the model-whitelist use case.
        """
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document({"hostname": "AAA-WHITELISTED"}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        analyzer = AnalyzerFactory.create(
            use_case_whitelist_tests_model_whitelist_02)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertNotIn reports the container content when the check fails.
        self.assertNotIn("outliers", result["_source"])
Beispiel #19
0
    def test_simple_process_outlier_save_es(self):
        """Create an outlier with the analyzer, save it and compare the stored document."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/analyzer_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        source_doc = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(source_doc)
        expected_doc = copy.deepcopy(doc_with_outlier_test_file)

        # Build the outlier from the document's own fields, then persist it.
        es.save_outlier(
            analyzer.create_outlier(source_doc["_source"], source_doc))

        stored_docs = list(es._scan())
        self.assertEqual(stored_docs[0], expected_doc)
Beispiel #20
0
    def test_terms_not_use_derived_fields_but_present_in_outlier(self):
        """Check that the outlier still carries ``derived_timestamp_year``.

        Uses the "not derived" terms use case.
        """
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({"user_id": 11}))

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # The parameter use_derived_fields has no impact on outlier keys.
        # assertIn reports the container content when the check fails.
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])
    def test_simplequery_no_extra_outlier_infos(self):
        """Check that every outlier key belongs to ``DEFAULT_OUTLIERS_KEY_FIELDS``."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        analyzer = AnalyzerFactory.create(use_case_simplequery_dummy_test)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # Assert key by key so that a failure names the unexpected key.
        for key in result['_source']['outliers']:
            self.assertIn(key, DEFAULT_OUTLIERS_KEY_FIELDS)
Beispiel #22
0
    def test_whitelist_regex_per_model_not_match_whitelist(self):
        """Check that hostname ``Not-work-WHITELISTED`` still produces an outlier.

        Uses the model-whitelist use case.
        """
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document(
                {"hostname": "Not-work-WHITELISTED"}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/simplequery_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/simplequery/whitelist_tests_model_whitelist_02.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn reports the container content when the check fails.
        self.assertIn("outliers", result["_source"])
Beispiel #23
0
    def test_terms_evaluate_coeff_of_variation_like_expected_document(self):
        """Run the terms use case and compare the stored document with the expected fixture."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf"
        )

        source_doc = copy.deepcopy(doc_without_outlier_test_file)
        expected_doc = copy.deepcopy(
            doc_with_terms_outlier_coeff_of_variation_no_score_sort)

        # Add doc to the database
        self.test_es.add_doc(source_doc)

        # Make test (suppose that all doc match with the query)
        analyzer.evaluate_model()

        stored_docs = list(es._scan())
        self.assertEqual(stored_docs[0], expected_doc)
Beispiel #24
0
    def test_metrics_batch_whitelist_three_outliers_one_whitelist(self):
        """Expect two outliers: ``("agg1", "7")`` and ``("agg2", "6")``.

        ``MetricsAnalyzer.MIN_EVALUATE_BATCH`` is temporarily set to 5 and is
        restored even when the test fails, so the override cannot leak into
        other tests.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_batch_whitelist_float.conf"
        )
        backup_min_eval_batch = MetricsAnalyzer.MIN_EVALUATE_BATCH
        MetricsAnalyzer.MIN_EVALUATE_BATCH = 5
        try:
            #            aggregator, target, is_whitelist
            doc_to_generate = [
                ("agg1", 5, False),
                ("agg1", 3, True),
                ("agg2", 4, False),
                ("agg2", 5, True),
                # Batch limit
                ("agg2", 3, False),
                ("agg1", 5, False),
                ("agg1", 7, False),  # Outlier
                ("agg2", 2, False),
                # Batch limit
                ("agg1", 4, True),
                ("agg2", 6, True),  # Outlier (but whitelist)
                ("agg1", 3, False),
                ("agg1", 5, False),
                # Batch limit
                ("agg2", 1, False),
                ("agg2", 6, False),  # Outlier
                ("agg1", 3, False)
            ]
            self._generate_metrics_doc_with_whitelist(doc_to_generate)

            analyzer.evaluate_model()
            # Collect (aggregator, target) for every document flagged as
            # outlier.
            list_outliers = [
                (doc["_source"]["outliers"]["aggregator"][0],
                 doc["_source"]["outliers"]["target"][0])
                for doc in es._scan()
                if "outliers" in doc["_source"]
            ]

            self.assertEqual(list_outliers, [("agg1", "7"), ("agg2", "6")])
        finally:
            # Restore the class-level setting whatever the outcome.
            MetricsAnalyzer.MIN_EVALUATE_BATCH = backup_min_eval_batch
Beispiel #25
0
    def test_terms_extra_outlier_infos_new_result(self):
        """Check that the outlier only carries known outlier keys.

        Every key must belong to
        ``EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS``.
        """
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # Assert key by key so that a failure names the unexpected key.
        allowed_keys = EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
        for key in result['_source']['outliers']:
            self.assertIn(key, allowed_keys)
    def test_one_doc_outlier_with_highlight_04(self):
        """
        Check that a document is generated correctly with the highlight fields when the use case sets
        highlight_match=0 while the configuration file sets highlight_match=1.
        """
        expected_doc = copy.deepcopy(doc_with_outlier_test_file_02)
        source_doc = copy.deepcopy(doc_without_outlier_test_file)

        # Insert value
        self.test_es.add_doc(source_doc)

        # Make test (supposed all doc work)
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_highlight_match_activated)
        AnalyzerFactory.create(
            use_case_simplequery_dummy_test_highlight_match_unactivated
        ).evaluate_model()

        # Fetch result to check if it is correct
        stored_docs = list(es._scan())
        self.assertEqual(stored_docs[0], expected_doc)
    def test_simplequery_whitelist_work_test_es_result(self):
        """Expect one outlier per generated document, the whitelisted one excluded."""
        doc_factory = DummyDocumentsGenerate()
        nbr_generated_documents = 5

        # Regular documents followed by one document with the whitelisted
        # hostname.
        docs = doc_factory.create_documents(nbr_generated_documents)
        docs.append(
            doc_factory.generate_document({"hostname": "whitelist_hostname"}))
        self.test_es.add_multiple_docs(docs)

        # Run analyzer
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_whitelist)
        AnalyzerFactory.create(use_case_simplequery_dummy_test).evaluate_model()

        outlier_count = sum(
            1 for stored in es._scan() if "outliers" in stored["_source"])
        self.assertEqual(outlier_count, nbr_generated_documents)
Beispiel #28
0
    def test_metrics_extra_outlier_infos_all_present(self):
        """Check that every key of ``EXTRA_OUTLIERS_KEY_FIELDS`` is present in the outlier."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({"user_id": 11}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        outlier_infos = result['_source']['outliers']
        # Assert key by key so that a failure names the missing key.
        for key in EXTRA_OUTLIERS_KEY_FIELDS:
            self.assertIn(key, outlier_infos)
Beispiel #29
0
    def test_metrics_batch_whitelist_outlier_detect_after_process_all_and_remove_whitelist(
            self):
        """Expect four ``agg1`` outliers once the whitelisted value is removed.

        ``MetricsAnalyzer.MIN_EVALUATE_BATCH`` is temporarily set to 5 and is
        restored even when the test fails, so the override cannot leak into
        other tests.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_batch_whitelist_avg.conf"
        )

        backup_min_eval_batch = MetricsAnalyzer.MIN_EVALUATE_BATCH
        MetricsAnalyzer.MIN_EVALUATE_BATCH = 5
        try:
            #            aggregator, target, is_whitelist
            doc_to_generate = [
                ("agg1", 5, False),
                ("agg2", 5, False),
                ("agg1", 5, False),
                ("agg1", 3, False),
                # Batch limit
                ("agg1", 6, False),
                ("agg2", 5, False),
                ("agg1", 5, False),
                ("agg1", 7, True)
            ]
            self._generate_metrics_doc_with_whitelist(doc_to_generate)
            # The avg for agg1 is 5.1 but if we remove the whitelisted
            # element, the avg is 4.8

            analyzer.evaluate_model()
            # Collect (aggregator, target) for every document flagged as
            # outlier.
            list_outliers = [
                (doc["_source"]["outliers"]["aggregator"][0],
                 doc["_source"]["outliers"]["target"][0])
                for doc in es._scan()
                if "outliers" in doc["_source"]
            ]

            # Without the batch whitelist, the only outlier would be
            # ("agg1", 6) (the ("agg1", 7) is whitelisted). But with the batch
            # whitelist, the avg is updated and all values of "agg1"
            # (except 3) are detected as outliers.
            self.assertEqual(list_outliers, [("agg1", "5"), ("agg1", "5"),
                                             ("agg1", "6"), ("agg1", "5")])
        finally:
            # Restore the class-level setting whatever the outcome.
            MetricsAnalyzer.MIN_EVALUATE_BATCH = backup_min_eval_batch
Beispiel #30
0
    def test_evaluate_batch_for_outliers_add_outlier(self):
        """Process the first outlier of an evaluated batch and compare the stored document."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_2.conf"
        )

        (eval_metrics_array, aggregator_value, target_value, metrics_value,
         observations) = self._preperate_data_terms_with_doc(metrics_value=12)

        source_doc = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(source_doc)

        batch = MetricsAnalyzer.add_metric_to_batch(
            eval_metrics_array, aggregator_value, target_value, metrics_value,
            observations, source_doc)

        # Only the detected outliers are needed; the remaining metrics are
        # ignored.
        outliers, _remaining_metrics = analyzer._evaluate_batch_for_outliers(
            batch, True)
        analyzer.process_outlier(outliers[0])

        stored_doc = list(es._scan())[0]
        expected_doc = copy.deepcopy(doc_with_outlier_test_file)
        # Show the complete diff if the documents differ.
        self.maxDiff = None
        self.assertEqual(stored_doc, expected_doc)