Esempio n. 1
0
class TestEs(unittest.TestCase):
    """Tests for the ``es`` helper functions.

    Every test runs with a fresh ``TestStubEs`` and ``UpdateSettings`` fixture;
    both are restored in ``tearDown``.
    """

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        self.test_es.restore_es()
        self.test_settings.restore_default_configuration_path()

    def test_add_tag_to_document_no_tag(self):
        """A document without a "tags" entry gets one that holds only the new tag."""
        elem = {"_source": {"key": {"test": 1}}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"] = ["new_tag"]

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")
        self.assertEqual(new_doc_result, expected_result)

    def test_add_tag_to_document_already_a_tag(self):
        """The new tag is appended after the tags the document already has."""
        elem = {"_source": {"key": {"test": 1}, "tags": ["ok"]}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"].append("new_tag")

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")
        self.assertEqual(new_doc_result, expected_result)

    def test_remove_all_whitelisted_outliers(self):
        """An outlier present on a stored document is gone after remove_all_whitelisted_outliers."""
        self.test_settings.change_configuration_path(
            test_file_whitelist_path_config)

        doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            doc_generate.generate_document({
                "create_outlier": True,
                "outlier_observation": "dummy observation",
                "outlier.model_name": "dummy_test",
                "outlier.model_type": "analyzer",
                "command_query": "osquery_get_all_processes_with_listening_conns.log",
            }))

        # Check that the outlier was correctly generated.
        # assertIn/assertNotIn report the inspected container on failure.
        result = list(es._scan())[0]
        self.assertIn("outliers", result["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        # Remove the whitelisted outlier
        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        # Check that the outlier was correctly removed
        result = list(es._scan())[0]
        self.assertNotIn("outliers", result["_source"])
Esempio n. 2
0
class TestAnalyzer(unittest.TestCase):
    """Tests for analyzers built by ``AnalyzerFactory`` from the dummy analyzer use cases."""

    # Settings file loaded by every test in this class.
    _SETTINGS_FILE = "/app/tests/unit_tests/files/analyzer_test_01.conf"
    # Use case file describing the "dummy_test" analyzer.
    _DUMMY_USE_CASE = "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"

    def setUp(self):
        # "es" is used in the Analyzer construction and in the method "process_outlier"
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # restore the default configuration file so we don't influence other unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def test_simple_process_outlier_return_good_outlier(self):
        """create_outlier yields the expected outlier_dict, including the model information."""
        self.test_settings.change_configuration_path(self._SETTINGS_FILE)
        analyzer = AnalyzerFactory.create(self._DUMMY_USE_CASE)

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        doc_fields = doc_without_outlier["_source"]
        outlier = analyzer.create_outlier(doc_fields, doc_without_outlier)
        expected_outlier = Outlier(outlier_type=["dummy type"], outlier_reason=['dummy reason'],
                                   outlier_summary='dummy summary',
                                   doc=doc_without_outlier)
        expected_outlier.outlier_dict['model_name'] = 'dummy_test'
        expected_outlier.outlier_dict['model_type'] = 'analyzer'
        expected_outlier.outlier_dict['elasticsearch_filter'] = 'es_valid_query'

        # assertEqual (unlike assertTrue(a == b)) shows both values when they differ.
        self.assertEqual(outlier.outlier_dict, expected_outlier.outlier_dict)

    def test_simple_process_outlier_save_es(self):
        """Saving an outlier through es.save_outlier stores the expected document."""
        self.test_settings.change_configuration_path(self._SETTINGS_FILE)
        analyzer = AnalyzerFactory.create(self._DUMMY_USE_CASE)

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(doc_without_outlier)
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)

        doc_fields = doc_without_outlier["_source"]
        outlier = analyzer.create_outlier(doc_fields, doc_without_outlier)

        es.save_outlier(outlier)

        result = list(es._scan())[0]

        self.assertEqual(result, doc_with_outlier)

    def test_arbitrary_key_config_present_in_analyzer(self):
        """Arbitrary keys of the use case end up in analyzer.extra_model_settings."""
        self.test_settings.change_configuration_path(self._SETTINGS_FILE)
        analyzer = AnalyzerFactory.create("/app/tests/unit_tests/files/use_cases/analyzer/analyzer_arbitrary_dummy_test.conf")

        self.assertDictEqual(analyzer.extra_model_settings, {"test_arbitrary_key": "arbitrary_value",
                                                             "elasticsearch_filter": "es_valid_query"})
Esempio n. 3
0
class TestSettings(unittest.TestCase):
    """Tests around loading configuration files through ``UpdateSettings``.

    The default configuration path is restored after every test.
    """

    def setUp(self):
        self.test_settings = UpdateSettings()

    def tearDown(self):
        self.test_settings.restore_default_configuration_path()

    def test_whitelist_correctly_reload_after_update_config(self):
        """Switching the configuration file changes the whitelist verdict for the same document."""
        self.test_settings.change_configuration_path(test_whitelist_single_literal_file)

        document_fields = {
            "create_outlier": True,
            "outlier_observation": "dummy observation",
            "filename": "osquery_get_all_processes_with_listening_conns.log",
        }
        generated_doc = DummyDocumentsGenerate().generate_document(document_fields)

        # With the first configuration the outlier is not whitelisted
        self.assertFalse(Outlier.is_whitelisted_doc(generated_doc))

        # After switching the configuration the same outlier is whitelisted
        self.test_settings.change_configuration_path(test_whitelist_multiple_literal_file)
        self.assertTrue(Outlier.is_whitelisted_doc(generated_doc))

    def test_duplicate_whitelist_keys_not_crash(self):
        """A file with a duplicated option still loads; "single_key" reads back as the expected value."""
        self.test_settings.change_configuration_path(test_whitelist_duplicate_option_file)
        loaded_value = settings.config.get("whitelist_literals", "single_key")
        self.assertEqual(loaded_value, "dummy_whitelist_item_two")

    def test_error_when_forgot_whitelist_config(self):
        """Loading the configuration without whitelist raises NoSectionError."""
        self.assertRaises(NoSectionError,
                          self.test_settings.change_configuration_path,
                          test_config_without_whitelist_file)

    def test_error_on_duplicate_key_check(self):
        """check_no_duplicate_key returns a DuplicateOptionError for a duplicated option."""
        self.test_settings.change_configuration_path(test_whitelist_duplicate_option_file)
        self.assertIsInstance(settings.check_no_duplicate_key(), DuplicateOptionError)

    def test_error_on_duplicate_section_check(self):
        """check_no_duplicate_key returns a DuplicateSectionError for a duplicated section."""
        self.test_settings.change_configuration_path(test_whitelist_duplicate_section_file)
        self.assertIsInstance(settings.check_no_duplicate_key(), DuplicateSectionError)
Esempio n. 4
0
class TestNotifier(unittest.TestCase):
    """Tests for the emails recorded by ``TestStubNotifier`` when outliers are notified.

    The stub notifier is restored in ``tearDown`` rather than at the end of each
    test, so that a failing assertion cannot leave it installed for later tests.
    """

    # Settings file loaded by every test in this class.
    _NOTIFICATIONS_CONFIG = "/app/tests/unit_tests/files/notifications_test.conf"

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()
        # Created per test by _install_stub_notifier(), after the configuration is loaded.
        self.test_notifier = None

    def tearDown(self):
        # Same restore order as before (notifier, then settings, then es), but now
        # executed even when the test body failed before reaching its last line.
        try:
            if self.test_notifier is not None:
                self.test_notifier.restore_notifier()
        finally:
            self.test_settings.restore_default_configuration_path()
            self.test_es.restore_es()

    def _install_stub_notifier(self):
        """Load the notification test configuration, then create the stub notifier."""
        self.test_settings.change_configuration_path(self._NOTIFICATIONS_CONFIG)
        self.test_notifier = TestStubNotifier()

    def test_notify_on_outlier_correctly_create_email(self):
        """Notifying a single outlier records exactly one email."""
        import logging
        import sys

        logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

        self._install_stub_notifier()

        doc_generate = DummyDocumentsGenerate()

        # Create outlier
        doc = doc_generate.generate_document()
        outlier = Outlier("dummy type", "dummy reason", "dummy summary", doc)

        # execute notification
        es.notifier.notify_on_outlier(outlier)

        self.assertEqual(len(self.test_notifier.get_list_email()), 1)

    def test_email_dict_key(self):
        """A recorded email is a dict whose keys are "subject" and "body", in that order."""
        self._install_stub_notifier()

        doc_generate = DummyDocumentsGenerate()

        # Create outlier
        doc = doc_generate.generate_document()
        outlier = Outlier("dummy type", "dummy reason", "dummy summary", doc)

        # execute notification
        es.notifier.notify_on_outlier(outlier)

        email_dict = self.test_notifier.get_list_email()[0]
        self.assertEqual(list(email_dict.keys()), ["subject", "body"])

    def test_notification_on_outlier_match_metrics(self):
        """Evaluating the metrics use case on an outlier document records one email."""
        self._install_stub_notifier()

        doc_generate = DummyDocumentsGenerate()

        # Create document that's an outlier
        doc = doc_generate.generate_document({"user_id": 11})
        self.test_es.add_doc(doc)

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/notifications/metrics_numerical_value_dummy_test.conf"
        )
        analyzer.evaluate_model()

        self.assertEqual(len(self.test_notifier.get_list_email()), 1)

    def test_notification_on_outlier_match_metrics_not_notification_enable(self):
        """Evaluating the "no_notif" metrics use case records no email."""
        self._install_stub_notifier()

        doc_generate = DummyDocumentsGenerate()

        # Create document that's an outlier
        doc = doc_generate.generate_document({"user_id": 11})
        self.test_es.add_doc(doc)

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/notifications/metrics_no_notif_numerical_value_dummy_test.conf"
        )
        analyzer.evaluate_model()

        self.assertEqual(len(self.test_notifier.get_list_email()), 0)

    def test_notification_on_outlier_already_detected(self):
        """Two outliers sharing the summary "dummy summary" record a single email."""
        self._install_stub_notifier()

        doc_generate = DummyDocumentsGenerate()

        # Create outliers
        doc1 = doc_generate.generate_document()
        outlier1 = Outlier("dummy type", "dummy reason", "dummy summary", doc1)
        doc2 = doc_generate.generate_document()
        outlier2 = Outlier("dummy type2", "dummy reason2", "dummy summary", doc2)

        # execute notification
        es.notifier.notify_on_outlier(outlier1)
        es.notifier.notify_on_outlier(outlier2)

        self.assertEqual(len(self.test_notifier.get_list_email()), 1)

    def test_notification_on_two_different_outliers(self):
        """Two outliers with different summaries record two emails."""
        self._install_stub_notifier()

        doc_generate = DummyDocumentsGenerate()

        # Create outliers
        doc1 = doc_generate.generate_document()
        outlier1 = Outlier("dummy type", "dummy reason", "dummy summary", doc1)
        doc2 = doc_generate.generate_document()
        outlier2 = Outlier("dummy type2", "dummy reason2", "dummy summary2", doc2)

        # execute notification
        es.notifier.notify_on_outlier(outlier1)
        es.notifier.notify_on_outlier(outlier2)

        self.assertEqual(len(self.test_notifier.get_list_email()), 2)

    def test_notification_on_outlier_already_detected_but_not_in_queue(self):
        """An outlier notified again after being pushed out of the queue records a new email."""
        self._install_stub_notifier()

        doc_generate = DummyDocumentsGenerate()

        # Create outliers
        doc = doc_generate.generate_document()
        # Fill the queue (3 elements)
        # NOTE(review): the queue size of 3 presumably comes from notifications_test.conf - confirm.
        outlier1 = Outlier("dummy type", "dummy reason", "dummy summary1", doc)
        es.notifier.notify_on_outlier(outlier1)
        outlier2 = Outlier("dummy type2", "dummy reason2", "dummy summary2", doc)
        es.notifier.notify_on_outlier(outlier2)
        outlier3 = Outlier("dummy type3", "dummy reason3", "dummy summary3", doc)
        es.notifier.notify_on_outlier(outlier3)
        # Add a new one that will remove the first
        outlier4 = Outlier("dummy type4", "dummy reason4", "dummy summary4", doc)
        es.notifier.notify_on_outlier(outlier4)

        # Add the first one again
        es.notifier.notify_on_outlier(outlier1)

        # Every notified outlier must be present (so 5)
        self.assertEqual(len(self.test_notifier.get_list_email()), 5)
Esempio n. 5
0
class TestSettings(unittest.TestCase):
    """Tests around loading configuration files and reporting failed configuration paths.

    Standard-library logging is disabled for the duration of each test and
    re-enabled afterwards; the default configuration path is restored as well.
    """

    def setUp(self):
        # NOTE(review): imported locally under an alias, presumably because the
        # module-level name "logging" refers to something else - confirm.
        import logging as base_logging
        base_logging.disable(base_logging.CRITICAL)

        self.test_settings = UpdateSettings()

    def tearDown(self):
        import logging as base_logging
        base_logging.disable(base_logging.NOTSET)

        self.test_settings.restore_default_configuration_path()

    def test_whitelist_correctly_reload_after_update_config(self):
        """Switching the configuration file changes the whitelist verdict for the same document."""
        self.test_settings.change_configuration_path(
            test_whitelist_single_literal_file)

        dummy_doc_gen = DummyDocumentsGenerate()
        doc = dummy_doc_gen.generate_document({
            "create_outlier": True,
            "outlier_observation": "dummy observation",
            "filename": "osquery_get_all_processes_with_listening_conns.log",
        })

        # With this configuration, the outlier is not whitelisted
        self.assertFalse(Outlier.is_whitelisted_doc(doc))

        # Update configuration
        self.test_settings.change_configuration_path(
            test_whitelist_multiple_literal_file)
        # Now the outlier is whitelisted
        self.assertTrue(Outlier.is_whitelisted_doc(doc))

    def test_duplicate_whitelist_keys_not_crash(self):
        """A file with a duplicated option still loads; "single_key" reads back as the expected value."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_option_file)
        self.assertEqual(
            settings.config.get("whitelist_literals", "single_key"),
            "dummy_whitelist_item_two")

    def test_error_when_forgot_whitelist_config(self):
        """Loading the configuration without whitelist raises NoSectionError."""
        with self.assertRaises(NoSectionError):
            self.test_settings.change_configuration_path(
                test_config_without_whitelist_file)

    def test_error_on_duplicate_key_check(self):
        """check_no_duplicate_key returns a DuplicateOptionError for a duplicated option."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_option_file)
        result = settings.check_no_duplicate_key()
        self.assertIsInstance(result, DuplicateOptionError)

    def test_error_on_duplicate_section_check(self):
        """check_no_duplicate_key returns a DuplicateSectionError for a duplicated section."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_section_file)
        result = settings.check_no_duplicate_key()
        self.assertIsInstance(result, DuplicateSectionError)

    # Test on process_configuration_files function
    def test_error_when_config_file_does_not_exist(self):
        """Loading a non-existent configuration file exits with code 2."""
        with self.assertRaises(SystemExit) as cm:
            self.test_settings.change_configuration_path(
                test_config_that_does_not_exist)
        self.assertEqual(cm.exception.code, 2)

    # Test on process_configuration_files function
    def test_error_when_config_file_is_a_directory(self):
        """Loading a directory as configuration file exits with code 2."""
        with self.assertRaises(SystemExit) as cm:
            self.test_settings.change_configuration_path(
                test_config_that_is_a_directory)
        self.assertEqual(cm.exception.code, 2)

    def test_error_when_failed_config_file_exists_on_interactive_mode(self):
        """print_failed_configs_and_exit exits with code 2 for one failed path."""
        with self.assertRaises(SystemExit) as cm:
            print_failed_configs_and_exit({test_config_that_does_not_exist})
        self.assertEqual(cm.exception.code, 2)

    def test_error_when_multiple_failed_config_files_exist(self):
        """print_failed_configs_and_exit exits with code 2 for several failed paths."""
        failed_config_files = {
            test_config_that_does_not_exist, test_config_that_is_a_directory
        }
        with self.assertRaises(SystemExit) as cm:
            print_failed_configs_and_exit(failed_config_files)
        self.assertEqual(cm.exception.code, 2)

    def test_no_exceptions_on_valid_config_file(self):
        """Loading a valid configuration file raises nothing."""
        try:
            self.test_settings.change_configuration_path(
                test_whitelist_multiple_literal_file)
        except Exception:
            self.fail(
                "loading a valid configuration file raised an unexpected exception!"
            )

    def test_error_when_no_failed_config_paths_exist(self):
        """print_failed_configs_and_exit does not exit when no path failed."""
        # An empty set, matching the sets of paths passed by the sibling tests
        # ("{}" would be an empty dict).
        failed_config_files = set()
        try:
            print_failed_configs_and_exit(failed_config_files)
        except SystemExit:
            self.fail(
                "print_failed_configs_and_exit exited although no configuration file failed"
            )
Esempio n. 6
0
class TestSimplequeryAnalyzer(unittest.TestCase):
    """Tests for the simplequery use cases, evaluated against a ``TestStubEs`` fixture.

    Most tests follow the same steps: store one or more documents, load a
    settings file, build the analyzer for a use case file, evaluate it and
    inspect the first document returned by ``es._scan()``.
    """

    # Settings files used by the tests below.
    _SETTINGS_01 = "/app/tests/unit_tests/files/simplequery_test_01.conf"
    _SETTINGS_02 = "/app/tests/unit_tests/files/simplequery_test_02.conf"
    _SETTINGS_WHITELIST = "/app/tests/unit_tests/files/simplequery_test_whitelist.conf"
    # Directory holding the simplequery use case files.
    _USE_CASE_DIR = "/app/tests/unit_tests/files/use_cases/simplequery/"

    @classmethod
    def setUpClass(cls):
        logging.verbosity = 0

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # restore the default configuration file so we don't influence other unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def _get_simplequery_analyzer(self, config_file, config_section):
        """Load config_file, then build a SimplequeryAnalyzer for config_section."""
        self.test_settings.change_configuration_path(config_file)
        return SimplequeryAnalyzer(config_section_name=config_section)

    def _create_analyzer(self, settings_file, use_case_name):
        """Load settings_file, then build the analyzer for the named simplequery use case file."""
        self.test_settings.change_configuration_path(settings_file)
        return AnalyzerFactory.create(self._USE_CASE_DIR + use_case_name)

    @staticmethod
    def _first_scanned_doc():
        """Return the first document yielded by es._scan() (IndexError if there is none)."""
        return list(es._scan())[0]

    # Simply test if use cases containing a % sign also work correctly and don't generate an exception when being
    # parsed by the ConfigParser. This is the reason we use the RawConfigParser.
    # https://docs.python.org/2/library/configparser.html
    def test_simplequery_raw_configparser_test_percent_signs_in_query(self):
        analyzer = self._create_analyzer(
            self._SETTINGS_WHITELIST,
            "simplequery_raw_configparser_test_percent_signs.conf")
        analyzer.evaluate_model()

    def test_simplequery_whitelist_work_test_es_result(self):
        """Only the non-whitelisted documents are flagged as outliers."""
        dummy_doc_generate = DummyDocumentsGenerate()
        nbr_generated_documents = 5
        all_doc = dummy_doc_generate.create_documents(nbr_generated_documents)
        whitelisted_document = dummy_doc_generate.generate_document(
            {"hostname": "whitelist_hostname"})
        all_doc.append(whitelisted_document)
        self.test_es.add_multiple_docs(all_doc)

        # Run analyzer
        analyzer = self._create_analyzer(self._SETTINGS_WHITELIST,
                                         "simplequery_dummy_test.conf")
        analyzer.evaluate_model()

        nbr_outliers = sum(1 for elem in es._scan()
                           if "outliers" in elem["_source"])
        self.assertEqual(nbr_outliers, nbr_generated_documents)

    def test_one_doc_outlier_correctly_add(self):
        """The stored document equals the reference document with outlier after evaluation."""
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)

        # Insert value
        self.test_es.add_doc(doc_without_outlier)
        # Make test (supposed all doc work)
        analyzer = self._create_analyzer(self._SETTINGS_01,
                                         "simplequery_dummy_test.conf")
        analyzer.evaluate_model()

        # Fetch result to check if it is correct
        self.assertEqual(self._first_scanned_doc(), doc_with_outlier)

    def test_simplequery_use_derived_fields_in_doc(self):
        """With the "derived" use case, "timestamp_year" is added to the document itself."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        analyzer = self._create_analyzer(
            self._SETTINGS_02, "simplequery_dummy_test_derived.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertIn("timestamp_year", result['_source'])

    def test_simplequery_use_derived_fields_in_outlier(self):
        """With the "derived" use case, "derived_timestamp_year" is part of the outlier."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        analyzer = self._create_analyzer(
            self._SETTINGS_02, "simplequery_dummy_test_derived.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_simplequery_not_use_derived_fields_in_doc(self):
        """With the "not_derived" use case, "timestamp_year" is not added to the document."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        analyzer = self._create_analyzer(
            self._SETTINGS_02, "simplequery_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertNotIn("timestamp_year", result['_source'])

    def test_simplequery_not_use_derived_fields_but_present_in_outlier(self):
        """With the "not_derived" use case, "derived_timestamp_year" is still part of the outlier."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        analyzer = self._create_analyzer(
            self._SETTINGS_02, "simplequery_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_simplequery_default_outlier_infos(self):
        """Every key of DEFAULT_OUTLIERS_KEY_FIELDS is present in the outlier."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        analyzer = self._create_analyzer(self._SETTINGS_01,
                                         "simplequery_dummy_test.conf")
        analyzer.evaluate_model()

        outlier_fields = self._first_scanned_doc()['_source']['outliers']
        # One assertion per key so that a failure names the missing key.
        for expected_key in DEFAULT_OUTLIERS_KEY_FIELDS:
            self.assertIn(expected_key, outlier_fields)

    def test_simplequery_no_extra_outlier_infos(self):
        """The outlier holds no key outside DEFAULT_OUTLIERS_KEY_FIELDS."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        analyzer = self._create_analyzer(self._SETTINGS_01,
                                         "simplequery_dummy_test.conf")
        analyzer.evaluate_model()

        outlier_fields = self._first_scanned_doc()['_source']['outliers']
        # One assertion per key so that a failure names the unexpected key.
        for present_key in outlier_fields:
            self.assertIn(present_key, DEFAULT_OUTLIERS_KEY_FIELDS)

    def test_whitelist_literal_per_model_match_whitelist(self):
        """A hostname matching the model whitelist of use case 01 produces no outlier."""
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document(
                {"hostname": "HOSTNAME-WHITELISTED"}))

        # Run analyzer
        analyzer = self._create_analyzer(
            self._SETTINGS_01, "whitelist_tests_model_whitelist_01.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertNotIn("outliers", result["_source"])

    def test_whitelist_literal_per_model_not_match_whitelist(self):
        """A hostname not matching the model whitelist of use case 01 produces an outlier."""
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document(
                {"hostname": "not_whitelist_hostname"}))

        # Run analyzer
        analyzer = self._create_analyzer(
            self._SETTINGS_01, "whitelist_tests_model_whitelist_01.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertIn("outliers", result["_source"])

    def test_whitelist_regex_per_model_match_whitelist(self):
        """A hostname matching the model whitelist of use case 02 produces no outlier."""
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document({"hostname": "AAA-WHITELISTED"}))

        # Run analyzer
        analyzer = self._create_analyzer(
            self._SETTINGS_01, "whitelist_tests_model_whitelist_02.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertNotIn("outliers", result["_source"])

    def test_whitelist_regex_per_model_not_match_whitelist(self):
        """A hostname not matching the model whitelist of use case 02 produces an outlier."""
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document(
                {"hostname": "Not-work-WHITELISTED"}))

        # Run analyzer
        analyzer = self._create_analyzer(
            self._SETTINGS_01, "whitelist_tests_model_whitelist_02.conf")
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertIn("outliers", result["_source"])

    def test_arbitrary_key_config_present_in_outlier(self):
        """An arbitrary key of the use case is copied into the outlier as a one-element list."""
        analyzer = self._create_analyzer(
            self._SETTINGS_01, "simplequery_arbitrary_dummy_test.conf")

        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(result["_source"]["outliers"]["test_arbitrary_key"],
                         ["arbitrary_value"])

    def test_arbitrary_key_config_not_present_int_other_model(self):
        # Dictionaries and lists could be shared between different instances. This test checks that a residual value
        # is not present in the dictionary
        analyzer = self._create_analyzer(self._SETTINGS_01,
                                         "simplequery_dummy_test.conf")

        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())
        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertNotIn("test_arbitrary_key", result["_source"]["outliers"])
Esempio n. 7
0
class TestOutlierOperations(unittest.TestCase):
    def setUp(self):
        """Create a fresh ``TestStubEs`` and ``UpdateSettings`` helper for each test."""
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        """Undo the per-test changes made through the settings and ES helpers."""
        # restore the default configuration file so we don't influence other unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def test_add_outlier_to_doc(self):
        """Adding one outlier must yield the ``doc_with_outlier_without_model_info`` fixture."""
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_without_outlier_test_file),
        )
        # Model name and model type are normally added by the analyzer, so
        # only the observation is set by hand here.
        outlier.outlier_dict["observation"] = "dummy observation"

        actual_doc = helpers.es.add_outlier_to_document(outlier)
        self.assertDictEqual(doc_with_outlier_without_model_info_test_file,
                             actual_doc)

    def test_remove_outlier_from_doc(self):
        """Adding then removing outliers must give back the outlier-free fixture."""
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_without_outlier_test_file),
        )
        outlier.outlier_dict["observation"] = "dummy observation"

        # Round trip: tag the document as an outlier, then strip it again.
        tagged_doc = helpers.es.add_outlier_to_document(outlier)
        cleaned_doc = helpers.es.remove_outliers_from_document(tagged_doc)

        self.assertDictEqual(cleaned_doc, doc_without_outlier_test_file)

    def test_add_duplicate_outlier_to_doc(self):
        """Adding the same outlier twice must return a document equal to ``doc``."""
        doc = copy.deepcopy(doc_without_outlier_test_file)
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=doc,
        )

        # The first call's return value is discarded; only the result of the
        # second (duplicate) insertion is compared.
        helpers.es.add_outlier_to_document(outlier)
        doc_after_duplicate = helpers.es.add_outlier_to_document(outlier)

        self.assertDictEqual(doc, doc_after_duplicate)

    def test_add_two_outliers_to_doc(self):
        """Two distinct outliers on one document must yield the two-outlier fixture."""
        shared_doc = copy.deepcopy(doc_without_outlier_test_file)

        first_outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=shared_doc,
        )
        first_outlier.outlier_dict["observation"] = "dummy observation"

        second_outlier = Outlier(
            outlier_type="dummy type 2",
            outlier_reason="dummy reason 2",
            outlier_summary="dummy summary 2",
            doc=shared_doc,
        )
        second_outlier.outlier_dict["observation_2"] = "dummy observation 2"

        # Only the document returned by the last insertion is inspected.
        helpers.es.add_outlier_to_document(first_outlier)
        actual_doc = helpers.es.add_outlier_to_document(second_outlier)

        self.assertDictEqual(actual_doc, doc_with_two_outliers_test_file)

    def test_add_three_outliers_to_doc(self):
        """Three distinct outliers on one document must yield the three-outlier fixture."""
        shared_doc = copy.deepcopy(doc_without_outlier_test_file)

        first_outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=shared_doc,
        )
        first_outlier.outlier_dict["observation"] = "dummy observation"

        second_outlier = Outlier(
            outlier_type="dummy type 2",
            outlier_reason="dummy reason 2",
            outlier_summary="dummy summary 2",
            doc=shared_doc,
        )
        second_outlier.outlier_dict["observation_2"] = "dummy observation 2"

        third_outlier = Outlier(
            outlier_type="dummy type 3",
            outlier_reason="dummy reason 3",
            outlier_summary="dummy summary 3",
            doc=shared_doc,
        )
        third_outlier.outlier_dict["observation_3"] = "dummy observation 3"

        # Insert in order; the value kept is what the last insertion returned.
        for outlier in (first_outlier, second_outlier, third_outlier):
            actual_doc = helpers.es.add_outlier_to_document(outlier)

        self.assertDictEqual(actual_doc, doc_with_three_outliers_test_file)

    def test_add_remove_tag_from_doc(self):
        """Check ``remove_tag_from_document`` for an unknown and for a known tag.

        Removing an unknown tag must return a document equal to the input;
        removing ``"outlier"`` must leave no such entry in ``_source.tags``.
        """
        orig_doc = copy.deepcopy(doc_with_outlier_test_file)

        # Remove non-existing tag
        doc = helpers.es.remove_tag_from_document(orig_doc,
                                                  "tag_does_not_exist")
        self.assertDictEqual(doc, orig_doc)

        # Remove existing tag
        doc = helpers.es.remove_tag_from_document(orig_doc, "outlier")

        # Use the unittest assertion instead of raising AssertionError by
        # hand, so a failure also shows the remaining tags.
        self.assertNotIn("outlier", doc["_source"]["tags"],
                         "Tag still present in document, even after removal!")

    def test_whitelist_literal_match(self):
        """A document whose command query equals the whitelisted literal must be whitelisted."""
        self.test_settings.change_configuration_path(
            test_file_outliers_path_config)
        # The configuration contains: "C:\Windows\system32\msfeedssync.exe sync"

        fields = {"command_query": r'C:\Windows\system32\msfeedssync.exe sync'}
        candidate_doc = DummyDocumentsGenerate().generate_document(fields)

        self.assertTrue(Outlier.is_whitelisted_doc(candidate_doc))

    def test_whitelist_literal_mismatch(self):
        """A command query that only starts with the whitelisted literal must not be whitelisted."""
        self.test_settings.change_configuration_path(
            test_file_outliers_path_config)
        # The configuration contains: "C:\Windows\system32\msfeedssync.exe sync"

        fields = {
            "command_query": r'C:\Windows\system32\msfeedssync.exe syncOther',
        }
        candidate_doc = DummyDocumentsGenerate().generate_document(fields)

        self.assertFalse(Outlier.is_whitelisted_doc(candidate_doc))

    def test_whitelist_regexp_match(self):
        """The case-insensitive ``.exe sync`` pattern must match the nested test document."""
        pattern = re.compile(r"^.*.exe sync$".strip(), re.IGNORECASE)
        is_match = Outlier.dictionary_matches_specific_whitelist_item_regexp(
            pattern, nested_doc_for_whitelist_test)
        self.assertTrue(is_match)

    def test_whitelist_regexp_mismatch(self):
        """The ``.exeZZZZZ sync`` pattern must not match the nested test document."""
        pattern = re.compile(r"^.*.exeZZZZZ sync$".strip(), re.IGNORECASE)
        is_match = Outlier.dictionary_matches_specific_whitelist_item_regexp(
            pattern, nested_doc_for_whitelist_test)
        self.assertFalse(is_match)

    def test_whitelist_config_file_multi_item_match(self):
        """The dummy outlier must be whitelisted under ``whitelist_tests_01_with_general.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertTrue(whitelisted)

    def test_whitelist_config_file_multi_item_match_with_whitelist_element_part_of_list_in_event(
            self):
        """The dummy outlier must be whitelisted under ``whitelist_tests_08_with_general.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_08_with_general.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertTrue(whitelisted)

    def test_whitelist_config_wipe_all_bug(self):
        """The dummy outlier must NOT be whitelisted under ``whitelist_tests_10_issue_462.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_10_issue_462.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertFalse(whitelisted)

    def test_single_literal_to_match_in_doc_with_outlier(self):
        """The dummy outlier must be whitelisted under ``whitelist_tests_02_with_general.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_02_with_general.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertTrue(whitelisted)

    def test_single_literal_not_to_match_in_doc_with_outlier(self):
        """The dummy outlier must NOT be whitelisted under ``whitelist_tests_03_with_general.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_03_with_general.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertFalse(whitelisted)

    def test_single_regex_to_match_in_doc_with_outlier(self):
        """The dummy outlier must be whitelisted under ``whitelist_tests_06_with_general.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_06_with_general.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertTrue(whitelisted)

    def test_single_regex_not_to_match_in_doc_with_outlier(self):
        """The dummy outlier must NOT be whitelisted under ``whitelist_tests_07_with_general.conf``."""
        # Here the configuration is switched before the outlier is built.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_07_with_general.conf")
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )
        self.assertFalse(outlier.is_whitelisted())

    def test_whitelist_config_file_multi_item_match_with_three_fields_and_whitespace(
            self):
        """The dummy outlier must be whitelisted under ``whitelist_tests_04_with_general.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_04_with_general.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertTrue(whitelisted)

    def test_whitelist_config_file_multi_item_mismatch_with_three_fields_and_whitespace(
            self):
        """The dummy outlier must NOT be whitelisted under ``whitelist_tests_05_with_general.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_05_with_general.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertFalse(whitelisted)

    def test_whitelist_config_change_remove_multi_item_literal(self):
        """Removing whitelisted outliers must leave the outlier-free document in ES."""
        indexed_doc = copy.deepcopy(doc_with_outlier_test_file)
        # Without score, because "remove whitelisted outlier" uses a "bulk" operation which doesn't take the score
        # into account.
        expected_doc = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(indexed_doc)
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf")

        analyzers = {
            "analyzer_dummy_test": AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
            )
        }
        es.remove_all_whitelisted_outliers(analyzers)

        stored_doc = list(es._scan())[0]
        self.assertDictEqual(stored_doc, expected_doc)

    def test_whitelist_config_change_single_literal_not_to_match_in_doc_with_outlier(
            self):
        """With ``whitelist_tests_03_with_general.conf`` the stored outlier document must stay equal to the input."""
        indexed_doc = copy.deepcopy(doc_with_outlier_test_file)
        self.test_es.add_doc(indexed_doc)
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_03_with_general.conf")

        analyzers = {
            "analyzer_dummy_test": AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
            )
        }
        es.remove_all_whitelisted_outliers(analyzers)

        stored_doc = list(es._scan())[0]
        self.assertEqual(stored_doc, indexed_doc)

    def test_test_osquery_ticket_1933_single_regexp_should_not_match(self):
        """The dummy outlier must NOT be whitelisted under ``whitelist_tests_09_ticket_1933.conf``."""
        # The outlier is built before the configuration is switched.
        outlier = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=copy.deepcopy(doc_with_outlier_test_file),
        )

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_09_ticket_1933.conf")
        whitelisted = outlier.is_whitelisted()
        self.assertFalse(whitelisted)
Esempio n. 8
0
class TestTermsAnalyzer(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Set ``logging.verbosity`` to 0 once, before the tests of this class run."""
        # NOTE(review): `logging` is presumably the project's logging helper rather than the stdlib module
        # (which defines no `verbosity` attribute) - confirm against the file's imports.
        logging.verbosity = 0

    def setUp(self):
        """Create a fresh ``TestStubEs`` and ``UpdateSettings`` helper for each test."""
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        """Undo the per-test changes made through the settings and ES helpers."""
        # restore the default configuration file so we don't influence other unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    # This test only works if we try to detect whitelisted elements among non-outlier elements.
    # Here the count is not lower than three, so the documents aren't outliers, and we never see that the first one
    # is whitelisted.
    #
    # def test_whitelist_batch_document_not_process_all(self):
    #     self.test_settings.change_configuration_path("/app/tests/unit_tests/files/terms_test_with_whitelist.conf")
    #     analyzer = TermsAnalyzer("terms_dummy_test")
    #
    #     # Whitelisted (ignored)
    #     doc1_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_01_test_file)
    #     self.test_es.add_doc(doc1_without_outlier)
    #     # Not whitelisted (add)
    #     doc2_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_02_test_file)
    #     self.test_es.add_doc(doc2_without_outlier)
    #     # Not whitelisted
    #     doc3_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_03_test_file)
    #     self.test_es.add_doc(doc3_without_outlier)
    #
    #     analyzer.evaluate_model()
    #
    #     self.assertEqual(len(analyzer.outliers), 2)

    def test_terms_whitelist_work_test_es_result(self):
        """Only the non-whitelisted documents must end up flagged as outliers in ES.

        Five documents sharing one command query are indexed, plus a sixth one
        whose hostname is ``whitelist_hostname``; after evaluation exactly
        five stored documents must carry an ``outliers`` entry.
        """
        doc_factory = DummyDocumentsGenerate()
        command_query = "SELECT * FROM dummy_table"
        # Must be bigger than the trigger value (here 3).
        nbr_generated_documents = 5
        command_name = "default_name_"

        # Generate the documents that match the outlier model.
        for index in range(nbr_generated_documents):
            matching_fields = {
                "command_query": command_query,
                "command_name": command_name + str(index),
            }
            self.test_es.add_doc(doc_factory.generate_document(matching_fields))

        # Generate the whitelisted document.
        whitelisted_fields = {
            "hostname": "whitelist_hostname",
            "command_query": command_query,
            "command_name": command_name + str(nbr_generated_documents),
        }
        self.test_es.add_doc(doc_factory.generate_document(whitelisted_fields))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_with_whitelist.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test.conf"
        )
        analyzer.evaluate_model()

        nbr_outliers = sum(1 for stored in es._scan()
                           if "outliers" in stored["_source"])
        self.assertEqual(nbr_outliers, nbr_generated_documents)

    def test_terms_detect_one_outlier_es_check(self):
        """Exactly five stored documents must carry an ``outliers`` entry after evaluation."""
        doc_factory = DummyDocumentsGenerate()
        docs_per_hour = [5, 3, 1, 2]

        # Only the first group of documents must be detected as outliers, because the limit is 3.
        generated_docs = doc_factory.generate_doc_time_variable_sensitivity(
            docs_per_hour)
        self.test_es.add_multiple_docs(generated_docs)

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf"
        )
        analyzer.evaluate_model()

        flagged_docs = [stored for stored in es._scan()
                        if "outliers" in stored["_source"]]
        self.assertEqual(len(flagged_docs), 5)

    def test_terms_detect_one_outlier_batch_check(self):
        """``analyzer.total_outliers`` must be 5 after evaluating the ``[5, 3, 1, 2]`` data set."""
        doc_factory = DummyDocumentsGenerate()
        docs_per_hour = [5, 3, 1, 2]

        # Only the first group of documents must be detected as outliers, because the limit is 3.
        generated_docs = doc_factory.generate_doc_time_variable_sensitivity(
            docs_per_hour)
        self.test_es.add_multiple_docs(generated_docs)

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf"
        )
        analyzer.evaluate_model()

        total_detected = analyzer.total_outliers
        self.assertEqual(total_detected, 5)

    def test_terms_small_batch_treat_all(self):
        """With the small-batch configuration every generated document must be counted as an outlier."""
        doc_factory = DummyDocumentsGenerate()

        # Ten hours with five documents each.
        nbr_doc_per_hours = 5
        nbr_hours = 10
        docs_per_hour = [nbr_doc_per_hours] * nbr_hours
        # Note from the original author: if the number of documents per hour is not a divisor of the batch limit,
        # not all documents will be detected.

        # Generate documents
        generated_docs = doc_factory.generate_doc_time_variable_sensitivity(
            docs_per_hour)
        self.test_es.add_multiple_docs(generated_docs)

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_small_batch_eval.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf"
        )
        analyzer.evaluate_model()

        expected_total = nbr_doc_per_hours * nbr_hours
        self.assertEqual(analyzer.total_outliers, expected_total)

    def test_terms_small_batch_last_outlier(self):
        """A final group of four documents after eighteen groups of two must give four outliers."""
        doc_factory = DummyDocumentsGenerate()

        # Eighteen values of 2, followed by one value (4) that must be detected as an outlier (limit is 3).
        docs_per_hour = [2] * 18 + [4]

        # Generate documents
        generated_docs = doc_factory.generate_doc_time_variable_sensitivity(
            docs_per_hour)
        self.test_es.add_multiple_docs(generated_docs)

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_small_batch_eval.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf"
        )
        analyzer.evaluate_model()

        total_detected = analyzer.total_outliers
        self.assertEqual(total_detected, 4)

    def test_evaluate_batch_for_outliers_not_enough_target_buckets_one_doc_max_two(
            self):
        """A batch holding a single document must produce no outliers with ``terms_dummy_test_2``."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_2.conf"
        )

        # One document: fixed aggregator, randomly picked target and source document.
        aggregator = LIST_AGGREGATOR_VALUE[0]
        target = random.choice(LIST_TARGET_VALUE)
        raw_doc = copy.deepcopy(random.choice(LIST_DOC))
        batch = analyzer._add_document_to_batch(defaultdict(), [target],
                                                [aggregator], raw_doc)

        # The second element of the returned pair is not checked by this test.
        outliers, _remaining = analyzer._evaluate_batch_for_outliers(
            batch=batch)
        self.assertEqual(outliers, [])

    def test_evaluate_batch_for_outliers_limit_target_buckets_two_doc_max_two(
            self):
        """Two documents under two different aggregators must still produce no outliers."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_2.conf"
        )

        # First document, stored under the first aggregator.
        first_aggregator = LIST_AGGREGATOR_VALUE[0]
        first_target = random.choice(LIST_TARGET_VALUE)
        first_doc = copy.deepcopy(random.choice(LIST_DOC))
        batch = analyzer._add_document_to_batch(defaultdict(), [first_target],
                                                [first_aggregator], first_doc)

        # Second document, stored under another aggregator.
        second_aggregator = LIST_AGGREGATOR_VALUE[1]
        second_target = random.choice(LIST_TARGET_VALUE)
        second_doc = copy.deepcopy(random.choice(LIST_DOC))
        batch = analyzer._add_document_to_batch(batch, [second_target],
                                                [second_aggregator],
                                                second_doc)

        # Expect to get nothing due to "min_target_buckets" set to 2
        outliers, _remaining = analyzer._evaluate_batch_for_outliers(
            batch=batch)
        self.assertEqual(outliers, [])

    # coeff_of_variation
    def test_terms_evaluate_coeff_of_variation_like_expected_document(self):
        """The stored document must equal the expected coeff-of-variation outlier fixture."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf"
        )

        input_doc = copy.deepcopy(doc_without_outlier_test_file)
        expected_doc = copy.deepcopy(
            doc_with_terms_outlier_coeff_of_variation_no_score_sort)
        # Add the document to the database.
        self.test_es.add_doc(input_doc)

        # Run the test (supposing that every document matches the query).
        analyzer.evaluate_model()

        stored_doc = list(es._scan())[0]
        self.assertEqual(stored_doc, expected_doc)

    def test_terms_generated_document_coeff_of_variation_not_respect_min(self):
        """Documents generated with the "at least" generator must not be flagged as outliers."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf"
        )

        doc_factory = DummyDocumentsGenerate()
        nbr_val = 24  # Like 24 hours
        min_trigger_sensitivity = analyzer.model_settings[
            "trigger_sensitivity"]
        default_value = 5  # By default, 5 documents are created per hour (arbitrary choice)
        max_difference = 3  # Maximum difference between the number of documents (so between 2 and 8, included)
        generated_docs = doc_factory.create_doc_uniq_target_variable_at_least_specific_coef_variation(
            nbr_val, min_trigger_sensitivity, max_difference, default_value)
        self.test_es.add_multiple_docs(generated_docs)
        analyzer.evaluate_model()

        nbr_outliers = sum(1 for stored in es._scan()
                           if "outliers" in stored['_source'])
        self.assertEqual(nbr_outliers, 0)

    def test_terms_generated_document_coeff_of_variation_respect_min(self):
        """Documents generated with the "at most" generator must all be flagged as outliers."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf"
        )

        doc_factory = DummyDocumentsGenerate()
        nbr_val = 24  # Like 24 hours
        max_trigger_sensitivity = analyzer.model_settings[
            "trigger_sensitivity"]
        default_value = 5  # By default, 5 documents are created per hour (arbitrary choice)
        max_difference = 3  # Maximum difference between the number of documents (so between 2 and 8, included)
        generated_docs = doc_factory.create_doc_uniq_target_variable_at_most_specific_coef_variation(
            nbr_val, max_trigger_sensitivity, max_difference, default_value)
        self.test_es.add_multiple_docs(generated_docs)
        analyzer.evaluate_model()

        nbr_outliers = sum(1 for stored in es._scan()
                           if "outliers" in stored['_source'])

        self.assertEqual(nbr_outliers, len(generated_docs))

    def test_terms_use_derived_fields_in_doc(self):
        """With ``terms_dummy_test_derived`` the stored document must contain ``timestamp_year``."""
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn lists the available keys on failure, unlike assertTrue(x in y).
        self.assertIn("timestamp_year", result['_source'])

    def test_terms_use_derived_fields_in_outlier(self):
        """With ``terms_dummy_test_derived`` the outlier must contain ``derived_timestamp_year``."""
        self.test_es.add_doc(
            DummyDocumentsGenerate().generate_document({"user_id": 11}))

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn lists the available keys on failure, unlike assertTrue(x in y).
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_terms_not_use_derived_fields_in_doc(self):
        """With ``terms_dummy_test_not_derived`` the stored document must not contain ``timestamp_year``."""
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertNotIn shows the inspected container on failure, unlike assertFalse(x in y).
        self.assertNotIn("timestamp_year", result['_source'])

    def test_terms_not_use_derived_fields_but_present_in_outlier(self):
        """Even with ``terms_dummy_test_not_derived`` the outlier must contain ``derived_timestamp_year``."""
        self.test_es.add_doc(
            DummyDocumentsGenerate().generate_document({"user_id": 11}))

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # The use_derived_fields parameter has no impact on the outlier keys.
        # assertIn lists the available keys on failure, unlike assertTrue(x in y).
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_terms_default_outlier_infos(self):
        """Every key of ``DEFAULT_OUTLIERS_KEY_FIELDS`` must be present in the stored outlier."""
        # Generate document
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        stored_outlier = list(es._scan())[0]['_source']['outliers']
        self.assertTrue(
            all(field in stored_outlier
                for field in DEFAULT_OUTLIERS_KEY_FIELDS))

    def test_terms_extra_outlier_infos_all_present(self):
        """Under ``terms_test_02.conf`` every key of ``EXTRA_OUTLIERS_KEY_FIELDS`` must be in the outlier."""
        # Generate document
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        stored_outlier = list(es._scan())[0]['_source']['outliers']
        self.assertTrue(
            all(field in stored_outlier
                for field in EXTRA_OUTLIERS_KEY_FIELDS))

    def test_terms_extra_outlier_infos_new_result(self):
        """The stored outlier must contain no key outside the extra and default key lists."""
        # Generate document
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        stored_outlier = list(es._scan())[0]['_source']['outliers']
        known_fields = EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
        self.assertTrue(all(key in known_fields for key in stored_outlier))

    def test_add_document_to_batch_empty_target(self):
        """With an empty target list the returned batch must equal the batch passed in."""
        raw_doc = DummyDocumentsGenerate().generate_document()
        current_batch = {"dummy_key": "dummy_value"}

        no_targets = []
        returned_batch = TermsAnalyzer._add_document_to_batch(
            current_batch, no_targets, ["dummy_aggregator"], raw_doc)

        self.assertEqual(returned_batch, current_batch)

    def test_add_document_to_batch_empty_aggergator(self):
        """With an empty aggregator list the returned batch must equal the batch passed in."""
        raw_doc = DummyDocumentsGenerate().generate_document()
        current_batch = {"dummy_key": "dummy_value"}

        no_aggregators = []
        returned_batch = TermsAnalyzer._add_document_to_batch(
            current_batch, ["dummy_target"], no_aggregators, raw_doc)

        self.assertEqual(returned_batch, current_batch)

    def test_add_document_to_batch_one_aggregator_and_one_target(self):
        """One target and one aggregator must add a single entry keyed by the aggregator.

        The expected entry holds the target under ``targets``, an empty dict
        under ``observations`` and the document under ``raw_docs``.
        """
        raw_doc = DummyDocumentsGenerate().generate_document()
        target_value = "dummy_target"
        aggregator_value = "dummy_aggregator"

        current_batch = {"dummy_key": "dummy_value"}
        returned_batch = TermsAnalyzer._add_document_to_batch(
            current_batch, [target_value], [aggregator_value], raw_doc)

        # Build the expected aggregator entry first, then attach it to a copy of the batch.
        expected_entry = defaultdict(list)
        expected_entry["targets"].append(target_value)
        expected_entry["observations"].append({})
        expected_entry["raw_docs"].append(raw_doc)

        expected_batch = current_batch.copy()
        expected_batch[aggregator_value] = expected_entry

        self.assertEqual(returned_batch, expected_batch)

    def test_min_target_buckets_detect_outlier(self):
        """Only the six ("agg2", 2) documents are expected to be flagged.

        Documents are spread over two aggregators (``hostname``) and four
        target values (``user_id``).
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf"
        )
        # Use-case settings, as recorded in the original test notes:
        #   min_target_buckets=4
        #   trigger_sensitivity=5
        #   trigger_on=high
        #   trigger_method=float

        # A flat list of (aggregator, target) pairs keeps the insertion order
        documents = [
            # First batch (counts per target):
            #       0  1  2
            # agg1 [5, 1, 1]
            # agg2 [1, 1, 1]
            ("agg1", 0),
            ("agg2", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg2", 1),
            ("agg1", 1),
            ("agg2", 2),
            ("agg1", 2),
            # Second batch (counts per target):
            #       2  3
            # agg1 [1, 1]
            # agg2 [5, 1]
            ("agg2", 2),
            ("agg2", 2),
            ("agg2", 2),
            ("agg2", 2),
            ("agg1", 2),
            ("agg2", 2),
            ("agg1", 3),
            ("agg2", 3)
        ]

        # Totals over both batches:
        #       0  1  2  3
        # agg1 [5, 1, 2, 1]
        # agg2 [1, 1, 6, 1]
        # Only agg2 / target 2 (6 documents) is expected to be flagged

        generator = DummyDocumentsGenerate()
        for hostname, user_id in documents:
            self.test_es.add_doc(
                generator.generate_document({
                    "user_id": user_id,
                    "hostname": hostname
                }))

        analyzer.evaluate_model()

        flagged = [(doc["_source"]["outliers"]["aggregator"][0],
                    doc["_source"]["outliers"]["term"][0])
                   for doc in es._scan() if "outliers" in doc["_source"]]

        self.assertEqual(flagged, [("agg2", "2")] * 6)

    def test_min_target_buckets_dont_detect_outlier(self):
        """No outlier is expected when the documents cover only three targets."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf"
        )
        # Use-case settings, as recorded in the original test notes:
        #   min_target_buckets=4
        #   trigger_sensitivity=5
        #   trigger_on=high
        #   trigger_method=float

        # A flat list of (aggregator, target) pairs keeps the insertion order
        documents = [
            # First batch (counts per target):
            #       0  1
            # agg1 [6, 1]
            # agg2 [1, 2]
            ("agg1", 0),
            ("agg2", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg2", 1),
            ("agg2", 1),
            ("agg1", 1),
            # Second batch (counts per target):
            #       2
            # agg1 [1]
            # agg2 [0]
            ("agg1", 2)
        ]

        # Totals over both batches:
        #       0  1  2
        # agg1 [6, 1, 1]
        # agg2 [1, 2, 0]
        # agg1 / target 0 stands out, but per the original notes there are
        # not enough target buckets for it to be flagged

        generator = DummyDocumentsGenerate()
        for hostname, user_id in documents:
            self.test_es.add_doc(
                generator.generate_document({
                    "user_id": user_id,
                    "hostname": hostname
                }))

        analyzer.evaluate_model()
        self.assertEqual(analyzer.total_outliers, 0)

    def test_batch_whitelist_work_with_min_target_bucket(self):
        """Only the six ("agg1", 4) documents are expected to be flagged.

        Some documents carry the ``whitelist-deployment`` deployment name;
        the others get ``None`` as deployment name.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf"
        )
        # Use-case settings, as recorded in the original test notes:
        #   min_target_buckets=4
        #   trigger_sensitivity=5
        #   trigger_on=high
        #   trigger_method=float

        # (aggregator, target, is_whitelist) triples, in insertion order
        documents = [
            # First batch (counts per target, whitelisted docs included):
            #       0  1  2
            # agg1 [3, 0, 1]
            # agg2 [1, 3, 2]
            ("agg1", 0, False),
            ("agg2", 0, False),
            ("agg1", 0, True),
            ("agg1", 0, False),
            ("agg2", 1, False),
            ("agg2", 1, False),
            ("agg2", 1, False),
            ("agg2", 2, False),
            ("agg2", 2, False),
            ("agg1", 2, False),
            # Second batch (counts per target, whitelisted docs included):
            #       2  3  4
            # agg1 [1, 0, 2]
            # agg2 [4, 3, 0]
            ("agg2", 2, False),
            ("agg2", 2, False),
            ("agg2", 2, True),
            ("agg2", 2, False),
            ("agg1", 2, False),
            ("agg2", 3, False),
            ("agg2", 3, False),
            ("agg2", 3, False),
            ("agg1", 4, False),
            ("agg1", 4, False),
            # Third batch (counts per target):
            #       4  5
            # agg1 [4, 1]
            ("agg1", 4, False),
            ("agg1", 4, False),
            ("agg1", 4, False),
            ("agg1", 4, False),
            ("agg1", 5, False)
        ]

        # Raw totals over all batches (whitelisted docs included):
        #       0  1  2  3  4  5
        # agg1 [3, 0, 2, 0, 6, 1]
        # agg2 [1, 3, 6, 3, 0, 0]
        # NOTE(review): the original notes also named agg2 / target 2 as an
        # outlier candidate with one whitelisted document, yet the assertion
        # below only expects agg1 / target 4 - confirm against the analyzer.

        generator = DummyDocumentsGenerate()
        for hostname, user_id, whitelisted in documents:
            self.test_es.add_doc(
                generator.generate_document({
                    "deployment_name":
                    "whitelist-deployment" if whitelisted else None,
                    "user_id": user_id,
                    "hostname": hostname
                }))
        analyzer.evaluate_model()

        flagged = [(doc["_source"]["outliers"]["aggregator"][0],
                    doc["_source"]["outliers"]["term"][0])
                   for doc in es._scan() if "outliers" in doc["_source"]]

        self.assertEqual(flagged, [("agg1", "4")] * 6)

    def test_batch_whitelist_work_doent_match_outlier_in_across(self):
        """All agg2 documents are expected to be flagged, and no agg1 document."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_across_float.conf"
        )

        # (aggregator, target, is_whitelist) triples, in insertion order
        documents = [
            # agg1 has targets (0, 1, 2) -> 3 distinct values, but target 1
            #      is whitelisted, leaving (0, 2) -> 2 distinct values
            # agg2 has targets (0, 3, 4) -> 3 distinct values
            ("agg1", 0, False),
            ("agg1", 1, True),
            ("agg2", 0, False),
            ("agg2", 0, False),
            ("agg1", 2, False),
            ("agg2", 3, False),
            ("agg2", 4, False)
        ]

        generator = DummyDocumentsGenerate()
        for hostname, user_id, whitelisted in documents:
            self.test_es.add_doc(
                generator.generate_document({
                    "deployment_name":
                    "whitelist-deployment" if whitelisted else None,
                    "user_id": user_id,
                    "hostname": hostname
                }))

        analyzer.evaluate_model()

        flagged = [(doc["_source"]["outliers"]["aggregator"][0],
                    doc["_source"]["outliers"]["term"][0])
                   for doc in es._scan() if "outliers" in doc["_source"]]

        # agg2 is detected, agg1 is not
        expected = [("agg2", "0"), ("agg2", "0"), ("agg2", "3"),
                    ("agg2", "4")]
        self.assertEqual(flagged, expected)

    def test_extract_additional_model_settings_no_terms_section(self):
        """Creating an analyzer from this use-case file must raise ``ValueError``.

        Judging by the file name, the use case has no terms section.
        """
        self.assertRaises(
            ValueError, AnalyzerFactory.create,
            "/app/tests/unit_tests/files/use_cases/terms/terms_test_no_terms_section.conf"
        )

    def test_extract_additional_model_settings_too_many_terms_section(self):
        """Creating an analyzer from this use-case file must raise ``ValueError``.

        Judging by the file name, the use case has too many terms sections.
        """
        self.assertRaises(
            ValueError, AnalyzerFactory.create,
            "/app/tests/unit_tests/files/use_cases/terms/terms_test_too_many_terms_section.conf"
        )

    def test_non_outlier_values_not_present_in_terms_for_first(self):
        """The first scanned document must have an empty non-outlier sample.

        Four documents share the hostname "one"; the first has deployment
        name "one", the three others "two".
        """
        doc_factory = DummyDocumentsGenerate()

        # Fields of the generated documents, in insertion order (index 0..3)
        documents_fields = [
            {"hostname": "one", "deployment_name": "one"},
            {"hostname": "one", "deployment_name": "two"},
            {"hostname": "one", "deployment_name": "two"},
            {"hostname": "one", "deployment_name": "two"},
        ]
        for fields in documents_fields:
            self.test_es.add_doc(doc_factory.generate_document(fields))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        first_doc = list(es._scan())[0]
        sample = first_doc["_source"]["outliers"]["non_outlier_values_sample"]

        self.assertEqual(sample, list())

    def test_non_outlier_values_not_present_in_terms_within(self):
        """The fifth scanned document must list only "two" as non-outlier sample.

        Five documents share the hostname "one"; their deployment names
        are "one", "two" (three times) and "three".
        """
        doc_factory = DummyDocumentsGenerate()

        # Fields of the generated documents, in insertion order (index 0..4)
        documents_fields = [
            {"hostname": "one", "deployment_name": "one"},
            {"hostname": "one", "deployment_name": "two"},
            {"hostname": "one", "deployment_name": "two"},
            {"hostname": "one", "deployment_name": "two"},
            {"hostname": "one", "deployment_name": "three"},
        ]
        for fields in documents_fields:
            self.test_es.add_doc(doc_factory.generate_document(fields))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        fifth_doc = list(es._scan())[4]
        sample = fifth_doc["_source"]["outliers"]["non_outlier_values_sample"]

        self.assertEqual(sample, ["two"])

    def test_non_outlier_values_empty_terms_across(self):
        """The fifth scanned document must have an empty non-outlier sample.

        Hostnames "one" and "three" each get one document without an
        explicit deployment name; hostname "two" gets three documents with
        three different deployment names.
        """
        doc_factory = DummyDocumentsGenerate()

        # Fields of the generated documents, in insertion order (index 0..4)
        documents_fields = [
            {"hostname": "one"},
            {"hostname": "two", "deployment_name": "one"},
            {"hostname": "two", "deployment_name": "two"},
            {"hostname": "two", "deployment_name": "three"},
            {"hostname": "three"},
        ]
        for fields in documents_fields:
            self.test_es.add_doc(doc_factory.generate_document(fields))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_across_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        fifth_doc = list(es._scan())[4]
        sample = fifth_doc["_source"]["outliers"]["non_outlier_values_sample"]

        self.assertEqual(sample, list())
Esempio n. 9
0
class TestHousekeeping(unittest.TestCase):
    """Tests of ``HousekeepingJob`` against the stubbed Elasticsearch.

    Two tests rewrite a configuration file on disk. They back the file up
    first and restore it in a ``finally`` clause, so that a failure in the
    middle of a test cannot leave a modified fixture behind for later tests.
    """

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()
        # Maps a configuration file path to its content before modification
        self.config_backup = dict()

    def tearDown(self):
        self.test_es.restore_es()
        self.test_settings.restore_default_configuration_path()

    def _backup_config(self, file_path):
        """Remember the current content of ``file_path`` for a later restore."""
        with open(file_path, 'r') as content_file:
            self.config_backup[file_path] = content_file.read()

    def _restore_config(self, file_path):
        """Write back the content saved by ``_backup_config``.

        Raises:
            KeyError: if ``file_path`` was never backed up.
        """
        if file_path not in self.config_backup:
            raise KeyError('The configuration ' + file_path +
                           ' was never backup')
        with open(file_path, 'w') as file_object:
            file_object.write(self.config_backup[file_path])

    def _enable_debug_logging(self):
        """Debugging aid: send DEBUG level standard logging to stderr."""
        # Imported locally, as in the original helper
        import logging
        import sys
        logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    def test_housekeeping_correctly_remove_whitelisted_outlier_when_file_modification(
            self):
        """Housekeeping must clean the outlier once the whitelist is added.

        The no-whitelist configuration is truncated at its "# WHITELIST"
        marker and the content of the whitelist configuration is appended;
        the stored document must then equal ``doc_without_outlier_test_file``.
        """
        self.test_settings.change_configuration_path(
            test_file_no_whitelist_path_config)
        self._backup_config(test_file_no_whitelist_path_config)
        try:
            housekeeping = HousekeepingJob()

            analyzer = AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf"
            )
            housekeeping.update_analyzer_list([analyzer])

            # Add document to "Database"
            doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            self.test_es.add_doc(doc_with_outlier)

            # Reset the recorded modification time of the file
            housekeeping.file_mod_watcher._previous_mtimes[
                test_file_no_whitelist_path_config] = 0

            # Keep everything that precedes the "# WHITELIST" marker
            kept_lines = []
            with open(test_file_no_whitelist_path_config, 'r') as test_file:
                for line in test_file:
                    if "# WHITELIST" in line:
                        break
                    kept_lines.append(line)
            filecontent = "".join(kept_lines)

            # Update configuration (read new config and append to default)
            with open(test_file_whitelist_path_config, 'r') as test_file:
                filecontent += test_file.read()

            with open(test_file_no_whitelist_path_config, 'w') as test_file:
                test_file.write(filecontent)

            housekeeping.execute_housekeeping()

            # Fetch result
            result = [elem for elem in self.test_es._scan()][0]
        finally:
            # Always put the original configuration back on disk
            self._restore_config(test_file_no_whitelist_path_config)

        # Compute expected result:
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.maxDiff = None
        self.assertEqual(result, doc_without_outlier)

    def test_housekeeping_execute_no_whitelist_parameter_change(self):
        """Housekeeping must also run when an unrelated section is appended.

        After the run, the stored document must differ from the document
        that was originally inserted.
        """
        # Check that housekeeping run even when we change new part in the configuration
        self.test_settings.change_configuration_path(
            test_file_whitelist_dummy_reason_path_config)
        self._backup_config(test_file_whitelist_dummy_reason_path_config)
        try:
            housekeeping = HousekeepingJob()

            analyzer = AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf"
            )
            housekeeping.update_analyzer_list([analyzer])

            # Add document to "Database"
            doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            expected_doc_with_outlier = copy.deepcopy(
                doc_with_outlier_test_file)
            self.test_es.add_doc(doc_with_outlier)

            # Update configuration (create new section and append to default)
            filecontent = "\n\n[dummy_section]\nparam=1"

            # Force the date of the file
            housekeeping.file_mod_watcher._previous_mtimes[
                test_file_whitelist_dummy_reason_path_config] = 0

            with open(test_file_whitelist_dummy_reason_path_config,
                      'a') as test_file:
                test_file.write(filecontent)

            housekeeping.execute_housekeeping()

            # Fetch result
            result = [elem for elem in self.test_es._scan()][0]
        finally:
            # Always put the original configuration back on disk
            self._restore_config(test_file_whitelist_dummy_reason_path_config)

        self.assertNotEqual(result, expected_doc_with_outlier)

    def test_whitelist_literals_per_model_removed_by_housekeeping(self):
        """The outlier of the "HOSTNAME-WHITELISTED" document must be removed."""
        # Init
        doc_generate = DummyDocumentsGenerate()
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/housekeeping.conf")
        housekeeping = HousekeepingJob()

        # Generate document
        document = doc_generate.generate_document({
            "hostname":
            "HOSTNAME-WHITELISTED",
            "create_outlier":
            True,
            "outlier.model_name":
            "dummy_test",
            "outlier.model_type":
            "analyzer"
        })
        self.assertIn("outliers", document["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test_with_whitelist.conf"
        )
        housekeeping.update_analyzer_list([analyzer])

        self.test_es.add_doc(document)

        housekeeping.execute_housekeeping()

        result = [elem for elem in self.test_es._scan()][0]

        self.assertNotIn("outliers", result["_source"])

    def test_whitelist_literals_per_model_not_removed_by_housekeeping(self):
        """The outlier of the "NOT-WHITELISTED" document must be kept."""
        # Init
        doc_generate = DummyDocumentsGenerate()
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/housekeeping.conf")
        housekeeping = HousekeepingJob()

        # Generate document
        document = doc_generate.generate_document({
            "hostname":
            "NOT-WHITELISTED",
            "create_outlier":
            True,
            "outlier.model_name":
            "dummy_test",
            "outlier.model_type":
            "simplequery"
        })
        self.assertIn("outliers", document["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test_with_whitelist.conf"
        )
        housekeeping.update_analyzer_list([analyzer])

        self.test_es.add_doc(document)

        housekeeping.execute_housekeeping()

        result = [elem for elem in self.test_es._scan()][0]
        self.assertIn("outliers", result["_source"])
Esempio n. 10
0
class TestMetricsAnalyzer(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Set ``logging.verbosity`` to 0 once for the whole test class."""
        # NOTE(review): `logging` is presumably the project's logging helper
        # rather than the standard library module - confirm
        logging.verbosity = 0

    def setUp(self):
        """Create a fresh ``TestStubEs`` and ``UpdateSettings`` for each test."""
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        """Restore the default configuration path, then the Elasticsearch stub."""
        # restore the default configuration file so we don't influence other unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def test_metrics_whitelist_work_test_es_result(self):
        """The "whitelist_hostname" document must not be counted as an outlier.

        Five documents with the same command query are expected to be
        flagged; a sixth one using the hostname "whitelist_hostname" is not.
        """
        doc_factory = DummyDocumentsGenerate()
        # Per the original notes: must be bigger than the trigger value (here 3)
        command_query = "SELECT * FROM dummy_table"
        nbr_generated_documents = 5

        # Documents expected to match the outlier condition
        for _ in range(nbr_generated_documents):
            self.test_es.add_doc(
                doc_factory.generate_document(
                    {"command_query": command_query}))
        # Same query, but with the whitelisted hostname
        self.test_es.add_doc(
            doc_factory.generate_document({
                "hostname": "whitelist_hostname",
                "command_query": command_query
            }))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_length_dummy_test.conf"
        )
        analyzer.evaluate_model()

        nbr_outliers = sum(1 for elem in es._scan()
                           if "outliers" in elem["_source"])
        self.assertEqual(nbr_outliers, nbr_generated_documents)

    def test_metrics_detect_one_outlier_es_check(self):
        """Exactly one stored document must carry an ``outliers`` entry."""
        doc_factory = DummyDocumentsGenerate()

        # Per the original notes, only the first value is an outlier because
        # user_id needs to be bigger than 10
        for user_id in [11, 10, 8, 0, 0, 0]:
            self.test_es.add_doc(
                doc_factory.generate_document({"user_id": user_id}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_numerical_value_dummy_test.conf"
        )
        analyzer.evaluate_model()

        nbr_outliers = sum(1 for elem in es._scan()
                           if "outliers" in elem["_source"])
        self.assertEqual(nbr_outliers, 1)

    def test_metrics_detect_one_outlier_batch_check(self):
        """The analyzer must report exactly one outlier in ``total_outliers``."""
        doc_factory = DummyDocumentsGenerate()

        # Per the original notes, only the first value is an outlier because
        # user_id needs to be bigger than 10
        for user_id in [11, 10, 8, 0, 0, 0]:
            self.test_es.add_doc(
                doc_factory.generate_document({"user_id": user_id}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_numerical_value_dummy_test.conf"
        )
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 1)

    def test_metrics_small_batch_treat_all(self):
        """All twenty documents with user id 11 must be reported as outliers."""
        doc_factory = DummyDocumentsGenerate()

        # Twenty documents sharing the same user id
        number_of_user = 20
        for user_id in [11] * number_of_user:
            self.test_es.add_doc(
                doc_factory.generate_document({"user_id": user_id}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_small_batch_eval.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_numerical_value_dummy_test.conf"
        )
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, number_of_user)

    def test_metrics_small_batch_last_outlier(self):
        """Only the last of twenty documents must be reported as an outlier."""
        doc_factory = DummyDocumentsGenerate()

        # Nineteen documents with user id 0, followed by one value that must
        # be detected as an outlier (per the original notes, the limit is 10)
        list_user_id = [0] * 19 + [11]

        for user_id in list_user_id:
            self.test_es.add_doc(
                doc_factory.generate_document({"user_id": user_id}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_small_batch_eval.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_numerical_value_dummy_test.conf"
        )
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 1)

    def test_metrics_use_derived_fields_in_doc(self):
        """With the "derived" use case, ``timestamp_year`` must be in ``_source``."""
        doc_factory = DummyDocumentsGenerate()
        self.test_es.add_doc(doc_factory.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_derived.conf"
        )
        analyzer.evaluate_model()

        first_doc = list(es._scan())[0]
        self.assertIn("timestamp_year", first_doc['_source'])

    def test_metrics_use_derived_fields_in_outlier(self):
        """With the "derived" use case, the outlier has ``derived_timestamp_year``."""
        doc_factory = DummyDocumentsGenerate()
        self.test_es.add_doc(doc_factory.generate_document({"user_id": 11}))

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_derived.conf"
        )
        analyzer.evaluate_model()

        first_doc = list(es._scan())[0]
        self.assertIn("derived_timestamp_year",
                      first_doc['_source']['outliers'])

    def test_metrics_not_use_derived_fields_in_doc(self):
        """With the "not derived" use case, ``timestamp_year`` must be absent."""
        doc_factory = DummyDocumentsGenerate()
        self.test_es.add_doc(doc_factory.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        first_doc = list(es._scan())[0]
        self.assertNotIn("timestamp_year", first_doc['_source'])

    def test_metrics_not_use_derived_fields_but_present_in_outlier(self):
        """Even with the "not derived" use case, the outlier keeps the derived key."""
        doc_factory = DummyDocumentsGenerate()
        self.test_es.add_doc(doc_factory.generate_document({"user_id": 11}))

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        first_doc = list(es._scan())[0]
        # Per the original notes, use_derived_fields has no impact on the
        # keys stored in the outlier
        self.assertIn("derived_timestamp_year",
                      first_doc['_source']['outliers'])

    def test_whitelist_batch_document_not_process_all(self):
        """Of three stored documents, two must be reported as outliers.

        Per the original notes, the third document is whitelisted.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_with_whitelist.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_length_dummy_test.conf"
        )

        # In insertion order: not whitelisted, not whitelisted, whitelisted
        fixtures = (
            doc_without_outliers_test_whitelist_02_test_file,
            doc_without_outliers_test_whitelist_03_test_file,
            doc_without_outliers_test_whitelist_04_test_file,
        )
        for fixture in fixtures:
            # Store a copy of each fixture document
            self.test_es.add_doc(copy.deepcopy(fixture))

        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 2)

    def _generate_metrics_doc_with_whitelist(self, doc_to_generate):
        """Generate and store one document per entry of ``doc_to_generate``.

        ``doc_to_generate`` is an ordered sequence of
        ``(aggregator, target_value, is_whitelist)`` tuples. The aggregator
        becomes the ``hostname``, the target value the ``user_id``, and the
        ``deployment_name`` is "whitelist-deployment" for whitelisted
        entries and ``None`` otherwise.
        """
        # A sequence of tuples (and not a dict) keeps the order
        generator = DummyDocumentsGenerate()
        for hostname, user_id, whitelisted in doc_to_generate:
            fields = {
                "deployment_name":
                "whitelist-deployment" if whitelisted else None,
                "user_id": user_id,
                "hostname": hostname
            }
            self.test_es.add_doc(generator.generate_document(fields))

    def test_metrics_batch_whitelist_three_outliers_one_whitelist(self):
        """Two outliers are expected: ("agg1", 7) and the plain ("agg2", 6).

        ``MetricsAnalyzer.MIN_EVALUATE_BATCH`` is lowered to 5 for the test
        and restored in a ``finally`` clause, so that a failing assertion
        cannot leak the patched value into other tests.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_batch_whitelist_float.conf"
        )
        backup_min_eval_batch = MetricsAnalyzer.MIN_EVALUATE_BATCH
        MetricsAnalyzer.MIN_EVALUATE_BATCH = 5
        try:
            #            aggregator, target, is_whitelist
            doc_to_generate = [
                ("agg1", 5, False),
                ("agg1", 3, True),
                ("agg2", 4, False),
                ("agg2", 5, True),
                # Batch limit
                ("agg2", 3, False),
                ("agg1", 5, False),
                ("agg1", 7, False),  # Outlier
                ("agg2", 2, False),
                # Batch limit
                ("agg1", 4, True),
                ("agg2", 6, True),  # Outlier (but whitelist)
                ("agg1", 3, False),
                ("agg1", 5, False),
                # Batch limit
                ("agg2", 1, False),
                ("agg2", 6, False),  # Outlier
                ("agg1", 3, False)
            ]
            self._generate_metrics_doc_with_whitelist(doc_to_generate)

            analyzer.evaluate_model()
            list_outliers = [(doc["_source"]["outliers"]["aggregator"][0],
                              doc["_source"]["outliers"]["target"][0])
                             for doc in es._scan()
                             if "outliers" in doc["_source"]]

            self.assertEqual(list_outliers, [("agg1", "7"), ("agg2", "6")])
        finally:
            # Restore the class attribute even when the test fails
            MetricsAnalyzer.MIN_EVALUATE_BATCH = backup_min_eval_batch

    def test_metrics_batch_whitelist_outlier_detect_after_process_all_and_remove_whitelist(
            self):
        """Check that removing a whitelisted value changes which documents are outliers.

        Eight documents are indexed with ``MIN_EVALUATE_BATCH`` forced to 5;
        the last "agg1" row (value 7) is flagged as whitelisted. The test
        expects four "agg1" documents (targets "5", "5", "6", "5") to carry
        an "outliers" section after evaluation.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_batch_whitelist_avg.conf"
        )

        original_min_batch = MetricsAnalyzer.MIN_EVALUATE_BATCH
        MetricsAnalyzer.MIN_EVALUATE_BATCH = 5
        try:
            #            aggregator, target, is_whitelist
            doc_to_generate = [
                ("agg1", 5, False),
                ("agg2", 5, False),
                ("agg1", 5, False),
                ("agg1", 3, False),
                # Batch limit
                ("agg1", 6, False),
                ("agg2", 5, False),
                ("agg1", 5, False),
                ("agg1", 7, True),
            ]
            self._generate_metrics_doc_with_whitelist(doc_to_generate)
            # The average of all agg1 values is 31 / 6 (about 5.17); without
            # the whitelisted 7 it drops to 24 / 5 = 4.8.

            analyzer.evaluate_model()
            # Collect (aggregator, target) for every document flagged as outlier.
            list_outliers = [
                (source["outliers"]["aggregator"][0],
                 source["outliers"]["target"][0])
                for source in (doc["_source"] for doc in es._scan())
                if "outliers" in source
            ]

            # Without the batch whitelist the only outlier would be
            # ("agg1", 6), since ("agg1", 7) is whitelisted. With the batch
            # whitelist the average is updated and every "agg1" value except 3
            # is detected as an outlier.
            self.assertEqual(list_outliers, [("agg1", "5"), ("agg1", "5"),
                                             ("agg1", "6"), ("agg1", "5")])
        finally:
            # Restore the class attribute even when the test fails, otherwise
            # the patched batch size leaks into every later test.
            MetricsAnalyzer.MIN_EVALUATE_BATCH = original_min_batch

    def test_simplequery_default_outlier_infos(self):
        """Every DEFAULT_OUTLIERS_KEY_FIELDS entry must be a key of the outlier section.

        One document (user_id 11) is indexed, the "not derived" metrics use
        case is evaluated, and the first scanned document is inspected.
        """
        # Index a single document.
        seeded_doc = DummyDocumentsGenerate().generate_document({"user_id": 11})
        self.test_es.add_doc(seeded_doc)

        # Load the configuration and evaluate the use case.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        use_case = "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        AnalyzerFactory.create(use_case).evaluate_model()

        first_hit = list(es._scan())[0]
        self.assertTrue(
            all(field in first_hit['_source']['outliers']
                for field in DEFAULT_OUTLIERS_KEY_FIELDS))

    def test_metrics_extra_outlier_infos_all_present(self):
        """Every EXTRA_OUTLIERS_KEY_FIELDS entry must be a key of the outlier section.

        One document (user_id 11) is indexed, the "not derived" metrics use
        case is evaluated, and the first scanned document is inspected.
        """
        # Index a single document.
        seeded_doc = DummyDocumentsGenerate().generate_document({"user_id": 11})
        self.test_es.add_doc(seeded_doc)

        # Load the configuration and evaluate the use case.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        use_case = "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        AnalyzerFactory.create(use_case).evaluate_model()

        first_hit = list(es._scan())[0]
        self.assertTrue(
            all(field in first_hit['_source']['outliers']
                for field in EXTRA_OUTLIERS_KEY_FIELDS))

    def test_metrics_extra_outlier_infos_new_result(self):
        """The outlier section must contain only known keys.

        Each key of the first scanned document's outlier section has to be
        listed in EXTRA_OUTLIERS_KEY_FIELDS or DEFAULT_OUTLIERS_KEY_FIELDS.
        """
        # Index a single document.
        seeded_doc = DummyDocumentsGenerate().generate_document({"user_id": 11})
        self.test_es.add_doc(seeded_doc)

        # Load the configuration and evaluate the use case.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        use_case = "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        AnalyzerFactory.create(use_case).evaluate_model()

        first_hit = list(es._scan())[0]
        known_keys = EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
        unexpected_free = all(
            key in known_keys for key in first_hit['_source']['outliers'])
        self.assertTrue(unexpected_free)

    def _preperate_dummy_data_terms(self):
        """Build fixed dummy arguments for ``add_metric_to_batch`` tests.

        Returns:
            A 6-tuple ``(eval_metrics_array, aggregator_value, target_value,
            metrics_value, observations, doc)`` made of a fresh factory-less
            ``defaultdict`` followed by arbitrary constant values.
        """
        # The values themselves are arbitrary; only their shape matters.
        return (
            defaultdict(),
            "key",
            "test",
            12,
            {'a': 1, 'test': 'ok'},
            {'source': 'this', 'target': 12},
        )

    def _preperate_data_terms_with_doc(self, metrics_value=1):
        """Build dummy ``add_metric_to_batch`` arguments, leaving the document to the caller.

        Args:
            metrics_value: Metric value to place in the returned tuple.

        Returns:
            A 5-tuple ``(eval_metrics_array, aggregator_value, target_value,
            metrics_value, observations)`` with a fresh factory-less
            ``defaultdict`` and a fresh empty observations dict.
        """
        # The aggregator and target names are arbitrary constants.
        return defaultdict(), "key", "test", metrics_value, {}

    def test_add_metric_to_batch_empty(self):
        """``add_metric_to_batch`` with empty-string inputs yields a one-aggregator batch."""
        batch_in = defaultdict()
        agg = ""
        target = ""
        metric = ""
        raw_doc = {}
        # This same dict object is stored in the expectation and handed to
        # the call under test.
        shared_observations = {"target": [target], "aggregator": [agg]}

        # Expected batch: one aggregator entry holding one metric, one
        # observation dict and one raw document.
        per_aggregator = defaultdict(list)
        per_aggregator["metrics"] = [metric]
        per_aggregator["observations"] = [shared_observations]
        per_aggregator["raw_docs"] = [raw_doc]
        expected_batch = defaultdict()
        expected_batch[agg] = per_aggregator

        actual_batch = MetricsAnalyzer.add_metric_to_batch(
            batch_in, agg, target, metric, shared_observations, raw_doc)
        self.assertEqual(actual_batch, expected_batch)

    def test_add_metric_to_batch_no_modification(self):
        """``add_metric_to_batch`` with the dummy term data yields a one-aggregator batch."""
        prepared = self._preperate_dummy_data_terms()
        batch_in, agg, target, metric, shared_observations, raw_doc = prepared

        # The observations dict is pre-filled here and then passed (same
        # object) to the call under test.
        shared_observations["target"] = [target]
        shared_observations["aggregator"] = [agg]

        # Expected batch: one aggregator entry holding one metric, one
        # observation dict and one raw document.
        per_aggregator = defaultdict(list)
        per_aggregator["metrics"] = [metric]
        per_aggregator["observations"] = [shared_observations]
        per_aggregator["raw_docs"] = [raw_doc]
        expected_batch = defaultdict()
        expected_batch[agg] = per_aggregator

        actual_batch = MetricsAnalyzer.add_metric_to_batch(
            batch_in, agg, target, metric, shared_observations, raw_doc)
        self.assertEqual(actual_batch, expected_batch)

    def test_calculate_metric_numerical_value(self):
        """"numerical_value" on "12" must return the float 12 and no observations."""
        computed = MetricsAnalyzer.calculate_metric("numerical_value", "12")
        self.assertEqual(computed, (12.0, {}))

    def test_calculate_metric_length(self):
        """"length" on "test" must return the string length and no observations."""
        computed = MetricsAnalyzer.calculate_metric("length", "test")
        self.assertEqual(computed, (len("test"), {}))

    def test_calculate_metric_entropy(self):
        """"entropy" on "test" must match ``helpers.utils.shannon_entropy`` with no observations."""
        computed = MetricsAnalyzer.calculate_metric("entropy", "test")
        reference_entropy = helpers.utils.shannon_entropy("test")
        self.assertEqual(computed, (reference_entropy, {}))

    def test_calculate_metric_hex_encoded_length(self):
        """"hex_encoded_length" must report the 12-character hex word and its length."""
        computed = MetricsAnalyzer.calculate_metric(
            "hex_encoded_length", "12c322adc020 12322029620")
        expected = (12, {
            'max_hex_encoded_length': 12,
            'max_hex_encoded_word': '12c322adc020',
        })
        self.assertEqual(computed, expected)

    def test_calculate_metric_base64_encoded_length(self):
        """"base64_encoded_length" must report the decoded word 'problem' and its length 7."""
        computed = MetricsAnalyzer.calculate_metric(
            "base64_encoded_length", "houston we have a cHJvYmxlbQ==")
        expected = (7, {
            'max_base64_decoded_length': 7,
            'max_base64_decoded_word': 'problem',
        })
        self.assertEqual(computed, expected)

    def test_calculate_metric_url_length(self):
        """"url_length" must report the extracted URL and its length 20."""
        computed = MetricsAnalyzer.calculate_metric(
            "url_length", "why don't we go http://www.nviso.com")
        expected = (20, {
            'extracted_urls_length': 20,
            'extracted_urls': 'http://www.nviso.com',
        })
        self.assertEqual(computed, expected)

    def test_calculate_metric_unexist_operation(self):
        """An unknown operation name must return ``None`` and no observations."""
        computed = MetricsAnalyzer.calculate_metric("dummy operation", "")
        self.assertEqual(computed, (None, {}))

    def test_evaluate_batch_for_outliers_fetch_remain_metrics(self):
        """A non-final batch evaluation must report no outliers and hand the batch back.

        ``_evaluate_batch_for_outliers(metrics, False)`` is expected to return
        the pair ``([], metrics)``.
        """
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        use_case = "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test.conf"
        analyzer = AnalyzerFactory.create(use_case)

        # Five prepared positional arguments, completed by a generated document.
        batch_args = self._preperate_data_terms_with_doc()
        generated_doc = DummyDocumentsGenerate().generate_document()
        metrics = MetricsAnalyzer.add_metric_to_batch(*batch_args, generated_doc)

        evaluation = analyzer._evaluate_batch_for_outliers(metrics, False)
        self.assertEqual(evaluation, ([], metrics))

    def test_evaluate_batch_for_outliers_add_outlier(self):
        """Processing the first detected outlier must turn the stored doc into the reference doc.

        The outlier-free reference document is indexed and batched with a
        metric value of 12; after ``process_outlier`` the first scanned
        document must equal ``doc_with_outlier_test_file``.
        """
        # Show the full diff if the final comparison fails.
        self.maxDiff = None

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        use_case = "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_2.conf"
        analyzer = AnalyzerFactory.create(use_case)

        batch_args = self._preperate_data_terms_with_doc(metrics_value=12)
        clean_doc = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(clean_doc)
        metrics = MetricsAnalyzer.add_metric_to_batch(*batch_args, clean_doc)

        # The second element of the returned pair is not needed here.
        outliers, _ = analyzer._evaluate_batch_for_outliers(metrics, True)
        analyzer.process_outlier(outliers[0])

        stored_doc = list(es._scan())[0]
        expected_doc = copy.deepcopy(doc_with_outlier_test_file)
        self.assertEqual(stored_doc, expected_doc)

    def test_extract_additional_model_settings_no_metrics_section(self):
        """Creating a metrics analyzer without a metrics section must log an error.

        The standard-library logging module is imported under another name
        because the module-level ``logging`` used below exposes ``.logger``.
        Logging is enabled for the duration of the test so ``assertLogs`` can
        capture the record, then set back to ``disable(CRITICAL)``.
        """
        import logging as base_logging
        base_logging.disable(base_logging.NOTSET)
        try:
            self.test_settings.change_configuration_path(
                "/app/tests/unit_tests/files/metrics_test_no_metrics_section.conf")

            # A missing metrics section must produce an ERROR log record.
            with self.assertLogs(logging.logger, level='ERROR'):
                analyzer = AnalyzerFactory.create(
                    "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test.conf"
                )
                # The analyzer must also flag the configuration problem.
                self.assertTrue(analyzer.configuration_parsing_error)
        finally:
            # Re-apply the suppression even when an assertion fails, so the
            # global logging state does not leak into later tests.
            base_logging.disable(base_logging.CRITICAL)

    def test_remove_metric_from_batch_simple_value(self):
        """Removing index 0 from a one-element aggregator entry must leave three empty lists."""
        agg = "agg"
        empty_batch = defaultdict()
        generated_doc = DummyDocumentsGenerate().generate_document()

        # Build a batch that holds exactly one metric for the aggregator.
        batch = MetricsAnalyzer.add_metric_to_batch(
            empty_batch, agg, "dummy_target", "dummy_metric", {},
            generated_doc)
        remaining = MetricsAnalyzer.remove_metric_from_batch(batch[agg], 0)

        expected_entry = defaultdict(
            list, metrics=[], observations=[], raw_docs=[])
        self.assertEqual(remaining, expected_entry)

    def test_non_outliers_not_present_in_metrics_for_first(self):
        """The first scanned document must have an empty ``non_outlier_values_sample``.

        Two documents are indexed in order: user_id 11 (intended outlier) and
        user_id 8 (intended non-outlier).
        """
        generator = DummyDocumentsGenerate()
        # Outlier document first, non-outlier document second.
        for user_id in (11, 8):
            self.test_es.add_doc(
                generator.generate_document({"user_id": user_id}))

        # Load the configuration and evaluate the use case.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        use_case = "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        AnalyzerFactory.create(use_case).evaluate_model()

        first_hit = list(es._scan())[0]
        sample = first_hit["_source"]["outliers"]["non_outlier_values_sample"]
        self.assertEqual(sample, [])

    def test_non_outliers_present_in_metrics(self):
        """The third scanned document must list "8.0" as its non-outlier sample.

        Three documents are indexed in order: user_id 11 (intended outlier),
        user_id 8 (intended non-outlier) and user_id 12 (intended outlier).
        """
        generator = DummyDocumentsGenerate()
        # Outlier, non-outlier, outlier -- insertion order matters because the
        # assertion reads the third scanned document.
        for user_id in (11, 8, 12):
            self.test_es.add_doc(
                generator.generate_document({"user_id": user_id}))

        # Load the configuration and evaluate the use case.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        use_case = "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        AnalyzerFactory.create(use_case).evaluate_model()

        third_hit = list(es._scan())[2]
        sample = third_hit["_source"]["outliers"]["non_outlier_values_sample"]
        self.assertEqual(sample, ["8.0"])
class TestSuddenAppearanceAnalyzer(unittest.TestCase):
    """Tests for the sudden-appearance use cases, run against the stubbed ES."""

    @classmethod
    def setUpClass(cls):
        logging.verbosity = 0

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # Restore the default configuration file so other unit tests that use
        # the settings singleton are not influenced.
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    # ------------------------------------------------------------------
    # Private helpers shared by the tests below
    # ------------------------------------------------------------------
    def _evaluate_use_case(self, settings_path, use_case_file):
        """Load ``settings_path``, build the analyzer for ``use_case_file`` and evaluate it."""
        self.test_settings.change_configuration_path(settings_path)
        analyzer = AnalyzerFactory.create(
            root_test_use_case_files + use_case_file)
        set_new_current_date(analyzer)
        analyzer.evaluate_model()

    def _count_outliers(self):
        """Return how many scanned documents carry an "outliers" section."""
        return sum(1 for hit in es._scan() if "outliers" in hit["_source"])

    def _index_time_variable_docs(self, delta_hours, user_ids, hostnames):
        """Generate time-shifted documents with user_id/hostname fields and index them."""
        generator = DummyDocumentsGenerate()
        docs = generator.generate_doc_time_variable_witt_custom_fields(
            delta_hours, "user_id", user_ids, "hostname", hostnames)
        self.test_es.add_multiple_docs(docs)

    def _evaluate_single_default_doc(self, use_case_file):
        """Index one default document, evaluate the use case, return the first scanned doc."""
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())
        self._evaluate_use_case(test_conf_file_01, use_case_file)
        return list(es._scan())[0]

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_sudden_appearance_whitelist_work_test_es_result(self):
        """Only the non-whitelisted documents must end up flagged as outliers."""
        generator = DummyDocumentsGenerate()
        # NOTE(review): the original comment here read "must be bigger than
        # the trigger value (here 3)" -- confirm what it refers to.
        query = "SELECT * FROM dummy_table"
        expected_outliers = 5

        # Documents expected to be detected as outliers.
        for index in range(expected_outliers):
            self.test_es.add_doc(generator.generate_document({
                "command_query": query,
                "command_name": f"default_name_{index}",
            }))

        # One more document, from the hostname used for whitelisting.
        self.test_es.add_doc(generator.generate_document({
            "hostname": "whitelist_hostname",
            "command_query": query,
            "command_name": f"default_name_{expected_outliers}",
        }))

        self._evaluate_use_case(test_conf_file_with_whitelist,
                                "sudden_appearance_dummy_test_01.conf")

        self.assertEqual(self._count_outliers(), expected_outliers)

    def test_sudden_appearance_detect_no_outlier_es_check(self):
        """Twelve documents sharing one hostname must produce no outlier."""
        delta_hours = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
        self._index_time_variable_docs(
            delta_hours,
            user_ids=[1, 2] * 6,
            hostnames=["host1"] * len(delta_hours))

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_dummy_test_02.conf")

        self.assertEqual(self._count_outliers(), 0)

    def test_sudden_appearance_detect_one_outlier_es_check_1(self):
        """A single document with a new hostname must produce exactly one outlier."""
        delta_hours = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
        # Every document uses "host1" except the last one.
        hostnames = ["host1"] * (len(delta_hours) - 1) + ["host2"]
        self._index_time_variable_docs(
            delta_hours, user_ids=[1, 2] * 6, hostnames=hostnames)

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_dummy_test_02.conf")

        self.assertEqual(self._count_outliers(), 1)

    def test_sudden_appearance_detect_one_outlier_es_check_2(self):
        """The dummy_test_03 use case must flag exactly one of twelve identical-field docs."""
        delta_hours = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
        self._index_time_variable_docs(
            delta_hours,
            user_ids=[1] * 12,
            hostnames=["host1"] * len(delta_hours))

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_dummy_test_03.conf")

        self.assertEqual(self._count_outliers(), 1)

    def test_sudden_appearance_derived_fields_in_doc(self):
        """With derived_fields_01, "timestamp_year" must be added to the document source."""
        first_hit = self._evaluate_single_default_doc(
            "sudden_appearance_derived_fields_01.conf")
        self.assertIn("timestamp_year", first_hit['_source'])

    def test_sudden_appearance_no_derived_fields_in_doc(self):
        """With no_derived_fields, "timestamp_year" must be absent from the document source."""
        first_hit = self._evaluate_single_default_doc(
            "sudden_appearance_no_derived_fields.conf")
        self.assertNotIn("timestamp_year", first_hit['_source'])

    def test_sudden_appearance_derived_fields_in_outlier(self):
        """With derived_fields_02, the outlier section must hold "derived_timestamp_year"."""
        first_hit = self._evaluate_single_default_doc(
            "sudden_appearance_derived_fields_02.conf")
        self.assertIn("derived_timestamp_year",
                      first_hit['_source']['outliers'])

    def test_sudden_appearance_no_derived_fields(self):
        """With no_derived_fields, the outlier section must lack "derived_timestamp_year"."""
        first_hit = self._evaluate_single_default_doc(
            "sudden_appearance_no_derived_fields.conf")
        self.assertNotIn("derived_timestamp_year",
                         first_hit['_source']['outliers'])

    def test_sudden_extra_outlier_infos_all_present(self):
        """Every key of the first outlier section must be a known default or extra field."""
        delta_hours = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
        self._index_time_variable_docs(
            delta_hours,
            user_ids=[1] * 12,
            hostnames=["host1"] * len(delta_hours))

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_dummy_test_03.conf")

        flagged = [hit for hit in es._scan() if "outliers" in hit["_source"]]
        known_keys = EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
        self.assertTrue(
            all(key in known_keys
                for key in flagged[0]['_source']['outliers']))
Esempio n. 12
0
class TestEs(unittest.TestCase):
    """Tests for the Elasticsearch helper, run against the stubbed ES."""

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        self.test_es.restore_es()
        self.test_settings.restore_default_configuration_path()

    def test_add_tag_to_document_no_tag(self):
        """Tagging a document without tags must create a one-element "tags" list."""
        elem = {"_source": {"key": {"test": 1}}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"] = ["new_tag"]

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")
        self.assertEqual(new_doc_result, expected_result)

    def test_add_tag_to_document_already_a_tag(self):
        """Tagging a document that already has tags must append the new tag."""
        elem = {"_source": {"key": {"test": 1}, "tags": ["ok"]}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"].append("new_tag")

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")
        self.assertEqual(new_doc_result, expected_result)

    def test_remove_all_whitelisted_outliers(self):
        """``remove_all_whitelisted_outliers`` must strip the outlier section of the stored doc."""
        self.test_settings.change_configuration_path(
            test_file_whitelist_path_config)

        doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            doc_generate.generate_document({
                "create_outlier":
                True,
                "outlier_observation":
                "dummy observation",
                "outlier.model_name":
                "dummy_test",
                "outlier.model_type":
                "analyzer",
                "command_query":
                "osquery_get_all_processes_with_listening_conns.log"
            }))

        # Check that the outlier was correctly generated.
        result = [doc for doc in es._scan()][0]
        self.assertIn("outliers", result["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        # Remove the whitelisted outlier.
        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        # Check that the outlier was correctly removed.
        result = [doc for doc in es._scan()][0]
        self.assertNotIn("outliers", result["_source"])

    def test_get_highlight_settings_with_metrics_analyzer(self):
        """A metrics analyzer must yield no highlight settings."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test.conf"
        )
        highlight_settings = es._get_highlight_settings(
            analyzer.model_settings)
        self.assertIsNone(highlight_settings)

    def test_get_highlight_settings_with_terms_analyzer(self):
        """A terms analyzer must yield no highlight settings."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test.conf"
        )
        highlight_settings = es._get_highlight_settings(
            analyzer.model_settings)
        self.assertIsNone(highlight_settings)

    def test_get_highlight_settings_with_simplequery_analyzer_and_highlight_match_activated(
            self):
        """With highlight_match activated, the expected highlight dict must be returned."""
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/" \
                        "simplequery_dummy_test_highlight_match_activated.conf"
        analyzer = AnalyzerFactory.create(use_case_file)
        highlight_settings = es._get_highlight_settings(
            analyzer.model_settings)

        highlight_settings_test = {
            "pre_tags": ["<value>"],
            "post_tags": ["</value>"],
            "fields": {"*": {}},
        }

        # assertEqual (rather than assertTrue(a == b)) shows the diff on failure.
        self.assertEqual(highlight_settings, highlight_settings_test)

    def test_get_highlight_settings_with_simplequery_analyzer_and_highlight_match_unactivated(
            self):
        """With highlight_match deactivated, no highlight settings must be returned."""
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/" \
                        "simplequery_dummy_test_highlight_match_unactivated.conf"
        analyzer = AnalyzerFactory.create(use_case_file)
        highlight_settings = es._get_highlight_settings(
            analyzer.model_settings)

        self.assertIsNone(highlight_settings)

    def test_get_highlight_settings_with_simplequery_analyzer_without_highlight_parameter(
            self):
        """Without a highlight parameter, no highlight settings must be returned."""
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        analyzer = AnalyzerFactory.create(use_case_file)
        highlight_settings = es._get_highlight_settings(
            analyzer.model_settings)

        self.assertIsNone(highlight_settings)

    def test_build_search_query(self):
        """``build_search_query`` must put the time range first, then the analyzer filters."""
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        analyzer = AnalyzerFactory.create(use_case_file)
        timestamp_field, history_window_days, history_window_hours = es._get_history_window(
            analyzer.model_settings)
        search_range = es.get_time_filter(days=history_window_days,
                                          hours=history_window_hours,
                                          timestamp_field=timestamp_field)
        dsl_search_query_1 = build_search_query(
            search_range=search_range, search_query=analyzer.search_query)

        # Hand-built reference query: bool/filter holding the time range
        # followed by a copy of the analyzer's own filters.
        expected_filters = [search_range]
        expected_filters.extend(analyzer.search_query["filter"].copy())
        dsl_search_query_2 = {'query': {'bool': {'filter': expected_filters}}}

        # assertEquals was a deprecated alias and no longer exists in
        # Python 3.12+; assertEqual is the supported spelling.
        self.assertEqual(dsl_search_query_1, dsl_search_query_2)
Esempio n. 13
0
class TestAnalyzer(unittest.TestCase):
    """Unit tests for analyzers built through ``AnalyzerFactory``.

    The tests cover outlier creation and saving, extra (arbitrary) model
    settings, ``AnalyzerFactory.create_multi`` with empty, single, mixed and
    malformed use-case files, and the ``timestamp_field`` model setting.

    Documentation inside the test methods is written as ``#`` comments rather
    than docstrings so the names reported by the test runner stay unchanged.
    """

    def setUp(self):
        # "es" is used in Analyzer construction and in the method
        # "process_outlier", so it is replaced by a stub for every test.
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # Restore the default configuration file so we don't influence other
        # unit tests that use the settings singleton.
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def test_simple_process_outlier_return_good_outlier(self):
        # The outlier created by the dummy analyzer must carry the same
        # outlier_dict as a hand-built Outlier with the dummy values below.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        outlier = analyzer.create_outlier(doc_without_outlier["_source"],
                                          doc_without_outlier)

        expected_outlier = Outlier(outlier_type=["dummy type"],
                                   outlier_reason=['dummy reason'],
                                   outlier_summary='dummy summary',
                                   doc=doc_without_outlier)
        expected_outlier.outlier_dict.update({
            'model_name': 'dummy_test',
            'model_type': 'analyzer',
            'elasticsearch_filter': 'es_valid_query',
        })

        # assertEqual shows a diff of both dicts on failure.
        self.assertEqual(outlier.outlier_dict, expected_outlier.outlier_dict)

    def test_simple_process_outlier_save_es(self):
        # Saving an outlier through es.save_outlier must turn the stored
        # document into the reference "document with outlier".
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(doc_without_outlier)
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)

        outlier = analyzer.create_outlier(doc_without_outlier["_source"],
                                          doc_without_outlier)
        es.save_outlier(outlier)

        # Only the first scanned document is compared.
        result = list(es._scan())[0]

        self.assertEqual(result, doc_with_outlier)

    def test_arbitrary_key_config_present_in_analyzer(self):
        # Keys from the use-case file that end up in extra_model_settings
        # must match exactly the two entries below.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(
            use_case_analyzer_arbitrary_dummy_test)

        expected_extra_settings = {
            "test_arbitrary_key": "arbitrary_value",
            "elasticsearch_filter": "es_valid_query",
        }
        self.assertDictEqual(analyzer.extra_model_settings,
                             expected_extra_settings)

    def test_create_multi_with_empty_config(self):
        # The general configuration file is passed as the use-case file here;
        # it is expected to produce no analyzers (presumably because it
        # defines no use-case sections -- verify against the fixture).
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(config_file_analyzer_test_01)

        self.assertEqual(len(analyzers), 0)

    def test_create_multi_with_single(self):
        # A use-case file describing one analyzer must yield one analyzer.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_arbitrary_dummy_test)

        self.assertEqual(len(analyzers), 1)

    def test_create_multi_with_malformed_duplicate_option(self):
        # With {'strict': False} passed as second argument (presumably
        # forwarded to the config parser), a file containing a duplicate
        # option is still loaded and yields three analyzers.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_multi_malformed_duplicate_option,
            {'strict': False})

        self.assertEqual(len(analyzers), 3)

    def test_create_multi_with_malformed_duplicate_section(self):
        # With {'strict': False}, a file containing a duplicate section is
        # still loaded and yields two analyzers.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_multi_malformed_duplicate_section,
            {'strict': False})

        self.assertEqual(len(analyzers), 2)

    def test_create_multi_with_malformed_duplicate_option_strict(self):
        # Without the non-strict override, a duplicate option must surface
        # as configparser.DuplicateOptionError.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)

        with self.assertRaises(configparser.DuplicateOptionError):
            AnalyzerFactory.create_multi(
                use_case_analyzer_multi_malformed_duplicate_option)

    def test_create_multi_with_malformed_duplicate_section_strict(self):
        # Without the non-strict override, a duplicate section must surface
        # as configparser.DuplicateSectionError.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)

        with self.assertRaises(configparser.DuplicateSectionError):
            AnalyzerFactory.create_multi(
                use_case_analyzer_multi_malformed_duplicate_section)

    def test_create_multi_mixed_types(self):
        # A single use-case file mixing several model types must produce
        # analyzers of the matching types, in this order.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_files_path + "analyzer_multi_mixed_types.conf")

        simplequery_analyzer = analyzers[0]
        metrics_analyzer = analyzers[1]
        terms_analyzer = analyzers[2]

        self.assertEqual(simplequery_analyzer.model_type, 'simplequery')
        self.assertEqual(metrics_analyzer.model_type, 'metrics')
        self.assertEqual(terms_analyzer.model_type, 'terms')

    def test_default_timestamp_field(self):
        # With the standard test configuration, the analyzer's
        # timestamp_field model setting must be "@timestamp".
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        timestamp_field = analyzer.model_settings["timestamp_field"]
        default_timestamp_field = "@timestamp"

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(timestamp_field, default_timestamp_field)

    def test_non_default_timestamp_field(self):
        # With the custom-timestamp configuration, the analyzer's
        # timestamp_field model setting must be "timestamp".
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_with_custom_timestamp_field)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        timestamp_field = analyzer.model_settings["timestamp_field"]
        non_default_timestamp_field = "timestamp"

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(timestamp_field, non_default_timestamp_field)