Esempio n. 1
0
    def test_create_multi_with_malformed_duplicate_option_strict(self):
        # Loading the duplicate-option use case file through create_multi
        # (called without extra options) is expected to raise
        # configparser.DuplicateOptionError.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        use_case_path = use_case_analyzer_multi_malformed_duplicate_option

        with self.assertRaises(configparser.DuplicateOptionError):
            AnalyzerFactory.create_multi(use_case_path)
Esempio n. 2
0
    def test_whitelist_literals_per_model_not_removed_by_housekeeping(self):
        # A document with hostname "NOT-WHITELISTED" is stored together with
        # an outlier, housekeeping runs with the analyzer loaded from
        # analyzer_dummy_test_with_whitelist.conf, and the stored document is
        # expected to still carry its "outliers" section afterwards.

        # Init
        doc_generate = DummyDocumentsGenerate()
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/housekeeping.conf")
        housekeeping = HousekeepingJob()

        # Generate document
        document = doc_generate.generate_document({
            "hostname": "NOT-WHITELISTED",
            "create_outlier": True,
            "outlier.model_name": "dummy_test",
            "outlier.model_type": "simplequery",
        })
        # Sanity check on the fixture: assertIn shows the container content
        # on failure, which assertTrue("x" in y) does not.
        self.assertIn("outliers", document["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test_with_whitelist.conf"
        )
        housekeeping.update_analyzer_list([analyzer])

        self.test_es.add_doc(document)

        housekeeping.execute_housekeeping()

        # Only the first stored document is inspected.
        result = list(self.test_es._scan())[0]
        self.assertIn("outliers", result["_source"])
Esempio n. 3
0
    def test_remove_all_whitelisted_outliers(self):
        # Store one document carrying an outlier, call
        # es.remove_all_whitelisted_outliers with the dummy analyzer, and
        # check that the "outliers" section is gone from the stored document.
        self.test_settings.change_configuration_path(
            test_file_whitelist_path_config)

        doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            doc_generate.generate_document({
                "create_outlier": True,
                "outlier_observation": "dummy observation",
                "outlier.model_name": "dummy_test",
                "outlier.model_type": "analyzer",
                "command_query":
                "osquery_get_all_processes_with_listening_conns.log",
            }))

        # Check that the outlier was correctly generated
        result = list(es._scan())[0]
        self.assertIn("outliers", result["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        # Remove whitelisted outlier
        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        # Check that the outlier was correctly removed
        result = list(es._scan())[0]
        self.assertNotIn("outliers", result["_source"])
Esempio n. 4
0
    def test_housekeeping_execute_no_whitelist_parameter_change(self):
        # Check that housekeeping runs even when a new, unrelated section is
        # appended to the configuration file: after execute_housekeeping the
        # stored document must differ from the document originally inserted.
        config_path = test_file_whitelist_dummy_reason_path_config
        self.test_settings.change_configuration_path(config_path)
        self._backup_config(config_path)

        # The configuration file is modified on disk below, so it is restored
        # in a finally clause: a failure in any intermediate step must not
        # leave the altered file behind for the following tests.
        try:
            housekeeping = HousekeepingJob()

            analyzer = AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf"
            )
            housekeeping.update_analyzer_list([analyzer])

            # Add document to "Database"
            doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            expected_doc_with_outlier = copy.deepcopy(
                doc_with_outlier_test_file)
            self.test_es.add_doc(doc_with_outlier)

            # Update configuration (create new section and append to default)
            filecontent = "\n\n[dummy_section]\nparam=1"

            # Force the recorded modification time of the file to 0
            housekeeping.file_mod_watcher._previous_mtimes[config_path] = 0

            with open(config_path, 'a') as test_file:
                test_file.write(filecontent)

            housekeeping.execute_housekeeping()

            # Fetch result
            result = list(self.test_es._scan())[0]
        finally:
            self._restore_config(config_path)

        self.assertNotEqual(result, expected_doc_with_outlier)
Esempio n. 5
0
    def test_create_multi_with_single(self):
        # create_multi on the arbitrary dummy use case file must return
        # exactly one analyzer.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_arbitrary_dummy_test)

        # assertEqual reports both values on failure, unlike
        # assertTrue(len(...) == 1).
        self.assertEqual(len(analyzers), 1)
Esempio n. 6
0
 def test_non_default_timestamp_field(self):
     # With the custom-timestamp configuration file active, the analyzer's
     # "timestamp_field" model setting must equal "timestamp".
     self.test_settings.change_configuration_path(
         config_file_analyzer_test_with_custom_timestamp_field)
     analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)
     timestamp_field = analyzer.model_settings["timestamp_field"]
     non_default_timestamp_field = "timestamp"
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling.
     self.assertEqual(timestamp_field, non_default_timestamp_field)
Esempio n. 7
0
    def test_evaluate_batch_for_outliers_limit_target_buckets_two_doc_max_two(
            self):
        # Two documents with two different aggregators are added to one
        # batch; evaluating that batch is expected to return no outlier.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_2.conf"
        )

        # First document, attached to the first aggregator
        first_aggregator = LIST_AGGREGATOR_VALUE[0]
        first_target = random.choice(LIST_TARGET_VALUE)
        first_doc = copy.deepcopy(random.choice(LIST_DOC))
        batch = analyzer._add_document_to_batch(
            defaultdict(), [first_target], [first_aggregator], first_doc)

        # Second document, attached to the second aggregator, same batch
        second_aggregator = LIST_AGGREGATOR_VALUE[1]
        second_target = random.choice(LIST_TARGET_VALUE)
        second_doc = copy.deepcopy(random.choice(LIST_DOC))
        batch = analyzer._add_document_to_batch(
            batch, [second_target], [second_aggregator], second_doc)

        # Expect to get nothing due to "min_target_buckets" set to 2
        outliers, _remaining_terms = analyzer._evaluate_batch_for_outliers(
            batch=batch)
        self.assertEqual(outliers, [])
Esempio n. 8
0
    def test_non_outliers_present_in_metrics(self):
        # Three documents are stored (user_id 11, 8 and 12). After the metrics
        # analyzer has run, the third stored document must list "8.0" as its
        # non outlier values sample.
        doc_factory = DummyDocumentsGenerate()

        # 11 and 12 are the outlier documents, 8 is the non outlier one;
        # the insertion order matters for the index used below.
        for user_id in (11, 8, 12):
            self.test_es.add_doc(
                doc_factory.generate_document({"user_id": user_id}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        third_doc = list(es._scan())[2]
        outlier_info = third_doc["_source"]["outliers"]

        self.assertEqual(outlier_info["non_outlier_values_sample"], ["8.0"])
    def test_sudden_appearance_detect_no_outlier_es_check(self):
        # Twelve documents alternating user_id 1 / 2 on a single host are
        # stored; after the sudden appearance analyzer has run, no stored
        # document may contain an "outliers" section.

        # Generate documents
        doc_factory = DummyDocumentsGenerate()
        delta_hours = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
        user_ids = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
        # One identical hostname per generated document
        hostnames = ["host1"] * len(delta_hours)
        self.test_es.add_multiple_docs(
            doc_factory.generate_doc_time_variable_witt_custom_fields(
                delta_hours, "user_id", user_ids, "hostname", hostnames))

        # Run analyzer
        self.test_settings.change_configuration_path(test_conf_file_01)
        analyzer = AnalyzerFactory.create(
            root_test_use_case_files + "sudden_appearance_dummy_test_02.conf")
        set_new_current_date(analyzer)
        analyzer.evaluate_model()

        outlier_count = sum(
            1 for stored in es._scan() if "outliers" in stored["_source"])
        self.assertEqual(outlier_count, 0)
Esempio n. 10
0
    def test_metrics_whitelist_work_test_es_result(self):
        # Five documents with the same command query plus one extra document
        # with hostname "whitelist_hostname" are stored; after the metrics
        # analyzer has run, exactly the five first ones must be outliers.
        doc_factory = DummyDocumentsGenerate()
        # must be bigger than the trigger value (here 3)
        command_query = "SELECT * FROM dummy_table"
        expected_outliers = 5

        # Generate documents that match the outlier condition
        for _ in range(expected_outliers):
            outlier_doc = doc_factory.generate_document(
                {"command_query": command_query})
            self.test_es.add_doc(outlier_doc)

        # Generate whitelist document
        whitelisted_doc = doc_factory.generate_document({
            "hostname": "whitelist_hostname",
            "command_query": command_query
        })
        self.test_es.add_doc(whitelisted_doc)

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/metrics_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_length_dummy_test.conf"
        )
        analyzer.evaluate_model()

        outlier_count = sum(
            1 for stored in es._scan() if "outliers" in stored["_source"])
        self.assertEqual(outlier_count, expected_outliers)
    def test_sudden_extra_outlier_infos_all_present(self):
        # After the sudden appearance analyzer has run, every key found in the
        # "outliers" section of the first flagged document must belong to
        # EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS.

        # Generate documents
        doc_factory = DummyDocumentsGenerate()
        delta_hours = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
        user_ids = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        # One identical hostname per generated document
        hostnames = ["host1"] * len(delta_hours)

        self.test_es.add_multiple_docs(
            doc_factory.generate_doc_time_variable_witt_custom_fields(
                delta_hours, "user_id", user_ids, "hostname", hostnames))

        self.test_settings.change_configuration_path(test_conf_file_01)
        analyzer = AnalyzerFactory.create(
            root_test_use_case_files + "sudden_appearance_dummy_test_03.conf")
        set_new_current_date(analyzer)
        analyzer.evaluate_model()

        flagged_docs = [
            stored for stored in es._scan() if "outliers" in stored["_source"]
        ]

        key_is_known = [
            key in EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
            for key in flagged_docs[0]['_source']['outliers']
        ]
        self.assertTrue(all(key_is_known))
Esempio n. 12
0
 def test_simplequery_raw_configparser_test_percent_signs_in_query(self):
     # Smoke test without explicit assertion: creating and evaluating the
     # percent-signs use case must simply complete without raising.
     config_path = "/app/tests/unit_tests/files/simplequery_test_whitelist.conf"
     use_case_path = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_raw_configparser_test_percent_signs.conf"

     self.test_settings.change_configuration_path(config_path)
     AnalyzerFactory.create(use_case_path).evaluate_model()
Esempio n. 13
0
    def test_terms_small_batch_treat_all(self):
        # 5 documents per hour over 10 hours are stored; after the terms
        # analyzer has run, analyzer.total_outliers must equal 5 * 10.
        doc_factory = DummyDocumentsGenerate()

        docs_per_hour = 5
        hours = 10
        # Same number of documents for each hour.
        # If the number of document per hours is not a divisor of the batch
        # limit, all document will not be detected
        docs_per_hour_schedule = [docs_per_hour] * hours

        # Generate documents
        generated_docs = doc_factory.generate_doc_time_variable_sensitivity(
            docs_per_hour_schedule)
        self.test_es.add_multiple_docs(generated_docs)

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_small_batch_eval.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf"
        )
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, docs_per_hour * hours)
Esempio n. 14
0
    def test_create_multi_with_malformed_duplicate_section(self):
        # With {'strict': False} passed to create_multi, the duplicate-section
        # use case file must still yield two analyzers.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_multi_malformed_duplicate_section,
            {'strict': False})

        # assertEqual reports both values on failure, unlike
        # assertTrue(len(...) == 2).
        self.assertEqual(len(analyzers), 2)
Esempio n. 15
0
 def test_get_highlight_settings_with_terms_analyzer(self):
     # es._get_highlight_settings must return None for the model settings
     # of the terms dummy analyzer.
     self.test_settings.change_configuration_path(
         "/app/tests/unit_tests/files/terms_test_01.conf")
     analyzer = AnalyzerFactory.create(
         "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test.conf"
     )
     highlight_settings = es._get_highlight_settings(
         analyzer.model_settings)
     # assertIsNone shows the unexpected value on failure, unlike
     # assertTrue(x is None).
     self.assertIsNone(highlight_settings)
Esempio n. 16
0
    def test_get_highlight_settings_with_simplequery_analyzer_without_highlight_parameter(
            self):
        # es._get_highlight_settings must return None for the model settings
        # of the simplequery dummy analyzer.
        self.test_settings.change_configuration_path(
            config_file_simplequery_test_01)
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        analyzer = AnalyzerFactory.create(use_case_file)
        highlight_settings = es._get_highlight_settings(
            analyzer.model_settings)

        # assertIsNone shows the unexpected value on failure, unlike
        # assertTrue(x is None).
        self.assertIsNone(highlight_settings)
Esempio n. 17
0
    def test_min_target_buckets_dont_detect_outlier(self):
        # Eleven (hostname, user_id) documents are stored in a fixed order;
        # after the terms analyzer has run, analyzer.total_outliers must be 0.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf"
        )
        # Recap of the use case settings (as noted by the test author):
        # min_target_buckets=4
        # trigger_sensitivity=5
        # trigger_on=high
        # trigger_method=float

        # Kept as an explicit list (not a matrix) to preserve document order.
        # Each entry is (aggregator, target value).
        first_batch = [
            # New batch:
            #       0  1
            # agg1 [6, 1]
            # agg2 [1, 2]
            ("agg1", 0),
            ("agg2", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg1", 0),
            ("agg2", 1),
            ("agg2", 1),
            ("agg1", 1),
        ]
        second_batch = [
            # New Batch
            #       2
            # agg1 [0]
            # agg2 [1]
            ("agg1", 2),
        ]

        # At the end:
        #       0  1  2
        # agg1 [6, 1, 1]
        # agg2 [1, 2]
        # Normally agg1 - 0 would be flagged, but here there are not enough
        # bucket values.

        doc_factory = DummyDocumentsGenerate()
        # The aggregator is carried by "hostname", the target by "user_id".
        for hostname, user_id in first_batch + second_batch:
            self.test_es.add_doc(
                doc_factory.generate_document({
                    "user_id": user_id,
                    "hostname": hostname
                }))

        analyzer.evaluate_model()
        self.assertEqual(analyzer.total_outliers, 0)
Esempio n. 18
0
    def test_arbitrary_key_config_present_in_analyzer(self):
        # The analyzer created from the arbitrary dummy use case must expose
        # exactly these two entries in extra_model_settings.
        expected_extra_settings = {
            "test_arbitrary_key": "arbitrary_value",
            "elasticsearch_filter": "es_valid_query"
        }

        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(
            use_case_analyzer_arbitrary_dummy_test)

        self.assertDictEqual(analyzer.extra_model_settings,
                             expected_extra_settings)
    def test_simplequry_use_matched_values_in_outlier(self):
        # After evaluating the highlight-match use case, the outlier section
        # of the first stored document must contain "matched_values".
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            config_file_simplequery_test_02)
        analyzer = AnalyzerFactory.create(
            use_case_simplequery_dummy_test_highlight_match_activated)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn shows the container content on failure, unlike
        # assertTrue("x" in y).
        self.assertIn("matched_values", result['_source']['outliers'])
    def test_simplequery_not_use_derived_fields_in_doc(self):
        # After evaluating the "not derived" use case, the first stored
        # document must not contain a "timestamp_year" field in its source.
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            config_file_simplequery_test_02)
        analyzer = AnalyzerFactory.create(
            use_case_simplequery_dummy_test_not_derived)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertNotIn shows the container content on failure, unlike
        # assertFalse("x" in y).
        self.assertNotIn("timestamp_year", result['_source'])
Esempio n. 21
0
    def test_create_multi_mixed_types(self):
        # create_multi on analyzer_multi_mixed_types.conf must return, in
        # this order, a simplequery, a metrics and a terms analyzer.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_files_path + "analyzer_multi_mixed_types.conf")

        simplequery_analyzer = analyzers[0]
        metrics_analyzer = analyzers[1]
        terms_analyzer = analyzers[2]

        # assertEqual reports the actual model type on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(simplequery_analyzer.model_type, 'simplequery')
        self.assertEqual(metrics_analyzer.model_type, 'metrics')
        self.assertEqual(terms_analyzer.model_type, 'terms')
Esempio n. 22
0
def load_analyzers():
    """Create one analyzer per use case file matched by the configured globs.

    Every pattern in ``settings.args.use_cases`` is expanded with
    ``glob.glob``; each matching file is passed to ``AnalyzerFactory.create``.
    A ``ValueError`` raised while creating an analyzer is logged as an error
    and that file is skipped.

    Returns:
        The list of analyzers that were created, in expansion order.
    """
    loaded = []

    for pattern in settings.args.use_cases:
        for path in glob.glob(pattern):
            logging.logger.debug("Loading use case %s" % path)
            try:
                analyzer = AnalyzerFactory.create(path)
            except ValueError as error:
                logging.logger.error("An error occured when loading %s: %s" %
                                     (path, str(error)))
            else:
                loaded.append(analyzer)

    return loaded
    def test_sudden_appearance_derived_fields_in_doc(self):
        # After evaluating the derived-fields use case, the first stored
        # document must contain a "timestamp_year" field in its source.
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(test_conf_file_01)
        analyzer = AnalyzerFactory.create(
            root_test_use_case_files +
            "sudden_appearance_derived_fields_01.conf")
        set_new_current_date(analyzer)
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn shows the container content on failure, unlike
        # assertTrue("x" in y).
        self.assertIn("timestamp_year", result['_source'])
Esempio n. 24
0
    def test_terms_not_use_derived_fields_in_doc(self):
        # After evaluating the terms "not derived" use case, the first stored
        # document must not contain a "timestamp_year" field in its source.
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertNotIn shows the container content on failure, unlike
        # assertFalse("x" in y).
        self.assertNotIn("timestamp_year", result['_source'])
Esempio n. 25
0
    def test_non_outlier_values_not_present_in_terms_within(self):
        # Five documents sharing hostname "one" are stored with deployment
        # names one, two, two, two, three. After the terms analyzer has run,
        # the document at index 4 must list ["two"] as its non outlier values
        # sample.
        doc_factory = DummyDocumentsGenerate()

        # Position in this tuple == index of the document in the store.
        deployment_names = ("one", "two", "two", "two", "three")
        for deployment_name in deployment_names:
            self.test_es.add_doc(
                doc_factory.generate_document({
                    "hostname": "one",
                    "deployment_name": deployment_name
                }))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
        )
        analyzer.evaluate_model()

        last_doc = list(es._scan())[4]
        outlier_info = last_doc["_source"]["outliers"]

        self.assertEqual(outlier_info["non_outlier_values_sample"], ["two"])
Esempio n. 26
0
    def test_simplequery_not_use_derived_fields_but_present_in_outlier(self):
        # After evaluating the simplequery "not derived" use case, the outlier
        # section of the first stored document must still contain
        # "derived_timestamp_year".
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/simplequery_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_not_derived.conf"
        )
        analyzer.evaluate_model()

        result = list(es._scan())[0]
        # assertIn shows the container content on failure, unlike
        # assertTrue("x" in y).
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])
Esempio n. 27
0
    def test_whitelist_config_change_single_literal_not_to_match_in_doc_with_outlier(
            self):
        # With whitelist_tests_03_with_general.conf active, removing
        # whitelisted outliers must leave the stored document unchanged.
        stored_doc = copy.deepcopy(doc_with_outlier_test_file)
        self.test_es.add_doc(stored_doc)
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_03_with_general.conf")

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )
        analyzers_by_name = {"analyzer_dummy_test": analyzer}

        es.remove_all_whitelisted_outliers(analyzers_by_name)

        first_stored = list(es._scan())[0]
        self.assertEqual(first_stored, stored_doc)
Esempio n. 28
0
    def test_batch_whitelist_work_doent_match_outlier_in_across(self):
        # Seven documents spread over aggregators agg1 / agg2 are stored, one
        # of them tagged with the "whitelist-deployment" deployment name.
        # After the terms analyzer has run, only the four agg2 documents are
        # expected to be flagged.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_across_float.conf"
        )

        # Each entry is (aggregator, target value, is whitelisted).
        doc_specs = [
            # agg1 (0, 1, 2) -> 3 but with whitelist: (0, 2) -> 2
            # agg2 (0, 3, 4) -> 3
            ("agg1", 0, False),
            ("agg1", 1, True),
            ("agg2", 0, False),
            ("agg2", 0, False),
            ("agg1", 2, False),
            ("agg2", 3, False),
            ("agg2", 4, False)
        ]

        doc_factory = DummyDocumentsGenerate()
        # The aggregator is carried by "hostname", the target by "user_id".
        for hostname, user_id, is_whitelist in doc_specs:
            deployment_name = "whitelist-deployment" if is_whitelist else None
            self.test_es.add_doc(
                doc_factory.generate_document({
                    "deployment_name": deployment_name,
                    "user_id": user_id,
                    "hostname": hostname
                }))

        analyzer.evaluate_model()

        # (aggregator, term) of every stored document flagged as outlier
        detected = [
            (stored["_source"]["outliers"]["aggregator"][0],
             stored["_source"]["outliers"]["term"][0])
            for stored in es._scan()
            if "outliers" in stored["_source"]
        ]

        # We detect agg2 but not agg1
        expected = [("agg2", "0"), ("agg2", "0"), ("agg2", "3"), ("agg2", "4")]
        self.assertEqual(detected, expected)
Esempio n. 29
0
def load_analyzers():
    """Create one analyzer per use case file matched by the configured globs.

    Every pattern in ``settings.args.use_cases`` is expanded with
    ``glob.glob(..., recursive=True)``. Directories among the matches are
    ignored; each remaining path is passed to ``AnalyzerFactory.create``.
    A ``ValueError`` or ``MissingSectionHeaderError`` raised while creating
    an analyzer is logged as an error and that file is skipped.

    Returns:
        The list of analyzers that were created, in expansion order.
    """
    loaded = []

    for pattern in settings.args.use_cases:
        for path in glob.glob(pattern, recursive=True):
            # Only regular entries are use case files; skip directories.
            if os.path.isdir(path):
                continue

            logging.logger.debug("Loading use case %s" % path)
            try:
                analyzer = AnalyzerFactory.create(path)
            except (ValueError, MissingSectionHeaderError) as error:
                logging.logger.error(
                    "An error occured when loading %s: %s" %
                    (path, str(error)))
            else:
                loaded.append(analyzer)

    return loaded
Esempio n. 30
0
    def test_whitelist_config_change_remove_multi_item_literal(self):
        # With whitelist_tests_01_with_general.conf active, removing
        # whitelisted outliers must turn the stored document into the
        # "without outlier" reference document.
        stored_doc = copy.deepcopy(doc_with_outlier_test_file)
        # Without score because "remove whitelisted outlier" use "bulk"
        # operation which doesn't take into account score
        expected_doc = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(stored_doc)
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf")

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )
        analyzers_by_name = {"analyzer_dummy_test": analyzer}
        es.remove_all_whitelisted_outliers(analyzers_by_name)

        first_stored = list(es._scan())[0]
        self.assertDictEqual(first_stored, expected_doc)