class TestEs(unittest.TestCase):
    """Tests for tag handling and whitelist clean-up in the ``es`` helper.

    ``TestStubEs`` and ``UpdateSettings`` are project test helpers; whatever
    they change is undone in :meth:`tearDown` via ``restore_es()`` and
    ``restore_default_configuration_path()``.
    """

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        self.test_es.restore_es()
        self.test_settings.restore_default_configuration_path()

    def test_add_tag_to_document_no_tag(self):
        """A document without a ``tags`` entry gets one holding the new tag."""
        elem = {"_source": {"key": {"test": 1}}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"] = ["new_tag"]

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")

        self.assertEqual(new_doc_result, expected_result)

    def test_add_tag_to_document_already_a_tag(self):
        """The new tag is appended after the tags that are already present."""
        elem = {"_source": {"key": {"test": 1}, "tags": ["ok"]}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"].append("new_tag")

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")

        self.assertEqual(new_doc_result, expected_result)

    def test_remove_all_whitelisted_outliers(self):
        """A stored outlier disappears after ``remove_all_whitelisted_outliers``."""
        self.test_settings.change_configuration_path(
            test_file_whitelist_path_config)

        doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            doc_generate.generate_document({
                "create_outlier": True,
                "outlier_observation": "dummy observation",
                "outlier.model_name": "dummy_test",
                "outlier.model_type": "analyzer",
                "command_query":
                    "osquery_get_all_processes_with_listening_conns.log",
            }))

        # Check that the outlier was correctly generated
        result = list(es._scan())[0]
        self.assertIn("outliers", result["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        # Remove the whitelisted outlier
        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        # Check that the outlier was correctly removed
        result = list(es._scan())[0]
        self.assertNotIn("outliers", result["_source"])
class TestAnalyzer(unittest.TestCase):
    """Tests for outlier creation and persistence through a dummy analyzer."""

    def setUp(self):
        # "es" is used during Analyzer construction and in the method
        # "process_outlier"
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # Restore the default configuration file so we don't influence other
        # unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def test_simple_process_outlier_return_good_outlier(self):
        """``create_outlier`` fills the outlier dict with the model information."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/analyzer_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf")

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        doc_fields = doc_without_outlier["_source"]
        outlier = analyzer.create_outlier(doc_fields, doc_without_outlier)

        expected_outlier = Outlier(outlier_type=["dummy type"],
                                   outlier_reason=['dummy reason'],
                                   outlier_summary='dummy summary',
                                   doc=doc_without_outlier)
        expected_outlier.outlier_dict['model_name'] = 'dummy_test'
        expected_outlier.outlier_dict['model_type'] = 'analyzer'
        expected_outlier.outlier_dict['elasticsearch_filter'] = 'es_valid_query'

        # assertEqual (rather than assertTrue(a == b)) reports the differing
        # entries when the comparison fails.
        self.assertEqual(outlier.outlier_dict, expected_outlier.outlier_dict)

    def test_simple_process_outlier_save_es(self):
        """Saving an outlier leaves the expected document in the scan result."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/analyzer_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf")

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(doc_without_outlier)
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)

        doc_fields = doc_without_outlier["_source"]
        outlier = analyzer.create_outlier(doc_fields, doc_without_outlier)

        es.save_outlier(outlier)

        result = list(es._scan())[0]
        self.assertEqual(result, doc_with_outlier)

    def test_arbitrary_key_config_present_in_analyzer(self):
        """Arbitrary keys of the use case end up in ``extra_model_settings``."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/analyzer_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_arbitrary_dummy_test.conf")

        self.assertDictEqual(analyzer.extra_model_settings,
                             {"test_arbitrary_key": "arbitrary_value",
                              "elasticsearch_filter": "es_valid_query"})
class TestSettings(unittest.TestCase):
    """Tests for reloading settings and for duplicate or missing config entries.

    NOTE(review): another class named ``TestSettings`` appears further down in
    this source; if both end up in the same module, the later definition
    replaces this one -- confirm which one is meant to run.
    """

    def setUp(self):
        self.test_settings = UpdateSettings()

    def tearDown(self):
        self.test_settings.restore_default_configuration_path()

    def test_whitelist_correctly_reload_after_update_config(self):
        """Switching the configuration file changes the whitelist verdict."""
        self.test_settings.change_configuration_path(
            test_whitelist_single_literal_file)

        document_fields = {
            "create_outlier": True,
            "outlier_observation": "dummy observation",
            "filename": "osquery_get_all_processes_with_listening_conns.log",
        }
        generator = DummyDocumentsGenerate()
        doc = generator.generate_document(document_fields)

        # With this configuration, the outlier is not whitelisted
        whitelisted_before = Outlier.is_whitelisted_doc(doc)
        self.assertFalse(whitelisted_before)

        # Update configuration
        self.test_settings.change_configuration_path(
            test_whitelist_multiple_literal_file)

        # Now the outlier is whitelisted
        whitelisted_after = Outlier.is_whitelisted_doc(doc)
        self.assertTrue(whitelisted_after)

    def test_duplicate_whitelist_keys_not_crash(self):
        """With a duplicated option the config yields ``dummy_whitelist_item_two``."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_option_file)

        value = settings.config.get("whitelist_literals", "single_key")
        self.assertEqual(value, "dummy_whitelist_item_two")

    def test_error_when_forgot_whitelist_config(self):
        """Loading a config without the whitelist section raises NoSectionError."""
        with self.assertRaises(NoSectionError):
            self.test_settings.change_configuration_path(
                test_config_without_whitelist_file)

    def test_error_on_duplicate_key_check(self):
        """``check_no_duplicate_key`` returns a DuplicateOptionError instance."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_option_file)

        self.assertIsInstance(settings.check_no_duplicate_key(),
                              DuplicateOptionError)

    def test_error_on_duplicate_section_check(self):
        """``check_no_duplicate_key`` returns a DuplicateSectionError instance."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_section_file)

        self.assertIsInstance(settings.check_no_duplicate_key(),
                              DuplicateSectionError)
class TestNotifier(unittest.TestCase):
    """Tests for the e-mail notifications triggered through ``es.notifier``.

    Each test creates a ``TestStubNotifier`` and inspects the e-mails it
    collected through ``get_list_email()``.
    """

    _NOTIFICATIONS_CONFIG = "/app/tests/unit_tests/files/notifications_test.conf"

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()
        # Created on demand by _start_notifier(); tearDown restores it if set.
        self.test_notifier = None

    def tearDown(self):
        # The notifier is restored here instead of at the end of each test
        # body, so it is also restored when an assertion fails. It is restored
        # first to keep the order: notifier, then settings, then es.
        try:
            if self.test_notifier is not None:
                self.test_notifier.restore_notifier()
        finally:
            self.test_settings.restore_default_configuration_path()
            self.test_es.restore_es()

    def _start_notifier(self):
        """Load the notification settings, then create the stub notifier."""
        self.test_settings.change_configuration_path(self._NOTIFICATIONS_CONFIG)
        self.test_notifier = TestStubNotifier()

    def _email_count(self):
        """Return the number of e-mails recorded by the stub notifier."""
        return len(self.test_notifier.get_list_email())

    def _evaluate_use_case_on_outlier_doc(self, use_case_path):
        """Store a document with ``user_id`` 11 and evaluate *use_case_path*."""
        doc_generate = DummyDocumentsGenerate()
        # Create a document that is an outlier
        doc = doc_generate.generate_document({"user_id": 11})
        self.test_es.add_doc(doc)

        analyzer = AnalyzerFactory.create(use_case_path)
        analyzer.evaluate_model()

    def test_notify_on_outlier_correctly_create_email(self):
        """Notifying on one outlier records exactly one e-mail."""
        # NOTE(review): looks like a leftover debugging aid; basicConfig
        # configures the root logger for the whole process -- confirm it is
        # still wanted.
        import logging as base_logging
        import sys
        base_logging.basicConfig(stream=sys.stderr, level=base_logging.DEBUG)

        self._start_notifier()
        doc_generate = DummyDocumentsGenerate()

        # Create outlier
        doc = doc_generate.generate_document()
        outlier = Outlier("dummy type", "dummy reason", "dummy summary", doc)

        # Execute notification
        es.notifier.notify_on_outlier(outlier)

        self.assertEqual(self._email_count(), 1)

    def test_email_dict_key(self):
        """A recorded e-mail has exactly the keys ``subject`` and ``body``."""
        self._start_notifier()
        doc_generate = DummyDocumentsGenerate()

        # Create outlier
        doc = doc_generate.generate_document()
        outlier = Outlier("dummy type", "dummy reason", "dummy summary", doc)

        # Execute notification
        es.notifier.notify_on_outlier(outlier)

        email_dict = self.test_notifier.get_list_email()[0]
        self.assertEqual(list(email_dict.keys()), ["subject", "body"])

    def test_notification_on_outlier_match_metrics(self):
        """Evaluating the metrics use case records one e-mail."""
        self._start_notifier()
        self._evaluate_use_case_on_outlier_doc(
            "/app/tests/unit_tests/files/use_cases/notifications/metrics_numerical_value_dummy_test.conf"
        )

        self.assertEqual(self._email_count(), 1)

    def test_notification_on_outlier_match_metrics_not_notification_enable(
            self):
        """Evaluating the ``no_notif`` metrics use case records no e-mail."""
        self._start_notifier()
        self._evaluate_use_case_on_outlier_doc(
            "/app/tests/unit_tests/files/use_cases/notifications/metrics_no_notif_numerical_value_dummy_test.conf"
        )

        self.assertEqual(self._email_count(), 0)

    def test_notification_on_outlier_already_detected(self):
        """Two outliers sharing the summary ``dummy summary`` give one e-mail."""
        self._start_notifier()
        doc_generate = DummyDocumentsGenerate()

        # Create outliers
        doc1 = doc_generate.generate_document()
        outlier1 = Outlier("dummy type", "dummy reason", "dummy summary", doc1)
        doc2 = doc_generate.generate_document()
        outlier2 = Outlier("dummy type2", "dummy reason2", "dummy summary", doc2)

        # Execute notification
        es.notifier.notify_on_outlier(outlier1)
        es.notifier.notify_on_outlier(outlier2)

        self.assertEqual(self._email_count(), 1)

    def test_notification_on_two_different_outliers(self):
        """Two outliers with different summaries give two e-mails."""
        self._start_notifier()
        doc_generate = DummyDocumentsGenerate()

        # Create outliers
        doc1 = doc_generate.generate_document()
        outlier1 = Outlier("dummy type", "dummy reason", "dummy summary", doc1)
        doc2 = doc_generate.generate_document()
        outlier2 = Outlier("dummy type2", "dummy reason2", "dummy summary2", doc2)

        # Execute notification
        es.notifier.notify_on_outlier(outlier1)
        es.notifier.notify_on_outlier(outlier2)

        self.assertEqual(self._email_count(), 2)

    def test_notification_on_outlier_already_detected_but_not_in_queue(self):
        """An outlier pushed out of the queue is notified again."""
        self._start_notifier()
        doc_generate = DummyDocumentsGenerate()

        # Create outliers
        doc = doc_generate.generate_document()

        # Fill the queue (3 elements)
        outlier1 = Outlier("dummy type", "dummy reason", "dummy summary1", doc)
        es.notifier.notify_on_outlier(outlier1)
        outlier2 = Outlier("dummy type2", "dummy reason2", "dummy summary2", doc)
        es.notifier.notify_on_outlier(outlier2)
        outlier3 = Outlier("dummy type3", "dummy reason3", "dummy summary3", doc)
        es.notifier.notify_on_outlier(outlier3)

        # Add a new one that will remove the first
        outlier4 = Outlier("dummy type4", "dummy reason4", "dummy summary4", doc)
        es.notifier.notify_on_outlier(outlier4)

        # Add the first one again
        es.notifier.notify_on_outlier(outlier1)

        # Every notified outlier needs to be present (so 5)
        self.assertEqual(self._email_count(), 5)
class TestSettings(unittest.TestCase):
    """Tests for loading settings files and for reporting broken config paths.

    NOTE(review): an earlier class with the same name appears above in this
    source; if both live in one module, this definition replaces the earlier
    one.
    """

    def setUp(self):
        import logging as base_logging
        # Suppress log records of severity CRITICAL and below during the test
        base_logging.disable(base_logging.CRITICAL)
        self.test_settings = UpdateSettings()

    def tearDown(self):
        import logging as base_logging
        # Lift the suppression set up in setUp
        base_logging.disable(base_logging.NOTSET)
        self.test_settings.restore_default_configuration_path()

    def test_whitelist_correctly_reload_after_update_config(self):
        """Switching the configuration file changes the whitelist verdict."""
        self.test_settings.change_configuration_path(
            test_whitelist_single_literal_file)

        dummy_doc_gen = DummyDocumentsGenerate()
        doc = dummy_doc_gen.generate_document({
            "create_outlier": True,
            "outlier_observation": "dummy observation",
            "filename": "osquery_get_all_processes_with_listening_conns.log",
        })

        # With this configuration, the outlier is not whitelisted
        self.assertFalse(Outlier.is_whitelisted_doc(doc))

        # Update configuration
        self.test_settings.change_configuration_path(
            test_whitelist_multiple_literal_file)

        # Now the outlier is whitelisted
        self.assertTrue(Outlier.is_whitelisted_doc(doc))

    def test_duplicate_whitelist_keys_not_crash(self):
        """With a duplicated option the config yields ``dummy_whitelist_item_two``."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_option_file)
        self.assertEqual(
            settings.config.get("whitelist_literals", "single_key"),
            "dummy_whitelist_item_two")

    def test_error_when_forgot_whitelist_config(self):
        """Loading a config without the whitelist section raises NoSectionError."""
        with self.assertRaises(NoSectionError):
            self.test_settings.change_configuration_path(
                test_config_without_whitelist_file)

    def test_error_on_duplicate_key_check(self):
        """``check_no_duplicate_key`` returns a DuplicateOptionError instance."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_option_file)
        result = settings.check_no_duplicate_key()
        self.assertIsInstance(result, DuplicateOptionError)

    def test_error_on_duplicate_section_check(self):
        """``check_no_duplicate_key`` returns a DuplicateSectionError instance."""
        self.test_settings.change_configuration_path(
            test_whitelist_duplicate_section_file)
        result = settings.check_no_duplicate_key()
        self.assertIsInstance(result, DuplicateSectionError)

    # Test on process_configuration_files function
    def test_error_when_config_file_does_not_exist(self):
        """A missing config file ends in ``SystemExit`` with code 2."""
        with self.assertRaises(SystemExit) as cm:
            self.test_settings.change_configuration_path(
                test_config_that_does_not_exist)
        # Checked after the with-block: cm.exception is only available once
        # the context manager has caught the exception.
        self.assertEqual(cm.exception.code, 2)

    # Test on process_configuration_files function
    def test_error_when_config_file_is_a_directory(self):
        """A directory given as config file ends in ``SystemExit`` with code 2."""
        with self.assertRaises(SystemExit) as cm:
            self.test_settings.change_configuration_path(
                test_config_that_is_a_directory)
        self.assertEqual(cm.exception.code, 2)

    # Test on check_no_failed_config_paths function
    def test_error_when_failed_config_file_exists_on_interactive_mode(self):
        """One failed config path ends in ``SystemExit`` with code 2."""
        with self.assertRaises(SystemExit) as cm:
            print_failed_configs_and_exit({test_config_that_does_not_exist})
        self.assertEqual(cm.exception.code, 2)

    def test_error_when_multiple_failed_config_files_exist(self):
        """Several failed config paths end in ``SystemExit`` with code 2."""
        failed_config_files = {
            test_config_that_does_not_exist,
            test_config_that_is_a_directory,
        }
        with self.assertRaises(SystemExit) as cm:
            print_failed_configs_and_exit(failed_config_files)
        self.assertEqual(cm.exception.code, 2)

    def test_no_exceptions_on_valid_config_file(self):
        """Loading a valid configuration file raises nothing."""
        try:
            self.test_settings.change_configuration_path(
                test_whitelist_multiple_literal_file)
        except Exception:
            self.fail(
                "loading a valid configuration file raised an unexpected exception!"
            )

    # Test on check_no_failed_config_paths function
    def test_error_when_no_failed_config_paths_exist(self):
        """Without failed config paths no ``SystemExit`` is raised."""
        # An empty *set*, like the sets passed by the sibling tests; a bare
        # ``{}`` literal would be an empty dict.
        failed_config_files = set()
        try:
            print_failed_configs_and_exit(failed_config_files)
        except SystemExit:
            self.fail("SystemExit raised although no config path failed")
class TestSimplequeryAnalyzer(unittest.TestCase):
    """Tests for simplequery use cases: outliers, derived fields, whitelists."""

    @classmethod
    def setUpClass(cls):
        logging.verbosity = 0

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # Restore the default configuration file so we don't influence other
        # unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def _get_simplequery_analyzer(self, config_file, config_section):
        """Load *config_file* and build a SimplequeryAnalyzer for *config_section*."""
        self.test_settings.change_configuration_path(config_file)
        return SimplequeryAnalyzer(config_section_name=config_section)

    def _run_use_case(self, settings_path, use_case_path):
        """Load *settings_path*, then create and evaluate *use_case_path*."""
        self.test_settings.change_configuration_path(settings_path)
        analyzer = AnalyzerFactory.create(use_case_path)
        analyzer.evaluate_model()

    @staticmethod
    def _first_scanned_doc():
        """Return the first document yielded by ``es._scan()``."""
        return list(es._scan())[0]

    # Simply test if use cases containing a % sign also work correctly and
    # don't generate an exception when being parsed by the ConfigParser.
    # This is the reason we use the RawConfigParser.
    # https://docs.python.org/2/library/configparser.html
    def test_simplequery_raw_configparser_test_percent_signs_in_query(self):
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_whitelist.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_raw_configparser_test_percent_signs.conf"
        )

    def test_simplequery_whitelist_work_test_es_result(self):
        """Only the non-whitelisted documents end up flagged as outliers."""
        dummy_doc_generate = DummyDocumentsGenerate()
        nbr_generated_documents = 5
        all_doc = dummy_doc_generate.create_documents(nbr_generated_documents)
        whitelisted_document = dummy_doc_generate.generate_document(
            {"hostname": "whitelist_hostname"})
        all_doc.append(whitelisted_document)
        self.test_es.add_multiple_docs(all_doc)

        # Run analyzer
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_whitelist.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        )

        nbr_outliers = sum(
            1 for elem in es._scan() if "outliers" in elem["_source"])
        self.assertEqual(nbr_outliers, nbr_generated_documents)

    def test_one_doc_outlier_correctly_add(self):
        """Evaluating the model turns the stored document into the outlier one."""
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)

        # Insert value
        self.test_es.add_doc(doc_without_outlier)

        # Make test (supposed all doc work)
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        )

        # Fetch result to check if it is correct
        result = self._first_scanned_doc()
        self.assertEqual(result, doc_with_outlier)

    def test_simplequery_use_derived_fields_in_doc(self):
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_02.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_derived.conf"
        )

        result = self._first_scanned_doc()
        self.assertIn("timestamp_year", result['_source'])

    def test_simplequery_use_derived_fields_in_outlier(self):
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_02.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_derived.conf"
        )

        result = self._first_scanned_doc()
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_simplequery_not_use_derived_fields_in_doc(self):
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_02.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_not_derived.conf"
        )

        result = self._first_scanned_doc()
        self.assertNotIn("timestamp_year", result['_source'])

    def test_simplequery_not_use_derived_fields_but_present_in_outlier(self):
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_02.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_not_derived.conf"
        )

        result = self._first_scanned_doc()
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_simplequery_default_outlier_infos(self):
        """Every default outlier key field is present in the stored outlier."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        )

        result = self._first_scanned_doc()
        # Collect the offending fields so a failure names them
        missing_fields = [
            field for field in DEFAULT_OUTLIERS_KEY_FIELDS
            if field not in result['_source']['outliers']
        ]
        self.assertEqual(missing_fields, [])

    def test_simplequery_no_extra_outlier_infos(self):
        """The stored outlier holds no field beyond the default key fields."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        )

        result = self._first_scanned_doc()
        unexpected_fields = [
            field for field in result['_source']['outliers']
            if field not in DEFAULT_OUTLIERS_KEY_FIELDS
        ]
        self.assertEqual(unexpected_fields, [])

    def test_whitelist_literal_per_model_match_whitelist(self):
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document(
                {"hostname": "HOSTNAME-WHITELISTED"}))

        # Run analyzer
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/whitelist_tests_model_whitelist_01.conf"
        )

        result = self._first_scanned_doc()
        self.assertNotIn("outliers", result["_source"])

    def test_whitelist_literal_per_model_not_match_whitelist(self):
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document(
                {"hostname": "not_whitelist_hostname"}))

        # Run analyzer
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/whitelist_tests_model_whitelist_01.conf"
        )

        result = self._first_scanned_doc()
        self.assertIn("outliers", result["_source"])

    def test_whitelist_regex_per_model_match_whitelist(self):
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document({"hostname": "AAA-WHITELISTED"}))

        # Run analyzer
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/whitelist_tests_model_whitelist_02.conf"
        )

        result = self._first_scanned_doc()
        self.assertNotIn("outliers", result["_source"])

    def test_whitelist_regex_per_model_not_match_whitelist(self):
        doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(
            doc_generate.generate_document(
                {"hostname": "Not-work-WHITELISTED"}))

        # Run analyzer
        self._run_use_case(
            "/app/tests/unit_tests/files/simplequery_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/simplequery/whitelist_tests_model_whitelist_02.conf"
        )

        result = self._first_scanned_doc()
        self.assertIn("outliers", result["_source"])

    def test_arbitrary_key_config_present_in_outlier(self):
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/simplequery_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_arbitrary_dummy_test.conf"
        )

        dummy_doc_generate = DummyDocumentsGenerate()
        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        # assertEqual: the assertEquals alias was removed in Python 3.12
        self.assertEqual(result["_source"]["outliers"]["test_arbitrary_key"],
                         ["arbitrary_value"])

    def test_arbitrary_key_config_not_present_int_other_model(self):
        # Dictionaries and lists could be shared between different instances.
        # This test checks that no residual value is present in the dictionary
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/simplequery_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        )

        dummy_doc_generate = DummyDocumentsGenerate()
        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        analyzer.evaluate_model()

        result = self._first_scanned_doc()
        self.assertNotIn("test_arbitrary_key", result["_source"]["outliers"])
class TestOutlierOperations(unittest.TestCase):
    """Tests for adding/removing outliers and tags and for whitelist matching."""

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # Restore the default configuration file so we don't influence other
        # unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    @staticmethod
    def _dummy_outlier(doc, suffix=""):
        """Build the "dummy" Outlier for *doc*; *suffix* is appended to each text."""
        return Outlier(outlier_type="dummy type" + suffix,
                       outlier_reason="dummy reason" + suffix,
                       outlier_summary="dummy summary" + suffix,
                       doc=doc)

    def _check_outlier_doc_whitelisted(self, config_path, expected):
        """Create the dummy outlier, load *config_path*, check ``is_whitelisted()``.

        The outlier is created before the configuration is switched, and
        *expected* selects between ``assertTrue`` and ``assertFalse``.
        """
        orig_doc = copy.deepcopy(doc_with_outlier_test_file)
        test_outlier = self._dummy_outlier(orig_doc)
        self.test_settings.change_configuration_path(config_path)
        if expected:
            self.assertTrue(test_outlier.is_whitelisted())
        else:
            self.assertFalse(test_outlier.is_whitelisted())

    def test_add_outlier_to_doc(self):
        doc = copy.deepcopy(doc_without_outlier_test_file)
        test_outlier = self._dummy_outlier(doc)
        # Model name, model type are added by analyzer
        test_outlier.outlier_dict["observation"] = "dummy observation"

        doc_with_outlier = helpers.es.add_outlier_to_document(test_outlier)
        self.assertDictEqual(doc_with_outlier_without_model_info_test_file,
                             doc_with_outlier)

    def test_remove_outlier_from_doc(self):
        doc = copy.deepcopy(doc_without_outlier_test_file)
        test_outlier = self._dummy_outlier(doc)
        test_outlier.outlier_dict["observation"] = "dummy observation"

        doc_with_outlier = helpers.es.add_outlier_to_document(test_outlier)

        doc_without_outlier = helpers.es.remove_outliers_from_document(
            doc_with_outlier)
        self.assertDictEqual(doc_without_outlier,
                             doc_without_outlier_test_file)

    def test_add_duplicate_outlier_to_doc(self):
        doc = copy.deepcopy(doc_without_outlier_test_file)
        test_outlier = self._dummy_outlier(doc)

        doc_with_outlier = helpers.es.add_outlier_to_document(test_outlier)
        doc_with_outlier = helpers.es.add_outlier_to_document(test_outlier)

        self.assertDictEqual(doc, doc_with_outlier)

    def test_add_two_outliers_to_doc(self):
        doc = copy.deepcopy(doc_without_outlier_test_file)
        test_outlier = self._dummy_outlier(doc)
        test_outlier.outlier_dict["observation"] = "dummy observation"

        test_outlier_2 = self._dummy_outlier(doc, " 2")
        test_outlier_2.outlier_dict["observation_2"] = "dummy observation 2"

        helpers.es.add_outlier_to_document(test_outlier)
        doc_with_two_outliers = helpers.es.add_outlier_to_document(
            test_outlier_2)

        self.assertDictEqual(doc_with_two_outliers,
                             doc_with_two_outliers_test_file)

    def test_add_three_outliers_to_doc(self):
        doc = copy.deepcopy(doc_without_outlier_test_file)
        test_outlier = self._dummy_outlier(doc)
        test_outlier.outlier_dict["observation"] = "dummy observation"

        test_outlier_2 = self._dummy_outlier(doc, " 2")
        test_outlier_2.outlier_dict["observation_2"] = "dummy observation 2"

        test_outlier_3 = self._dummy_outlier(doc, " 3")
        test_outlier_3.outlier_dict["observation_3"] = "dummy observation 3"

        helpers.es.add_outlier_to_document(test_outlier)
        helpers.es.add_outlier_to_document(test_outlier_2)
        doc_with_three_outliers = helpers.es.add_outlier_to_document(
            test_outlier_3)

        self.assertDictEqual(doc_with_three_outliers,
                             doc_with_three_outliers_test_file)

    def test_add_remove_tag_from_doc(self):
        orig_doc = copy.deepcopy(doc_with_outlier_test_file)
        # Independent snapshot: comparing the result with ``orig_doc`` itself
        # would prove nothing if the helper hands back the object it was given.
        pristine_doc = copy.deepcopy(orig_doc)

        # Remove non-existing tag
        doc = helpers.es.remove_tag_from_document(orig_doc,
                                                  "tag_does_not_exist")
        self.assertDictEqual(doc, pristine_doc)

        # Remove existing tag
        doc = helpers.es.remove_tag_from_document(orig_doc, "outlier")
        self.assertNotIn("outlier", doc["_source"]["tags"],
                         "Tag still present in document, even after removal!")

    def test_whitelist_literal_match(self):
        self.test_settings.change_configuration_path(
            test_file_outliers_path_config)
        # Contain: "C:\Windows\system32\msfeedssync.exe sync"

        dummy_doc_gen = DummyDocumentsGenerate()
        doc = dummy_doc_gen.generate_document(
            {"command_query": r'C:\Windows\system32\msfeedssync.exe sync'})

        result = Outlier.is_whitelisted_doc(doc)
        self.assertTrue(result)

    def test_whitelist_literal_mismatch(self):
        self.test_settings.change_configuration_path(
            test_file_outliers_path_config)
        # Contain: "C:\Windows\system32\msfeedssync.exe sync"

        dummy_doc_gen = DummyDocumentsGenerate()
        doc = dummy_doc_gen.generate_document({
            "command_query":
                r'C:\Windows\system32\msfeedssync.exe syncOther'
        })

        result = Outlier.is_whitelisted_doc(doc)
        self.assertFalse(result)

    def test_whitelist_regexp_match(self):
        whitelist_item = r"^.*.exe sync$"
        p = re.compile(whitelist_item.strip(), re.IGNORECASE)
        result = Outlier.dictionary_matches_specific_whitelist_item_regexp(
            p, nested_doc_for_whitelist_test)
        self.assertTrue(result)

    def test_whitelist_regexp_mismatch(self):
        whitelist_item = r"^.*.exeZZZZZ sync$"
        p = re.compile(whitelist_item.strip(), re.IGNORECASE)
        result = Outlier.dictionary_matches_specific_whitelist_item_regexp(
            p, nested_doc_for_whitelist_test)
        self.assertFalse(result)

    def test_whitelist_config_file_multi_item_match(self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf",
            True)

    def test_whitelist_config_file_multi_item_match_with_whitelist_element_part_of_list_in_event(
            self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_08_with_general.conf",
            True)

    def test_whitelist_config_wipe_all_bug(self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_10_issue_462.conf",
            False)

    def test_single_literal_to_match_in_doc_with_outlier(self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_02_with_general.conf",
            True)

    def test_single_literal_not_to_match_in_doc_with_outlier(self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_03_with_general.conf",
            False)

    def test_single_regex_to_match_in_doc_with_outlier(self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_06_with_general.conf",
            True)

    def test_single_regex_not_to_match_in_doc_with_outlier(self):
        # Unlike the sibling tests, the configuration is switched *before*
        # the outlier is created; that order is kept as is.
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_07_with_general.conf")
        orig_doc = copy.deepcopy(doc_with_outlier_test_file)
        test_outlier = self._dummy_outlier(orig_doc)
        result = test_outlier.is_whitelisted()
        self.assertFalse(result)

    def test_whitelist_config_file_multi_item_match_with_three_fields_and_whitespace(
            self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_04_with_general.conf",
            True)

    def test_whitelist_config_file_multi_item_mismatch_with_three_fields_and_whitespace(
            self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_05_with_general.conf",
            False)

    def test_whitelist_config_change_remove_multi_item_literal(self):
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
        # Without score because "remove whitelisted outlier" uses the "bulk"
        # operation, which doesn't take the score into account
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(doc_with_outlier)
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf")

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        result = list(es._scan())[0]
        self.assertDictEqual(result, doc_without_outlier)

    def test_whitelist_config_change_single_literal_not_to_match_in_doc_with_outlier(
            self):
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
        self.test_es.add_doc(doc_with_outlier)
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/whitelist_tests_03_with_general.conf")

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
        )

        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        result = list(es._scan())[0]
        self.assertEqual(result, doc_with_outlier)

    def test_test_osquery_ticket_1933_single_regexp_should_not_match(self):
        self._check_outlier_doc_whitelisted(
            "/app/tests/unit_tests/files/whitelist_tests_09_ticket_1933.conf",
            False)
class TestTermsAnalyzer(unittest.TestCase):
    """Unit tests of the terms analyzer, run against the stubbed Elasticsearch.

    Every test loads a settings file through ``UpdateSettings``, builds an
    analyzer with ``AnalyzerFactory.create`` and inspects either the documents
    returned by ``es._scan()`` or the analyzer's own counters.
    """

    @classmethod
    def setUpClass(cls):
        logging.verbosity = 0

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # restore the default configuration file so we don't influence other
        # unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    # ------------------------------------------------------------------
    # Private helpers shared by the tests below
    # ------------------------------------------------------------------
    @staticmethod
    def _scanned_doc(index=0):
        """Return the document at position ``index`` of ``es._scan()``."""
        return list(es._scan())[index]

    @staticmethod
    def _count_outliers():
        """Return how many scanned documents carry an "outliers" entry."""
        return sum(1 for doc in es._scan() if "outliers" in doc["_source"])

    @staticmethod
    def _outlier_aggregator_term_pairs():
        """Return (first aggregator, first term) of each flagged document.

        The pairs are listed in the order in which ``es._scan()`` yields the
        documents.
        """
        return [
            (doc["_source"]["outliers"]["aggregator"][0],
             doc["_source"]["outliers"]["term"][0])
            for doc in es._scan()
            if "outliers" in doc["_source"]
        ]

    # This test work only if we try to detect whitelist element on non outliers elements
    # Here the count is not lower than three, so documents aren't outliers, and we never see that the first one is
    # whitelisted
    #
    # def test_whitelist_batch_document_not_process_all(self):
    #     self.test_settings.change_configuration_path("/app/tests/unit_tests/files/terms_test_with_whitelist.conf")
    #     analyzer = TermsAnalyzer("terms_dummy_test")
    #
    #     # Whitelisted (ignored)
    #     doc1_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_01_test_file)
    #     self.test_es.add_doc(doc1_without_outlier)
    #     # Not whitelisted (add)
    #     doc2_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_02_test_file)
    #     self.test_es.add_doc(doc2_without_outlier)
    #     # Not whitelisted
    #     doc3_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_03_test_file)
    #     self.test_es.add_doc(doc3_without_outlier)
    #
    #     analyzer.evaluate_model()
    #
    #     self.assertEqual(len(analyzer.outliers), 2)

    def test_terms_whitelist_work_test_es_result(self):
        """Only the non-whitelisted documents end up flagged as outliers."""
        dummy_doc_generate = DummyDocumentsGenerate()
        command_query = "SELECT * FROM dummy_table"
        # must be bigger than the trigger value (here 3)
        nbr_generated_documents = 5

        # Generate document that match outlier
        command_name = "default_name_"
        for i in range(nbr_generated_documents):
            self.test_es.add_doc(
                dummy_doc_generate.generate_document({
                    "command_query": command_query,
                    "command_name": command_name + str(i)
                }))
        # Generate whitelist document
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "whitelist_hostname",
                "command_query": command_query,
                "command_name": command_name + str(nbr_generated_documents)
            }))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_with_whitelist.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test.conf")
        analyzer.evaluate_model()

        self.assertEqual(self._count_outliers(), nbr_generated_documents)

    def test_terms_detect_one_outlier_es_check(self):
        """Five documents are flagged in the stub ES after evaluation."""
        dummy_doc_generate = DummyDocumentsGenerate()
        nbr_doc_generated_per_hours = [5, 3, 1, 2]

        # Generate documents
        self.test_es.add_multiple_docs(
            dummy_doc_generate.generate_doc_time_variable_sensitivity(
                nbr_doc_generated_per_hours))
        # Only the first group of documents must be detected as outliers
        # because the limit is on 3

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf")
        analyzer.evaluate_model()

        self.assertEqual(self._count_outliers(), 5)

    def test_terms_detect_one_outlier_batch_check(self):
        """Same scenario as the ES check, read from ``analyzer.total_outliers``."""
        dummy_doc_generate = DummyDocumentsGenerate()
        nbr_doc_generated_per_hours = [5, 3, 1, 2]

        # Generate documents
        self.test_es.add_multiple_docs(
            dummy_doc_generate.generate_doc_time_variable_sensitivity(
                nbr_doc_generated_per_hours))
        # Only the first group of documents must be detected as outliers
        # because the limit is on 3

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf")
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 5)

    def test_terms_small_batch_treat_all(self):
        """With the small-batch settings every generated document is counted."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Init the list of user
        nbr_doc_per_hours = 5
        nbr_hours = 10
        nbr_doc_generated_per_hours = [nbr_doc_per_hours] * nbr_hours
        # If the number of document per hours is not a divisor of the batch
        # limit, all document will not be detected

        # Generate documents
        self.test_es.add_multiple_docs(
            dummy_doc_generate.generate_doc_time_variable_sensitivity(
                nbr_doc_generated_per_hours))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_small_batch_eval.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf")
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers,
                         nbr_doc_per_hours * nbr_hours)

    def test_terms_small_batch_last_outlier(self):
        """The trailing group of four documents is still counted as outliers."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Init the list of user with 18 values of 2
        nbr_doc_generated_per_hours = [2] * 18
        # Add a value at the end that must be detected like outlier (limit on 3)
        nbr_doc_generated_per_hours.append(4)

        # Generate documents
        self.test_es.add_multiple_docs(
            dummy_doc_generate.generate_doc_time_variable_sensitivity(
                nbr_doc_generated_per_hours))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_small_batch_eval.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf")
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 4)

    def test_evaluate_batch_for_outliers_not_enough_target_buckets_one_doc_max_two(
            self):
        """A batch holding a single document yields no outlier."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_2.conf")

        aggregator_value = LIST_AGGREGATOR_VALUE[0]
        target_value = random.choice(LIST_TARGET_VALUE)
        doc = copy.deepcopy(random.choice(LIST_DOC))
        current_batch = analyzer._add_document_to_batch(
            defaultdict(), [target_value], [aggregator_value], doc)

        # Only the first element of the returned pair is checked here
        result, _ = analyzer._evaluate_batch_for_outliers(batch=current_batch)
        self.assertEqual(result, [])

    def test_evaluate_batch_for_outliers_limit_target_buckets_two_doc_max_two(
            self):
        """Two documents under two different aggregators yield no outlier."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_2.conf")

        # Create one document with one aggregator
        aggregator_value = LIST_AGGREGATOR_VALUE[0]
        target_value = random.choice(LIST_TARGET_VALUE)
        doc = copy.deepcopy(random.choice(LIST_DOC))
        current_batch = analyzer._add_document_to_batch(
            defaultdict(), [target_value], [aggregator_value], doc)

        # Create a second document with another aggregator
        aggregator_value2 = LIST_AGGREGATOR_VALUE[1]
        target_value2 = random.choice(LIST_TARGET_VALUE)
        doc2 = copy.deepcopy(random.choice(LIST_DOC))
        current_batch = analyzer._add_document_to_batch(
            current_batch, [target_value2], [aggregator_value2], doc2)

        # Expect to get nothing due to "min_target_buckets" set to 2
        result, _ = analyzer._evaluate_batch_for_outliers(batch=current_batch)
        self.assertEqual(result, [])

    # coeff_of_variation
    def test_terms_evaluate_coeff_of_variation_like_expected_document(self):
        """The stored document equals the expected coeff_of_variation outlier."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf")

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        expected_doc = copy.deepcopy(
            doc_with_terms_outlier_coeff_of_variation_no_score_sort)
        # Add doc to the database
        self.test_es.add_doc(doc_without_outlier)

        # Make test (suppose that all doc match with the query)
        analyzer.evaluate_model()

        self.assertEqual(self._scanned_doc(), expected_doc)

    def test_terms_generated_document_coeff_of_variation_not_respect_min(self):
        """No document is flagged for the "at least" generated data set."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf")

        doc_generator = DummyDocumentsGenerate()
        nbr_val = 24  # Like 24 hours
        min_trigger_sensitivity = analyzer.model_settings["trigger_sensitivity"]
        # Per default, 5 documents create per hour (arbitrarily)
        default_value = 5
        # Maximum difference between the number of document (so between 2 and
        # 8 (included))
        max_difference = 3
        all_doc = doc_generator.create_doc_uniq_target_variable_at_least_specific_coef_variation(
            nbr_val, min_trigger_sensitivity, max_difference, default_value)
        self.test_es.add_multiple_docs(all_doc)
        analyzer.evaluate_model()

        self.assertEqual(self._count_outliers(), 0)

    def test_terms_generated_document_coeff_of_variation_respect_min(self):
        """Every document is flagged for the "at most" generated data set."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf")

        doc_generator = DummyDocumentsGenerate()
        nbr_val = 24  # Like 24 hours
        max_trigger_sensitivity = analyzer.model_settings["trigger_sensitivity"]
        # Per default, 5 documents create per hour (arbitrarily)
        default_value = 5
        # Maximum difference between the number of document (so between 2 and
        # 8 (included))
        max_difference = 3
        all_doc = doc_generator.create_doc_uniq_target_variable_at_most_specific_coef_variation(
            nbr_val, max_trigger_sensitivity, max_difference, default_value)
        self.test_es.add_multiple_docs(all_doc)
        analyzer.evaluate_model()

        self.assertEqual(self._count_outliers(), len(all_doc))

    def test_terms_use_derived_fields_in_doc(self):
        """With the "derived" use case, ``timestamp_year`` is in the document."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc()
        self.assertIn("timestamp_year", result['_source'])

    def test_terms_use_derived_fields_in_outlier(self):
        """With the "derived" use case, the outlier holds the derived year."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({"user_id": 11}))

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc()
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_terms_not_use_derived_fields_in_doc(self):
        """With the "not derived" use case, ``timestamp_year`` is absent."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc()
        self.assertNotIn("timestamp_year", result['_source'])

    def test_terms_not_use_derived_fields_but_present_in_outlier(self):
        """The derived year stays in the outlier even when not derived in doc."""
        dummy_doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({"user_id": 11}))

        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc()
        # The parameter use_derived_fields has no impact on outliers keys
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_terms_default_outlier_infos(self):
        """Every default outlier key is present in the stored outlier."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc()
        # One assertion per key so a failure names the missing field
        for field in DEFAULT_OUTLIERS_KEY_FIELDS:
            self.assertIn(field, result['_source']['outliers'])

    def test_terms_extra_outlier_infos_all_present(self):
        """Every extra outlier key is present in the stored outlier."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc()
        # One assertion per key so a failure names the missing field
        for field in EXTRA_OUTLIERS_KEY_FIELDS:
            self.assertIn(field, result['_source']['outliers'])

    def test_terms_extra_outlier_infos_new_result(self):
        """The stored outlier has no key outside the extra + default sets."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate document
        self.test_es.add_doc(dummy_doc_generate.generate_document())

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_02.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc()
        known_fields = EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
        # One assertion per key so a failure names the unexpected field
        for field in result['_source']['outliers']:
            self.assertIn(field, known_fields)

    def test_add_document_to_batch_empty_target(self):
        """An empty target list leaves the batch equal to the one passed in."""
        dummy_doc_generate = DummyDocumentsGenerate()
        dummy_doc = dummy_doc_generate.generate_document()

        current_batch = {"dummy_key": "dummy_value"}
        result = TermsAnalyzer._add_document_to_batch(
            current_batch, [], ["dummy_aggregator"], dummy_doc)

        self.assertEqual(result, current_batch)

    def test_add_document_to_batch_empty_aggergator(self):
        """An empty aggregator list leaves the batch equal to the one passed in."""
        dummy_doc_generate = DummyDocumentsGenerate()
        dummy_doc = dummy_doc_generate.generate_document()

        current_batch = {"dummy_key": "dummy_value"}
        result = TermsAnalyzer._add_document_to_batch(
            current_batch, ["dummy_target"], [], dummy_doc)

        self.assertEqual(result, current_batch)

    def test_add_document_to_batch_one_aggregator_and_one_target(self):
        """One aggregator/target pair adds the expected entry to the batch."""
        dummy_doc_generate = DummyDocumentsGenerate()
        dummy_doc = dummy_doc_generate.generate_document()

        target_value = "dummy_target"
        aggregator_value = "dummy_aggregator"
        current_batch = {"dummy_key": "dummy_value"}
        result = TermsAnalyzer._add_document_to_batch(
            current_batch, [target_value], [aggregator_value], dummy_doc)

        # The expected batch is built after the call and the aggregator entry
        # is overwritten explicitly
        expected_batch = current_batch.copy()
        expected_batch[aggregator_value] = defaultdict(list)
        expected_batch[aggregator_value]["targets"].append(target_value)
        expected_batch[aggregator_value]["observations"].append({})
        expected_batch[aggregator_value]["raw_docs"].append(dummy_doc)

        self.assertEqual(result, expected_batch)

    def test_min_target_buckets_detect_outlier(self):
        """Only the six ("agg2", 2) documents are flagged."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf")

        # Recap:
        # min_target_buckets=4
        # trigger_sensitivity=5
        # trigger_on=high
        # trigger_method=float

        # Dont encode with a matrix to keep order of document
        doc_to_generate = [
            # New batch:
            #       0  1  2
            # agg1 [5, 1, 1]
            # agg2 [1, 1, 1]
            ("agg1", 0), ("agg2", 0), ("agg1", 0), ("agg1", 0), ("agg1", 0),
            ("agg1", 0), ("agg2", 1), ("agg1", 1), ("agg2", 2), ("agg1", 2),
            # New batch
            #       2  3
            # agg1 [1, 1]
            # agg2 [5, 1]
            ("agg2", 2), ("agg2", 2), ("agg2", 2), ("agg2", 2), ("agg1", 2),
            ("agg2", 2), ("agg1", 3), ("agg2", 3)
        ]
        # At the end:
        #       0  1  2  3
        # agg1 [5, 0, 2, 1]
        # agg2 [1, 1, 6, 1]
        # So only agg2 - 2 (6 documents) need to be flagged

        dummy_doc_gen = DummyDocumentsGenerate()
        for aggregator, target_value in doc_to_generate:
            doc_generated = dummy_doc_gen.generate_document({
                "user_id": target_value,
                "hostname": aggregator
            })
            self.test_es.add_doc(doc_generated)

        analyzer.evaluate_model()

        self.assertEqual(self._outlier_aggregator_term_pairs(),
                         [("agg2", "2")] * 6)

    def test_min_target_buckets_dont_detect_outlier(self):
        """No outlier is counted when there are not enough bucket values."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf")

        # Recap:
        # min_target_buckets=4
        # trigger_sensitivity=5
        # trigger_on=high
        # trigger_method=float

        # Dont encode with a matrix to keep order of document
        doc_to_generate = [
            # New batch:
            #       0  1
            # agg1 [6, 1]
            # agg2 [1, 2]
            ("agg1", 0), ("agg2", 0), ("agg1", 0), ("agg1", 0), ("agg1", 0),
            ("agg1", 0), ("agg1", 0), ("agg2", 1), ("agg2", 1), ("agg1", 1),
            # New Batch
            #       2
            # agg1 [0]
            # agg2 [1]
            ("agg1", 2)
        ]
        # At the end:
        #       0  1  2
        # agg1 [6, 1, 1]
        # agg2 [1, 2]
        # Normally agg1 - 0 must be flagged, but here it doesn't have enough
        # bucket values

        dummy_doc_gen = DummyDocumentsGenerate()
        for aggregator, target_value in doc_to_generate:
            doc_generated = dummy_doc_gen.generate_document({
                "user_id": target_value,
                "hostname": aggregator
            })
            self.test_es.add_doc(doc_generated)

        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 0)

    def test_batch_whitelist_work_with_min_target_bucket(self):
        """Only the six ("agg1", 4) documents are flagged."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf")

        # Recap:
        # min_target_buckets=4
        # trigger_sensitivity=5
        # trigger_on=high
        # trigger_method=float

        doc_to_generate = [
            # New batch:
            #       0  1  2
            # agg1 [3, 0, 1]
            # agg2 [1, 3, 2]
            ("agg1", 0, False), ("agg2", 0, False), ("agg1", 0, True),
            ("agg1", 0, False), ("agg2", 1, False), ("agg2", 1, False),
            ("agg2", 1, False), ("agg2", 2, False), ("agg2", 2, False),
            ("agg1", 2, False),
            # New batch
            #       2  3  4
            # agg1 [1, 0, 2]
            # agg2 [4, 3]
            ("agg2", 2, False), ("agg2", 2, False), ("agg2", 2, True),
            ("agg2", 2, False), ("agg1", 2, False), ("agg2", 3, False),
            ("agg2", 3, False), ("agg2", 3, False), ("agg1", 4, False),
            ("agg1", 4, False),
            # New batch
            #       4  5
            # agg1 [4, 1]
            ("agg1", 4, False), ("agg1", 4, False), ("agg1", 4, False),
            ("agg1", 4, False), ("agg1", 5, False)
        ]
        # At the end:
        #       0  1  2  3  4
        # agg1 [3, 2, 2, 2, 6]
        # agg2 [1, 1, 6, 1]
        # So two outlier: agg1 - 4 and agg2 - 2. But one of agg2 - 2 is
        # whitelisted. So only 5 occurrences
        # NOTE(review): presumably 5 occurrences no longer exceed
        # trigger_sensitivity=5, which is why only agg1 - 4 is asserted below
        # - confirm against the analyzer.

        dummy_doc_gen = DummyDocumentsGenerate()
        for aggregator, target_value, is_whitelist in doc_to_generate:
            deployment_name = "whitelist-deployment" if is_whitelist else None
            doc_generated = dummy_doc_gen.generate_document({
                "deployment_name": deployment_name,
                "user_id": target_value,
                "hostname": aggregator
            })
            self.test_es.add_doc(doc_generated)

        analyzer.evaluate_model()

        self.assertEqual(self._outlier_aggregator_term_pairs(),
                         [("agg1", "4")] * 6)

    def test_batch_whitelist_work_doent_match_outlier_in_across(self):
        """With the "across" use case only the agg2 documents are flagged."""
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_across_float.conf")

        doc_to_generate = [
            # agg1 (0, 1, 2) -> 3 but with whitelist: (0, 2) -> 2
            # agg2 (0, 3, 4) -> 3
            ("agg1", 0, False), ("agg1", 1, True), ("agg2", 0, False),
            ("agg2", 0, False), ("agg1", 2, False), ("agg2", 3, False),
            ("agg2", 4, False)
        ]

        dummy_doc_gen = DummyDocumentsGenerate()
        for aggregator, target_value, is_whitelist in doc_to_generate:
            deployment_name = "whitelist-deployment" if is_whitelist else None
            doc_generated = dummy_doc_gen.generate_document({
                "deployment_name": deployment_name,
                "user_id": target_value,
                "hostname": aggregator
            })
            self.test_es.add_doc(doc_generated)

        analyzer.evaluate_model()

        # We detect agg2 but not agg1
        self.assertEqual(self._outlier_aggregator_term_pairs(),
                         [("agg2", "0"), ("agg2", "0"), ("agg2", "3"),
                          ("agg2", "4")])

    def test_extract_additional_model_settings_no_terms_section(self):
        """A use case without a terms section makes the factory raise."""
        # A missing terms section raises ValueError
        with self.assertRaises(ValueError):
            AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/terms/terms_test_no_terms_section.conf")

    def test_extract_additional_model_settings_too_many_terms_section(self):
        """A use case with too many terms sections makes the factory raise."""
        # More than one terms section raises ValueError
        with self.assertRaises(ValueError):
            AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/terms/terms_test_too_many_terms_section.conf")

    def test_non_outlier_values_not_present_in_terms_for_first(self):
        """The first document's outlier has an empty non-outlier sample."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate documents
        # index: 0 (document inspected by the assertion below)
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "one"
            }))
        # index: 1
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "two"
            }))
        # index: 2
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "two"
            }))
        # index: 3
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "two"
            }))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc(0)
        self.assertEqual(
            result["_source"]["outliers"]["non_outlier_values_sample"], [])

    def test_non_outlier_values_not_present_in_terms_within(self):
        """The last document's outlier lists "two" as non-outlier sample."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate documents
        # index: 0
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "one"
            }))
        # index: 1
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "two"
            }))
        # index: 2
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "two"
            }))
        # index: 3
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "two"
            }))
        # index: 4 (document inspected by the assertion below)
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "one",
                "deployment_name": "three"
            }))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc(4)
        self.assertEqual(
            result["_source"]["outliers"]["non_outlier_values_sample"],
            ["two"])

    def test_non_outlier_values_empty_terms_across(self):
        """With the "across" use case the last outlier has an empty sample."""
        dummy_doc_generate = DummyDocumentsGenerate()

        # Generate documents
        # index: 0
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({"hostname": "one"}))
        # index: 1
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "two",
                "deployment_name": "one"
            }))
        # index: 2
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "two",
                "deployment_name": "two"
            }))
        # index: 3
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({
                "hostname": "two",
                "deployment_name": "three"
            }))
        # index: 4 (document inspected by the assertion below)
        self.test_es.add_doc(
            dummy_doc_generate.generate_document({"hostname": "three"}))

        # Run analyzer
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/terms_test_01.conf")
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/terms/terms_across_dummy_test_float_low.conf")
        analyzer.evaluate_model()

        result = self._scanned_doc(4)
        self.assertEqual(
            result["_source"]["outliers"]["non_outlier_values_sample"], [])
class TestHousekeeping(unittest.TestCase):
    """Unit tests of ``HousekeepingJob`` run against the stubbed Elasticsearch.

    Two of the tests rewrite a configuration file on disk. The original
    content is saved with ``_backup_config`` and written back with
    ``_restore_config`` inside a ``finally`` clause, so the file is restored
    even when the housekeeping run raises.
    """

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()
        # Maps a configuration file path to its original content
        self.config_backup = {}

    def tearDown(self):
        self.test_es.restore_es()
        self.test_settings.restore_default_configuration_path()

    def _backup_config(self, file_path):
        """Remember the current content of ``file_path`` for a later restore."""
        with open(file_path, 'r') as content_file:
            self.config_backup[file_path] = content_file.read()

    def _restore_config(self, file_path):
        """Write the saved content back to ``file_path``.

        Raises:
            KeyError: if ``_backup_config`` was never called for ``file_path``.
        """
        if file_path not in self.config_backup:
            raise KeyError('The configuration ' + file_path +
                           ' was never backup')
        with open(file_path, 'w') as file_object:
            file_object.write(self.config_backup[file_path])

    def _enable_debug_logging(self):
        """Send DEBUG-level log output to stderr (manual debugging aid)."""
        # Local imports: inside this helper the name ``logging`` is bound to
        # the module imported here, whatever ``logging`` means at file level.
        import logging
        import sys
        logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    def test_housekeeping_correctly_remove_whitelisted_outlier_when_file_modification(
            self):
        """Adding the whitelist to the config file removes the stored outlier."""
        self.test_settings.change_configuration_path(
            test_file_no_whitelist_path_config)
        self._backup_config(test_file_no_whitelist_path_config)
        try:
            housekeeping = HousekeepingJob()
            analyzer = AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf")
            housekeeping.update_analyzer_list([analyzer])

            # Add document to "Database"
            doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            self.test_es.add_doc(doc_with_outlier)

            # Force the date of the file
            housekeeping.file_mod_watcher._previous_mtimes[
                test_file_no_whitelist_path_config] = 0

            # Keep everything that precedes the "# WHITELIST" marker
            content_parts = []
            with open(test_file_no_whitelist_path_config, 'r') as test_file:
                for line in test_file:
                    if "# WHITELIST" in line:
                        break
                    content_parts.append(line)

            # Update configuration (read new config and append to default)
            with open(test_file_whitelist_path_config, 'r') as test_file:
                content_parts.append(test_file.read())

            with open(test_file_no_whitelist_path_config, 'w') as test_file:
                test_file.write("".join(content_parts))

            housekeeping.execute_housekeeping()

            # Fetch result
            result = list(self.test_es._scan())[0]
        finally:
            # Always put the original file back, even if housekeeping failed
            self._restore_config(test_file_no_whitelist_path_config)

        # Compute expected result:
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.maxDiff = None
        self.assertEqual(result, doc_without_outlier)

    def test_housekeeping_execute_no_whitelist_parameter_change(self):
        """Housekeeping runs when a non-whitelist part of the config changes."""
        # Check that housekeeping run even when we change new part in the
        # configuration
        self.test_settings.change_configuration_path(
            test_file_whitelist_dummy_reason_path_config)
        self._backup_config(test_file_whitelist_dummy_reason_path_config)
        try:
            housekeeping = HousekeepingJob()
            analyzer = AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf")
            housekeeping.update_analyzer_list([analyzer])

            # Add document to "Database"
            doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            expected_doc_with_outlier = copy.deepcopy(
                doc_with_outlier_test_file)
            self.test_es.add_doc(doc_with_outlier)

            # Update configuration (create new section and append to default)
            filecontent = "\n\n[dummy_section]\nparam=1"

            # Force the date of the file
            housekeeping.file_mod_watcher._previous_mtimes[
                test_file_whitelist_dummy_reason_path_config] = 0

            with open(test_file_whitelist_dummy_reason_path_config,
                      'a') as test_file:
                test_file.write(filecontent)

            housekeeping.execute_housekeeping()

            # Fetch result
            result = list(self.test_es._scan())[0]
        finally:
            # Always put the original file back, even if housekeeping failed
            self._restore_config(test_file_whitelist_dummy_reason_path_config)

        self.assertNotEqual(result, expected_doc_with_outlier)

    def test_whitelist_literals_per_model_removed_by_housekeeping(self):
        """The outlier of the "HOSTNAME-WHITELISTED" document is removed."""
        # Init
        doc_generate = DummyDocumentsGenerate()
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/housekeeping.conf")
        housekeeping = HousekeepingJob()

        # Generate document
        document = doc_generate.generate_document({
            "hostname": "HOSTNAME-WHITELISTED",
            "create_outlier": True,
            "outlier.model_name": "dummy_test",
            "outlier.model_type": "analyzer"
        })
        self.assertIn("outliers", document["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test_with_whitelist.conf")
        housekeeping.update_analyzer_list([analyzer])

        self.test_es.add_doc(document)

        housekeeping.execute_housekeeping()

        result = list(self.test_es._scan())[0]
        self.assertNotIn("outliers", result["_source"])

    def test_whitelist_literals_per_model_not_removed_by_housekeeping(self):
        """The outlier of the "NOT-WHITELISTED" document is kept."""
        # Init
        doc_generate = DummyDocumentsGenerate()
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/housekeeping.conf")
        housekeeping = HousekeepingJob()

        # Generate document
        document = doc_generate.generate_document({
            "hostname": "NOT-WHITELISTED",
            "create_outlier": True,
            "outlier.model_name": "dummy_test",
            "outlier.model_type": "simplequery"
        })
        self.assertIn("outliers", document["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test_with_whitelist.conf")
        housekeeping.update_analyzer_list([analyzer])

        self.test_es.add_doc(document)

        housekeeping.execute_housekeeping()

        result = list(self.test_es._scan())[0]
        self.assertIn("outliers", result["_source"])
class TestMetricsAnalyzer(unittest.TestCase):
    """Tests for the metrics analyzer, run against a ``TestStubEs`` instance.

    ``setUp`` creates a fresh ``TestStubEs`` and ``UpdateSettings`` for every
    test and ``tearDown`` restores both. Tests that modify process-wide state
    (``MetricsAnalyzer.MIN_EVALUATE_BATCH``, the standard ``logging`` disable
    level) register the restoration with ``addCleanup`` so that a failing
    assertion cannot leak the modified state into later tests.
    """

    # Directories holding the global configuration files and the metrics use case files
    _CONFIG_DIR = "/app/tests/unit_tests/files/"
    _USE_CASE_DIR = "/app/tests/unit_tests/files/use_cases/metrics/"

    @classmethod
    def setUpClass(cls):
        logging.verbosity = 0

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # restore the default configuration file so we don't influence other unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _create_analyzer(self, config_file, use_case_file):
        """Switch the settings to *config_file*, then build the analyzer of *use_case_file*.

        Both arguments are file names relative to ``_CONFIG_DIR`` and
        ``_USE_CASE_DIR`` respectively. The analyzer is returned unevaluated.
        """
        self.test_settings.change_configuration_path(self._CONFIG_DIR + config_file)
        return AnalyzerFactory.create(self._USE_CASE_DIR + use_case_file)

    def _add_docs_for_user_ids(self, user_ids):
        """Add one generated document per entry of *user_ids*, using a single generator."""
        doc_generator = DummyDocumentsGenerate()
        for user_id in user_ids:
            self.test_es.add_doc(doc_generator.generate_document({"user_id": user_id}))

    def _set_min_evaluate_batch(self, value):
        """Override ``MetricsAnalyzer.MIN_EVALUATE_BATCH`` for the current test only."""
        # Register the restoration first: cleanups run even when the test fails.
        self.addCleanup(setattr, MetricsAnalyzer, "MIN_EVALUATE_BATCH",
                        MetricsAnalyzer.MIN_EVALUATE_BATCH)
        MetricsAnalyzer.MIN_EVALUATE_BATCH = value

    @staticmethod
    def _scanned_docs():
        """Return every document yielded by ``es._scan()`` as a list."""
        return list(es._scan())

    @classmethod
    def _count_outliers(cls):
        """Return the number of scanned documents that carry an ``outliers`` entry."""
        return sum(1 for doc in cls._scanned_docs() if "outliers" in doc["_source"])

    @classmethod
    def _outlier_aggregator_target_pairs(cls):
        """Return ``(aggregator, target)`` of each scanned outlier, in scan order."""
        return [(doc["_source"]["outliers"]["aggregator"][0],
                 doc["_source"]["outliers"]["target"][0])
                for doc in cls._scanned_docs()
                if "outliers" in doc["_source"]]

    @staticmethod
    def _expected_single_metric_batch(aggregator_value, metrics_value, observations, doc):
        """Build the batch expected after adding exactly one metric for *aggregator_value*."""
        expected_eval_terms = defaultdict()
        expected_eval_terms[aggregator_value] = defaultdict(list)
        expected_eval_terms[aggregator_value]["metrics"] = [metrics_value]
        expected_eval_terms[aggregator_value]["observations"] = [observations]
        expected_eval_terms[aggregator_value]["raw_docs"] = [doc]
        return expected_eval_terms

    # ------------------------------------------------------------------
    # Outlier detection through evaluate_model
    # ------------------------------------------------------------------
    def test_metrics_whitelist_work_test_es_result(self):
        doc_generator = DummyDocumentsGenerate()
        command_query = "SELECT * FROM dummy_table"
        # must be bigger than the trigger value (here 3)
        nbr_generated_documents = 5

        # Generate documents that match the outlier
        for _ in range(nbr_generated_documents):
            self.test_es.add_doc(
                doc_generator.generate_document({"command_query": command_query}))
        # Generate whitelisted document
        self.test_es.add_doc(
            doc_generator.generate_document({
                "hostname": "whitelist_hostname",
                "command_query": command_query
            }))

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_01.conf",
                                         "metrics_length_dummy_test.conf")
        analyzer.evaluate_model()

        self.assertEqual(self._count_outliers(), nbr_generated_documents)

    def test_metrics_detect_one_outlier_es_check(self):
        # Only the first one must be detected as outlier, because user_id needs to be bigger than 10
        self._add_docs_for_user_ids([11, 10, 8, 0, 0, 0])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_01.conf",
                                         "metrics_numerical_value_dummy_test.conf")
        analyzer.evaluate_model()

        self.assertEqual(self._count_outliers(), 1)

    def test_metrics_detect_one_outlier_batch_check(self):
        # Only the first one must be detected as outlier, because user_id needs to be bigger than 10
        self._add_docs_for_user_ids([11, 10, 8, 0, 0, 0])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_01.conf",
                                         "metrics_numerical_value_dummy_test.conf")
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 1)

    def test_metrics_small_batch_treat_all(self):
        # Every user gets the same id
        default_user_id = 11
        number_of_user = 20
        self._add_docs_for_user_ids([default_user_id] * number_of_user)

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_small_batch_eval.conf",
                                         "metrics_numerical_value_dummy_test.conf")
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, number_of_user)

    def test_metrics_small_batch_last_outlier(self):
        default_user_id = 0
        number_of_user = 19
        # Add a value at the end that must be detected as outlier (limit on 10)
        self._add_docs_for_user_ids([default_user_id] * number_of_user + [11])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_small_batch_eval.conf",
                                         "metrics_numerical_value_dummy_test.conf")
        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 1)

    # ------------------------------------------------------------------
    # Derived fields
    # ------------------------------------------------------------------
    def test_metrics_use_derived_fields_in_doc(self):
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        analyzer = self._create_analyzer("metrics_test_01.conf",
                                         "metrics_dummy_test_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_docs()[0]
        self.assertIn("timestamp_year", result['_source'])

    def test_metrics_use_derived_fields_in_outlier(self):
        self._add_docs_for_user_ids([11])

        analyzer = self._create_analyzer("metrics_test_01.conf",
                                         "metrics_dummy_test_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_docs()[0]
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    def test_metrics_not_use_derived_fields_in_doc(self):
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        analyzer = self._create_analyzer("metrics_test_01.conf",
                                         "metrics_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_docs()[0]
        self.assertNotIn("timestamp_year", result['_source'])

    def test_metrics_not_use_derived_fields_but_present_in_outlier(self):
        self._add_docs_for_user_ids([11])

        analyzer = self._create_analyzer("metrics_test_01.conf",
                                         "metrics_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_docs()[0]
        # The parameter use_derived_fields has no impact on the outlier keys
        self.assertIn("derived_timestamp_year", result['_source']['outliers'])

    # ------------------------------------------------------------------
    # Whitelist handling
    # ------------------------------------------------------------------
    def test_whitelist_batch_document_not_process_all(self):
        analyzer = self._create_analyzer("metrics_test_with_whitelist.conf",
                                         "metrics_length_dummy_test.conf")

        # Not whitelisted (add)
        self.test_es.add_doc(
            copy.deepcopy(doc_without_outliers_test_whitelist_02_test_file))
        # Not whitelisted
        self.test_es.add_doc(
            copy.deepcopy(doc_without_outliers_test_whitelist_03_test_file))
        # Whitelisted (ignored)
        self.test_es.add_doc(
            copy.deepcopy(doc_without_outliers_test_whitelist_04_test_file))

        analyzer.evaluate_model()

        self.assertEqual(analyzer.total_outliers, 2)

    def _generate_metrics_doc_with_whitelist(self, doc_to_generate):
        """Add one document per ``(aggregator, target_value, is_whitelist)`` tuple.

        *doc_to_generate* is a list of tuples (and not a dict) to keep the order.
        The aggregator is stored as ``hostname``, the target value as
        ``user_id``; whitelisted entries get the ``deployment_name``
        "whitelist-deployment", the others ``None``.
        """
        doc_generator = DummyDocumentsGenerate()
        for aggregator, target_value, is_whitelist in doc_to_generate:
            doc_generated = doc_generator.generate_document({
                "deployment_name": "whitelist-deployment" if is_whitelist else None,
                "user_id": target_value,
                "hostname": aggregator
            })
            self.test_es.add_doc(doc_generated)

    def test_metrics_batch_whitelist_three_outliers_one_whitelist(self):
        analyzer = self._create_analyzer("metrics_test_whitelist_batch.conf",
                                         "metrics_dummy_test_batch_whitelist_float.conf")
        self._set_min_evaluate_batch(5)

        # aggregator, target, is_whitelist
        doc_to_generate = [
            ("agg1", 5, False),
            ("agg1", 3, True),
            ("agg2", 4, False),
            ("agg2", 5, True),
            # Batch limit
            ("agg2", 3, False),
            ("agg1", 5, False),
            ("agg1", 7, False),  # Outlier
            ("agg2", 2, False),
            # Batch limit
            ("agg1", 4, True),
            ("agg2", 6, True),  # Outlier (but whitelist)
            ("agg1", 3, False),
            ("agg1", 5, False),
            # Batch limit
            ("agg2", 1, False),
            ("agg2", 6, False),  # Outlier
            ("agg1", 3, False),
        ]
        self._generate_metrics_doc_with_whitelist(doc_to_generate)

        analyzer.evaluate_model()

        self.assertEqual(self._outlier_aggregator_target_pairs(),
                         [("agg1", "7"), ("agg2", "6")])

    def test_metrics_batch_whitelist_outlier_detect_after_process_all_and_remove_whitelist(
            self):
        analyzer = self._create_analyzer("metrics_test_whitelist_batch.conf",
                                         "metrics_dummy_test_batch_whitelist_avg.conf")
        self._set_min_evaluate_batch(5)

        # aggregator, target, is_whitelist
        doc_to_generate = [
            ("agg1", 5, False),
            ("agg2", 5, False),
            ("agg1", 5, False),
            ("agg1", 3, False),
            # Batch limit
            ("agg1", 6, False),
            ("agg2", 5, False),
            ("agg1", 5, False),
            ("agg1", 7, True),
        ]
        self._generate_metrics_doc_with_whitelist(doc_to_generate)

        # The avg for agg1 is 5.1 but if we remove the whitelisted element, the avg is 4.8
        analyzer.evaluate_model()

        # Without the batch whitelist, the only outlier would be ("agg1", 6) (the ("agg1", 7) is whitelisted).
        # But with the batch whitelist, the avg is updated and all values of "agg1" (except 3) are detected as outliers
        self.assertEqual(self._outlier_aggregator_target_pairs(),
                         [("agg1", "5"), ("agg1", "5"), ("agg1", "6"), ("agg1", "5")])

    # ------------------------------------------------------------------
    # Outlier key fields
    # ------------------------------------------------------------------
    def test_simplequery_default_outlier_infos(self):
        # Generate document
        self._add_docs_for_user_ids([11])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_02.conf",
                                         "metrics_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        outlier_fields = self._scanned_docs()[0]['_source']['outliers']
        # Comparing against [] makes a failure list the missing keys
        missing_fields = [key for key in DEFAULT_OUTLIERS_KEY_FIELDS
                          if key not in outlier_fields]
        self.assertEqual(missing_fields, [])

    def test_metrics_extra_outlier_infos_all_present(self):
        # Generate document
        self._add_docs_for_user_ids([11])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_02.conf",
                                         "metrics_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        outlier_fields = self._scanned_docs()[0]['_source']['outliers']
        missing_fields = [key for key in EXTRA_OUTLIERS_KEY_FIELDS
                          if key not in outlier_fields]
        self.assertEqual(missing_fields, [])

    def test_metrics_extra_outlier_infos_new_result(self):
        # Generate document
        self._add_docs_for_user_ids([11])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_02.conf",
                                         "metrics_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        outlier_fields = self._scanned_docs()[0]['_source']['outliers']
        known_fields = EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
        unexpected_fields = [key for key in outlier_fields if key not in known_fields]
        self.assertEqual(unexpected_fields, [])

    # ------------------------------------------------------------------
    # add_metric_to_batch / calculate_metric
    # ------------------------------------------------------------------
    def _preperate_dummy_data_terms(self):
        """Return arbitrary arguments for ``add_metric_to_batch``, including a document."""
        eval_metrics_array = defaultdict()
        # "random" values
        aggregator_value = "key"
        target_value = "test"
        metrics_value = 12
        observations = {'a': 1, 'test': 'ok'}
        doc = {'source': 'this', 'target': 12}
        return eval_metrics_array, aggregator_value, target_value, metrics_value, observations, doc

    def _preperate_data_terms_with_doc(self, metrics_value=1):
        """Return arbitrary arguments for ``add_metric_to_batch``; the caller supplies the document."""
        eval_metrics_array = defaultdict()
        # "random" values
        aggregator_value = "key"
        target_value = "test"
        observations = {}
        return eval_metrics_array, aggregator_value, target_value, metrics_value, observations

    def test_add_metric_to_batch_empty(self):
        eval_metrics_array = defaultdict()
        aggregator_value = ""
        target_value = ""
        metrics_value = ""
        observations = {}
        doc = {}

        # Create expected result (the same observations dict is passed to the call below)
        observations["target"] = [target_value]
        observations["aggregator"] = [aggregator_value]
        expected_eval_terms = self._expected_single_metric_batch(
            aggregator_value, metrics_value, observations, doc)

        result = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value,
                                                     target_value, metrics_value,
                                                     observations, doc)
        self.assertEqual(result, expected_eval_terms)

    def test_add_metric_to_batch_no_modification(self):
        eval_metrics_array, aggregator_value, target_value, metrics_value, observations, doc = \
            self._preperate_dummy_data_terms()

        # Create expected result (the same observations dict is passed to the call below)
        observations["target"] = [target_value]
        observations["aggregator"] = [aggregator_value]
        expected_eval_terms = self._expected_single_metric_batch(
            aggregator_value, metrics_value, observations, doc)

        result = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value,
                                                     target_value, metrics_value,
                                                     observations, doc)
        self.assertEqual(result, expected_eval_terms)

    def test_calculate_metric_numerical_value(self):
        self.assertEqual(MetricsAnalyzer.calculate_metric("numerical_value", "12"),
                         (float(12), dict()))

    def test_calculate_metric_length(self):
        self.assertEqual(MetricsAnalyzer.calculate_metric("length", "test"),
                         (len("test"), dict()))

    def test_calculate_metric_entropy(self):
        self.assertEqual(MetricsAnalyzer.calculate_metric("entropy", "test"),
                         (helpers.utils.shannon_entropy("test"), dict()))

    def test_calculate_metric_hex_encoded_length(self):
        result = MetricsAnalyzer.calculate_metric("hex_encoded_length",
                                                  "12c322adc020 12322029620")
        expected_observation = {
            'max_hex_encoded_length': 12,
            'max_hex_encoded_word': '12c322adc020'
        }
        self.assertEqual(result, (12, expected_observation))

    def test_calculate_metric_base64_encoded_length(self):
        result = MetricsAnalyzer.calculate_metric("base64_encoded_length",
                                                  "houston we have a cHJvYmxlbQ==")
        expected_observation = {
            'max_base64_decoded_length': 7,
            'max_base64_decoded_word': 'problem'
        }
        self.assertEqual(result, (7, expected_observation))

    def test_calculate_metric_url_length(self):
        result = MetricsAnalyzer.calculate_metric("url_length",
                                                  "why don't we go http://www.nviso.com")
        expected_observation = {
            'extracted_urls_length': 20,
            'extracted_urls': 'http://www.nviso.com'
        }
        self.assertEqual(result, (20, expected_observation))

    def test_calculate_metric_unexist_operation(self):
        self.assertEqual(MetricsAnalyzer.calculate_metric("dummy operation", ""),
                         (None, dict()))

    # ------------------------------------------------------------------
    # _evaluate_batch_for_outliers
    # ------------------------------------------------------------------
    def test_evaluate_batch_for_outliers_fetch_remain_metrics(self):
        analyzer = self._create_analyzer("metrics_test_01.conf", "metrics_dummy_test.conf")

        eval_metrics_array, aggregator_value, target_value, metrics_value, observations = \
            self._preperate_data_terms_with_doc()
        doc = DummyDocumentsGenerate().generate_document()
        metrics = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value,
                                                      target_value, metrics_value,
                                                      observations, doc)

        result = analyzer._evaluate_batch_for_outliers(metrics, False)
        # (outliers, remaining_metrics): no outlier, and the batch is handed back
        self.assertEqual(result, ([], metrics))

    def test_evaluate_batch_for_outliers_add_outlier(self):
        analyzer = self._create_analyzer("metrics_test_02.conf", "metrics_dummy_test_2.conf")

        eval_metrics_array, aggregator_value, target_value, metrics_value, observations = \
            self._preperate_data_terms_with_doc(metrics_value=12)
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(doc_without_outlier)
        metrics = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value,
                                                      target_value, metrics_value,
                                                      observations, doc_without_outlier)

        outliers, _ = analyzer._evaluate_batch_for_outliers(metrics, True)
        analyzer.process_outlier(outliers[0])

        result = self._scanned_docs()[0]
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
        self.maxDiff = None
        self.assertEqual(result, doc_with_outlier)

    def test_extract_additional_model_settings_no_metrics_section(self):
        # The module-level name "logging" is used with .verbosity/.logger in this class,
        # so the standard library module is imported under another name.
        import logging as base_logging

        # Re-enable log records for assertLogs; the previous disable level is
        # put back by the cleanup even if an assertion below fails.
        base_logging.disable(base_logging.NOTSET)
        self.addCleanup(base_logging.disable, base_logging.CRITICAL)

        self.test_settings.change_configuration_path(
            self._CONFIG_DIR + "metrics_test_no_metrics_section.conf")

        # A missing metrics section produces an error
        with self.assertLogs(logging.logger, level='ERROR'):
            analyzer = AnalyzerFactory.create(self._USE_CASE_DIR + "metrics_dummy_test.conf")

        # Check that we detect a problem in the configuration
        self.assertTrue(analyzer.configuration_parsing_error)

    def test_remove_metric_from_batch_simple_value(self):
        eval_metrics_array = defaultdict()
        aggregator_value = "agg"
        target_value = "dummy_target"
        metrics_value = "dummy_metric"
        observations = {}
        doc = DummyDocumentsGenerate().generate_document()

        batch = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value,
                                                    target_value, metrics_value,
                                                    observations, doc)
        result = MetricsAnalyzer.remove_metric_from_batch(batch[aggregator_value], 0)

        expected_aggregator_value = defaultdict(list)
        expected_aggregator_value["metrics"] = []
        expected_aggregator_value["observations"] = []
        expected_aggregator_value["raw_docs"] = []
        self.assertEqual(result, expected_aggregator_value)

    # ------------------------------------------------------------------
    # non_outlier_values_sample
    # ------------------------------------------------------------------
    def test_non_outliers_not_present_in_metrics_for_first(self):
        # Outlier document, then non outlier document
        self._add_docs_for_user_ids([11, 8])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_02.conf",
                                         "metrics_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_docs()[0]
        self.assertEqual(result["_source"]["outliers"]["non_outlier_values_sample"], list())

    def test_non_outliers_present_in_metrics(self):
        # Outlier document, non outlier document, outlier document
        self._add_docs_for_user_ids([11, 8, 12])

        # Run analyzer
        analyzer = self._create_analyzer("metrics_test_02.conf",
                                         "metrics_dummy_test_not_derived.conf")
        analyzer.evaluate_model()

        result = self._scanned_docs()[2]
        self.assertEqual(result["_source"]["outliers"]["non_outlier_values_sample"], ["8.0"])
class TestSuddenAppearanceAnalyzer(unittest.TestCase):
    """Tests for the sudden appearance analyzer, run against a ``TestStubEs`` instance."""

    @classmethod
    def setUpClass(cls):
        logging.verbosity = 0

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # Put the default configuration file back so that other unit tests
        # relying on the settings singleton are not influenced
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _evaluate_use_case(self, config_path, use_case_file):
        """Load *config_path*, build the analyzer of *use_case_file*, reset its date and evaluate it."""
        self.test_settings.change_configuration_path(config_path)
        model = AnalyzerFactory.create(root_test_use_case_files + use_case_file)
        set_new_current_date(model)
        model.evaluate_model()
        return model

    def _add_time_variable_docs(self, delta_hours, user_ids, hostnames):
        """Generate and store documents with custom ``user_id`` / ``hostname`` values."""
        generator = DummyDocumentsGenerate()
        docs = generator.generate_doc_time_variable_witt_custom_fields(
            delta_hours, "user_id", user_ids, "hostname", hostnames)
        self.test_es.add_multiple_docs(docs)

    @staticmethod
    def _outlier_docs():
        """Return the scanned documents that carry an ``outliers`` entry, in scan order."""
        return [hit for hit in es._scan() if "outliers" in hit["_source"]]

    @staticmethod
    def _first_doc():
        """Return the first document yielded by ``es._scan()``."""
        return list(es._scan())[0]

    # ------------------------------------------------------------------
    # Outlier detection
    # ------------------------------------------------------------------
    def test_sudden_appearance_whitelist_work_test_es_result(self):
        generator = DummyDocumentsGenerate()
        query = "SELECT * FROM dummy_table"
        # must be bigger than the trigger value (here 3)
        expected_count = 5
        name_prefix = "default_name_"

        # Documents that match the outlier
        for index in range(expected_count):
            self.test_es.add_doc(
                generator.generate_document({
                    "command_query": query,
                    "command_name": name_prefix + str(index)
                }))

        # One more document, this time with the whitelisted hostname
        self.test_es.add_doc(
            generator.generate_document({
                "hostname": "whitelist_hostname",
                "command_query": query,
                "command_name": name_prefix + str(expected_count)
            }))

        # Run analyzer
        self._evaluate_use_case(test_conf_file_with_whitelist,
                                "sudden_appearance_dummy_test_01.conf")

        self.assertEqual(len(self._outlier_docs()), expected_count)

    def test_sudden_appearance_detect_no_outlier_es_check(self):
        # Generate documents
        delta_hours = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
        user_ids = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
        hostnames = ["host1"] * len(delta_hours)
        self._add_time_variable_docs(delta_hours, user_ids, hostnames)

        # Run analyzer
        self._evaluate_use_case(test_conf_file_01, "sudden_appearance_dummy_test_02.conf")

        self.assertEqual(len(self._outlier_docs()), 0)

    def test_sudden_appearance_detect_one_outlier_es_check_1(self):
        # Generate documents: every host is "host1" except the last one
        delta_hours = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
        user_ids = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
        hostnames = ["host1"] * (len(delta_hours) - 1) + ["host2"]
        self._add_time_variable_docs(delta_hours, user_ids, hostnames)

        # Run analyzer
        self._evaluate_use_case(test_conf_file_01, "sudden_appearance_dummy_test_02.conf")

        self.assertEqual(len(self._outlier_docs()), 1)

    def test_sudden_appearance_detect_one_outlier_es_check_2(self):
        # Generate documents
        delta_hours = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
        user_ids = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        hostnames = ["host1"] * len(delta_hours)
        self._add_time_variable_docs(delta_hours, user_ids, hostnames)

        # Run analyzer
        self._evaluate_use_case(test_conf_file_01, "sudden_appearance_dummy_test_03.conf")

        self.assertEqual(len(self._outlier_docs()), 1)

    # ------------------------------------------------------------------
    # Derived fields
    # ------------------------------------------------------------------
    def test_sudden_appearance_derived_fields_in_doc(self):
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_derived_fields_01.conf")

        source = self._first_doc()['_source']
        self.assertTrue("timestamp_year" in source)

    def test_sudden_appearance_no_derived_fields_in_doc(self):
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_no_derived_fields.conf")

        source = self._first_doc()['_source']
        self.assertFalse("timestamp_year" in source)

    def test_sudden_appearance_derived_fields_in_outlier(self):
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_derived_fields_02.conf")

        outlier_fields = self._first_doc()['_source']['outliers']
        self.assertTrue("derived_timestamp_year" in outlier_fields)

    def test_sudden_appearance_no_derived_fields(self):
        self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

        self._evaluate_use_case(test_conf_file_01,
                                "sudden_appearance_no_derived_fields.conf")

        outlier_fields = self._first_doc()['_source']['outliers']
        self.assertFalse("derived_timestamp_year" in outlier_fields)

    # ------------------------------------------------------------------
    # Outlier key fields
    # ------------------------------------------------------------------
    def test_sudden_extra_outlier_infos_all_present(self):
        # Generate documents
        delta_hours = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
        user_ids = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        hostnames = ["host1"] * len(delta_hours)
        self._add_time_variable_docs(delta_hours, user_ids, hostnames)

        self._evaluate_use_case(test_conf_file_01, "sudden_appearance_dummy_test_03.conf")

        first_outlier_fields = self._outlier_docs()[0]['_source']['outliers']
        # Every key stored on the outlier must be a known default or extra key
        self.assertTrue(
            all(key in EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
                for key in first_outlier_fields))
class TestEs(unittest.TestCase):
    """Tests for the ``es`` helper, run against a ``TestStubEs`` instance.

    ``setUp`` creates a fresh ``TestStubEs`` and ``UpdateSettings`` for every
    test; ``tearDown`` restores both.
    """

    def setUp(self):
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        self.test_es.restore_es()
        self.test_settings.restore_default_configuration_path()

    def _highlight_settings_for(self, config_path, use_case_path):
        """Load *config_path*, build the analyzer of *use_case_path* and return its highlight settings."""
        self.test_settings.change_configuration_path(config_path)
        analyzer = AnalyzerFactory.create(use_case_path)
        return es._get_highlight_settings(analyzer.model_settings)

    # ------------------------------------------------------------------
    # add_tag_to_document
    # ------------------------------------------------------------------
    def test_add_tag_to_document_no_tag(self):
        elem = {"_source": {"key": {"test": 1}}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"] = ["new_tag"]

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")

        self.assertEqual(new_doc_result, expected_result)

    def test_add_tag_to_document_already_a_tag(self):
        elem = {"_source": {"key": {"test": 1}, "tags": ["ok"]}}
        expected_result = copy.deepcopy(elem)
        expected_result["_source"]["tags"].append("new_tag")

        new_doc_result = helpers.es.add_tag_to_document(elem, "new_tag")

        self.assertEqual(new_doc_result, expected_result)

    # ------------------------------------------------------------------
    # remove_all_whitelisted_outliers
    # ------------------------------------------------------------------
    def test_remove_all_whitelisted_outliers(self):
        self.test_settings.change_configuration_path(test_file_whitelist_path_config)

        doc_generate = DummyDocumentsGenerate()
        self.test_es.add_doc(
            doc_generate.generate_document({
                "create_outlier": True,
                "outlier_observation": "dummy observation",
                "outlier.model_name": "dummy_test",
                "outlier.model_type": "analyzer",
                "command_query": "osquery_get_all_processes_with_listening_conns.log"
            }))

        # Check that the outlier was correctly generated
        result = list(es._scan())[0]
        self.assertIn("outliers", result["_source"])

        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf")

        # Remove whitelisted outlier
        es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

        # Check that the outlier was correctly removed
        result = list(es._scan())[0]
        self.assertNotIn("outliers", result["_source"])

    # ------------------------------------------------------------------
    # _get_highlight_settings
    # ------------------------------------------------------------------
    def test_get_highlight_settings_with_metrics_analyzer(self):
        highlight_settings = self._highlight_settings_for(
            "/app/tests/unit_tests/files/metrics_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test.conf")

        self.assertIsNone(highlight_settings)

    def test_get_highlight_settings_with_terms_analyzer(self):
        highlight_settings = self._highlight_settings_for(
            "/app/tests/unit_tests/files/terms_test_01.conf",
            "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test.conf")

        self.assertIsNone(highlight_settings)

    def test_get_highlight_settings_with_simplequery_analyzer_and_highlight_match_activated(
            self):
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/" \
                        "simplequery_dummy_test_highlight_match_activated.conf"
        highlight_settings = self._highlight_settings_for(config_file_simplequery_test_01,
                                                          use_case_file)

        highlight_settings_test = {
            "pre_tags": ["<value>"],
            "post_tags": ["</value>"],
            "fields": {"*": dict()},
        }
        self.assertEqual(highlight_settings, highlight_settings_test)

    def test_get_highlight_settings_with_simplequery_analyzer_and_highlight_match_unactivated(
            self):
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/" \
                        "simplequery_dummy_test_highlight_match_unactivated.conf"
        highlight_settings = self._highlight_settings_for(config_file_simplequery_test_01,
                                                          use_case_file)

        self.assertIsNone(highlight_settings)

    def test_get_highlight_settings_with_simplequery_analyzer_without_highlight_parameter(
            self):
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        highlight_settings = self._highlight_settings_for(config_file_simplequery_test_01,
                                                          use_case_file)

        self.assertIsNone(highlight_settings)

    # ------------------------------------------------------------------
    # build_search_query
    # ------------------------------------------------------------------
    def test_build_search_query(self):
        self.test_settings.change_configuration_path(config_file_simplequery_test_01)
        use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
        analyzer = AnalyzerFactory.create(use_case_file)

        timestamp_field, history_window_days, history_window_hours = es._get_history_window(
            analyzer.model_settings)
        search_range = es.get_time_filter(days=history_window_days,
                                          hours=history_window_hours,
                                          timestamp_field=timestamp_field)

        dsl_search_query_1 = build_search_query(search_range=search_range,
                                                search_query=analyzer.search_query)

        # Expected query: the time range filter followed by the analyzer's own filters
        expected_filters = [search_range]
        expected_filters.extend(analyzer.search_query["filter"].copy())
        dsl_search_query_2 = {'query': {'bool': {'filter': expected_filters}}}

        # assertEquals is a deprecated alias that was removed in Python 3.12
        self.assertEqual(dsl_search_query_1, dsl_search_query_2)
class TestAnalyzer(unittest.TestCase):
    """Tests for analyzers created through ``AnalyzerFactory``.

    Covers outlier creation and saving, arbitrary configuration keys,
    ``AnalyzerFactory.create_multi`` on well-formed and malformed use case
    files, and the ``timestamp_field`` model setting.
    """

    def setUp(self):
        # "es" is used in the Analyzer construction and in the method "process_outlier"
        self.test_es = TestStubEs()
        self.test_settings = UpdateSettings()

    def tearDown(self):
        # restore the default configuration file so we don't influence other unit tests that use the settings singleton
        self.test_settings.restore_default_configuration_path()
        self.test_es.restore_es()

    def test_simple_process_outlier_return_good_outlier(self):
        # The outlier created by the dummy analyzer must match a hand-built
        # Outlier carrying the same type, reason, summary and model fields.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        doc_fields = doc_without_outlier["_source"]
        outlier = analyzer.create_outlier(doc_fields, doc_without_outlier)

        expected_outlier = Outlier(outlier_type=["dummy type"],
                                   outlier_reason=['dummy reason'],
                                   outlier_summary='dummy summary',
                                   doc=doc_without_outlier)
        expected_outlier.outlier_dict['model_name'] = 'dummy_test'
        expected_outlier.outlier_dict['model_type'] = 'analyzer'
        expected_outlier.outlier_dict[
            'elasticsearch_filter'] = 'es_valid_query'

        # assertEqual (instead of assertTrue(a == b)) shows a diff on failure.
        self.assertEqual(outlier.outlier_dict, expected_outlier.outlier_dict)

    def test_simple_process_outlier_save_es(self):
        # Saving the created outlier must turn the stored document into the
        # reference "document with outlier".
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        self.test_es.add_doc(doc_without_outlier)
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)

        doc_fields = doc_without_outlier["_source"]
        outlier = analyzer.create_outlier(doc_fields, doc_without_outlier)

        es.save_outlier(outlier)

        result = [elem for elem in es._scan()][0]
        self.assertEqual(result, doc_with_outlier)

    def test_arbitrary_key_config_present_in_analyzer(self):
        # Keys of the use case are expected to show up in
        # extra_model_settings with their configured values.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(
            use_case_analyzer_arbitrary_dummy_test)

        self.assertDictEqual(
            analyzer.extra_model_settings, {
                "test_arbitrary_key": "arbitrary_value",
                "elasticsearch_filter": "es_valid_query"
            })

    def test_create_multi_with_empty_config(self):
        # The general configuration file, used as a use case file, must
        # yield no analyzer.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(config_file_analyzer_test_01)

        self.assertEqual(len(analyzers), 0)

    def test_create_multi_with_single(self):
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_arbitrary_dummy_test)

        self.assertEqual(len(analyzers), 1)

    def test_create_multi_with_malformed_duplicate_option(self):
        # With 'strict' disabled, a duplicated option must not prevent the
        # analyzers from being created.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_multi_malformed_duplicate_option,
            {'strict': False})

        self.assertEqual(len(analyzers), 3)

    def test_create_multi_with_malformed_duplicate_section(self):
        # With 'strict' disabled, a duplicated section must not prevent the
        # analyzers from being created.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_multi_malformed_duplicate_section,
            {'strict': False})

        self.assertEqual(len(analyzers), 2)

    def test_create_multi_with_malformed_duplicate_option_strict(self):
        # Without the 'strict' override, a duplicated option must raise.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)

        with self.assertRaises(configparser.DuplicateOptionError):
            AnalyzerFactory.create_multi(
                use_case_analyzer_multi_malformed_duplicate_option)

    def test_create_multi_with_malformed_duplicate_section_strict(self):
        # Without the 'strict' override, a duplicated section must raise.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)

        with self.assertRaises(configparser.DuplicateSectionError):
            AnalyzerFactory.create_multi(
                use_case_analyzer_multi_malformed_duplicate_section)

    def test_create_multi_mixed_types(self):
        # A use case file mixing model types must yield the analyzers in the
        # order simplequery, metrics, terms.
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzers = AnalyzerFactory.create_multi(
            use_case_analyzer_files_path + "analyzer_multi_mixed_types.conf")

        simplequery_analyzer = analyzers[0]
        metrics_analyzer = analyzers[1]
        terms_analyzer = analyzers[2]

        self.assertEqual(simplequery_analyzer.model_type, 'simplequery')
        self.assertEqual(metrics_analyzer.model_type, 'metrics')
        self.assertEqual(terms_analyzer.model_type, 'terms')

    def test_default_timestamp_field(self):
        # With the default test configuration, the timestamp field must be
        # "@timestamp".
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_01)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        timestamp_field = analyzer.model_settings["timestamp_field"]
        default_timestamp_field = "@timestamp"

        # assertEquals was a deprecated alias and no longer exists in
        # Python 3.12+; assertEqual is the supported spelling.
        self.assertEqual(timestamp_field, default_timestamp_field)

    def test_non_default_timestamp_field(self):
        # With the custom-timestamp configuration, the timestamp field must
        # be "timestamp".
        self.test_settings.change_configuration_path(
            config_file_analyzer_test_with_custom_timestamp_field)
        analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

        timestamp_field = analyzer.model_settings["timestamp_field"]
        non_default_timestamp_field = "timestamp"

        self.assertEqual(timestamp_field, non_default_timestamp_field)