def test_whitelist_literals_per_model_not_removed_by_housekeeping(self):
    # A document with hostname "NOT-WHITELISTED" is stored with an outlier, the
    # housekeeping job is executed with the analyzer loaded from
    # analyzer_dummy_test_with_whitelist.conf, and the stored document must still
    # carry its "outliers" section afterwards.

    # Init
    doc_generate = DummyDocumentsGenerate()
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/housekeeping.conf")
    housekeeping = HousekeepingJob()

    # Generate a document that already carries an outlier
    document = doc_generate.generate_document({
        "hostname": "NOT-WHITELISTED",
        "create_outlier": True,
        "outlier.model_name": "dummy_test",
        "outlier.model_type": "simplequery"
    })
    # assertIn reports the inspected container when the check fails
    self.assertIn("outliers", document["_source"])

    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test_with_whitelist.conf"
    )
    housekeeping.update_analyzer_list([analyzer])

    self.test_es.add_doc(document)
    housekeeping.execute_housekeeping()

    # Only the first stored document is inspected
    result = list(self.test_es._scan())[0]
    self.assertIn("outliers", result["_source"])
def test_remove_all_whitelisted_outliers(self):
    # Store one document carrying an outlier, call
    # es.remove_all_whitelisted_outliers() with the dummy analyzer and check that
    # the "outliers" section is gone from the stored document.
    self.test_settings.change_configuration_path(
        test_file_whitelist_path_config)

    doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(
        doc_generate.generate_document({
            "create_outlier": True,
            "outlier_observation": "dummy observation",
            "outlier.model_name": "dummy_test",
            "outlier.model_type": "analyzer",
            "command_query": "osquery_get_all_processes_with_listening_conns.log"
        }))

    # Check that the outlier was correctly generated
    result = list(es._scan())[0]
    self.assertIn("outliers", result["_source"])

    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
    )

    # Remove whitelisted outliers
    es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

    # Check that the outlier was correctly removed
    result = list(es._scan())[0]
    self.assertNotIn("outliers", result["_source"])
def test_housekeeping_execute_no_whitelist_parameter_change(self):
    # Check that housekeeping runs when a new, unrelated section is appended to
    # the configuration file, and that the stored document then differs from
    # the original document that was inserted.
    self.test_settings.change_configuration_path(
        test_file_whitelist_dummy_reason_path_config)
    self._backup_config(test_file_whitelist_dummy_reason_path_config)

    # The configuration file is modified on disk below. Restore it in a
    # ``finally`` so that a failing step cannot leave the altered file behind
    # for the tests that run afterwards.
    try:
        housekeeping = HousekeepingJob()
        analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf"
        )
        housekeeping.update_analyzer_list([analyzer])

        # Add document to "Database"
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
        expected_doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
        self.test_es.add_doc(doc_with_outlier)

        # Update configuration (create new section and append to default)
        filecontent = "\n\n[dummy_section]\nparam=1"

        # Force the recorded modification time of the file
        housekeeping.file_mod_watcher._previous_mtimes[
            test_file_whitelist_dummy_reason_path_config] = 0

        with open(test_file_whitelist_dummy_reason_path_config,
                  'a') as test_file:
            test_file.write(filecontent)

        housekeeping.execute_housekeeping()

        # Fetch result
        result = list(self.test_es._scan())[0]
    finally:
        self._restore_config(test_file_whitelist_dummy_reason_path_config)

    self.assertNotEqual(result, expected_doc_with_outlier)
def test_non_default_timestamp_field(self):
    # With the custom-timestamp configuration loaded, the analyzer's
    # "timestamp_field" model setting must be "timestamp".
    self.test_settings.change_configuration_path(
        config_file_analyzer_test_with_custom_timestamp_field)
    analyzer = AnalyzerFactory.create(use_case_analyzer_dummy_test)

    timestamp_field = analyzer.model_settings["timestamp_field"]
    non_default_timestamp_field = "timestamp"
    # assertEquals is a deprecated alias that no longer exists in Python 3.12
    self.assertEqual(timestamp_field, non_default_timestamp_field)
def test_evaluate_batch_for_outliers_limit_target_buckets_two_doc_max_two(
        self):
    # Put two documents with two different aggregators into a batch and check
    # that evaluating the batch yields an empty result list.
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    terms_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_2.conf"
    )

    # First document, attached to the first aggregator
    first_aggregator = LIST_AGGREGATOR_VALUE[0]
    first_target = random.choice(LIST_TARGET_VALUE)
    first_doc = copy.deepcopy(random.choice(LIST_DOC))
    batch = terms_analyzer._add_document_to_batch(
        defaultdict(), [first_target], [first_aggregator], first_doc)

    # Second document, attached to another aggregator
    second_aggregator = LIST_AGGREGATOR_VALUE[1]
    second_target = random.choice(LIST_TARGET_VALUE)
    second_doc = copy.deepcopy(random.choice(LIST_DOC))
    batch = terms_analyzer._add_document_to_batch(
        batch, [second_target], [second_aggregator], second_doc)

    # Original author's note: nothing is expected because
    # "min_target_buckets" is set to 2. The second returned value is not used.
    outliers, _ = terms_analyzer._evaluate_batch_for_outliers(batch=batch)
    self.assertEqual(outliers, [])
def test_non_outliers_present_in_metrics(self):
    # Store three documents, run the metrics analyzer and check the
    # "non_outlier_values_sample" recorded on the third stored document.
    doc_factory = DummyDocumentsGenerate()

    # Per the original comments: user_id 11 and 12 are the outlier documents,
    # user_id 8 is the non outlier document. Insertion order is kept.
    for user_id in (11, 8, 12):
        self.test_es.add_doc(
            doc_factory.generate_document({"user_id": user_id}))

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_02.conf")
    metrics_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
    )
    metrics_analyzer.evaluate_model()

    third_doc = list(es._scan())[2]
    sample = third_doc["_source"]["outliers"]["non_outlier_values_sample"]
    self.assertEqual(sample, ["8.0"])
def test_simplequery_raw_configparser_test_percent_signs_in_query(self):
    # Smoke test: there is no assertion, the test passes as long as loading the
    # use case and evaluating the model do not raise.
    settings_path = "/app/tests/unit_tests/files/simplequery_test_whitelist.conf"
    use_case_path = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_raw_configparser_test_percent_signs.conf"

    self.test_settings.change_configuration_path(settings_path)
    AnalyzerFactory.create(use_case_path).evaluate_model()
def test_metrics_whitelist_work_test_es_result(self):
    # Store several documents sharing one command query plus one extra document
    # with hostname "whitelist_hostname", run the metrics analyzer and check
    # that exactly the first group ends up flagged with an outlier.
    doc_factory = DummyDocumentsGenerate()
    command_query = "SELECT * FROM dummy_table"
    # Original author's note: must be bigger than the trigger value (here 3)
    nbr_generated_documents = 5

    # Documents expected to match the outlier
    for _index in range(nbr_generated_documents):
        regular_doc = doc_factory.generate_document(
            {"command_query": command_query})
        self.test_es.add_doc(regular_doc)

    # Whitelist document
    whitelist_doc = doc_factory.generate_document({
        "hostname": "whitelist_hostname",
        "command_query": command_query
    })
    self.test_es.add_doc(whitelist_doc)

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_01.conf")
    metrics_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_length_dummy_test.conf"
    )
    metrics_analyzer.evaluate_model()

    # Count the stored documents that carry an "outliers" section
    flagged = sum(1 for stored in es._scan() if "outliers" in stored["_source"])
    self.assertEqual(flagged, nbr_generated_documents)
def test_sudden_appearance_detect_no_outlier_es_check(self):
    # Generate time-shifted documents with two custom fields, run the
    # sudden-appearance analyzer and check that no stored document is flagged.
    doc_factory = DummyDocumentsGenerate()
    hour_deltas = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
    user_ids = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
    # Every document gets the same hostname
    hostnames = ["host1"] * len(hour_deltas)

    docs = doc_factory.generate_doc_time_variable_witt_custom_fields(
        hour_deltas, "user_id", user_ids, "hostname", hostnames)
    self.test_es.add_multiple_docs(docs)

    # Run analyzer
    self.test_settings.change_configuration_path(test_conf_file_01)
    sudden_analyzer = AnalyzerFactory.create(
        root_test_use_case_files + "sudden_appearance_dummy_test_02.conf")
    set_new_current_date(sudden_analyzer)
    sudden_analyzer.evaluate_model()

    # Count the stored documents that carry an "outliers" section
    flagged = sum(1 for stored in es._scan() if "outliers" in stored["_source"])
    self.assertEqual(flagged, 0)
def test_sudden_extra_outlier_infos_all_present(self):
    # Run the sudden-appearance analyzer and check that every key found in the
    # first flagged document's "outliers" section belongs to
    # EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS.
    doc_factory = DummyDocumentsGenerate()
    hour_deltas = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
    user_ids = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    # Every document gets the same hostname
    hostnames = ["host1"] * len(hour_deltas)

    docs = doc_factory.generate_doc_time_variable_witt_custom_fields(
        hour_deltas, "user_id", user_ids, "hostname", hostnames)
    self.test_es.add_multiple_docs(docs)

    self.test_settings.change_configuration_path(test_conf_file_01)
    sudden_analyzer = AnalyzerFactory.create(
        root_test_use_case_files + "sudden_appearance_dummy_test_03.conf")
    set_new_current_date(sudden_analyzer)
    sudden_analyzer.evaluate_model()

    # Keep only the stored documents that carry an "outliers" section
    flagged_docs = [
        stored for stored in es._scan() if "outliers" in stored["_source"]
    ]

    first_outlier_keys = flagged_docs[0]['_source']['outliers']
    key_checks = []
    for key in first_outlier_keys:
        key_checks.append(
            key in EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS)
    self.assertTrue(all(key_checks))
def test_terms_small_batch_treat_all(self):
    # Generate the same number of documents for each hour, run the terms
    # analyzer with the small-batch configuration and check that the total
    # number of outliers equals the number of generated documents.
    doc_factory = DummyDocumentsGenerate()

    nbr_doc_per_hours = 5
    nbr_hours = 10
    docs_per_hour = [nbr_doc_per_hours] * nbr_hours
    # Original author's note: if the number of documents per hour is not a
    # divisor of the batch limit, not all documents will be detected.

    # Generate documents
    generated = doc_factory.generate_doc_time_variable_sensitivity(
        docs_per_hour)
    self.test_es.add_multiple_docs(generated)

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_small_batch_eval.conf")
    terms_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float.conf"
    )
    terms_analyzer.evaluate_model()

    expected_total = nbr_doc_per_hours * nbr_hours
    self.assertEqual(terms_analyzer.total_outliers, expected_total)
def test_get_highlight_settings_with_terms_analyzer(self):
    # es._get_highlight_settings() must return None for the model settings of
    # the terms dummy analyzer.
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test.conf"
    )

    highlight_settings = es._get_highlight_settings(
        analyzer.model_settings)
    # assertIsNone shows the unexpected value when the check fails
    self.assertIsNone(highlight_settings)
def test_get_highlight_settings_with_simplequery_analyzer_without_highlight_parameter(
        self):
    # es._get_highlight_settings() must return None for the model settings of
    # the simplequery dummy analyzer.
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    use_case_file = "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test.conf"
    analyzer = AnalyzerFactory.create(use_case_file)

    highlight_settings = es._get_highlight_settings(
        analyzer.model_settings)
    # assertIsNone shows the unexpected value when the check fails
    self.assertIsNone(highlight_settings)
def test_min_target_buckets_dont_detect_outlier(self):
    # Store an ordered series of (hostname, user_id) documents, run the terms
    # analyzer and check that it reports no outlier at all.
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
    terms_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_within_float.conf"
    )

    # Recap from the original author (the use case file is not visible here):
    #   min_target_buckets=4, trigger_sensitivity=5, trigger_on=high,
    #   trigger_method=float
    # The documents are listed one by one (not as a matrix) to keep their order.

    # First batch, (aggregator, target value) pairs:
    #          0  1
    #   agg1  [6, 1]
    #   agg2  [1, 2]
    first_batch = (
        [("agg1", 0), ("agg2", 0)]
        + [("agg1", 0)] * 5
        + [("agg2", 1)] * 2
        + [("agg1", 1)]
    )
    # Second batch: a single agg1 document with target value 2
    second_batch = [("agg1", 2)]
    # At the end:
    #          0  1  2
    #   agg1  [6, 1, 1]
    #   agg2  [1, 2]
    # Original author's note: agg1 - 0 would normally be flagged, but there
    # are not enough bucket values here.

    doc_factory = DummyDocumentsGenerate()
    for hostname, user_id in first_batch + second_batch:
        self.test_es.add_doc(
            doc_factory.generate_document({
                "user_id": user_id,
                "hostname": hostname
            }))

    terms_analyzer.evaluate_model()
    self.assertEqual(terms_analyzer.total_outliers, 0)
def test_arbitrary_key_config_present_in_analyzer(self):
    # The analyzer built from the "arbitrary" use case must expose exactly the
    # two expected entries in its extra_model_settings dictionary.
    self.test_settings.change_configuration_path(
        config_file_analyzer_test_01)
    arbitrary_analyzer = AnalyzerFactory.create(
        use_case_analyzer_arbitrary_dummy_test)

    expected_settings = {
        "test_arbitrary_key": "arbitrary_value",
        "elasticsearch_filter": "es_valid_query"
    }
    self.assertDictEqual(arbitrary_analyzer.extra_model_settings,
                         expected_settings)
def test_simplequry_use_matched_values_in_outlier(self):
    # After evaluating the simplequery use case with highlight matching, the
    # stored document's outlier must contain a "matched_values" entry.
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        config_file_simplequery_test_02)
    analyzer = AnalyzerFactory.create(
        use_case_simplequery_dummy_test_highlight_match_activated)
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertIn reports the inspected container when the check fails
    self.assertIn("matched_values", result['_source']['outliers'])
def test_simplequery_not_use_derived_fields_in_doc(self):
    # After evaluating the "not derived" simplequery use case, the stored
    # document must not contain a "timestamp_year" field.
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        config_file_simplequery_test_02)
    analyzer = AnalyzerFactory.create(
        use_case_simplequery_dummy_test_not_derived)
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertNotIn reports the inspected container when the check fails
    self.assertNotIn("timestamp_year", result['_source'])
def test_terms_not_use_derived_fields_in_doc(self):
    # After evaluating the "not derived" terms use case, the stored document
    # must not contain a "timestamp_year" field.
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
    )
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertNotIn reports the inspected container when the check fails
    self.assertNotIn("timestamp_year", result['_source'])
def test_sudden_appearance_derived_fields_in_doc(self):
    # After evaluating the sudden-appearance "derived fields" use case, the
    # stored document must contain a "timestamp_year" field.
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(test_conf_file_01)
    analyzer = AnalyzerFactory.create(
        root_test_use_case_files +
        "sudden_appearance_derived_fields_01.conf")
    set_new_current_date(analyzer)
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertIn reports the inspected container when the check fails
    self.assertIn("timestamp_year", result['_source'])
def load_analyzers():
    """Create an analyzer for every use case file matched by the configured globs.

    Each entry of ``settings.args.use_cases`` is expanded with ``glob.glob`` and
    every match is passed to ``AnalyzerFactory.create``. When the creation
    raises ``ValueError`` the error is logged and that file is skipped.

    :return: list of the analyzers that could be created
    """
    loaded = []
    for pattern in settings.args.use_cases:
        for path in glob.glob(pattern):
            logging.logger.debug("Loading use case %s" % path)
            try:
                created = AnalyzerFactory.create(path)
            except ValueError as e:
                logging.logger.error("An error occured when loading %s: %s" %
                                     (path, str(e)))
            else:
                loaded.append(created)
    return loaded
def test_non_outlier_values_not_present_in_terms_within(self):
    # Store five documents sharing hostname "one", run the terms analyzer and
    # check the "non_outlier_values_sample" recorded on the document at
    # index 4 of the scan.
    doc_factory = DummyDocumentsGenerate()

    # deployment_name per document, in insertion order (index 0 to 4)
    deployment_names = ("one", "two", "two", "two", "three")
    for deployment_name in deployment_names:
        self.test_es.add_doc(
            doc_factory.generate_document({
                "hostname": "one",
                "deployment_name": deployment_name
            }))

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    terms_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
    )
    terms_analyzer.evaluate_model()

    fifth_doc = list(es._scan())[4]
    sample = fifth_doc["_source"]["outliers"]["non_outlier_values_sample"]
    self.assertEqual(sample, ["two"])
def test_simplequery_not_use_derived_fields_but_present_in_outlier(self):
    # After evaluating the "not derived" simplequery use case, the outlier of
    # the stored document must contain a "derived_timestamp_year" entry.
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/simplequery_test_02.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_not_derived.conf"
    )
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertIn reports the inspected container when the check fails
    self.assertIn("derived_timestamp_year", result['_source']['outliers'])
def test_whitelist_config_change_single_literal_not_to_match_in_doc_with_outlier(
        self):
    # Store a document carrying an outlier, call
    # es.remove_all_whitelisted_outliers() and check that the stored document
    # is still equal to the one that was inserted.
    stored_doc = copy.deepcopy(doc_with_outlier_test_file)
    self.test_es.add_doc(stored_doc)

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/whitelist_tests_03_with_general.conf")
    dummy_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
    )
    analyzers_by_name = {"analyzer_dummy_test": dummy_analyzer}
    es.remove_all_whitelisted_outliers(analyzers_by_name)

    first_doc = list(es._scan())[0]
    self.assertEqual(first_doc, stored_doc)
def test_batch_whitelist_work_doent_match_outlier_in_across(self):
    # Store documents for two aggregators, one of them marked with the
    # "whitelist-deployment" deployment name, run the terms analyzer and check
    # which (aggregator, term) pairs end up flagged.
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
    terms_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_across_float.conf"
    )

    # (aggregator, target value, is whitelisted), as noted by the original
    # author:
    #   agg1 (0, 1, 2) -> 3 but with whitelist: (0, 2) -> 2
    #   agg2 (0, 3, 4) -> 3
    planned_docs = [
        ("agg1", 0, False),
        ("agg1", 1, True),
        ("agg2", 0, False),
        ("agg2", 0, False),
        ("agg1", 2, False),
        ("agg2", 3, False),
        ("agg2", 4, False)
    ]

    doc_factory = DummyDocumentsGenerate()
    for hostname, user_id, is_whitelist in planned_docs:
        deployment_name = "whitelist-deployment" if is_whitelist else None
        self.test_es.add_doc(
            doc_factory.generate_document({
                "deployment_name": deployment_name,
                "user_id": user_id,
                "hostname": hostname
            }))

    terms_analyzer.evaluate_model()

    # Collect (aggregator, term) for every stored document with an outlier
    detected = [
        (stored["_source"]["outliers"]["aggregator"][0],
         stored["_source"]["outliers"]["term"][0])
        for stored in es._scan()
        if "outliers" in stored["_source"]
    ]

    # We detect agg2 but not agg1
    expected = [("agg2", "0"), ("agg2", "0"), ("agg2", "3"), ("agg2", "4")]
    self.assertEqual(detected, expected)
def load_analyzers():
    """Create an analyzer for every use case file matched by the configured globs.

    Each entry of ``settings.args.use_cases`` is expanded with a recursive
    ``glob.glob``; directories among the matches are ignored and every other
    match is passed to ``AnalyzerFactory.create``. When the creation raises
    ``ValueError`` or ``MissingSectionHeaderError`` the error is logged and that
    file is skipped.

    :return: list of the analyzers that could be created
    """
    loaded = []
    for pattern in settings.args.use_cases:
        for path in glob.glob(pattern, recursive=True):
            # Only files can be use cases
            if os.path.isdir(path):
                continue

            logging.logger.debug("Loading use case %s" % path)
            try:
                created = AnalyzerFactory.create(path)
            except (ValueError, MissingSectionHeaderError) as e:
                logging.logger.error(
                    "An error occured when loading %s: %s" %
                    (path, str(e)))
            else:
                loaded.append(created)
    return loaded
def test_one_doc_outlier_correctly_add(self):
    # Store a document without outlier, evaluate the simplequery dummy use case
    # and check that the stored document now equals the expected document with
    # an outlier.
    inserted_doc = copy.deepcopy(doc_without_outlier_test_file)
    expected_doc = copy.deepcopy(doc_with_outlier_test_file_01)

    # Insert value
    self.test_es.add_doc(inserted_doc)

    # Run the analyzer (original author's note: all documents are supposed to
    # match)
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    simplequery_analyzer = AnalyzerFactory.create(
        use_case_simplequery_dummy_test)
    simplequery_analyzer.evaluate_model()

    # Fetch the first stored document and compare it to the expectation
    first_doc = list(es._scan())[0]
    self.assertEqual(first_doc, expected_doc)
def test_whitelist_config_change_remove_multi_item_literal(self):
    # Store a document carrying an outlier, call
    # es.remove_all_whitelisted_outliers() and check that the stored document
    # now equals the reference document without outlier.
    inserted_doc = copy.deepcopy(doc_with_outlier_test_file)
    # Original author's note: without score, because "remove whitelisted
    # outlier" uses a "bulk" operation which doesn't take the score into account
    expected_doc = copy.deepcopy(doc_without_outlier_test_file)
    self.test_es.add_doc(inserted_doc)

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf")
    dummy_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
    )
    analyzers_by_name = {"analyzer_dummy_test": dummy_analyzer}
    es.remove_all_whitelisted_outliers(analyzers_by_name)

    first_doc = list(es._scan())[0]
    self.assertDictEqual(first_doc, expected_doc)
def test_arbitrary_key_config_not_present_int_other_model(self):
    # Dictionaries and lists could be shared between different instances. This
    # test checks that no residual "test_arbitrary_key" value shows up in the
    # outlier produced by the plain simplequery dummy use case.
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    analyzer = AnalyzerFactory.create(use_case_simplequery_dummy_test)

    dummy_doc_generate = DummyDocumentsGenerate()
    # Generate document
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertNotIn reports the inspected container when the check fails
    self.assertNotIn("test_arbitrary_key", result["_source"]["outliers"])
def test_simple_process_outlier_return_good_outlier(self):
    # analyzer.create_outlier() must produce an outlier whose outlier_dict
    # equals the one of a hand-built Outlier completed with the model name,
    # model type and elasticsearch filter.
    self.test_settings.change_configuration_path("/app/tests/unit_tests/files/analyzer_test_01.conf")
    analyzer = AnalyzerFactory.create("/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf")

    doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
    doc_fields = doc_without_outlier["_source"]
    outlier = analyzer.create_outlier(doc_fields, doc_without_outlier)

    expected_outlier = Outlier(outlier_type=["dummy type"], outlier_reason=['dummy reason'],
                               outlier_summary='dummy summary', doc=doc_without_outlier)
    expected_outlier.outlier_dict['model_name'] = 'dummy_test'
    expected_outlier.outlier_dict['model_type'] = 'analyzer'
    expected_outlier.outlier_dict['elasticsearch_filter'] = 'es_valid_query'

    # assertEqual performs the same == comparison but shows the difference
    # between both values when the check fails
    self.assertEqual(outlier.outlier_dict, expected_outlier.outlier_dict)
def test_arbitrary_key_config_present_in_outlier(self):
    # After evaluating the "arbitrary" simplequery use case, the outlier of the
    # stored document must carry "test_arbitrary_key" with the value
    # ["arbitrary_value"].
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    analyzer = AnalyzerFactory.create(
        use_case_simplequery_arbitrary_dummy_test)

    dummy_doc_generate = DummyDocumentsGenerate()
    # Generate document
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertEquals is a deprecated alias that no longer exists in Python 3.12
    self.assertEqual(result["_source"]["outliers"]["test_arbitrary_key"],
                     ["arbitrary_value"])