def test_remove_all_whitelisted_outliers(self):
    """Check that ``es.remove_all_whitelisted_outliers`` strips the outlier of a stored document.

    A document generated with ``create_outlier`` is indexed and confirmed to
    hold an ``outliers`` entry; after the removal pass, run with the dummy
    analyzer, the stored document must not hold that entry any more.
    """
    self.test_settings.change_configuration_path(
        test_file_whitelist_path_config)

    doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(
        doc_generate.generate_document({
            "create_outlier": True,
            "outlier_observation": "dummy observation",
            "outlier.model_name": "dummy_test",
            "outlier.model_type": "analyzer",
            "command_query":
            "osquery_get_all_processes_with_listening_conns.log"
        }))

    # Check that the outlier was correctly generated
    result = list(es._scan())[0]
    self.assertIn("outliers", result["_source"])

    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
    )

    # Remove whitelisted outlier
    es.remove_all_whitelisted_outliers({"analyzer_dummy_test": analyzer})

    # Check that the outlier was correctly removed
    result = list(es._scan())[0]
    self.assertNotIn("outliers", result["_source"])
def test_sudden_extra_outlier_infos_all_present(self):
    """Check that the first outlier found only holds known outlier keys.

    After running ``sudden_appearance_dummy_test_03.conf``, every key of the
    ``outliers`` dictionary of the first flagged document must belong to
    ``EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS``.
    """
    doc_generator = DummyDocumentsGenerate()

    # Generate documents: one hour delta, one user id and one hostname per document
    hour_deltas = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 15]
    user_ids = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    hostnames = ["host1"] * len(hour_deltas)
    self.test_es.add_multiple_docs(
        doc_generator.generate_doc_time_variable_witt_custom_fields(
            hour_deltas, "user_id", user_ids, "hostname", hostnames))

    # Run analyzer
    self.test_settings.change_configuration_path(test_conf_file_01)
    analyzer = AnalyzerFactory.create(
        root_test_use_case_files + "sudden_appearance_dummy_test_03.conf")
    set_new_current_date(analyzer)
    analyzer.evaluate_model()

    flagged_docs = [
        stored for stored in es._scan() if "outliers" in stored["_source"]
    ]

    # Only the first flagged document is inspected
    outlier_keys = flagged_docs[0]['_source']['outliers']
    self.assertTrue(
        all(key in EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
            for key in outlier_keys))
def test_sudden_appearance_detect_no_outlier_es_check(self):
    """Check that ``sudden_appearance_dummy_test_02.conf`` flags none of the stored documents."""
    doc_generator = DummyDocumentsGenerate()

    # Generate documents: one hour delta, one user id and one hostname per document
    hour_deltas = [1, 1, 1, 3, 3, 3, 4, 5, 5, 5, 15, 15]
    user_ids = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
    hostnames = ["host1"] * len(hour_deltas)
    self.test_es.add_multiple_docs(
        doc_generator.generate_doc_time_variable_witt_custom_fields(
            hour_deltas, "user_id", user_ids, "hostname", hostnames))

    # Run analyzer
    self.test_settings.change_configuration_path(test_conf_file_01)
    analyzer = AnalyzerFactory.create(
        root_test_use_case_files + "sudden_appearance_dummy_test_02.conf")
    set_new_current_date(analyzer)
    analyzer.evaluate_model()

    # Count the stored documents carrying an "outliers" entry
    outlier_count = sum(
        1 for stored in es._scan() if "outliers" in stored["_source"])
    self.assertEqual(outlier_count, 0)
def test_non_outliers_present_in_metrics(self):
    """Check the ``non_outlier_values_sample`` stored on a metrics outlier.

    Three documents are indexed (user ids 11, 8 and 12); the third scanned
    document must report ``["8.0"]`` as its non outlier values sample.
    """
    doc_generator = DummyDocumentsGenerate()

    # Generate documents: 11 and 12 are the outlier documents, 8 the non outlier one
    for user_id in (11, 8, 12):
        self.test_es.add_doc(
            doc_generator.generate_document({"user_id": user_id}))

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_02.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
    )
    analyzer.evaluate_model()

    stored_docs = list(es._scan())
    outlier_info = stored_docs[2]["_source"]["outliers"]
    self.assertEqual(outlier_info["non_outlier_values_sample"], ["8.0"])
def test_metrics_whitelist_work_test_es_result(self):
    """Check that only the non-whitelist documents end up flagged as outliers.

    Five matching documents plus one document with hostname
    ``whitelist_hostname`` are indexed; exactly five stored documents must
    carry an ``outliers`` entry after the metrics model ran.
    """
    doc_generator = DummyDocumentsGenerate()
    command_query = "SELECT * FROM dummy_table"
    # must be bigger than the trigger value (here 3)
    nbr_generated_documents = 5

    # Generate documents that match the outlier
    for _ in range(nbr_generated_documents):
        matching_doc = doc_generator.generate_document(
            {"command_query": command_query})
        self.test_es.add_doc(matching_doc)

    # Generate whitelist document
    whitelist_doc = doc_generator.generate_document({
        "hostname": "whitelist_hostname",
        "command_query": command_query
    })
    self.test_es.add_doc(whitelist_doc)

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_length_dummy_test.conf"
    )
    analyzer.evaluate_model()

    outlier_count = sum(
        1 for stored in es._scan() if "outliers" in stored["_source"])
    self.assertEqual(outlier_count, nbr_generated_documents)
def test_simplequery_not_use_derived_fields_in_doc(self):
    """Check that the stored document gets no ``timestamp_year`` field.

    The simplequery model ``use_case_simplequery_dummy_test_not_derived`` is
    run on a single generated document.
    """
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        config_file_simplequery_test_02)
    analyzer = AnalyzerFactory.create(
        use_case_simplequery_dummy_test_not_derived)
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertNotIn reports the offending container on failure
    self.assertNotIn("timestamp_year", result['_source'])
def test_simplequry_use_matched_values_in_outlier(self):
    """Check that the outlier of the stored document holds a ``matched_values`` key.

    The simplequery model
    ``use_case_simplequery_dummy_test_highlight_match_activated`` is run on a
    single generated document.
    """
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        config_file_simplequery_test_02)
    analyzer = AnalyzerFactory.create(
        use_case_simplequery_dummy_test_highlight_match_activated)
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertIn reports the inspected dictionary on failure
    self.assertIn("matched_values", result['_source']['outliers'])
def test_sudden_appearance_derived_fields_in_doc(self):
    """Check that the stored document holds a ``timestamp_year`` field.

    The model ``sudden_appearance_derived_fields_01.conf`` is run on a single
    generated document.
    """
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(test_conf_file_01)
    analyzer = AnalyzerFactory.create(
        root_test_use_case_files +
        "sudden_appearance_derived_fields_01.conf")
    set_new_current_date(analyzer)
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertIn reports the inspected dictionary on failure
    self.assertIn("timestamp_year", result['_source'])
def test_terms_not_use_derived_fields_in_doc(self):
    """Check that the stored document gets no ``timestamp_year`` field.

    The terms model ``terms_dummy_test_not_derived.conf`` is run on a single
    generated document.
    """
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
    )
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertNotIn reports the offending container on failure
    self.assertNotIn("timestamp_year", result['_source'])
def test_non_outlier_values_not_present_in_terms_within(self):
    """Check the ``non_outlier_values_sample`` stored on a terms outlier.

    Five documents sharing the hostname ``one`` are indexed with the
    deployment names one, two, two, two and three; the fifth scanned document
    must report ``["two"]`` as its non outlier values sample.
    """
    doc_generator = DummyDocumentsGenerate()

    # Generate documents, in index order 0..4
    for deployment_name in ("one", "two", "two", "two", "three"):
        self.test_es.add_doc(
            doc_generator.generate_document({
                "hostname": "one",
                "deployment_name": deployment_name
            }))

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
    )
    analyzer.evaluate_model()

    stored_docs = list(es._scan())
    outlier_info = stored_docs[4]["_source"]["outliers"]
    self.assertEqual(outlier_info["non_outlier_values_sample"], ["two"])
def test_simplequery_not_use_derived_fields_but_present_in_outlier(self):
    """Check that the outlier still holds ``derived_timestamp_year``.

    The simplequery model ``simplequery_dummy_test_not_derived.conf`` is run
    on a single generated document.
    """
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/simplequery_test_02.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/simplequery/simplequery_dummy_test_not_derived.conf"
    )
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertIn reports the inspected dictionary on failure
    self.assertIn("derived_timestamp_year", result['_source']['outliers'])
def test_batch_whitelist_work_doent_match_outlier_in_across(self):
    """Check which (aggregator, term) pairs are flagged when one document is whitelisted.

    Documents are built from ``(aggregator, target_value, is_whitelist)``
    triples; the expected outliers are the four ``agg2`` documents only.
    """
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_whitelist_batch.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_batch_whitelist_across_float.conf"
    )

    # agg1 (0, 1, 2) -> 3 but with whitelist: (0, 2) -> 2
    # agg2 (0, 3, 4) -> 3
    doc_to_generate = [
        ("agg1", 0, False),
        ("agg1", 1, True),
        ("agg2", 0, False),
        ("agg2", 0, False),
        ("agg1", 2, False),
        ("agg2", 3, False),
        ("agg2", 4, False),
    ]

    doc_generator = DummyDocumentsGenerate()
    for aggregator, target_value, is_whitelist in doc_to_generate:
        # Only the whitelist documents get a deployment name
        deployment_name = "whitelist-deployment" if is_whitelist else None
        self.test_es.add_doc(
            doc_generator.generate_document({
                "deployment_name": deployment_name,
                "user_id": target_value,
                "hostname": aggregator
            }))

    analyzer.evaluate_model()

    list_outliers = [
        (stored["_source"]["outliers"]["aggregator"][0],
         stored["_source"]["outliers"]["term"][0])
        for stored in es._scan()
        if "outliers" in stored["_source"]
    ]

    # We detect agg2 but not agg1
    self.assertEqual(list_outliers, [("agg2", "0"), ("agg2", "0"),
                                     ("agg2", "3"), ("agg2", "4")])
def test_arbitrary_key_config_present_in_outlier(self):
    """Check that the outlier holds ``test_arbitrary_key`` with ``["arbitrary_value"]``.

    The simplequery model ``use_case_simplequery_arbitrary_dummy_test`` is run
    on a single generated document.
    """
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    analyzer = AnalyzerFactory.create(
        use_case_simplequery_arbitrary_dummy_test)

    dummy_doc_generate = DummyDocumentsGenerate()
    # Generate document
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertEqual instead of the deprecated alias assertEquals (removed in Python 3.12)
    self.assertEqual(result["_source"]["outliers"]["test_arbitrary_key"],
                     ["arbitrary_value"])
def test_one_doc_outlier_correctly_add(self):
    """Check that the document stored after the simplequery run equals ``doc_with_outlier_test_file_01``."""
    expected_doc = copy.deepcopy(doc_with_outlier_test_file_01)

    # Insert value
    self.test_es.add_doc(copy.deepcopy(doc_without_outlier_test_file))

    # Make test (supposed all doc work)
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    AnalyzerFactory.create(use_case_simplequery_dummy_test).evaluate_model()

    # Fetch result to check if it is correct
    stored_doc = list(es._scan())[0]
    self.assertEqual(stored_doc, expected_doc)
def test_flush_bulk_actions_using_one_save_outlier(self):
    """Check the stored document after one ``es.save_outlier`` call.

    The document read back from the scan must equal
    ``doc_with_outlier_with_derived_timestamp_test_file``.
    """
    expected_doc = copy.deepcopy(
        doc_with_outlier_with_derived_timestamp_test_file)
    source_doc = copy.deepcopy(doc_without_outlier_test_file)
    self.test_es.add_doc(source_doc)

    # Build the outlier by hand and attach an observation to it
    outlier_args = {
        "outlier_type": "dummy type",
        "outlier_reason": "dummy reason",
        "outlier_summary": "dummy summary",
        "doc": source_doc,
    }
    test_outlier = Outlier(**outlier_args)
    test_outlier.outlier_dict["observation"] = "dummy observation"

    es.save_outlier(test_outlier)

    stored_doc = list(es._scan())[0]
    self.assertEqual(stored_doc, expected_doc)
def test_arbitrary_key_config_not_present_int_other_model(self):
    """Check that ``test_arbitrary_key`` is absent from the outlier of ``use_case_simplequery_dummy_test``.

    Dictionaries and lists could be shared between different instances. This
    test checks that a residual value is not present in the dictionary.
    """
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    analyzer = AnalyzerFactory.create(use_case_simplequery_dummy_test)

    dummy_doc_generate = DummyDocumentsGenerate()
    # Generate document
    self.test_es.add_doc(dummy_doc_generate.generate_document())

    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertNotIn reports the offending dictionary on failure
    self.assertNotIn("test_arbitrary_key", result["_source"]["outliers"])
def test_bulk_update_do_not_remove_values(self):
    """Check that a bulk update lacking ``outliers`` leaves the stored document unchanged."""
    original_doc = DummyDocumentsGenerate().generate_document(
        {"create_outlier": True})
    self.test_es.add_doc(original_doc)

    # Work on a copy from which the outlier is removed
    stripped_doc = copy.deepcopy(original_doc)
    del stripped_doc["_source"]["outliers"]

    # Update the document (without outliers)
    es.add_update_bulk_action(stripped_doc)

    # Result in ES is the same as the original document (outliers weren't removed)
    stored_doc = list(es._scan())[0]
    self.assertEqual(original_doc, stored_doc)
def test_whitelist_regex_per_model_match_whitelist(self):
    """Check that a document with hostname ``AAA-WHITELISTED`` is not flagged as outlier.

    The model ``use_case_whitelist_tests_model_whitelist_02`` is run on that
    single document.
    """
    doc_generate = DummyDocumentsGenerate()
    # Generate document
    self.test_es.add_doc(
        doc_generate.generate_document({"hostname": "AAA-WHITELISTED"}))

    # Run analyzer
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    analyzer = AnalyzerFactory.create(
        use_case_whitelist_tests_model_whitelist_02)
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertNotIn reports the offending container on failure
    self.assertNotIn("outliers", result["_source"])
def test_simple_process_outlier_save_es(self):
    """Check the stored document after saving an outlier built by ``analyzer.create_outlier``.

    The document read back from the scan must equal
    ``doc_with_outlier_test_file``.
    """
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/analyzer_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf"
    )

    source_doc = copy.deepcopy(doc_without_outlier_test_file)
    self.test_es.add_doc(source_doc)
    expected_doc = copy.deepcopy(doc_with_outlier_test_file)

    # The outlier is created from the fields of the document and the document itself
    es.save_outlier(
        analyzer.create_outlier(source_doc["_source"], source_doc))

    stored_doc = list(es._scan())[0]
    self.assertEqual(stored_doc, expected_doc)
def test_terms_not_use_derived_fields_but_present_in_outlier(self):
    """Check that the outlier still holds ``derived_timestamp_year``.

    The terms model ``terms_dummy_test_not_derived.conf`` is run on a single
    document generated with ``user_id`` 11.
    """
    dummy_doc_generate = DummyDocumentsGenerate()
    self.test_es.add_doc(
        dummy_doc_generate.generate_document({"user_id": 11}))

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_not_derived.conf"
    )
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # The parameter use_derived_fields doesn't have any impact on outliers keys
    self.assertIn("derived_timestamp_year", result['_source']['outliers'])
def test_simplequery_no_extra_outlier_infos(self):
    """Check that every key of the stored outlier belongs to ``DEFAULT_OUTLIERS_KEY_FIELDS``."""
    # Generate document
    self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

    # Run analyzer
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_01)
    AnalyzerFactory.create(use_case_simplequery_dummy_test).evaluate_model()

    stored_doc = list(es._scan())[0]
    outlier_keys = stored_doc['_source']['outliers']
    self.assertTrue(
        all(key in DEFAULT_OUTLIERS_KEY_FIELDS for key in outlier_keys))
def test_whitelist_regex_per_model_not_match_whitelist(self):
    """Check that a document with hostname ``Not-work-WHITELISTED`` is flagged as outlier.

    The model ``whitelist_tests_model_whitelist_02.conf`` is run on that
    single document.
    """
    doc_generate = DummyDocumentsGenerate()
    # Generate document
    self.test_es.add_doc(
        doc_generate.generate_document(
            {"hostname": "Not-work-WHITELISTED"}))

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/simplequery_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/simplequery/whitelist_tests_model_whitelist_02.conf"
    )
    analyzer.evaluate_model()

    result = list(es._scan())[0]
    # assertIn reports the inspected container on failure
    self.assertIn("outliers", result["_source"])
def test_terms_evaluate_coeff_of_variation_like_expected_document(self):
    """Check the document stored after running ``terms_dummy_test_no_bucket.conf``.

    The document read back from the scan must equal
    ``doc_with_terms_outlier_coeff_of_variation_no_score_sort``.
    """
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_01.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_no_bucket.conf"
    )

    source_doc = copy.deepcopy(doc_without_outlier_test_file)
    expected_doc = copy.deepcopy(
        doc_with_terms_outlier_coeff_of_variation_no_score_sort)

    # Add doc to the database
    self.test_es.add_doc(source_doc)

    # Make test (suppose that all doc match with the query)
    analyzer.evaluate_model()

    stored_doc = list(es._scan())[0]
    self.assertEqual(stored_doc, expected_doc)
def test_metrics_batch_whitelist_three_outliers_one_whitelist(self):
    """Check the (aggregator, target) pairs flagged when some documents are whitelist ones.

    ``MetricsAnalyzer.MIN_EVALUATE_BATCH`` is lowered to 5 for the duration of
    the test and always restored afterwards, even when the test fails.
    """
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_whitelist_batch.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_batch_whitelist_float.conf"
    )

    backup_min_eval_batch = MetricsAnalyzer.MIN_EVALUATE_BATCH
    MetricsAnalyzer.MIN_EVALUATE_BATCH = 5
    try:
        # aggregator, target, is_whitelist
        doc_to_generate = [
            ("agg1", 5, False),
            ("agg1", 3, True),
            ("agg2", 4, False),
            ("agg2", 5, True),
            # Batch limit
            ("agg2", 3, False),
            ("agg1", 5, False),
            ("agg1", 7, False),  # Outlier
            ("agg2", 2, False),
            # Batch limit
            ("agg1", 4, True),
            ("agg2", 6, True),  # Outlier (but whitelist)
            ("agg1", 3, False),
            ("agg1", 5, False),
            # Batch limit
            ("agg2", 1, False),
            ("agg2", 6, False),  # Outlier
            ("agg1", 3, False)
        ]
        self._generate_metrics_doc_with_whitelist(doc_to_generate)

        analyzer.evaluate_model()

        list_outliers = [
            (doc["_source"]["outliers"]["aggregator"][0],
             doc["_source"]["outliers"]["target"][0])
            for doc in es._scan()
            if "outliers" in doc["_source"]
        ]
        self.assertEqual(list_outliers, [("agg1", "7"), ("agg2", "6")])
    finally:
        # Restore the class-level constant so a failure here cannot leak
        # the patched value into the other tests
        MetricsAnalyzer.MIN_EVALUATE_BATCH = backup_min_eval_batch
def test_terms_extra_outlier_infos_new_result(self):
    """Check that the stored terms outlier only holds known outlier keys.

    Every key must belong to
    ``EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS``.
    """
    # Generate document
    self.test_es.add_doc(DummyDocumentsGenerate().generate_document())

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/terms_test_02.conf")
    terms_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/terms/terms_dummy_test_float_low.conf"
    )
    terms_analyzer.evaluate_model()

    stored_doc = list(es._scan())[0]
    outlier_keys = stored_doc['_source']['outliers']
    self.assertTrue(
        all(key in EXTRA_OUTLIERS_KEY_FIELDS + DEFAULT_OUTLIERS_KEY_FIELDS
            for key in outlier_keys))
def test_one_doc_outlier_with_highlight_04(self):
    """
    Check that a document is correctly generated with the highlight fields when the use case sets
    highlight_match=0 while the configuration file sets highlight_match=1.
    """
    expected_doc = copy.deepcopy(doc_with_outlier_test_file_02)

    # Insert value
    self.test_es.add_doc(copy.deepcopy(doc_without_outlier_test_file))

    # Make test (supposed all doc work)
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_highlight_match_activated)
    highlight_analyzer = AnalyzerFactory.create(
        use_case_simplequery_dummy_test_highlight_match_unactivated)
    highlight_analyzer.evaluate_model()

    # Fetch result to check if it is correct
    stored_doc = list(es._scan())[0]
    self.assertEqual(stored_doc, expected_doc)
def test_simplequery_whitelist_work_test_es_result(self):
    """Check that only the regular documents end up flagged as outliers.

    Five created documents plus one document with hostname
    ``whitelist_hostname`` are indexed; exactly five stored documents must
    carry an ``outliers`` entry after the simplequery model ran.
    """
    doc_generator = DummyDocumentsGenerate()
    nbr_generated_documents = 5

    docs_to_index = doc_generator.create_documents(nbr_generated_documents)
    # The extra document uses the hostname "whitelist_hostname"
    docs_to_index.append(
        doc_generator.generate_document({"hostname": "whitelist_hostname"}))
    self.test_es.add_multiple_docs(docs_to_index)

    # Run analyzer
    self.test_settings.change_configuration_path(
        config_file_simplequery_test_whitelist)
    AnalyzerFactory.create(use_case_simplequery_dummy_test).evaluate_model()

    outlier_count = sum(
        1 for stored in es._scan() if "outliers" in stored["_source"])
    self.assertEqual(outlier_count, nbr_generated_documents)
def test_metrics_extra_outlier_infos_all_present(self):
    """Check that every key of ``EXTRA_OUTLIERS_KEY_FIELDS`` is present in the stored metrics outlier."""
    # Generate document
    self.test_es.add_doc(
        DummyDocumentsGenerate().generate_document({"user_id": 11}))

    # Run analyzer
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_02.conf")
    metrics_analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_not_derived.conf"
    )
    metrics_analyzer.evaluate_model()

    stored_doc = list(es._scan())[0]
    outlier_info = stored_doc['_source']['outliers']
    self.assertTrue(
        all(key in outlier_info for key in EXTRA_OUTLIERS_KEY_FIELDS))
def test_metrics_batch_whitelist_outlier_detect_after_process_all_and_remove_whitelist(
        self):
    """Check the (aggregator, target) pairs flagged once the whitelist element is left out.

    ``MetricsAnalyzer.MIN_EVALUATE_BATCH`` is lowered to 5 for the duration of
    the test and always restored afterwards, even when the test fails.
    """
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_whitelist_batch.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_batch_whitelist_avg.conf"
    )

    backup_min_eval_batch = MetricsAnalyzer.MIN_EVALUATE_BATCH
    MetricsAnalyzer.MIN_EVALUATE_BATCH = 5
    try:
        # aggregator, target, is_whitelist
        doc_to_generate = [
            ("agg1", 5, False),
            ("agg2", 5, False),
            ("agg1", 5, False),
            ("agg1", 3, False),
            # Batch limit
            ("agg1", 6, False),
            ("agg2", 5, False),
            ("agg1", 5, False),
            ("agg1", 7, True)
        ]
        self._generate_metrics_doc_with_whitelist(doc_to_generate)

        # The avg for agg1 is 5.1 but if we remove the whitelisted element, the avg is on 4.8
        analyzer.evaluate_model()

        list_outliers = [
            (doc["_source"]["outliers"]["aggregator"][0],
             doc["_source"]["outliers"]["target"][0])
            for doc in es._scan()
            if "outliers" in doc["_source"]
        ]

        # Without the batch whitelist, the only outlier will be ("agg1", 6) (the ("agg1", 7) is whitelist).
        # But with batch whitelist, the avg is updated and all values of "agg1" (except 3) are detected outlier
        self.assertEqual(list_outliers, [("agg1", "5"), ("agg1", "5"),
                                         ("agg1", "6"), ("agg1", "5")])
    finally:
        # Restore the class-level constant so a failure here cannot leak
        # the patched value into the other tests
        MetricsAnalyzer.MIN_EVALUATE_BATCH = backup_min_eval_batch
def test_evaluate_batch_for_outliers_add_outlier(self):
    """Check the stored document after processing the first outlier of an evaluated batch.

    A metric is added to the batch for the indexed document, the batch is
    evaluated, and the document read back from the scan must equal
    ``doc_with_outlier_test_file``.
    """
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_02.conf")
    analyzer = AnalyzerFactory.create(
        "/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_2.conf"
    )

    prepared_data = self._preperate_data_terms_with_doc(metrics_value=12)
    (eval_metrics_array, aggregator_value, target_value, metrics_value,
     observations) = prepared_data

    source_doc = copy.deepcopy(doc_without_outlier_test_file)
    self.test_es.add_doc(source_doc)

    batch_metrics = MetricsAnalyzer.add_metric_to_batch(
        eval_metrics_array, aggregator_value, target_value, metrics_value,
        observations, source_doc)

    # Only the outliers are used; the second returned value is ignored
    found_outliers, _ = analyzer._evaluate_batch_for_outliers(
        batch_metrics, True)
    analyzer.process_outlier(found_outliers[0])

    stored_doc = list(es._scan())[0]
    expected_doc = copy.deepcopy(doc_with_outlier_test_file)
    # Show the complete diff when the comparison fails
    self.maxDiff = None
    self.assertEqual(stored_doc, expected_doc)