def process_outlier(self, fields, doc, extra_outlier_information=None):
    """
    Build an outlier for a document, record it on the analyzer and hand it to Elasticsearch.

    :param fields: extracted fields of the document; used to fill in placeholders and to extract assets
    :param doc: document linked to this outlier, passed on to es.process_outliers
    :param extra_outlier_information: optional dictionary with extra key/values to store on the outlier.
        A dictionary supplied by the caller is updated in place with "model_name" and "model_type".
    :return: the created outlier object
    """
    # Use a fresh dictionary per call: a mutable default value is created only once
    # and would be shared (and written to) by every call that omits the argument.
    if extra_outlier_information is None:
        extra_outlier_information = dict()

    extra_outlier_information["model_name"] = self.model_name
    extra_outlier_information["model_type"] = self.model_type

    # Placeholders may refer to document fields as well as to the extra information
    fields_and_extra_outlier_information = fields.copy()
    fields_and_extra_outlier_information.update(extra_outlier_information)

    outlier_summary = helpers.utils.replace_placeholder_fields_with_values(
        self.model_settings["outlier_summary"], fields_and_extra_outlier_information)
    outlier_type = helpers.utils.replace_placeholder_fields_with_values(
        self.model_settings["outlier_type"], fields_and_extra_outlier_information)
    outlier_reason = helpers.utils.replace_placeholder_fields_with_values(
        self.model_settings["outlier_reason"], fields_and_extra_outlier_information)

    outlier_assets = helpers.utils.extract_outlier_asset_information(fields, settings)
    outlier = Outlier(type=outlier_type, reason=outlier_reason, summary=outlier_summary)

    # Only store the assets when at least one was extracted
    if outlier_assets:
        outlier.outlier_dict["assets"] = outlier_assets

    for k, v in extra_outlier_information.items():
        outlier.outlier_dict[k] = v

    self.outliers.append(outlier)
    es.process_outliers(doc=doc, outliers=[outlier], should_notify=self.model_settings["should_notify"])

    return outlier
def test_notification_on_outlier_already_detected_but_not_in_queue(self):
    """An outlier that was pushed out of the notification queue is notified again when re-reported."""
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/notifications_test.conf")
    self.test_notifier = TestStubNotifier()
    doc_generate = DummyDocumentsGenerate()

    # All outliers are linked to the same document
    doc = doc_generate.generate_document()

    # The first three fill the queue (3 elements), the fourth one pushes the first out
    outlier_parameters = [
        ("dummy type", "dummy reason", "dummy summary1"),
        ("dummy type2", "dummy reason2", "dummy summary2"),
        ("dummy type3", "dummy reason3", "dummy summary3"),
        ("dummy type4", "dummy reason4", "dummy summary4"),
    ]
    notified_outliers = []
    for outlier_type, outlier_reason, outlier_summary in outlier_parameters:
        outlier = Outlier(outlier_type, outlier_reason, outlier_summary, doc)
        es.notifier.notify_on_outlier(outlier)
        notified_outliers.append(outlier)

    # Report the first outlier once more
    es.notifier.notify_on_outlier(notified_outliers[0])

    # Every call must have produced a notification (so 5)
    nr_emails = len(self.test_notifier.get_list_email())
    self.assertEqual(nr_emails, 5)
    self.test_notifier.restore_notifier()
def process_outlier(decision_frontier, non_outlier_values_sample, term_value_count, terms, aggregator_value, ii,
                    term_value, model_settings):
    """
    Create the outlier for entry ``ii`` of an aggregator and hand it to Elasticsearch.

    The observations dictionary stored in ``terms`` for that entry is enriched in place.

    :param decision_frontier: decision frontier the term count was compared against
    :param non_outlier_values_sample: stored as the "non_outlier_values_sample" observation
    :param term_value_count: stored as the "term_count" observation
    :param terms: dictionary that holds, per aggregator, the "raw_docs" and "observations" lists
    :param aggregator_value: key of the aggregator in ``terms``
    :param ii: index of the entry inside the aggregator lists
    :param term_value: stored as the "term" observation
    :param model_settings: model settings (summary, type, reason, trigger method, notification flag)
    :return: the created outlier object
    """
    aggregator_terms = terms[aggregator_value]
    raw_doc = aggregator_terms["raw_docs"][ii]

    # Extract fields from raw document
    fields = es.extract_fields_from_document(raw_doc)

    observations = aggregator_terms["observations"][ii]
    observations["non_outlier_values_sample"] = non_outlier_values_sample
    observations["aggregator"] = aggregator_value
    observations["term"] = term_value
    observations["term_count"] = term_value_count
    observations["decision_frontier"] = decision_frontier
    observations["trigger_method"] = str(model_settings["trigger_method"])
    # Distance between the decision frontier and the observed count
    observations["confidence"] = np.abs(decision_frontier - term_value_count)

    # The summary is built before the assets are added to the observations
    placeholder_values = helpers.utils.merge_two_dicts(fields, observations)
    summary = helpers.utils.replace_placeholder_fields_with_values(model_settings["outlier_summary"],
                                                                   placeholder_values)

    assets = helpers.utils.extract_outlier_asset_information(fields, settings)
    if len(assets) > 0:
        observations["assets"] = assets

    outlier = Outlier(type=model_settings["outlier_type"], reason=model_settings["outlier_reason"],
                      summary=summary)
    for observation_name, observation_value in observations.items():
        outlier.add_observation(observation_name, observation_value)

    es.process_outliers(doc=raw_doc, outliers=[outlier], should_notify=model_settings["should_notify"])
    return outlier
def remove_all_whitelisted_outliers(self):
    """
    Remove the outlier information from every document whose outliers are all whitelisted.

    :return: the number of documents that were cleaned
    """
    from helpers.outlier import Outlier  # import goes here to avoid issues with singletons & circular requirements ... //TODO: fix this

    must_clause = {"must": [{"match": {"tags": "outlier"}}]}
    nr_docs_cleaned = 0

    for doc in self.scan(bool_clause=must_clause):
        stored_outliers = doc["_source"]["outliers"]
        total_outliers = int(stored_outliers["total_outliers"])

        # Rebuild every outlier stored on the document and count the whitelisted ones
        total_whitelisted = 0
        for position in range(total_outliers):
            outlier = Outlier(type=stored_outliers["type"][position],
                              reason=stored_outliers["reason"][position],
                              summary=stored_outliers["summary"][position])
            if outlier.is_whitelisted(additional_dict_values_to_check=doc):
                total_whitelisted += 1

        # Outliers are stored as array elements and may carry observations that would have to be removed
        # too, so whitelisted outliers cannot be removed individually. The document is therefore only
        # touched when ALL of its outliers are whitelisted.
        if total_whitelisted != total_outliers:
            continue

        nr_docs_cleaned += 1
        doc = remove_outliers_from_document(doc)

        # Replace the stored document: delete it, then create it again from the cleaned source
        self.conn.delete(index=doc["_index"], doc_type=doc["_type"], id=doc["_id"], refresh=True)
        self.conn.create(index=doc["_index"], doc_type=doc["_type"], id=doc["_id"], body=doc["_source"],
                         refresh=True)

    return nr_docs_cleaned
def create_outlier(self, fields, doc, extra_outlier_information=None):
    """
    Build an outlier object carrying the type, reason, summary, assets and extra information

    :param fields: extracted fields
    :param doc: document the outlier is linked to
    :param extra_outlier_information: optional extra key/values to store on the outlier
    :return: the newly built outlier object
    """
    if extra_outlier_information is None:
        extra_outlier_information = dict()

    prepared_parameters = self._prepare_outlier_parameters(extra_outlier_information, fields)
    o_type, o_reason, o_summary, o_assets = prepared_parameters

    outlier = Outlier(outlier_type=o_type, outlier_reason=o_reason, outlier_summary=o_summary, doc=doc)
    outlier_dict = outlier.outlier_dict

    if o_assets:
        outlier_dict["assets"] = o_assets

    # The extra model settings also bring model_name and model_type onto the outlier
    for setting_name, setting_value in self.extra_model_settings.items():
        outlier_dict[setting_name] = setting_value

    # Caller-supplied information is written last, so it wins over the model settings on identical keys
    if extra_outlier_information:
        for info_name, info_value in extra_outlier_information.items():
            outlier_dict[info_name] = info_value

    return outlier
def test_remove_outlier_from_doc(self):
    """Adding an outlier to a document and removing it again gives back the document without outlier."""
    outlier = Outlier(type="dummy type", reason="dummy reason", summary="dummy summary")
    outlier.outlier_dict["observation"] = "dummy observation"

    enriched_doc = helpers.es.add_outlier_to_document(doc_without_outlier_test_file, outlier)
    cleaned_doc = helpers.es.remove_outliers_from_document(enriched_doc)

    self.assertDictEqual(cleaned_doc, doc_without_outlier_test_file)
def test_whitelist_config_file_multi_item_match(self):
    """With whitelist_tests_01_with_general.conf loaded, the dummy outlier is whitelisted."""
    doc_copy = copy.deepcopy(doc_with_outlier_test_file)
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=doc_copy)

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf")

    whitelisted = outlier.is_whitelisted()
    self.assertTrue(whitelisted)
def test_whitelist_config_wipe_all_bug(self):
    """With whitelist_tests_10_issue_462.conf loaded, the dummy outlier must not be whitelisted."""
    doc_copy = copy.deepcopy(doc_with_outlier_test_file)
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=doc_copy)

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/whitelist_tests_10_issue_462.conf")

    whitelisted = outlier.is_whitelisted()
    self.assertFalse(whitelisted)
def test_single_regex_not_to_match_in_doc_with_outlier(self):
    """With whitelist_tests_07_with_general.conf loaded, the dummy outlier must not be whitelisted."""
    # The configuration is loaded before the outlier is created
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/whitelist_tests_07_with_general.conf")

    doc_copy = copy.deepcopy(doc_with_outlier_test_file)
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=doc_copy)

    self.assertFalse(outlier.is_whitelisted())
def test_test_osquery_ticket_1933_single_regexp_should_not_match(self):
    """With whitelist_tests_09_ticket_1933.conf loaded, the dummy outlier must not be whitelisted."""
    doc_copy = copy.deepcopy(doc_with_outlier_test_file)
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=doc_copy)

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/whitelist_tests_09_ticket_1933.conf")

    whitelisted = outlier.is_whitelisted()
    self.assertFalse(whitelisted)
def test_add_outlier_to_doc(self):
    """Adding an outlier with one observation to the clean document gives the expected document."""
    outlier = Outlier(type="dummy type", reason="dummy reason", summary="dummy summary")
    outlier.add_observation(field_name="observation", field_value="dummy observation")

    enriched_doc = helpers.es.add_outlier_to_document(doc_without_outlier_test_file, outlier)

    self.assertDictEqual(doc_with_outlier_test_file, enriched_doc)
def test_add_three_outliers_to_doc(self):
    """Adding three outliers linked to the same document gives the expected three-outlier document."""
    doc = copy.deepcopy(doc_without_outlier_test_file)

    # (suffix of the dummy values, name of the observation) for each of the three outliers
    outlier_specs = [("", "observation"), (" 2", "observation_2"), (" 3", "observation_3")]

    outliers = []
    for suffix, observation_name in outlier_specs:
        outlier = Outlier(outlier_type="dummy type" + suffix, outlier_reason="dummy reason" + suffix,
                          outlier_summary="dummy summary" + suffix, doc=doc)
        outlier.outlier_dict[observation_name] = "dummy observation" + suffix
        outliers.append(outlier)

    # Only the document returned for the last added outlier is compared
    doc_with_three_outliers = None
    for outlier in outliers:
        doc_with_three_outliers = helpers.es.add_outlier_to_document(outlier)

    self.assertDictEqual(doc_with_three_outliers, doc_with_three_outliers_test_file)
def test_add_outlier_to_doc(self):
    """Adding an outlier with one observation to a copy of the clean document gives the expected document."""
    clean_doc = copy.deepcopy(doc_without_outlier_test_file)

    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=clean_doc)
    outlier.outlier_dict["observation"] = "dummy observation"

    enriched_doc = helpers.es.add_outlier_to_document(clean_doc, outlier)

    self.assertDictEqual(doc_with_outlier_test_file, enriched_doc)
def test_whitelist_config_file_multi_item_mismatch_with_three_fields_and_whitespace(self):
    """With whitelist_tests_05.conf loaded, the dummy outlier must not be whitelisted."""
    doc_copy = copy.deepcopy(doc_with_outlier_test_file)
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=doc_copy)

    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/whitelist_tests_05.conf")

    whitelisted = outlier.is_whitelisted()
    self.assertFalse(whitelisted)
def test_whitelist_regexp_mismatch(self):
    """A regexp whitelist item that does not fit the whitelist test document must not match."""
    non_matching_regexp = r"^.*.exeZZZZZ sync$"
    outlier = Outlier(type="dummy type", reason="dummy reason", summary="dummy summary")

    matched = outlier.matches_specific_whitelist_item(
        non_matching_regexp, "regexp",
        additional_dict_values_to_check=doc_for_whitelist_testing_file)

    self.assertFalse(matched)
def test_single_literal_not_to_match_in_doc_with_outlier(self):
    """With whitelist_tests_03.conf loaded, the outlier checked against the document is not whitelisted."""
    doc_copy = copy.deepcopy(doc_with_outlier_test_file)
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary")

    settings.process_configuration_files(
        "/app/tests/unit_tests/files/whitelist_tests_03.conf")

    # The document is not linked to the outlier; it is passed in as additional values to check
    whitelisted = outlier.is_whitelisted(additional_dict_values_to_check=doc_copy)
    self.assertFalse(whitelisted)
def test_whitelist_literal_mismatch(self):
    """A literal whitelist item that differs from the whitelist test document must not match."""
    non_matching_literal = r"C:\Windows\system32\msfeedssync.exe syncWRONG"
    outlier = Outlier(type="dummy type", reason="dummy reason", summary="dummy summary")

    matched = outlier.matches_specific_whitelist_item(
        non_matching_literal, "literal",
        additional_dict_values_to_check=doc_for_whitelist_testing_file)

    self.assertFalse(matched)
def test_whitelist_config_file_multi_item_mismatch_with_three_fields_and_whitespace(self):
    """With whitelist_tests_05.conf loaded, the outlier checked against the document is not whitelisted."""
    doc_copy = copy.deepcopy(doc_with_outlier_test_file)
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary")

    settings.process_configuration_files(
        "/app/tests/unit_tests/files/whitelist_tests_05.conf")

    # The document is not linked to the outlier; it is passed in as additional values to check
    whitelisted = outlier.is_whitelisted(additional_dict_values_to_check=doc_copy)
    self.assertFalse(whitelisted)
def test_simple_process_outlier_return_good_outlier(self):
    """process_outlier on the stub analyzer returns an outlier equal to the expected one."""
    self.test_settings.change_configuration_path("/app/tests/unit_tests/files/analyzer_test_01.conf")
    analyzer = TestStubAnalyzer("analyzer_dummy_test")

    clean_doc = copy.deepcopy(doc_without_outlier_test_file)
    produced_outlier = analyzer.process_outlier(clean_doc["_source"], clean_doc)

    # Type and reason are expected as lists, the summary as a plain string
    expected_outlier = Outlier(outlier_type=["dummy type"], outlier_reason=['dummy reason'],
                               outlier_summary='dummy summary', doc=clean_doc)
    expected_outlier.outlier_dict['model_name'] = 'dummy_test'
    expected_outlier.outlier_dict['model_type'] = 'analyzer'

    self.assertEqual(produced_outlier, expected_outlier)
def test_add_outlier_to_doc(self):
    """Adding an outlier without model information gives the expected document without model info."""
    clean_doc = copy.deepcopy(doc_without_outlier_test_file)

    # Model name, model type are added by analyzer
    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=clean_doc)
    outlier.outlier_dict["observation"] = "dummy observation"

    enriched_doc = helpers.es.add_outlier_to_document(outlier)

    self.assertDictEqual(doc_with_outlier_without_model_info_test_file, enriched_doc)
def test_whitelist_correctly_reload_after_update_config(self):
    """Switching the configuration file changes the whitelist outcome for the same document."""
    self.test_settings.change_configuration_path(test_whitelist_single_literal_file)

    document_options = {
        "create_outlier": True,
        "outlier_observation": "dummy observation",
        "filename": "osquery_get_all_processes_with_listening_conns.log",
    }
    doc = DummyDocumentsGenerate().generate_document(document_options)

    # With the first configuration, the outlier is not whitelisted
    whitelisted_before = Outlier.is_whitelisted_doc(doc)
    self.assertFalse(whitelisted_before)

    # Update configuration
    self.test_settings.change_configuration_path(test_whitelist_multiple_literal_file)

    # Now the outlier is whitelisted
    whitelisted_after = Outlier.is_whitelisted_doc(doc)
    self.assertTrue(whitelisted_after)
def evaluate_batch_for_outliers(metrics=None, model_settings=None, last_batch=False):
    """
    Evaluate the collected metrics of every aggregator and process the outliers found.

    An aggregator with fewer than 100 metrics is left for a later batch, unless this is the last batch.

    :param metrics: dictionary that holds, per aggregator, the "metrics", "observations" and "raw_docs" lists
    :param model_settings: model settings (trigger and outlier settings, metric name, notification flag)
    :param last_batch: when True, aggregators are evaluated regardless of how many metrics they hold
    :return: tuple (list of created outliers, dictionary with the aggregators that were not evaluated)
    """
    outliers = list()
    remaining_metrics = metrics.copy()

    for aggregator_value in metrics:
        aggregator_metrics = metrics[aggregator_value]
        metric_values = aggregator_metrics["metrics"]

        # Not enough data yet: keep the aggregator in the remaining metrics
        if len(metric_values) < 100 and last_batch is False:
            continue

        # This aggregator is handled now, so it no longer counts as remaining
        del remaining_metrics[aggregator_value]

        # Calculate the decision frontier
        decision_frontier = helpers.utils.get_decision_frontier(model_settings["trigger_method"],
                                                                metric_values,
                                                                model_settings["trigger_sensitivity"],
                                                                model_settings["trigger_on"])

        frontier_message = "using decision frontier " + str(decision_frontier) + " for aggregator " + \
            str(aggregator_value) + " - " + model_settings["metric"]
        logging.logger.debug(frontier_message)

        example_message = "example metric from batch for " + \
            aggregator_metrics["observations"][0]["target"] + ": " + str(metric_values[0])
        logging.logger.debug(example_message)

        # Check every metric of the aggregator against the decision frontier
        for ii, metric_value in enumerate(metric_values):
            if not helpers.utils.is_outlier(metric_value, decision_frontier, model_settings["trigger_on"]):
                continue

            confidence = np.abs(decision_frontier - metric_value)
            raw_doc = aggregator_metrics["raw_docs"][ii]

            # Extract fields from raw document
            fields = es.extract_fields_from_document(raw_doc)

            observations = aggregator_metrics["observations"][ii]
            placeholder_values = helpers.utils.merge_two_dicts(fields, observations)
            summary = helpers.utils.replace_placeholder_fields_with_values(model_settings["outlier_summary"],
                                                                           placeholder_values)

            # The assets are stored on the observations dictionary itself (modified in place)
            assets = helpers.utils.extract_outlier_asset_information(fields, settings)
            if len(assets) > 0:
                observations["assets"] = assets

            outlier = Outlier(type=model_settings["outlier_type"], reason=model_settings["outlier_reason"],
                              summary=summary)
            outlier.add_observation("metric", metric_value)
            outlier.add_observation("decision_frontier", decision_frontier)
            outlier.add_observation("confidence", confidence)
            for observation_name, observation_value in observations.items():
                outlier.add_observation(observation_name, observation_value)

            outliers.append(outlier)
            es.process_outliers(doc=raw_doc, outliers=[outlier], should_notify=model_settings["should_notify"])

    return outliers, remaining_metrics
def test_simple_process_outlier_return_good_outlier(self):
    """create_outlier on the dummy analyzer gives an outlier dictionary equal to the expected one."""
    self.test_settings.change_configuration_path("/app/tests/unit_tests/files/analyzer_test_01.conf")
    analyzer = AnalyzerFactory.create("/app/tests/unit_tests/files/use_cases/analyzer/analyzer_dummy_test.conf")

    clean_doc = copy.deepcopy(doc_without_outlier_test_file)
    produced_outlier = analyzer.create_outlier(clean_doc["_source"], clean_doc)

    # Type and reason are expected as lists, the summary as a plain string
    expected_outlier = Outlier(outlier_type=["dummy type"], outlier_reason=['dummy reason'],
                               outlier_summary='dummy summary', doc=clean_doc)
    expected_dict = expected_outlier.outlier_dict
    expected_dict['model_name'] = 'dummy_test'
    expected_dict['model_type'] = 'analyzer'
    expected_dict['elasticsearch_filter'] = 'es_valid_query'

    dicts_are_equal = produced_outlier.outlier_dict == expected_outlier.outlier_dict
    self.assertTrue(dicts_are_equal)
def test_flush_bulk_actions_using_one_save_outlier(self):
    """After saving one outlier, the first scanned document equals the expected outlier document."""
    expected_doc = copy.deepcopy(doc_with_outlier_with_derived_timestamp_test_file)
    clean_doc = copy.deepcopy(doc_without_outlier_test_file)
    self.test_es.add_doc(clean_doc)

    outlier = Outlier(outlier_type="dummy type", outlier_reason="dummy reason",
                      outlier_summary="dummy summary", doc=clean_doc)
    outlier.outlier_dict["observation"] = "dummy observation"
    es.save_outlier(outlier)

    # Consume the whole scan, then compare its first document only
    scanned_docs = list(es._scan())
    self.assertEqual(scanned_docs[0], expected_doc)
def remove_all_whitelisted_outliers(self):
    """
    Remove the outlier information from every document whose outliers are all whitelisted.

    :return: the number of documents that were cleaned
    """
    from helpers.outlier import Outlier  # import goes here to avoid issues with singletons & circular requirements ... //TODO: fix this

    outliers_filter_query = {"filter": [{"term": {"tags": "outlier"}}]}
    nr_docs_cleaned = 0

    idx = self.settings.config.get("general", "es_index_pattern")
    total_nr_outliers = self.count_documents(index=idx, bool_clause=outliers_filter_query)
    self.logging.logger.info("going to analyze %s outliers and remove all whitelisted items",
                             "{:,}".format(total_nr_outliers))

    for doc in self.scan(index=idx, bool_clause=outliers_filter_query):
        stored_outliers = doc["_source"]["outliers"]
        total_outliers = int(stored_outliers["total_outliers"])

        # Rebuild every outlier stored on the document and count the whitelisted ones
        total_whitelisted = 0
        for position in range(total_outliers):
            outlier = Outlier(outlier_type=stored_outliers["type"][position],
                              outlier_reason=stored_outliers["reason"][position],
                              outlier_summary=stored_outliers["summary"][position])
            if outlier.is_whitelisted(additional_dict_values_to_check=doc):
                total_whitelisted += 1

        # Outliers are stored as array elements and may carry observations that would have to be removed
        # too, so whitelisted outliers cannot be removed individually. The document is therefore only
        # touched when ALL of its outliers are whitelisted.
        if total_whitelisted != total_outliers:
            continue

        nr_docs_cleaned += 1
        doc = remove_outliers_from_document(doc)

        # Replace the stored document: delete it, then create it again from the cleaned source
        self.conn.delete(index=doc["_index"], doc_type=doc["_type"], id=doc["_id"], refresh=True)
        self.conn.create(index=doc["_index"], doc_type=doc["_type"], id=doc["_id"], body=doc["_source"],
                         refresh=True)

    return nr_docs_cleaned
def test_add_duplicate_outlier_to_doc(self):
    """Adding the same outlier twice gives a document equal to the one the outlier was first added to."""
    outlier = Outlier(type="dummy type", reason="dummy reason", summary="dummy summary")
    doc = copy.deepcopy(doc_without_outlier_test_file)

    # Add the outlier, then add it again to the document that came back
    once_enriched = helpers.es.add_outlier_to_document(doc, outlier)
    twice_enriched = helpers.es.add_outlier_to_document(once_enriched, outlier)

    self.assertDictEqual(doc, twice_enriched)
def evaluate_batch_for_outliers(w2v_model=None, eval_sentences=None, raw_docs=None, model_settings=None):
    """
    Evaluate a batch of sentences with the word2vec model and process the outliers found.

    :param w2v_model: model object providing evaluate_sentences() and the use_test_data flag
    :param eval_sentences: sentences to evaluate
    :param raw_docs: raw documents, index-aligned with eval_sentences
    :param model_settings: model settings (outlier summary, type, reason and notification flag)
    :return: list of the outliers that were created
    """
    # Initialize
    outliers = list()

    # One probability per evaluated sentence, index-aligned with eval_sentences and raw_docs
    sentence_probs = w2v_model.evaluate_sentences(eval_sentences)

    # The set of distinct probabilities is the same for every sentence, so compute it once
    # NOTE(review): this still contains the NaN probabilities - confirm is_outlier_mad copes with them
    unique_probs = list(set(sentence_probs))

    for i, single_sentence_prob in enumerate(sentence_probs):
        # If the probability is nan, it means that the sentence could not be evaluated, and we can't reason about it.
        # This happens for example whenever the sentence is made up entirely of words that aren't known to the trained model.
        # np.isnan is required here: "is np.nan" only recognises the np.nan object itself, not a computed NaN.
        if np.isnan(single_sentence_prob):
            continue

        # if is_outlier_cutoff_percentage(single_sentence_prob, cutoff=0.005):
        # if is_outlier_std(single_sentence_prob, unique_probs, model_settings):
        if is_outlier_mad(single_sentence_prob, unique_probs, model_settings):
            outlier_summary = model_settings["outlier_summary"]

            # Extract fields from raw document
            fields = es.extract_fields_from_document(raw_docs[i])
            outlier_summary = replace_placeholder_string_with_fields(outlier_summary, fields)

            outlier = Outlier(type=model_settings["outlier_type"], reason=model_settings["outlier_reason"],
                              summary=outlier_summary)
            outlier.add_observation("probability", str(single_sentence_prob))

            outliers.append(outlier)
            es.process_outliers(doc=raw_docs[i], outliers=[outlier], should_notify=model_settings["should_notify"])
        elif w2v_model.use_test_data:
            logging.logger.info("Not an outlier: " + str(eval_sentences[i]) + " - " + str(single_sentence_prob))

    return outliers
def process_outlier(self, fields, doc, extra_outlier_information=None):
    """
    Build an outlier for a document, record it on the analyzer and hand it to Elasticsearch.

    :param fields: extracted fields of the document; used to fill in placeholders and to extract assets
    :param doc: document linked to this outlier, passed on to es.process_outliers
    :param extra_outlier_information: optional dictionary with extra key/values to store on the outlier.
        A dictionary supplied by the caller is updated in place with "model_name" and "model_type".
    :return: the created outlier object
    """
    # Use a fresh dictionary per call: a mutable default value is created only once
    # and would be shared (and written to) by every call that omits the argument.
    if extra_outlier_information is None:
        extra_outlier_information = dict()

    extra_outlier_information["model_name"] = self.model_name
    extra_outlier_information["model_type"] = self.model_type

    # Placeholders may refer to document fields as well as to the extra information
    fields_and_extra_outlier_information = fields.copy()
    fields_and_extra_outlier_information.update(extra_outlier_information)

    outlier_summary = helpers.utils.replace_placeholder_fields_with_values(
        self.model_settings["outlier_summary"], fields_and_extra_outlier_information)

    # for both outlier types and reasons, we also allow the case where multiples values are provided at once.
    # example: type = malware, IDS
    raw_outlier_type = helpers.utils.replace_placeholder_fields_with_values(
        self.model_settings["outlier_type"], fields_and_extra_outlier_information)
    raw_outlier_reason = helpers.utils.replace_placeholder_fields_with_values(
        self.model_settings["outlier_reason"], fields_and_extra_outlier_information)

    # remove any leading or trailing whitespace from either.
    # For example: "type = malware, IDS" should just return ["malware","IDS"] instead of ["malware", " IDS"]
    outlier_type = [item.strip() for item in raw_outlier_type.split(",")]
    outlier_reason = [item.strip() for item in raw_outlier_reason.split(",")]

    outlier_assets = helpers.utils.extract_outlier_asset_information(fields, settings)
    outlier = Outlier(outlier_type=outlier_type, outlier_reason=outlier_reason, outlier_summary=outlier_summary)

    # Only store the assets when at least one was extracted
    if outlier_assets:
        outlier.outlier_dict["assets"] = outlier_assets

    for k, v in extra_outlier_information.items():
        outlier.outlier_dict[k] = v

    self.outliers.append(outlier)
    es.process_outliers(doc=doc, outliers=[outlier], should_notify=self.model_settings["should_notify"])

    return outlier
def test_notification_on_two_different_outliers(self):
    """Two different outliers, each linked to its own generated document, give two notifications."""
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/notifications_test.conf")
    self.test_notifier = TestStubNotifier()
    doc_generate = DummyDocumentsGenerate()

    # Create both outliers before any notification is sent
    first_doc = doc_generate.generate_document()
    first_outlier = Outlier("dummy type", "dummy reason", "dummy summary", first_doc)
    second_doc = doc_generate.generate_document()
    second_outlier = Outlier("dummy type2", "dummy reason2", "dummy summary2", second_doc)

    # Execute the notifications
    for outlier in (first_outlier, second_outlier):
        es.notifier.notify_on_outlier(outlier)

    nr_emails = len(self.test_notifier.get_list_email())
    self.assertEqual(nr_emails, 2)
    self.test_notifier.restore_notifier()
def test_whitelist_literal_match(self):
    """A document whose command query equals the whitelisted literal is reported as whitelisted."""
    # The configuration contains: "C:\Windows\system32\msfeedssync.exe sync"
    self.test_settings.change_configuration_path(test_file_outliers_path_config)

    document_options = {"command_query": r'C:\Windows\system32\msfeedssync.exe sync'}
    doc = DummyDocumentsGenerate().generate_document(document_options)

    whitelisted = Outlier.is_whitelisted_doc(doc)
    self.assertTrue(whitelisted)