Exemple #1
0
    def remove_all_whitelisted_outliers(self):
        """
        Strip the outlier information from every stored event whose outliers are all whitelisted.

        Every document matching the "outlier" tag is scanned. For each stored type/reason/summary entry an
        Outlier object is rebuilt and checked against the whitelist, with the document itself passed as
        additional values to check.

        :return: the number of documents from which the outlier information was removed
        """
        from helpers.outlier import Outlier  # import goes here to avoid issues with singletons & circular requirements ... //TODO: fix this

        must_clause = {"must": [{"match": {"tags": "outlier"}}]}
        total_docs_whitelisted = 0

        for doc in self.scan(bool_clause=must_clause):
            outliers = doc["_source"]["outliers"]
            total_outliers = int(outliers["total_outliers"])

            # Rebuild one outlier object per stored entry and count the whitelisted ones
            total_whitelisted = 0
            for i in range(total_outliers):
                outlier = Outlier(type=outliers["type"][i], reason=outliers["reason"][i],
                                  summary=outliers["summary"][i])
                if outlier.is_whitelisted(additional_dict_values_to_check=doc):
                    total_whitelisted += 1

            # Only touch the document when ALL of its outliers are whitelisted. Outliers are stored as parallel
            # array elements and potentially contain observations that should be removed too, so removing just
            # the whitelisted ones is not possible; in that case the document is left as it is.
            if total_whitelisted == total_outliers:
                total_docs_whitelisted += 1
                doc = remove_outliers_from_document(doc)

                # Overwrite the document in one request. A delete followed by a create is not atomic: if the
                # create fails, the event is lost. Indexing with an explicit id replaces the stored source.
                self.conn.index(index=doc["_index"], doc_type=doc["_type"], id=doc["_id"], body=doc["_source"],
                                refresh=True)

        return total_docs_whitelisted
    def test_test_osquery_ticket_1933_single_regexp_should_not_match(self):
        # With the whitelist configuration of ticket 1933 loaded, the dummy outlier built on a copy of the
        # reference document is expected NOT to be whitelisted.
        document = copy.deepcopy(doc_with_outlier_test_file)
        outlier_fields = {
            "outlier_type": "dummy type",
            "outlier_reason": "dummy reason",
            "outlier_summary": "dummy summary",
            "doc": document,
        }
        outlier_under_test = Outlier(**outlier_fields)

        config_path = "/app/tests/unit_tests/files/whitelist_tests_09_ticket_1933.conf"
        self.test_settings.change_configuration_path(config_path)

        whitelisted = outlier_under_test.is_whitelisted()
        self.assertFalse(whitelisted)
 def test_single_regex_not_to_match_in_doc_with_outlier(self):
     # The configuration is switched first, then the dummy outlier is built on a copy of the reference
     # document; it is expected NOT to be whitelisted.
     config_path = "/app/tests/unit_tests/files/whitelist_tests_07_with_general.conf"
     self.test_settings.change_configuration_path(config_path)

     document = copy.deepcopy(doc_with_outlier_test_file)
     outlier_under_test = Outlier(
         outlier_type="dummy type",
         outlier_reason="dummy reason",
         outlier_summary="dummy summary",
         doc=document,
     )

     self.assertFalse(outlier_under_test.is_whitelisted())
    def test_whitelist_config_wipe_all_bug(self):
        # With the issue 462 configuration file loaded, the dummy outlier must not be reported as whitelisted.
        document = copy.deepcopy(doc_with_outlier_test_file)
        outlier_under_test = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=document,
        )

        config_path = "/app/tests/unit_tests/files/whitelist_tests_10_issue_462.conf"
        self.test_settings.change_configuration_path(config_path)

        whitelisted = outlier_under_test.is_whitelisted()
        self.assertFalse(whitelisted)
    def test_whitelist_config_file_multi_item_match(self):
        # With whitelist_tests_01_with_general.conf loaded, the dummy outlier is expected to be whitelisted.
        document = copy.deepcopy(doc_with_outlier_test_file)
        outlier_fields = dict(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=document,
        )
        outlier_under_test = Outlier(**outlier_fields)

        config_path = "/app/tests/unit_tests/files/whitelist_tests_01_with_general.conf"
        self.test_settings.change_configuration_path(config_path)

        whitelisted = outlier_under_test.is_whitelisted()
        self.assertTrue(whitelisted)
    def test_whitelist_config_file_multi_item_mismatch_with_three_fields_and_whitespace(self):
        # With whitelist_tests_05.conf loaded, the dummy outlier is expected NOT to be whitelisted.
        document = copy.deepcopy(doc_with_outlier_test_file)
        outlier_under_test = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
            doc=document,
        )

        config_path = "/app/tests/unit_tests/files/whitelist_tests_05.conf"
        self.test_settings.change_configuration_path(config_path)

        whitelisted = outlier_under_test.is_whitelisted()
        self.assertFalse(whitelisted)
Exemple #7
0
    def test_single_literal_not_to_match_in_doc_with_outlier(self):
        # With whitelist_tests_03.conf processed, the dummy outlier checked against a copy of the reference
        # document is expected NOT to be whitelisted.
        document = copy.deepcopy(doc_with_outlier_test_file)
        outlier_under_test = Outlier(
            outlier_type="dummy type",
            outlier_reason="dummy reason",
            outlier_summary="dummy summary",
        )

        config_path = "/app/tests/unit_tests/files/whitelist_tests_03.conf"
        settings.process_configuration_files(config_path)

        whitelisted = outlier_under_test.is_whitelisted(additional_dict_values_to_check=document)
        self.assertFalse(whitelisted)
Exemple #8
0
    def test_whitelist_config_file_multi_item_mismatch_with_three_fields_and_whitespace(self):
        # With whitelist_tests_05.conf processed, the dummy outlier checked against a copy of the reference
        # document is expected NOT to be whitelisted.
        document = copy.deepcopy(doc_with_outlier_test_file)
        outlier_fields = {
            "outlier_type": "dummy type",
            "outlier_reason": "dummy reason",
            "outlier_summary": "dummy summary",
        }
        outlier_under_test = Outlier(**outlier_fields)

        config_path = "/app/tests/unit_tests/files/whitelist_tests_05.conf"
        settings.process_configuration_files(config_path)

        whitelisted = outlier_under_test.is_whitelisted(additional_dict_values_to_check=document)
        self.assertFalse(whitelisted)
Exemple #9
0
    def remove_all_whitelisted_outliers(self):
        """
        Strip the outlier information from every stored event whose outliers are all whitelisted.

        The index pattern is read from the "general" / "es_index_pattern" setting. Every document in it that
        carries the "outlier" tag is scanned; for each stored type/reason/summary entry an Outlier object is
        rebuilt and checked against the whitelist, with the document itself passed as additional values to check.

        :return: the number of documents from which the outlier information was removed
        """
        from helpers.outlier import Outlier  # import goes here to avoid issues with singletons & circular requirements ... //TODO: fix this

        outliers_filter_query = {"filter": [{"term": {"tags": "outlier"}}]}
        total_docs_whitelisted = 0

        idx = self.settings.config.get("general", "es_index_pattern")
        total_nr_outliers = self.count_documents(index=idx, bool_clause=outliers_filter_query)
        self.logging.logger.info("going to analyze %s outliers and remove all whitelisted items",
                                 "{:,}".format(total_nr_outliers))

        for doc in self.scan(index=idx, bool_clause=outliers_filter_query):
            outliers = doc["_source"]["outliers"]
            total_outliers = int(outliers["total_outliers"])

            # Rebuild one outlier object per stored entry and count the whitelisted ones
            total_whitelisted = 0
            for i in range(total_outliers):
                outlier = Outlier(outlier_type=outliers["type"][i],
                                  outlier_reason=outliers["reason"][i],
                                  outlier_summary=outliers["summary"][i])
                if outlier.is_whitelisted(additional_dict_values_to_check=doc):
                    total_whitelisted += 1

            # Only touch the document when ALL of its outliers are whitelisted. Outliers are stored as parallel
            # array elements and potentially contain observations that should be removed too, so removing just
            # the whitelisted ones is not possible; in that case the document is left as it is.
            if total_whitelisted == total_outliers:
                total_docs_whitelisted += 1
                doc = remove_outliers_from_document(doc)

                # Overwrite the document in one request. A delete followed by a create is not atomic: if the
                # create fails, the event is lost. Indexing with an explicit id replaces the stored source.
                self.conn.index(index=doc["_index"],
                                doc_type=doc["_type"],
                                id=doc["_id"],
                                body=doc["_source"],
                                refresh=True)

        return total_docs_whitelisted
Exemple #10
0
    def remove_all_whitelisted_outliers(self, dict_with_analyzer):
        """
        Remove the outlier information from all documents in Elasticsearch whose outliers are all whitelisted.
        This method is normally only called by housekeeping.

        Each stored outlier is checked against the whitelist, extended with the whitelist literals and regexps
        of the analyzer found in dict_with_analyzer under the key "<model_type>_<model_name>". When no analyzer
        is found for an outlier, the whitelist cannot be checked and the document is left untouched.

        :param dict_with_analyzer: mapping from "<model_type>_<model_name>" to the analyzer; each analyzer must
                                   expose model_whitelist_literals and model_whitelist_regexps
        :return: the number of documents from which the outlier information was removed
        """
        outliers_filter_query = {"filter": [{"term": {"tags": "outlier"}}]}

        total_outliers_whitelisted = 0
        total_outliers_processed = 0

        idx = self.settings.config.get("general", "es_index_pattern")
        total_nr_outliers, documents = self.count_and_scan_documents(
            index=idx, bool_clause=outliers_filter_query)

        if total_nr_outliers > 0:
            self.logging.logger.info(
                "going to analyze %s outliers and remove all whitelisted items",
                "{:,}".format(total_nr_outliers))
            start_time = dt.datetime.today().timestamp()

            for doc in documents:
                total_outliers_processed += 1
                total_outliers_in_doc = int(
                    doc["_source"]["outliers"]["total_outliers"])
                # generate all outlier objects for this document
                total_whitelisted = 0

                for i in range(total_outliers_in_doc):
                    outlier_type = doc["_source"]["outliers"]["type"][i]
                    outlier_reason = doc["_source"]["outliers"]["reason"][i]
                    outlier_summary = doc["_source"]["outliers"]["summary"][i]

                    # Extract information and get analyzer linked to this outlier
                    model_name = doc["_source"]["outliers"]["model_name"][i]
                    model_type = doc["_source"]["outliers"]["model_type"][i]
                    config_section_name = model_type + "_" + model_name
                    if config_section_name not in dict_with_analyzer:
                        # lazy %-arguments: the message is only rendered when debug logging is enabled
                        self.logging.logger.debug(
                            "Outlier '%s'  was not found in configuration, could not check whitelist",
                            config_section_name)
                        # This outlier cannot be whitelisted, so the whole document is kept as it is;
                        # there is no point in checking the remaining outliers.
                        break
                    analyzer = dict_with_analyzer[config_section_name]

                    outlier = Outlier(outlier_type=outlier_type,
                                      outlier_reason=outlier_reason,
                                      outlier_summary=outlier_summary,
                                      doc=doc)
                    if outlier.is_whitelisted(
                            extra_literals_whitelist_value=analyzer.model_whitelist_literals,
                            extra_regexps_whitelist_value=analyzer.model_whitelist_regexps):
                        total_whitelisted += 1

                # if all outliers for this document are whitelisted, remove them all. If not, don't touch the
                # document. This is a limitation in the way our outliers are stored: if not ALL of them are
                # whitelisted, we can't remove just the whitelisted ones from the Elasticsearch event, as they
                # are stored as array elements and potentially contain observations that should be removed, too.
                if total_whitelisted == total_outliers_in_doc:
                    total_outliers_whitelisted += 1
                    doc = remove_outliers_from_document(doc)
                    self.add_remove_outlier_bulk_action(doc)

                # we don't use the ticker from the logger singleton, as this will be called from the housekeeping
                # thread. If we share a same ticker between multiple threads, strange results would start to
                # appear in progress logging, so we duplicate part of the functionality from the logger singleton
                if self.logging.verbosity >= 5:
                    should_log = True
                else:
                    log_every = max(1, int(math.pow(10, (6 - self.logging.verbosity))))
                    should_log = total_outliers_processed % log_every == 0 \
                        or total_outliers_processed == total_nr_outliers

                if should_log:
                    # clamp the elapsed time to at least one second to avoid a division by zero
                    time_diff = max(
                        float(1),
                        float(dt.datetime.today().timestamp() - start_time))
                    ticks_per_second = "{:,}".format(
                        round(float(total_outliers_processed) / time_diff))
                    percent_done = '{:.2f}'.format(
                        round(float(total_outliers_processed) / float(total_nr_outliers) * 100, 2))

                    self.logging.logger.info(
                        "whitelisting historical outliers  [%s eps. - %s%% done - %s outliers whitelisted]",
                        ticks_per_second, percent_done,
                        "{:,}".format(total_outliers_whitelisted))

            self.flush_bulk_actions()

        return total_outliers_whitelisted
Exemple #11
0
    def remove_all_whitelisted_outliers(self):
        """
        Walk over every document tagged as outlier and strip the outlier information from the documents whose
        outliers are all whitelisted.

        :return: the number of documents from which the outlier information was removed
        """
        tagged_as_outlier = {"filter": [{"term": {"tags": "outlier"}}]}
        index_pattern = self.settings.config.get("general", "es_index_pattern")

        expected_total = self.count_documents(index=index_pattern, bool_clause=tagged_as_outlier)
        self.logging.logger.info("going to analyze %s outliers and remove all whitelisted items",
                                 "{:,}".format(expected_total))

        cleaned_docs = 0
        if not expected_total > 0:
            # nothing to analyze
            return cleaned_docs

        seen_docs = 0
        started_at = dt.datetime.today().timestamp()

        for doc in self.scan(index=index_pattern, bool_clause=tagged_as_outlier):
            seen_docs += 1
            outlier_count = int(doc["_source"]["outliers"]["total_outliers"])

            # rebuild every outlier of this document and count how many of them are whitelisted
            whitelisted_count = sum(
                1
                for position in range(outlier_count)
                if Outlier(outlier_type=doc["_source"]["outliers"]["type"][position],
                           outlier_reason=doc["_source"]["outliers"]["reason"][position],
                           outlier_summary=doc["_source"]["outliers"]["summary"][position],
                           doc=doc).is_whitelisted()
            )

            # The document is only modified when ALL of its outliers are whitelisted: outliers are stored as
            # array elements and potentially contain observations that should be removed too, so a partial
            # removal is not possible. Otherwise the document is left untouched.
            if whitelisted_count == outlier_count:
                cleaned_docs += 1
                doc = remove_outliers_from_document(doc)
                self._update_es(doc)

            # The ticker of the logger singleton is deliberately not used: this runs in the housekeeping
            # thread, and sharing one ticker between threads would garble the progress logging. Part of its
            # functionality is therefore duplicated here.
            should_log = (
                self.logging.verbosity >= 5
                or seen_docs % max(1, int(math.pow(10, (5 - self.logging.verbosity)))) == 0
                or seen_docs == expected_total
            )
            if not should_log:
                continue

            # elapsed time is clamped to one second to avoid a division by zero
            elapsed = max(float(1), float(dt.datetime.today().timestamp() - started_at))
            rate = "{:,}".format(round(float(seen_docs) / elapsed))
            percent_done = '{:.2f}'.format(round(float(seen_docs) / float(expected_total) * 100, 2))

            progress_message = "".join([
                "whitelisting historical outliers ", " [", rate, " eps.",
                " - ", percent_done,
                "% done", " - ", str(cleaned_docs),
                " outliers whitelisted]",
            ])
            self.logging.logger.info(progress_message)

        return cleaned_docs