Exemple #1
0
    def test_single_literal_not_to_match_in_doc_with_outlier(self):
        orig_doc = copy.deepcopy(doc_with_outlier_test_file)
        test_outlier = Outlier(outlier_type="dummy type",
                               outlier_reason="dummy reason",
                               outlier_summary="dummy summary")

        settings.process_configuration_files(
            "/app/tests/unit_tests/files/whitelist_tests_03.conf")
        self.assertFalse(
            test_outlier.is_whitelisted(
                additional_dict_values_to_check=orig_doc))
Exemple #2
0
    def execute_housekeeping(self):
        """
        Execute the housekeeping
        """
        if self.file_mod_watcher.files_changed() or self.analyzers_updated:
            self.analyzers_updated = False
            # reload configuration file, in case new whitelisted items were added by the analyst, they
            # should be processed!
            settings.process_configuration_files()

            logging.logger.info("housekeeping - changes detected, process again housekeeping")
            self.remove_all_whitelisted_outliers()
Exemple #3
0
    def test_whitelist_config_file_multi_item_mismatch_with_three_fields_and_whitespace(
            self):
        orig_doc = copy.deepcopy(doc_with_outlier_test_file)
        test_outlier = Outlier(outlier_type="dummy type",
                               outlier_reason="dummy reason",
                               outlier_summary="dummy summary")

        settings.process_configuration_files(
            "/app/tests/unit_tests/files/whitelist_tests_05.conf")
        self.assertFalse(
            test_outlier.is_whitelisted(
                additional_dict_values_to_check=orig_doc))
Exemple #4
0
    def execute_housekeeping(self):
        if len(self.file_mod_watcher.files_changed()) > 0:
            # reload configuration file, in case new whitelisted items were added by the analyst, they
            # should be processed!
            settings.process_configuration_files()

            if self.last_config_parameters != self._get_config_whitelist_parameters(
            ):
                self.last_config_parameters = self._get_config_whitelist_parameters(
                )
                logging.logger.info(
                    "housekeeping - changes detected in the whitelist configuration"
                )
                self.remove_all_whitelisted_outliers()
Exemple #5
0
 def change_configuration_path(self, new_path):
     settings.args.config = [new_path]
     settings.process_configuration_files()
Exemple #6
0
 def restore_default_configuration_path(self):
     settings.args.config = self.backup_default_args_config
     settings.process_configuration_files()
Exemple #7
0
def run_daemon_mode():
    """
    Run outliers in daemon mode.
    In this mode, outliers will continue running based on the cron scheduled defined in the configuration file.
    """

    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file %s for changes",
                            config_file)

    # Monitor configuration files for potential changes
    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    while not es.init_connection():
        time.sleep(60)

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    first_run = True
    run_succeeded_without_errors = None

    # The daemon should run forever, until the user kills it
    while True:
        next_run = None
        should_schedule_next_run = False

        # This loop will run for as long we don't need to perform an analysis
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:

            # Check if we already know when to perform the analysis next; if not, we need to schedule it
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            # Schedule a next rune based on the cron schedule defined in the configuration file
            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info(
                    "next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(
                        next_run))
                should_schedule_next_run = False

            # Wait 5 seconds before checking the cron schedule again
            time.sleep(5)

        # Refresh settings in case the cron has changed for example
        settings.process_configuration_files()

        # On the first run, we might have to wipe all the existing outliers if this is set in the configuration file
        if first_run:
            first_run = False
            logging.logger.info(
                "first run, so we will start immediately - after this, we will respect the cron "
                "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general",
                                          "es_wipe_all_existing_outliers"):
                logging.logger.info(
                    "wiping all existing outliers on first run")
                es.remove_all_outliers()

        # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
        # the connection in between runs
        while not es.init_connection():
            time.sleep(60)

        # Make sure housekeeping is up and running
        if not housekeeping_job.is_alive():
            housekeeping_job.start()

        # Perform analysis and print the analysis summary at the end
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis(housekeeping_job)
        print_analysis_summary(analyzed_models)

        errored_models = [
            analyzer for analyzer in analyzed_models
            if analyzer.unknown_error_analysis
        ]

        # Check the result of the analysis. In case an error occured, we want to re-run right away (after a minute)
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning(
                "ran into errors while analyzing use cases - not going to wait for the cron "
                "schedule, we just start analyzing again after sleeping for a minute first"
            )
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
Exemple #8
0
def run_daemon_mode():
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file " + config_file +
                            " for changes")

    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    es.init_connection()

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    num_runs = 0
    first_run = True
    run_succeeded_without_errors = None

    while True:
        num_runs += 1
        next_run = None
        should_schedule_next_run = False

        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info(
                    "next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(
                        next_run))
                should_schedule_next_run = False

            time.sleep(5)

        settings.process_configuration_files()  # Refresh settings

        if first_run:
            first_run = False
            logging.logger.info(
                "first run, so we will start immediately - after this, we will respect the cron "
                + "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general",
                                          "es_wipe_all_existing_outliers"):
                logging.logger.info(
                    "wiping all existing outliers on first run")
                es.remove_all_outliers()
        else:
            # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
            # the connection in between runs
            es.init_connection()

        # Make sure housekeeping is up and running
        if not housekeeping_job.is_alive():
            housekeeping_job.start()

        # Perform analysis
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis()
        print_analysis_summary(analyzed_models)

        errored_models = [
            analyzer for analyzer in analyzed_models
            if analyzer.unknown_error_analysis
        ]

        # Check the result of the analysis
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning(
                "ran into errors while analyzing use cases - not going to wait for the cron "
                +
                "schedule, we just start analyzing again after sleeping for a minute first"
            )
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
Exemple #9
0
 def tearDown(self):
     # restore the default configuration file so we don't influence other unit tests that use the settings singleton
     settings.process_configuration_files("/defaults/outliers.conf")
     settings.process_arguments()