Ejemplo n.º 1
0
    def __init__(self):
        threading.Thread.__init__(self)

        # Event the owner sets to ask this thread to terminate.
        self.shutdown_flag = threading.Event()

        # Watch every configuration file for on-disk modifications.
        watcher = FileModificationWatcher()
        watcher.add_files(settings.args.config)
        self.file_mod_watcher = watcher

        # Snapshot of the whitelist-related settings, used later to
        # detect whether a configuration change is relevant.
        self.last_config_parameters = self._get_config_whitelist_parameters()
Ejemplo n.º 2
0
    def __init__(self):
        threading.Thread.__init__(self)

        # Event the owner sets to ask this thread to terminate.
        self.shutdown_flag = threading.Event()
        # Flipped to True whenever the analyzer list is refreshed.
        self.analyzers_updated = False
        # Maps analyzer identifiers to analyzer instances.
        self.dict_analyzer = {}

        # Watch every configuration file for on-disk modifications.
        watcher = FileModificationWatcher()
        watcher.add_files(settings.args.config)
        self.file_mod_watcher = watcher
Ejemplo n.º 3
0
def run_daemon_mode():
    """
    Run outliers in daemon mode.

    In this mode, outliers will continue running based on the cron schedule
    defined in the configuration file. Configuration files are monitored for
    changes and reloaded between (and during) runs. Never returns.
    """
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file %s for changes",
                            config_file)

    # Monitor configuration files for potential changes
    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection, retrying every minute until it succeeds
    while not es.init_connection():
        time.sleep(60)

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    first_run = True
    run_succeeded_without_errors = None

    # The daemon should run forever, until the user kills it
    while True:
        next_run = None
        should_schedule_next_run = False

        # This loop will run for as long as we don't need to perform an analysis.
        # It is skipped entirely on the first run and after an errored run.
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:

            # Check if we already know when to perform the analysis next; if not, we need to schedule it
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            # Schedule the next run based on the cron schedule defined in the configuration file
            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info(
                    "next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(
                        next_run))
                should_schedule_next_run = False

            # Wait 5 seconds before checking the cron schedule again
            time.sleep(5)

        # Refresh settings in case the cron schedule has changed, for example
        settings.process_configuration_files()

        # On the first run, we might have to wipe all the existing outliers if this is set in the configuration file
        if first_run:
            first_run = False
            logging.logger.info(
                "first run, so we will start immediately - after this, we will respect the cron "
                "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general",
                                          "es_wipe_all_existing_outliers"):
                logging.logger.info(
                    "wiping all existing outliers on first run")
                es.remove_all_outliers()

        # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
        # the connection in between runs
        while not es.init_connection():
            time.sleep(60)

        # Make sure housekeeping is up and running. A threading.Thread can only
        # be started once: if a previously started job has terminated,
        # is_alive() is False and start() raises RuntimeError, so we create
        # and start a fresh job in that case.
        if not housekeeping_job.is_alive():
            try:
                housekeeping_job.start()
            except RuntimeError:
                housekeeping_job = HousekeepingJob()
                housekeeping_job.start()

        # Perform analysis and print the analysis summary at the end
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis(housekeeping_job)
        print_analysis_summary(analyzed_models)

        errored_models = [
            analyzer for analyzer in analyzed_models
            if analyzer.unknown_error_analysis
        ]

        # Check the result of the analysis. In case an error occurred, we want to re-run right away (after a minute)
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning(
                "ran into errors while analyzing use cases - not going to wait for the cron "
                "schedule, we just start analyzing again after sleeping for a minute first"
            )
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
Ejemplo n.º 4
0
def run_daemon_mode():
    """
    Run outliers in daemon mode.

    Keeps analyzing forever, following the cron schedule defined in the
    configuration file, reloading the configuration when it changes on disk.
    Never returns.
    """
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        # Lazy %-style arguments: the message is only formatted when emitted
        logging.logger.info("monitoring configuration file %s for changes",
                            config_file)

    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    es.init_connection()

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    first_run = True
    run_succeeded_without_errors = None

    while True:
        next_run = None
        should_schedule_next_run = False

        # Wait for the next scheduled run; skipped on the first run and
        # after an errored run, so those re-run immediately.
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info(
                    "next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(
                        next_run))
                should_schedule_next_run = False

            time.sleep(5)

        settings.process_configuration_files()  # Refresh settings

        if first_run:
            first_run = False
            logging.logger.info(
                "first run, so we will start immediately - after this, we will respect the cron "
                "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general",
                                          "es_wipe_all_existing_outliers"):
                logging.logger.info(
                    "wiping all existing outliers on first run")
                es.remove_all_outliers()
        else:
            # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
            # the connection in between runs
            es.init_connection()

        # Make sure housekeeping is up and running. A threading.Thread can
        # only be started once: if a previously started job has terminated,
        # start() raises RuntimeError, so create a fresh job in that case.
        if not housekeeping_job.is_alive():
            try:
                housekeeping_job.start()
            except RuntimeError:
                housekeeping_job = HousekeepingJob()
                housekeeping_job.start()

        # Perform analysis
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis()
        print_analysis_summary(analyzed_models)

        errored_models = [
            analyzer for analyzer in analyzed_models
            if analyzer.unknown_error_analysis
        ]

        # Check the result of the analysis
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning(
                "ran into errors while analyzing use cases - not going to wait for the cron "
                "schedule, we just start analyzing again after sleeping for a minute first"
            )
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
Ejemplo n.º 5
0
class HousekeepingJob(threading.Thread):
    """Background thread that periodically removes whitelisted outliers from Elasticsearch."""

    def __init__(self):
        threading.Thread.__init__(self)

        # Watch every configuration file for on-disk modifications.
        watcher = FileModificationWatcher()
        watcher.add_files(settings.args.config)
        self.file_mod_watcher = watcher

        # The shutdown_flag is a threading.Event object used by the owner
        # to signal that this thread should terminate.
        self.shutdown_flag = threading.Event()
        # Flipped to True whenever the analyzer list is refreshed, so the
        # next housekeeping pass runs even without a config file change.
        self.analyzers_updated = False

        # Maps "<model_type>_<model_name>" to its analyzer instance.
        self.dict_analyzer = {}

    def run(self):
        """
        Thread entry point: run housekeeping passes until asked to shut down.
        """
        logging.logger.info('housekeeping thread #%s started' % self.ident)

        while not self.shutdown_flag.is_set():
            # wait() returns True when the flag was set during the 5s wait;
            # in that case we skip the pass and let the loop condition exit.
            flag_was_set = self.shutdown_flag.wait(5)
            if not flag_was_set:
                self.execute_housekeeping()

        logging.logger.info('housekeeping thread #%s stopped' % self.ident)

    def execute_housekeeping(self):
        """
        Run one housekeeping pass if anything changed since the last one.
        """
        changes_detected = self.file_mod_watcher.files_changed() or self.analyzers_updated
        if changes_detected:
            self.analyzers_updated = False
            # reload configuration file, in case new whitelisted items were added by the analyst, they
            # should be processed!
            settings.process_configuration_files()

            logging.logger.info("housekeeping - changes detected, process again housekeeping")
            self.remove_all_whitelisted_outliers()

    def update_analyzer_list(self, list_analyzer):
        # Rebuild the mapping from scratch so analyzers that disappeared are dropped.
        self.dict_analyzer = {
            analyzer.model_type + "_" + analyzer.model_name: analyzer
            for analyzer in list_analyzer
        }
        logging.logger.info("housekeeping - list analyzer have been updated")
        self.analyzers_updated = True

    def stop_housekeeping(self):
        # Signal the run() loop to exit, then block until the thread finishes.
        self.shutdown_flag.set()
        self.join()

    def remove_all_whitelisted_outliers(self):
        """
        Try to remove all whitelisted outliers that are already in Elasticsearch.
        """
        if not settings.config.getboolean("general", "es_wipe_all_whitelisted_outliers"):
            return

        try:
            logging.logger.info("housekeeping - going to remove all whitelisted outliers")
            total_docs_whitelisted = es.remove_all_whitelisted_outliers(self.dict_analyzer)

            if total_docs_whitelisted > 0:
                logging.logger.info(
                    "housekeeping - total whitelisted documents cleared from outliers: " +
                    "{:,}".format(total_docs_whitelisted))
            else:
                logging.logger.info("housekeeping - whitelist did not remove any outliers")

        except Exception:
            logging.logger.error("housekeeping - something went wrong removing whitelisted outliers", exc_info=True)

        logging.logger.info("housekeeping - finished round of cleaning whitelisted items")
0
class HousekeepingJob(threading.Thread):
    """Background thread that keeps Elasticsearch free of whitelisted outliers.

    Every 5 seconds it checks whether a configuration file changed on disk;
    when the whitelist-related settings actually changed, it re-runs the
    whitelist cleanup against Elasticsearch.
    """

    def __init__(self):
        threading.Thread.__init__(self)

        # Watch every configuration file for on-disk modifications.
        self.file_mod_watcher = FileModificationWatcher()
        self.file_mod_watcher.add_files(settings.args.config)

        # Snapshot of the whitelist settings, compared against fresh values
        # after each reload to detect relevant changes.
        self.last_config_parameters = self._get_config_whitelist_parameters()

        # The shutdown_flag is a threading.Event object that
        # indicates whether the thread should be terminated.
        self.shutdown_flag = threading.Event()

    @staticmethod
    def _get_config_whitelist_parameters():
        """Return the subset of settings whose change should trigger a new cleanup."""
        return {
            'whitelist_literals':
            settings.config.items("whitelist_literals"),
            'whitelist_regexps':
            settings.config.items("whitelist_regexps"),
            'es_wipe_all_whitelisted_outliers':
            settings.config.getboolean("general",
                                       "es_wipe_all_whitelisted_outliers")
        }

    def run(self):
        """Thread entry point: clean once at startup, then poll until shut down."""
        logging.logger.info('housekeeping thread #%s started' % self.ident)
        self.remove_all_whitelisted_outliers()

        # Remove all existing whitelisted items if needed.
        # wait() returns True when the shutdown flag was set during the 5s
        # wait; skip the housekeeping pass in that case so we don't do a
        # final pass while shutting down.
        while not self.shutdown_flag.is_set():
            if not self.shutdown_flag.wait(5):
                self.execute_housekeeping()

        logging.logger.info('housekeeping thread #%s stopped' % self.ident)

    def execute_housekeeping(self):
        """Reload settings on config change; re-clean if whitelist settings changed."""
        if self.file_mod_watcher.files_changed():
            # reload configuration file, in case new whitelisted items were added by the analyst, they
            # should be processed!
            settings.process_configuration_files()

            # Compute the fresh snapshot once instead of twice.
            current_config_parameters = self._get_config_whitelist_parameters()
            if self.last_config_parameters != current_config_parameters:
                self.last_config_parameters = current_config_parameters
                logging.logger.info(
                    "housekeeping - changes detected in the whitelist configuration"
                )
                self.remove_all_whitelisted_outliers()

    @staticmethod
    def remove_all_whitelisted_outliers():
        """Remove all whitelisted outliers from Elasticsearch, when enabled in settings."""
        if settings.config.getboolean("general",
                                      "es_wipe_all_whitelisted_outliers"):
            try:
                logging.logger.info(
                    "housekeeping - going to remove all whitelisted outliers")
                total_docs_whitelisted = es.remove_all_whitelisted_outliers()

                if total_docs_whitelisted > 0:
                    logging.logger.info(
                        "housekeeping - total whitelisted documents cleared from outliers: "
                        + "{:,}".format(total_docs_whitelisted))
                else:
                    logging.logger.info(
                        "housekeeping - whitelist did not remove any outliers")

            except Exception:
                # Fixed log message: was missing the word "wrong".
                logging.logger.error(
                    "housekeeping - something went wrong removing whitelisted outliers",
                    exc_info=True)

            logging.logger.info(
                "housekeeping - finished round of cleaning whitelisted items")
Ejemplo n.º 7
0
        logging.logger.warning(
            "no use cases were analyzed. are you sure your configuration file contains use cases, which are enabled?"
        )

    return analyzed_models == len(analyzers_to_evaluate)


# Run modes
if settings.args.run_mode == "daemon":
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file " + config_file +
                            " for changes")

    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    es.init_connection()

    # Start housekeeping activities
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    num_runs = 0
    first_run = True
    run_succeeded_without_errors = None

    while True:
        num_runs += 1