def __init__(self):
    """Set up the housekeeping thread: config-file watching, whitelist snapshot, shutdown signalling."""
    threading.Thread.__init__(self)

    # Event used by the main thread to request termination of this thread.
    self.shutdown_flag = threading.Event()

    # Watch every configuration file given on the command line for modifications.
    watcher = FileModificationWatcher()
    watcher.add_files(settings.args.config)
    self.file_mod_watcher = watcher

    # Snapshot of the whitelist-related settings, used later to detect changes.
    self.last_config_parameters = self._get_config_whitelist_parameters()
def __init__(self):
    """Set up the housekeeping thread: config-file watching, analyzer bookkeeping, shutdown signalling."""
    threading.Thread.__init__(self)

    # Event used by the main thread to request termination of this thread.
    self.shutdown_flag = threading.Event()

    # Watch every configuration file given on the command line for modifications.
    watcher = FileModificationWatcher()
    watcher.add_files(settings.args.config)
    self.file_mod_watcher = watcher

    # Bookkeeping for the analyzers registered for whitelist cleanup.
    self.analyzers_updated = False
    self.dict_analyzer = {}
def run_daemon_mode(): """ Run outliers in daemon mode. In this mode, outliers will continue running based on the cron scheduled defined in the configuration file. """ # In daemon mode, we also want to monitor the configuration file for changes. # In case of a change, we need to make sure that we are using this new configuration file for config_file in settings.args.config: logging.logger.info("monitoring configuration file %s for changes", config_file) # Monitor configuration files for potential changes file_mod_watcher = FileModificationWatcher() file_mod_watcher.add_files(settings.args.config) # Initialize Elasticsearch connection while not es.init_connection(): time.sleep(60) # Create housekeeping job, don't start it yet housekeeping_job = HousekeepingJob() first_run = True run_succeeded_without_errors = None # The daemon should run forever, until the user kills it while True: next_run = None should_schedule_next_run = False # This loop will run for as long we don't need to perform an analysis while (next_run is None or datetime.now() < next_run) and first_run is False and \ run_succeeded_without_errors is True: # Check if we already know when to perform the analysis next; if not, we need to schedule it if next_run is None: should_schedule_next_run = True # Check for configuration file changes and load them in case it's needed if file_mod_watcher.files_changed(): logging.logger.info("configuration file changed, reloading") settings.process_configuration_files() should_schedule_next_run = True # Schedule a next rune based on the cron schedule defined in the configuration file if should_schedule_next_run: next_run = croniter(settings.config.get("daemon", "schedule"), datetime.now()).get_next(datetime) logging.logger.info( "next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format( next_run)) should_schedule_next_run = False # Wait 5 seconds before checking the cron schedule again time.sleep(5) # Refresh settings in case the cron has changed for example 
settings.process_configuration_files() # On the first run, we might have to wipe all the existing outliers if this is set in the configuration file if first_run: first_run = False logging.logger.info( "first run, so we will start immediately - after this, we will respect the cron " "schedule defined in the configuration file") # Wipe all existing outliers if needed if settings.config.getboolean("general", "es_wipe_all_existing_outliers"): logging.logger.info( "wiping all existing outliers on first run") es.remove_all_outliers() # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with # the connection in between runs while not es.init_connection(): time.sleep(60) # Make sure housekeeping is up and running if not housekeeping_job.is_alive(): housekeeping_job.start() # Perform analysis and print the analysis summary at the end logging.print_generic_intro("starting outlier detection") analyzed_models = perform_analysis(housekeeping_job) print_analysis_summary(analyzed_models) errored_models = [ analyzer for analyzer in analyzed_models if analyzer.unknown_error_analysis ] # Check the result of the analysis. In case an error occured, we want to re-run right away (after a minute) if errored_models: run_succeeded_without_errors = False logging.logger.warning( "ran into errors while analyzing use cases - not going to wait for the cron " "schedule, we just start analyzing again after sleeping for a minute first" ) time.sleep(60) else: run_succeeded_without_errors = True logging.print_generic_intro("finished performing outlier detection")
def run_daemon_mode():
    """
    Run outliers in daemon mode.

    Analyze forever, following the cron schedule defined in the configuration file,
    reloading the configuration files when they change on disk. The first run starts
    immediately; a run that ends with errors is retried after one minute.
    """
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file " + config_file + " for changes")

    # Watch the configuration files for modifications between runs
    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    es.init_connection()

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    num_runs = 0  # NOTE(review): incremented each iteration but never read - candidate for removal
    first_run = True
    run_succeeded_without_errors = None  # tri-state: None until the first analysis has completed

    # The daemon runs forever, until the user kills it
    while True:
        num_runs += 1
        next_run = None
        should_schedule_next_run = False

        # Idle loop: wait until the scheduled time is reached. Skipped entirely
        # on the first run and after a run that hit errors.
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:
            # If we don't yet know when the next analysis should happen, schedule it
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            # Compute the next run time from the cron expression in the configuration file
            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"), datetime.now()).get_next(datetime)
                logging.logger.info("next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(next_run))
                should_schedule_next_run = False

            # Wait 5 seconds before checking the cron schedule again
            time.sleep(5)

        settings.process_configuration_files()  # Refresh settings

        # On the first run, we might have to wipe all existing outliers if configured
        if first_run:
            first_run = False
            logging.logger.info("first run, so we will start immediately - after this, we will respect the cron " +
                                "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
                logging.logger.info("wiping all existing outliers on first run")
                es.remove_all_outliers()
        else:
            # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
            # the connection in between runs
            es.init_connection()

        # Make sure housekeeping is up and running
        if not housekeeping_job.is_alive():
            housekeeping_job.start()

        # Perform analysis
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis()
        print_analysis_summary(analyzed_models)

        errored_models = [analyzer for analyzer in analyzed_models if analyzer.unknown_error_analysis]

        # Check the result of the analysis. On errors, retry after a minute instead of waiting for the cron schedule
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning("ran into errors while analyzing use cases - not going to wait for the cron " +
                                   "schedule, we just start analyzing again after sleeping for a minute first")
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
class HousekeepingJob(threading.Thread):
    """Background thread that periodically removes whitelisted outliers from Elasticsearch."""

    def __init__(self):
        threading.Thread.__init__(self)

        # Event used by outliers to request termination of this thread.
        self.shutdown_flag = threading.Event()

        # Watch the configuration files passed on the command line for changes.
        watcher = FileModificationWatcher()
        watcher.add_files(settings.args.config)
        self.file_mod_watcher = watcher

        # Analyzers registered for whitelist cleanup, keyed on "<model_type>_<model_name>".
        self.analyzers_updated = False
        self.dict_analyzer = dict()

    def run(self):
        """Main loop: every 5 seconds, run a housekeeping round until shutdown is requested."""
        logging.logger.info('housekeeping thread #%s started' % self.ident)

        # Remove all existing whitelisted items if needed
        while not self.shutdown_flag.is_set():
            # wait() returns True when the flag was set by outliers; only do a
            # housekeeping round when the 5-second timeout elapsed instead.
            flag_was_set = self.shutdown_flag.wait(5)
            if not flag_was_set:
                self.execute_housekeeping()

        logging.logger.info('housekeeping thread #%s stopped' % self.ident)

    def execute_housekeeping(self):
        """Run one housekeeping round if the configuration or the analyzer list changed."""
        config_changed = self.file_mod_watcher.files_changed()
        if not (config_changed or self.analyzers_updated):
            return

        self.analyzers_updated = False
        # reload configuration file, in case new whitelisted items were added by the analyst, they
        # should be processed!
        settings.process_configuration_files()
        logging.logger.info("housekeeping - changes detected, process again housekeeping")
        self.remove_all_whitelisted_outliers()

    def update_analyzer_list(self, list_analyzer):
        """Replace the known analyzer dictionary and flag that a housekeeping round is needed."""
        self.dict_analyzer = {analyzer.model_type + "_" + analyzer.model_name: analyzer
                              for analyzer in list_analyzer}
        logging.logger.info("housekeeping - list analyzer have been updated")
        self.analyzers_updated = True

    def stop_housekeeping(self):
        """Ask the thread to stop and block until it has finished."""
        self.shutdown_flag.set()
        self.join()

    def remove_all_whitelisted_outliers(self):
        """
        Try to remove all whitelisted outliers that are already stored in Elasticsearch,
        if enabled in the configuration file. Errors are logged, never propagated.
        """
        if not settings.config.getboolean("general", "es_wipe_all_whitelisted_outliers"):
            return

        try:
            logging.logger.info("housekeeping - going to remove all whitelisted outliers")
            total_docs_whitelisted = es.remove_all_whitelisted_outliers(self.dict_analyzer)

            if total_docs_whitelisted > 0:
                logging.logger.info("housekeeping - total whitelisted documents cleared from outliers: " +
                                    "{:,}".format(total_docs_whitelisted))
            else:
                logging.logger.info("housekeeping - whitelist did not remove any outliers")
        except Exception:
            logging.logger.error("housekeeping - something went wrong removing whitelisted outliers", exc_info=True)

        logging.logger.info("housekeeping - finished round of cleaning whitelisted items")
class HousekeepingJob(threading.Thread):
    """
    Background thread that removes whitelisted outliers from Elasticsearch
    whenever the whitelist-related configuration changes on disk.
    """

    def __init__(self):
        threading.Thread.__init__(self)

        # Watch the configuration files passed on the command line for changes.
        self.file_mod_watcher = FileModificationWatcher()
        self.file_mod_watcher.add_files(settings.args.config)

        # Snapshot of the whitelist settings, used to detect relevant changes later on.
        self.last_config_parameters = self._get_config_whitelist_parameters()

        # The shutdown_flag is a threading.Event object that
        # indicates whether the thread should be terminated.
        self.shutdown_flag = threading.Event()

    @staticmethod
    def _get_config_whitelist_parameters():
        """Return the configuration values that influence whitelist housekeeping."""
        return {
            'whitelist_literals': settings.config.items("whitelist_literals"),
            'whitelist_regexps': settings.config.items("whitelist_regexps"),
            'es_wipe_all_whitelisted_outliers': settings.config.getboolean("general",
                                                                           "es_wipe_all_whitelisted_outliers")
        }

    def run(self):
        """Main loop: clean once at startup, then re-check every 5 seconds until shutdown."""
        logging.logger.info('housekeeping thread #%s started' % self.ident)

        # Remove all existing whitelisted items if needed
        self.remove_all_whitelisted_outliers()

        while not self.shutdown_flag.is_set():
            # FIX: wait() returns True when the shutdown flag was set; in that case
            # skip the housekeeping round instead of doing one final pass on shutdown.
            if not self.shutdown_flag.wait(5):
                self.execute_housekeeping()

        logging.logger.info('housekeeping thread #%s stopped' % self.ident)

    def execute_housekeeping(self):
        """Reload the configuration when files changed; clean up if whitelist settings changed."""
        if self.file_mod_watcher.files_changed():
            # reload configuration file, in case new whitelisted items were added by the analyst, they
            # should be processed!
            settings.process_configuration_files()

            # FIX: compute the current parameters once instead of twice; only re-run
            # the (expensive) cleanup when a whitelist-related setting actually changed.
            current_config_parameters = self._get_config_whitelist_parameters()
            if self.last_config_parameters != current_config_parameters:
                self.last_config_parameters = current_config_parameters
                logging.logger.info("housekeeping - changes detected in the whitelist configuration")
                self.remove_all_whitelisted_outliers()

    @staticmethod
    def remove_all_whitelisted_outliers():
        """
        Try to remove all whitelisted outliers that are already stored in Elasticsearch,
        if enabled in the configuration file. Errors are logged, never propagated.
        """
        if settings.config.getboolean("general", "es_wipe_all_whitelisted_outliers"):
            try:
                logging.logger.info("housekeeping - going to remove all whitelisted outliers")
                total_docs_whitelisted = es.remove_all_whitelisted_outliers()

                if total_docs_whitelisted > 0:
                    logging.logger.info("housekeeping - total whitelisted documents cleared from outliers: " +
                                        "{:,}".format(total_docs_whitelisted))
                else:
                    logging.logger.info("housekeeping - whitelist did not remove any outliers")
            except Exception:
                # FIX: error message previously read "something went removing" (missing "wrong")
                logging.logger.error("housekeeping - something went wrong removing whitelisted outliers",
                                     exc_info=True)

            logging.logger.info("housekeeping - finished round of cleaning whitelisted items")
logging.logger.warning( "no use cases were analyzed. are you sure your configuration file contains use cases, which are enabled?" ) return analyzed_models == len(analyzers_to_evaluate) # Run modes if settings.args.run_mode == "daemon": # In daemon mode, we also want to monitor the configuration file for changes. # In case of a change, we need to make sure that we are using this new configuration file for config_file in settings.args.config: logging.logger.info("monitoring configuration file " + config_file + " for changes") file_mod_watcher = FileModificationWatcher() file_mod_watcher.add_files(settings.args.config) # Initialize Elasticsearch connection es.init_connection() # Start housekeeping activities housekeeping_job = HousekeepingJob() housekeeping_job.start() num_runs = 0 first_run = True run_succeeded_without_errors = None while True: num_runs += 1