def run_interactive_mode():
    """
    Run outliers in interactive mode.

    In this mode, outliers will run once and then stop.
    """
    # Initialize Elasticsearch connection, retrying every minute until it succeeds
    while not es.init_connection():
        time.sleep(60)

    # Wipe all existing outliers first if the configuration asks for it
    if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
        es.remove_all_outliers()

    # Make sure housekeeping is up and running
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    # The difference with daemon mode is that in interactive mode, we want to allow the user to stop execution on the
    # command line, interactively.
    try:
        analyzed_models = perform_analysis(housekeeping_job)
        print_analysis_summary(analyzed_models)
    except KeyboardInterrupt:
        logging.logger.info("keyboard interrupt received, stopping housekeeping thread")
    finally:
        # Always ask housekeeping to stop, even when the analysis raised
        logging.logger.info("asking housekeeping jobs to shutdown after finishing")
        housekeeping_job.stop_housekeeping()

    logging.logger.info("finished performing outlier detection")
def run_interactive_mode():
    """Perform a single outlier-detection run, then shut everything down."""
    es.init_connection()

    # Optionally clear out any outliers left behind by previous runs
    if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
        es.remove_all_outliers()

    # Spin up the housekeeping thread before the analysis starts
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    try:
        analysis_results = perform_analysis()
        print_analysis_summary(analysis_results)
    except KeyboardInterrupt:
        logging.logger.info("keyboard interrupt received, stopping housekeeping thread")
    except Exception:
        # Top-level boundary: log the full traceback instead of crashing silently
        logging.logger.error("error running outliers in interactive mode", exc_info=True)
    finally:
        # Signal the housekeeping thread to stop and wait until it has exited
        logging.logger.info("asking housekeeping jobs to shutdown after finishing")
        housekeeping_job.shutdown_flag.set()
        housekeeping_job.join()

    logging.logger.info("finished performing outlier detection")
def test_remove_outliers_give_zero_count_documents(self):
    """After wiping all outliers, the document count should drop to zero."""
    generated_count = 5
    self._generate_documents(generated_count)

    es.remove_all_outliers()

    self.assertEqual(es._count_documents(), 0)
def test_remove_outliers_give_empty_list(self):
    """After wiping all outliers, a scan over the index should yield nothing."""
    generated_count = 5
    self._generate_documents(generated_count)

    es.remove_all_outliers()

    remaining = list(es._scan())
    self.assertEqual(len(remaining), 0)
def run_daemon_mode():
    """
    Run outliers in daemon mode.

    In this mode, outliers will continue running based on the cron schedule defined in the
    configuration file.
    """
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file %s for changes", config_file)

    # Monitor configuration files for potential changes
    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection, retrying every minute until it succeeds
    while not es.init_connection():
        time.sleep(60)

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    first_run = True
    run_succeeded_without_errors = None

    # The daemon should run forever, until the user kills it
    while True:
        next_run = None
        should_schedule_next_run = False

        # This loop will run for as long as we don't need to perform an analysis.
        # It is skipped entirely on the first run and after an errored run, so those
        # cases start analyzing again right away.
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:
            # Check if we already know when to perform the analysis next; if not, we need to schedule it
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            # Schedule a next run based on the cron schedule defined in the configuration file
            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info("next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(next_run))
                should_schedule_next_run = False

            # Wait 5 seconds before checking the cron schedule again
            time.sleep(5)

        # Refresh settings in case the cron has changed for example
        settings.process_configuration_files()

        # On the first run, we might have to wipe all the existing outliers if this is set in the configuration file
        if first_run:
            first_run = False
            logging.logger.info("first run, so we will start immediately - after this, we will respect the cron "
                                "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
                logging.logger.info("wiping all existing outliers on first run")
                es.remove_all_outliers()

        # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
        # the connection in between runs
        while not es.init_connection():
            time.sleep(60)

        # Make sure housekeeping is up and running. A thread object can only be started once,
        # so when the previous housekeeping thread has terminated we need a fresh instance
        # before calling start() again - restarting a finished thread raises RuntimeError.
        if not housekeeping_job.is_alive():
            housekeeping_job = HousekeepingJob()
            housekeeping_job.start()

        # Perform analysis and print the analysis summary at the end
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis(housekeeping_job)
        print_analysis_summary(analyzed_models)

        errored_models = [analyzer for analyzer in analyzed_models if analyzer.unknown_error_analysis]

        # Check the result of the analysis. In case an error occurred, we want to re-run right away (after a minute)
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning("ran into errors while analyzing use cases - not going to wait for the cron "
                                   "schedule, we just start analyzing again after sleeping for a minute first")
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
def run_daemon_mode():
    """
    Run outliers in daemon mode, analyzing repeatedly based on the cron schedule
    defined in the configuration file.
    """
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file %s for changes", config_file)

    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    es.init_connection()

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    first_run = True
    run_succeeded_without_errors = None

    # The daemon should run forever, until the user kills it
    while True:
        next_run = None
        should_schedule_next_run = False

        # Idle until the next scheduled run. This loop is skipped on the first run and
        # after an errored run, so those cases start analyzing again right away.
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:
            # If we don't know yet when to perform the next analysis, schedule it
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info("next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(next_run))
                should_schedule_next_run = False

            # Wait 5 seconds before checking the cron schedule again
            time.sleep(5)

        settings.process_configuration_files()  # Refresh settings

        if first_run:
            first_run = False
            logging.logger.info("first run, so we will start immediately - after this, we will respect the cron "
                                "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
                logging.logger.info("wiping all existing outliers on first run")
                es.remove_all_outliers()
        else:
            # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
            # the connection in between runs
            es.init_connection()

        # Make sure housekeeping is up and running. A thread object can only be started once,
        # so when the previous housekeeping thread has terminated we need a fresh instance
        # before calling start() again - restarting a finished thread raises RuntimeError.
        if not housekeeping_job.is_alive():
            housekeeping_job = HousekeepingJob()
            housekeeping_job.start()

        # Perform analysis
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis()
        print_analysis_summary(analyzed_models)

        errored_models = [analyzer for analyzer in analyzed_models if analyzer.unknown_error_analysis]

        # Check the result of the analysis: in case of errors, re-run right away after a minute
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning("ran into errors while analyzing use cases - not going to wait for the cron "
                                   "schedule, we just start analyzing again after sleeping for a minute first")
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
should_schedule_next_run = False time.sleep(5) if first_run: first_run = False logging.logger.info( "first run, so we will start immediately - after this, we will respect the cron schedule defined in the configuration file" ) settings.process_arguments() # Refresh settings if settings.config.getboolean( "general", "es_wipe_all_existing_outliers") and num_runs == 1: logging.logger.info("wiping all existing outliers on first run") es.remove_all_outliers() logging.logger.info(settings.get_time_window_info()) # Make sure we are connected to Elasticsearch before analyzing, in case something went wrong with the connection in between runs es.init_connection() # Make sure housekeeping is still up and running if not housekeeping_job.is_alive(): housekeeping_job = HousekeepingJob() housekeeping_job.start() # Perform analysis logging.print_generic_intro("starting outlier detection") run_succeeded_without_errors = perform_analysis() if not run_succeeded_without_errors: