Exemple #1
0
def run_interactive_mode():
    """
    Run outliers in interactive mode.
    In this mode, outliers will run onces and then stop.
    """

    # Initialize Elasticsearch connection
    while not es.init_connection():
        time.sleep(60)

    if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
        es.remove_all_outliers()

    # Make sure housekeeping is up and running
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    # The difference with daemon mode is that in interactive mode, we want to allow the user to stop execution on the
    # command line, interactively.
    try:
        analyzed_models = perform_analysis(housekeeping_job)
        print_analysis_summary(analyzed_models)
    except KeyboardInterrupt:
        logging.logger.info(
            "keyboard interrupt received, stopping housekeeping thread")
    finally:
        logging.logger.info(
            "asking housekeeping jobs to shutdown after finishing")
        housekeeping_job.stop_housekeeping()

    logging.logger.info("finished performing outlier detection")
Exemple #2
0
def run_interactive_mode():
    es.init_connection()

    if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
        es.remove_all_outliers()

    # Make sure housekeeping is up and running
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    try:
        analyzed_models = perform_analysis()
        print_analysis_summary(analyzed_models)
    except KeyboardInterrupt:
        logging.logger.info(
            "keyboard interrupt received, stopping housekeeping thread")
    except Exception:
        logging.logger.error("error running outliers in interactive mode",
                             exc_info=True)
    finally:
        logging.logger.info(
            "asking housekeeping jobs to shutdown after finishing")
        housekeeping_job.shutdown_flag.set()
        housekeeping_job.join()

    logging.logger.info("finished performing outlier detection")
 def test_remove_outliers_give_zero_count_documents(self):
     nbr_generate = 5
     self._generate_documents(nbr_generate)
     es.remove_all_outliers()
     self.assertEqual(es._count_documents(), 0)
 def test_remove_outliers_give_empty_list(self):
     nbr_generate = 5
     self._generate_documents(nbr_generate)
     es.remove_all_outliers()
     result = [elem for elem in es._scan()]
     self.assertEqual(len(result), 0)
Exemple #5
0
def run_daemon_mode():
    """
    Run outliers in daemon mode.
    In this mode, outliers will continue running based on the cron scheduled defined in the configuration file.
    """

    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file %s for changes",
                            config_file)

    # Monitor configuration files for potential changes
    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    while not es.init_connection():
        time.sleep(60)

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    first_run = True
    run_succeeded_without_errors = None

    # The daemon should run forever, until the user kills it
    while True:
        next_run = None
        should_schedule_next_run = False

        # This loop will run for as long we don't need to perform an analysis
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:

            # Check if we already know when to perform the analysis next; if not, we need to schedule it
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            # Schedule a next rune based on the cron schedule defined in the configuration file
            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info(
                    "next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(
                        next_run))
                should_schedule_next_run = False

            # Wait 5 seconds before checking the cron schedule again
            time.sleep(5)

        # Refresh settings in case the cron has changed for example
        settings.process_configuration_files()

        # On the first run, we might have to wipe all the existing outliers if this is set in the configuration file
        if first_run:
            first_run = False
            logging.logger.info(
                "first run, so we will start immediately - after this, we will respect the cron "
                "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general",
                                          "es_wipe_all_existing_outliers"):
                logging.logger.info(
                    "wiping all existing outliers on first run")
                es.remove_all_outliers()

        # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
        # the connection in between runs
        while not es.init_connection():
            time.sleep(60)

        # Make sure housekeeping is up and running
        if not housekeeping_job.is_alive():
            housekeeping_job.start()

        # Perform analysis and print the analysis summary at the end
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis(housekeeping_job)
        print_analysis_summary(analyzed_models)

        errored_models = [
            analyzer for analyzer in analyzed_models
            if analyzer.unknown_error_analysis
        ]

        # Check the result of the analysis. In case an error occured, we want to re-run right away (after a minute)
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning(
                "ran into errors while analyzing use cases - not going to wait for the cron "
                "schedule, we just start analyzing again after sleeping for a minute first"
            )
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
Exemple #6
0
def run_daemon_mode():
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file " + config_file +
                            " for changes")

    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    es.init_connection()

    # Create housekeeping job, don't start it yet
    housekeeping_job = HousekeepingJob()

    num_runs = 0
    first_run = True
    run_succeeded_without_errors = None

    while True:
        num_runs += 1
        next_run = None
        should_schedule_next_run = False

        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:
            if next_run is None:
                should_schedule_next_run = True

            # Check for configuration file changes and load them in case it's needed
            if file_mod_watcher.files_changed():
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()
                should_schedule_next_run = True

            if should_schedule_next_run:
                next_run = croniter(settings.config.get("daemon", "schedule"),
                                    datetime.now()).get_next(datetime)
                logging.logger.info(
                    "next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(
                        next_run))
                should_schedule_next_run = False

            time.sleep(5)

        settings.process_configuration_files()  # Refresh settings

        if first_run:
            first_run = False
            logging.logger.info(
                "first run, so we will start immediately - after this, we will respect the cron "
                + "schedule defined in the configuration file")

            # Wipe all existing outliers if needed
            if settings.config.getboolean("general",
                                          "es_wipe_all_existing_outliers"):
                logging.logger.info(
                    "wiping all existing outliers on first run")
                es.remove_all_outliers()
        else:
            # Make sure we are still connected to Elasticsearch before analyzing, in case something went wrong with
            # the connection in between runs
            es.init_connection()

        # Make sure housekeeping is up and running
        if not housekeeping_job.is_alive():
            housekeeping_job.start()

        # Perform analysis
        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis()
        print_analysis_summary(analyzed_models)

        errored_models = [
            analyzer for analyzer in analyzed_models
            if analyzer.unknown_error_analysis
        ]

        # Check the result of the analysis
        if errored_models:
            run_succeeded_without_errors = False
            logging.logger.warning(
                "ran into errors while analyzing use cases - not going to wait for the cron "
                +
                "schedule, we just start analyzing again after sleeping for a minute first"
            )
            time.sleep(60)
        else:
            run_succeeded_without_errors = True

        logging.print_generic_intro("finished performing outlier detection")
Exemple #7
0
                should_schedule_next_run = False

            time.sleep(5)

        if first_run:
            first_run = False
            logging.logger.info(
                "first run, so we will start immediately - after this, we will respect the cron schedule defined in the configuration file"
            )

        settings.process_arguments()  # Refresh settings

        if settings.config.getboolean(
                "general", "es_wipe_all_existing_outliers") and num_runs == 1:
            logging.logger.info("wiping all existing outliers on first run")
            es.remove_all_outliers()

        logging.logger.info(settings.get_time_window_info())

        # Make sure we are connected to Elasticsearch before analyzing, in case something went wrong with the connection in between runs
        es.init_connection()

        # Make sure housekeeping is still up and running
        if not housekeeping_job.is_alive():
            housekeeping_job = HousekeepingJob()
            housekeeping_job.start()

        # Perform analysis
        logging.print_generic_intro("starting outlier detection")
        run_succeeded_without_errors = perform_analysis()
        if not run_succeeded_without_errors: