Python HousekeepingJob Examples, helpers.housekeeping.HousekeepingJob Python Examples

Example #1

0

Show file

File: test_housekeeping.py Project: benhe119/ee-outliers

    def test_housekeeping_correctly_remove_whitelisted_outlier_when_file_modification(
            self):
        """Rewrite the active configuration with a whitelist section and run housekeeping.

        The test stores a copy of ``doc_with_outlier_test_file``, replaces everything from the
        "# WHITELIST" marker onwards in the active configuration file with the content of
        ``test_file_whitelist_path_config``, runs housekeeping once and expects the first
        document returned by the test ES scan to equal ``doc_without_outlier_test_file``.
        """
        config_path = test_file_no_whitelist_path_config
        self.test_settings.change_configuration_path(config_path)
        housekeeping = HousekeepingJob()

        # Seed the test "database" with a private copy of the outlier document
        self.test_es.add_doc(copy.deepcopy(doc_with_outlier_test_file))

        # Keep every line of the current configuration that precedes the whitelist marker
        kept_lines = []
        with open(config_path, 'r') as config_file:
            for config_line in config_file:
                if "# WHITELIST" in config_line:
                    break
                kept_lines.append(config_line)

        # The replacement tail comes from the whitelist-enabled configuration
        with open(test_file_whitelist_path_config, 'r') as whitelist_file:
            whitelist_part = whitelist_file.read()

        with open(config_path, 'w') as config_file:
            config_file.write("".join(kept_lines) + whitelist_part)

        housekeeping.execute_housekeeping()

        # Only the first scanned document is compared
        stored_docs = list(self.test_es.scan())
        self.assertEqual(stored_docs[0],
                         copy.deepcopy(doc_without_outlier_test_file))

Example #2

0

Show file

File: test_housekeeping.py Project: benhe119/ee-outliers

    def test_housekeeping_not_execute_no_whitelist_parameter_change(self):
        """Append an unrelated section to the configuration and run housekeeping.

        After the run, the first document returned by the test ES scan is expected to still
        equal the outlier document that was stored beforehand.
        """
        self.test_settings.change_configuration_path(
            test_file_no_whitelist_path_config)
        housekeeping = HousekeepingJob()

        # Store a private copy of the outlier document in the test "database"
        doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
        self.test_es.add_doc(doc_with_outlier)

        # Append a new section that does not touch any whitelist parameter
        with open(test_file_no_whitelist_path_config, 'a') as config_file:
            config_file.write("[dummy_section]\nparam=1")

        housekeeping.execute_housekeeping()

        # Only the first scanned document is compared
        stored_docs = list(self.test_es.scan())
        self.assertEqual(stored_docs[0], doc_with_outlier)

Example #3

0

Show file

File: outliers.py Project: tsinan/ee-outliers

def run_interactive_mode():
    """
    Run a single outlier detection pass and then return.

    The function waits for an Elasticsearch connection, optionally wipes existing outliers
    (setting "general" / "es_wipe_all_existing_outliers"), starts the housekeeping job, performs
    one analysis and prints its summary. The housekeeping job is always asked to stop afterwards,
    including when the user interrupts the run from the keyboard.
    """
    # Retry once a minute until the Elasticsearch connection can be initialized
    while True:
        if es.init_connection():
            break
        time.sleep(60)

    wipe_requested = settings.config.getboolean("general", "es_wipe_all_existing_outliers")
    if wipe_requested:
        es.remove_all_outliers()

    # Housekeeping runs alongside the analysis
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    # Unlike daemon mode, a Ctrl-C here is an expected way to end the run: it is logged, not raised
    try:
        print_analysis_summary(perform_analysis(housekeeping_job))
    except KeyboardInterrupt:
        logging.logger.info("keyboard interrupt received, stopping housekeeping thread")
    finally:
        logging.logger.info("asking housekeeping jobs to shutdown after finishing")
        housekeeping_job.stop_housekeeping()

    logging.logger.info("finished performing outlier detection")

Example #4

0

Show file

File: test_housekeeping.py Project: nremeikis/ee-outliers

    def test_housekeeping_execute_no_whitelist_parameter_change(self):
        """Check that housekeeping runs when an unrelated section is appended to the configuration.

        A copy of ``doc_with_outlier_test_file`` is stored, a dummy section is appended to the
        active configuration file and housekeeping is executed once. The first document returned
        by the test ES scan is then expected to differ from the original outlier document.

        The configuration file is modified on disk, so it is restored in a ``finally`` clause:
        a failure inside housekeeping or the scan must not leave the modified file behind.
        """
        config_path = test_file_whitelist_dummy_reason_path_config
        self.test_settings.change_configuration_path(config_path)
        self._backup_config(config_path)
        try:
            housekeeping = HousekeepingJob()

            analyzer = AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf"
            )
            housekeeping.update_analyzer_list([analyzer])

            # Add document to "Database"; keep a separate copy to compare against afterwards
            doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            expected_doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            self.test_es.add_doc(doc_with_outlier)

            # Reset the recorded modification time of the file
            # (presumably so the watcher reports the upcoming write as a change - TODO confirm)
            housekeeping.file_mod_watcher._previous_mtimes[config_path] = 0

            # Update configuration (create new section and append to default)
            with open(config_path, 'a') as test_file:
                test_file.write("\n\n[dummy_section]\nparam=1")

            housekeeping.execute_housekeeping()

            # Fetch result: only the first scanned document is compared
            result = list(self.test_es._scan())[0]
        finally:
            # Always undo the on-disk modification, even when a step above raised
            self._restore_config(config_path)

        self.assertNotEqual(result, expected_doc_with_outlier)

Example #5

0

Show file

File: test_housekeeping.py Project: nremeikis/ee-outliers

    def test_whitelist_literals_per_model_not_removed_by_housekeeping(self):
        """Run housekeeping on a generated outlier document whose hostname is "NOT-WHITELISTED".

        The analyzer is created from the ``analyzer_dummy_test_with_whitelist.conf`` use case.
        After one housekeeping run, the first document returned by the test ES scan is expected
        to still contain an "outliers" entry in its "_source".
        """
        doc_generate = DummyDocumentsGenerate()
        self.test_settings.change_configuration_path(
            "/app/tests/unit_tests/files/housekeeping.conf")
        housekeeping = HousekeepingJob()

        # Build the document and make sure it really carries outlier information
        doc_fields = {
            "hostname": "NOT-WHITELISTED",
            "create_outlier": True,
            "outlier.model_name": "dummy_test",
            "outlier.model_type": "simplequery",
        }
        document = doc_generate.generate_document(doc_fields)
        self.assertTrue("outliers" in document["_source"])

        # Register the analyzer before the document is stored
        whitelist_analyzer = AnalyzerFactory.create(
            "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test_with_whitelist.conf"
        )
        housekeeping.update_analyzer_list([whitelist_analyzer])

        self.test_es.add_doc(document)
        housekeeping.execute_housekeeping()

        # Only the first scanned document is inspected
        stored_docs = list(self.test_es._scan())
        self.assertTrue("outliers" in stored_docs[0]["_source"])

Example #6

0

Show file

def run_interactive_mode():
    """
    Run one outlier detection pass and then return.

    Initializes the Elasticsearch connection, optionally wipes existing outliers (setting
    "general" / "es_wipe_all_existing_outliers"), starts the housekeeping job, performs the
    analysis and prints its summary. A keyboard interrupt or any other exception raised by the
    analysis is logged instead of propagated; in every case the housekeeping job is signalled
    through its shutdown flag and joined before the function returns.
    """
    es.init_connection()

    wipe_requested = settings.config.getboolean("general", "es_wipe_all_existing_outliers")
    if wipe_requested:
        es.remove_all_outliers()

    # Housekeeping runs alongside the analysis
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    try:
        print_analysis_summary(perform_analysis())
    except KeyboardInterrupt:
        logging.logger.info("keyboard interrupt received, stopping housekeeping thread")
    except Exception:
        logging.logger.error("error running outliers in interactive mode", exc_info=True)
    finally:
        logging.logger.info("asking housekeeping jobs to shutdown after finishing")
        # Signal first, then wait for the job to finish
        housekeeping_job.shutdown_flag.set()
        housekeeping_job.join()

    logging.logger.info("finished performing outlier detection")

Example #7

0

Show file

File: test_housekeeping.py Project: nremeikis/ee-outliers

    def test_housekeeping_correctly_remove_whitelisted_outlier_when_file_modification(
            self):
        """Rewrite the active configuration with a whitelist section and run housekeeping.

        A copy of ``doc_with_outlier_test_file`` is stored, everything from the "# WHITELIST"
        marker onwards in the active configuration file is replaced by the content of
        ``test_file_whitelist_path_config`` and housekeeping is executed once. The first
        document returned by the test ES scan is then expected to equal
        ``doc_without_outlier_test_file``.

        The configuration file is overwritten on disk, so it is restored in a ``finally``
        clause: a failure in any intermediate step must not leave the rewritten file behind.
        """
        config_path = test_file_no_whitelist_path_config
        self.test_settings.change_configuration_path(config_path)
        self._backup_config(config_path)
        try:
            housekeeping = HousekeepingJob()

            analyzer = AnalyzerFactory.create(
                "/app/tests/unit_tests/files/use_cases/housekeeping/analyzer_dummy_test.conf"
            )
            housekeeping.update_analyzer_list([analyzer])

            # Add document to "Database"
            doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
            self.test_es.add_doc(doc_with_outlier)

            # Reset the recorded modification time of the file
            # (presumably so the watcher reports the upcoming write as a change - TODO confirm)
            housekeeping.file_mod_watcher._previous_mtimes[config_path] = 0

            # Keep every line of the current configuration that precedes the whitelist marker
            kept_lines = []
            with open(config_path, 'r') as test_file:
                for line in test_file:
                    if "# WHITELIST" in line:
                        break
                    kept_lines.append(line)

            # Update configuration (read new config and append to default)
            with open(test_file_whitelist_path_config, 'r') as test_file:
                filecontent = "".join(kept_lines) + test_file.read()

            with open(config_path, 'w') as test_file:
                test_file.write(filecontent)

            housekeeping.execute_housekeeping()

            # Fetch result: only the first scanned document is compared
            result = list(self.test_es._scan())[0]
        finally:
            # Always undo the on-disk modification, even when a step above raised
            self._restore_config(config_path)

        # Compute expected result:
        doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
        # Show the complete diff when the documents differ
        self.maxDiff = None
        self.assertEqual(result, doc_without_outlier)

Example #8

0

Show file

File: outliers.py Project: tsinan/ee-outliers

def run_daemon_mode():
    """
    Run outliers as a long-lived daemon; this function never returns.

    The first analysis starts immediately. After a run without errored models, the next run is
    planned from the cron expression stored in the "daemon" / "schedule" setting; after a run with
    errored models, the analysis is repeated following a one-minute sleep instead. While waiting,
    the configuration files given on the command line are watched and reloaded when they change.
    """
    watched_configs = settings.args.config
    for config_path in watched_configs:
        logging.logger.info("monitoring configuration file %s for changes", config_path)

    # Watcher used to detect configuration changes while idling between runs
    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(watched_configs)

    # Retry once a minute until the Elasticsearch connection can be initialized
    while not es.init_connection():
        time.sleep(60)

    # The housekeeping job is only created here; it is started right before the first analysis
    housekeeping_job = HousekeepingJob()

    first_run = True
    run_succeeded_without_errors = None

    # Each iteration of this loop is: idle until due, refresh settings, analyze
    while True:
        next_run = None

        # Idle phase: skipped entirely on the first run and after a run that had errors
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:

            # Pick up configuration changes before (re)computing the schedule
            config_changed = file_mod_watcher.files_changed()
            if config_changed:
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()

            # (Re)schedule when nothing is planned yet or when the configuration was reloaded
            if next_run is None or config_changed:
                cron_schedule = settings.config.get("daemon", "schedule")
                next_run = croniter(cron_schedule, datetime.now()).get_next(datetime)
                logging.logger.info("next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(next_run))

            # Poll the schedule every 5 seconds
            time.sleep(5)

        # Reload the settings right before running
        settings.process_configuration_files()

        if first_run:
            first_run = False
            logging.logger.info("first run, so we will start immediately - after this, we will respect the cron "
                                "schedule defined in the configuration file")

            # Existing outliers are only ever wiped on the very first run
            if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
                logging.logger.info("wiping all existing outliers on first run")
                es.remove_all_outliers()

        # The connection may have dropped between runs, so it is (re)initialized before analyzing
        while not es.init_connection():
            time.sleep(60)

        if not housekeeping_job.is_alive():
            housekeeping_job.start()

        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis(housekeeping_job)
        print_analysis_summary(analyzed_models)

        # A single errored model is enough to trigger a quick re-run instead of waiting for the cron
        had_errors = any(analyzer.unknown_error_analysis for analyzer in analyzed_models)
        run_succeeded_without_errors = not had_errors
        if had_errors:
            logging.logger.warning("ran into errors while analyzing use cases - not going to wait for the cron "
                                   "schedule, we just start analyzing again after sleeping for a minute first")
            time.sleep(60)

        logging.print_generic_intro("finished performing outlier detection")

Example #9

0

Show file

def run_daemon_mode():
    """
    Run outliers as a long-lived daemon; this function never returns.

    The first analysis starts immediately. After a run without errored models, the next run is
    planned from the cron expression stored in the "daemon" / "schedule" setting; after a run with
    errored models, the analysis is repeated following a one-minute sleep instead. While waiting,
    the configuration files given on the command line are watched and reloaded when they change.
    """
    watched_configs = settings.args.config
    for config_path in watched_configs:
        logging.logger.info("monitoring configuration file " + config_path + " for changes")

    # Watcher used to detect configuration changes while idling between runs
    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(watched_configs)

    es.init_connection()

    # The housekeeping job is only created here; it is started right before the first analysis
    housekeeping_job = HousekeepingJob()

    num_runs = 0
    first_run = True
    run_succeeded_without_errors = None

    # Each iteration of this loop is: idle until due, refresh settings, analyze
    while True:
        num_runs += 1
        next_run = None

        # Idle phase: skipped entirely on the first run and after a run that had errors
        while (next_run is None or datetime.now() < next_run) and first_run is False and \
                run_succeeded_without_errors is True:

            # Pick up configuration changes before (re)computing the schedule
            config_changed = file_mod_watcher.files_changed()
            if config_changed:
                logging.logger.info("configuration file changed, reloading")
                settings.process_configuration_files()

            # (Re)schedule when nothing is planned yet or when the configuration was reloaded
            if next_run is None or config_changed:
                cron_schedule = settings.config.get("daemon", "schedule")
                next_run = croniter(cron_schedule, datetime.now()).get_next(datetime)
                logging.logger.info("next run scheduled on {0:%Y-%m-%d %H:%M:%S}".format(next_run))

            # Poll the schedule every 5 seconds
            time.sleep(5)

        # Reload the settings right before running
        settings.process_configuration_files()

        if first_run:
            first_run = False
            logging.logger.info("first run, so we will start immediately - after this, we will respect the cron "
                                "schedule defined in the configuration file")

            # Existing outliers are only ever wiped on the very first run
            if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
                logging.logger.info("wiping all existing outliers on first run")
                es.remove_all_outliers()
        else:
            # On later runs the connection is initialized again, in case it dropped in between
            es.init_connection()

        if not housekeeping_job.is_alive():
            housekeeping_job.start()

        logging.print_generic_intro("starting outlier detection")
        analyzed_models = perform_analysis()
        print_analysis_summary(analyzed_models)

        # A single errored model is enough to trigger a quick re-run instead of waiting for the cron
        had_errors = any(analyzer.unknown_error_analysis for analyzer in analyzed_models)
        run_succeeded_without_errors = not had_errors
        if had_errors:
            logging.logger.warning("ran into errors while analyzing use cases - not going to wait for the cron "
                                   "schedule, we just start analyzing again after sleeping for a minute first")
            time.sleep(60)

        logging.print_generic_intro("finished performing outlier detection")

Example #10

0

Show file

# Run modes
if settings.args.run_mode == "daemon":
    # In daemon mode, we also want to monitor the configuration file for changes.
    # In case of a change, we need to make sure that we are using this new configuration file
    for config_file in settings.args.config:
        logging.logger.info("monitoring configuration file " + config_file +
                            " for changes")

    file_mod_watcher = FileModificationWatcher()
    file_mod_watcher.add_files(settings.args.config)

    # Initialize Elasticsearch connection
    es.init_connection()

    # Start housekeeping activities
    housekeeping_job = HousekeepingJob()
    housekeeping_job.start()

    num_runs = 0
    first_run = True
    run_succeeded_without_errors = None

    while True:
        num_runs += 1
        next_run = None
        should_schedule_next_run = False

        while (
                next_run is None or datetime.now() < next_run
        ) and first_run is False and run_succeeded_without_errors is True:
            if next_run is None:

Example #11

0

Show file

File: outliers.py Project: databill86/ee-outliers

        if first_run:
            first_run = False
            logging.logger.info("first run, so we will start immediately - after this, we will respect the cron schedule defined in the configuration file")

        settings.process_arguments()  # Refresh settings
        es.init_connection()

        if settings.config.getboolean("general", "es_wipe_all_existing_outliers") and num_runs == 1:
            logging.logger.info("wiping all existing outliers on first run")
            es.remove_all_outliers()

        logging.logger.info(settings.get_time_window_info())

        # We place all of this in a try catch-all, so that any errors thrown by the analyzers (timeouts, errors) won't make the daemon loop stop
        housekeeping_job = HousekeepingJob()
        housekeeping_job.start()

        perform_analysis()

        logging.logger.info("asking housekeeping jobs to shutdown after finishing")
        housekeeping_job.shutdown_flag.set()
        housekeeping_job.join()

        logging.logger.info("finished performing outlier detection")

if settings.args.run_mode == "interactive":
    es.init_connection()

    if settings.config.getboolean("general", "es_wipe_all_existing_outliers"):
        es.remove_all_outliers()