Example #1
import random
from datetime import datetime

import elasticsearch
import elasticsearch.exceptions
import elasticsearch.helpers

# Assumed to be provided by the surrounding project (not shown in this
# excerpt): the `es` and `logging` singletons (the latter exposing
# `logging.logger`) and the `load_analyzers()` helper.


def perform_analysis(housekeeping_job):
    """ The entrypoint for analysis
    :return: List of analyzers that have been processed and analyzed
    """
    analyzers = load_analyzers()
    housekeeping_job.update_analyzer_list(analyzers)

    # If an analyzer is enabled in run or test mode, add it to the list of analyzers to evaluate
    analyzers_to_evaluate = list()
    for analyzer in analyzers:
        # Analyzers that produced an error during configuration parsing should not be processed
        if analyzer.configuration_parsing_error:
            continue

        if (analyzer.model_settings["run_model"]
                or analyzer.model_settings["test_model"]):
            analyzers_to_evaluate.append(analyzer)

    # In case a single analyzer causes issues (for example, taking up too much time & resources), a naive
    # shuffle prevents that analyzer from blocking all the analyzers that come after it from running.
    random.shuffle(analyzers_to_evaluate)

    # Now it's time to actually evaluate all the models. We also add some information that will be useful
    # in the summary presented to the user after all the models have run.
    for index, analyzer in enumerate(analyzers_to_evaluate):
        try:
            analyzer.analysis_start_time = datetime.today().timestamp()
            analyzer.evaluate_model()
            analyzer.analysis_end_time = datetime.today().timestamp()
            analyzer.completed_analysis = True
            es.flush_bulk_actions()

            logging.logger.info(
                "finished processing use case - %d / %d [%.2f%% done]",
                index + 1, len(analyzers_to_evaluate),
                (index + 1) / float(len(analyzers_to_evaluate)) * 100)
        except elasticsearch.exceptions.NotFoundError:
            analyzer.index_not_found_analysis = True
            logging.logger.warning(
                "index %s does not exist, skipping use case",
                analyzer.model_settings["es_index"])
        except elasticsearch.helpers.BulkIndexError as e:
            analyzer.unknown_error_analysis = True
            logging.logger.error(
                "BulkIndexError while analyzing use case: %s", e.args[0],
                exc_info=False)
            logging.logger.debug(
                "Full stack trace and error message of BulkIndexError",
                exc_info=True)
        except Exception:  # pylint: disable=broad-except
            analyzer.unknown_error_analysis = True
            logging.logger.error("error while analyzing use case",
                                 exc_info=True)

    return analyzers_to_evaluate
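
To make the selection, shuffle, and progress arithmetic above concrete, here is a minimal self-contained sketch. The StubAnalyzer class and its constructor parameters are hypothetical stand-ins, modelled only on the attributes perform_analysis reads; they are not part of the project.

import random


class StubAnalyzer:
    """Hypothetical stand-in exposing only the attributes read above."""

    def __init__(self, name, run_model=True, test_model=False, parse_error=False):
        self.name = name
        self.configuration_parsing_error = parse_error
        self.model_settings = {"run_model": run_model, "test_model": test_model}

    def evaluate_model(self):
        print("evaluating %s" % self.name)


analyzers = [StubAnalyzer("a"), StubAnalyzer("b", parse_error=True),
             StubAnalyzer("c", run_model=False, test_model=True)]

# Keep only parseable analyzers that are enabled in run or test mode.
to_evaluate = [a for a in analyzers
               if not a.configuration_parsing_error
               and (a.model_settings["run_model"] or a.model_settings["test_model"])]

# Shuffle so one slow analyzer cannot always block the same successors.
random.shuffle(to_evaluate)

for index, analyzer in enumerate(to_evaluate):
    analyzer.evaluate_model()
    print("finished processing use case - %d / %d [%.2f%% done]"
          % (index + 1, len(to_evaluate), (index + 1) / len(to_evaluate) * 100))

Running this prints the two eligible analyzers in random order, followed by the same progress line the real function logs.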
Example #2
# Same third-party imports as Example #1 are assumed (random, datetime,
# elasticsearch), plus the project-level `settings`, `es` and `logging`
# singletons and the analyzer classes used below.
def perform_analysis():
    """ The entrypoint for analysis """
    analyzers = list()

    for config_section_name in settings.config.sections():
        _analyzer = None
        try:
            if config_section_name.startswith("simplequery_"):
                _analyzer = SimplequeryAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)

            elif config_section_name.startswith("metrics_"):
                _analyzer = MetricsAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)

            elif config_section_name.startswith("terms_"):
                _analyzer = TermsAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)

            elif config_section_name.startswith("beaconing_"):
                logging.logger.error(
                    "use of the beaconing model is deprecated, please use the terms model "
                    "with the coeff_of_variation trigger method to convert use case %s",
                    config_section_name)

            elif config_section_name.startswith("word2vec_"):
                _analyzer = Word2VecAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(_analyzer)
        except Exception:
            logging.logger.error("error while initializing analyzer %s",
                                 config_section_name,
                                 exc_info=True)

    analyzers_to_evaluate = list()

    for analyzer in analyzers:
        if analyzer.should_run_model or analyzer.should_test_model:
            analyzers_to_evaluate.append(analyzer)

    random.shuffle(analyzers_to_evaluate)

    for index, analyzer in enumerate(analyzers_to_evaluate):
        if analyzer.configuration_parsing_error:
            continue

        try:
            analyzer.analysis_start_time = datetime.today().timestamp()
            analyzer.evaluate_model()
            analyzer.analysis_end_time = datetime.today().timestamp()
            analyzer.completed_analysis = True

            logging.logger.info(
                "finished processing use case - %d/%d [%.2f%% done]",
                index + 1, len(analyzers_to_evaluate),
                (index + 1) / float(len(analyzers_to_evaluate)) * 100)
        except elasticsearch.exceptions.NotFoundError:
            analyzer.index_not_found_analysis = True
            logging.logger.warning(
                "index %s does not exist, skipping use case",
                analyzer.es_index)
        except Exception:
            analyzer.unknown_error_analysis = True
            logging.logger.error("error while analyzing use case",
                                 exc_info=True)
        finally:
            es.flush_bulk_actions(refresh=True)

    return analyzers_to_evaluate
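
The chain of prefix checks in this variant can also be expressed as a table-driven dispatch. The following is only a sketch of an equivalent restructuring, assuming the same analyzer classes; the names ANALYZER_CLASSES and build_analyzer are hypothetical and not part of the project.

# Hypothetical prefix-to-class table, assuming the analyzer classes above.
ANALYZER_CLASSES = {
    "simplequery_": SimplequeryAnalyzer,
    "metrics_": MetricsAnalyzer,
    "terms_": TermsAnalyzer,
    "word2vec_": Word2VecAnalyzer,
}


def build_analyzer(config_section_name):
    """Return a new analyzer for the section, or None for unknown prefixes."""
    for prefix, analyzer_class in ANALYZER_CLASSES.items():
        if config_section_name.startswith(prefix):
            return analyzer_class(config_section_name=config_section_name)
    return None

A table like this keeps adding a new analyzer type to a one-line change, at the cost of losing per-prefix special cases such as the deprecation warning for "beaconing_" sections.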
Example #3
# Same assumed imports as the examples above, plus the standard-library
# `traceback` module used in the exception handlers below.
def perform_analysis():
    """ The entrypoint for analysis """
    analyzers = list()

    for config_section_name in settings.config.sections():
        try:
            if config_section_name.startswith("simplequery_"):
                simplequery_analyzer = SimplequeryAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(simplequery_analyzer)

            elif config_section_name.startswith("metrics_"):
                metrics_analyzer = MetricsAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(metrics_analyzer)

            elif config_section_name.startswith("terms_"):
                terms_analyzer = TermsAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(terms_analyzer)

            elif config_section_name.startswith("beaconing_"):
                beaconing_analyzer = BeaconingAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(beaconing_analyzer)

            elif config_section_name.startswith("word2vec_"):
                word2vec_analyzer = Word2VecAnalyzer(
                    config_section_name=config_section_name)
                analyzers.append(word2vec_analyzer)

        except Exception:
            logging.logger.error(traceback.format_exc())

    analyzers_to_evaluate = list()

    for analyzer in analyzers:
        if analyzer.should_run_model or analyzer.should_test_model:
            analyzers_to_evaluate.append(analyzer)

    random.shuffle(analyzers_to_evaluate)
    analyzed_models = 0
    for analyzer in analyzers_to_evaluate:
        try:
            analyzer.evaluate_model()
            analyzed_models += 1
            logging.logger.info(
                "finished processing use case - %d/%d [%.2f%% done]",
                analyzed_models, len(analyzers_to_evaluate),
                analyzed_models / float(len(analyzers_to_evaluate)) * 100)
        except Exception:
            logging.logger.error(traceback.format_exc())
        finally:
            es.flush_bulk_actions(refresh=True)

    if analyzed_models == 0:
        logging.logger.warning(
            "no use cases were analyzed. are you sure your configuration file contains use cases that are enabled?"
        )

    return analyzed_models == len(analyzers_to_evaluate)
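
Unlike Example #1, which returns the evaluated analyzer list, this variant returns a boolean indicating whether every selected use case completed. A caller might turn that into a process exit code; the wiring below is a hypothetical sketch, not taken from the project.

import sys

if __name__ == "__main__":
    # Hypothetical caller: exit non-zero when at least one use case failed.
    all_succeeded = perform_analysis()
    sys.exit(0 if all_succeeded else 1)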