def test_run_cv_evaluation(
        pretrained_embeddings_spacy_config: RasaNLUModelConfig,
        monkeypatch: MonkeyPatch):
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.json")

    nlu_config = RasaNLUModelConfig({
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
            {"name": "DIETClassifier", EPOCHS: 2},
        ],
    })

    # mock training
    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
    mock = Mock(return_value=Interpreter(trainer.pipeline, None))
    monkeypatch.setattr(Trainer, "train", mock)

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert all(key in intent_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values())
    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])
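Not part of the original listing: a minimal, self-contained sketch of the monkeypatch + Mock pattern the test above uses to skip real training. SlowTrainer and the test name are hypothetical; only unittest.mock.Mock and pytest's built-in monkeypatch fixture are assumed.

from unittest.mock import Mock


class SlowTrainer:
    """Hypothetical stand-in for a component whose train() is expensive."""

    def train(self, data):
        raise RuntimeError("real training would run here")


def test_training_is_mocked(monkeypatch):
    # replace the expensive method on the class (as the test above does with
    # Trainer.train) so every call returns a canned value instead
    mock = Mock(return_value="fake interpreter")
    monkeypatch.setattr(SlowTrainer, "train", mock)

    assert SlowTrainer().train("some data") == "fake interpreter"
    assert mock.call_count == 1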
Example #2
def cross_validate(
        data: TrainingData, n_folds: int,
        nlu_config: Union[RasaNLUModelConfig, Text]
) -> Tuple[CVEvaluationResult, CVEvaluationResult]:
    """Stratified cross validation on data.

    Args:
        data: Training Data
        n_folds: integer, number of cv folds
        nlu_config: nlu config (RasaNLUModelConfig) or path to a config file

    Returns:
        tuple of two CVEvaluationResult (intents and entities); each holds
        train and test dictionaries that map a metric name to a list with
        one entry per fold
    """
    from collections import defaultdict
    import shutil
    import tempfile

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)

    intent_train_results = defaultdict(list)
    intent_test_results = defaultdict(list)
    entity_train_results = defaultdict(lambda: defaultdict(list))
    entity_test_results = defaultdict(lambda: defaultdict(list))
    tmp_dir = tempfile.mkdtemp()  # scratch directory, removed after all folds

    for train, test in generate_folds(n_folds, data):
        interpreter = trainer.train(train)

        # calculate train accuracy
        intent_train_results, entity_train_results = combine_result(
            intent_train_results, entity_train_results, interpreter, train)
        # calculate test accuracy
        intent_test_results, entity_test_results = combine_result(
            intent_test_results, entity_test_results, interpreter, test)

    shutil.rmtree(tmp_dir, ignore_errors=True)

    return (
        CVEvaluationResult(dict(intent_train_results),
                           dict(intent_test_results)),
        CVEvaluationResult(dict(entity_train_results),
                           dict(entity_test_results)),
    )
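Not part of the original listing: a hedged usage sketch for the cross_validate variant above, assuming the function is importable in the current scope. The data path follows the first example, the config path is a placeholder, and the metric keys ("Accuracy", "Precision", "F1-score") follow the assertions in the first example; import paths may differ between Rasa versions.

from statistics import mean

import rasa.shared.nlu.training_data.loading

td = rasa.shared.nlu.training_data.loading.load_data(
    "data/examples/rasa/demo-rasa.json")

# the third argument may be a RasaNLUModelConfig or a path to a config file
intent_cv, entity_cv = cross_validate(td, 3, "config.yml")

# each CVEvaluationResult holds train/test dicts: metric name -> one value per fold
for metric, per_fold in intent_cv.test.items():
    print(f"{metric}: mean {mean(per_fold):.3f} over {len(per_fold)} folds")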
Example #3
def cross_validate(
    data: TrainingData,
    n_folds: int,
    nlu_config: Union[RasaNLUModelConfig, Text],
    report: Optional[Text] = None,
    successes: Optional[Text] = None,
    errors: Optional[Text] = "errors.json",
    confmat: Optional[Text] = None,
    histogram: Optional[Text] = None,
) -> Tuple[CVEvaluationResult, CVEvaluationResult]:
    """Stratified cross validation on data.

    Args:
        data: Training Data
        n_folds: integer, number of cv folds
        nlu_config: nlu config (RasaNLUModelConfig) or path to a config file
        report: path to folder where reports are stored
        successes: path to file that will contain success cases
        errors: path to file that will contain error cases
        confmat: path to file that will show the confusion matrix
        histogram: path to file that will show a histogram

    Returns:
        tuple of two CVEvaluationResult (intents and entities); each holds
        train and test dictionaries that map a metric name to a list with
        one entry per fold
    """
    from collections import defaultdict

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    if report:
        utils.create_dir(report)

    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)

    intent_train_metrics = defaultdict(list)  # type: IntentMetrics
    intent_test_metrics = defaultdict(list)  # type: IntentMetrics
    entity_train_metrics = defaultdict(
        lambda: defaultdict(list))  # type: EntityMetrics
    entity_test_metrics = defaultdict(
        lambda: defaultdict(list))  # type: EntityMetrics

    intent_test_results = []  # type: List[IntentEvaluationResult]
    entity_test_results = []  # type: List[EntityEvaluationResult]
    intent_classifier_present = False
    extractors = set()  # type: Set[Text]

    for train, test in generate_folds(n_folds, data):
        interpreter = trainer.train(train)

        # calculate train accuracy
        combine_result(intent_train_metrics, entity_train_metrics, interpreter,
                       train)
        # calculate test accuracy
        combine_result(
            intent_test_metrics,
            entity_test_metrics,
            interpreter,
            test,
            intent_test_results,
            entity_test_results,
        )

        if not extractors:
            extractors = get_entity_extractors(interpreter)

        if is_intent_classifier_present(interpreter):
            intent_classifier_present = True

    if intent_classifier_present:
        logger.info("Accumulated test folds intent evaluation results:")
        evaluate_intents(intent_test_results, report, successes, errors,
                         confmat, histogram)

    if extractors:
        logger.info("Accumulated test folds entity evaluation results:")
        evaluate_entities(entity_test_results, extractors, report)

    return (
        CVEvaluationResult(dict(intent_train_metrics),
                           dict(intent_test_metrics)),
        CVEvaluationResult(dict(entity_train_metrics),
                           dict(entity_test_metrics)),
    )
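Not part of the original listing: a hedged sketch of how the extra reporting arguments of the variant above could be passed. All output paths are placeholders; the training data is loaded as in the earlier sketch.

import rasa.shared.nlu.training_data.loading

td = rasa.shared.nlu.training_data.loading.load_data(
    "data/examples/rasa/demo-rasa.json")

intent_cv, entity_cv = cross_validate(
    td,
    5,
    "config.yml",                          # placeholder pipeline config
    report="reports/",                     # folder for per-label reports
    successes="successes.json",            # success cases
    errors="errors.json",                  # error cases
    confmat="confusion_matrix.png",        # confusion matrix plot
    histogram="confidence_histogram.png",  # confidence histogram plot
)
print(intent_cv.test)  # metric name -> one score per fold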
Example #4
def main(out_directory, config_directory, dataset_directory, n_folds):
    start = timer()
    if not os.path.exists(out_directory):
        os.mkdir(out_directory)
    else:
        # the requested directory already exists: fall back to the first
        # numbered variant that is still free and write results there
        count = 0
        out_directory_temp = out_directory
        while os.path.exists(out_directory_temp):
            out_directory_temp = out_directory + str(count)
            count += 1
        os.mkdir(out_directory_temp)
        out_directory = out_directory_temp + '/'

    config_size = len(os.listdir(config_directory))
    count_config = 0
    for config_filename in os.listdir(config_directory):
        count_config += 1
        print('######################################')
        print('CURRENT CONFIG :', config_filename, ' PROGRESS:', count_config,
              '/', config_size)
        print('######################################')
        start_config = timer()
        if config_filename.endswith(".yml"):
            config_path = os.path.join(config_directory, config_filename)
            config_name = config_filename.split('.')[0]
            out_config_directory = out_directory + config_name + '/'
            if not os.path.exists(out_config_directory):
                os.mkdir(out_config_directory)
            datasets_dir_out = 'Datasets_Results/'
            if not os.path.exists(out_config_directory + datasets_dir_out):
                os.mkdir(out_config_directory + datasets_dir_out)
            nlu_config = config.load(config_path)
            trainer = Trainer(nlu_config)
            trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
            datasets_results = []
            datasets_names = []
            for dataset_filename in os.listdir(dataset_directory):
                if dataset_filename.endswith(
                        ".json") or dataset_filename.endswith(".md"):
                    dataset_path = os.path.join(dataset_directory,
                                                dataset_filename)
                    dataset_name = dataset_filename.split('.')[0]

                    cross_val_results = run_benchmark(dataset_path, n_folds,
                                                      trainer)
                    # utils.write_json_to_file('new_result_test', cross_val_results)

                    dataset_result = sum_results(cross_val_results,
                                                 collect_report=True)
                    utils.write_json_to_file(
                        out_config_directory + datasets_dir_out +
                        dataset_name + '_Benchmark', dataset_result)
                    datasets_results.append(dataset_result)
                    datasets_names.append(dataset_filename)
            save_result_by_group(datasets_results, n_folds,
                                 out_config_directory, datasets_names)
            overhaul_result = sum_results(datasets_results)
            end_config = timer()
            # elapsed seconds for this config (not the absolute timer value)
            overhaul_result['time'] = str(end_config - start_config)
            utils.write_json_to_file(
                out_config_directory + 'Datasets_Mean_Result', overhaul_result)
    end = timer()
    logger.info("Finished evaluation in: " + str(end - start))