Example #1
def test_run_cv_evaluation_with_response_selector():
    training_data_obj = training_data.load_data("data/examples/rasa/demo-rasa.md")
    training_data_responses_obj = training_data.load_data(
        "data/examples/rasa/demo-rasa-responses.md"
    )
    training_data_obj = training_data_obj.merge(training_data_responses_obj)
    training_data_obj.fill_response_phrases()

    nlu_config = config.load(
        "sample_configs/config_embedding_intent_response_selector.yml"
    )

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        training_data_obj, n_folds, nlu_config
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert len(response_selection_results.train["Accuracy"]) == n_folds
    assert len(response_selection_results.train["Precision"]) == n_folds
    assert len(response_selection_results.train["F1-score"]) == n_folds
    assert len(response_selection_results.test["Accuracy"]) == n_folds
    assert len(response_selection_results.test["Precision"]) == n_folds
    assert len(response_selection_results.test["F1-score"]) == n_folds
    # No entity extractor in pipeline
    assert len(entity_results.train) == 0
    assert len(entity_results.test) == 0
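
The assertions above rely on cross_validate returning one metric value per fold, so the train/test entries are lists of length n_folds. A minimal, hypothetical sketch of how a caller might aggregate those per-fold lists (it reuses intent_results and n_folds from the test above; numpy is an assumption, not something the test imports):

import numpy as np

# Each metric entry holds one value per fold; an overall score is just the mean.
mean_train_f1 = np.mean(intent_results.train["F1-score"])
mean_test_f1 = np.mean(intent_results.test["F1-score"])
print(f"intent F1 over {n_folds} folds - train: {mean_train_f1:.3f}, test: {mean_test_f1:.3f}")
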
Example #2
def test_run_cv_evaluation():
    td = training_data.load_data("data/examples/rasa/demo-rasa.json")
    nlu_config = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml")

    n_folds = 2
    intent_results, entity_results = cross_validate(td, n_folds, nlu_config)

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["F1-score"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["F1-score"]) == n_folds
Example #3
def perform_nlu_cross_validation(config: Text, nlu: Text,
                                 kwargs: Optional[Dict[Text, Any]]):
    import rasa.nlu.config
    from rasa.nlu.test import (
        drop_intents_below_freq,
        cross_validate,
        return_results,
        return_entity_results,
    )

    kwargs = kwargs or {}
    folds = int(kwargs.get("folds", 3))
    nlu_config = rasa.nlu.config.load(config)
    data = rasa.nlu.training_data.load_data(nlu)
    data = drop_intents_below_freq(data, cutoff=folds)
    kwargs = minimal_kwargs(kwargs, cross_validate)
    results, entity_results = cross_validate(data, folds, nlu_config, **kwargs)
    logger.info("CV evaluation (n={})".format(folds))

    if any(results):
        logger.info("Intent evaluation results")
        return_results(results.train, "train")
        return_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        return_entity_results(entity_results.train, "train")
        return_entity_results(entity_results.test, "test")
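
The helper above filters kwargs through minimal_kwargs before forwarding them to cross_validate. A rough sketch of what such a filter could look like, assuming it simply drops keys that the target function does not accept (an illustration with a hypothetical name, not Rasa's implementation):

import inspect
from typing import Any, Callable, Dict, Text

def minimal_kwargs_sketch(kwargs: Dict[Text, Any], func: Callable) -> Dict[Text, Any]:
    # Keep only the keyword arguments that appear in the target function's signature.
    accepted = set(inspect.signature(func).parameters)
    return {key: value for key, value in kwargs.items() if key in accepted}
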
Example #4
def test_run_cv_evaluation(pretrained_embeddings_spacy_config):
    td = training_data.load_data("data/examples/rasa/demo-rasa.json")

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        pretrained_embeddings_spacy_config,
        successes=False,
        errors=False,
        disable_plotting=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["F1-score"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["F1-score"]) == n_folds
Example #5
def test_run_cv_evaluation_with_response_selector():
    training_data_obj = training_data.load_data(
        "data/examples/rasa/demo-rasa.md")
    training_data_responses_obj = training_data.load_data(
        "data/examples/rasa/demo-rasa-responses.md")
    training_data_obj = training_data_obj.merge(training_data_responses_obj)
    training_data_obj.fill_response_phrases()

    nlu_config = RasaNLUModelConfig({
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
            {"name": "DIETClassifier", EPOCHS: 2},
            {"name": "ResponseSelector", EPOCHS: 2},
        ],
    })

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        training_data_obj,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert len(response_selection_results.train["Accuracy"]) == n_folds
    assert len(response_selection_results.train["Precision"]) == n_folds
    assert len(response_selection_results.train["F1-score"]) == n_folds
    assert len(response_selection_results.test["Accuracy"]) == n_folds
    assert len(response_selection_results.test["Precision"]) == n_folds
    assert len(response_selection_results.test["F1-score"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["Accuracy"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["Precision"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["F1-score"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["Accuracy"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["Precision"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["F1-score"]) == n_folds
Example #6
def test_run_cv_evaluation(
        pretrained_embeddings_spacy_config: RasaNLUModelConfig,
        monkeypatch: MonkeyPatch):
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.json")

    nlu_config = RasaNLUModelConfig({
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
            {"name": "DIETClassifier", EPOCHS: 2},
        ],
    })

    # mock training: patch Trainer.train to return an untrained Interpreter so the
    # cross-validation runs without actually fitting the pipeline
    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
    mock = Mock(return_value=Interpreter(trainer.pipeline, None))
    monkeypatch.setattr(Trainer, "train", mock)

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert all(key in intent_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values())
    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])
Example #7
def test_run_cv_evaluation_no_entities():
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/test/demo-rasa-no-ents.yml")

    nlu_config = RasaNLUModelConfig({
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
            {"name": "DIETClassifier", EPOCHS: 25},
        ],
    })

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert all(key in intent_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values())

    assert len(entity_results.train) == 0
    assert len(entity_results.test) == 0
    assert len(entity_results.evaluation) == 0
Example #8
def perform_nlu_cross_validation(
    config: Text,
    data: TrainingData,
    output: Text,
    additional_arguments: Optional[Dict[Text, Any]],
) -> None:
    """Runs cross-validation on test data.

    Args:
        config: The model configuration.
        data: The data which is used for the cross-validation.
        output: Output directory for the cross-validation results.
        additional_arguments: Additional arguments which are passed to the
            cross-validation, like the number of folds or `disable_plotting`.
    """
    import rasa.nlu.config
    from rasa.nlu.test import (
        drop_intents_below_freq,
        cross_validate,
        log_results,
        log_entity_results,
    )

    additional_arguments = additional_arguments or {}
    folds = int(additional_arguments.get("folds", 3))
    nlu_config = rasa.nlu.config.load(config)
    data = drop_intents_below_freq(data, cutoff=folds)
    kwargs = rasa.shared.utils.common.minimal_kwargs(
        additional_arguments, cross_validate
    )
    results, entity_results, response_selection_results = cross_validate(
        data, folds, nlu_config, output, **kwargs
    )
    logger.info(f"CV evaluation (n={folds})")

    if any(results):
        logger.info("Intent evaluation results")
        log_results(results.train, "train")
        log_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        log_entity_results(entity_results.train, "train")
        log_entity_results(entity_results.test, "test")
    if any(response_selection_results):
        logger.info("Response Selection evaluation results")
        log_results(response_selection_results.train, "train")
        log_results(response_selection_results.test, "test")
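
Given the docstring above, a hypothetical call could look like this (paths and argument values are placeholders; "folds" is read inside the function and the remaining entries are reduced to whatever cross_validate accepts):

from rasa.shared.nlu.training_data import loading

nlu_data = loading.load_data("data/nlu.yml")
perform_nlu_cross_validation(
    config="config.yml",
    data=nlu_data,
    output="results/",
    additional_arguments={"folds": 5, "disable_plotting": True},
)
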
Example #9
def test_nlu_with_cross_validation(config: Text, nlu: Text, folds: int = 3):
    import rasa.nlu.config
    import rasa.nlu.test as nlu_test

    nlu_config = rasa.nlu.config.load(config)
    data = rasa.nlu.training_data.load_data(nlu)
    data = nlu_test.drop_intents_below_freq(data, cutoff=5)
    results, entity_results = nlu_test.cross_validate(data, int(folds), nlu_config)
    logger.info("CV evaluation (n={})".format(folds))

    if any(results):
        logger.info("Intent evaluation results")
        nlu_test.return_results(results.train, "train")
        nlu_test.return_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        nlu_test.return_entity_results(entity_results.train, "train")
        nlu_test.return_entity_results(entity_results.test, "test")
Example #10
def test_run_cv_evaluation(
        pretrained_embeddings_spacy_config: RasaNLUModelConfig):
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.json")

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        pretrained_embeddings_spacy_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert all(key in intent_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values())

    assert len(
        entity_results.train["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(
        entity_results.train["CRFEntityExtractor"]["F1-score"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["Accuracy"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["Precision"]) == n_folds
    assert len(
        entity_results.test["CRFEntityExtractor"]["F1-score"]) == n_folds

    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])
Example #11
def perform_nlu_cross_validation(
    config: Text,
    nlu: Text,
    output: Text,
    additional_arguments: Optional[Dict[Text, Any]],
):
    import rasa.nlu.config
    from rasa.nlu.test import (
        drop_intents_below_freq,
        cross_validate,
        log_results,
        log_entity_results,
    )

    additional_arguments = additional_arguments or {}
    folds = int(additional_arguments.get("folds", 3))
    nlu_config = rasa.nlu.config.load(config)
    data = rasa.shared.nlu.training_data.loading.load_data(nlu)
    data = drop_intents_below_freq(data, cutoff=folds)
    kwargs = rasa.shared.utils.common.minimal_kwargs(additional_arguments,
                                                     cross_validate)
    results, entity_results, response_selection_results = cross_validate(
        data, folds, nlu_config, output, **kwargs)
    logger.info(f"CV evaluation (n={folds})")

    if any(results):
        logger.info("Intent evaluation results")
        log_results(results.train, "train")
        log_results(results.test, "test")
    if any(entity_results):
        logger.info("Entity evaluation results")
        log_entity_results(entity_results.train, "train")
        log_entity_results(entity_results.test, "test")
    if any(response_selection_results):
        logger.info("Response Selection evaluation results")
        log_results(response_selection_results.train, "train")
        log_results(response_selection_results.test, "test")
Example #12
def CV_eval(td_file, config_file, Nfolds=10):
    # evaluates the given config on the training data using cross-validation

    td = load_data(td_file)
    configuration = config.load(config_file)
    cross_validate(td, Nfolds, configuration)
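
A hypothetical invocation of the helper above, with placeholder file paths:

CV_eval("data/nlu_training_data.json", "nlu_config.yml", Nfolds=5)
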
def test_run_cv_evaluation_with_response_selector():
    training_data_obj = rasa.shared.nlu.training_data.loading.load_data(
        "data/test/demo-rasa-more-ents-and-multiplied.yml")
    training_data_responses_obj = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa-responses.yml")
    training_data_obj = training_data_obj.merge(training_data_responses_obj)

    nlu_config = RasaNLUModelConfig({
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
            {"name": "DIETClassifier", EPOCHS: 25},
            {"name": "ResponseSelector", EPOCHS: 2},
        ],
    })

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        training_data_obj,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert all(key in intent_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values())

    assert len(response_selection_results.train["Accuracy"]) == n_folds
    assert len(response_selection_results.train["Precision"]) == n_folds
    assert len(response_selection_results.train["F1-score"]) == n_folds
    assert len(response_selection_results.test["Accuracy"]) == n_folds
    assert len(response_selection_results.test["Precision"]) == n_folds
    assert len(response_selection_results.test["F1-score"]) == n_folds
    assert all(key in response_selection_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None for intent_report in
        response_selection_results.evaluation["report"].values())

    assert len(entity_results.train["DIETClassifier"]["Accuracy"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["Precision"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["F1-score"]) == n_folds

    assert len(entity_results.test["DIETClassifier"]["Accuracy"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["Precision"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["F1-score"]) == n_folds
    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])