Ejemplo n.º 1
0
def test_drop_intents_below_freq():
    """Verify which intents remain after `drop_intents_below_freq`."""
    demo_data = training_data.load_data('data/examples/rasa/demo-rasa.json')

    # Cutoff 0: all four intents listed here must still be present.
    unfiltered = drop_intents_below_freq(demo_data, 0)
    every_intent = {'affirm', 'goodbye', 'greet', 'restaurant_search'}
    assert unfiltered.intents == every_intent

    # Cutoff 10: only these two intents must be left.
    filtered = drop_intents_below_freq(demo_data, 10)
    frequent_intents = {'affirm', 'restaurant_search'}
    assert filtered.intents == frequent_intents
Ejemplo n.º 2
0
def test_drop_intents_below_freq():
    """Check the intents kept by `drop_intents_below_freq` per cutoff."""
    demo_data = training_data.load_data("data/examples/rasa/demo-rasa.json")

    # Each entry pairs a cutoff with the exact intent set the filtered
    # training data must expose for that cutoff.
    expectations = (
        (0, {"affirm", "goodbye", "greet", "restaurant_search"}),
        (10, {"affirm", "restaurant_search"}),
    )

    for cutoff, expected_intents in expectations:
        filtered = drop_intents_below_freq(demo_data, cutoff)
        assert filtered.intents == expected_intents
Ejemplo n.º 3
0
def perform_nlu_cross_validation(config: Text, nlu: Text,
                                 kwargs: Optional[Dict[Text, Any]]):
    """Cross-validate an NLU configuration and log the evaluation results.

    Args:
        config: Passed to `rasa.nlu.config.load` to obtain the NLU config.
        nlu: Passed to `rasa.nlu.training_data.load_data` to obtain the data.
        kwargs: Optional extra arguments. The "folds" entry (default 3) sets
            the number of folds; the mapping is run through `minimal_kwargs`
            before being forwarded to `cross_validate`.
    """
    import rasa.nlu.config
    from rasa.nlu.test import (
        cross_validate,
        drop_intents_below_freq,
        return_entity_results,
        return_results,
    )

    options = kwargs or {}
    folds = int(options.get("folds", 3))

    nlu_config = rasa.nlu.config.load(config)
    # NOTE(review): `rasa.nlu.training_data` is not imported in this function;
    # presumably it is already loaded elsewhere -- confirm.
    training_examples = rasa.nlu.training_data.load_data(nlu)
    # The number of folds doubles as the cutoff handed to the intent filter.
    training_examples = drop_intents_below_freq(training_examples,
                                                cutoff=folds)

    cv_kwargs = minimal_kwargs(options, cross_validate)
    results, entity_results = cross_validate(
        training_examples, folds, nlu_config, **cv_kwargs
    )
    logger.info("CV evaluation (n={})".format(folds))

    # Each section is reported only when its result container is truthy
    # according to `any`.
    sections = (
        ("Intent evaluation results", results, return_results),
        ("Entity evaluation results", entity_results, return_entity_results),
    )
    for heading, section_results, report in sections:
        if not any(section_results):
            continue
        logger.info(heading)
        report(section_results.train, "train")
        report(section_results.test, "test")
Ejemplo n.º 4
0
def test_drop_intents_below_freq():
    """Check intent filtering and that lookup tables survive the filtering."""
    demo_path = "data/examples/rasa/demo-rasa.json"
    loaded = rasa.shared.nlu.training_data.loading.load_data(demo_path)

    # Merge in a lookup table so the last assertion can verify that the
    # filtered training data still carries it.
    with_lookup = TrainingData(lookup_tables=[{"lookup_table": "lookup_entry"}])
    td = loaded.merge(with_lookup)

    unfiltered = drop_intents_below_freq(td, 0)
    expected_all = {
        "affirm",
        "goodbye",
        "greet",
        "restaurant_search",
        "chitchat",
    }
    assert unfiltered.intents == expected_all

    filtered = drop_intents_below_freq(td, 10)
    expected_frequent = {"affirm", "restaurant_search"}
    assert filtered.intents == expected_frequent
    assert filtered.lookup_tables == td.lookup_tables
Ejemplo n.º 5
0
def perform_nlu_cross_validation(
    config: Text,
    data: TrainingData,
    output: Text,
    additional_arguments: Optional[Dict[Text, Any]],
) -> None:
    """Cross-validates a model configuration on the given data and logs results.

    Args:
        config: The model configuration; loaded via `rasa.nlu.config.load`.
        data: The data which is used for the cross-validation.
        output: Output location forwarded to `cross_validate`.
        additional_arguments: Optional extra arguments. The "folds" entry
            (default 3) sets the number of folds; the mapping is reduced with
            `minimal_kwargs` before being forwarded to `cross_validate`.
    """
    import rasa.nlu.config
    from rasa.nlu.test import (
        cross_validate,
        drop_intents_below_freq,
        log_entity_results,
        log_results,
    )

    options = additional_arguments or {}
    folds = int(options.get("folds", 3))

    nlu_config = rasa.nlu.config.load(config)
    # The number of folds doubles as the cutoff handed to the intent filter.
    filtered_data = drop_intents_below_freq(data, cutoff=folds)

    cv_kwargs = rasa.shared.utils.common.minimal_kwargs(options, cross_validate)
    intent_results, entity_results, response_selection_results = cross_validate(
        filtered_data, folds, nlu_config, output, **cv_kwargs
    )
    logger.info(f"CV evaluation (n={folds})")

    # (log heading, result container, logging function) for every result group;
    # a group is only reported when `any` of its container is truthy.
    report_sections = (
        ("Intent evaluation results", intent_results, log_results),
        ("Entity evaluation results", entity_results, log_entity_results),
        (
            "Response Selection evaluation results",
            response_selection_results,
            log_results,
        ),
    )
    for heading, section_results, log_section in report_sections:
        if not any(section_results):
            continue
        logger.info(heading)
        log_section(section_results.train, "train")
        log_section(section_results.test, "test")
Ejemplo n.º 6
0
def test_nlu_with_cross_validation(config: Text, nlu: Text, folds: int = 3):
    """Cross-validate an NLU configuration on NLU data and log the results.

    Args:
        config: Passed to `rasa.nlu.config.load` to obtain the NLU config.
        nlu: Passed to `rasa.nlu.training_data.load_data` to obtain the data.
        folds: Number of folds; converted with `int` before it is handed to
            `cross_validate`.
    """
    import rasa.nlu.config
    import rasa.nlu.test as nlu_test

    nlu_config = rasa.nlu.config.load(config)
    # NOTE(review): `rasa.nlu.training_data` is not imported in this function;
    # presumably it is already loaded elsewhere -- confirm.
    loaded = rasa.nlu.training_data.load_data(nlu)
    # The cutoff is fixed at 5 here, independent of `folds`.
    training_examples = nlu_test.drop_intents_below_freq(loaded, cutoff=5)

    intent_results, entity_results = nlu_test.cross_validate(
        training_examples, int(folds), nlu_config
    )
    logger.info("CV evaluation (n={})".format(folds))

    # Each section is reported only when its result container is truthy
    # according to `any`.
    sections = (
        ("Intent evaluation results", intent_results,
         nlu_test.return_results),
        ("Entity evaluation results", entity_results,
         nlu_test.return_entity_results),
    )
    for heading, section_results, report in sections:
        if not any(section_results):
            continue
        logger.info(heading)
        report(section_results.train, "train")
        report(section_results.test, "test")
Ejemplo n.º 7
0
def compare_nlu_models(
    configs: List[Text],
    nlu: Text,
    output: Text,
    runs: int,
    exclusion_percentages: List[int],
):
    """Trains multiple models, compares them and saves the results.

    Args:
        configs: Paths of the NLU configurations to compare; each model is
            named after its config file name without the extension.
        nlu: Passed to `load_data` to obtain the training data.
        output: Directory that is created and receives the results file.
        runs: Number of result slots prepared per model.
        exclusion_percentages: Forwarded unchanged to `compare_nlu`.
    """
    from rasa.nlu.test import drop_intents_below_freq
    from rasa.nlu.training_data import load_data
    from rasa.nlu.utils import write_json_to_file
    from rasa.utils.io import create_path
    from rasa.nlu.test import compare_nlu
    from rasa.core.test import plot_nlu_results

    training_examples = drop_intents_below_freq(load_data(nlu), cutoff=5)

    create_path(output)

    # Model name = config file name stripped of directory and extension.
    model_names = [
        os.path.splitext(os.path.basename(config_path))[0]
        for config_path in configs
    ]

    # One independent empty list per run for every model; the mapping is
    # handed to `compare_nlu` and written to disk afterwards.
    f1_score_results = {}
    for model_name in model_names:
        f1_score_results[model_name] = [[] for _ in range(runs)]

    training_examples_per_run = compare_nlu(
        configs,
        training_examples,
        exclusion_percentages,
        f1_score_results,
        model_names,
        output,
        runs,
    )

    write_json_to_file(os.path.join(output, RESULTS_FILE), f1_score_results)

    plot_nlu_results(output, training_examples_per_run)
Ejemplo n.º 8
0
def perform_nlu_cross_validation(
    config: Text,
    nlu: Text,
    output: Text,
    additional_arguments: Optional[Dict[Text, Any]],
):
    """Cross-validates an NLU configuration on NLU data and logs the results.

    Args:
        config: Passed to `rasa.nlu.config.load` to obtain the NLU config.
        nlu: Passed to `rasa.shared.nlu.training_data.loading.load_data` to
            obtain the data.
        output: Output location forwarded to `cross_validate`.
        additional_arguments: Optional extra arguments. The "folds" entry
            (default 3) sets the number of folds; the mapping is reduced with
            `minimal_kwargs` before being forwarded to `cross_validate`.
    """
    import rasa.nlu.config
    from rasa.nlu.test import (
        cross_validate,
        drop_intents_below_freq,
        log_entity_results,
        log_results,
    )

    def _log_section(heading, section_results, log_section):
        # Report a result group only when `any` of its container is truthy.
        if any(section_results):
            logger.info(heading)
            log_section(section_results.train, "train")
            log_section(section_results.test, "test")

    options = additional_arguments or {}
    folds = int(options.get("folds", 3))

    nlu_config = rasa.nlu.config.load(config)
    training_examples = rasa.shared.nlu.training_data.loading.load_data(nlu)
    # The number of folds doubles as the cutoff handed to the intent filter.
    training_examples = drop_intents_below_freq(training_examples, cutoff=folds)

    cv_kwargs = rasa.shared.utils.common.minimal_kwargs(options, cross_validate)
    intent_results, entity_results, response_selection_results = cross_validate(
        training_examples, folds, nlu_config, output, **cv_kwargs
    )
    logger.info(f"CV evaluation (n={folds})")

    _log_section("Intent evaluation results", intent_results, log_results)
    _log_section("Entity evaluation results", entity_results, log_entity_results)
    _log_section(
        "Response Selection evaluation results",
        response_selection_results,
        log_results,
    )