Example #1
async def _write_domain_to_file(
    domain_path: Text, events: List[Dict[Text, Any]], endpoint: EndpointConfig
) -> None:
    """Write an updated domain file to the file path."""

    create_path(domain_path)

    domain = await retrieve_domain(endpoint)
    old_domain = Domain.from_dict(domain)

    messages = _collect_messages(events)
    actions = _collect_actions(events)
    templates = NEW_TEMPLATES

    # TODO for now there is no way to distinguish between action and form
    collected_actions = list(
        {e["name"] for e in actions if e["name"] not in default_action_names()}
    )

    new_domain = Domain(
        intents=_intents_from_messages(messages),
        entities=_entities_from_messages(messages),
        slots=[],
        templates=templates,
        action_names=collected_actions,
        form_names=[],
    )

    old_domain.merge(new_domain).persist_clean(domain_path)
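A minimal sketch of how this coroutine might be driven outside the interactive session, assuming it lives alongside Rasa's interactive training helpers and that the serialised tracker events are already at hand; the endpoint URL, the tracker_dump variable, and the output path are illustrative assumptions:

import asyncio

from rasa.utils.endpoints import EndpointConfig

endpoint = EndpointConfig("http://localhost:5005")  # assumed Rasa server endpoint (illustrative URL)
events = tracker_dump["events"]  # assumed: serialised event dicts from a tracker dump
asyncio.run(_write_domain_to_file("domain_updated.yml", events, endpoint))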
Example #2
def write_text_to_file(filename: Union[Text, Path], content: Text) -> None:
    """Writes text to a file."""

    from rasa.utils import io as io_utils

    # Create parent directories
    io_utils.create_path(filename)

    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
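A quick usage sketch; the path and content below are illustrative. Because the helper calls create_path first, any missing parent directories are created before the file is written:

write_text_to_file("reports/summary.txt", "All stories exported successfully.\n")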
Example #3
def compare_nlu_models(
    configs: List[Text],
    nlu: Text,
    output: Text,
    runs: int,
    exclusion_percentages: List[int],
):
    """Trains multiple models, compares them and saves the results."""

    from rasa.nlu.test import drop_intents_below_freq
    from rasa.nlu.training_data import load_data
    from rasa.nlu.utils import write_json_to_file
    from rasa.utils.io import create_path
    from rasa.nlu.test import compare_nlu
    from rasa.core.test import plot_nlu_results

    data = load_data(nlu)
    data = drop_intents_below_freq(data, cutoff=5)

    create_path(output)

    bases = [os.path.basename(nlu_config) for nlu_config in configs]
    model_names = [os.path.splitext(base)[0] for base in bases]

    f1_score_results = {
        model_name: [[] for _ in range(runs)]
        for model_name in model_names
    }

    training_examples_per_run = compare_nlu(
        configs,
        data,
        exclusion_percentages,
        f1_score_results,
        model_names,
        output,
        runs,
    )

    f1_path = os.path.join(output, RESULTS_FILE)
    write_json_to_file(f1_path, f1_score_results)

    plot_nlu_results(output, training_examples_per_run)
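A minimal invocation sketch, assuming two NLU config files and an NLU training data file exist at the paths shown; all paths and the run settings are illustrative:

compare_nlu_models(
    configs=["config_supervised.yml", "config_pretrained.yml"],
    nlu="data/nlu.md",
    output="results/nlu_comparison",
    runs=3,
    exclusion_percentages=[0, 25, 50, 75],
)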
Example #4
async def _write_stories_to_file(
    export_story_path: Text, events: List[Dict[Text, Any]]
) -> None:
    """Write the conversation of the sender_id to the file paths."""

    sub_conversations = _split_conversation_at_restarts(events)

    create_path(export_story_path)

    if os.path.exists(export_story_path):
        append_write = "a"  # append if already exists
    else:
        append_write = "w"  # make a new file if not

    with open(export_story_path, append_write, encoding="utf-8") as f:
        for conversation in sub_conversations:
            parsed_events = rasa.core.events.deserialise_events(conversation)
            s = Story.from_events(parsed_events)
            f.write("\n" + s.as_story_string(flat=True))
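A sketch of driving this coroutine directly, assuming the serialised event dicts were collected from a tracker beforehand; the tracker_dump variable and the output path are illustrative:

import asyncio

events = tracker_dump["events"]  # assumed: serialised event dicts from a tracker dump
asyncio.run(_write_stories_to_file("stories/interactive_stories.md", events))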
Example #5
def compare_nlu(
    configs: List[Text],
    data: TrainingData,
    exclusion_percentages: List[int],
    f_score_results: Dict[Text, Any],
    model_names: List[Text],
    output: Text,
    runs: int,
) -> List[int]:
    """
    Trains and compares multiple NLU models.
    For each run and exclusion percentage, one model is trained per config file.
    Each model is trained only on the portion of training data that remains after
    the exclusion, and is then tested on the complete test data of that run.
    All results are stored in the provided output directory.

    Args:
        configs: config files needed for training
        data: training data
        exclusion_percentages: percentages of training data to exclude during comparison
        f_score_results: dictionary of model name to f-score results per run
        model_names: names of the models to train
        output: the output directory
        runs: number of comparison runs

    Returns: training examples per run
    """

    training_examples_per_run = []

    for run in range(runs):

        logger.info("Beginning comparison run {}/{}".format(run + 1, runs))

        run_path = os.path.join(output, "run_{}".format(run + 1))
        create_path(run_path)

        test_path = os.path.join(run_path, TEST_DATA_FILE)
        create_path(test_path)

        # Fresh train/test split for this run; the test set stays fixed while
        # the training set is progressively reduced below.
        train, test = data.train_test_split()
        write_to_file(test_path, test.as_markdown())

        training_examples_per_run = []

        for percentage in exclusion_percentages:
            percent_string = "{}%_exclusion".format(percentage)

            # Drop the configured percentage from the remaining training data.
            _, train = train.train_test_split(percentage / 100)
            training_examples_per_run.append(len(train.training_examples))

            model_output_path = os.path.join(run_path, percent_string)
            train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE)
            create_path(train_split_path)
            write_to_file(train_split_path, train.as_markdown())

            for nlu_config, model_name in zip(configs, model_names):
                logger.info(
                    "Evaluating configuration '{}' with {} training data.".format(
                        model_name, percent_string
                    )
                )

                try:
                    model_path = train_nlu(
                        nlu_config,
                        train_split_path,
                        model_output_path,
                        fixed_model_name=model_name,
                    )
                except Exception as e:
                    logger.warning(
                        "Training model '{}' failed. Error: {}".format(
                            model_name, str(e)
                        )
                    )
                    f_score_results[model_name][run].append(0.0)
                    continue

                model_path = os.path.join(get_model(model_path), "nlu")

                report_path = os.path.join(
                    model_output_path, "{}_report".format(model_name)
                )
                errors_path = os.path.join(report_path, "errors.json")
                result = run_evaluation(
                    test_path, model_path, report=report_path, errors=errors_path
                )

                f1 = result["intent_evaluation"]["f1_score"]
                f_score_results[model_name][run].append(f1)

    return training_examples_per_run
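The caller is expected to pre-populate f_score_results with one empty list per run for every model name, as compare_nlu_models in Example #3 does. A small sketch of that contract, assuming the training data file exists; the config names and paths are illustrative:

from rasa.nlu.training_data import load_data

data = load_data("data/nlu.md")
model_names = ["config_supervised", "config_pretrained"]
f_score_results = {name: [[] for _ in range(2)] for name in model_names}

examples_per_run = compare_nlu(
    ["config_supervised.yml", "config_pretrained.yml"],
    data,
    [0, 50],
    f_score_results,
    model_names,
    "results/nlu_comparison",
    runs=2,
)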