Beispiel #1
0
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""

    import rasa.nlu.train

    if additional_arguments is None:
        additional_arguments = {}

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))
        config = await file_importer.get_config()
        print_color("Training NLU model...",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)
        async with telemetry.track_model_training(file_importer,
                                                  model_type="nlu"):
            await rasa.nlu.train(
                config,
                file_importer,
                _train_path,
                fixed_model_name="nlu",
                persist_nlu_training_data=persist_nlu_training_data,
                **additional_arguments,
            )
        print_color("NLU model training completed.",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = await model.model_fingerprint(file_importer)

            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
Beispiel #2
0
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""

    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        print_color("Training Core model...",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)
        domain, config = await asyncio.gather(file_importer.get_domain(),
                                              file_importer.get_config())
        async with telemetry.track_model_training(file_importer,
                                                  model_type="core"):
            await rasa.core.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=os.path.join(_train_path,
                                         DEFAULT_CORE_SUBDIRECTORY_NAME),
                policy_config=config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
            )
        print_color("Core model training completed.",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)

        if train_path is None:
            # Only Core was trained.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
Beispiel #3
0
async def _do_training(
    file_importer: TrainingDataImporter,
    output_path: Text,
    train_path: Text,
    fingerprint_comparison_result: Optional[
        FingerprintComparisonResult] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    old_model_zip_path: Optional[Text] = None,
):
    if not fingerprint_comparison_result:
        fingerprint_comparison_result = FingerprintComparisonResult()

    interpreter_path = None
    if fingerprint_comparison_result.should_retrain_nlu():
        model_path = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
        )
        interpreter_path = os.path.join(model_path,
                                        DEFAULT_NLU_SUBDIRECTORY_NAME)
    else:
        print_color(
            "NLU data/configuration did not change. No need to retrain NLU model.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )

    if fingerprint_comparison_result.should_retrain_core():
        await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
            interpreter=_load_interpreter(interpreter_path)
            or _interpreter_from_previous_model(old_model_zip_path),
        )
    elif fingerprint_comparison_result.should_retrain_nlg():
        print_color(
            "Core stories/configuration did not change. "
            "Only the templates section has been changed. A new model with "
            "the updated templates will be created.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )
        await model.update_model_with_new_domain(file_importer, train_path)
    else:
        print_color(
            "Core stories/configuration did not change. No need to retrain Core model.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )
Beispiel #4
0
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    retrain_nlu: Union[bool, List[Text]] = True,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""

    import rasa.nlu.train

    if additional_arguments is None:
        additional_arguments = {}

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        # bf mod
        config = await file_importer.get_nlu_config(retrain_nlu)
        async with telemetry.track_model_training(file_importer, model_type="nlu"):
            for lang in config:
                if config[lang]:
                    print_color(
                        "Start training <{}> NLU model ...".format(lang),
                        color=rasa.shared.utils.io.bcolors.OKBLUE,
                    )
                    await rasa.nlu.train(
                        config[lang],
                        file_importer,
                        _train_path,
                        fixed_model_name="nlu-{}".format(lang),
                        persist_nlu_training_data=persist_nlu_training_data,
                        **additional_arguments,
                    )
                else:
                    print_color(
                        "NLU data for language <{}> didn't change, skipping training...".format(
                            lang
                        ),
                        color=rasa.shared.utils.io.bcolors.OKBLUE,
                    )
        # /bf mod
        print_color(
            "NLU model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = await model.model_fingerprint(file_importer)

            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
Beispiel #5
0
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        core_additional_arguments: Additional training parameters for core training.
        nlu_additional_arguments: Additional training parameters forwarded to training
                                  method of each NLU component.

    Returns:
        Path of the trained model archive.
    """

    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    # if stories.is_empty() and nlu_data.can_train_nlu_model():
    #     print_error(
    #         "No training data given. Please provide stories and NLU data in "
    #         "order to train a Rasa model using the '--data' argument."
    #     )
    #     return

    # if stories.is_empty():
    #     print_warning("No stories present. Just a Rasa NLU model will be trained.")
    #     return await _train_nlu_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         persist_nlu_training_data=persist_nlu_training_data,
    #         additional_arguments=nlu_additional_arguments,
    #     )

    # if nlu_data.can_train_nlu_model():
    #     print_warning("No NLU data present. Just a Rasa Core model will be trained.")
    #     return await _train_core_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         additional_arguments=core_additional_arguments,
    #     )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    if not force_training:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint, old_model, train_path
        )
    else:
        fingerprint_comparison = FingerprintComparisonResult(force_training=True)

    # bf mod >
    if fingerprint_comparison.nlu == True:  # replace True with list of all langs
        fingerprint_comparison.nlu = list(new_fingerprint.get("nlu-config", {}).keys())
    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    nlu_untrainable = [l for l, d in nlu_data.items() if d.is_empty()]
    fingerprint_comparison.core = fingerprint_comparison.core and not core_untrainable
    fingerprint_comparison.nlu = [
        l for l in fingerprint_comparison.nlu if l not in nlu_untrainable
    ]

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )
    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(lang),
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        async with telemetry.track_model_training(file_importer, model_type="rasa"):
            await _do_training(
                file_importer,
                output_path=output_path,
                train_path=train_path,
                fingerprint_comparison_result=fingerprint_comparison,
                fixed_model_name=fixed_model_name,
                persist_nlu_training_data=persist_nlu_training_data,
                core_additional_arguments=core_additional_arguments,
                nlu_additional_arguments=nlu_additional_arguments,
                old_model_zip_path=old_model,
            )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model