Ejemplo n.º 1
0
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """Train one model per (run, exclusion percentage, policy config).

    Every combination is trained in its own temporary directory and then
    packaged into ``<output_path>/run_<n>`` under the name
    ``<config file stem> + PERCENTAGE_KEY + <percentage>``.

    Args:
        story_file: Story file handed to the importer as its only data file.
        domain: Domain passed to both the importer and ``train``.
        output_path: Base directory receiving one ``run_<n>`` folder per run.
        exclusion_percentages: Values forwarded to ``train`` as
            ``exclusion_percentage``; ``None`` is treated as an empty list.
        policy_configs: Paths of the policy configurations to compare;
            ``None`` is treated as an empty list.
        runs: How many times the whole comparison is repeated.
        dump_stories: Forwarded unchanged to ``train``.
        additional_arguments: Forwarded unchanged to ``train``.
    """
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    percentages = exclusion_percentages or []
    configs = policy_configs or []

    for run_number in range(1, runs + 1):
        logging.info("Starting run {}/{}".format(run_number, runs))

        for round_number, percentage in enumerate(percentages, 1):
            for config_path in configs:
                importer = TrainingDataImporter.load_core_importer_from_config(
                    config_path, domain, [story_file]
                )

                # The config file name without its extension identifies the model.
                config_file = os.path.basename(config_path)
                config_name = os.path.splitext(config_file)[0]
                announcement = (
                    "Starting to train {} round {}/{}"
                    " with {}% exclusion"
                    ""
                ).format(config_name, round_number, len(percentages), percentage)
                logging.info(announcement)

                with TempDirectoryPath(tempfile.mkdtemp()) as workdir:
                    training = train(
                        domain,
                        importer,
                        workdir,
                        policy_config=config_path,
                        exclusion_percentage=percentage,
                        additional_arguments=additional_arguments,
                        dump_stories=dump_stories,
                    )
                    fingerprinting = model.model_fingerprint(importer)
                    # Training and fingerprinting are awaited together; only
                    # the fingerprint result is needed afterwards.
                    _, fingerprint = await asyncio.gather(training, fingerprinting)

                    model.package_model(
                        fingerprint=fingerprint,
                        output_directory=os.path.join(
                            output_path, "run_" + str(run_number)
                        ),
                        train_path=workdir,
                        fixed_model_name=config_name + PERCENTAGE_KEY + str(percentage),
                    )
Ejemplo n.º 2
0
async def _train_nlu_with_validated_data(
        file_importer: TrainingDataImporter,
        output: Text,
        train_path: Optional[Text] = None,
        fixed_model_name: Optional[Text] = None,
        persist_nlu_training_data: bool = False,
        retrain_nlu: Union[bool, List[Text]] = True) -> Optional[Text]:
    """Train one NLU model per language with validated training and config data.

    Args:
        file_importer: Importer providing the per-language NLU configuration
            (``get_nlu_config``) and the training data.
        output: Directory the packaged model is written to when training
            happens in a temporary directory.
        train_path: Existing directory to train into. If it is ``None`` or
            empty, a temporary directory is used and the result is packaged.
        fixed_model_name: Name forwarded to ``model.package_model``.
        persist_nlu_training_data: Forwarded to ``rasa.nlu.train``.
        retrain_nlu: Forwarded unchanged to ``file_importer.get_nlu_config``.

    Returns:
        The value returned by ``model.package_model`` when a temporary
        directory was used, otherwise ``train_path``.
    """
    import rasa.nlu.train

    # A single flag drives both the temp-dir creation and the packaging step.
    # Previously the first used truthiness and the second ``is None``, so an
    # empty-string ``train_path`` returned a temp dir that was already removed.
    use_temp_dir = not train_path

    with ExitStack() as stack:
        if use_temp_dir:
            # Create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))
        else:
            # The train path was provided, do nothing on exit.
            _train_path = train_path

        # bf mod
        config = await file_importer.get_nlu_config(retrain_nlu)
        for lang in config:
            if config[lang]:
                print_color("Start training {} NLU model ...".format(lang),
                            color=bcolors.OKBLUE)
                await rasa.nlu.train(
                    config[lang],
                    file_importer,
                    _train_path,
                    fixed_model_name="nlu-{}".format(lang),
                    persist_nlu_training_data=persist_nlu_training_data,
                )
            else:
                # A falsy per-language config is treated as "nothing to retrain".
                print_color(
                    "NLU data for language <{}> didn't change, skipping training..."
                    .format(lang),
                    color=bcolors.OKBLUE)
        # /bf mod
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if use_temp_dir:
            # Only NLU was trained: package before the temp dir disappears.
            new_fingerprint = await model.model_fingerprint(file_importer)

            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
Ejemplo n.º 3
0
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data.

    Args:
        file_importer: Importer providing the configuration (``get_config``)
            and the training data.
        output: Directory the packaged model is written to when training
            happens in a temporary directory.
        train_path: Existing directory to train into. If it is ``None`` or
            empty, a temporary directory is used and the result is packaged.
        fixed_model_name: Name forwarded to ``model.package_model``.
        persist_nlu_training_data: Forwarded to ``rasa.nlu.train``.
        additional_arguments: Extra keyword arguments expanded into the
            ``rasa.nlu.train`` call; ``None`` means no extra arguments.

    Returns:
        The value returned by ``model.package_model`` when a temporary
        directory was used, otherwise ``train_path``.
    """
    import rasa.nlu.train

    if additional_arguments is None:
        additional_arguments = {}

    # A single flag drives both the temp-dir creation and the packaging step.
    # Previously the first used truthiness and the second ``is None``, so an
    # empty-string ``train_path`` returned a temp dir that was already removed.
    use_temp_dir = not train_path

    with ExitStack() as stack:
        if use_temp_dir:
            # Create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))
        else:
            # The train path was provided, do nothing on exit.
            _train_path = train_path

        config = await file_importer.get_config()
        print_color("Training NLU model...",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)
        async with telemetry.track_model_training(file_importer,
                                                  model_type="nlu"):
            await rasa.nlu.train(
                config,
                file_importer,
                _train_path,
                fixed_model_name="nlu",
                persist_nlu_training_data=persist_nlu_training_data,
                **additional_arguments,
            )
        print_color("NLU model training completed.",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)

        if use_temp_dir:
            # Only NLU was trained: package before the temp dir disappears.
            new_fingerprint = await model.model_fingerprint(file_importer)

            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
Ejemplo n.º 4
0
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data.

    Args:
        file_importer: Importer providing the domain, the configuration and
            the training data.
        output: Directory the packaged model is written to when training
            happens in a temporary directory.
        train_path: Existing directory to train into. If it is ``None`` or
            empty, a temporary directory is used and the result is packaged.
        fixed_model_name: Name forwarded to ``model.package_model``.
        additional_arguments: Forwarded unchanged to ``rasa.core.train``.
        interpreter: Forwarded unchanged to ``rasa.core.train``.

    Returns:
        The value returned by ``model.package_model`` when a temporary
        directory was used, otherwise ``train_path``.
    """
    import rasa.core.train

    # A single flag drives both the temp-dir creation and the packaging step.
    # Previously the first used truthiness and the second ``is None``, so an
    # empty-string ``train_path`` returned a temp dir that was already removed.
    use_temp_dir = not train_path

    with ExitStack() as stack:
        if use_temp_dir:
            # Create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))
        else:
            # The train path was provided, do nothing on exit.
            _train_path = train_path

        # normal (not compare) training
        print_color("Training Core model...",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)
        domain, config = await asyncio.gather(file_importer.get_domain(),
                                              file_importer.get_config())
        async with telemetry.track_model_training(file_importer,
                                                  model_type="core"):
            await rasa.core.train(
                domain_file=domain,
                training_resource=file_importer,
                # Core artefacts live in their own subdirectory of the train path.
                output_path=os.path.join(_train_path,
                                         DEFAULT_CORE_SUBDIRECTORY_NAME),
                policy_config=config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
            )
        print_color("Core model training completed.",
                    color=rasa.shared.utils.io.bcolors.OKBLUE)

        if use_temp_dir:
            # Only Core was trained: package before the temp dir disappears.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
Ejemplo n.º 5
0
 def __init__(self, agentName, botconfig, data, **kwargs):
     """Train, persist and package an NLU model for one agent.

     Args:
         agentName: Agent name; used in the log message and as the folder
             name below ``./models/``. It is concatenated with string
             literals, so it has to be a ``str``.
         botconfig: Passed to ``config.load`` to build the trainer config.
         data: Passed to ``load_data`` to obtain the training data.
         **kwargs: Accepted but not used by this constructor.
     """
     progress_note = "Training Agent " + agentName + " in progress"
     logger.info(progress_note)

     examples = load_data(data)
     self.intents = list(examples.intents)
     self.entities = list(examples.entities)

     nlu_config = config.load(botconfig)
     nlu_trainer = Trainer(nlu_config)
     self.interpreter = nlu_trainer.train(examples)

     # Persist the trained pipeline, then bundle it into an archive that
     # lives in the same per-agent model folder.
     self.model_path = "./models/" + agentName + "/"
     persisted_dir = nlu_trainer.persist(self.model_path)
     self.tar_path = package_model(
         fingerprint=None,
         train_path=persisted_dir,
         output_directory=self.model_path,
     )

     # Archive name = archive path with the model folder removed; the version
     # is everything in front of the ".tar.gz" suffix.
     archive_name = self.tar_path.replace(self.model_path, "")
     self.model_name = archive_name
     suffix_start = archive_name.index(".tar.gz")
     self.model_version = archive_name[:suffix_start]
Ejemplo n.º 6
0
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data.

    Args:
        file_importer: Importer providing the domain, the Core configuration
            (``get_core_config``) and the training data.
        output: Directory the packaged model is written to when training
            happens in a temporary directory.
        train_path: Existing directory to train into. If it is ``None`` or
            empty, a temporary directory is used and the result is packaged.
        fixed_model_name: Name forwarded to ``model.package_model``.
        kwargs: Forwarded to ``rasa.core.train`` as its ``kwargs`` argument.

    Returns:
        The value returned by ``model.package_model`` when a temporary
        directory was used, otherwise ``train_path``.
    """
    import rasa.core.train

    # A single flag drives both the temp-dir creation and the packaging step.
    # Previously the first used truthiness and the second ``is None``, so an
    # empty-string ``train_path`` returned a temp dir that was already removed.
    use_temp_dir = not train_path

    with ExitStack() as stack:
        if use_temp_dir:
            # Create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))
        else:
            # The train path was provided, do nothing on exit.
            _train_path = train_path

        # normal (not compare) training
        print_color("Training Core model...", color=bcolors.OKBLUE)
        domain = await file_importer.get_domain()
        # bf mod: the Core-specific config is used instead of ``get_config()``.
        config = await file_importer.get_core_config()
        # /bf mod
        await rasa.core.train(domain_file=domain,
                              training_resource=file_importer,
                              output_path=os.path.join(_train_path, "core"),
                              policy_config=config,
                              kwargs=kwargs)
        print_color("Core model training completed.", color=bcolors.OKBLUE)

        if use_temp_dir:
            # Only Core was trained: package before the temp dir disappears.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
Ejemplo n.º 7
0
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data.

    Args:
        file_importer: Importer providing the configuration (``get_config``)
            and the training data.
        output: Directory the packaged model is written to when training
            happens in a temporary directory.
        train_path: Existing directory to train into. If it is ``None`` or
            empty, a temporary directory is used and the result is packaged.
        fixed_model_name: Name forwarded to ``model.package_model``.

    Returns:
        The value returned by ``model.package_model`` when a temporary
        directory was used, otherwise ``train_path``.
    """
    import rasa.nlu.train

    # A single flag drives both the temp-dir creation and the packaging step.
    # Previously the first used truthiness and the second ``is None``, so an
    # empty-string ``train_path`` returned a temp dir that was already removed.
    use_temp_dir = not train_path

    with ExitStack() as stack:
        if use_temp_dir:
            # Create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))
        else:
            # The train path was provided, do nothing on exit.
            _train_path = train_path

        config = await file_importer.get_config()
        print_color("Training NLU model...", color=bcolors.OKBLUE)
        # The training result is not needed here; the artefacts are read from
        # the train path when the model is packaged.
        await rasa.nlu.train(config,
                             file_importer,
                             _train_path,
                             fixed_model_name="nlu")
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if use_temp_dir:
            # Only NLU was trained: package before the temp dir disappears.
            new_fingerprint = await model.model_fingerprint(file_importer)

            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
Ejemplo n.º 8
0
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    dry_run: bool,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> TrainingResult:
    """Train a combined Rasa model (Core and NLU); call only from `train_async`.

    Depending on the available data this trains NLU only, Core only, both,
    or nothing at all when the latest model in `output_path` is still valid.

    Args:
        file_importer: `TrainingDataImporter` supplying stories and NLU data.
        train_path: Working directory in which training takes place.
        output_path: Directory holding previous models and receiving the new one.
        dry_run: If `True`, only report whether training would be needed and
            return the corresponding result code without training.
        force_training: If `True`, retrain even when nothing changed.
        fixed_model_name: Name under which the model is stored.
        persist_nlu_training_data: Whether the NLU training data is stored
            together with the model.
        core_additional_arguments: Extra parameters for Core training.
        nlu_additional_arguments: Extra parameters forwarded to the NLU training.
        model_to_finetune: Forwarded to the training helpers as the model
            (or model directory) to finetune.
        finetuning_epoch_fraction: Forwarded to the training helpers as the
            fraction of configured epochs to use when finetuning.

    Returns:
        A `TrainingResult` carrying either the dry-run code or the model path.
    """
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    fingerprint_comparison = model.should_retrain(
        new_fingerprint, old_model, train_path, force_training=force_training
    )

    if dry_run:
        exit_code, messages = dry_run_result(fingerprint_comparison)
        for message in messages:
            if exit_code > 0:
                print_warning(message)
            else:
                print_success(message)
        return TrainingResult(code=exit_code)

    if nlu_data.has_e2e_examples():
        rasa.shared.utils.common.mark_as_experimental_feature(
            "end-to-end training"
        )

    if stories.is_empty() and nlu_data.contains_no_pure_nlu_data():
        rasa.shared.utils.cli.print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return TrainingResult()

    # Finetuning settings are passed identically to every training helper.
    finetuning_options = {
        "model_to_finetune": model_to_finetune,
        "finetuning_epoch_fraction": finetuning_epoch_fraction,
    }

    if stories.is_empty():
        rasa.shared.utils.cli.print_warning(
            "No stories present. Just a Rasa NLU model will be trained."
        )
        nlu_only_model = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
            **finetuning_options,
        )
        return TrainingResult(model=nlu_only_model)

    # NLU is trained whenever there is any NLU example, e2e stories included.
    if nlu_data.contains_no_pure_nlu_data() and not nlu_data.has_e2e_examples():
        rasa.shared.utils.cli.print_warning(
            "No NLU data present. Just a Rasa Core model will be trained."
        )
        core_only_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
            **finetuning_options,
        )
        return TrainingResult(model=core_only_model)

    # The fingerprint and the latest model are looked up a second time here,
    # and the comparison is redone with the e2e information included.
    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    if force_training:
        fingerprint_comparison = FingerprintComparisonResult(force_training=True)
    else:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint,
            old_model,
            train_path,
            has_e2e_examples=nlu_data.has_e2e_examples(),
        )

    if not fingerprint_comparison.is_training_required():
        rasa.shared.utils.cli.print_success(
            "Nothing changed. You can use the old model stored at '{}'."
            "".format(os.path.abspath(old_model))
        )
        return TrainingResult(model=old_model)

    async with telemetry.track_model_training(file_importer, model_type="rasa"):
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
            old_model_zip_path=old_model,
            **finetuning_options,
        )

    packaged_model = model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
    return TrainingResult(model=packaged_model)
Ejemplo n.º 9
0
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Train a combined Rasa model (Core and NLU); call only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` supplying stories and NLU data.
        train_path: Working directory in which training takes place.
        output_path: Directory holding previous models and receiving the new one.
        force_training: If `True`, retrain even when nothing changed.
        fixed_model_name: Name under which the model is stored.
        persist_nlu_training_data: Whether the NLU training data is stored
            together with the model.
        kwargs: Additional training parameters.

    Returns:
        Path of the model archive to use, or `None` when there is no
        training data at all.
    """
    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    if stories.is_empty() and nlu_data.is_empty():
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return None

    if stories.is_empty():
        print_warning("No stories present. Just a Rasa NLU model will be trained.")
        nlu_only_model = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
        )
        return nlu_only_model

    if nlu_data.is_empty():
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        core_only_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )
        return core_only_model

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    # Start from a "forced" result; when training is not forced, it is
    # replaced by a real comparison against the previous model.
    comparison = FingerprintComparisonResult(force_training=force_training)
    if not force_training:
        comparison = model.should_retrain(new_fingerprint, old_model, train_path)

    if not comparison.is_training_required():
        print_success(
            "Nothing changed. You can use the old model stored at '{}'."
            "".format(os.path.abspath(old_model))
        )
        return old_model

    await _do_training(
        file_importer,
        output_path=output_path,
        train_path=train_path,
        fingerprint_comparison_result=comparison,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        kwargs=kwargs,
    )

    return model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
Ejemplo n.º 10
0
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Train a combined Rasa model (Core and NLU); call only from `train_async`.

    Args:
        file_importer: Importer supplying the fingerprint input, the stories
            and the NLU data.
        train_path: Working directory in which training takes place.
        output_path: Directory holding previous models and receiving the new one.
        force_training: If `True`, retrain even when nothing changed.
        fixed_model_name: Name under which the model is stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the model archive to use, or `None` when there is no
        training data at all.
    """
    # The fingerprint is computed up front, before any early return.
    new_fingerprint = await model.model_fingerprint(file_importer)

    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    if stories.is_empty() and nlu_data.is_empty():
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return None

    if stories.is_empty():
        print_warning("No stories present. Just a Rasa NLU model will be trained.")
        nlu_only_model = await _train_nlu_with_validated_data(
            file_importer, output=output_path, fixed_model_name=fixed_model_name
        )
        return nlu_only_model

    if nlu_data.is_empty():
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        core_only_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )
        return core_only_model

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = model.should_retrain(
        new_fingerprint, old_model, train_path
    )

    nothing_to_do = not (force_training or retrain_core or retrain_nlu)
    if nothing_to_do:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'."
            "".format(os.path.abspath(old_model))
        )
        return old_model

    await _do_training(
        file_importer,
        output_path=output_path,
        train_path=train_path,
        force_training=force_training,
        retrain_core=retrain_core,
        retrain_nlu=retrain_nlu,
        fixed_model_name=fixed_model_name,
        kwargs=kwargs,
    )

    return model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
Ejemplo n.º 11
0
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    additional_arguments: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Unlike the single-language flow there is no early return for missing
    stories or NLU data: the parts that cannot be trained are removed from
    the fingerprint comparison instead.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        additional_arguments: Additional training parameters.

    Returns:
        Path of the trained model archive, or the path of the previous model
        if no training was required.
    """

    stories, nlu_data = await asyncio.gather(file_importer.get_stories(),
                                             file_importer.get_nlu_data())

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    fingerprint_comparison = FingerprintComparisonResult(
        force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(new_fingerprint,
                                                      old_model, train_path)

    # bf mod >
    if fingerprint_comparison.nlu is True:
        # `True` means "retrain all": expand it to every language that has an
        # entry under the "nlu-config" key of the new fingerprint.
        fingerprint_comparison.nlu = list(
            new_fingerprint.get("nlu-config", {}).keys())
    elif not fingerprint_comparison.nlu:
        # A falsy non-list value (e.g. `False`) cannot be iterated by the
        # filter below; treat it as "no language needs retraining".
        fingerprint_comparison.nlu = []

    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    # NOTE(review): assumes `nlu_data` maps language -> training data; confirm.
    nlu_untrainable = [
        lang for lang, data in nlu_data.items() if data.is_empty()
    ]
    fingerprint_comparison.core = (fingerprint_comparison.core
                                   and not core_untrainable)
    fingerprint_comparison.nlu = [
        lang for lang in fingerprint_comparison.nlu
        if lang not in nlu_untrainable
    ]

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE)
    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(
                lang),
            color=bcolors.OKBLUE)
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=additional_arguments,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model
Ejemplo n.º 12
0
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Unlike the single-language flow there is no early return for missing
    stories or NLU data: the parts that cannot be trained are excluded from
    the retraining decision instead.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: Forwarded unchanged to `_do_training`.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, or the path of the previous model
        if no training was required.
    """
    new_fingerprint = await model.model_fingerprint(file_importer)

    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = model.should_retrain(new_fingerprint,
                                                     old_model, train_path)

    # bf mod
    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    # NOTE(review): assumes `nlu_data` maps language -> training data; confirm.
    nlu_untrainable = [
        lang for lang, data in nlu_data.items() if data.is_empty()
    ]
    retrain_core = retrain_core and not core_untrainable

    if retrain_nlu is True:
        # `True` means "retrain all": every language that has an entry under
        # the NLU data key of the new fingerprint is a candidate.
        from rasa.model import FINGERPRINT_NLU_DATA_KEY
        possible_retrains = new_fingerprint[FINGERPRINT_NLU_DATA_KEY].keys()
    else:
        # Otherwise the value is used as the candidate languages. A falsy
        # non-list value (e.g. `False`) cannot be iterated below, so it is
        # treated as "no language needs retraining".
        possible_retrains = retrain_nlu or []

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE)
    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(
                lang),
            color=bcolors.OKBLUE)
    retrain_nlu = [
        lang for lang in possible_retrains if lang not in nlu_untrainable
    ]
    # /bf mod

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            kwargs=kwargs,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model
Ejemplo n.º 13
0
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    dry_run: bool,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> TrainingResult:
    """Train a combined Rasa model (Core and NLU); call only via `train_async`.

    The fingerprint of the current training data is compared with the latest
    model found in `output_path`. Depending on that comparison and on which
    kinds of training data are available, the function reports what a training
    would do (dry run), trains NLU only, trains Core only, runs the combined
    training, or hands back the previously trained model.

    Args:
        file_importer: `TrainingDataImporter` providing stories, NLU data and
            the data used for fingerprinting.
        train_path: Directory in which the training is carried out.
        output_path: Directory searched for the latest model and used to store
            the newly packaged one.
        dry_run: If `True`, nothing is trained; the outcome of the fingerprint
            comparison is printed and returned as a result code instead.
        force_training: Forwarded to the fingerprint comparison so that a
            retraining can be requested even if the data did not change.
        fixed_model_name: Name under which the model should be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted together with the model.
        core_additional_arguments: Extra training parameters for Core.
        nlu_additional_arguments: Extra training parameters forwarded to the
            NLU training.

    Returns:
        A `TrainingResult`. It only carries a `code` for dry runs, is empty
        when no training data was given, and otherwise references the trained
        or reused model.
    """
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    fingerprint = await model.model_fingerprint(file_importer)
    previous_model = model.get_latest_model(output_path)
    comparison = model.should_retrain(
        fingerprint, previous_model, train_path, force_training
    )

    if dry_run:
        # Report only: a positive code is printed as a warning, anything else
        # as a success message.
        code, texts = dry_run_result(comparison)
        for text in texts:
            if code > 0:
                print_warning(text)
            else:
                print_success(text)
        return TrainingResult(code=code)

    # NOTE(review): the messages below imply that `can_train_nlu_model()` is
    # truthy when NLU data is *missing* -- confirm against its definition.
    if stories.is_empty() and nlu_data.can_train_nlu_model():
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return TrainingResult()

    if stories.is_empty():
        print_warning(
            "No stories present. Just a Rasa NLU model will be trained."
        )
        nlu_only_model = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
        )
        return TrainingResult(model=nlu_only_model)

    if nlu_data.can_train_nlu_model():
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained."
        )
        core_only_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
        )
        return TrainingResult(model=core_only_model)

    if not comparison.is_training_required():
        # The fingerprint comparison found nothing to retrain: reuse the
        # latest model.
        print_success(
            "Nothing changed. You can use the old model stored at '{}'."
            "".format(os.path.abspath(previous_model))
        )
        return TrainingResult(model=previous_model)

    async with telemetry.track_model_training(file_importer, model_type="rasa"):
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
            old_model_zip_path=previous_model,
        )

    packaged_model = model.package_model(
        fingerprint=fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
    return TrainingResult(model=packaged_model)
Ejemplo n.º 14
0
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    kwargs: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies.

    For every run, every exclusion percentage and every policy configuration
    one model is trained in a temporary directory and then packaged into
    `<output_path>/run_<run number>`. The packaged model is named after the
    policy class followed by the 1-based position of the exclusion percentage.

    Args:
        story_file: Story file handed to the training data importer.
        domain: Domain handed to the importer and to the training.
        output_path: Base directory that receives the `run_<n>` directories.
        exclusion_percentages: Percentages of training data to exclude; one
            training round is executed per entry. Defaults to no rounds.
        policy_configs: Policy configuration files to compare; each must
            define exactly one policy. Defaults to no configurations.
        runs: How many times the whole comparison is repeated.
        dump_stories: Forwarded to `train`.
        kwargs: Additional arguments forwarded to `train`.

    Raises:
        ValueError: If a policy configuration defines more than one policy.
    """
    from rasa.core import config
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        # `current_run` is the 1-based index of the round and is only used for
        # progress output and model naming; `percentage` is the actual share
        # of data to exclude.
        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:
                policies = config.load(policy_config)

                if len(policies) > 1:
                    raise ValueError(
                        "You can only specify one policy per model for comparison"
                    )

                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file])

                policy_name = type(policies[0]).__name__
                logging.info("Starting to train {} round {}/{}"
                             " with {}% exclusion"
                             "".format(policy_name, current_run,
                                       len(exclusion_percentages), percentage))

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    await train(
                        domain,
                        file_importer,
                        train_path,
                        policy_config=policy_config,
                        # Pass the configured percentage, not the round index,
                        # so the training matches what is logged above.
                        exclusion_percentage=percentage,
                        kwargs=kwargs,
                        dump_stories=dump_stories,
                    )

                    new_fingerprint = await model.model_fingerprint(
                        file_importer)

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = policy_name + str(current_run)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
Ejemplo n.º 15
0
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Run Core training on training and config data that was already validated.

    Args:
        file_importer: Importer supplying the domain, the config and the
            training data.
        output: Directory in which the packaged model is stored when no
            `train_path` was given.
        train_path: Directory to train in. If not given, a temporary directory
            is used and the result is packaged into `output`.
        fixed_model_name: Name for the packaged model.
        additional_arguments: Extra arguments forwarded to the Core training.
        interpreter: Interpreter forwarded to the Core training.
        model_to_finetune: Model to use as the base for finetuning, if any.
        finetuning_epoch_fraction: Forwarded to the lookup of the model that
            is going to be finetuned.

    Returns:
        The packaged model if `train_path` is `None`, otherwise the directory
        the training was carried out in.
    """
    import rasa.core.train

    with ExitStack() as stack:
        # Use the caller's directory if there is one (nothing to clean up);
        # otherwise train in a temp directory that is removed on exit.
        working_dir = train_path or stack.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )

        # normal (not compare) training
        rasa.shared.utils.cli.print_color(
            "Training Core model...", color=rasa.shared.utils.io.bcolors.OKBLUE
        )
        domain, policy_config = await asyncio.gather(
            file_importer.get_domain(), file_importer.get_config()
        )

        if model_to_finetune:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature"
            )
            model_to_finetune = await _core_model_for_finetuning(
                model_to_finetune,
                file_importer=file_importer,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )

            if not model_to_finetune:
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No Core model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'."
                )

        finetuning = model_to_finetune is not None
        async with telemetry.track_model_training(
            file_importer, model_type="core", is_finetuning=finetuning
        ):
            core_dir = os.path.join(working_dir, DEFAULT_CORE_SUBDIRECTORY_NAME)
            await rasa.core.train.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=core_dir,
                policy_config=policy_config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
                model_to_finetune=model_to_finetune,
            )
        rasa.shared.utils.cli.print_color(
            "Core model training completed.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )

        if train_path is not None:
            # Called with an explicit train path: packaging is left to the
            # caller.
            return working_dir

        # Only Core was trained.
        core_fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=core_fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="core-",
        )
Ejemplo n.º 16
0
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Run NLU training on training and config data that was already validated.

    Args:
        file_importer: Importer supplying the config and the training data.
        output: Directory in which the packaged model is stored when no
            `train_path` was given.
        train_path: Directory to train in. If not given, a temporary directory
            is used and the result is packaged into `output`.
        fixed_model_name: Name for the packaged model.
        persist_nlu_training_data: Forwarded to the NLU training.
        additional_arguments: Extra keyword arguments forwarded to the NLU
            training.
        model_to_finetune: Model to use as the base for finetuning, if any.
        finetuning_epoch_fraction: Forwarded to the lookup of the model that
            is going to be finetuned.

    Returns:
        The packaged model if `train_path` is `None`, otherwise the directory
        the training was carried out in.
    """
    import rasa.nlu.train

    extra_kwargs = {} if additional_arguments is None else additional_arguments

    with ExitStack() as stack:
        # Use the caller's directory if there is one (nothing to clean up);
        # otherwise train in a temp directory that is removed on exit.
        working_dir = train_path or stack.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )
        nlu_config = await file_importer.get_config()
        rasa.shared.utils.cli.print_color(
            "Training NLU model...", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if model_to_finetune:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature"
            )
            model_to_finetune = await _nlu_model_for_finetuning(
                model_to_finetune,
                file_importer,
                finetuning_epoch_fraction,
                called_from_combined_training=train_path is not None,
            )
            if not model_to_finetune:
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No NLU model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'."
                )

        finetuning = model_to_finetune is not None
        async with telemetry.track_model_training(
            file_importer, model_type="nlu", is_finetuning=finetuning
        ):
            await rasa.nlu.train.train(
                nlu_config,
                file_importer,
                working_dir,
                fixed_model_name="nlu",
                persist_nlu_training_data=persist_nlu_training_data,
                model_to_finetune=model_to_finetune,
                **extra_kwargs,
            )
        rasa.shared.utils.cli.print_color(
            "NLU model training completed.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )

        if train_path is not None:
            # Called with an explicit train path: packaging is left to the
            # caller.
            return working_dir

        # Only NLU was trained
        nlu_fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=nlu_fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )
Ejemplo n.º 17
0
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    retrain_nlu: Union[bool, List[Text]] = True,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Run per-language NLU training on already validated data.

    One NLU model named `nlu-<language>` is trained for every language whose
    entry in the importer's NLU config is non-empty; languages with an empty
    entry are reported as unchanged and skipped.

    Args:
        file_importer: Importer supplying the NLU config and training data.
        output: Directory in which the packaged model is stored when no
            `train_path` was given.
        train_path: Directory to train in. If not given, a temporary directory
            is used and the result is packaged into `output`.
        fixed_model_name: Name for the packaged model.
        persist_nlu_training_data: Forwarded to the NLU training.
        retrain_nlu: Passed to the importer's `get_nlu_config`; presumably
            `True` for all languages or a list of the languages to retrain
            (confirm against the importer).
        additional_arguments: Extra keyword arguments forwarded to the NLU
            training.

    Returns:
        The packaged model if `train_path` is `None`, otherwise the directory
        the training was carried out in.
    """

    import rasa.nlu.train

    extra_kwargs = {} if additional_arguments is None else additional_arguments

    with ExitStack() as stack:
        # Use the caller's directory if there is one (nothing to clean up);
        # otherwise train in a temp directory that is removed on exit.
        working_dir = train_path or stack.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )
        # bf mod
        configs_by_language = await file_importer.get_nlu_config(retrain_nlu)
        async with telemetry.track_model_training(file_importer, model_type="nlu"):
            for lang in configs_by_language:
                lang_config = configs_by_language[lang]
                if not lang_config:
                    print_color(
                        "NLU data for language <{}> didn't change, skipping training...".format(
                            lang
                        ),
                        color=rasa.shared.utils.io.bcolors.OKBLUE,
                    )
                    continue

                print_color(
                    "Start training <{}> NLU model ...".format(lang),
                    color=rasa.shared.utils.io.bcolors.OKBLUE,
                )
                # NOTE(review): `rasa.nlu.train` is called directly although
                # only the module of that name is imported above; presumably
                # the package re-exports the training function -- confirm.
                await rasa.nlu.train(
                    lang_config,
                    file_importer,
                    working_dir,
                    fixed_model_name="nlu-{}".format(lang),
                    persist_nlu_training_data=persist_nlu_training_data,
                    **extra_kwargs,
                )
        # /bf mod
        print_color(
            "NLU model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is not None:
            # Called with an explicit train path: packaging is left to the
            # caller.
            return working_dir

        # Only NLU was trained
        nlu_fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=nlu_fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )