コード例 #1
0
def train_nlu(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train an NLU-only model from a config file and NLU training data.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: Directory to train in. Forwarded unchanged to
            `_train_nlu_with_validated_data`, which trains in a temporary
            directory when this is `None`.
        fixed_model_name: Name of the model to be stored.

    Returns:
        The result of `_train_nlu_with_validated_data` (the packaged model
        when `train_path` is `None`, otherwise the training directory), or
        `None` when no NLU data was found.
    """
    # Only NLU is trained here, so the relevant training files still have to
    # be selected out of the provided data.
    selected_skills = SkillSelector.load(config, nlu_data)
    selected_nlu_directory = data.get_nlu_directory(nlu_data, selected_skills)

    with TempDirectoryPath(selected_nlu_directory) as nlu_data_directory:
        if os.listdir(nlu_data_directory):
            return _train_nlu_with_validated_data(
                config=config,
                nlu_data_directory=nlu_data_directory,
                output=output,
                train_path=train_path,
                fixed_model_name=fixed_model_name,
            )

        # Nothing to train on: report it and signal failure with `None`.
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument."
        )
        return None
コード例 #2
0
ファイル: train.py プロジェクト: zzBBc/rasa
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
) -> Optional[Text]:
    """Train an NLU model from already validated data and configuration.

    Args:
        file_importer: Importer providing the config and training data.
        output: Directory the packaged model is written to.
        train_path: Directory to train in. When falsy, a temporary directory
            is created and removed again when the function exits.
        fixed_model_name: Name forwarded to `model.package_model`.
        persist_nlu_training_data: Forwarded to `rasa.nlu.train`.

    Returns:
        The result of `model.package_model` when `train_path` is `None`,
        otherwise the directory that was trained in.
    """
    import rasa.nlu.train

    with ExitStack() as cleanup:
        # A caller-supplied directory is left untouched on exit; a temporary
        # one is registered on the stack so it gets cleaned up.
        working_dir = train_path or cleanup.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )

        nlu_config = await file_importer.get_config()

        print_color("Training NLU model...", color=bcolors.OKBLUE)
        _trainer, _nlu_model, _persisted = await rasa.nlu.train(
            nlu_config,
            file_importer,
            working_dir,
            fixed_model_name="nlu",
            persist_nlu_training_data=persist_nlu_training_data,
        )
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is not None:
            # Part of a larger training run: the caller packages the model.
            return working_dir

        # Only NLU was trained, so fingerprint and package it right away.
        fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )
コード例 #3
0
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train an NLU model from already validated data and configuration.

    Args:
        config: NLU configuration passed to `rasa.nlu.train`.
        nlu_data_directory: Directory containing the NLU training data.
        output: Directory the packaged model is written to.
        train_path: Directory to train in. When falsy, a temporary directory
            is created and removed again when the function exits.
        fixed_model_name: Name forwarded to `_package_model`.

    Returns:
        The result of `_package_model` when `train_path` is `None`,
        otherwise the directory that was trained in.
    """
    import rasa.nlu.train

    with ExitStack() as cleanup:
        # A caller-supplied directory is left untouched on exit; a temporary
        # one is registered on the stack so it gets cleaned up.
        working_dir = train_path or cleanup.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )

        print_color("Training NLU model...", color=bcolors.OKBLUE)
        _trainer, _nlu_model, _persisted = rasa.nlu.train(
            config, nlu_data_directory, working_dir, fixed_model_name="nlu"
        )
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is not None:
            # Part of a larger training run: the caller packages the model.
            return working_dir

        # Only NLU was trained, so fingerprint and package it right away.
        fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
        return _package_model(
            new_fingerprint=fingerprint,
            output_path=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )
コード例 #4
0
ファイル: train.py プロジェクト: udit-pandey/kairon
async def train_model(
    data_importer: TrainingDataImporter,
    output_path: Text,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """
    Train the bot; overrides the corresponding function from rasa.

    :param data_importer: TrainingDataImporter object
    :param output_path: model output path
    :param force_training: forwarded to `_train_async_internal`
    :param fixed_model_name: forwarded to the training helpers
    :param persist_nlu_training_data: forwarded to `_train_async_internal`
    :param additional_arguments: forwarded to `_train_async_internal`
    :return: model path
    """
    # The temporary training directory is removed when the block exits.
    with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
        domain = await data_importer.get_domain()

        if not domain.is_empty():
            return await _train_async_internal(
                data_importer,
                train_path,
                output_path,
                force_training,
                fixed_model_name,
                persist_nlu_training_data,
                additional_arguments,
            )

        # Empty domain: delegate to the fallback helper.
        return await handle_domain_if_not_exists(
            data_importer, output_path, fixed_model_name
        )
コード例 #5
0
ファイル: model_training.py プロジェクト: ducminh-phan/rasa
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Train an NLU model from already validated data and configuration.

    Args:
        file_importer: Importer providing the config and training data.
        output: Directory the packaged model is written to.
        train_path: Directory to train in. When falsy, a temporary directory
            is created and removed again when the function exits.
        fixed_model_name: Name forwarded to `model.package_model`.
        persist_nlu_training_data: Forwarded to `rasa.nlu.train.train`.
        additional_arguments: Extra keyword arguments for
            `rasa.nlu.train.train`; `None` means no extra arguments.
        model_to_finetune: When truthy, resolved through
            `_nlu_model_for_finetuning` and used as the base for training.
        finetuning_epoch_fraction: Forwarded to `_nlu_model_for_finetuning`.

    Returns:
        The result of `model.package_model` when `train_path` is `None`,
        otherwise the directory that was trained in.
    """
    import rasa.nlu.train

    extra_train_kwargs = {} if additional_arguments is None else additional_arguments

    with ExitStack() as cleanup:
        # A caller-supplied directory is left untouched on exit; a temporary
        # one is registered on the stack so it gets cleaned up.
        working_dir = train_path or cleanup.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )

        nlu_config = await file_importer.get_config()
        rasa.shared.utils.cli.print_color(
            "Training NLU model...", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        finetune_base = model_to_finetune
        if finetune_base:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature"
            )
            finetune_base = await _nlu_model_for_finetuning(
                finetune_base,
                file_importer,
                finetuning_epoch_fraction,
                called_from_combined_training=train_path is not None,
            )
            if not finetune_base:
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No NLU model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'."
                )

        tracking = telemetry.track_model_training(
            file_importer,
            model_type="nlu",
            is_finetuning=finetune_base is not None,
        )
        async with tracking:
            await rasa.nlu.train.train(
                nlu_config,
                file_importer,
                working_dir,
                fixed_model_name="nlu",
                persist_nlu_training_data=persist_nlu_training_data,
                model_to_finetune=finetune_base,
                **extra_train_kwargs,
            )
        rasa.shared.utils.cli.print_color(
            "NLU model training completed.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )

        if train_path is not None:
            # Part of a larger training run: the caller packages the model.
            return working_dir

        # Only NLU was trained, so fingerprint and package it right away.
        fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )
コード例 #6
0
ファイル: model_training.py プロジェクト: ducminh-phan/rasa
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output: Text = DEFAULT_MODELS_PATH,
    dry_run: bool = False,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> TrainingResult:
    """Train a combined Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        dry_run: If `True` then no training will be done, and the information
            about whether the training needs to be done will be printed.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        core_additional_arguments: Additional training parameters for core
            training.
        nlu_additional_arguments: Additional training parameters forwarded to
            training method of each NLU component.
        model_to_finetune: Optional path to a model which should be finetuned
            or a directory in case the latest trained model should be used.
        finetuning_epoch_fraction: The fraction currently specified training
            epochs in the model configuration which should be used for
            finetuning.

    Returns:
        An instance of `TrainingResult`.
    """
    file_importer = TrainingDataImporter.load_from_config(
        config, domain, training_files
    )

    # The temporary training directory is removed when the block exits.
    with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
        loaded_domain = await file_importer.get_domain()

        if not loaded_domain.is_empty():
            return await _train_async_internal(
                file_importer,
                train_path,
                output,
                dry_run,
                force_training,
                fixed_model_name,
                persist_nlu_training_data,
                core_additional_arguments=core_additional_arguments,
                nlu_additional_arguments=nlu_additional_arguments,
                model_to_finetune=model_to_finetune,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )

        # Empty domain: delegate to the fallback helper and wrap its result.
        fallback_model = await handle_domain_if_not_exists(
            file_importer, output, fixed_model_name
        )
        return TrainingResult(model=fallback_model)
コード例 #7
0
ファイル: model_training.py プロジェクト: ducminh-phan/rasa
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Train a Core model from already validated data and configuration.

    Args:
        file_importer: Importer providing the domain, config and training data.
        output: Directory the packaged model is written to.
        train_path: Directory to train in. When falsy, a temporary directory
            is created and removed again when the function exits.
        fixed_model_name: Name forwarded to `model.package_model`.
        additional_arguments: Forwarded to `rasa.core.train.train`.
        interpreter: Forwarded to `rasa.core.train.train`.
        model_to_finetune: When truthy, resolved through
            `_core_model_for_finetuning` and used as the base for training.
        finetuning_epoch_fraction: Forwarded to `_core_model_for_finetuning`.

    Returns:
        The result of `model.package_model` when `train_path` is `None`,
        otherwise the directory that was trained in.
    """
    import rasa.core.train

    with ExitStack() as cleanup:
        # A caller-supplied directory is left untouched on exit; a temporary
        # one is registered on the stack so it gets cleaned up.
        working_dir = train_path or cleanup.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )

        # Normal (not compare) training.
        rasa.shared.utils.cli.print_color(
            "Training Core model...", color=rasa.shared.utils.io.bcolors.OKBLUE
        )
        # Domain and config are fetched concurrently.
        domain, policy_config = await asyncio.gather(
            file_importer.get_domain(), file_importer.get_config()
        )

        finetune_base = model_to_finetune
        if finetune_base:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature"
            )
            finetune_base = await _core_model_for_finetuning(
                finetune_base,
                file_importer=file_importer,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )
            if not finetune_base:
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No Core model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'."
                )

        tracking = telemetry.track_model_training(
            file_importer,
            model_type="core",
            is_finetuning=finetune_base is not None,
        )
        async with tracking:
            core_output_path = os.path.join(
                working_dir, DEFAULT_CORE_SUBDIRECTORY_NAME
            )
            await rasa.core.train.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=core_output_path,
                policy_config=policy_config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
                model_to_finetune=finetune_base,
            )
        rasa.shared.utils.cli.print_color(
            "Core model training completed.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )

        if train_path is not None:
            # Part of a larger training run: the caller packages the model.
            return working_dir

        # Only Core was trained, so fingerprint and package it right away.
        fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="core-",
        )
コード例 #8
0
ファイル: train.py プロジェクト: vinit134/rasa
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    kwargs: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies.

    For every run, every exclusion percentage and every policy config, one
    model is trained in a temporary directory and packaged into
    `<output_path>/run_<run number>` under the name
    `<policy class name><round number>`.

    Args:
        story_file: Story file used as the only training file.
        domain: Domain passed to the importer and to `train`.
        output_path: Base directory for the per-run output directories.
        exclusion_percentages: Exclusion percentages to train with; each one
            is forwarded to `train` as `exclusion_percentage`. `None` means
            no rounds are trained.
        policy_configs: Policy configuration files, each of which must
            define exactly one policy. `None` means no models are trained.
        runs: Number of times the whole comparison is repeated.
        dump_stories: Forwarded to `train`.
        kwargs: Additional training arguments forwarded to `train`.

    Raises:
        ValueError: If a policy config defines more than one policy.
    """
    from rasa.core import config
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        run_number = r + 1
        logging.info("Starting run %s/%s", run_number, runs)

        # All models of one run share the same output directory.
        output_dir = os.path.join(output_path, "run_" + str(run_number))

        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:
                policies = config.load(policy_config)

                if len(policies) > 1:
                    raise ValueError(
                        "You can only specify one policy per model for comparison"
                    )

                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file]
                )

                policy_name = type(policies[0]).__name__
                logging.info(
                    "Starting to train %s round %s/%s with %s%% exclusion",
                    policy_name,
                    current_run,
                    len(exclusion_percentages),
                    percentage,
                )

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    await train(
                        domain,
                        file_importer,
                        train_path,
                        policy_config=policy_config,
                        # Pass the configured percentage, not the round
                        # index; the index is only used for naming.
                        exclusion_percentage=percentage,
                        kwargs=kwargs,
                        dump_stories=dump_stories,
                    )

                    new_fingerprint = await model.model_fingerprint(file_importer)

                    model_name = policy_name + str(current_run)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
コード例 #9
0
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    retrain_nlu: Union[bool, List[Text]] = True,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Train one NLU model per language from already validated data.

    Args:
        file_importer: Importer providing the per-language NLU config and
            the training data.
        output: Directory the packaged model is written to.
        train_path: Directory to train in. When falsy, a temporary directory
            is created and removed again when the function exits.
        fixed_model_name: Name forwarded to `model.package_model`.
        persist_nlu_training_data: Forwarded to `rasa.nlu.train`.
        retrain_nlu: Forwarded to `file_importer.get_nlu_config`.
        additional_arguments: Extra keyword arguments for `rasa.nlu.train`;
            `None` means no extra arguments.

    Returns:
        The result of `model.package_model` when `train_path` is `None`,
        otherwise the directory that was trained in.
    """
    import rasa.nlu.train

    extra_train_kwargs = {} if additional_arguments is None else additional_arguments

    with ExitStack() as cleanup:
        # A caller-supplied directory is left untouched on exit; a temporary
        # one is registered on the stack so it gets cleaned up.
        working_dir = train_path or cleanup.enter_context(
            TempDirectoryPath(tempfile.mkdtemp())
        )

        # bf mod
        configs_by_language = await file_importer.get_nlu_config(retrain_nlu)
        async with telemetry.track_model_training(file_importer, model_type="nlu"):
            for lang in configs_by_language:
                lang_config = configs_by_language[lang]

                if not lang_config:
                    # A falsy config means this language is skipped.
                    print_color(
                        "NLU data for language <{}> didn't change, skipping training...".format(
                            lang
                        ),
                        color=rasa.shared.utils.io.bcolors.OKBLUE,
                    )
                    continue

                print_color(
                    "Start training <{}> NLU model ...".format(lang),
                    color=rasa.shared.utils.io.bcolors.OKBLUE,
                )
                await rasa.nlu.train(
                    lang_config,
                    file_importer,
                    working_dir,
                    fixed_model_name="nlu-{}".format(lang),
                    persist_nlu_training_data=persist_nlu_training_data,
                    **extra_train_kwargs,
                )
        # /bf mod
        print_color(
            "NLU model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is not None:
            # Part of a larger training run: the caller packages the model.
            return working_dir

        # Only NLU was trained, so fingerprint and package it right away.
        fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )
コード例 #10
0
async def train_async(
    domain: Union[Domain, Text],
    config: Dict[Text, Text],
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Dict of paths to the config for Core and NLU. Keys are
            language codes.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    # for lang in config.keys():
    #     config[lang] = _get_valid_config(config[lang], CONFIG_MANDATORY_KEYS)

    # botfront: see how to re-enable skills
    skill_imports = None
    # skill_imports = SkillSelector.load(config, training_files)
    # botfront end

    try:
        domain = Domain.load(domain, skill_imports)
        domain.check_missing_templates()
    except InvalidDomain:
        # An invalid domain is handled by the fallback helper below.
        domain = None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )

    with ExitStack() as stack:
        # All three directories are registered on the stack, so they are
        # cleaned up on exit whichever branch returns.
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        nlu_data = stack.enter_context(TempDirectoryPath(nlu_data_directory))
        story = stack.enter_context(TempDirectoryPath(story_directory))

        if domain is None:
            return handle_domain_if_not_exists(
                config, nlu_data_directory, output_path, fixed_model_name
            )

        return await _train_async_internal(
            domain,
            config,
            train_path,
            nlu_data,
            story,
            output_path,
            force_training,
            fixed_model_name,
            kwargs,
        )
    # The original repeated the `domain is None` fallback here; it was
    # unreachable because both branches above return, so it was removed.