def train_nlu(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Trains an NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
            otherwise in the provided directory.
        fixed_model_name: Name of the model to be stored.

    Returns:
        If `train_path` is given, the path to the directory with the trained
        model files, otherwise the path to the packaged model archive.
    """
    # Training NLU only, hence the NLU training files still have to be selected.
    skill_imports = SkillSelector.load(config, nlu_data)
    train_context = TempDirectoryPath(data.get_nlu_directory(nlu_data, skill_imports))

    with train_context as nlu_data_directory:
        if not os.listdir(nlu_data_directory):
            print_error(
                "No NLU data given. Please provide NLU data in order to train "
                "a Rasa NLU model using the '--nlu' argument."
            )
            return None

        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )
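A minimal usage sketch for the synchronous `train_nlu` entry point above. The paths and model name are placeholders for a standard Rasa 1.x project layout, not part of the original snippet:

# Hypothetical invocation; adjust paths to the actual project.
model_path = train_nlu(
    config="config.yml",
    nlu_data="data/nlu.md",
    output="models",
    fixed_model_name="my-nlu-model",
)
# With train_path=None the model is packaged, so model_path should point at
# an archive such as "models/my-nlu-model.tar.gz" (exact name may vary).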
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import rasa.nlu.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        config = await file_importer.get_config()

        print_color("Training NLU model...", color=bcolors.OKBLUE)

        _, nlu_model, _ = await rasa.nlu.train(
            config,
            file_importer,
            _train_path,
            fixed_model_name="nlu",
            persist_nlu_training_data=persist_nlu_training_data,
        )

        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained.
            new_fingerprint = await model.model_fingerprint(file_importer)

            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import rasa.nlu.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        print_color("Training NLU model...", color=bcolors.OKBLUE)
        _, nlu_model, _ = rasa.nlu.train(
            config, nlu_data_directory, _train_path, fixed_model_name="nlu"
        )
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained.
            new_fingerprint = model.model_fingerprint(
                config, nlu_data=nlu_data_directory
            )
            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
async def train_model(
    data_importer: TrainingDataImporter,
    output_path: Text,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """Trains the bot; overrides the training function from Rasa.

    :param data_importer: TrainingDataImporter object
    :param output_path: model output path
    :param force_training: if `True`, retrain the model even if the data has not changed
    :param fixed_model_name: name under which the trained model is stored
    :param persist_nlu_training_data: if `True`, persist the NLU training data with the model
    :param additional_arguments: additional training parameters
    :return: model path
    """
    with ExitStack() as stack:
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        domain = await data_importer.get_domain()
        if domain.is_empty():
            return await handle_domain_if_not_exists(
                data_importer, output_path, fixed_model_name
            )

        return await _train_async_internal(
            data_importer,
            train_path,
            output_path,
            force_training,
            fixed_model_name,
            persist_nlu_training_data,
            additional_arguments,
        )
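A hedged driver for `train_model`. It assumes the Rasa 2.x importer API (`rasa.shared.importers.rasa.RasaFileImporter`) is available; the project file paths are placeholders:

import asyncio

from rasa.shared.importers.rasa import RasaFileImporter  # Rasa 2.x import path

# Placeholder project files; adjust to the actual bot layout.
importer = RasaFileImporter(
    config_file="config.yml",
    domain_path="domain.yml",
    training_data_paths=["data"],
)
model_path = asyncio.run(train_model(importer, output_path="models"))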
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import rasa.nlu.train

    if additional_arguments is None:
        additional_arguments = {}

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        config = await file_importer.get_config()
        rasa.shared.utils.cli.print_color(
            "Training NLU model...", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if model_to_finetune:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature"
            )
            model_to_finetune = await _nlu_model_for_finetuning(
                model_to_finetune,
                file_importer,
                finetuning_epoch_fraction,
                called_from_combined_training=train_path is not None,
            )

            if not model_to_finetune:
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No NLU model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'."
                )

        async with telemetry.track_model_training(
            file_importer,
            model_type="nlu",
            is_finetuning=model_to_finetune is not None,
        ):
            await rasa.nlu.train.train(
                config,
                file_importer,
                _train_path,
                fixed_model_name="nlu",
                persist_nlu_training_data=persist_nlu_training_data,
                model_to_finetune=model_to_finetune,
                **additional_arguments,
            )
        rasa.shared.utils.cli.print_color(
            "NLU model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is None:
            # Only NLU was trained.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
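A sketch of the incremental-training path of the helper above. It assumes the Rasa 2.x importer API and that a previously trained model already exists under "models"; all paths are placeholders:

import asyncio

from rasa.shared.importers.rasa import RasaFileImporter

importer = RasaFileImporter(
    config_file="config.yml",
    domain_path="domain.yml",
    training_data_paths=["data"],
)
model_path = asyncio.run(
    _train_nlu_with_validated_data(
        file_importer=importer,
        output="models",
        model_to_finetune="models",     # a directory: the latest model inside is used
        finetuning_epoch_fraction=0.5,  # finetune with half the configured epochs
    )
)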
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output: Text = DEFAULT_MODELS_PATH,
    dry_run: bool = False,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> TrainingResult:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        dry_run: If `True`, no training is done; it is only printed whether
            training would need to be done.
        force_training: If `True`, retrain the model even if the data has not changed.
        fixed_model_name: Name of the model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
            with the model.
        core_additional_arguments: Additional training parameters for Core training.
        nlu_additional_arguments: Additional training parameters forwarded to the
            training method of each NLU component.
        model_to_finetune: Optional path to a model which should be finetuned, or
            a directory in case the latest trained model should be used.
        finetuning_epoch_fraction: The fraction of the currently specified training
            epochs in the model configuration which should be used for finetuning.

    Returns:
        An instance of `TrainingResult`.
    """
    file_importer = TrainingDataImporter.load_from_config(
        config, domain, training_files
    )
    with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
        domain = await file_importer.get_domain()

        if domain.is_empty():
            nlu_model = await handle_domain_if_not_exists(
                file_importer, output, fixed_model_name
            )
            return TrainingResult(model=nlu_model)

        return await _train_async_internal(
            file_importer,
            train_path,
            output,
            dry_run,
            force_training,
            fixed_model_name,
            persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
            model_to_finetune=model_to_finetune,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
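A minimal call of `train_async`, again with placeholder paths; in Rasa 2.x the returned `TrainingResult` carries the packaged model path in its `model` field:

import asyncio

result = asyncio.run(
    train_async(
        domain="domain.yml",
        config="config.yml",
        training_files=["data"],
        output="models",
    )
)
print(result.model)  # path to the packaged model archive, or None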
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
    model_to_finetune: Optional["Text"] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Text]:
    """Train Core with validated training and config data."""
    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # Normal (not compare) training.
        rasa.shared.utils.cli.print_color(
            "Training Core model...", color=rasa.shared.utils.io.bcolors.OKBLUE
        )
        domain, config = await asyncio.gather(
            file_importer.get_domain(), file_importer.get_config()
        )

        if model_to_finetune:
            rasa.shared.utils.common.mark_as_experimental_feature(
                "Incremental Training feature"
            )
            model_to_finetune = await _core_model_for_finetuning(
                model_to_finetune,
                file_importer=file_importer,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )

            if not model_to_finetune:
                rasa.shared.utils.cli.print_error_and_exit(
                    f"No Core model for finetuning found. Please make sure to either "
                    f"specify a path to a previous model or to have a finetunable "
                    f"model within the directory '{output}'."
                )

        async with telemetry.track_model_training(
            file_importer,
            model_type="core",
            is_finetuning=model_to_finetune is not None,
        ):
            await rasa.core.train.train(
                domain_file=domain,
                training_resource=file_importer,
                output_path=os.path.join(_train_path, DEFAULT_CORE_SUBDIRECTORY_NAME),
                policy_config=config,
                additional_arguments=additional_arguments,
                interpreter=interpreter,
                model_to_finetune=model_to_finetune,
            )
        rasa.shared.utils.cli.print_color(
            "Core model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is None:
            # Only Core was trained.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    kwargs: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies."""
    from rasa.core import config
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:
                policies = config.load(policy_config)

                if len(policies) > 1:
                    raise ValueError(
                        "You can only specify one policy per model for comparison"
                    )

                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file]
                )

                policy_name = type(policies[0]).__name__
                logging.info(
                    "Starting to train {} round {}/{} with {}% exclusion".format(
                        policy_name, current_run, len(exclusion_percentages), percentage
                    )
                )

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    await train(
                        domain,
                        file_importer,
                        train_path,
                        policy_config=policy_config,
                        # Pass the actual exclusion percentage, not the run index.
                        exclusion_percentage=percentage,
                        kwargs=kwargs,
                        dump_stories=dump_stories,
                    )

                    new_fingerprint = await model.model_fingerprint(file_importer)

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = policy_name + str(current_run)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
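A sketch of a policy-comparison run; the story file, domain, and policy configs below are placeholders, and each policy config is expected to contain exactly one policy:

import asyncio

asyncio.run(
    train_comparison_models(
        story_file="data/stories.md",
        domain="domain.yml",
        output_path="comparison_models",
        exclusion_percentages=[0, 25, 50, 75],  # % of stories withheld per round
        policy_configs=["config_memoization.yml", "config_ted.yml"],
        runs=3,
    )
)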
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    retrain_nlu: Union[bool, List[Text]] = True,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import rasa.nlu.train

    if additional_arguments is None:
        additional_arguments = {}

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # bf mod: train one NLU model per language, skipping languages whose
        # data has not changed.
        config = await file_importer.get_nlu_config(retrain_nlu)
        async with telemetry.track_model_training(file_importer, model_type="nlu"):
            for lang in config:
                if config[lang]:
                    print_color(
                        "Start training <{}> NLU model ...".format(lang),
                        color=rasa.shared.utils.io.bcolors.OKBLUE,
                    )
                    await rasa.nlu.train(
                        config[lang],
                        file_importer,
                        _train_path,
                        fixed_model_name="nlu-{}".format(lang),
                        persist_nlu_training_data=persist_nlu_training_data,
                        **additional_arguments,
                    )
                else:
                    print_color(
                        "NLU data for language <{}> didn't change, skipping "
                        "training...".format(lang),
                        color=rasa.shared.utils.io.bcolors.OKBLUE,
                    )
        # /bf mod

        print_color(
            "NLU model training completed.", color=rasa.shared.utils.io.bcolors.OKBLUE
        )

        if train_path is None:
            # Only NLU was trained.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
async def train_async(
    domain: Union[Domain, Text],
    config: Dict[Text, Text],
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Dict of paths to the configs for Core and NLU. Keys are
            language codes.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True`, retrain the model even if the data has not changed.
        fixed_model_name: Name of the model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    # for lang in config.keys():
    #     config[lang] = _get_valid_config(config[lang], CONFIG_MANDATORY_KEYS)

    # botfront: see how to re-enable skills
    skill_imports = None
    # skill_imports = SkillSelector.load(config, training_files)
    # botfront end
    try:
        domain = Domain.load(domain, skill_imports)
        domain.check_missing_templates()
    except InvalidDomain:
        domain = None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )

    with ExitStack() as stack:
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        nlu_data = stack.enter_context(TempDirectoryPath(nlu_data_directory))
        story = stack.enter_context(TempDirectoryPath(story_directory))

        if domain is None:
            return handle_domain_if_not_exists(
                config, nlu_data_directory, output_path, fixed_model_name
            )

        return await _train_async_internal(
            domain,
            config,
            train_path,
            nlu_data,
            story,
            output_path,
            force_training,
            fixed_model_name,
            kwargs,
        )
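A hypothetical call of the Botfront-style `train_async` above, where `config` maps language codes to per-language config paths; all paths are placeholders:

import asyncio

model_path = asyncio.run(
    train_async(
        domain="domain.yml",
        config={"en": "config-en.yml", "fr": "config-fr.yml"},
        training_files="data",
        output_path="models",
    )
)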