def train_nlu(
    config: Text, nlu_data: Text, output: Text, train_path: Optional[Text]
) -> Optional[Text]:
    """Trains a NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.

    Returns:
        `None` if the NLU data directory is empty. Otherwise, if `train_path`
        is given, the path to the directory with the trained model files
        (`train_path` itself); if it is not given, the output path of the
        packaged model.
    """
    import rasa.nlu.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)
    nlu_data_directory = data.get_nlu_directory(nlu_data)

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return None

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    # Only the files written to `_train_path` are needed; the returned
    # objects are not used by this function.
    rasa.nlu.train(config, nlu_data_directory, _train_path, fixed_model_name="nlu")
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path:
        # The caller owns the training directory and packages it itself.
        return _train_path

    # Standalone NLU training: package the temporary directory.
    output_path = create_output_path(output, prefix="nlu-")
    new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
    model.create_package_rasa(_train_path, output_path, new_fingerprint)
    print_success(
        "Your Rasa NLU model is trained and saved at '{}'.".format(output_path)
    )
    return output_path
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Run NLU training on already validated config and training data.

    Args:
        file_importer: Supplies the NLU config (via `get_config`) and is
            handed to `rasa.nlu.train` as the training data source.
        output: Output directory passed to `model.package_model`.
        train_path: Directory to train in; when falsy a temporary directory
            is used instead.
        fixed_model_name: Model name forwarded to `model.package_model`.
        persist_nlu_training_data: Forwarded to `rasa.nlu.train`.
        additional_arguments: Extra keyword arguments for `rasa.nlu.train`.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `model.package_model`.
    """
    import rasa.nlu.train

    extra_arguments = {} if additional_arguments is None else additional_arguments

    with ExitStack() as cleanup:
        # A caller-supplied directory is left alone on exit; a temporary one
        # is registered on the stack so it is cleaned up on exit.
        working_dir = (
            train_path
            if train_path
            else cleanup.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        )

        nlu_config = await file_importer.get_config()

        print_color("Training NLU model...", color=bcolors.OKBLUE)
        _, _interpreter, _ = await rasa.nlu.train(
            nlu_config,
            file_importer,
            working_dir,
            fixed_model_name="nlu",
            persist_nlu_training_data=persist_nlu_training_data,
            **extra_arguments,
        )
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is not None:
            return working_dir

        # Only NLU was trained, so the model is packaged right here.
        fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )
async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
    interpreter: Optional[Interpreter] = None,
) -> Optional[Text]:
    """Run Core training on already validated config and training data.

    Args:
        file_importer: Supplies the domain and config and is handed to
            `rasa.core.train` as the training resource.
        output: Output directory passed to `model.package_model`.
        train_path: Directory to train in; when falsy a temporary directory
            is used instead.
        fixed_model_name: Model name forwarded to `model.package_model`.
        additional_arguments: Forwarded to `rasa.core.train`.
        interpreter: Forwarded to `rasa.core.train`.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `model.package_model`.
    """
    import rasa.core.train

    with ExitStack() as cleanup:
        # A caller-supplied directory is left alone on exit; a temporary one
        # is registered on the stack so it is cleaned up on exit.
        working_dir = (
            train_path
            if train_path
            else cleanup.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        )

        # normal (not compare) training
        print_color("Training Core model...", color=bcolors.OKBLUE)

        domain, policy_config = await asyncio.gather(
            file_importer.get_domain(), file_importer.get_config()
        )
        core_output_dir = os.path.join(working_dir, DEFAULT_CORE_SUBDIRECTORY_NAME)

        await rasa.core.train(
            domain_file=domain,
            training_resource=file_importer,
            output_path=core_output_dir,
            policy_config=policy_config,
            additional_arguments=additional_arguments,
            interpreter=interpreter,
        )
        print_color("Core model training completed.", color=bcolors.OKBLUE)

        if train_path is not None:
            return working_dir

        # Only Core was trained, so the model is packaged right here.
        fingerprint = await model.model_fingerprint(file_importer)
        return model.package_model(
            fingerprint=fingerprint,
            output_directory=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="core-",
        )
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
) -> Optional[Text]:
    """Run NLU training on already validated config and training data.

    Args:
        config: NLU configuration handed to `rasa.nlu.train`.
        nlu_data_directory: Directory holding the NLU training data.
        output: Base output location used to build the package path.
        train_path: Directory to train in; when falsy a fresh temporary
            directory is used and the result is packaged.
        fixed_model_name: Fixed name forwarded to `create_output_path`.
        uncompress: If `True`, the package is passed through `decompress`
            and the decompressed path is reported and returned.

    Returns:
        `None` if the data directory is empty; the training directory when
        `train_path` is truthy; otherwise the (optionally decompressed)
        output path of the packaged model.
    """
    import rasa.nlu.train

    has_training_data = bool(os.listdir(nlu_data_directory))
    if not has_training_data:
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return

    working_dir = train_path if train_path else tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, _interpreter, _ = rasa.nlu.train(
        config, nlu_data_directory, working_dir, fixed_model_name="nlu"
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path:
        return working_dir

    # Only NLU was trained
    package_path = create_output_path(
        output, prefix="nlu-", fixed_name=fixed_model_name
    )
    fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
    model.create_package_rasa(working_dir, package_path, fingerprint)

    if uncompress:
        package_path = decompress(package_path)

    success_message = "Your Rasa NLU model is trained and saved at '{}'.".format(
        package_path
    )
    print_success(success_message)
    return package_path
async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Run Core training on already validated config and training data.

    Args:
        domain: Domain handed to `rasa.core.train` and used for the
            fingerprint.
        config: Policy configuration for Core.
        story_directory: Location of the stories used for training.
        output: Output path forwarded to `_package_model`.
        train_path: Directory to train in; when falsy a temporary directory
            is used instead.
        fixed_model_name: Model name forwarded to `_package_model`.
        kwargs: Additional training parameters forwarded to
            `rasa.core.train`.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `_package_model`.
    """
    import rasa.core.train

    with ExitStack() as cleanup:
        # A caller-supplied directory is left alone on exit; a temporary one
        # is registered on the stack so it is cleaned up on exit.
        working_dir = (
            train_path
            if train_path
            else cleanup.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        )

        # normal (not compare) training
        print_color("Training Core model...", color=bcolors.OKBLUE)
        core_output_dir = os.path.join(working_dir, "core")
        await rasa.core.train(
            domain_file=domain,
            stories_file=story_directory,
            output_path=core_output_dir,
            policy_config=config,
            kwargs=kwargs,
        )
        print_color("Core model training completed.", color=bcolors.OKBLUE)

        if train_path is not None:
            return working_dir

        # Only Core was trained, so the model is packaged right here.
        fingerprint = model.model_fingerprint(
            config, domain, stories=story_directory
        )
        return _package_model(
            new_fingerprint=fingerprint,
            output_path=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="core-",
        )
async def _do_training(
    domain: Union[Domain, Text],
    config: Dict[Text, Text],
    nlu_data_directory: Optional[Text],
    story_directory: Optional[Text],
    output_path: Text,
    train_path: Text,
    force_training: bool = False,
    retrain_core: bool = True,
    retrain_nlu: Union[bool, List[Text]] = True,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> None:
    """Train the Core and/or NLU parts of a model inside `train_path`.

    Args:
        domain: Domain (or path to it) used for Core training.
        config: Mapping of configurations; passed whole to the NLU trainer.
        nlu_data_directory: Directory with the NLU training data.
        story_directory: Directory with the Core training data.
        output_path: Output path forwarded to the trainers.
        train_path: Directory in which both parts are trained.
        force_training: If `True`, retrain Core and every NLU model even if
            nothing changed.
        retrain_core: Whether Core needs retraining.
        retrain_nlu: `True`/`False`, or the list of languages whose NLU
            model needs retraining.
        fixed_model_name: Model name forwarded to the trainers.
        kwargs: Additional Core training parameters.
    """
    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            # Core is trained with the first configuration in the mapping.
            # NOTE(review): presumably all entries share the same policies
            # section - confirm.
            config=list(config.values())[0],
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )
    else:
        print_color(
            "Core stories/configuration did not change. No need to retrain Core model.",
            color=bcolors.OKBLUE,
        )

    if force_training or retrain_nlu:
        # When training is forced, `retrain_nlu` may still be falsy or list
        # only the changed languages. Forward `True` so that forcing really
        # retrains every language instead of skipping (or failing on) them.
        # NOTE(review): called without `await`, so the synchronous NLU
        # trainer variant is expected here - confirm.
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            retrain_nlu=True if force_training else retrain_nlu,
        )
    else:
        print_color(
            "NLU data/configuration did not change. No need to retrain NLU model.",
            color=bcolors.OKBLUE,
        )
def print_buttons(
    message: Dict[Text, Any],
    is_latest_message: bool = False,
    color=cli_utils.bcolors.OKBLUE,
) -> Optional[questionary.Question]:
    """Render the buttons of a bot message.

    For the latest message an interactive `questionary.select` question is
    built from the button choices (free text input allowed) and returned.
    For any other message the buttons are only printed, one per line, and
    nothing is returned.

    Args:
        message: Bot message; its "text" and "buttons" entries are read.
        is_latest_message: Whether this is the most recent bot message.
        color: Color used when printing the buttons.

    Returns:
        The question to ask the user, or `None` when buttons were printed.
    """
    if not is_latest_message:
        cli_utils.print_color("Buttons:", color=color)
        for position, button in enumerate(message.get("buttons")):
            button_line = cli_utils.button_to_string(button, position)
            cli_utils.print_color(button_line, color=color)
        return None

    button_choices = cli_utils.button_choices_from_message_data(
        message, allow_free_text_input=True
    )
    select_style = Style(
        [("qmark", "#6d91d3"), ("", "#6d91d3"), ("answer", "#b373d6")]
    )
    return questionary.select(
        message.get("text"),
        button_choices,
        style=select_style,
    )
async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Run Core training on already validated config and training data.

    Args:
        domain: Domain handed to `rasa.core.train` and used for the
            fingerprint.
        config: Policy configuration for Core.
        story_directory: Location of the stories used for training.
        output: Output path forwarded to `_package_model`.
        train_path: Directory to train in; when falsy a fresh temporary
            directory is used instead.
        fixed_model_name: Model name forwarded to `_package_model`.
        uncompress: Forwarded to `_package_model`.
        kwargs: Additional training parameters forwarded to
            `rasa.core.train`.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `_package_model`.
    """
    import rasa.core.train

    working_dir = train_path if train_path else tempfile.mkdtemp()

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    core_output_dir = os.path.join(working_dir, "core")
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=core_output_dir,
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path is not None:
        return working_dir

    # Only Core was trained, so the model is packaged right here.
    fingerprint = model.model_fingerprint(config, domain, stories=story_directory)
    return _package_model(
        new_fingerprint=fingerprint,
        output_path=output,
        train_path=working_dir,
        fixed_model_name=fixed_model_name,
        model_prefix="core-",
        uncompress=uncompress,
    )
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Run NLU training on already validated config and training data.

    Args:
        config: NLU configuration handed to `rasa.nlu.train`.
        nlu_data_directory: Directory holding the NLU training data.
        output: Output path forwarded to `_package_model`.
        train_path: Directory to train in; when falsy a temporary directory
            is used instead.
        fixed_model_name: Model name forwarded to `_package_model`.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `_package_model`.
    """
    import rasa.nlu.train

    with ExitStack() as cleanup:
        # A caller-supplied directory is left alone on exit; a temporary one
        # is registered on the stack so it is cleaned up on exit.
        working_dir = (
            train_path
            if train_path
            else cleanup.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        )

        print_color("Training NLU model...", color=bcolors.OKBLUE)
        _, _interpreter, _ = rasa.nlu.train(
            config, nlu_data_directory, working_dir, fixed_model_name="nlu"
        )
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is not None:
            return working_dir

        # Only NLU was trained, so the model is packaged right here.
        fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
        return _package_model(
            new_fingerprint=fingerprint,
            output_path=output,
            train_path=working_dir,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )
async def _do_training(
    file_importer: TrainingDataImporter,
    output_path: Text,
    train_path: Text,
    force_training: bool = False,
    retrain_core: bool = True,
    retrain_nlu: Union[bool, List[Text]] = True,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    kwargs: Optional[Dict] = None,
):
    """Train the Core and/or NLU parts of a model inside `train_path`.

    Core is trained when `force_training` or `retrain_core` is truthy, NLU
    when `force_training` or `retrain_nlu` is truthy. A part that is not
    trained is reported with an informational message instead.

    Args:
        file_importer: Source of the training data for both trainers.
        output_path: Output path forwarded to the trainers.
        train_path: Directory in which both parts are trained.
        force_training: Train both parts regardless of the retrain flags.
        retrain_core: Whether Core needs retraining.
        retrain_nlu: Retrain flag or list, forwarded to the NLU trainer.
        fixed_model_name: Model name forwarded to the trainers.
        persist_nlu_training_data: Forwarded to the NLU trainer.
        kwargs: Additional parameters forwarded to the Core trainer.
    """
    core_needs_training = force_training or retrain_core
    if not core_needs_training:
        print_color(
            "Core stories/configuration did not change. No need to retrain Core model.",
            color=bcolors.OKBLUE,
        )
    else:
        await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

    nlu_needs_training = force_training or retrain_nlu
    if not nlu_needs_training:
        print_color(
            "NLU data/configuration did not change. No need to retrain NLU model.",
            color=bcolors.OKBLUE,
        )
    else:
        await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            retrain_nlu=retrain_nlu,
            persist_nlu_training_data=persist_nlu_training_data,
        )
async def _train_nlu_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    retrain_nlu: Union[bool, List[Text]] = True,
) -> Optional[Text]:
    """Train NLU with validated training and config data, one model per language.

    Args:
        file_importer: Supplies the per-language NLU configs (via
            `get_nlu_config`) and is handed to `rasa.nlu.train` as the
            training data source.
        output: Output directory passed to `model.package_model`.
        train_path: Directory to train in; when falsy a temporary directory
            is used instead.
        fixed_model_name: Model name forwarded to `model.package_model`.
        persist_nlu_training_data: Forwarded to `rasa.nlu.train`.
        retrain_nlu: Passed to `file_importer.get_nlu_config` to select which
            languages get a config.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `model.package_model`.
    """
    import rasa.nlu.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # bf mod
        config = await file_importer.get_nlu_config(retrain_nlu)
        for lang, lang_config in config.items():
            if not lang_config:
                # A falsy config marks a language that does not need training.
                print_color(
                    "NLU data for language <{}> didn't change, skipping training...".format(
                        lang
                    ),
                    color=bcolors.OKBLUE,
                )
                continue

            print_color(
                "Start training {} NLU model ...".format(lang),
                color=bcolors.OKBLUE,
            )
            # Only the persisted files matter; the returned objects were
            # never read, so they are no longer collected.
            await rasa.nlu.train(
                lang_config,
                file_importer,
                _train_path,
                fixed_model_name="nlu-{}".format(lang),
                persist_nlu_training_data=persist_nlu_training_data,
            )
        # /bf mod

        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = await model.model_fingerprint(file_importer)

            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
) -> Optional[Text]:
    """Run NLU training on already validated config and training data.

    Args:
        config: NLU configuration handed to `rasa.nlu.train`.
        nlu_data_directory: Directory holding the NLU training data.
        output: Output path forwarded to `_package_model`.
        train_path: Directory to train in; when falsy a fresh temporary
            directory is used instead.
        fixed_model_name: Model name forwarded to `_package_model`.
        uncompress: Forwarded to `_package_model`.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `_package_model`.
    """
    import rasa.nlu.train

    working_dir = train_path if train_path else tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, _interpreter, _ = rasa.nlu.train(
        config, nlu_data_directory, working_dir, fixed_model_name="nlu"
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path is not None:
        return working_dir

    # Only NLU was trained, so the model is packaged right here.
    fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
    return _package_model(
        new_fingerprint=fingerprint,
        output_path=output,
        train_path=working_dir,
        fixed_model_name=fixed_model_name,
        model_prefix="nlu-",
        uncompress=uncompress,
    )
async def _do_training(
    file_importer: TrainingDataImporter,
    output_path: Text,
    train_path: Text,
    fingerprint_comparison_result: Optional[FingerprintComparisonResult] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    old_model_zip_path: Optional[Text] = None,
):
    """Train the parts of the model flagged by the fingerprint comparison.

    NLU is handled first so that a freshly trained NLU model can be loaded
    as interpreter for the Core training. If Core itself needs no training
    but NLG does, the model is only updated with the new domain.

    Args:
        file_importer: Source of the training data for both trainers.
        output_path: Output path forwarded to the trainers.
        train_path: Directory in which the parts are trained.
        fingerprint_comparison_result: Decides what to retrain; a default
            `FingerprintComparisonResult()` is used when falsy.
        fixed_model_name: Model name forwarded to the trainers.
        persist_nlu_training_data: Forwarded to the NLU trainer.
        core_additional_arguments: Extra arguments for Core training.
        nlu_additional_arguments: Extra arguments for NLU training.
        old_model_zip_path: Previous model, used as interpreter source when
            no interpreter could be loaded from this run.
    """
    comparison = fingerprint_comparison_result or FingerprintComparisonResult()

    interpreter_path = None
    if not comparison.should_retrain_nlu():
        print_color(
            "NLU data/configuration did not change. No need to retrain NLU model.",
            color=bcolors.OKBLUE,
        )
    else:
        trained_model_path = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
        )
        interpreter_path = os.path.join(
            trained_model_path, DEFAULT_NLU_SUBDIRECTORY_NAME
        )

    if comparison.should_retrain_core():
        # Prefer the interpreter of this run, fall back to the old model's.
        core_interpreter = _load_interpreter(
            interpreter_path
        ) or _interpreter_from_previous_model(old_model_zip_path)
        await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
            interpreter=core_interpreter,
        )
        return

    if comparison.should_retrain_nlg():
        print_color(
            "Core stories/configuration did not change. "
            "Only the templates section has been changed. A new model with "
            "the updated templates will be created.",
            color=bcolors.OKBLUE,
        )
        await model.update_model_with_new_domain(file_importer, train_path)
        return

    print_color(
        "Core stories/configuration did not change. No need to retrain Core model.",
        color=bcolors.OKBLUE,
    )
def _train_nlu_with_validated_data(
    config: Dict[Text, Text],
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    retrain_nlu: Union[bool, List[Text]] = True,
) -> Optional[Text]:
    """Train NLU with validated training and config data, one model per file.

    Every file in `nlu_data_directory` is treated as the training data of
    one language; the language code is derived from the file name.

    Args:
        config: Mapping from language code to the NLU config to load for it.
        nlu_data_directory: Directory holding one data file per language.
        output: Output path forwarded to `_package_model`.
        train_path: Directory to train in; when falsy a temporary directory
            is used instead.
        fixed_model_name: Model name forwarded to `_package_model`.
        retrain_nlu: `True` to train every language, `False` to train none,
            or the list of language codes to train.

    Returns:
        The training directory when `train_path` is not `None`, otherwise
        the result of `_package_model`.
    """
    import re

    import rasa.nlu.train
    from rasa.nlu import config as cfg_loader

    # Captures the last pair of word characters in front of a dot, e.g. "en"
    # for "en.md". Compiled once instead of once per file.
    language_pattern = re.compile(r"(\w\w)*(?=\.)")

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        for file_name in os.listdir(nlu_data_directory):
            match = language_pattern.search(file_name)
            lang = match.group(1) if match else None
            if lang is None:
                # No language code can be derived (no dot in the name, or a
                # dot-file such as ".DS_Store"); previously this crashed.
                continue

            # A bool applies to every language; only a list is searched.
            # `lang in retrain_nlu` on `False` would raise a TypeError.
            if isinstance(retrain_nlu, bool):
                should_train = retrain_nlu
            else:
                should_train = lang in retrain_nlu

            if not should_train:
                print_color(
                    "{} NLU data didn't change, skipping training...".format(lang),
                    color=bcolors.OKBLUE,
                )
                continue

            nlu_file_path = os.path.join(nlu_data_directory, file_name)
            print_color(
                "Start training {} NLU model ...".format(lang),
                color=bcolors.OKBLUE,
            )
            nlu_config = cfg_loader.load(config[lang])
            nlu_config.language = lang
            # Only the persisted files matter; the returned objects were
            # never read, so they are no longer collected.
            rasa.nlu.train(
                nlu_config,
                nlu_file_path,
                _train_path,
                fixed_model_name="nlu-{}".format(lang),
            )

        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = model.model_fingerprint(
                config, nlu_data=nlu_data_directory
            )

            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
async def _do_training(
    file_importer: TrainingDataImporter,
    output_path: Text,
    train_path: Text,
    fingerprint_comparison_result: Optional[FingerprintComparisonResult] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    kwargs: Optional[Dict] = None,
):
    """Train the parts of the model flagged by the fingerprint comparison.

    Core is handled first: it is either trained, or (when only NLG needs an
    update) the model is updated with the new domain, or skipped with a
    message. NLU is then trained or skipped the same way.

    Args:
        file_importer: Source of the training data for both trainers.
        output_path: Output path forwarded to the trainers.
        train_path: Directory in which the parts are trained.
        fingerprint_comparison_result: Decides what to retrain; a default
            `FingerprintComparisonResult()` is used when falsy. Its `nlu`
            attribute is forwarded to the NLU trainer as `retrain_nlu`.
        fixed_model_name: Model name forwarded to the trainers.
        persist_nlu_training_data: Forwarded to the NLU trainer.
        kwargs: Additional parameters forwarded to the Core trainer.
    """
    comparison = fingerprint_comparison_result or FingerprintComparisonResult()

    if comparison.should_retrain_core():
        await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )
    else:
        if comparison.should_retrain_nlg():
            print_color(
                "Core stories/configuration did not change. "
                "Only the templates section has been changed. A new model with "
                "the updated templates will be created.",
                color=bcolors.OKBLUE,
            )
            await model.update_model_with_new_domain(file_importer, train_path)
        else:
            print_color(
                "Core stories/configuration did not change. No need to retrain Core model.",
                color=bcolors.OKBLUE,
            )

    if not comparison.should_retrain_nlu():
        print_color(
            "NLU data/configuration did not change. No need to retrain NLU model.",
            color=bcolors.OKBLUE,
        )
        return

    await _train_nlu_with_validated_data(
        file_importer,
        output=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        retrain_nlu=comparison.nlu,
        persist_nlu_training_data=persist_nlu_training_data,
    )
def print_bot_output(
    message: Dict[Text, Any],
    is_latest_message: bool = False,
    color=rasa.shared.utils.io.bcolors.OKBLUE,
) -> Optional[questionary.Question]:
    """Print every supported part of a bot message to the console.

    Buttons are delegated to `print_buttons`; if that yields a question it
    is returned immediately and nothing else is printed. Otherwise text,
    image, attachment, elements, quick replies and custom json are printed
    in that order, each only when its key is present in `message`.

    Args:
        message: The bot message to display.
        is_latest_message: Forwarded to `print_buttons`.
        color: Color used for all printed output.

    Returns:
        The question produced for the buttons, or `None`.
    """
    if "buttons" in message:
        button_question = print_buttons(message, is_latest_message, color)
        if button_question:
            return button_question

    if "text" in message:
        cli_utils.print_color(message.get("text"), color=color)

    # Single-value parts that are printed behind a fixed label.
    for key, label in (("image", "Image: "), ("attachment", "Attachment: ")):
        if key in message:
            cli_utils.print_color(label + message.get(key), color=color)

    if "elements" in message:
        cli_utils.print_color("Elements:", color=color)
        for position, element in enumerate(message.get("elements")):
            element_line = cli_utils.element_to_string(element, position)
            cli_utils.print_color(element_line, color=color)

    if "quick_replies" in message:
        cli_utils.print_color("Quick Replies:", color=color)
        for position, quick_reply in enumerate(message.get("quick_replies")):
            reply_line = cli_utils.button_to_string(quick_reply, position)
            cli_utils.print_color(reply_line, color=color)

    if "custom" in message:
        cli_utils.print_color("Custom json:", color=color)
        custom_json = json.dumps(message.get("custom"), indent=2)
        cli_utils.print_color(custom_json, color=color)
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: Forwarded unchanged to `_do_training`.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, or the path of the latest
        existing model when nothing had to be trained.
    """
    new_fingerprint = await model.model_fingerprint(file_importer)
    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    # NOTE: the early exits for "no training data", "no stories" and
    # "no NLU data" are disabled in this version; empty Core or NLU data is
    # handled per component further down instead.

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = model.should_retrain(
        new_fingerprint, old_model, train_path
    )

    # bf mod
    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    nlu_untrainable = [
        lang for lang, lang_data in nlu_data.items() if lang_data.is_empty()
    ]
    retrain_core = retrain_core and not core_untrainable

    if retrain_nlu is True:
        # `True` stands for every language known to the new fingerprint.
        from rasa.model import FINGERPRINT_NLU_DATA_KEY

        possible_retrains = new_fingerprint[FINGERPRINT_NLU_DATA_KEY].keys()
    else:
        possible_retrains = retrain_nlu

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE,
        )
    for empty_lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(
                empty_lang
            ),
            color=bcolors.OKBLUE,
        )
    retrain_nlu = [
        lang for lang in possible_retrains if lang not in nlu_untrainable
    ]
    # /bf mod

    nothing_to_train = not (force_training or retrain_core or retrain_nlu)
    if nothing_to_train:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'."
            "".format(os.path.abspath(old_model))
        )
        return old_model

    await _do_training(
        file_importer,
        output_path=output_path,
        train_path=train_path,
        force_training=force_training,
        retrain_core=retrain_core,
        retrain_nlu=retrain_nlu,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        kwargs=kwargs,
    )

    return model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
def print_bot_output(
    message: Dict[Text, Any], color=cli_utils.bcolors.OKBLUE
) -> Optional[questionary.Question]:
    """Print a bot message to the console, or build a question for buttons.

    Text (only when the message has no buttons), image and attachment are
    printed first. If the message has buttons, a `questionary.select`
    question with the message text as prompt is returned at that point and
    the remaining parts are not printed. Otherwise elements, quick replies
    and custom json follow.

    Args:
        message: The bot message to display.
        color: Color used for all printed output.

    Returns:
        The question for the buttons, or `None` if there are no buttons.
    """
    has_buttons = "buttons" in message

    if "text" in message and not has_buttons:
        cli_utils.print_color(message.get("text"), color=color)

    # Single-value parts that are printed behind a fixed label.
    for key, label in (("image", "Image: "), ("attachment", "Attachment: ")):
        if key in message:
            cli_utils.print_color(label + message.get(key), color=color)

    if has_buttons:
        button_choices = cli_utils.button_choices_from_message_data(
            message, allow_free_text_input=True
        )
        select_style = Style(
            [("qmark", "#6d91d3"), ("", "#6d91d3"), ("answer", "#b373d6")]
        )
        return questionary.select(
            message.get("text"),
            button_choices,
            style=select_style,
        )

    if "elements" in message:
        cli_utils.print_color("Elements:", color=color)
        for position, element in enumerate(message.get("elements")):
            element_line = cli_utils.element_to_string(element, position)
            cli_utils.print_color(element_line, color=color)

    if "quick_replies" in message:
        cli_utils.print_color("Quick Replies:", color=color)
        for position, quick_reply in enumerate(message.get("quick_replies")):
            reply_line = cli_utils.button_to_string(quick_reply, position)
            cli_utils.print_color(reply_line, color=color)

    if "custom" in message:
        cli_utils.print_color("Custom json:", color=color)
        custom_json = json.dumps(message.get("custom"), indent=2)
        cli_utils.print_color(custom_json, color=color)
def _bot_output(
    message: Dict[Text, Any], color=cli_utils.bcolors.OKBLUE
) -> Optional[questionary.Question]:
    """Print a bot message, pass its text to `say_lang`, or build a question.

    Text (only when the message has no buttons) is printed and handed to
    `say_lang` together with `sett.lang`. Image and attachment are printed
    next. If the message has buttons, a `questionary.select` question is
    returned at that point and the remaining parts are not printed.
    Otherwise elements, quick replies and custom json follow.

    Args:
        message: The bot message to display.
        color: Color used for all printed output.

    Returns:
        The question for the buttons, or `None` if there are no buttons.
    """
    from sagas.nlu.tts_utils import say_lang

    has_buttons = "buttons" in message

    if "text" in message and not has_buttons:
        utterance = message.get("text")
        cli_utils.print_color(utterance, color=color)
        say_lang(utterance, sett.lang, False)

    # Single-value parts that are printed behind a fixed label.
    for key, label in (("image", "Image: "), ("attachment", "Attachment: ")):
        if key in message:
            cli_utils.print_color(label + message.get(key), color=color)

    if has_buttons:
        button_choices = cli_utils.button_choices_from_message_data(
            message, allow_free_text_input=True
        )
        select_style = Style(
            [("qmark", "#6d91d3"), ("", "#6d91d3"), ("answer", "#b373d6")]
        )
        return questionary.select(
            message.get("text"),
            button_choices,
            style=select_style,
        )

    if "elements" in message:
        cli_utils.print_color("Elements:", color=color)
        for position, element in enumerate(message.get("elements")):
            element_line = cli_utils.element_to_string(element, position)
            cli_utils.print_color(element_line, color=color)

    if "quick_replies" in message:
        cli_utils.print_color("Quick Replies:", color=color)
        for position, quick_reply in enumerate(message.get("quick_replies")):
            reply_line = cli_utils.button_to_string(quick_reply, position)
            cli_utils.print_color(reply_line, color=color)

    if "custom" in message:
        cli_utils.print_color("Custom json:", color=color)
        custom_json = json.dumps(message.get("custom"), indent=2)
        cli_utils.print_color(custom_json, color=color)
async def train_core_async(
    domain: Text,
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        kwargs: Additional training parameters.

    Returns:
        `None` if the story directory is empty. Otherwise, if `train_path`
        is given, the path to the directory with the trained model files
        (`train_path` itself); if it is not given, the output path of the
        packaged model.
    """
    import rasa.core.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)
    story_directory = data.get_core_directory(stories)

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model."
        )
        return None

    # Create the temporary directory only after the data check, so the early
    # return above does not leave an unused directory behind.
    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path:
        # The caller owns the training directory and packages it itself.
        return _train_path

    # Only Core was trained.
    output_path = create_output_path(output, prefix="core-")
    new_fingerprint = model.model_fingerprint(config, domain, stories=story_directory)
    model.create_package_rasa(_train_path, output_path, new_fingerprint)
    print_success(
        "Your Rasa Core model is trained and saved at '{}'.".format(output_path)
    )
    return output_path
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    additional_arguments: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        fixed_model_name: Name of model to be stored.
        additional_arguments: Additional training parameters.

    Returns:
        Path of the trained model archive, or the path of the latest
        existing model when no training was required.
    """
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    # NOTE: the early exits for "no training data" and "no NLU data" are
    # disabled in this version; empty Core or NLU data is handled per
    # component further down instead.

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    fingerprint_comparison = FingerprintComparisonResult(force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint, old_model, train_path
        )

    # bf mod >
    if fingerprint_comparison.nlu is True:
        # replace True with list of all langs
        fingerprint_comparison.nlu = list(new_fingerprint.get("nlu-config", {}).keys())

    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    nlu_untrainable = [
        lang for lang, lang_data in nlu_data.items() if lang_data.is_empty()
    ]
    fingerprint_comparison.core = (
        fingerprint_comparison.core and not core_untrainable
    )
    # NOTE(review): from here on `nlu` is assumed to be a list of language
    # codes; a plain `False` would not be iterable - confirm that
    # `model.should_retrain` never returns it.
    fingerprint_comparison.nlu = [
        lang for lang in fingerprint_comparison.nlu if lang not in nlu_untrainable
    ]

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE,
        )
    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(lang),
            color=bcolors.OKBLUE,
        )
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=additional_arguments,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model