def get_provided_model(arg_model: Text) -> Optional[Union[Text, Path]]:
    """Resolve the model argument to a concrete model path.

    Args:
        arg_model: Model path given by the caller. It is passed through
            `rasa.cli.utils.get_validated_path` together with
            `DEFAULT_MODELS_PATH`.

    Returns:
        The result of `model.get_latest_model` when the validated path is a
        directory, otherwise the validated path itself.
    """
    validated_path = rasa.cli.utils.get_validated_path(
        arg_model, "model", DEFAULT_MODELS_PATH
    )
    if not os.path.isdir(validated_path):
        return validated_path
    return model.get_latest_model(validated_path)
def load_local_model(
    model_path: Text,
    interpreter: Optional[NaturalLanguageInterpreter] = None,
    generator: Union[EndpointConfig, "NLG"] = None,
    tracker_store: Optional["TrackerStore"] = None,
    action_endpoint: Optional[EndpointConfig] = None,
    model_server: Optional[EndpointConfig] = None,
    remote_storage: Optional[Text] = None,
) -> "Agent":
    """Load an agent from a model archive on the local file system.

    Args:
        model_path: Path to a model archive file; any other value is handed
            to `get_latest_model` to pick an archive.
        interpreter: Forwarded to `Agent.load`.
        generator: Forwarded to `Agent.load`.
        tracker_store: Forwarded to `Agent.load`.
        action_endpoint: Forwarded to `Agent.load`.
        model_server: Forwarded to `Agent.load`.
        remote_storage: Forwarded to `Agent.load`.

    Returns:
        The agent returned by `Agent.load`, or a bare `Agent()` (after
        logging a warning) when no archive could be determined.
    """
    archive = (
        model_path if os.path.isfile(model_path) else get_latest_model(model_path)
    )
    if archive is None:
        logger.warning("Could not load local model in '{}'".format(model_path))
        return Agent()

    # The archive is unpacked into a fresh temporary directory, which this
    # function does not remove.
    unpack_target = tempfile.mkdtemp()
    return Agent.load(
        unpack_model(archive, unpack_target),
        interpreter=interpreter,
        generator=generator,
        tracker_store=tracker_store,
        action_endpoint=action_endpoint,
        model_server=model_server,
        remote_storage=remote_storage,
    )
def load_local_model(
    model_path: Text,
    interpreter: Optional[NaturalLanguageInterpreter] = None,
    generator: Union[EndpointConfig, NaturalLanguageGenerator] = None,
    tracker_store: Optional[TrackerStore] = None,
    lock_store: Optional[LockStore] = None,
    action_endpoint: Optional[EndpointConfig] = None,
    model_server: Optional[EndpointConfig] = None,
    remote_storage: Optional[Text] = None,
) -> "Agent":
    """Load an agent from a local model archive.

    Args:
        model_path: Path to a model archive file; any other value is handed
            to `get_latest_model` to pick an archive.
        interpreter: Forwarded to `Agent.load`.
        generator: Forwarded to `Agent.load`.
        tracker_store: Forwarded to `Agent.load`.
        lock_store: Forwarded to `Agent.load`.
        action_endpoint: Forwarded to `Agent.load`.
        model_server: Forwarded to `Agent.load`.
        remote_storage: Forwarded to `Agent.load`.

    Returns:
        The agent returned by `Agent.load`, or a bare `Agent()` (after
        raising a warning) when no archive could be determined.
    """
    if os.path.isfile(model_path):
        archive = model_path
    else:
        archive = get_latest_model(model_path)

    if archive is None:
        rasa.shared.utils.io.raise_warning(
            f"Could not load local model in '{model_path}'."
        )
        return Agent()

    # Unpack into a fresh temporary directory; it is not cleaned up here.
    unpack_target = tempfile.mkdtemp()
    unpacked = unpack_model(archive, unpack_target)

    load_options = dict(
        interpreter=interpreter,
        generator=generator,
        tracker_store=tracker_store,
        lock_store=lock_store,
        action_endpoint=action_endpoint,
        model_server=model_server,
        remote_storage=remote_storage,
        path_to_model_archive=archive,
    )
    return Agent.load(unpacked, **load_options)
def shell(args: argparse.Namespace) -> None:
    """Talk with a bot through the command line.

    Sets `args.connector` to `"cmdline"`, resolves the model to use and then
    hands over to `rasa.nlu.run.run_cmdline` when the model metadata reports
    the NLU training type, or to `rasa.cli.run.run` otherwise.

    Args:
        args: Parsed command-line arguments. `args.model` is read and
            `args.connector` is overwritten.
    """
    from rasa.cli.utils import get_validated_path
    from rasa.shared.constants import DEFAULT_MODELS_PATH

    args.connector = "cmdline"
    validated_path = get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)

    try:
        model_archive = get_latest_model(validated_path)
    except ModelNotFound:
        print_error(
            "No model found. Train a model before running the "
            "server using `rasa train`."
        )
        return

    training_type = LocalModelStorage.metadata_from_archive(
        model_archive
    ).training_type

    if training_type == TrainingType.NLU:
        import rasa.nlu.run

        telemetry.track_shell_started("nlu")
        rasa.nlu.run.run_cmdline(model_archive)
        return

    import rasa.cli.run

    telemetry.track_shell_started("rasa")
    rasa.cli.run.run(args)
def _load_model(
    model_path: Union[Text, Path]
) -> Tuple[Text, ModelMetadata, GraphRunner]:
    """Unpacks a model from a given path using the graph model loader.

    Args:
        model_path: Path to a model archive file; any other value is handed
            to `get_latest_model` to pick an archive.

    Returns:
        A tuple of the archive's base name, the model metadata and the graph
        runner returned by `loader.load_predict_graph_runner`.

    Raises:
        ModelNotFound: If no archive is found, if resolving the path raises a
            `TypeError`, or if the archive cannot be read as a tar file.
    """
    try:
        if os.path.isfile(model_path):
            model_tar = model_path
        else:
            model_tar = get_latest_model(model_path)
            if not model_tar:
                raise ModelNotFound(f"No model found at path '{model_path}'.")
    except TypeError as error:
        # Chain explicitly so the original failure shows up as the direct
        # cause instead of an unrelated "during handling" context.
        raise ModelNotFound(f"Model {model_path} can not be loaded.") from error

    logger.info(f"Loading model {model_tar}...")

    # The runner is loaded while the temporary directory exists; the
    # directory is removed when the `with` block exits.
    with tempfile.TemporaryDirectory() as temporary_directory:
        try:
            metadata, runner = loader.load_predict_graph_runner(
                Path(temporary_directory),
                Path(model_tar),
                LocalModelStorage,
                DaskGraphRunner,
            )
        except tarfile.ReadError as error:
            raise ModelNotFound(
                f"Model {model_path} can not be loaded."
            ) from error
        return os.path.basename(model_tar), metadata, runner
def get_provided_model(arg_model: Text) -> Optional[Text]:
    """Turn the model argument into the path of the model to use.

    Args:
        arg_model: Model path given by the caller. It is passed through
            `utils.get_validated_path` together with `DEFAULT_MODELS_PATH`.

    Returns:
        The validated path, or the result of `model.get_latest_model` for it
        when the validated path is a directory.
    """
    validated_path = utils.get_validated_path(
        arg_model, "model", DEFAULT_MODELS_PATH
    )
    if not os.path.isdir(validated_path):
        return validated_path
    return model.get_latest_model(validated_path)
def get_provided_model(arg_model: Text):
    """Turn the model argument into the path of the model to use.

    The argument is validated with `get_validated_path`; a directory is
    replaced by whatever `get_latest_model` returns for it, any other
    validated path is returned unchanged.
    """
    validated_path = get_validated_path(arg_model, "model", DEFAULT_MODELS_PATH)
    return (
        get_latest_model(validated_path)
        if os.path.isdir(validated_path)
        else validated_path
    )
def test_get_latest_model(trained_rasa_model: str):
    """`get_latest_model` returns a copy freshly written next to the model."""
    model_directory = os.path.dirname(trained_rasa_model)
    expected_latest = os.path.join(model_directory, "latest.tar.gz")

    # Copying creates a second archive in the same directory as the fixture
    # model; that copy is the one expected back.
    shutil.copy(trained_rasa_model, expected_latest)

    assert get_latest_model(model_directory) == expected_latest
async def load_agent(bot: Text, conf: BotsConf) -> Agent:
    """Train the given bot and load an agent from the resulting model.

    Args:
        bot: Name of the bot, used to look up its location and endpoint in
            `conf`.
        conf: Bot configuration providing `get_loc` and `get_endpoint`.

    Returns:
        The agent loaded from the model that `get_latest_model` returns for
        the bot's `models` directory.
    """
    # Training runs first; the model lookup below happens only afterwards.
    await train_agent(bot, conf)

    models_directory = f"{conf.get_loc(bot)}/models"
    bot_loc = get_latest_model(models_directory)
    print(f'.. load bot model {bot_loc}')
    return Agent.load(bot_loc, action_endpoint=conf.get_endpoint(bot))
def _add_core_subparser_arguments(parser: argparse.ArgumentParser):
    """Register the `-m/--model` option on the given parser.

    The option's default is computed when this function runs, by calling
    `get_latest_model(DEFAULT_MODELS_PATH)`.
    """
    latest_model = get_latest_model(DEFAULT_MODELS_PATH)
    help_text = (
        "Path to a pre-trained model. If it is a directory all models "
        "in this directory will be compared."
    )
    parser.add_argument(
        '-m', '--model', type=str, default=latest_model, help=help_text
    )
def _add_core_subparser_arguments(parser: argparse.ArgumentParser):
    """Register the multi-valued `--model` option on the given parser.

    The default is a one-element list holding the result of
    `get_latest_model(DEFAULT_MODELS_PATH)`, computed when this function
    runs.
    """
    option_settings = {
        "nargs": "+",
        "default": [get_latest_model(DEFAULT_MODELS_PATH)],
        "help": (
            "Path to a pre-trained model. If it is a 'tar.gz' file that model file "
            "will be used. If it is a directory, the latest model in that directory "
            "will be used. If multiple 'tar.gz' files are provided, all those models "
            "will be compared."
        ),
    }
    parser.add_argument("--model", **option_settings)
def test_get_latest_model(tmp_path: Path):
    """`get_latest_model` picks the archive that was created last."""
    model_directory = tmp_path / "test_get_latest_model"
    model_directory.mkdir()

    older_archive = model_directory / "model_one.tar.gz"
    newer_archive = model_directory / "model_two.tar.gz"

    older_archive.touch()
    # create second model later to be registered as distinct in Windows
    time.sleep(0.1)
    newer_archive.touch()

    expected_latest = os.path.join(model_directory, "model_two.tar.gz")
    assert get_latest_model(str(model_directory)) == expected_latest
async def status(request: Request):
    """Respond with the model name and the fingerprint of that model.

    The JSON body also carries the current value of
    `app.active_training_processes` under `num_active_training_jobs`.
    """
    payload = {
        "model_file": model.get_latest_model(),
        "fingerprint": model.fingerprint_from_path(app.agent.model_directory),
        "num_active_training_jobs": app.active_training_processes.value,
    }
    return response.json(payload)
def add_test_core_model_param(parser: argparse.ArgumentParser) -> None:
    """Register the multi-valued `-m/--model` option on the given parser.

    The default is a one-element list holding the result of
    `get_latest_model(DEFAULT_MODELS_PATH)`, computed when this function
    runs.
    """
    latest_model = get_latest_model(DEFAULT_MODELS_PATH)
    help_text = (
        "Path to a pre-trained model. If it is a 'tar.gz' file that model file "
        "will be used. If it is a directory, the latest model in that directory "
        "will be used (exception: '--evaluate-model-directory' flag is set). "
        "If multiple 'tar.gz' files are provided, all those models will be compared."
    )
    parser.add_argument(
        "-m",
        "--model",
        nargs="+",
        default=[latest_model],
        help=help_text,
    )
def enabled_chat(project: str, model: str):
    """
    Is the model [model] of the project [project] ready to talk?

    :param project: project name
    :param model: model name
    :return: True if the model [model] of the project [project] has been
        trained and its ready to talk, False otherwise
    """
    model_path = os.path.join(MODEL_PATH, project, model)

    # Use isdir rather than exists: a regular file at this path would make
    # os.listdir raise instead of yielding the documented False.
    if not os.path.isdir(model_path) or not os.listdir(model_path):
        return False

    # A non-empty directory does not guarantee a usable model archive; do not
    # hand a missing result on to Agent.load.
    latest_model = get_latest_model(model_path)
    if not latest_model:
        return False

    return Agent.load(latest_model).is_ready()
def run_nlu(args: argparse.Namespace):
    """Unpack the selected model and start the Rasa NLU server with it.

    `args.model` is set to the validated `args.path`, and `args.path` is then
    replaced by the parent directory of the unpacked model before the
    arguments are handed to `rasa_nlu.server.main`.

    Args:
        args: Parsed command-line arguments; `args.path` is read and both
            `args.model` and `args.path` are overwritten.
    """
    import rasa_nlu.server
    import tempfile

    args.model = get_validated_path(args.path, "path", DEFAULT_MODELS_PATH)
    model_archive = get_latest_model(args.model)

    working_directory = tempfile.mkdtemp()
    unpacked_model = model.unpack_model(model_archive, working_directory)
    args.path = os.path.dirname(unpacked_model)

    try:
        rasa_nlu.server.main(args)
    finally:
        # Remove the unpacked model even when the server exits with an
        # exception (including an interrupt), so it does not pile up on disk.
        shutil.rmtree(unpacked_model)
def read_models(project_id: str) -> List[Dict[str, Any]]:
    """
    Read data of every model stored related to a project with id [project_id]

    Each returned document carries the model name and the number of joined
    stories; `last_trained_timestamp` is added only when a model archive is
    found on disk for that model.

    :param project_id: id of the project
    :return: a list with the data of the models
    """
    pipeline = [
        {
            "$lookup": {
                "from": "stories",
                "localField": "_id",
                "foreignField": "model_id",
                "as": "stories",
            }
        },
        {"$match": {"project_id": ObjectId(project_id)}},
        {"$project": {"name": 1, "stories_length": {"$size": "$stories"}}},
    ]
    data = list(MODELS_COLL.aggregate(pipeline))
    project_name = read_project_name_from_id(project_id)

    for doc in data:
        model_name = read_model_name_from_id(doc["_id"])
        model_directory = os.path.join(MODEL_PATH, project_name, model_name)
        if not os.path.exists(model_directory):
            continue

        # The directory may exist without containing a model archive; skip
        # the timestamp then instead of passing None to os.path.getctime.
        latest_model = get_latest_model(model_directory)
        if not latest_model:
            continue

        doc["last_trained_timestamp"] = datetime.fromtimestamp(
            os.path.getctime(latest_model)
        ).isoformat()

    return json.objectid_to_id(json.loads(json_mongo.dumps(data)))
def load_local_model(dir: Text, component_builder: ComponentBuilder) -> "NLUModel":
    """Build an `NLUModel` from a model archive on the local file system.

    Args:
        dir: Path to a model archive file; any other value is handed to
            `get_latest_model` to pick an archive.
        component_builder: Builder used to create the interpreter, or the
            fallback model when no archive is found.

    Returns:
        An `NLUModel` named after the archive file, or the result of
        `NLUModel.fallback_model` (after logging a warning) when no archive
        could be determined.
    """
    archive = dir if os.path.isfile(dir) else get_latest_model(dir)
    if archive is None:
        logger.warning("Could not load local model in '{}'".format(dir))
        return NLUModel.fallback_model(component_builder)

    # Unpack into a fresh temporary directory; it is not cleaned up here.
    unpack_target = tempfile.mkdtemp()
    unpacked = model.unpack_model(archive, unpack_target)

    # Use the NLU subdirectory when it exists, otherwise the unpacked root.
    _, nlu_subdirectory = model.get_model_subdirectories(unpacked)
    if os.path.exists(nlu_subdirectory):
        model_path = nlu_subdirectory
    else:
        model_path = unpacked

    return NLUModel(
        os.path.basename(archive),
        interpreter_for_model(component_builder, model_path),
        model_path,
    )
def __init__(self, project: str = "GDA", model: str = "AOD", user_type: str = "user"):
    """Load the agent for the given project and model.

    Args:
        project: Project name; used as a path component of the agent path.
        model: Model name; used as a path component of the agent path. The
            value "smalltalk" disables the action endpoint.
        user_type: Stored on the instance as `user_type`.

    Raises:
        NotADirectoryError: If the agent path does not exist.
    """
    self.project = project
    self.model = model
    self.user_type = user_type
    self.agent_path = os.path.join(
        here, DEFAULT_MODELS_PATH, self.project, self.model
    )

    # Every model except "smalltalk" gets an action endpoint.
    self.action_endpoint = (
        None
        if model == "smalltalk"
        else EndpointConfig(url=config.ACTION_URL_ENDPOINT)
    )

    if not os.path.exists(self.agent_path):
        raise NotADirectoryError(
            "NLU or dialogue model not found, make sure training succeeded"
        )

    self.agent = Agent.load(
        get_latest_model(self.agent_path), action_endpoint=self.action_endpoint
    )
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Train a Rasa model (Core and NLU), or only the part that has data.

    Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, the previously stored model when
        no training is required, or `None` when no training data is given.
    """
    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    if stories.is_empty():
        if nlu_data.is_empty():
            print_error(
                "No training data given. Please provide stories and NLU data in "
                "order to train a Rasa model using the '--data' argument."
            )
            return

        print_warning("No stories present. Just a Rasa NLU model will be trained.")
        return await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
        )

    if nlu_data.is_empty():
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

    new_fingerprint = await model.model_fingerprint(file_importer)
    previous_model = model.get_latest_model(output_path)

    # Start from the forced result; it is replaced by a real comparison with
    # the previous model unless training is forced.
    comparison = FingerprintComparisonResult(force_training=force_training)
    if not force_training:
        comparison = model.should_retrain(
            new_fingerprint, previous_model, train_path
        )

    if not comparison.is_training_required():
        print_success(
            "Nothing changed. You can use the old model stored at '{}'.".format(
                os.path.abspath(previous_model)
            )
        )
        return previous_model

    await _do_training(
        file_importer,
        output_path=output_path,
        train_path=train_path,
        fingerprint_comparison_result=comparison,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        kwargs=kwargs,
    )
    return model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train a Rasa model (Core and NLU), or only the part that has data.

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, the previously stored model when
        nothing has to be retrained, or `None` when the domain is invalid or
        no training data is given.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()
    skill_imports = SkillSelector.load(config)

    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(
            "Could not load domain due to: '{}'. To specify a valid domain path use "
            "the '--domain' argument.".format(e)
        )
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    # An empty directory listing means no data of that kind was supplied.
    stories_missing = not os.listdir(story_directory)
    nlu_data_missing = not os.listdir(nlu_data_directory)

    if stories_missing:
        if nlu_data_missing:
            print_error(
                "No training data given. Please provide stories and NLU data in "
                "order to train a Rasa model using the '--data' argument."
            )
            return

        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_missing:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    previous_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(
        new_fingerprint, previous_model, train_path
    )

    if not (force_training or retrain_core or retrain_nlu):
        print_success(
            "Nothing changed. You can use the old model stored at '{}'.".format(
                os.path.abspath(previous_model)
            )
        )
        return previous_model

    await _do_training(
        domain=domain,
        config=config,
        output_path=output_path,
        train_path=train_path,
        nlu_data_directory=nlu_data_directory,
        story_directory=story_directory,
        force_training=force_training,
        retrain_core=retrain_core,
        retrain_nlu=retrain_nlu,
        fixed_model_name=fixed_model_name,
        uncompress=uncompress,
        kwargs=kwargs,
    )
    return _package_model(
        new_fingerprint=new_fingerprint,
        output_path=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        uncompress=uncompress,
    )
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    additional_arguments: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Unlike the early-exit checks that are commented out below, this variant
    always compares fingerprints and then drops the parts that cannot be
    trained (Core when domain or stories are empty, NLU per language when
    that language has no data) from the comparison result.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        fixed_model_name: Name of model to be stored.
        additional_arguments: Additional training parameters.

    Returns:
        Path of the trained model archive, or the previously stored model
        when no training is required.
    """
    # NOTE(review): block nesting below was reconstructed from a collapsed
    # source line - confirm against the original file.
    stories, nlu_data = await asyncio.gather(file_importer.get_stories(),
                                             file_importer.get_nlu_data())

    # Disabled early exits, kept as found:
    # if stories.is_empty() and nlu_data.is_empty():
    #     print_error(
    #         "No training data given. Please provide stories and NLU data in "
    #         "order to train a Rasa model using the '--data' argument."
    #     )
    #     return

    # if nlu_data.is_empty():
    #     print_warning("No NLU data present. Just a Rasa Core model will be trained.")
    #     return await _train_core_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         additional_arguments=additional_arguments,
    #     )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    fingerprint_comparison = FingerprintComparisonResult(
        force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(new_fingerprint,
                                                      old_model, train_path)

    # bf mod >
    if fingerprint_comparison.nlu == True:
        # replace True with list of all langs
        fingerprint_comparison.nlu = list(
            new_fingerprint.get("nlu-config", {}).keys())
    domain = await file_importer.get_domain()
    # Core needs both a domain and stories.
    core_untrainable = domain.is_empty() or stories.is_empty()
    # presumably nlu_data maps language -> training data; verify against the
    # importer.
    nlu_untrainable = [l for l, d in nlu_data.items() if d.is_empty()]
    fingerprint_comparison.core = fingerprint_comparison.core and not core_untrainable
    fingerprint_comparison.nlu = [
        l for l in fingerprint_comparison.nlu if l not in nlu_untrainable
    ]
    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE)
    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(
                lang),
            color=bcolors.OKBLUE)
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=additional_arguments,
        )
        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )
    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model
async def _train_async_internal(
    domain: Union[Domain, Text],
    config: Text,
    train_path: Text,
    nlu_data_directory: Text,
    story_directory: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Train a Rasa model (Core and NLU), or only the part that has data.

    Use only from `train_async`.

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        train_path: Directory in which to train the model.
        nlu_data_directory: Path to NLU training files.
        story_directory: Path to Core training files.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, the previously stored model when
        nothing has to be retrained, or `None` when no training data is given.
    """
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    # A non-empty directory listing means data of that kind was supplied.
    has_stories = bool(os.listdir(story_directory))
    has_nlu_data = bool(os.listdir(nlu_data_directory))

    if not (has_stories or has_nlu_data):
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return

    if not has_stories:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
        )

    if not has_nlu_data:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

    previous_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(
        new_fingerprint, previous_model, train_path
    )
    training_needed = force_training or retrain_core or retrain_nlu

    if not training_needed:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'.".format(
                os.path.abspath(previous_model)
            )
        )
        return previous_model

    await _do_training(
        domain=domain,
        config=config,
        output_path=output_path,
        train_path=train_path,
        nlu_data_directory=nlu_data_directory,
        story_directory=story_directory,
        force_training=force_training,
        retrain_core=retrain_core,
        retrain_nlu=retrain_nlu,
        fixed_model_name=fixed_model_name,
        kwargs=kwargs,
    )
    return _package_model(
        new_fingerprint=new_fingerprint,
        output_path=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    dry_run: bool,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> TrainingResult:
    """Train a Rasa model (Core and NLU), or only the part that has data.

    Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        dry_run: If `True` then no training will be done, and the information
            about whether the training needs to be done will be printed.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        core_additional_arguments: Additional training parameters for core
            training.
        nlu_additional_arguments: Additional training parameters forwarded to
            training method of each NLU component.

    Returns:
        An instance of `TrainingResult`.
    """
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    new_fingerprint = await model.model_fingerprint(file_importer)
    previous_model = model.get_latest_model(output_path)
    comparison = model.should_retrain(
        new_fingerprint, previous_model, train_path, force_training
    )

    if dry_run:
        code, texts = dry_run_result(comparison)
        # A positive code is reported as a warning, anything else as success.
        report = print_warning if code > 0 else print_success
        for text in texts:
            report(text)
        return TrainingResult(code=code)

    # NOTE(review): `can_train_nlu_model()` is used below as the "no NLU data"
    # signal, which reads inverted relative to its name - confirm against the
    # training data implementation before changing these conditions.
    if stories.is_empty():
        if nlu_data.can_train_nlu_model():
            print_error(
                "No training data given. Please provide stories and NLU data in "
                "order to train a Rasa model using the '--data' argument."
            )
            return TrainingResult()

        print_warning("No stories present. Just a Rasa NLU model will be trained.")
        nlu_model = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
        )
        return TrainingResult(model=nlu_model)

    if nlu_data.can_train_nlu_model():
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        core_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
        )
        return TrainingResult(model=core_model)

    if not comparison.is_training_required():
        print_success(
            "Nothing changed. You can use the old model stored at '{}'.".format(
                os.path.abspath(previous_model)
            )
        )
        return TrainingResult(model=previous_model)

    # Only the training itself runs inside the telemetry context; packaging
    # happens after it has exited.
    async with telemetry.track_model_training(file_importer, model_type="rasa"):
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
            old_model_zip_path=previous_model,
        )

    packaged_model = model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
    return TrainingResult(model=packaged_model)
async def train_async(
    domain: Optional,
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train a Rasa model (Core and NLU), reusing unchanged parts if possible.

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, the previously stored model when
        nothing changed, or `None` when the domain is invalid or no training
        data is given.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()
    previous_model = model.get_latest_model(output_path)

    # Both parts are retrained unless an unchanged part can be merged from
    # the previous model below.
    retrain_core, retrain_nlu = True, True

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(e)
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    stories_missing = not os.listdir(story_directory)
    nlu_data_missing = not os.listdir(nlu_data_directory)

    if stories_missing:
        if nlu_data_missing:
            print_error(
                "No training data given. Please provide dialogue and NLU data in "
                "order to train a Rasa model."
            )
            return

        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_missing:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    if previous_model and not force_training:
        previous_unpacked = model.unpack_model(previous_model)
        previous_core, previous_nlu = model.get_model_subdirectories(
            previous_unpacked
        )
        last_fingerprint = model.fingerprint_from_path(previous_unpacked)

        # A part whose fingerprint did not change is copied over from the
        # previous model; it is retrained only if that merge fails.
        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_core = not model.merge_model(
                previous_core, os.path.join(train_path, "core")
            )

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_nlu = not model.merge_model(
                previous_nlu, os.path.join(train_path, "nlu")
            )

    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if not (retrain_core or retrain_nlu):
        print_success(
            "Nothing changed. You can use the old model stored at '{}'".format(
                os.path.abspath(previous_model)
            )
        )
        return previous_model

    output_path = create_output_path(output_path, fixed_name=fixed_model_name)
    model.create_package_rasa(train_path, output_path, new_fingerprint)
    if uncompress:
        output_path = decompress(output_path)
    print_success(
        "Your Rasa model is trained and saved at '{}'.".format(output_path)
    )
    return output_path
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Train a Rasa model (Core and NLU), or only the part that has data.

    Use only from `train_async`.

    Args:
        file_importer: Importer which supplies the stories and NLU data and
            from which the model fingerprint is computed.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, the previously stored model when
        nothing has to be retrained, or `None` when no training data is given.
    """
    new_fingerprint = await model.model_fingerprint(file_importer)

    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    if stories.is_empty():
        if nlu_data.is_empty():
            print_error(
                "No training data given. Please provide stories and NLU data in "
                "order to train a Rasa model using the '--data' argument."
            )
            return

        print_warning("No stories present. Just a Rasa NLU model will be trained.")
        return await _train_nlu_with_validated_data(
            file_importer, output=output_path, fixed_model_name=fixed_model_name
        )

    if nlu_data.is_empty():
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

    previous_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = model.should_retrain(
        new_fingerprint, previous_model, train_path
    )

    if not (force_training or retrain_core or retrain_nlu):
        print_success(
            "Nothing changed. You can use the old model stored at '{}'.".format(
                os.path.abspath(previous_model)
            )
        )
        return previous_model

    await _do_training(
        file_importer,
        output_path=output_path,
        train_path=train_path,
        force_training=force_training,
        retrain_core=retrain_core,
        retrain_nlu=retrain_nlu,
        fixed_model_name=fixed_model_name,
        kwargs=kwargs,
    )
    return model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train a Rasa model (Core and NLU), reusing unchanged parts if possible.

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, the previously stored model when
        nothing changed, or `None` when no training data is given.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()
    previous_model = model.get_latest_model(output)

    # Both parts are retrained unless an unchanged part can be merged from
    # the previous model below.
    retrain_core, retrain_nlu = True, True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    stories_missing = not os.listdir(story_directory)
    nlu_data_missing = not os.listdir(nlu_data_directory)

    if stories_missing:
        if nlu_data_missing:
            print_error(
                "No training data given. Please provide dialogue and NLU data in "
                "order to train a Rasa model."
            )
            return

        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return train_nlu(config, nlu_data_directory, output, None)

    if nlu_data_missing:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await train_core_async(
            domain, config, story_directory, output, None, kwargs
        )

    if previous_model and not force_training:
        previous_unpacked = model.unpack_model(previous_model)
        previous_core, previous_nlu = model.get_model_subdirectories(
            previous_unpacked
        )
        last_fingerprint = model.fingerprint_from_path(previous_unpacked)

        # A part whose fingerprint did not change is copied over from the
        # previous model; it is retrained only if that merge fails.
        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_core = not model.merge_model(
                previous_core, os.path.join(train_path, "core")
            )

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_nlu = not model.merge_model(
                previous_nlu, os.path.join(train_path, "nlu")
            )

    if force_training or retrain_core:
        await train_core_async(
            domain, config, story_directory, output, train_path, kwargs
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if not (retrain_core or retrain_nlu):
        print_success(
            "Nothing changed. You can use the old model stored at '{}'".format(
                os.path.abspath(previous_model)
            )
        )
        return previous_model

    output = create_output_path(output)
    model.create_package_rasa(train_path, output, new_fingerprint)
    print_success("Your bot is trained and ready to take for a spin!")
    return output
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    dry_run: bool,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> TrainingResult:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Depending on which training data is available this trains an NLU-only
    model, a Core-only model, or a combined model. For the combined case,
    training is skipped when the fingerprint comparison reports that nothing
    needs retraining.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        dry_run: If `True` then no training will be done, and the information
            about whether the training needs to be done will be printed.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        core_additional_arguments: Additional training parameters for core
            training.
        nlu_additional_arguments: Additional training parameters forwarded to
            training method of each NLU component.
        model_to_finetune: Optional path to a model which should be finetuned
            or a directory in case the latest trained model should be used.
        finetuning_epoch_fraction: The fraction currently specified training
            epochs in the model configuration which should be used for
            finetuning.

    Returns:
        An instance of `TrainingResult`.
    """
    # Fine-tuning settings are passed unchanged to every training helper.
    finetune_options = {
        "model_to_finetune": model_to_finetune,
        "finetuning_epoch_fraction": finetuning_epoch_fraction,
    }

    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    fingerprint_comparison = model.should_retrain(
        new_fingerprint, old_model, train_path, force_training=force_training
    )

    if dry_run:
        # Only report what would be trained; a positive code is shown as a
        # warning, anything else as success.
        code, texts = dry_run_result(fingerprint_comparison)
        report = print_warning if code > 0 else print_success
        for text in texts:
            report(text)
        return TrainingResult(code=code)

    if nlu_data.has_e2e_examples():
        rasa.shared.utils.common.mark_as_experimental_feature("end-to-end training")

    if stories.is_empty() and nlu_data.contains_no_pure_nlu_data():
        rasa.shared.utils.cli.print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return TrainingResult()

    if stories.is_empty():
        rasa.shared.utils.cli.print_warning(
            "No stories present. Just a Rasa NLU model will be trained."
        )
        nlu_only_model = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
            **finetune_options,
        )
        return TrainingResult(model=nlu_only_model)

    # We will train nlu if there are any nlu example, including from e2e stories.
    if nlu_data.contains_no_pure_nlu_data() and not nlu_data.has_e2e_examples():
        rasa.shared.utils.cli.print_warning(
            "No NLU data present. Just a Rasa Core model will be trained."
        )
        core_only_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
            **finetune_options,
        )
        return TrainingResult(model=core_only_model)

    # NOTE(review): fingerprint and latest model are computed a second time
    # here, and the comparison from above is replaced — confirm whether the
    # repetition is intentional before consolidating.
    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    if force_training:
        fingerprint_comparison = FingerprintComparisonResult(force_training=True)
    else:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint,
            old_model,
            train_path,
            has_e2e_examples=nlu_data.has_e2e_examples(),
        )

    if not fingerprint_comparison.is_training_required():
        rasa.shared.utils.cli.print_success(
            "Nothing changed. You can use the old model stored at '{}'.".format(
                os.path.abspath(old_model)
            )
        )
        return TrainingResult(model=old_model)

    async with telemetry.track_model_training(file_importer, model_type="rasa"):
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
            old_model_zip_path=old_model,
            **finetune_options,
        )

    packaged_model = model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
    return TrainingResult(model=packaged_model)
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Core training is skipped when the domain or the stories are empty, and
    NLU training is skipped for every language without NLU data.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: Forwarded unchanged to `_do_training`.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive, or the latest existing model when
        nothing has to be trained.
    """
    new_fingerprint = await model.model_fingerprint(file_importer)
    stories = await file_importer.get_stories()
    nlu_data = await file_importer.get_nlu_data()

    # NOTE: this variant has no early exits for "no data at all",
    # "stories missing" or "NLU data missing"; trainability of Core and of
    # each NLU language is decided in the "bf mod" section below.

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = model.should_retrain(
        new_fingerprint, old_model, train_path
    )

    # bf mod
    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    languages_without_data = [
        language for language, examples in nlu_data.items() if examples.is_empty()
    ]
    retrain_core = retrain_core and not core_untrainable

    if retrain_nlu is True:
        # A plain `True` means "everything": consider every language that is
        # listed in the new fingerprint.
        from rasa.model import FINGERPRINT_NLU_DATA_KEY

        candidate_languages = new_fingerprint[FINGERPRINT_NLU_DATA_KEY].keys()
    else:
        # presumably an iterable of languages here; verify against
        # `model.should_retrain`
        candidate_languages = retrain_nlu

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE,
        )
    for language in languages_without_data:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(
                language
            ),
            color=bcolors.OKBLUE,
        )

    retrain_nlu = [
        language
        for language in candidate_languages
        if language not in languages_without_data
    ]
    # /bf mod

    if not (force_training or retrain_core or retrain_nlu):
        print_success(
            "Nothing changed. You can use the old model stored at '{}'.".format(
                os.path.abspath(old_model)
            )
        )
        return old_model

    await _do_training(
        file_importer,
        output_path=output_path,
        train_path=train_path,
        force_training=force_training,
        retrain_core=retrain_core,
        retrain_nlu=retrain_nlu,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        kwargs=kwargs,
    )

    return model.package_model(
        fingerprint=new_fingerprint,
        output_directory=output_path,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
    )
async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Parts of the latest model in `output` are reused when their fingerprint
    did not change, unless `force_training` is set.

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.

    Returns:
        Path of the trained model archive, or of the previous model when
        nothing was retrained.
    """
    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    # Both parts are retrained unless an unchanged part of the previous model
    # can be merged into the training directory.
    core_needs_training = True
    nlu_needs_training = True

    if old_model and not force_training:
        extracted = model.unpack_model(old_model)
        previous_core, previous_nlu = model.get_model_subdirectories(extracted)
        previous_fingerprint = model.fingerprint_from_path(extracted)

        if not model.core_fingerprint_changed(previous_fingerprint, new_fingerprint):
            core_target = os.path.join(train_path, "core")
            core_needs_training = not model.merge_model(previous_core, core_target)

        if not model.nlu_fingerprint_changed(previous_fingerprint, new_fingerprint):
            nlu_target = os.path.join(train_path, "nlu")
            nlu_needs_training = not model.merge_model(previous_nlu, nlu_target)

    if not (force_training or core_needs_training):
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )
    else:
        await train_core_async(domain, config, story_directory, output, train_path)

    if not (force_training or nlu_needs_training):
        print(
            "NLU data / configuration did not change. "
            "No need to retrain NLU model."
        )
    else:
        train_nlu(config, nlu_data_directory, output, train_path)

    if not (core_needs_training or nlu_needs_training):
        print(
            "Nothing changed. You can use the old model stored at {}".format(
                os.path.abspath(old_model)
            )
        )
        return old_model

    output = create_output_path(output)
    model.create_package_rasa(train_path, output, new_fingerprint)

    print("Train path: '{}'.".format(train_path))
    print_success("Your bot is trained and ready to take for a spin!")

    return output