def trained_nlu_model(request):
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)
    model_path = trainer.persist(NLU_MODEL_PATH)

    nlu_data = data.get_nlu_directory(DEFAULT_DATA_PATH)
    output_path = os.path.join(NLU_MODEL_PATH, NLU_MODEL_NAME)
    new_fingerprint = model.model_fingerprint(NLU_DEFAULT_CONFIG_PATH, nlu_data=nlu_data)
    model.create_package_rasa(model_path, output_path, new_fingerprint)

    def fin():
        if os.path.exists(NLU_MODEL_PATH):
            shutil.rmtree(NLU_MODEL_PATH)
        if os.path.exists(output_path):
            shutil.rmtree(output_path)

    request.addfinalizer(fin)

    return output_path
def train_nlu(config: Text, nlu_data: Text, output: Text,
              train_path: Optional[Text]) -> Optional["Interpreter"]:
    """Trains an NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.

    Returns:
        The trained NLU model (`Interpreter`).

    """
    import rasa_nlu

    _train_path = train_path or tempfile.mkdtemp()
    _, nlu_model, _ = rasa_nlu.train(config, nlu_data, _train_path,
                                     project="", fixed_model_name="nlu")

    if not train_path:
        nlu_data = data.get_nlu_directory(nlu_data)
        output_path = create_output_path(output, prefix="nlu-")
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        print_success("Your Rasa NLU model is trained and saved at '{}'."
                      "".format(output_path))

    return nlu_model
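# Hedged usage sketch for the `train_nlu` helper above. The config and data
# paths ("nlu_config.yml", "data/nlu.md", "models") are illustrative
# assumptions, not taken from the snippet itself. With `train_path=None` the
# model is trained in a temporary directory, packaged under `output`, and the
# trained `Interpreter` is returned either way.
def example_train_and_parse():
    interpreter = train_nlu(
        config="nlu_config.yml",   # assumed NLU pipeline config
        nlu_data="data/nlu.md",    # assumed NLU training data
        output="models",           # packaged model is written here
        train_path=None,           # None -> train in a temporary directory
    )
    if interpreter is not None:
        # parse a test message with the freshly trained interpreter
        return interpreter.parse("hello there")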
def test_get_nlu_directory(project):
    data_dir = os.path.join(project, "data")
    nlu_directory = data.get_nlu_directory([data_dir])

    nlu_files = os.listdir(nlu_directory)

    assert len(nlu_files) == 1
    assert nlu_files[0].endswith("nlu.md")
async def train(request):
    # if set will not generate a model name but use the passed one
    model_name = request.args.get("model", None)

    try:
        model_config, data_dict = extract_data_and_config(request)
    except Exception as e:
        logger.debug(traceback.format_exc())
        raise ErrorResponse(
            500,
            "ServerError",
            "An unexpected error occurred.",
            details={"error": str(e)},
        )

    data_file = dump_to_data_file(data_dict)
    config_file = dump_to_data_file(model_config, "_config")

    try:
        path_to_model = await data_router.start_train_process(
            data_file, RasaNLUModelConfig(model_config), model_name)

        # store trained model as tar.gz file
        output_path = create_model_path(model_name, path_to_model)
        nlu_data = data.get_nlu_directory(data_file)
        new_fingerprint = model.model_fingerprint(config_file, nlu_data=nlu_data)
        model.create_package_rasa(path_to_model, output_path, new_fingerprint)
        logger.info("Rasa NLU model trained and persisted to '{}'.".format(
            output_path))

        await data_router.load_model(output_path)

        return await response.file(output_path)
    except MaxWorkerProcessError as e:
        raise ErrorResponse(
            403,
            "NoFreeProcess",
            "No process available for training.",
            details={"error": str(e)},
        )
    except InvalidModelError as e:
        raise ErrorResponse(
            404,
            "ModelNotFound",
            "Model '{}' not found.".format(model_name),
            details={"error": str(e)},
        )
    except TrainingException as e:
        logger.debug(traceback.format_exc())
        raise ErrorResponse(
            500,
            "ServerError",
            "An unexpected error occurred.",
            details={"error": str(e)},
        )
def test_nlu(args: argparse.Namespace) -> None:
    from rasa import data
    from rasa.test import compare_nlu_models, perform_nlu_cross_validation, test_nlu

    nlu_data = cli_utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    nlu_data = data.get_nlu_directory(nlu_data)
    output = args.out or DEFAULT_RESULTS_PATH

    io_utils.create_directory(output)

    if args.config is not None and len(args.config) == 1:
        args.config = os.path.abspath(args.config[0])
        if os.path.isdir(args.config):
            config_dir = args.config
            config_files = os.listdir(config_dir)
            args.config = [
                os.path.join(config_dir, os.path.abspath(config))
                for config in config_files
            ]

    if isinstance(args.config, list):
        logger.info(
            "Multiple configuration files specified, running nlu comparison mode."
        )

        config_files = []
        for file in args.config:
            try:
                validation_utils.validate_yaml_schema(
                    io_utils.read_file(file),
                    CONFIG_SCHEMA_FILE,
                    show_validation_errors=False,
                )
                config_files.append(file)
            except validation_utils.InvalidYamlFileError:
                logger.debug(
                    "Ignoring file '{}' as it is not a valid config file.".format(file)
                )
                continue

        compare_nlu_models(
            configs=config_files,
            nlu=nlu_data,
            output=output,
            runs=args.runs,
            exclusion_percentages=args.percentages,
        )
    elif args.cross_validation:
        logger.info("Test model using cross validation.")
        config = cli_utils.get_validated_path(args.config, "config", DEFAULT_CONFIG_PATH)
        perform_nlu_cross_validation(config, nlu_data, output, vars(args))
    else:
        model_path = cli_utils.get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)
        test_nlu(model_path, nlu_data, output, vars(args))
def split_nlu_data(args):
    from rasa.nlu.training_data.loading import load_data

    data_path = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    data_path = data.get_nlu_directory(data_path)

    nlu_data = load_data(data_path)

    train, test = nlu_data.train_test_split(args.training_fraction)

    train.persist(args.out, filename="training_data.json")
    test.persist(args.out, filename="test_data.json")
def test_get_nlu_file(project):
    data_file = os.path.join(project, "data/nlu.md")
    nlu_directory = data.get_nlu_directory(data_file)

    nlu_files = os.listdir(nlu_directory)

    original = load_data(data_file)
    copied = load_data(nlu_directory)

    assert nlu_files[0].endswith("nlu.md")
    assert original.intent_examples == copied.intent_examples
def train_nlu(config: Text, nlu_data: Text, output: Text,
              train_path: Optional[Text]) -> Optional[Text]:
    """Trains an NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the directory with the
        trained model files, otherwise the path to the packaged model archive.

    """
    import rasa.nlu.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    if not train_path:
        # training NLU only hence the training files still have to be selected
        skill_imports = SkillSelector.load(config)
        nlu_data_directory = data.get_nlu_directory(nlu_data, skill_imports)
    else:
        nlu_data_directory = nlu_data

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model.")
        return

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(config, nlu_data_directory, _train_path,
                                     fixed_model_name="nlu")
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        output_path = create_output_path(output, prefix="nlu-")
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        print_success(
            "Your Rasa NLU model is trained and saved at '{}'.".format(output_path))

        return output_path

    return _train_path
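# Hedged sketch of the two return modes of the `train_nlu` variant above.
# All paths ("config.yml", "data", "models", "training_dir") are illustrative
# assumptions, not taken from the snippet itself.
def example_train_nlu_return_modes():
    # No `train_path`: train in a temporary directory, package the model,
    # and get back the path of the archive created under `output`.
    archive_path = train_nlu("config.yml", "data", "models", train_path=None)

    # Explicit `train_path`: train inside the given directory and get back
    # that directory with the trained model files (no packaging step).
    model_dir = train_nlu("config.yml", "data", "models", train_path="training_dir")

    return archive_path, model_dir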
def show_stories(args: argparse.Namespace):
    import rasa_core.visualize

    args.config = args.config
    args.url = None
    args.stories = data.get_core_directory(args.stories)
    if os.path.exists(DEFAULT_DATA_PATH):
        args.nlu_data = data.get_nlu_directory(DEFAULT_DATA_PATH)

    rasa_core.visualize(args.config, args.domain, args.stories,
                        args.nlu_data, args.output, args.max_history)
def visualize_stories(args: argparse.Namespace):
    import rasa.core.visualize

    loop = asyncio.get_event_loop()
    args.stories = data.get_core_directory(args.stories)
    if args.nlu is None and os.path.exists(DEFAULT_DATA_PATH):
        args.nlu = data.get_nlu_directory(DEFAULT_DATA_PATH)

    loop.run_until_complete(
        rasa.core.visualize(args.config, args.domain, args.stories,
                            args.nlu, args.out, args.max_history))
def split_nlu_data(args: argparse.Namespace) -> None:
    from rasa.nlu.training_data.loading import load_data
    from rasa.nlu.training_data.util import get_file_format

    data_path = rasa.cli.utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    data_path = data.get_nlu_directory(data_path)

    nlu_data = load_data(data_path)
    fformat = get_file_format(data_path)

    train, test = nlu_data.train_test_split(args.training_fraction, args.random_seed)

    train.persist(args.out, filename=f"training_data.{fformat}")
    test.persist(args.out, filename=f"test_data.{fformat}")
def test_nlu(args: argparse.Namespace) -> None:
    from rasa.test import test_nlu, test_nlu_with_cross_validation

    model_path = get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)
    nlu_data = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    nlu_data = data.get_nlu_directory(nlu_data)

    if model_path:
        test_nlu(model_path, nlu_data, vars(args))
    else:
        print("No model specified. Model will be trained using cross validation.")
        config = get_validated_path(args.config, "config", DEFAULT_CONFIG_PATH)
        test_nlu_with_cross_validation(config, nlu_data, args.folds)
def train_nlu(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
) -> Optional[Text]:
    """Trains an NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.
        fixed_model_name: Name of the model to be stored.
        uncompress: If `True` the model will not be compressed.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    # training NLU only hence the training files still have to be selected
    skill_imports = SkillSelector.load(config)
    nlu_data_directory = data.get_nlu_directory(nlu_data, skill_imports)
    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return

    return _train_nlu_with_validated_data(
        config=config,
        nlu_data_directory=nlu_data_directory,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        uncompress=uncompress,
    )
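# Hedged usage sketch for the keyword-argument variant of `train_nlu` above.
# The "config.yml"/"data"/"models" paths and the model name are illustrative
# assumptions; `fixed_model_name` and `uncompress` are simply forwarded to
# `_train_nlu_with_validated_data`.
def example_train_nlu_fixed_name():
    return train_nlu(
        config="config.yml",
        nlu_data="data",
        output="models",
        fixed_model_name="nlu-demo",  # store the packaged model under this name
        uncompress=False,             # keep the packaged model compressed
    )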
def split_nlu_data(args):
    from rasa.nlu.training_data.loading import load_data
    from rasa.nlu.training_data.util import get_file_format

    data_path = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    data_path = data.get_nlu_directory(data_path)

    nlu_data = load_data(data_path)
    fformat = get_file_format(data_path)

    train, test = nlu_data.train_test_split(args.training_fraction)

    train.persist(args.out, filename="training_data.{}".format(fformat), fformat=fformat)
    test.persist(args.out, filename="test_data.{}".format(fformat), fformat=fformat)
def split_nlu_data(args: argparse.Namespace) -> None:
    """Load data from a file path and split the NLU data into test and train examples.

    Args:
        args: Commandline arguments
    """
    from rasa.nlu.training_data.loading import load_data
    from rasa.nlu.training_data.util import get_file_format

    data_path = rasa.cli.utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    data_path = data.get_nlu_directory(data_path)

    nlu_data = load_data(data_path)
    fformat = get_file_format(data_path)

    train, test = nlu_data.train_test_split(args.training_fraction, args.random_seed)

    train.persist(args.out, filename=f"training_data.{fformat}")
    test.persist(args.out, filename=f"test_data.{fformat}")
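# Hedged sketch of invoking the `split_nlu_data` command handler above outside
# of the CLI by building an `argparse.Namespace` by hand. The paths, fraction,
# and seed are illustrative assumptions; normally these arguments are wired up
# by the `rasa data split nlu` command.
def example_split_nlu_data():
    import argparse

    args = argparse.Namespace(
        nlu="data",                # assumed directory with NLU training data
        out="train_test_split",    # assumed output directory for the two files
        training_fraction=0.8,     # 80% train / 20% test
        random_seed=42,            # make the split reproducible
    )
    split_nlu_data(args)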