def test_same_file_names_get_resolved(tmpdir):
    """Files with the same name in different directories must both survive
    the core/NLU split instead of one overwriting the other."""
    tmpdir.join("one").mkdir()
    tmpdir.join("two").mkdir()

    # Fix: `os.path.join` with a single argument was a no-op wrapper —
    # `.strpath` already yields the complete path.
    data_dir_one = tmpdir.join("one").join("stories.md").strpath
    data_dir_two = tmpdir.join("two").join("stories.md").strpath
    shutil.copy2(DEFAULT_STORIES_FILE, data_dir_one)
    shutil.copy2(DEFAULT_STORIES_FILE, data_dir_two)

    nlu_dir_one = tmpdir.join("one").join("nlu.md").strpath
    nlu_dir_two = tmpdir.join("two").join("nlu.md").strpath
    shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_one)
    shutil.copy2(DEFAULT_NLU_DATA, nlu_dir_two)

    core_directory, nlu_directory = data.get_core_nlu_directories([tmpdir.strpath])

    nlu_files = os.listdir(nlu_directory)
    assert len(nlu_files) == 2
    assert all(f.endswith("nlu.md") for f in nlu_files)

    stories = os.listdir(core_directory)
    assert len(stories) == 2
    assert all(f.endswith("stories.md") for f in stories)
def test_same_file_names_get_resolved(tmp_path):
    """Two identically named files in sibling directories must both end up
    in the split core/NLU output directories."""
    # Create the two sibling directories and seed each with identical data.
    for sub_dir in ("one", "two"):
        (tmp_path / sub_dir).mkdir()
        shutil.copy2(DEFAULT_STORIES_FILE, str(tmp_path / sub_dir / "stories.md"))
        shutil.copy2(DEFAULT_NLU_DATA, str(tmp_path / sub_dir / "nlu.yml"))

    core_directory, nlu_directory = data.get_core_nlu_directories([str(tmp_path)])

    nlu_files = os.listdir(nlu_directory)
    assert len(nlu_files) == 2
    assert all(name.endswith("nlu.yml") for name in nlu_files)

    core_files = os.listdir(core_directory)
    assert len(core_files) == 2
    assert all(name.endswith("stories.md") for name in core_files)
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    skill_imports = SkillSelector.load(config, training_files)
    try:
        domain = Domain.load(domain, skill_imports)
        domain.check_missing_templates()
    except InvalidDomain:
        # Signal "no usable domain"; handled inside the `with` block below.
        domain = None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )

    with ExitStack() as stack:
        # All three directories are temporary and cleaned up on exit.
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        nlu_data = stack.enter_context(TempDirectoryPath(nlu_data_directory))
        story = stack.enter_context(TempDirectoryPath(story_directory))

        if domain is None:
            return handle_domain_if_not_exists(
                config, nlu_data_directory, output_path, fixed_model_name
            )

        return await _train_async_internal(
            domain,
            config,
            train_path,
            nlu_data,
            story,
            output_path,
            force_training,
            fixed_model_name,
            kwargs,
        )
    # Fix: removed an unreachable duplicate `if domain is None:` block that
    # followed the `with` statement — every path inside already returns.
def validate_files(args):
    """Validate the domain, NLU data and stories referenced by the CLI args."""
    from rasa.core.validator import Validator

    story_directory, nlu_data_directory = data.get_core_nlu_directories(args.data)

    event_loop = asyncio.get_event_loop()
    file_validator = event_loop.run_until_complete(
        Validator.from_files(args.domain, nlu_data_directory, story_directory)
    )
    file_validator.verify_all()
def check_training_data(args):
    """Exit with an error unless both Core and NLU training data are present."""
    training_files = [
        get_validated_path(f, "data", DEFAULT_DATA_PATH, none_is_valid=True)
        for f in args.data
    ]
    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)

    stories_missing = not os.listdir(story_directory)
    nlu_missing = not os.listdir(nlu_data_directory)
    if stories_missing or nlu_missing:
        print_error(
            "Cannot train initial Rasa model. Please provide NLU data and Core data."
        )
        exit(1)
def test_get_core_nlu_directories(project):
    """The project data should split into one NLU file and one stories file."""
    core_directory, nlu_directory = data.get_core_nlu_directories(
        [os.path.join(project, "data")]
    )

    nlu_files = os.listdir(nlu_directory)
    assert len(nlu_files) == 1
    assert nlu_files[0].endswith("nlu.md")

    story_files = os.listdir(core_directory)
    assert len(story_files) == 1
    assert story_files[0].endswith("stories.md")
def test_get_core_nlu_directories(project):
    """YAML project data should split into one NLU file plus stories and rules."""
    data_dir = os.path.join(project, "data")
    core_directory, nlu_directory = data.get_core_nlu_directories([data_dir])

    nlu_files = os.listdir(nlu_directory)
    assert len(nlu_files) == 1
    assert nlu_files[0].endswith("nlu.yml")

    core_files = os.listdir(core_directory)
    assert len(core_files) == 2
    assert any(name.endswith("stories.yml") for name in core_files)
    assert any(name.endswith("rules.yml") for name in core_files)
def _project_files(
    project,
    config_file=DEFAULT_CONFIG_PATH,
    domain=DEFAULT_DOMAIN_PATH,
    training_files=DEFAULT_DATA_PATH,
):
    """Resolve the standard project file paths relative to `project`.

    Returns:
        Dict with keys `config_file`, `domain_file`, `nlu_data` and
        `stories`, each joined onto `project` (values may be `None` when
        no training files were given).
    """
    if training_files is None:
        core_directory = None
        nlu_directory = None
    else:
        core_directory, nlu_directory = data.get_core_nlu_directories(
            os.path.join(project, training_files)
        )
    # Fix: the `nlu_data` and `stories` entries were swapped — `nlu_data`
    # must point at the NLU directory and `stories` at the Core directory.
    paths = {
        "config_file": config_file,
        "domain_file": domain,
        "nlu_data": nlu_directory,
        "stories": core_directory,
    }
    return {
        k: v if v is None else os.path.join(project, v) for k, v in paths.items()
    }
def interactive(args: argparse.Namespace):
    """Train an initial model (when data exists) and launch interactive learning."""
    args.finetune = False  # Don't support finetuning

    training_files = [
        get_validated_path(f, "data", DEFAULT_DATA_PATH) for f in args.data
    ]
    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)

    # Both Core stories and NLU data are required to bootstrap a model.
    if not (os.listdir(story_directory) and os.listdir(nlu_data_directory)):
        print_error(
            "Cannot train initial Rasa model. Please provide NLU data and Core data."
        )
        exit(1)

    zipped_model = train.train(args)
    perform_interactive_learning(args, zipped_model)
def test_get_core_nlu_directories_with_none():
    """Passing `None` still yields both directories, each of them empty."""
    core_dir, nlu_dir = data.get_core_nlu_directories(None)

    for directory in (core_dir, nlu_dir):
        assert directory
        assert not os.listdir(directory)
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(
            "Could not load domain due to: '{}'. To specify a valid domain path use "
            "the '--domain' argument.".format(e)
        )
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    no_dialogue_data = not os.listdir(story_directory)
    no_nlu_data = not os.listdir(nlu_data_directory)

    if no_dialogue_data and no_nlu_data:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return

    if no_dialogue_data:
        # NLU-only fallback.
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if no_nlu_data:
        # Core-only fallback.
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model, train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model
async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.

    Returns:
        Path of the trained model archive.
    """
    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)

    # Assume both parts need retraining until fingerprints prove otherwise.
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    if not force_training and old_model:
        unpacked_path = model.unpack_model(old_model)
        previous_core, previous_nlu = model.get_model_subdirectories(unpacked_path)
        last_fingerprint = model.fingerprint_from_path(unpacked_path)

        # Reuse each sub-model whose fingerprint has not changed and whose
        # contents can be merged into the new training directory.
        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_core = not model.merge_model(
                previous_core, os.path.join(train_path, "core")
            )
        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_nlu = not model.merge_model(
                previous_nlu, os.path.join(train_path, "nlu")
            )

    if force_training or retrain_core:
        await train_core_async(domain, config, story_directory, output, train_path)
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print(
            "NLU data / configuration did not change. "
            "No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print("Train path: '{}'.".format(train_path))
        print_success("Your bot is trained and ready to take for a spin!")
        return output
    else:
        print(
            "Nothing changed. You can use the old model stored at {}"
            "".format(os.path.abspath(old_model))
        )
        return old_model
async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)

    # Assume both parts need retraining until fingerprints prove otherwise.
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    no_dialogue_data = not os.listdir(story_directory)
    no_nlu_data = not os.listdir(nlu_data_directory)

    if no_dialogue_data and no_nlu_data:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return

    if no_dialogue_data:
        # NLU-only fallback.
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return train_nlu(config, nlu_data_directory, output, None)

    if no_nlu_data:
        # Core-only fallback.
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await train_core_async(
            domain, config, story_directory, output, None, kwargs
        )

    if not force_training and old_model:
        unpacked_path = model.unpack_model(old_model)
        previous_core, previous_nlu = model.get_model_subdirectories(unpacked_path)
        last_fingerprint = model.fingerprint_from_path(unpacked_path)

        # Reuse each sub-model whose fingerprint has not changed and whose
        # contents can be merged into the new training directory.
        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_core = not model.merge_model(
                previous_core, os.path.join(train_path, "core")
            )
        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_nlu = not model.merge_model(
                previous_nlu, os.path.join(train_path, "nlu")
            )

    if force_training or retrain_core:
        await train_core_async(
            domain, config, story_directory, output, train_path, kwargs
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print("NLU data / configuration did not change. No need to retrain NLU model.")

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print_success("Your bot is trained and ready to take for a spin!")
        return output
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )
        return old_model
async def train_async(
    domain: Optional,
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output_path)

    # Assume both parts need retraining until fingerprints prove otherwise.
    retrain_core = True
    retrain_nlu = True

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(e)
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    no_dialogue_data = not os.listdir(story_directory)
    no_nlu_data = not os.listdir(nlu_data_directory)

    if no_dialogue_data and no_nlu_data:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return

    if no_dialogue_data:
        # NLU-only fallback.
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if no_nlu_data:
        # Core-only fallback.
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    if not force_training and old_model:
        unpacked_path = model.unpack_model(old_model)
        previous_core, previous_nlu = model.get_model_subdirectories(unpacked_path)
        last_fingerprint = model.fingerprint_from_path(unpacked_path)

        # Reuse each sub-model whose fingerprint has not changed and whose
        # contents can be merged into the new training directory.
        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_core = not model.merge_model(
                previous_core, os.path.join(train_path, "core")
            )
        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            retrain_nlu = not model.merge_model(
                previous_nlu, os.path.join(train_path, "nlu")
            )

    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        output_path = create_output_path(output_path, fixed_name=fixed_model_name)
        model.create_package_rasa(train_path, output_path, new_fingerprint)
        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa model is trained and saved at '{}'.".format(output_path)
        )
        return output_path
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )
        return old_model