async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    skill_imports = SkillSelector.load(config, stories)

    if isinstance(domain, str):
        try:
            domain = Domain.load(domain, skill_imports)
            domain.check_missing_templates()
        except InvalidDomain as e:
            print_error(
                "Could not load domain due to: '{}'. To specify a valid domain path "
                "use the '--domain' argument.".format(e)
            )
            return None

    # The selected story files are copied into a temporary directory which is
    # cleaned up when the context manager exits.
    train_context = TempDirectoryPath(data.get_core_directory(stories, skill_imports))

    with train_context as story_directory:
        if not os.listdir(story_directory):
            print_error(
                "No stories given. Please provide stories in order to "
                "train a Rasa Core model using the '--stories' argument."
            )
            return

        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )
def test_load_imports_from_directory_tree(tmpdir_factory: TempdirFactory):
    """Imports are followed recursively; unknown config keys stop the chain."""
    root = tmpdir_factory.mktemp("Parent Bot")
    utils.dump_obj_as_yaml_to_file(root / "config.yml", {"imports": ["Skill A"]})

    dir_a = root / "Skill A"
    dir_a.mkdir()
    utils.dump_obj_as_yaml_to_file(dir_a / "config.yml", {"imports": ["../Skill B"]})

    dir_b = root / "Skill B"
    dir_b.mkdir()
    # "some other" is not an `imports` key, so nothing is imported from here.
    utils.dump_obj_as_yaml_to_file(dir_b / "config.yml", {"some other": ["../Skill C"]})

    dir_b_sub = dir_b / "Skill B-1"
    dir_b_sub.mkdir()
    # Check if loading from `.yaml` also works
    utils.dump_obj_as_yaml_to_file(
        dir_b_sub / "config.yaml", {"imports": ["../../Skill A"]}
    )

    # should not be imported
    (root / "Skill C").mkdir()

    actual = SkillSelector.load(root / "config.yml")

    assert actual._imports == {str(dir_a), str(dir_b)}
def load(
    cls,
    paths: Union[List[Text], Text],
    skill_imports: Optional[SkillSelector] = None,
) -> "Domain":
    """Load and merge domain files from one or more paths.

    If specific skills were selected their training paths take precedence
    over `paths`. Missing files are skipped with a warning instead of
    aborting the whole merge.

    Raises:
        InvalidDomain: If no path to a domain file was given.
    """
    selector = skill_imports if skill_imports else SkillSelector.all_skills()
    if not selector.no_skills_selected():
        paths = selector.training_paths()

    if not paths:
        raise InvalidDomain(
            "No domain file was specified. Please specify a path "
            "to a valid domain file."
        )

    if not isinstance(paths, (list, set)):
        paths = [paths]

    merged = Domain.empty()
    for candidate in paths:
        try:
            part = cls.from_path(candidate, selector)
        except FileNotFoundError as error:
            # Skip missing file and continue with other files
            logger.warning(error)
        else:
            merged = merged.merge(part)

    return merged
def _get_core_nlu_files(
    paths: Optional[Union[Text, List[Text]]],
    skill_imports: Optional[SkillSelector] = None,
) -> Tuple[Set[Text], Set[Text]]:
    """Collect story and NLU training files from the given paths.

    Returns a tuple of (story file paths, NLU data file paths).
    """
    stories = set()
    nlu_files = set()

    selector = skill_imports or SkillSelector.all_skills()
    if not selector.no_skills_selected():
        paths = selector.training_paths()

    # Normalise `paths` into an iterable of path strings.
    if paths is None:
        paths = []
    elif isinstance(paths, str):
        paths = [paths]

    for candidate in set(paths):
        if not candidate:
            continue

        if _is_valid_filetype(candidate) and selector.is_imported(candidate):
            # A single training file was given directly.
            if _is_nlu_file(candidate):
                nlu_files.add(os.path.abspath(candidate))
            elif _is_story_file(candidate):
                stories.add(os.path.abspath(candidate))
        else:
            # Treat the path as a directory and search it recursively.
            found_stories, found_nlu = _find_core_nlu_files_in_directory(
                candidate, selector
            )
            stories.update(found_stories)
            nlu_files.update(found_nlu)

    return stories, nlu_files
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    # Validate the config first so missing mandatory keys fail early.
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)
    skill_imports = SkillSelector.load(config)

    if isinstance(domain, str):
        try:
            domain = Domain.load(domain, skill_imports)
        except InvalidDomain as e:
            print_error(e)
            return None

    story_directory = data.get_core_directory(stories, skill_imports)

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model."
        )
        return

    return await _train_core_with_validated_data(
        domain=domain,
        config=config,
        story_directory=story_directory,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        uncompress=uncompress,
        kwargs=kwargs,
    )
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    skill_imports = SkillSelector.load(config, training_files)
    try:
        domain = Domain.load(domain, skill_imports)
        domain.check_missing_templates()
    except InvalidDomain:
        # An invalid/missing domain is handled below by training NLU only.
        domain = None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )

    with ExitStack() as stack:
        # All temporary directories are cleaned up when the stack exits.
        train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))
        nlu_data = stack.enter_context(TempDirectoryPath(nlu_data_directory))
        story = stack.enter_context(TempDirectoryPath(story_directory))

        if domain is None:
            return handle_domain_if_not_exists(
                config, nlu_data_directory, output_path, fixed_model_name
            )

        return await _train_async_internal(
            domain,
            config,
            train_path,
            nlu_data,
            story,
            output_path,
            force_training,
            fixed_model_name,
            kwargs,
        )
    # Fix: removed a duplicated `if domain is None: ...` block that followed
    # the `return` above — it was unreachable dead code.
def train_nlu(
    config: Text, nlu_data: Text, output: Text, train_path: Optional[Text]
) -> Optional[Text]:
    """Trains a NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.nlu.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    if not train_path:
        # training NLU only hence the training files still have to be selected
        skill_imports = SkillSelector.load(config)
        nlu_data_directory = data.get_nlu_directory(nlu_data, skill_imports)
    else:
        nlu_data_directory = nlu_data

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(
        config, nlu_data_directory, _train_path, fixed_model_name="nlu"
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # NLU was trained stand-alone, so package it into a model archive.
        output_path = create_output_path(output, prefix="nlu-")
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)
        print_success(
            "Your Rasa NLU model is trained and saved at '{}'.".format(output_path)
        )

        return output_path

    return _train_path
def test_load_if_subskill_is_more_specific_than_parent(tmpdir_factory: TempdirFactory):
    """A skill directory counts as imported even when only its own config imports."""
    root = tmpdir_factory.mktemp("Parent Bot")
    config_path = root / "config.yml"
    utils.dump_obj_as_yaml_to_file(root / "config.yml", {})

    subskill = root / "Skill A"
    subskill.mkdir()
    utils.dump_obj_as_yaml_to_file(subskill / "config.yml", {"imports": ["Skill B"]})

    actual = SkillSelector.load(config_path)

    assert actual.is_imported(str(subskill))
def from_path(cls, path: Text, skill_imports: SkillSelector) -> "Domain":
    """Load a domain from a single file or from a directory tree."""
    path = os.path.abspath(path)

    # If skills were imported search the whole directory tree for domain files
    if os.path.isfile(path) and not skill_imports.no_skills_selected():
        path = os.path.dirname(path)

    if os.path.isfile(path):
        return cls.from_file(path)

    if os.path.isdir(path):
        return cls.from_directory(path, skill_imports)

    raise Exception(
        "Failed to load domain specification from '{}'. "
        "File not found!".format(os.path.abspath(path))
    )
def test_load_imports_without_imports(tmpdir_factory: TempdirFactory):
    """Without any `imports` entry every path is treated as imported."""
    no_imports = {}
    root = tmpdir_factory.mktemp("Parent Bot")
    utils.dump_obj_as_yaml_to_file(root / "config.yml", no_imports)

    for name in ("Skill A", "Skill B"):
        skill_dir = root / name
        skill_dir.mkdir()
        utils.dump_obj_as_yaml_to_file(skill_dir / "config.yml", no_imports)

    actual = SkillSelector.load(root / "config.yml")

    # Even a path that was never created is considered imported.
    assert actual.is_imported(root / "Skill C")
def get_core_nlu_files(
    paths: Optional[Union[Text, List[Text]]],
    skill_imports: Optional["SkillSelector"] = None,
) -> Tuple[Set[Text], Set[Text]]:
    """Recursively collects all training files from a list of paths.

    Args:
        paths: List of paths to training files or folders containing them.
        skill_imports: `SkillSelector` instance which determines which files
            should be loaded.

    Returns:
        Tuple of paths to story and NLU files.
    """
    from rasa.skill import SkillSelector

    skill_imports = skill_imports or SkillSelector.all_skills()
    if not skill_imports.no_skills_selected():
        paths = skill_imports.training_paths()

    # Normalise the input into an iterable of path strings.
    if paths is None:
        candidates = []
    elif isinstance(paths, str):
        candidates = [paths]
    else:
        candidates = paths

    story_files = set()
    nlu_data_files = set()

    for candidate in set(candidates):
        if not candidate:
            continue

        if _is_valid_filetype(candidate) and skill_imports.is_imported(candidate):
            # A training file was passed directly.
            if _is_nlu_file(candidate):
                nlu_data_files.add(os.path.abspath(candidate))
            elif _is_story_file(candidate):
                story_files.add(os.path.abspath(candidate))
            continue

        # Otherwise search the path recursively as a directory.
        more_stories, more_nlu = _find_core_nlu_files_in_directory(
            candidate, skill_imports
        )
        story_files.update(more_stories)
        nlu_data_files.update(more_nlu)

    return story_files, nlu_data_files
def train_nlu(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
) -> Optional[Text]:
    """Trains an NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of the model to be stored.
        uncompress: If `True` the model will not be compressed.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    # training NLU only hence the training files still have to be selected
    skill_imports = SkillSelector.load(config)
    nlu_data_directory = data.get_nlu_directory(nlu_data, skill_imports)

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return None

    return _train_nlu_with_validated_data(
        config=config,
        nlu_data_directory=nlu_data_directory,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        uncompress=uncompress,
    )
def test_import_outside_project_directory(tmpdir_factory):
    """Loading a skill's own config follows imports relative to that skill."""
    root = tmpdir_factory.mktemp("Parent Bot")
    utils.dump_obj_as_yaml_to_file(root / "config.yml", {"imports": ["Skill A"]})

    dir_a = root / "Skill A"
    dir_a.mkdir()
    utils.dump_obj_as_yaml_to_file(dir_a / "config.yml", {"imports": ["../Skill B"]})

    dir_b = root / "Skill B"
    dir_b.mkdir()
    utils.dump_obj_as_yaml_to_file(dir_b / "config.yml", {"imports": ["../Skill C"]})

    actual = SkillSelector.load(dir_a / "config.yml")

    assert actual._imports == {str(dir_b), str(root / "Skill C")}
def from_directory(
    cls, path: Text, skill_imports: Optional[SkillSelector] = None
) -> "Domain":
    """Loads and merges multiple domain files recursively from a directory tree."""
    selector = skill_imports or SkillSelector.all_skills()
    merged = Domain.empty()

    for directory, _, filenames in os.walk(path):
        # Skip directories that were not selected by the skill imports.
        if not selector.is_imported(directory):
            continue
        for filename in filenames:
            candidate = os.path.join(directory, filename)
            if data.is_domain_file(candidate):
                merged = Domain.from_file(candidate).merge(merged)

    return merged
def _find_core_nlu_files_in_directory(
    directory: Text, skill_imports: SkillSelector
) -> Tuple[Set[Text], Set[Text]]:
    """Walk `directory` and sort training files into story and NLU sets."""
    stories = set()
    nlu_files = set()

    for root, _, filenames in os.walk(directory):
        # Only descend into directories selected by the skill imports.
        if not skill_imports.is_imported(root):
            continue

        for filename in filenames:
            full_path = os.path.join(root, filename)
            if not _is_valid_filetype(full_path):
                continue

            if _is_nlu_file(full_path):
                nlu_files.add(full_path)
            elif _is_story_file(full_path):
                stories.add(full_path)

    return stories, nlu_files
def load(
    cls,
    paths: Union[List[Text], Text],
    skill_imports: Optional[SkillSelector] = None,
) -> "Domain":
    """Load and merge domain files from the given path(s).

    If specific skills were selected, their training paths take precedence
    over `paths`.

    Raises:
        InvalidDomain: If no domain path was given.
    """
    skill_imports = skill_imports or SkillSelector.all_skills()

    if not skill_imports.no_skills_selected():
        paths = skill_imports.training_paths()

    if not paths:
        raise InvalidDomain(
            "No domain file was specified. Please specify a path "
            "to a valid domain file."
        )
    elif not isinstance(paths, list) and not isinstance(paths, set):
        # A single path was given - wrap it so it can be iterated below.
        paths = [paths]

    domain = Domain.empty()
    for path in paths:
        other = cls.from_path(path, skill_imports)
        domain = domain.merge(other)

    return domain
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(
            "Could not load domain due to: '{}'. To specify a valid domain path use "
            "the '--domain' argument.".format(e)
        )
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return

    if dialogue_data_not_present:
        # Fall back to an NLU-only model.
        print_warning("No dialogue data present. Just a Rasa NLU model will be trained.")
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        # Fall back to a Core-only model.
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    # Compare against the latest existing model to decide what to retrain.
    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model, train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )

    return old_model
def test_not_in_imports(input_path):
    """Paths outside the configured import set are not imported."""
    selector = SkillSelector({"A/A/A", "A/B/A"})
    assert not selector.is_imported(input_path)
def test_load_from_none(input_dict):
    """Configs without a usable `imports` entry produce an empty import set."""
    selector = SkillSelector._from_dict(
        input_dict, Path("."), SkillSelector.all_skills()
    )
    assert selector._imports == set()
def test_training_paths():
    """Imports nested inside the project directory collapse to its root."""
    selector = SkillSelector({"A", "B/C"}, "B")
    paths = selector.training_paths()
    assert paths == {"A", "B"}
def test_merge():
    """Merging drops imports already covered by a broader path."""
    left = SkillSelector({"A", "B"})
    right = SkillSelector({"A/1", "B/C/D", "C"})

    merged = left.merge(right)

    assert merged._imports == {"A", "B", "C"}
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        kwargs: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.core.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)

    _train_path = train_path or tempfile.mkdtemp()

    # Bug fix: the original checked `isinstance(Domain, str)` — testing the
    # *class* `Domain` (always False) instead of the `domain` argument, so a
    # string domain path was never loaded when `train_path` was provided.
    if isinstance(domain, str) or not train_path:
        skill_imports = SkillSelector.load(config)
        domain = Domain.load(domain, skill_imports)
        story_directory = data.get_core_directory(stories, skill_imports)
    else:
        story_directory = stories

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model."
        )
        return

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only Core was trained.
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(
            config, domain, stories=story_directory
        )
        model.create_package_rasa(_train_path, output_path, new_fingerprint)
        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(output_path)
        )

        return output_path

    return _train_path
async def train_async(
    domain: Optional,
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output_path)
    # Both parts are retrained unless fingerprints prove nothing changed.
    retrain_core = True
    retrain_nlu = True

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(e)
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return

    if dialogue_data_not_present:
        # Fall back to an NLU-only model.
        print_warning("No dialogue data present. Just a Rasa NLU model will be trained.")
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        # Fall back to a Core-only model.
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    if not force_training and old_model:
        # Reuse the parts of the previous model whose fingerprints match.
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        # Something was retrained, so package a fresh model archive.
        output_path = create_output_path(output_path, fixed_name=fixed_model_name)
        model.create_package_rasa(train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa model is trained and saved at '{}'.".format(output_path)
        )

        return output_path
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )

        return old_model