def test_training_core_with_e2e_fails_gracefully(
    self,
    capsys: CaptureFixture,
    monkeypatch: MonkeyPatch,
    tmp_path: Path,
    domain_path: Text,
    stack_config_path: Text,
    e2e_stories_path: Text,
):
    """`train_core` must refuse e2e stories and point the user to `rasa train`."""
    nlu_train_mock = mock_nlu_training(monkeypatch)
    core_train_mock = mock_core_training(monkeypatch)

    train_core(
        domain_path,
        stack_config_path,
        e2e_stories_path,
        output=self.make_tmp_model_dir(tmp_path),
    )

    # Neither trainer may run when the stories file contains e2e stories.
    core_train_mock.assert_not_called()
    nlu_train_mock.assert_not_called()

    expected_message = (
        "Stories file contains e2e stories. "
        "Please train using `rasa train` so that the NLU model is also trained."
    )
    assert expected_message in capsys.readouterr().out
def test_doesnt_checkpoint_with_zero_eval_num_examples(
    self, tmp_path: Path, tmp_path_factory: TempPathFactory
):
    """With `EVAL_NUM_EXAMPLES` at 0 the packaged model must contain no checkpoint."""
    assert not get_checkpoint_dir_path(tmp_path).is_dir()

    config_name = "config_ted_policy_model_checkpointing_zero_eval_num_examples.yml"
    with pytest.warns(UserWarning) as recorded:
        train_core(
            domain="data/test_domains/default.yml",
            stories="data/test_yaml_stories/stories_defaultdomain.yml",
            output=str(tmp_path),
            fixed_model_name="my_model.tar.gz",
            config=f"data/test_config/{config_name}",
        )

    expected_warning = (
        f"You have opted to save the best model, but the value of "
        f"'{EVAL_NUM_EXAMPLES}' is not greater than 0. No checkpoint model will be "
        f"saved."
    )
    assert len([w for w in recorded if expected_warning in str(w.message)]) == 1

    # Unpack the archive and verify no checkpoint directory made it in.
    unpack_dir = tmp_path_factory.mktemp("storage dir")
    LocalModelStorage.from_model_archive(unpack_dir, tmp_path / "my_model.tar.gz")
    assert not get_checkpoint_dir_path(unpack_dir).is_dir()
def test_model_finetuning_core_with_default_epochs(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_moodbot_path: Text,
):
    """Fine-tuning without explicit epochs scales the policy default by the fraction."""
    core_train_mock = mock_core_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Providing a new config with no epochs will mean the default amount are used
    # and then scaled by `finetuning_epoch_fraction`.
    config = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/config.yml")
    del config["policies"][0]["epochs"]
    config_path = tmp_path / "new_config.yml"
    rasa.shared.utils.io.write_yaml(config, config_path)

    train_core(
        "data/test_moodbot/domain.yml",
        str(config_path),
        "data/test_moodbot/data/stories.yml",
        output=str(models_dir),
        model_to_finetune=trained_moodbot_path,
        finetuning_epoch_fraction=2,
    )

    core_train_mock.assert_called_once()
    _, kwargs = core_train_mock.call_args
    ted = kwargs["model_to_finetune"].policy_ensemble.policies[0]
    assert ted.config[EPOCHS] == TEDPolicy.defaults[EPOCHS] * 2
def test_model_finetuning_core_new_domain_label(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_moodbot_path: Text,
):
    """Adding an unseen intent to the domain must abort fine-tuning."""
    core_train_mock = mock_core_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Simulate addition to training data
    domain = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/domain.yml")
    domain["intents"].append("a_new_one")
    domain_path = tmp_path / "new_domain.yml"
    rasa.shared.utils.io.write_yaml(domain, domain_path)

    with pytest.raises(SystemExit):
        train_core(
            domain=str(domain_path),
            config="data/test_moodbot/config.yml",
            stories="data/test_moodbot/data/stories.yml",
            output=str(models_dir),
            model_to_finetune=trained_moodbot_path,
        )

    core_train_mock.assert_not_called()
def test_train_core_autoconfig(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stories_path: Text,
    stack_config_path: Text,
):
    """`train_core` must resolve its configuration via autoconfig in CORE mode."""
    monkeypatch.setattr(tempfile, "tempdir", tmp_path)

    # mock function that returns configuration
    get_configuration_mock = Mock()
    monkeypatch.setattr(autoconfig, "get_configuration", get_configuration_mock)

    # skip actual core training
    monkeypatch.setattr(
        rasa.model_training, "_train_core_with_validated_data", AsyncMock()
    )

    # do training
    train_core(
        domain_path,
        stack_config_path,
        stories_path,
        output="test_train_core_temp_files_models",
    )

    get_configuration_mock.assert_called_once()
    _, args, _ = get_configuration_mock.mock_calls[0]
    assert args[1] == autoconfig.TrainingType.CORE
def test_model_finetuning_with_invalid_model_core(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stories_path: Text,
    stack_config_path: Text,
    model_to_fine_tune: Text,
    capsys: CaptureFixture,
):
    """An unusable base model must abort fine-tuning with a helpful message."""
    core_train_mock = mock_core_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    with pytest.raises(SystemExit):
        train_core(
            domain_path,
            stack_config_path,
            stories_path,
            output=str(models_dir),
            model_to_finetune=model_to_fine_tune,
            finetuning_epoch_fraction=1,
        )

    core_train_mock.assert_not_called()
    assert "No Core model for finetuning found" in capsys.readouterr().out
def test_train_model_checkpointing(self, tmp_path: Path):
    """Training with checkpointing enabled must create the checkpoint directory."""
    expected_dir = get_checkpoint_dir_path(tmp_path)
    assert not expected_dir.is_dir()

    train_core(
        domain="data/test_domains/default.yml",
        stories="data/test_yaml_stories/stories_defaultdomain.yml",
        train_path=str(tmp_path),
        output=str(tmp_path),
        config="data/test_config/config_ted_policy_model_checkpointing.yml",
    )

    assert expected_dir.is_dir()
def test_model_finetuning_core(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_moodbot_path: Text,
    use_latest_model: bool,
):
    """Fine-tuning loads the base agent and scales the configured epochs."""
    core_train_mock = mock_core_training(monkeypatch)
    agent_load_spy = Mock(wraps=Agent.load)
    monkeypatch.setattr(Agent, "load", agent_load_spy)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    if use_latest_model:
        # Point at the models directory so the latest archive is picked up.
        trained_moodbot_path = str(Path(trained_moodbot_path).parent)

    # Typically models will be fine-tuned with a smaller number of epochs than training
    # from scratch.
    # Fine-tuning will use the number of epochs in the new config.
    config = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/config.yml")
    config["policies"][0]["epochs"] = 10
    config_path = tmp_path / "new_config.yml"
    rasa.shared.utils.io.write_yaml(config, config_path)

    stories = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/data/stories.yml")
    stories["stories"].append(
        {"story": "new story", "steps": [{"intent": "greet"}]}
    )
    stories_path = tmp_path / "new_stories.yml"
    rasa.shared.utils.io.write_yaml(stories, stories_path)

    train_core(
        "data/test_moodbot/domain.yml",
        str(config_path),
        str(stories_path),
        output=str(models_dir),
        model_to_finetune=trained_moodbot_path,
        finetuning_epoch_fraction=0.2,
    )

    core_train_mock.assert_called_once()
    _, kwargs = core_train_mock.call_args
    base_agent = kwargs["model_to_finetune"]
    assert isinstance(base_agent, Agent)

    ted = base_agent.policy_ensemble.policies[0]
    # 10 configured epochs * 0.2 fraction == 2 effective epochs.
    assert ted.config[EPOCHS] == 2
    assert ted.finetune_mode
def test_train_model_checkpointing(self, tmp_path: Path):
    """Checkpoint-enabled training must produce the best-model archive."""
    model_name = "core-checkpointed-model"
    expected_archive = tmp_path / (model_name + ".tar.gz")
    assert not expected_archive.exists()

    # NOTE(review): sibling tests read the `.yml` stories file; confirm the
    # `.yaml` variant referenced here exists in the test data.
    train_core(
        domain="data/test_domains/default.yml",
        stories="data/test_yaml_stories/stories_defaultdomain.yaml",
        output=str(tmp_path),
        fixed_model_name=model_name,
        config="data/test_config/config_ted_policy_model_checkpointing.yml",
    )

    assert expected_archive.exists()
def test_train_fails_with_checkpoint_zero_eval_num_epochs(self, tmp_path: Path):
    """A zero eval-epoch interval is invalid config and must abort training."""
    config_name = "config_ted_policy_model_checkpointing_zero_every_num_epochs.yml"
    expected_error = (
        "Only values either equal to -1 or greater"
        " than 0 are allowed for this parameter."
    )

    with pytest.raises(InvalidConfigException, match=expected_error):
        train_core(
            domain="data/test_domains/default.yml",
            stories="data/test_yaml_stories/stories_defaultdomain.yml",
            output=str(tmp_path),
            config=f"data/test_config/{config_name}",
        )

    # No model archive may be written when training aborts.
    assert not (tmp_path / "my_model.tar.gz").is_file()
def test_train_model_checkpointing(
    self, tmp_path: Path, tmp_path_factory: TempPathFactory
):
    """The packaged model archive must contain the checkpoint directory."""
    train_core(
        domain="data/test_domains/default.yml",
        stories="data/test_yaml_stories/stories_defaultdomain.yml",
        output=str(tmp_path),
        fixed_model_name="my_model.tar.gz",
        config="data/test_config/config_ted_policy_model_checkpointing.yml",
    )

    # Unpack the archive and check the checkpoint directory survived packaging.
    unpack_dir = tmp_path_factory.mktemp("storage dir")
    LocalModelStorage.from_model_archive(unpack_dir, tmp_path / "my_model.tar.gz")
    assert get_checkpoint_dir_path(unpack_dir).is_dir()
def test_train_model_checkpointing(
    self, tmp_path: Path, tmp_path_factory: TempPathFactory
):
    """The unpacked TED policy must contain files restored from a checkpoint."""
    train_core(
        domain="data/test_domains/default.yml",
        stories="data/test_yaml_stories/stories_defaultdomain.yml",
        output=str(tmp_path),
        fixed_model_name="my_model.tar.gz",
        config="data/test_config/config_ted_policy_model_checkpointing.yml",
    )

    unpack_dir = tmp_path_factory.mktemp("storage dir")
    LocalModelStorage.from_model_archive(unpack_dir, tmp_path / "my_model.tar.gz")

    # At least one artifact of the TED policy must stem from a checkpoint.
    policy_dir = unpack_dir / "train_TEDPolicy0"
    assert any("from_checkpoint" in str(f) for f in policy_dir.rglob("*.*"))
def test_train_core_temp_files(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stories_path: Text,
    stack_config_path: Text,
):
    """Core training must clean up every temporary file it creates."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()

    # Redirect tempfile so leftovers are countable afterwards.
    monkeypatch.setattr(tempfile, "tempdir", training_dir)

    train_core(
        domain_path,
        stack_config_path,
        stories_path,
        output=str(models_dir),
    )

    assert count_temp_rasa_files(tempfile.tempdir) == 0
def test_doesnt_checkpoint_with_zero_eval_num_examples(self, tmp_path: Path):
    """With `EVAL_NUM_EXAMPLES` at 0 training warns and writes no checkpoints."""
    expected_dir = get_checkpoint_dir_path(tmp_path)
    assert not expected_dir.is_dir()

    config_name = "config_ted_policy_model_checkpointing_zero_eval_num_examples.yml"
    with pytest.warns(UserWarning) as recorded:
        train_core(
            domain="data/test_domains/default.yml",
            stories="data/test_yaml_stories/stories_defaultdomain.yml",
            train_path=str(tmp_path),
            output=str(tmp_path),
            config=f"data/test_config/{config_name}",
        )

    expected_warning = (
        f"You have opted to save the best model, but the value of "
        f"'{EVAL_NUM_EXAMPLES}' is not greater than 0. No checkpoint model will be "
        f"saved."
    )
    assert not expected_dir.is_dir()
    assert len([w for w in recorded if expected_warning in str(w.message)]) == 1
def test_train_fails_with_checkpoint_zero_eval_num_epochs(self, tmp_path: Path):
    """A zero eval-epoch interval warns, fails training, and writes no checkpoints."""
    expected_dir = get_checkpoint_dir_path(tmp_path)
    assert not expected_dir.is_dir()

    config_name = "config_ted_policy_model_checkpointing_zero_every_num_epochs.yml"
    with pytest.raises(InvalidConfigException):
        with pytest.warns(UserWarning) as recorded:
            train_core(
                domain="data/test_domains/default.yml",
                stories="data/test_yaml_stories/stories_defaultdomain.yml",
                train_path=str(tmp_path),
                output=str(tmp_path),
                config=f"data/test_config/{config_name}",
            )

    expected_warning = (
        f"You have opted to save the best model, but the value of "
        f"'{EVAL_NUM_EPOCHS}' is not -1 or greater than 0. Training will fail."
    )
    assert len([w for w in recorded if expected_warning in str(w.message)]) == 1
    assert not expected_dir.is_dir()
def run_core_training(
    args: argparse.Namespace, train_path: Optional[Text] = None
) -> Optional[Text]:
    """Trains a Rasa Core model only.

    Args:
        args: Command-line arguments to configure training.
        train_path: Path where trained model but not unzipped model should
            be stored.

    Returns:
        Path to a trained model or `None` if training was not successful.
    """
    from rasa.model_training import train_core

    output = train_path or args.out

    args.domain = rasa.cli.utils.get_validated_path(
        args.domain, "domain", DEFAULT_DOMAIN_PATH, none_is_valid=True
    )
    story_file = rasa.cli.utils.get_validated_path(
        args.stories, "stories", DEFAULT_DATA_PATH, none_is_valid=True
    )
    additional_arguments = extract_core_additional_arguments(args)

    # Policies might be a list for the compare training. Do normal training
    # if only list item was passed.
    if isinstance(args.config, list) and len(args.config) != 1:
        # Comparison mode trains one model per configuration; no single
        # model path to return.
        rasa.utils.common.run_in_loop(
            do_compare_training(args, story_file, additional_arguments)
        )
        return None

    if isinstance(args.config, list):
        args.config = args.config[0]

    config = _get_valid_config(args.config, CONFIG_MANDATORY_KEYS_CORE)

    return train_core(
        domain=args.domain,
        config=config,
        stories=story_file,
        output=output,
        train_path=train_path,
        fixed_model_name=args.fixed_model_name,
        additional_arguments=additional_arguments,
        model_to_finetune=_model_for_finetuning(args),
        finetuning_epoch_fraction=args.epoch_fraction,
    )
def test_should_not_retrain_core(
    domain_path: Text, tmp_path: Path, stack_config_path: Text
):
    """An unchanged fingerprint must not flag the Core model for retraining."""
    # Don't use `stories_path` as checkpoints currently break fingerprinting
    story_file = tmp_path / "simple_story.yml"
    story_file.write_text(
        """
stories:
- story: test_story
  steps:
  - intent: greet
  - action: utter_greet
"""
    )

    trained_model = train_core(
        domain_path, stack_config_path, str(story_file), str(tmp_path)
    )

    # Re-fingerprint the identical training data.
    importer = TrainingDataImporter.load_from_config(
        stack_config_path, domain_path, training_data_paths=[str(story_file)]
    )
    new_fingerprint = model.model_fingerprint(importer)

    result = model.should_retrain(new_fingerprint, trained_model, tmp_path)
    assert not result.should_retrain_core()