def test_nlu_and_core_trained_if_no_nlu_data_but_e2e_stories(
    self,
    moodbot_domain_path: Path,
    e2e_bot_config_file: Path,
    e2e_stories_path: Text,
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
):
    """Check that both schemas contain DIET and TED when only e2e stories are given.

    ``GraphTrainer.train`` is replaced by a mock so no real training runs; the
    model configuration handed to it is inspected instead.
    """
    fake_train = Mock()
    monkeypatch.setattr(GraphTrainer, GraphTrainer.train.__name__, fake_train)

    rasa.train(
        str(moodbot_domain_path),
        str(e2e_bot_config_file),
        [e2e_stories_path],
        output=str(tmp_path),
    )

    # The first positional argument of the mocked call is the model configuration.
    positional_args, _ = fake_train.call_args
    model_configuration: GraphModelConfiguration = positional_args[0]

    schemas = (
        model_configuration.train_schema,
        model_configuration.predict_schema,
    )
    for schema in schemas:
        component_classes = [node.uses for node in schema.nodes.values()]
        assert any(issubclass(cls, DIETClassifier) for cls in component_classes)
        assert any(issubclass(cls, TEDPolicy) for cls in component_classes)
def test_model_finetuning_with_invalid_model(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stories_path: Text,
    stack_config_path: Text,
    nlu_data_path: Text,
    model_to_fine_tune: Text,
    capsys: CaptureFixture,
):
    """Training must exit and print a hint when ``model_to_fine_tune`` is unusable."""
    models_dir = tmp_path / "models"
    models_dir.mkdir()

    with pytest.raises(SystemExit):
        rasa.train(
            domain_path,
            stack_config_path,
            [stories_path, nlu_data_path],
            output=str(models_dir),
            force_training=True,
            model_to_finetune=model_to_fine_tune,
            finetuning_epoch_fraction=1,
        )

    # The explanation is expected on stdout, not in the log records.
    captured_stdout = capsys.readouterr().out
    assert "No model for finetuning found" in captured_stdout
def test_e2e_gives_experimental_warning(
    self,
    monkeypatch: MonkeyPatch,
    trained_e2e_model: Text,
    domain_path: Text,
    stack_config_path: Text,
    e2e_stories_path: Text,
    nlu_data_path: Text,
    caplog: LogCaptureFixture,
):
    """Training on end-to-end stories must log the "experimental" warning."""
    # Replace both training steps with mocks; only the log output matters here.
    mock_nlu_training(monkeypatch)
    mock_core_training(monkeypatch)

    with caplog.at_level(logging.WARNING):
        rasa.train(
            domain_path,
            stack_config_path,
            [e2e_stories_path, nlu_data_path],
            output=new_model_path_in_same_dir(trained_e2e_model),
        )

    expected_notice = "The end-to-end training is currently experimental"
    assert any(expected_notice in record.message for record in caplog.records)
def test_model_finetuning_new_domain_label_stops_all_training(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_moodbot_path: Text,
):
    """Fine-tuning with an extra intent in the domain must exit before any training."""
    core_training_mock = mock_core_training(monkeypatch)
    nlu_training_mock = mock_nlu_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Write a copy of the moodbot domain that has one additional intent.
    domain_dict = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/domain.yml")
    domain_dict["intents"].append("a_new_one")
    new_domain_path = tmp_path / "new_domain.yml"
    rasa.shared.utils.io.write_yaml(domain_dict, new_domain_path)

    training_files = [
        "data/test_moodbot/data/stories.yml",
        "data/test_moodbot/data/nlu.yml",
    ]
    with pytest.raises(SystemExit):
        rasa.train(
            domain=str(new_domain_path),
            config="data/test_moodbot/config.yml",
            training_files=training_files,
            output=str(models_dir),
            model_to_finetune=trained_moodbot_path,
        )

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_not_called()
def test_interpreter_of_old_model_passed_to_core_training(
    monkeypatch: MonkeyPatch,
    tmp_path: Path,
    trained_rasa_model: Text,
    domain_path: Text,
    config_path: Text,
    stories_path: Text,
    nlu_data_path: Text,
):
    """Core training must receive a ``RasaNLUInterpreter`` when NLU is not retrained."""
    fingerprint_result_cls = rasa.model.FingerprintComparisonResult

    # Make the fingerprint comparison report that NLU does not need retraining.
    monkeypatch.setattr(
        fingerprint_result_cls,
        fingerprint_result_cls.should_retrain_nlu.__name__,
        lambda _: False,
    )

    # Make the lookup of the latest model return the already trained model.
    monkeypatch.setattr(
        rasa.model,
        rasa.model.get_latest_model.__name__,
        lambda _: trained_rasa_model,
    )

    # Replace the actual Core training with a mock we can inspect.
    core_training_mock = mock_core_training(monkeypatch)

    rasa.train(
        domain_path,
        config_path,
        [stories_path, nlu_data_path],
        str(tmp_path),
    )

    core_training_mock.assert_called_once()
    _name, _args, call_kwargs = core_training_mock.mock_calls[0]
    assert isinstance(call_kwargs["interpreter"], RasaNLUInterpreter)
def test_trained_interpreter_passed_to_core_training(
    monkeypatch: MonkeyPatch,
    tmp_path: Path,
    unpacked_trained_rasa_model: Text,
    nlu_data_path: Text,
    stories_path: Text,
    config_path: Text,
    domain_path: Text,
):
    """Core training must receive a ``RasaNLUInterpreter`` after the NLU step."""
    # Skip the real NLU training: the patched coroutine returns the path of the
    # unpacked model provided by the fixture. The target is addressed by its
    # string name because a module and a function share the name `train`.
    fake_nlu_training = AsyncMock(return_value=unpacked_trained_rasa_model)
    monkeypatch.setattr(
        rasa.model_training,
        "_train_nlu_with_validated_data",
        fake_nlu_training,
    )

    # Replace the actual Core training with a mock we can inspect.
    core_training_mock = mock_core_training(monkeypatch)

    rasa.train(
        domain_path,
        config_path,
        [stories_path, nlu_data_path],
        str(tmp_path),
    )

    core_training_mock.assert_called_once()
    _name, _args, call_kwargs = core_training_mock.mock_calls[0]
    assert isinstance(call_kwargs["interpreter"], RasaNLUInterpreter)
def test_train_temp_files(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stories_path: Text,
    stack_config_path: Text,
    nlu_data_path: Text,
):
    """Training, forced or skipped, must leave no temporary rasa files behind."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()

    # Point the tempfile module at a directory this test can inspect.
    monkeypatch.setattr(tempfile, "tempdir", training_dir)
    output = str(models_dir)

    rasa.train(
        domain_path,
        stack_config_path,
        [stories_path, nlu_data_path],
        output=output,
        force_training=True,
    )
    assert count_temp_rasa_files(tempfile.tempdir) == 0

    # Second run with unchanged inputs: nothing new should be trained and,
    # again, no temporary files may be created.
    rasa.train(
        domain_path,
        stack_config_path,
        [stories_path, nlu_data_path],
        output=output,
    )
    assert count_temp_rasa_files(tempfile.tempdir) == 0
def test_invalid_graph_schema(
    tmp_path: Path,
    domain_path: Text,
    stories_path: Text,
    nlu_data_path: Text,
):
    """A config whose pipeline lists ``TEDPolicy`` must fail schema validation."""
    config_yaml = textwrap.dedent(
        """
        version: "3.0"
        recipe: "default.v1"
        pipeline:
        - name: WhitespaceTokenizer
        - name: TEDPolicy
        """
    )

    # Round-trip through the YAML helpers to get a config file on disk.
    parsed_config = rasa.shared.utils.io.read_yaml(config_yaml)
    new_config_path = tmp_path / "config.yml"
    rasa.shared.utils.io.write_yaml(parsed_config, new_config_path)

    with pytest.raises(GraphSchemaValidationException):
        rasa.train(
            domain_path,
            str(new_config_path),
            [stories_path, nlu_data_path],
            output=str(tmp_path),
        )
def train(data=DATA_DICT):
    """Train a Rasa model from the paths stored in ``data``.

    Args:
        data: Mapping providing the ``"domain"``, ``"config"``, ``"nlu"`` and
            ``"stories"`` entries that are forwarded to ``rasa.train``.
    """
    domain = data["domain"]
    config = data["config"]
    training_files = [data["nlu"], data["stories"]]

    # ``output`` is deliberately not passed, so models are named the default way.
    rasa.train(domain=domain, config=config, training_files=training_files)
def train(project_dir="/Users/lidayuan/Documents/edison/nlu/rasa/edo_pro/rasasc"):
    """Train a Rasa model from the files inside ``project_dir``.

    The process working directory is changed to ``project_dir`` and is NOT
    restored afterwards, so the relative paths ``domain.yml``, ``config.yml``
    and ``./data`` resolve against it.

    Args:
        project_dir: Directory containing the Rasa project. Defaults to the
            path that used to be hard-coded here, so existing zero-argument
            callers behave exactly as before.

    Returns:
        Always ``True`` once ``rasa.train`` has returned.
    """
    os.chdir(project_dir)
    rasa.train(domain='domain.yml', config='config.yml', training_files='./data')
    return True
def train(self, fixed_model_name: Optional[str] = None) -> None:
    """
    Generate the training artefacts for this project and train a Rasa model.

    Steps, in order: create the training data directory, generate the story
    file, generate the domain file, generate the NLU training data, then call
    ``rasa.train`` with ``output`` set to
    ``os.path.join(MODEL_PATH, self.project_name, self.model_name)``.

    Args:
        fixed_model_name: Forwarded unchanged to ``rasa.train``.

    Raises:
        OSError: If ``TRAINING_NLU_DATA_DIR`` cannot be created, or exists but
            is not a directory.
    """
    from rasa import train

    print("[INFO] Creating temporary training directory")
    # exist_ok=True accepts an existing directory but still raises when the
    # path exists and is not a directory.
    os.makedirs(TRAINING_NLU_DATA_DIR, exist_ok=True)

    print("[INFO] Generating Markdown story format")
    intent_list, templates, action_list = self._generate_story_file()
    print(
        f"[INFO] Story file generated with {len(intent_list)} intents, "
        f"{len(templates)} templates and {len(action_list)} actions"
    )

    # NOTE: merging stories with smalltalk (self._merge_stories()) is disabled.

    print("[INFO] Generating chatbot domain")
    self._generate_domain_file(
        intents=intent_list, templates=templates, actions=action_list
    )

    print("[INFO] Generating training data")
    total = generate_rasa_training_data(
        project_id=self.project_id, intent_list=intent_list, desc="Aragón OpenData"
    )
    print(f"[INFO] Total examples = {total}")

    # NOTE: the train/test split (split()) and copying of smalltalk intents
    # (copy_smalltalk_intents(project_id=self.project_id)) are disabled.

    print(f"[INFO] Training model {self!s}")
    train(
        domain=os.path.join(TRAINING_DATA_DIR, DEFAULT_DOMAIN_PATH),
        config=self.pipeline,
        output=os.path.join(MODEL_PATH, self.project_name, self.model_name),
        training_files=TRAINING_NLU_DATA_DIR,
        fixed_model_name=fixed_model_name,
    )
def train_model(project_id):
    """Train the Rasa model stored under ``/rasa_projects/<project_id>/``.

    Args:
        project_id: Identifier of the project; its string form names the
            project directory.

    Returns:
        A dict with the keys ``"Status"``, ``"Message"`` and ``"project_id"``.
        On success ``"Message"`` holds the value returned by ``rasa.train``;
        on failure it holds ``repr`` of the raised exception.

    Raises:
        OSError: If the project directory cannot be listed. That call happens
            before the training ``try`` block and is not converted to a dict.
    """
    project_dir = f"/rasa_projects/{project_id!s}"
    base_path = project_dir + "/"

    logger.info("Starting Training for Project ID %s", project_id)
    logger.info("%s", os.listdir(project_dir))
    logger.info("Training Rasa Model ")

    try:
        model_path = rasa.train(
            domain=base_path + 'domain.yml',
            config=base_path + 'config.yml',
            training_files=base_path + 'data/',
            output=base_path + 'models/',
        )
    except Exception as e:
        # Any training failure is reported to the caller as an error dict.
        # logger.exception records it at ERROR level with the traceback.
        logger.exception("Exception while training the model %s", e)
        return {
            "Status": "Error",
            "Message": repr(e),
            "project_id": str(project_id),
        }

    logger.info("Model Path %s", model_path)
    return {
        "Status": "Success",
        "Message": model_path,
        "project_id": str(project_id),
    }
def test_new_nlu_data_retrains_core_if_there_are_e2e_stories(
    self,
    trained_e2e_model: Text,
    moodbot_domain_path: Path,
    e2e_bot_config_file: Path,
    e2e_stories_path: Text,
    nlu_data_path: Text,
    tmp_path: Path,
    trained_e2e_model_cache: Path,
):
    """A new NLU example must invalidate NLU and TED fingerprints in a dry run."""
    # Append one example to the first NLU entry and write it to a new file.
    nlu_content = rasa.shared.utils.io.read_yaml_file(nlu_data_path)
    nlu_content["nlu"][0]["examples"] += "- surprise!\n"
    new_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_content, new_nlu_file)

    result = rasa.train(
        str(moodbot_domain_path),
        str(e2e_bot_config_file),
        [e2e_stories_path, new_nlu_file],
        output=new_model_path_in_same_dir(trained_e2e_model),
        dry_run=True,
    )

    assert result.code == rasa.model_training.CODE_NEEDS_TO_BE_RETRAINED

    fingerprints = result.dry_run_results
    expected_misses = (
        "train_CountVectorsFeaturizer3",
        "train_DIETClassifier5",
        "end_to_end_features_provider",
        "train_TEDPolicy0",
    )
    for node_name in expected_misses:
        assert not fingerprints[node_name].is_hit
    assert fingerprints["train_RulePolicy1"].is_hit
def test_retrains_only_core_if_new_e2e_example_seen_before(
    self,
    trained_e2e_model: Text,
    moodbot_domain_path: Path,
    e2e_bot_config_file: Path,
    e2e_stories_path: Text,
    nlu_data_path: Text,
    tmp_path: Path,
    trained_e2e_model_cache: Path,
):
    """An added user step in a story must only invalidate the policy fingerprints."""
    # Append one user step to the second story and write it to a new file.
    stories_content = rasa.shared.utils.io.read_yaml_file(e2e_stories_path)
    stories_content["stories"][1]["steps"].append({"user": "******"})
    new_stories_file = tmp_path / "new_stories.yml"
    rasa.shared.utils.io.write_yaml(stories_content, new_stories_file)

    result = rasa.train(
        str(moodbot_domain_path),
        str(e2e_bot_config_file),
        [new_stories_file, nlu_data_path],
        output=new_model_path_in_same_dir(trained_e2e_model),
        dry_run=True,
    )

    assert result.code == rasa.model_training.CODE_NEEDS_TO_BE_RETRAINED

    fingerprints = result.dry_run_results
    expected_hits = (
        "train_CountVectorsFeaturizer3",
        "train_DIETClassifier5",
        "end_to_end_features_provider",
    )
    for node_name in expected_hits:
        assert fingerprints[node_name].is_hit

    expected_misses = ("train_TEDPolicy0", "train_RulePolicy1")
    for node_name in expected_misses:
        assert not fingerprints[node_name].is_hit
def test_new_nlu_data_does_not_retrain_core_if_there_are_no_e2e_stories(
    self,
    monkeypatch: MonkeyPatch,
    trained_simple_rasa_model: Text,
    domain_path: Text,
    stack_config_path: Text,
    simple_stories_path: Text,
    nlu_data_path: Text,
    tmp_path: Path,
):
    """A new NLU example must trigger NLU training once and no Core training."""
    # Append one example to the first NLU entry and write it to a new file.
    nlu_content = rasa.shared.utils.io.read_yaml_file(nlu_data_path)
    nlu_content["nlu"][0]["examples"] += "- surprise!\n"
    new_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_content, new_nlu_file)

    nlu_training_mock = mock_nlu_training(monkeypatch)
    core_training_mock = mock_core_training(monkeypatch)

    training_result = rasa.train(
        domain_path,
        stack_config_path,
        [simple_stories_path, new_nlu_file],
        output=new_model_path_in_same_dir(trained_simple_rasa_model),
    )
    # Remove the model file this run produced.
    os.remove(training_result.model)

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_called_once()
def test_retrains_only_core_if_new_e2e_example_seen_before(
    self,
    monkeypatch: MonkeyPatch,
    trained_e2e_model: Text,
    domain_path: Text,
    stack_config_path: Text,
    e2e_stories_path: Text,
    nlu_data_path: Text,
    tmp_path: Path,
):
    """An added user step in a story must trigger Core training once and no NLU training."""
    # Append one user step to the second story and write it to a new file.
    stories_yaml = rasa.shared.utils.io.read_yaml_file(e2e_stories_path)
    stories_yaml["stories"][1]["steps"].append({"user": "******"})
    new_stories_file = tmp_path / "new_stories.yml"
    rasa.shared.utils.io.write_yaml(stories_yaml, new_stories_file)

    mocked_nlu_training = mock_nlu_training(monkeypatch)
    mocked_core_training = mock_core_training(monkeypatch)

    new_model_path = rasa.train(
        domain_path,
        stack_config_path,
        [new_stories_file, nlu_data_path],
        output=new_model_path_in_same_dir(trained_e2e_model),
    ).model
    # Remove the model file this run produced.
    os.remove(new_model_path)

    mocked_core_training.assert_called_once()
    mocked_nlu_training.assert_not_called()
def print_train_or_instructions(args: argparse.Namespace) -> None:
    """Ask whether to train an initial model and act on the answer.

    When ``args.no_prompt`` is set the question is skipped and training is
    assumed. After training, the resulting model is stored on ``args.model``
    and ``print_run_or_instructions`` is called.
    """
    import questionary
    import rasa

    print_success("Finished creating project structure.")

    question = questionary.confirm("Do you want to train an initial model? 💪🏽")
    wants_training = question.skip_if(args.no_prompt, default=True).ask()

    if not wants_training:
        print_success(
            "No problem 👍🏼. You can also train a model later by going "
            "to the project directory and running 'rasa train'."
        )
        return

    print_success("Training an initial model...")
    training_result = rasa.train(
        DEFAULT_DOMAIN_PATH,
        DEFAULT_CONFIG_PATH,
        DEFAULT_DATA_PATH,
        DEFAULT_MODELS_PATH,
    )
    args.model = training_result.model

    print_run_or_instructions(args)
def init_connection(self):
    """Set up the environment, a trained model and a mocked paraphrase endpoint.

    Setup: loads the test system configuration, connects to the database,
    creates a temp dir (stored on ``pytest.tmp_dir``), trains a model (its path
    is stored on ``pytest.model_path``) and starts ``responses`` with a canned
    reply for the paraphrase URL. Teardown after the ``yield``: stops
    ``responses`` and deletes the temp dir and the trained models directory.
    """
    os.environ["system_file"] = "./tests/testing_data/system.yaml"
    Utility.load_environment()

    database_url = Utility.environment['database']["url"]
    connect(**Utility.mongoengine_connection(database_url))

    pytest.tmp_dir = tempfile.mkdtemp()

    from rasa import train

    models_dir = 'tests/testing_data/model_tester/models'
    training_files = [
        'tests/testing_data/model_tester/nlu_with_entities/nlu.yml',
        'tests/testing_data/model_tester/training_stories_success/stories.yml',
    ]
    # model without entities
    train_result = train(
        domain='tests/testing_data/model_tester/domain.yml',
        config='tests/testing_data/model_tester/config.yml',
        training_files=training_files,
        output=models_dir,
        core_additional_arguments={"augmentation_factor": 100},
        force_training=True,
    )
    pytest.model_path = train_result.model

    # Canned answer for every POST to the paraphrase service.
    paraphrase_reply = {'data': {'paraphrases': ['common training example']}}
    responses.add(
        'POST',
        Utility.environment["augmentation"]["paraphrase_url"],
        json=paraphrase_reply,
    )
    responses.start()

    yield None

    responses.stop()
    shutil.rmtree(pytest.tmp_dir)
    shutil.rmtree(models_dir)
def test_model_finetuning(
    tmp_path: Path,
    domain_path: Text,
    stories_path: Text,
    stack_config_path: Text,
    nlu_data_path: Text,
    trained_rasa_model: Text,
    use_latest_model: bool,
):
    """Fine-tuning from a model file, or from its directory, must produce a model file."""
    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # With `use_latest_model`, pass the directory containing the model
    # instead of the model file itself.
    if use_latest_model:
        model_to_finetune = str(Path(trained_rasa_model).parent)
    else:
        model_to_finetune = trained_rasa_model

    result = rasa.train(
        domain_path,
        stack_config_path,
        [stories_path, nlu_data_path],
        output=str(models_dir),
        force_training=True,
        model_to_finetune=model_to_finetune,
        finetuning_epoch_fraction=0.1,
    )

    trained_model = Path(result.model)
    assert trained_model.is_file()
def train(args: argparse.Namespace) -> Optional[Text]:
    """Validate the CLI paths in ``args`` and forward them to ``rasa.train``.

    Args:
        args: Parsed command line arguments. Reads ``domain``, ``config``,
            ``data``, ``out``, ``force``, ``fixed_model_name`` and
            ``persist_nlu_data``.

    Returns:
        Whatever ``rasa.train`` returns.
    """
    import rasa

    domain = get_validated_path(
        args.domain, "domain", DEFAULT_DOMAIN_PATH, none_is_valid=True
    )
    config = _get_valid_config(args.config, CONFIG_MANDATORY_KEYS)

    training_files = []
    for data_path in args.data:
        validated_path = get_validated_path(
            data_path, "data", DEFAULT_DATA_PATH, none_is_valid=True
        )
        training_files.append(validated_path)

    return rasa.train(
        domain=domain,
        config=config,
        training_files=training_files,
        output=args.out,
        force_training=args.force,
        fixed_model_name=args.fixed_model_name,
        persist_nlu_training_data=args.persist_nlu_data,
        additional_arguments=extract_additional_arguments(args),
    )
def test_models_not_retrained_if_only_new_action(
    trained_e2e_model: Text,
    moodbot_domain_path: Path,
    e2e_bot_config_file: Path,
    e2e_stories_path: Text,
    nlu_data_path: Text,
    trained_e2e_model_cache: Path,
    tmp_path: Path,
):
    """Dry-run training after merging one extra response into the domain.

    NOTE(review): the test name says "not retrained", but the assertion expects
    ``CODE_NEEDS_TO_BE_RETRAINED`` -- confirm which of the two is intended.
    """
    extra_response_yaml = """
    version: '2.0'
    responses:
      utter_greet_new:
      - text: "Hi from Rasa"
    """
    base_domain = Domain.load(moodbot_domain_path)
    extra_domain = Domain.from_yaml(extra_response_yaml)
    merged_domain = base_domain.merge(extra_domain)

    new_domain_path = tmp_path / "domain.yml"
    rasa.shared.utils.io.write_yaml(merged_domain.as_dict(), new_domain_path)

    result = rasa.train(
        str(new_domain_path),
        str(e2e_bot_config_file),
        [e2e_stories_path, nlu_data_path],
        output=str(tmp_path),
        dry_run=True,
    )

    assert result.code == rasa.model_training.CODE_NEEDS_TO_BE_RETRAINED
def train(args: argparse.Namespace) -> Optional[Text]:
    """Validate the CLI paths in ``args`` and forward them to ``rasa.train``.

    Args:
        args: Parsed command line arguments. Reads ``domain``, ``config``,
            ``data``, ``out`` and ``force``.

    Returns:
        Whatever ``rasa.train`` returns.
    """
    import rasa

    domain = get_validated_path(args.domain, "domain", DEFAULT_DOMAIN_PATH)
    config = get_validated_path(args.config, "config", DEFAULT_CONFIG_PATH)

    training_files = []
    for data_path in args.data:
        training_files.append(
            get_validated_path(data_path, "data", DEFAULT_DATA_PATH)
        )

    # All arguments are passed positionally, as in the original call.
    return rasa.train(domain, config, training_files, args.out, args.force)
def test_nlu_and_core_trained_if_no_nlu_data_but_e2e_stories(
    self,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stack_config_path: Text,
    e2e_stories_path: Text,
    tmp_path: Path,
):
    """With only e2e stories as data, both NLU and Core training must run once."""
    nlu_training_mock = mock_nlu_training(monkeypatch)
    core_training_mock = mock_core_training(monkeypatch)

    model_dir = self.make_tmp_model_dir(tmp_path)
    training_files = [e2e_stories_path]
    rasa.train(
        domain_path,
        stack_config_path,
        training_files,
        output=model_dir,
    )

    core_training_mock.assert_called_once()
    nlu_training_mock.assert_called_once()
def train(args: argparse.Namespace, can_exit: bool = False) -> Optional[Text]:
    """Train a model from the command line arguments.

    Args:
        args: Namespace arguments.
        can_exit: When `True`, a non-zero training result code is passed to
            `sys.exit` instead of returning.

    Returns:
        The `model` attribute of the training result.
    """
    import rasa

    domain = rasa.cli.utils.get_validated_path(
        args.domain, "domain", DEFAULT_DOMAIN_PATH, none_is_valid=True
    )

    # bf
    if os.path.isdir(args.config):
        # A directory was given: disable telemetry through its environment
        # variable and collect every `config*` YAML file inside the directory.
        from rasa.telemetry import TELEMETRY_ENABLED_ENVIRONMENT_VARIABLE
        from pathlib import Path

        os.environ[TELEMETRY_ENABLED_ENVIRONMENT_VARIABLE] = "false"
        config = []
        for file_name in os.listdir(args.config):
            if file_name.startswith("config") and file_name.endswith(("yml", "yaml")):
                config.append(Path(args.config) / file_name)
    else:
        config = _get_valid_config(args.config, CONFIG_MANDATORY_KEYS)
    # /bf

    training_files = []
    for data_path in args.data:
        validated_path = rasa.cli.utils.get_validated_path(
            data_path, "data", DEFAULT_DATA_PATH, none_is_valid=True
        )
        training_files.append(validated_path)

    training_result = rasa.train(
        domain=domain,
        config=config,
        training_files=training_files,
        output=args.out,
        dry_run=args.dry_run,
        force_training=args.force,
        fixed_model_name=args.fixed_model_name,
        persist_nlu_training_data=args.persist_nlu_data,
        core_additional_arguments=extract_core_additional_arguments(args),
        nlu_additional_arguments=extract_nlu_additional_arguments(args),
        model_to_finetune=_model_for_finetuning(args),
        finetuning_epoch_fraction=args.epoch_fraction,
    )

    training_failed = training_result.code != 0
    if training_failed and can_exit:
        sys.exit(training_result.code)

    return training_result.model
def train_project(path: Text) -> Optional[Text]:
    """Train an initial model for the project rooted at ``path``.

    Args:
        path: Project directory; the default config, data and domain paths
            are joined onto it.

    Returns:
        Whatever ``rasa.train`` returns.
    """
    print_success("Training an initial model...")

    config_path = os.path.join(path, DEFAULT_CONFIG_PATH)
    data_path = os.path.join(path, DEFAULT_DATA_PATH)
    domain_path = os.path.join(path, DEFAULT_DOMAIN_PATH)
    output_path = os.path.join(path, create_output_path())

    return rasa.train(domain_path, config_path, data_path, output_path)
def test_models_not_retrained_if_no_new_data(
    self,
    monkeypatch: MonkeyPatch,
    trained_e2e_model: Text,
    domain_path: Text,
    stack_config_path: Text,
    e2e_stories_path: Text,
    nlu_data_path: Text,
):
    """Training again on unchanged inputs must call neither NLU nor Core training."""
    nlu_training_mock = mock_nlu_training(monkeypatch)
    core_training_mock = mock_core_training(monkeypatch)

    # Write next to the existing model.
    output_path = new_model_path_in_same_dir(trained_e2e_model)
    rasa.train(
        domain_path,
        stack_config_path,
        [e2e_stories_path, nlu_data_path],
        output=output_path,
    )

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_not_called()
def init_connection(self):
    """Train a model into ``models/agent_testing_user`` for the tests, then remove it.

    Setup: loads the test system configuration, stores the bot name on
    ``pytest.bot``, creates the model directory and trains a model into it.
    Teardown after the ``yield``: deletes the model directory.
    """
    from rasa import train

    os.environ["system_file"] = "./tests/testing_data/system.yaml"
    Utility.load_environment()

    bot = 'agent_testing_user'
    pytest.bot = bot
    model_path = os.path.join('models', bot)
    # makedirs also creates a missing parent `models` directory, and
    # exist_ok=True tolerates a directory left over from an earlier run;
    # os.mkdir raised in both situations.
    os.makedirs(model_path, exist_ok=True)

    train(
        domain='tests/testing_data/model_tester/domain.yml',
        config='tests/testing_data/model_tester/config.yml',
        training_files=[
            'tests/testing_data/model_tester/nlu_with_entities/nlu.yml',
            'tests/testing_data/model_tester/training_stories_success/stories.yml',
        ],
        output=model_path,
        core_additional_arguments={"augmentation_factor": 100},
        force_training=True,
    )

    yield None

    shutil.rmtree(model_path)
def test_e2e_gives_experimental_warning(
    self,
    moodbot_domain_path: Path,
    e2e_bot_config_file: Path,
    e2e_stories_path: Text,
    nlu_data_path: Text,
    caplog: LogCaptureFixture,
    tmp_path: Path,
):
    """A dry run on end-to-end stories must log the "experimental" warning."""
    with caplog.at_level(logging.WARNING):
        rasa.train(
            str(moodbot_domain_path),
            str(e2e_bot_config_file),
            [e2e_stories_path, nlu_data_path],
            output=str(tmp_path),
            dry_run=True,
        )

    expected_notice = "The end-to-end training is currently experimental"
    assert any(expected_notice in record.message for record in caplog.records)
def test_model_finetuning_new_domain_label_stops_all_training(
    tmp_path: Path, trained_moodbot_path: Text
):
    """Fine-tuning with an extra intent in the domain must raise ``InvalidConfigException``."""
    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Write a copy of the moodbot domain that has one additional intent.
    domain_dict = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/domain.yml")
    domain_dict["intents"].append("a_new_one")
    new_domain_path = tmp_path / "new_domain.yml"
    rasa.shared.utils.io.write_yaml(domain_dict, new_domain_path)

    training_files = [
        "data/test_moodbot/data/stories.yml",
        "data/test_moodbot/data/nlu.yml",
    ]
    with pytest.raises(InvalidConfigException):
        rasa.train(
            domain=str(new_domain_path),
            config="data/test_moodbot/config.yml",
            training_files=training_files,
            output=str(models_dir),
            model_to_finetune=trained_moodbot_path,
        )
def test_model_finetuning(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stories_path: Text,
    stack_config_path: Text,
    nlu_data_path: Text,
    trained_rasa_model: Text,
    use_latest_model: bool,
):
    """Both training steps must receive a loaded model to fine-tune from.

    Core training is expected to get an ``Agent`` and NLU training an
    ``Interpreter`` through the ``model_to_finetune`` keyword argument.
    """
    nlu_training_mock = mock_nlu_training(monkeypatch)
    core_training_mock = mock_core_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # With `use_latest_model`, pass the directory containing the model
    # instead of the model file itself.
    if use_latest_model:
        model_to_finetune = str(Path(trained_rasa_model).parent)
    else:
        model_to_finetune = trained_rasa_model

    rasa.train(
        domain_path,
        stack_config_path,
        [stories_path, nlu_data_path],
        output=str(models_dir),
        force_training=True,
        model_to_finetune=model_to_finetune,
        finetuning_epoch_fraction=0.1,
    )

    core_training_mock.assert_called_once()
    _, core_kwargs = core_training_mock.call_args
    assert isinstance(core_kwargs["model_to_finetune"], Agent)

    nlu_training_mock.assert_called_once()
    _, nlu_kwargs = nlu_training_mock.call_args
    assert isinstance(nlu_kwargs["model_to_finetune"], Interpreter)