async def _train_core_with_validated_data(
    file_importer: TrainingDataImporter,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data.

    Args:
        file_importer: Importer supplying the domain, stories and Core config.
        output: Directory in which packaged models are stored.
        train_path: If given, train in this directory and return it;
            if `None`, train in a temp directory and package the model.
        fixed_model_name: Optional fixed name for the packaged model.
        additional_arguments: Extra parameters forwarded to Core training.

    Returns:
        The path to the packaged model archive when `train_path` is `None`,
        otherwise `train_path` itself.
    """
    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        print_color("Training Core model...", color=bcolors.OKBLUE)
        # bf mod: fetch domain and the Core-specific config concurrently.
        # NOTE(review): upstream used `.get_config()`; this fork calls
        # `get_core_config()` — presumably to split Core/NLU configs. Confirm
        # against the forked importer implementation.
        domain, config = await asyncio.gather(
            file_importer.get_domain(),
            file_importer.get_core_config()  # .get_config()
        )
        # /bf mod
        await rasa.core.train(
            domain_file=domain,
            training_resource=file_importer,
            output_path=os.path.join(_train_path, DEFAULT_CORE_SUBDIRECTORY_NAME),
            policy_config=config,
            additional_arguments=additional_arguments,
        )
        print_color("Core model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only Core was trained: package the temp training directory
            # into a model archive before the temp dir is cleaned up.
            new_fingerprint = await model.model_fingerprint(file_importer)
            return model.package_model(
                fingerprint=new_fingerprint,
                output_directory=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
async def test_rasa_file_importer_with_invalid_domain(tmp_path: Path):
    """An empty (hence invalid) domain file should fall back to an empty domain."""
    empty_config = tmp_path / "config.yml"
    empty_config.write_text("")

    data_importer = TrainingDataImporter.load_from_dict({}, str(empty_config), None, [])

    loaded_domain = await data_importer.get_domain()
    assert loaded_domain.as_dict() == Domain.empty().as_dict()
async def _train_nlu_async(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """Validate the NLU data path and its contents, then train an NLU model.

    Prints an error and returns `None` when no NLU data path was given or the
    path contains no valid NLU data; otherwise delegates to
    `_train_nlu_with_validated_data` and returns its result.
    """
    if not nlu_data:
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument."
        )
        return

    # training NLU only hence the training files still have to be selected
    importer = TrainingDataImporter.load_nlu_importer_from_config(
        config, training_data_paths=[nlu_data]
    )

    nlu_training_data = await importer.get_nlu_data()
    if nlu_training_data.is_empty():
        print_error(
            f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
            "Please verify the data format. "
            "The NLU model training will be skipped now."
        )
        return

    return await _train_nlu_with_validated_data(
        importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
        additional_arguments=additional_arguments,
    )
def interactive(args: argparse.Namespace) -> None:
    """Entry point for interactive learning: obtain a model, then start the session.

    Either reuses the model given via `args.model` or trains an initial one
    from the provided training files; exits with an error message when
    neither is possible.
    """
    _set_not_required_args(args)

    file_importer = TrainingDataImporter.load_from_config(
        args.config, args.domain, args.data
    )

    if args.model is not None:
        # A model path was supplied — it must actually exist on disk.
        zipped_model = get_provided_model(args.model)
        if not (zipped_model and os.path.exists(zipped_model)):
            utils.print_error_and_exit(
                f"Interactive learning process cannot be started as no initial model was "
                f"found at path '{args.model}'. Use 'rasa train' to train a model."
            )
    else:
        # No model given: require story data, then train one up front.
        story_graph = asyncio.get_event_loop().run_until_complete(
            file_importer.get_stories()
        )
        if not story_graph or story_graph.is_empty():
            utils.print_error_and_exit(
                "Could not run interactive learning without either core data or a model containing core data."
            )

        zipped_model = train.train_core(args) if args.core_only else train.train(args)
        if not zipped_model:
            utils.print_error_and_exit(
                "Could not train an initial model. Either pass paths "
                "to the relevant training files (`--data`, `--config`, `--domain`), "
                "or use 'rasa train' to train a model."
            )

    if not args.skip_visualization:
        logger.info(f"Loading visualization data from {args.data}.")

    perform_interactive_learning(args, zipped_model, file_importer)
async def test_without_additional_e2e_examples(tmp_path: Path):
    """E2E story events become training examples, but count as no "pure" NLU data."""
    domain_file = tmp_path / "domain.yml"
    domain_file.write_text(Domain.empty().as_yaml())
    config_file = tmp_path / "config.yml"
    config_file.touch()

    base_importer = TrainingDataImporter.load_from_dict(
        {}, str(config_file), str(domain_file), []
    )

    test_story = StoryStep(events=[
        UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
        ActionExecuted("utter_greet_from_stories"),
    ])
    stories = StoryGraph([test_story])

    # Patch to return our test stories
    base_importer.get_stories = asyncio.coroutine(lambda *args: stories)

    wrapped = E2EImporter(base_importer)
    training_data = await wrapped.get_nlu_data()

    assert training_data.training_examples
    assert training_data.is_empty()
    assert not training_data.without_empty_e2e_examples().training_examples
async def test_import_nlu_training_data_with_default_actions(project: Text):
    """The E2E importer must add one empty-text example per default action."""
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)

    importer = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )
    assert isinstance(importer, E2EImporter)
    importer_without_e2e = importer.importer

    # Check additional NLU training data from domain was added
    nlu_data = await importer.get_nlu_data()
    plain_nlu_data = await importer_without_e2e.get_nlu_data()
    assert len(nlu_data.training_examples) > len(plain_nlu_data.training_examples)

    from rasa.core.actions import action

    extended_training_data = await importer.get_nlu_data()
    for action_name in action.default_action_names():
        expected = Message(data={ACTION_NAME: action_name, ACTION_TEXT: ""})
        assert expected in extended_training_data.training_examples
def get_training_data():
    """Return the set of normalized (lowercased, stripped) NLU intent example texts."""
    importer = TrainingDataImporter.load_from_config(
        "../config.yml", "../base/domain-eng.yml", ["../base/data/"]
    )
    event_loop = asyncio.get_event_loop()
    nlu_data = event_loop.run_until_complete(importer.get_nlu_data())
    return {example.text.lower().strip() for example in nlu_data.intent_examples}
async def test_adding_e2e_actions_to_domain(project: Text):
    """End-to-end action texts from stories must appear in the merged domain."""
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)

    base_importer = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )

    additional_actions = ["Hi Joey.", "it's sunny outside."]

    first_step = StoryStep(events=[
        UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
        ActionExecuted("utter_greet_from_stories"),
    ])
    # The last e2e action appears twice — presumably to verify duplicates
    # are handled; confirm against the importer's dedup behavior.
    second_step = StoryStep(events=[
        UserUttered("how are you doing?", {"name": "greet_from_stories"}),
        ActionExecuted(additional_actions[0], action_text=additional_actions[0]),
        ActionExecuted(additional_actions[1], action_text=additional_actions[1]),
        ActionExecuted(additional_actions[1], action_text=additional_actions[1]),
    ])
    stories = StoryGraph([first_step, second_step])

    # Patch to return our test stories
    base_importer.get_stories = asyncio.coroutine(lambda *args: stories)

    wrapped = E2EImporter(base_importer)
    domain = await wrapped.get_domain()

    for action_name in additional_actions:
        assert action_name in domain.action_names
async def _train_nlu_async(
    config: Text,
    nlu_data: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
):
    """Validate the NLU data path and its contents, then train an NLU model.

    Prints an error and returns `None` when no NLU data path was given or when
    the path contains no valid NLU data; otherwise delegates to
    `_train_nlu_with_validated_data` and returns its result.
    """
    # FIX: guard against a missing/empty `nlu_data` argument before building the
    # importer — previously `[nlu_data]` would be loaded with a falsy path.
    if not nlu_data:
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model using the '--nlu' argument.")
        return

    # training NLU only hence the training files still have to be selected
    file_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config, training_data_paths=[nlu_data])

    training_data = await file_importer.get_nlu_data()
    if training_data.is_empty():
        # FIX: this case means the path exists but holds no valid NLU data, so
        # report that instead of the misleading "No NLU data given" message.
        print_error(f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
                    "Please verify the data format. "
                    "The NLU model training will be skipped now.")
        return

    return await _train_nlu_with_validated_data(
        file_importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        persist_nlu_training_data=persist_nlu_training_data,
    )
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies.

    For every run x exclusion-percentage x policy-config combination, trains a
    Core model with that percentage of stories excluded and packages it under
    `output_path/run_<n>/` so the resulting models can be evaluated against
    each other.
    """
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        # `current_run` is the 1-based index of the percentage within this run.
        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:
                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file])

                # Model name is derived from the config file's base name.
                config_name = os.path.splitext(
                    os.path.basename(policy_config))[0]
                logging.info("Starting to train {} round {}/{}"
                             " with {}% exclusion"
                             "".format(config_name, current_run,
                                       len(exclusion_percentages), percentage))

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    # Train and compute the fingerprint concurrently; only the
                    # fingerprint result is needed afterwards.
                    _, new_fingerprint = await asyncio.gather(
                        train(
                            domain,
                            file_importer,
                            train_path,
                            policy_config=policy_config,
                            exclusion_percentage=percentage,
                            additional_arguments=additional_arguments,
                            dump_stories=dump_stories,
                        ),
                        model.model_fingerprint(file_importer),
                    )

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = config_name + PERCENTAGE_KEY + str(percentage)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
async def test_example_bot_training_data_not_raises(config_file: Text,
                                                    domain_file: Text,
                                                    data_folder: Text):
    """Loading the example bot's NLU data and stories must emit no warnings."""
    importer = TrainingDataImporter.load_from_config(config_file, domain_file,
                                                     data_folder)

    with pytest.warns(None) as captured_warnings:
        await importer.get_nlu_data()
        await importer.get_stories()

    assert len(captured_warnings) == 0
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        fixed_model_name: Name of model to be stored.
        additional_arguments: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.
        `None` when training was skipped because of missing domain or stories.
    """
    # FIX(docs): removed the stale `uncompress` entry from the docstring —
    # no such parameter exists in the signature.
    file_importer = TrainingDataImporter.load_core_importer_from_config(
        config, domain, [stories]
    )

    domain = await file_importer.get_domain()
    if domain.is_empty():
        print_error(
            "Core training was skipped because no valid domain file was found. "
            "Please specify a valid domain using '--domain' argument or check if the provided domain file exists."
        )
        return None

    if not await file_importer.get_stories():
        print_error(
            "No stories given. Please provide stories in order to "
            "train a Rasa Core model using the '--stories' argument."
        )
        # Explicit `None` for consistency with the empty-domain branch above.
        return None

    return await _train_core_with_validated_data(
        file_importer,
        output=output,
        train_path=train_path,
        fixed_model_name=fixed_model_name,
        additional_arguments=additional_arguments,
    )
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    persist_nlu_training_data: bool = False,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        core_additional_arguments: Additional training parameters for core
            training.
        nlu_additional_arguments: Additional training parameters forwarded to
            training method of each NLU component.

    Returns:
        Path of the trained model archive.
    """
    file_importer = TrainingDataImporter.load_from_config(
        config, domain, training_files
    )

    # Train inside a temporary directory which is cleaned up afterwards.
    with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
        domain = await file_importer.get_domain()

        if domain.is_empty():
            return await handle_domain_if_not_exists(
                file_importer, output_path, fixed_model_name
            )

        return await _train_async_internal(
            file_importer,
            train_path,
            output_path,
            force_training,
            fixed_model_name,
            persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
        )
def test_load_from_config(tmpdir: Path):
    """A config listing `MultiProjectImporter` yields a combined importer wrapping it."""
    import rasa.utils.io as io_utils

    config_path = str(tmpdir / "config.yml")
    importer_config = {"importers": [{"name": "MultiProjectImporter"}]}
    io_utils.write_yaml(importer_config, config_path)

    loaded = TrainingDataImporter.load_from_config(config_path)

    assert isinstance(loaded, CombinedDataImporter)
    assert isinstance(loaded._importers[0], MultiProjectImporter)
def test_load_from_config(tmpdir: Path):
    """A config listing `MultiProjectImporter` is wrapped E2E -> retrieval -> combined."""
    config_path = str(tmpdir / "config.yml")
    importer_config = {"importers": [{"name": "MultiProjectImporter"}]}
    rasa.shared.utils.io.write_yaml(importer_config, config_path)

    loaded = TrainingDataImporter.load_from_config(config_path)

    assert isinstance(loaded, E2EImporter)
    assert isinstance(loaded.importer, RetrievalModelsDataImporter)
    assert isinstance(loaded.importer._importer._importers[0], MultiProjectImporter)
def test_load_from_dict(config: Dict, expected: List[Type["TrainingDataImporter"]],
                        project: Text):
    """`load_from_dict` must build a `CombinedDataImporter` with the expected parts."""
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)

    combined = TrainingDataImporter.load_from_dict(config, config_path,
                                                   domain_path,
                                                   [default_data_path])

    assert isinstance(combined, CombinedDataImporter)
    assert [type(importer) for importer in combined._importers] == expected
async def test_eval_data(component_builder, tmpdir, project):
    """Train a small NLU pipeline and check `get_eval_data` yields all examples."""
    pipeline = [
        {"name": "WhitespaceTokenizer"},
        {"name": "CountVectorsFeaturizer"},
        {"name": "DIETClassifier", "epochs": 2},
        {"name": "ResponseSelector", "epochs": 2},
    ]
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "en"})

    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.md",
            "data/examples/rasa/demo-rasa-responses.md",
        ],
    )

    _, _, persisted_path = await train(
        _config,
        path=tmpdir.strpath,
        data=data_importer,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )
    interpreter = Interpreter.load(persisted_path, component_builder)

    data = await data_importer.get_nlu_data()
    intent_results, response_selection_results, entity_results = get_eval_data(
        interpreter, data)

    # The demo data set contains 46 evaluable examples for each result kind.
    assert len(intent_results) == 46
    assert len(response_selection_results) == 46
    assert len(entity_results) == 46
async def test_example_bot_training_on_initial_project(tmp_path: Path):
    # we need to test this one separately, as we can't test it in place
    # configuration suggestions would otherwise change the initial file
    scaffold.create_initial_project(str(tmp_path))

    initial_importer = TrainingDataImporter.load_from_config(
        str(tmp_path / "config.yml"),
        str(tmp_path / "domain.yml"),
        str(tmp_path / "data"),
    )

    with pytest.warns(None) as captured_warnings:
        await initial_importer.get_nlu_data()
        await initial_importer.get_stories()

    assert len(captured_warnings) == 0
async def test_use_of_interface():
    """Every abstract getter on the base importer must raise `NotImplementedError`."""
    importer = TrainingDataImporter()

    abstract_getters = (
        importer.get_config,
        importer.get_stories,
        importer.get_nlu_data,
        importer.get_domain,
    )

    for getter in abstract_getters:
        with pytest.raises(NotImplementedError):
            await getter()
async def test_nlu_only(project: Text):
    """An NLU-only importer yields empty stories/domain but real config and NLU data."""
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)

    nlu_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path, training_data_paths=[default_data_path])
    assert isinstance(nlu_importer, NluDataImporter)

    loaded_stories = await nlu_importer.get_stories()
    assert loaded_stories.is_empty()

    loaded_domain = await nlu_importer.get_domain()
    assert loaded_domain.is_empty()

    loaded_config = await nlu_importer.get_config()
    assert loaded_config

    loaded_nlu_data = await nlu_importer.get_nlu_data()
    assert not loaded_nlu_data.is_empty()
async def test_nlu_data_domain_sync_with_retrieval_intents(project: Text):
    """Retrieval intents and their responses must stay in sync between NLU data and domain."""
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = "data/test_domains/default_retrieval_intents.yml"
    data_paths = [
        "data/test_nlu/default_retrieval_intents.md",
        "data/test_responses/default.md",
    ]

    base = TrainingDataImporter.load_from_dict({}, config_path, domain_path,
                                               data_paths)
    combined = CombinedDataImporter([NluDataImporter(base), CoreDataImporter(base)])
    importer = RetrievalModelsDataImporter(combined)

    domain = await importer.get_domain()
    nlu_data = await importer.get_nlu_data()

    assert domain.retrieval_intents == ["chitchat"]
    assert domain.intent_properties["chitchat"].get("is_retrieval_intent")
    assert domain.templates == nlu_data.responses
    assert "utter_chitchat" in domain.action_names
async def test_formbot_example():
    """End-to-end test of the formbot example: train it, then walk the
    restaurant form through happy and unhappy paths with a mocked action
    server."""
    sys.path.append("examples/formbot/")
    project = Path("examples/formbot/")
    config = str(project / "config.yml")
    domain = str(project / "domain.yml")
    training_dir = project / "data"
    training_files = [
        str(training_dir / "rules.yml"),
        str(training_dir / "stories.yml"),
    ]
    importer = TrainingDataImporter.load_from_config(config, domain, training_files)

    # All custom actions are served from this (mocked) endpoint.
    endpoint = EndpointConfig("https://example.com/webhooks/actions")
    endpoints = AvailableEndpoints(action=endpoint)
    agent = await train(
        domain,
        importer,
        str(project / "models" / "dialogue"),
        endpoints=endpoints,
        policy_config="examples/formbot/config.yml",
    )

    async def mock_form_happy_path(input_text, output_text, slot=None):
        # Simulates the action server filling `slot` (or submitting the form
        # when no slot is given) and checks the bot's next utterance.
        if slot:
            form = "restaurant_form"
            template = f"utter_ask_{slot}"
        else:
            form = None
            template = "utter_submit"
        response = {
            "events": [
                {"event": "form", "name": form, "timestamp": None},
                {
                    "event": "slot",
                    "timestamp": None,
                    "name": "requested_slot",
                    "value": slot,
                },
            ],
            "responses": [{"template": template}],
        }
        with aioresponses() as mocked:
            mocked.post("https://example.com/webhooks/actions",
                        payload=response,
                        repeat=True)
            responses = await agent.handle_text(input_text)
            assert responses[0]["text"] == output_text

    async def mock_form_unhappy_path(input_text, output_text, slot):
        # Simulates the action server failing to extract `slot` (HTTP 400),
        # which should trigger the form's rejection/interruption handling.
        response_error = {
            "error": f"Failed to extract slot {slot} with action restaurant_form",
            "action_name": "restaurant_form",
        }
        with aioresponses() as mocked:
            # noinspection PyTypeChecker
            mocked.post(
                "https://example.com/webhooks/actions",
                repeat=True,
                exception=ClientResponseError(400, "", json.dumps(response_error)),
            )
            responses = await agent.handle_text(input_text)
            assert responses[0]["text"] == output_text

    # First pass through the form, including a chitchat interruption.
    await mock_form_happy_path("/request_restaurant", "what cuisine?", slot="cuisine")
    await mock_form_unhappy_path("/chitchat", "chitchat", slot="cuisine")
    await mock_form_happy_path('/inform{"cuisine": "mexican"}',
                               "how many people?",
                               slot="num_people")
    await mock_form_happy_path('/inform{"number": "2"}',
                               "do you want to seat outside?",
                               slot="outdoor_seating")
    await mock_form_happy_path("/affirm",
                               "please provide additional preferences",
                               slot="preferences")

    # Restart the conversation and run the full form again, this time with
    # a "/stop" interruption that the user then declines to confirm.
    responses = await agent.handle_text("/restart")
    assert responses[0]["text"] == "restarted"

    responses = await agent.handle_text("/greet")
    assert (responses[0]["text"] ==
            "Hello! I am restaurant search assistant! How can I help?")

    await mock_form_happy_path("/request_restaurant", "what cuisine?", slot="cuisine")
    await mock_form_happy_path('/inform{"cuisine": "mexican"}',
                               "how many people?",
                               slot="num_people")
    await mock_form_happy_path('/inform{"number": "2"}',
                               "do you want to seat outside?",
                               slot="outdoor_seating")
    await mock_form_unhappy_path("/stop",
                                 "do you want to continue?",
                                 slot="outdoor_seating")
    await mock_form_happy_path("/affirm",
                               "do you want to seat outside?",
                               slot="outdoor_seating")
    await mock_form_happy_path("/affirm",
                               "please provide additional preferences",
                               slot="preferences")
    await mock_form_happy_path(
        "/deny",
        "please give your feedback on your experience so far",
        slot="feedback")
    # No slot: the form is submitted.
    await mock_form_happy_path('/inform{"feedback": "great"}', "All done!")

    responses = await agent.handle_text("/thankyou")
    assert responses[0]["text"] == "you are welcome :)"
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    kwargs: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies.

    For every run x exclusion-percentage x policy-config combination, trains a
    Core model with that percentage of stories excluded and packages it under
    `output_path/run_<n>/`.
    """
    from rasa.core import config
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        # `current_run` is the 1-based index of the percentage within this run.
        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:
                policies = config.load(policy_config)

                if len(policies) > 1:
                    raise ValueError(
                        "You can only specify one policy per model for comparison"
                    )

                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file])

                policy_name = type(policies[0]).__name__
                logging.info("Starting to train {} round {}/{}"
                             " with {}% exclusion"
                             "".format(policy_name, current_run,
                                       len(exclusion_percentages), percentage))

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    await train(
                        domain,
                        file_importer,
                        train_path,
                        policy_config=policy_config,
                        # FIX: pass the actual exclusion percentage; previously
                        # the enumeration index `current_run` was passed, so
                        # every model was trained with the wrong exclusion.
                        exclusion_percentage=percentage,
                        kwargs=kwargs,
                        dump_stories=dump_stories,
                    )

                    new_fingerprint = await model.model_fingerprint(
                        file_importer)

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = policy_name + str(current_run)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    additional_arguments: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        fixed_model_name: Name of model to be stored.
        additional_arguments: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    stories, nlu_data = await asyncio.gather(file_importer.get_stories(),
                                             file_importer.get_nlu_data())

    # No data at all: nothing to train.
    if stories.is_empty() and nlu_data.is_empty():
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument.")
        return

    # Only NLU data available: fall back to an NLU-only model.
    if stories.is_empty():
        print_warning(
            "No stories present. Just a Rasa NLU model will be trained.")
        return await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
        )

    # Only story data available: fall back to a Core-only model.
    if nlu_data.is_empty():
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=additional_arguments,
        )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    # Unless forced, compare fingerprints to decide what (if anything) to retrain.
    fingerprint_comparison = FingerprintComparisonResult(
        force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(new_fingerprint,
                                                      old_model, train_path)

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=additional_arguments,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    # Nothing changed: reuse the previously trained model.
    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))

    return old_model
async def test_import_nlu_training_data_from_e2e_stories(project: Text):
    """The `E2EImporter` must derive extra NLU training examples from e2e
    story events while forwarding all other calls to the wrapped importer."""
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    importer = TrainingDataImporter.load_from_dict({}, config_path, domain_path,
                                                   [default_data_path])

    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(importer, E2EImporter)
    importer_without_e2e = importer.importer

    stories = StoryGraph([
        StoryStep(events=[
            # The slot event must NOT produce an NLU example.
            SlotSet("some slot", "doesn't matter"),
            UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ]),
        StoryStep(events=[
            # A user utterance without an intent, and an action with e2e text.
            UserUttered("how are you doing?"),
            ActionExecuted("utter_greet_from_stories", action_text="Hi Joey."),
        ]),
    ])

    # Patch to return our test stories
    importer_without_e2e.get_stories = asyncio.coroutine(lambda *args: stories)

    # The wrapping `E2EImporter` simply forwards these method calls
    assert (await importer_without_e2e.get_stories()).as_story_string() == (
        await importer.get_stories()).as_story_string()
    assert (await importer_without_e2e.get_config()) == (
        await importer.get_config())

    # Check additional NLU training data from stories was added
    nlu_data = await importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    assert len(nlu_data.training_examples) > len(
        (await importer_without_e2e.get_nlu_data()).training_examples)

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(data={
            TEXT: "greet_from_stories",
            INTENT_NAME: "greet_from_stories"
        }),
        Message(data={
            ACTION_NAME: "utter_greet_from_stories",
            ACTION_TEXT: ""
        }),
        Message(data={
            TEXT: "how are you doing?",
            INTENT_NAME: None
        }),
        Message(data={
            ACTION_NAME: "utter_greet_from_stories",
            ACTION_TEXT: "Hi Joey."
        }),
    ]

    assert all(m in nlu_data.training_examples
               for m in expected_additional_messages)
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    stories, nlu_data = await asyncio.gather(file_importer.get_stories(),
                                             file_importer.get_nlu_data())

    # NOTE(review): the upstream early-exit branches for empty stories/NLU
    # data are deliberately disabled in this fork — the "bf mod" section
    # below handles empty data per-language instead. Kept for reference.
    # if stories.is_empty() and nlu_data.is_empty():
    #     print_error(
    #         "No training data given. Please provide stories and NLU data in "
    #         "order to train a Rasa model using the '--data' argument."
    #     )
    #     return

    # if stories.is_empty():
    #     print_warning("No stories present. Just a Rasa NLU model will be trained.")
    #     return await _train_nlu_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         persist_nlu_training_data=persist_nlu_training_data,
    #     )

    # if nlu_data.is_empty():
    #     print_warning("No NLU data present. Just a Rasa Core model will be trained.")
    #     return await _train_core_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         kwargs=kwargs,
    #     )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    # Unless forced, compare fingerprints to decide what needs retraining.
    fingerprint_comparison = FingerprintComparisonResult(
        force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(new_fingerprint,
                                                      old_model, train_path)

    # bf mod >
    # This fork treats `nlu_data` as a per-language mapping (language code ->
    # training data) and skips training for parts with no usable data.
    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    nlu_untrainable = [l for l, d in nlu_data.items() if d.is_empty()]
    fingerprint_comparison.core = fingerprint_comparison.core and not core_untrainable
    fingerprint_comparison.nlu = [
        l for l in fingerprint_comparison.nlu if l not in nlu_untrainable
    ]

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=bcolors.OKBLUE)

    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(
                lang),
            color=bcolors.OKBLUE)
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            kwargs=kwargs,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    # Nothing changed: reuse the previously trained model.
    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))

    return old_model