async def test_adding_e2e_actions_to_domain(project: Text):
    """Check that end-to-end bot utterances from stories are added to the domain.

    The importer is loaded for the ``project`` fixture, its story loading is
    replaced by a fixed ``StoryGraph`` and the domain returned by the wrapping
    ``E2EImporter`` is expected to list every end-to-end action text among its
    action names.
    """
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    existing = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )

    additional_actions = ["Hi Joey.", "it's sunny outside."]
    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            ),
            StoryStep(
                events=[
                    UserUttered("how are you doing?", {"name": "greet_from_stories"}),
                    ActionExecuted(
                        additional_actions[0], action_text=additional_actions[0]
                    ),
                    ActionExecuted(
                        additional_actions[1], action_text=additional_actions[1]
                    ),
                    # The same utterance is used twice on purpose
                    ActionExecuted(
                        additional_actions[1], action_text=additional_actions[1]
                    ),
                ]
            ),
        ]
    )

    # Patch to return our test stories. A native coroutine is used because
    # `asyncio.coroutine` was deprecated in Python 3.8 and removed in 3.11.
    async def mocked_stories(*_args, **_kwargs):
        return stories

    existing.get_stories = mocked_stories

    importer = E2EImporter(existing)
    domain = await importer.get_domain()

    assert all(
        action_name in domain.action_names for action_name in additional_actions
    )
def test_adding_e2e_actions_to_domain(default_importer: E2EImporter):
    """End-to-end bot utterances found in stories must show up in the domain."""
    additional_actions = ["Hi Joey.", "it's sunny outside."]

    labeled_step = StoryStep(
        events=[
            UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ]
    )
    e2e_step = StoryStep(
        events=[
            UserUttered("how are you doing?", {"name": "greet_from_stories"}),
            ActionExecuted(additional_actions[0], action_text=additional_actions[0]),
            ActionExecuted(additional_actions[1], action_text=additional_actions[1]),
            # Repeated utterance, exactly as in the scenario under test
            ActionExecuted(additional_actions[1], action_text=additional_actions[1]),
        ]
    )
    story_graph = StoryGraph([labeled_step, e2e_step])

    def fake_get_stories(*_: Any, **__: Any) -> StoryGraph:
        return story_graph

    # Make the wrapped importer hand out the stories built above
    default_importer.importer.get_stories = fake_get_stories

    domain = default_importer.get_domain()

    assert all(
        action_name in domain.action_names_or_texts
        for action_name in additional_actions
    )
def test_different_story_order_doesnt_change_nlu_training_data(
    default_importer: E2EImporter,
):
    """The hash of the NLU training data must not depend on the story order."""
    greet_step = StoryStep(
        events=[
            UserUttered(intent={"name": "greet"}),
            ActionExecuted("utter_greet_from_stories"),
            ActionExecuted("hi", action_text="hi"),
        ]
    )
    bye_step = StoryStep(
        events=[
            UserUttered("bye", {"name": "bye"}),
            ActionExecuted("utter_greet"),
            ActionExecuted("hi", action_text="hi"),
            ActionExecuted("bye", action_text="bye"),
        ]
    )
    story_steps = [greet_step, bye_step]

    def fake_get_stories(*_: Any, **__: Any) -> StoryGraph:
        # `story_steps` is looked up at call time, so the rebinding further
        # down is what the second `get_nlu_data` call gets to see.
        return StoryGraph(story_steps)

    # Make the wrapped importer hand out our test stories
    default_importer.importer.get_stories = fake_get_stories

    first_training_data = default_importer.get_nlu_data()

    # Simulate the stories being read in the opposite order; the NLU training
    # data is expected to be unaffected by this.
    story_steps = story_steps[::-1]

    # Drop any stories cached on the importer before loading again
    default_importer._cached_stories = None

    second_training_data = default_importer.get_nlu_data()

    assert hash(first_training_data) == hash(second_training_data)
async def test_import_nlu_training_data_from_e2e_stories(
    default_importer: TrainingDataImporter,
):
    """NLU training data must be extended with messages taken from the stories."""
    # The default importer is expected to be an `E2EImporter` wrapping another one
    assert isinstance(default_importer, E2EImporter)
    wrapped_importer = default_importer.importer

    story_graph = StoryGraph(
        [
            StoryStep(
                events=[
                    SlotSet("some slot", "doesn't matter"),
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            ),
            StoryStep(
                events=[
                    UserUttered("how are you doing?"),
                    ActionExecuted("utter_greet_from_stories", action_text="Hi Joey."),
                ]
            ),
        ]
    )

    async def fake_get_stories(*_: Any, **__: Any) -> StoryGraph:
        return story_graph

    # Make the wrapped importer hand out our test stories
    wrapped_importer.get_stories = fake_get_stories

    # Stories and config are passed through by the wrapping `E2EImporter`
    wrapped_story_string = (await wrapped_importer.get_stories()).as_story_string()
    e2e_story_string = (await default_importer.get_stories()).as_story_string()
    assert wrapped_story_string == e2e_story_string

    wrapped_config = await wrapped_importer.get_config()
    e2e_config = await default_importer.get_config()
    assert wrapped_config == e2e_config

    # The `E2EImporter` contributes extra NLU examples derived from the stories
    nlu_data = await default_importer.get_nlu_data()
    wrapped_nlu_data = await wrapped_importer.get_nlu_data()
    assert len(nlu_data.training_examples) > len(wrapped_nlu_data.training_examples)

    # Every message derived from the story events has to be present
    expected_additional_messages = [
        Message(
            data={TEXT: "greet_from_stories", INTENT_NAME: "greet_from_stories"}
        ),
        Message(data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: ""}),
        Message(data={TEXT: "how are you doing?", INTENT_NAME: None}),
        Message(
            data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: "Hi Joey."}
        ),
    ]

    assert all(
        message in nlu_data.training_examples
        for message in expected_additional_messages
    )
async def test_without_additional_e2e_examples(tmp_path: Path):
    """Check the NLU data produced when stories contain no end-to-end examples.

    An empty domain and an empty config file are written to ``tmp_path``, the
    story loading of the underlying importer is replaced by a single story
    without end-to-end utterances, and the NLU training data returned by the
    wrapping ``E2EImporter`` is inspected.
    """
    domain_path = tmp_path / "domain.yml"
    domain_path.write_text(Domain.empty().as_yaml())

    config_path = tmp_path / "config.yml"
    config_path.touch()

    existing = TrainingDataImporter.load_from_dict(
        {}, str(config_path), str(domain_path), []
    )

    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            )
        ]
    )

    # Patch to return our test stories. A native coroutine is used because
    # `asyncio.coroutine` was deprecated in Python 3.8 and removed in 3.11.
    async def mocked_stories(*_args, **_kwargs):
        return stories

    existing.get_stories = mocked_stories

    importer = E2EImporter(existing)

    training_data = await importer.get_nlu_data()

    # Examples exist, yet the data counts as empty and nothing is left once
    # the empty end-to-end examples are removed.
    assert training_data.training_examples
    assert training_data.is_empty()
    assert not training_data.without_empty_e2e_examples().training_examples
def _process_step(
    self, step: StoryStep, incoming_trackers: List[TrackerWithCachedStates]
) -> TrackersTuple:
    """Apply the events of a story step to copies of the incoming trackers.

    Args:
        step: The story step whose explicit events are applied.
        incoming_trackers: Trackers that reached the start checkpoint of the step.

    Returns:
        A tuple of the updated tracker copies and the "end" trackers, i.e.
        copies taken right before a reverting / restarting event was applied.
    """
    events = step.explicit_events(self.domain)

    trackers = []
    # Without events there is nothing to apply, so copying can be skipped.
    if events:
        # Several story steps may start at the same checkpoint and share the
        # incoming trackers, hence every step works on its own copies.
        for incoming in incoming_trackers:
            # The sender id lets a human see which story blocks contributed
            # to a tracker: it is the " > "-joined chain of block names.
            previous_sender = incoming.sender_id
            if not previous_sender:
                sender = step.block_name
            elif step.block_name in previous_sender.split(" > "):
                sender = previous_sender
            else:
                sender = previous_sender + " > " + step.block_name
            trackers.append(incoming.copy(sender, step.source_name))

    reverting_events = (ActionReverted, UserUtteranceReverted, Restarted)
    step_is_rule = isinstance(step, RuleStep)

    end_trackers = []
    for event in events:
        is_e2e_action = isinstance(event, ActionExecuted) and event.action_text
        if is_e2e_action and event.action_text not in self.domain.action_texts:
            rasa.shared.utils.cli.print_warning(
                f"Test story '{step.block_name}' in '{step.source_name}' contains the bot utterance "
                f"'{event.action_text}', which is not part of the training data / domain."
            )

        closes_tracker = isinstance(event, reverting_events)
        for tracker in trackers:
            if closes_tracker:
                # Keep a snapshot from before the event is applied
                end_trackers.append(tracker.copy(tracker.sender_id))

            if step_is_rule:
                # Rules may state that a form or a slot shouldn't be set, so
                # "not set" has to be told apart from "explicitly set to None".
                if isinstance(event, ActiveLoop) and event.name is None:
                    event.name = SHOULD_NOT_BE_SET
                if isinstance(event, SlotSet) and event.value is None:
                    event.value = SHOULD_NOT_BE_SET

            tracker.update(event)

    # End trackers are handed back separately so that they are not used
    # for augmentation.
    return trackers, end_trackers
def _filter_event(event: Union["Event", List["Event"]]) -> bool:
    """Decides whether an event should be converted/written.

    Args:
        event: The event (or list of events) to check.

    Returns:
        `True` for lists of events and for every single event that is neither
        an action listen nor an action session start, `False` otherwise.
    """
    from rasa.shared.core.training_data.structures import StoryStep

    # A list stands for an "OR" statement, which is always kept
    if isinstance(event, list):
        return True

    is_skipped = StoryStep.is_action_listen(
        event
    ) or StoryStep.is_action_session_start(event)
    return not is_skipped
def _filter_step_events(step: StoryStep) -> StoryStep:
    """Return a copy of `step` without certain wrongly predicted action events.

    An event is dropped when it is a `WronglyPredictedAction` whose action name
    and predicted action name both equal `ACTION_UNLIKELY_INTENT_NAME`. All
    other events are kept in their original order.
    """
    kept_events = [
        event
        for event in step.events
        if not (
            isinstance(event, WronglyPredictedAction)
            and event.action_name
            == event.action_name_prediction
            == ACTION_UNLIKELY_INTENT_NAME
        )
    ]

    # The copy keeps the id of the original step
    filtered_step = step.create_copy(use_new_id=False)
    filtered_step.events = kept_events
    return filtered_step
def _next_story_steps(self) -> List[StoryStep]:
    """Create the single story step that follows the previous end checkpoints.

    Returns:
        A one-element list with a new `StoryStep` that starts at the previous
        end checkpoints or, if there are none, at the `STORY_START` checkpoint.
    """
    # Fall back to the story start when no previous end checkpoints exist
    checkpoints = self._prev_end_checkpoints() or [Checkpoint(STORY_START)]

    next_step = StoryStep(
        block_name=self.name,
        start_checkpoints=checkpoints,
        source_name=self.source_name,
        is_rule=self.is_rule,
    )
    return [next_step]
def test_converter_for_training(input_converter: CoreFeaturizationInputConverter):
    """Training conversion must yield messages for story events and the domain."""
    # Set up the domain ...
    domain = Domain(
        intents=["greet", "inform", "domain-only-intent"],
        entities=["entity_name"],
        slots=[],
        responses={},
        action_names=["action_listen", "utter_greet"],
        forms={},
        action_texts=["Hi how are you?"],
    )

    # ... and a story graph holding a single story step
    greeting = UserUttered(
        text="hey this has some entities",
        intent={INTENT_NAME_KEY: "greet"},
        entities=[_create_entity(value="Bot", type="entity_name")],
    )
    inform = UserUttered(
        text="some test with an intent!", intent={INTENT_NAME_KEY: "inform"}
    )
    events = [
        ActionExecuted(action_name="action_listen"),
        greeting,
        ActionExecuted(action_name="utter_greet", action_text="Hi how are you?"),
        ActionExecuted(action_name="action_listen"),
        inform,
        ActionExecuted(action_name="action_listen"),
    ]
    story_graph = StoryGraph([StoryStep(events=events)])

    # Run the conversion
    training_data = input_converter.convert_for_training(
        domain=domain, story_graph=story_graph
    )
    messages = training_data.training_examples

    # The (story) events must have been turned into messages as expected
    _check_messages_created_from_events_as_expected(events=events, messages=messages)

    # The domain contributes one message per intent ...
    for intent in domain.intent_properties:
        assert Message(data={INTENT: intent}) in messages

    # ... and one per action, keyed by text or by name
    for action_name_or_text in domain.action_names_or_texts:
        is_text = action_name_or_text in domain.action_texts
        attribute = ACTION_TEXT if is_text else ACTION_NAME
        assert Message(data={attribute: action_name_or_text}) in messages

    # Every message may carry exactly one attribute and it must be a key attribute
    _check_messages_contain_attribute_which_is_key_attribute(messages=messages)
async def test_import_nlu_training_data_from_e2e_stories(project: Text):
    """Check that NLU training data is extended with messages from the stories.

    The importer is loaded for the ``project`` fixture, the story loading of
    the wrapped importer is replaced by a fixed ``StoryGraph`` and the NLU data
    returned by the wrapping ``E2EImporter`` is expected to contain one message
    per user utterance / executed action of these stories.
    """
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    importer = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )

    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(importer, E2EImporter)
    importer_without_e2e = importer.importer

    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    SlotSet("some slot", "doesn't matter"),
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            ),
            StoryStep(
                events=[
                    UserUttered("how are you doing?"),
                    ActionExecuted("utter_greet_from_stories", action_text="Hi Joey."),
                ]
            ),
        ]
    )

    # Patch to return our test stories. A native coroutine is used because
    # `asyncio.coroutine` was deprecated in Python 3.8 and removed in 3.11.
    async def mocked_stories(*_args, **_kwargs):
        return stories

    importer_without_e2e.get_stories = mocked_stories

    # The wrapping `E2EImporter` simply forwards these method calls
    assert (await importer_without_e2e.get_stories()).as_story_string() == (
        await importer.get_stories()
    ).as_story_string()
    assert (await importer_without_e2e.get_config()) == (await importer.get_config())

    # Check additional NLU training data from stories was added
    nlu_data = await importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    assert len(nlu_data.training_examples) > len(
        (await importer_without_e2e.get_nlu_data()).training_examples
    )

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(
            data={TEXT: "greet_from_stories", INTENT_NAME: "greet_from_stories"}
        ),
        Message(data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: ""}),
        Message(data={TEXT: "how are you doing?", INTENT_NAME: None}),
        Message(
            data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: "Hi Joey."}
        ),
    ]

    assert all(m in nlu_data.training_examples for m in expected_additional_messages)