async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
    tmp_path: Path,
):
    """Compare `get_stories(use_e2e=True)` against the bot's test stories file.

    A multi-project layout is created below `tmp_path`: a root config importing
    `bots/Bot A`, a training stories file and a conversation test file. The
    story string of the graph returned by the `MultiProjectImporter` has to
    equal the story string of the steps loaded directly from the test file.
    """
    import rasa.shared.core.training_data.loading as core_loading
    from rasa.shared.core.training_data.structures import StoryGraph

    bot_directory = tmp_path / "bots" / "Bot A"

    # Root config which imports the bot project.
    config_path = str(tmp_path / "config.yml")
    utils.dump_obj_as_yaml_to_file(config_path, {"imports": ["bots/Bot A"]})

    # Regular training stories of the imported bot.
    training_stories_file = bot_directory / "data" / "stories.yml"
    training_stories_file.parent.mkdir(parents=True)
    rasa.shared.utils.io.write_text_file(
        """
        stories:
        - story: story
          steps:
          - intent: greet
          - action: utter_greet
        """,
        training_stories_file,
    )

    # Conversation test stories of the imported bot.
    conversation_tests = """
stories:
- story: story test
  steps:
  - user: hello
    intent: greet
  - action: utter_greet
    """
    conversation_tests_file = bot_directory / "test_stories.yml"
    rasa.shared.utils.io.write_text_file(conversation_tests, conversation_tests_file)

    selector = MultiProjectImporter(config_path)

    steps_from_test_file = await core_loading.load_data_from_resource(
        resource=str(conversation_tests_file),
        domain=Domain.empty(),
        template_variables=None,
        use_e2e=True,
        exclusion_percentage=None,
    )
    expected_graph = StoryGraph(steps_from_test_file)

    actual_graph = await selector.get_stories(use_e2e=True)

    assert expected_graph.as_story_string() == actual_graph.as_story_string()
async def test_without_additional_e2e_examples(tmp_path: Path):
    """Check the NLU data an `E2EImporter` derives from stories without e2e text.

    The wrapped importer is created from an empty domain, an empty config and no
    training files; its `get_stories` is patched to return a single story step.
    The resulting training data must contain examples, report itself as empty,
    and contain no examples after `without_empty_e2e_examples()`.
    """
    domain_path = tmp_path / "domain.yml"
    domain_path.write_text(Domain.empty().as_yaml())

    config_path = tmp_path / "config.yml"
    config_path.touch()

    existing = TrainingDataImporter.load_from_dict(
        {}, str(config_path), str(domain_path), []
    )

    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            )
        ]
    )

    # `asyncio.coroutine` was removed in Python 3.11, so the mock is a real
    # coroutine function. It also accepts keyword arguments, which the former
    # `lambda *args` did not.
    async def mocked_stories(*_, **__) -> StoryGraph:
        return stories

    # Patch to return our test stories
    existing.get_stories = mocked_stories

    importer = E2EImporter(existing)

    training_data = await importer.get_nlu_data()

    assert training_data.training_examples
    assert training_data.is_empty()
    assert not training_data.without_empty_e2e_examples().training_examples
async def get_stories(
    self,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Return a story graph without any steps.

    Args:
        template_variables: Not used by this implementation.
        use_e2e: Not used by this implementation.
        exclusion_percentage: Not used by this implementation.

    Returns:
        A `StoryGraph` built from an empty list of steps.
    """
    no_steps = []
    return StoryGraph(no_steps)
async def extract_story_graph(
    resource_name: Text,
    domain: "Domain",
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> "StoryGraph":
    """Reads stories / rules from a file or directory into a story graph.

    Args:
        resource_name: Path to file or directory.
        domain: The model domain.
        use_e2e: `True` if Markdown files should be parsed as conversation test
            files.
        exclusion_percentage: Percentage of stories which should be dropped.
            `None` if all training data should be used.

    Returns:
        The loaded training data as graph.
    """
    # Imported inside the function, as in the original implementation.
    import rasa.shared.core.training_data.loading as core_loading
    from rasa.shared.core.training_data.structures import StoryGraph

    loaded_steps = await core_loading.load_data_from_resource(
        resource_name,
        domain,
        use_e2e=use_e2e,
        exclusion_percentage=exclusion_percentage,
    )
    graph = StoryGraph(loaded_steps)
    return graph
async def test_adding_e2e_actions_to_domain(project: Text):
    """Check that action texts used in stories end up in the domain's actions.

    The importer for `project` has its `get_stories` patched to return stories
    whose `ActionExecuted` events carry an `action_text`. The domain returned by
    the wrapping `E2EImporter` must list every one of those texts in
    `action_names`.
    """
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    existing = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )

    additional_actions = ["Hi Joey.", "it's sunny outside."]
    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            ),
            StoryStep(
                events=[
                    UserUttered("how are you doing?", {"name": "greet_from_stories"}),
                    ActionExecuted(
                        additional_actions[0], action_text=additional_actions[0]
                    ),
                    ActionExecuted(
                        additional_actions[1], action_text=additional_actions[1]
                    ),
                    # The second text is used twice on purpose in the fixture.
                    ActionExecuted(
                        additional_actions[1], action_text=additional_actions[1]
                    ),
                ]
            ),
        ]
    )

    # `asyncio.coroutine` was removed in Python 3.11, so the mock is a real
    # coroutine function. It also accepts keyword arguments, which the former
    # `lambda *args` did not.
    async def mocked_stories(*_, **__) -> StoryGraph:
        return stories

    # Patch to return our test stories
    existing.get_stories = mocked_stories

    importer = E2EImporter(existing)
    domain = await importer.get_domain()

    assert all(
        action_name in domain.action_names for action_name in additional_actions
    )
def test_adding_e2e_actions_to_domain(default_importer: E2EImporter):
    """Check that action texts used in stories are part of the domain.

    The importer wrapped by `default_importer` gets its `get_stories` replaced
    by a stub returning stories with `action_text` set; every such text must
    show up in `domain.action_names_or_texts`.
    """
    joey_text, weather_text = "Hi Joey.", "it's sunny outside."
    additional_actions = [joey_text, weather_text]

    greeting_step = StoryStep(
        events=[
            UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ]
    )
    e2e_step = StoryStep(
        events=[
            UserUttered("how are you doing?", {"name": "greet_from_stories"}),
            ActionExecuted(joey_text, action_text=joey_text),
            ActionExecuted(weather_text, action_text=weather_text),
            ActionExecuted(weather_text, action_text=weather_text),
        ]
    )
    stories = StoryGraph([greeting_step, e2e_step])

    def mocked_stories(*_: Any, **__: Any) -> StoryGraph:
        return stories

    # Patch to return our test stories
    default_importer.importer.get_stories = mocked_stories

    domain = default_importer.get_domain()

    for expected_action in additional_actions:
        assert expected_action in domain.action_names_or_texts
def get_conversation_tests(self) -> StoryGraph:
    """Retrieves conversation test stories (see parent class for full docstring)."""
    # Collect the graphs of all importers first, then fold them into one graph.
    graphs = [importer.get_conversation_tests() for importer in self._importers]

    combined = StoryGraph([])
    for graph in graphs:
        combined = combined.merge(graph)
    return combined
async def test_import_nlu_training_data_from_e2e_stories(
    default_importer: TrainingDataImporter,
):
    """Check that NLU training data is derived from the story training data.

    `get_stories` of the wrapped importer is replaced by a stub. Stories and
    config must be forwarded unchanged by the `E2EImporter`, while its NLU data
    must contain more examples than the wrapped importer's, including one
    message per user utterance and per executed action of the stubbed stories.
    """
    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(default_importer, E2EImporter)
    importer_without_e2e = default_importer.importer

    step_with_intent = StoryStep(
        events=[
            SlotSet("some slot", "doesn't matter"),
            UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ]
    )
    step_with_text = StoryStep(
        events=[
            UserUttered("how are you doing?"),
            ActionExecuted("utter_greet_from_stories", action_text="Hi Joey."),
        ]
    )
    stories = StoryGraph([step_with_intent, step_with_text])

    async def mocked_stories(*_: Any, **__: Any) -> StoryGraph:
        return stories

    # Patch to return our test stories
    importer_without_e2e.get_stories = mocked_stories

    # The wrapping `E2EImporter` simply forwards these method calls
    inner_stories = await importer_without_e2e.get_stories()
    outer_stories = await default_importer.get_stories()
    assert inner_stories.as_story_string() == outer_stories.as_story_string()

    inner_config = await importer_without_e2e.get_config()
    outer_config = await default_importer.get_config()
    assert inner_config == outer_config

    # Check additional NLU training data from stories was added
    nlu_data = await default_importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    inner_nlu_data = await importer_without_e2e.get_nlu_data()
    assert len(nlu_data.training_examples) > len(inner_nlu_data.training_examples)

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(data={TEXT: "greet_from_stories", INTENT_NAME: "greet_from_stories"}),
        Message(data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: ""}),
        Message(data={TEXT: "how are you doing?", INTENT_NAME: None}),
        Message(
            data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: "Hi Joey."}
        ),
    ]
    for expected_message in expected_additional_messages:
        assert expected_message in nlu_data.training_examples
def visualize_stories(
    story_steps: List[StoryStep],
    domain: Domain,
    output_file: Optional[Text],
    max_history: int,
    nlu_training_data: Optional["TrainingData"] = None,
    should_merge_nodes: bool = True,
    fontsize: int = 12,
) -> "networkx.MultiDiGraph":
    """Given a set of stories, generates a graph visualizing the flows in them.

    Visualization is always a trade-off between keeping the graph as small as
    possible and making sure its meaning doesn't change too much. The graph
    generated from the stories is compressed by merging nodes that are similar.
    As a result, the graph might contain paths that aren't actually specified
    in the stories, but we try to minimize that.

    Args:
        story_steps: The story steps the graph is generated from.
        domain: The domain used when generating trackers from the stories.
        output_file: Defines if and where a file containing the plotted graph
            should be stored.
        max_history: How much 'memory' the graph has. Nodes are only merged if
            they are equal within the history, so the larger the history, the
            less likely it is that any nodes get merged.
        nlu_training_data: Optional Rasa NLU training data instance. It is used
            to replace the user messages from the story file with actual
            messages from the training data.
        should_merge_nodes: Forwarded to `visualize_neighborhood`.
        fontsize: Forwarded to `visualize_neighborhood`.

    Returns:
        The graph created by `visualize_neighborhood`.
    """
    generator = TrainingDataGenerator(
        StoryGraph(story_steps),
        domain,
        use_story_concatenation=False,
        tracker_limit=100,
        augmentation_factor=0,
    )
    event_sequences = [tracker.events for tracker in generator.generate()]

    return visualize_neighborhood(
        None,
        event_sequences,
        output_file,
        max_history,
        nlu_training_data,
        should_merge_nodes,
        max_distance=1,
        fontsize=fontsize,
    )
def story_graph_from_paths(
    files: List[Text], domain: Domain, exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Returns the `StoryGraph` from paths.

    Args:
        files: Paths handed to `loading.load_data_from_files`.
        domain: Domain handed to `loading.load_data_from_files`.
        exclusion_percentage: Forwarded unchanged to the loader.

    Returns:
        A `StoryGraph` wrapping the loaded story steps.
    """
    from rasa.shared.core.training_data import loading

    return StoryGraph(
        loading.load_data_from_files(files, domain, exclusion_percentage)
    )
def get_stories(self, exclusion_percentage: Optional[int] = None,) -> StoryGraph:
    """Retrieves training stories / rules (see parent class for full docstring)."""
    # Collect the graphs of all importers first, then fold them into one graph.
    graphs = [
        importer.get_stories(exclusion_percentage) for importer in self._importers
    ]

    combined = StoryGraph([])
    for graph in graphs:
        combined = combined.merge(graph)
    return combined
def test_node_ordering_with_cycle():
    """Topologically sort a graph that contains cycles and validate the result."""
    # Adjacency mapping with the cycles a -> d -> a and e -> f -> e.
    cyclic_graph = {
        "a": ["b", "c", "d"],
        "b": [],
        "c": ["d"],
        "d": ["a"],
        "e": ["f"],
        "f": ["e"],
    }

    ordering, dropped_edges = StoryGraph.topological_sort(cyclic_graph)

    check_graph_is_sorted(cyclic_graph, ordering, dropped_edges)
def test_node_ordering():
    """Topologically sort an acyclic graph; no edge may be reported as removed."""
    acyclic_graph = {
        "a": ["b", "c", "d"],
        "b": [],
        "c": ["d"],
        "d": [],
        "e": ["f"],
        "f": [],
    }

    ordering, dropped_edges = StoryGraph.topological_sort(acyclic_graph)

    # The removed edges are compared against an empty list.
    assert dropped_edges == []

    check_graph_is_sorted(acyclic_graph, ordering, dropped_edges)
async def story_graph_from_paths(
    files: List[Text],
    domain: Domain,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Builds a `StoryGraph` from the story steps loaded from `files`.

    Args:
        files: Paths handed to `loading.load_data_from_files`.
        domain: Domain handed to `loading.load_data_from_files`.
        template_variables: Forwarded unchanged to the loader.
        use_e2e: Forwarded unchanged to the loader.
        exclusion_percentage: Forwarded unchanged to the loader.

    Returns:
        A `StoryGraph` wrapping the loaded story steps.
    """
    from rasa.shared.core.training_data import loading

    loaded_steps = await loading.load_data_from_files(
        files, domain, template_variables, use_e2e, exclusion_percentage
    )
    graph = StoryGraph(loaded_steps)
    return graph
async def get_stories(
    self,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Merges the stories of all wrapped importers into a single graph.

    Args:
        template_variables: Forwarded to every importer's `get_stories`.
        use_e2e: Forwarded to every importer's `get_stories`.
        exclusion_percentage: Forwarded to every importer's `get_stories`.

    Returns:
        The result of merging every importer's graph, in importer order, into
        an initially empty `StoryGraph`.
    """
    pending = [
        importer.get_stories(template_variables, use_e2e, exclusion_percentage)
        for importer in self._importers
    ]
    # `gather` returns the results in the order of the awaitables passed in.
    graphs = await asyncio.gather(*pending)

    combined = StoryGraph([])
    for graph in graphs:
        combined = combined.merge(graph)
    return combined
async def extract_story_graph(
    resource_name: Text,
    domain: "Domain",
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> "StoryGraph":
    """Loads story steps from a resource and wraps them in a `StoryGraph`.

    Args:
        resource_name: Resource handed to `load_data_from_resource`.
        domain: Domain handed to `load_data_from_resource`.
        use_e2e: Forwarded unchanged to the loader.
        exclusion_percentage: Forwarded unchanged to the loader.

    Returns:
        A `StoryGraph` built from the loaded story steps.
    """
    # Imported inside the function, as in the original implementation.
    import rasa.shared.core.training_data.loading as core_loading
    from rasa.shared.core.training_data.structures import StoryGraph

    loaded_steps = await core_loading.load_data_from_resource(
        resource_name,
        domain,
        use_e2e=use_e2e,
        exclusion_percentage=exclusion_percentage,
    )
    graph = StoryGraph(loaded_steps)
    return graph
async def extract_rule_data(
    resource_name: Text,
    domain: "Domain",
    use_e2e: bool = False,
    exclusion_percentage: int = None,
) -> "StoryGraph":
    """Loads story steps from a resource and wraps them in a `StoryGraph`.

    Args:
        resource_name: Resource handed to `load_data_from_resource`.
        domain: Domain handed to `load_data_from_resource`.
        use_e2e: Forwarded unchanged to the loader.
        exclusion_percentage: Forwarded unchanged to the loader; defaults to
            `None`.

    Returns:
        A `StoryGraph` built from the loaded story steps.
    """
    # Imported inside the function, as in the original implementation.
    from rasa.shared.core.training_data.structures import StoryGraph
    from rasa.shared.core.training_data import loading

    loaded_steps = await loading.load_data_from_resource(
        resource_name,
        domain,
        use_e2e=use_e2e,
        exclusion_percentage=exclusion_percentage,
    )
    graph = StoryGraph(loaded_steps)
    return graph
def test_converter_for_training(input_converter: CoreFeaturizationInputConverter):
    """Check the messages `convert_for_training` creates from domain and stories.

    Messages are expected for every story event, every intent of the domain and
    every action name or action text of the domain.
    """
    # create domain and story graph
    domain = Domain(
        intents=["greet", "inform", "domain-only-intent"],
        entities=["entity_name"],
        slots=[],
        responses={},
        action_names=["action_listen", "utter_greet"],
        forms={},
        action_texts=["Hi how are you?"],
    )
    events = [
        ActionExecuted(action_name="action_listen"),
        UserUttered(
            text="hey this has some entities",
            intent={INTENT_NAME_KEY: "greet"},
            entities=[_create_entity(value="Bot", type="entity_name")],
        ),
        ActionExecuted(action_name="utter_greet", action_text="Hi how are you?"),
        ActionExecuted(action_name="action_listen"),
        UserUttered(
            text="some test with an intent!", intent={INTENT_NAME_KEY: "inform"}
        ),
        ActionExecuted(action_name="action_listen"),
    ]
    single_step = StoryStep(events=events)
    story_graph = StoryGraph([single_step])

    # convert!
    messages = input_converter.convert_for_training(
        domain=domain, story_graph=story_graph
    ).training_examples

    # check that messages were created from (story) events as expected
    _check_messages_created_from_events_as_expected(events=events, messages=messages)

    # check that messages were created from domain as expected
    for intent in domain.intent_properties:
        assert Message(data={INTENT: intent}) in messages
    for action_name_or_text in domain.action_names_or_texts:
        # Action texts are stored under a different key than action names.
        attribute = (
            ACTION_TEXT if action_name_or_text in domain.action_texts else ACTION_NAME
        )
        assert Message(data={attribute: action_name_or_text}) in messages

    # check that each message contains only one attribute, which must be a key attribute
    _check_messages_contain_attribute_which_is_key_attribute(messages=messages)
def extract_story_graph(
    resource_name: Text, domain: "Domain", exclusion_percentage: Optional[int] = None
) -> "StoryGraph":
    """Reads stories / rules from a file or directory into a story graph.

    Args:
        resource_name: Path to file or directory.
        domain: The model domain.
        exclusion_percentage: Percentage of stories which should be dropped.
            `None` if all training data should be used.

    Returns:
        The loaded training data as graph.
    """
    # Imported inside the function, as in the original implementation.
    import rasa.shared.core.training_data.loading as core_loading
    from rasa.shared.core.training_data.structures import StoryGraph

    loaded_steps = core_loading.load_data_from_resource(
        resource_name, domain, exclusion_percentage=exclusion_percentage
    )
    graph = StoryGraph(loaded_steps)
    return graph
def _warn_if_rule_based_data_is_unused_or_missing(
    self, story_graph: StoryGraph
) -> None:
    """Warns if rule-data is unused or missing.

    A warning is raised when a policy consuming rule data is configured but the
    story graph contains no `RuleStep`, and when the story graph contains a
    `RuleStep` but no configured policy consumes rule data.

    Args:
        story_graph: a story graph (core training data)
    """
    consuming_rule_data = any(
        policy_node.uses.supported_data()
        in [SupportedData.RULE_DATA, SupportedData.ML_AND_RULE_DATA]
        for policy_node in self._policy_schema_nodes
    )

    # Reminder: We generate rule trackers via:
    #   rasa/shared/core/generator/...
    #   .../TrainingDataGenerator/_generate_rule_trackers/
    contains_rule_tracker = any(
        isinstance(step, RuleStep) for step in story_graph.ordered_steps()
    )

    if consuming_rule_data and not contains_rule_tracker:
        # The message previously ended in "from your your configuration."
        rasa.shared.utils.io.raise_warning(
            "Found a rule-based policy in your configuration but "
            "no rule-based training data. Please add rule-based "
            "stories to your training data or "
            "remove the rule-based policy "
            f"(`{RulePolicy.__name__}`) from "
            "your configuration.",
            docs=DOCS_URL_RULES,
        )
    elif not consuming_rule_data and contains_rule_tracker:
        rasa.shared.utils.io.raise_warning(
            "Found rule-based training data but no policy supporting rule-based "
            f"data. Please add `{RulePolicy.__name__}` "
            "or another rule-supporting "
            f"policy to the `policies` section in `{DEFAULT_CONFIG_PATH}`.",
            docs=DOCS_URL_RULES,
        )
def test_consistent_fingerprints():
    """Loading the same stories twice must give equal story graph fingerprints."""
    domain = Domain.load("data/test_domains/default_with_slots.yml")
    stories_path = "data/test_yaml_stories/stories.yml"

    def load_story_graph() -> StoryGraph:
        steps = rasa.shared.core.training_data.loading.load_data_from_resource(
            stories_path, domain
        )
        return StoryGraph(steps)

    first_graph = load_story_graph()
    # read again
    second_graph = load_story_graph()

    first_fingerprint = first_graph.fingerprint()
    second_fingerprint = second_graph.fingerprint()

    assert first_fingerprint == second_fingerprint
def __init__(
    self,
    story_graph: StoryGraph,
    domain: Domain,
    remove_duplicates: bool = True,
    unique_last_num_states: Optional[int] = None,
    augmentation_factor: int = 50,
    tracker_limit: Optional[int] = None,
    use_story_concatenation: bool = True,
    debug_plots: bool = False,
):
    """Given a set of story parts, generates all stories that are possible.

    The different story parts can end and start with checkpoints and this
    generator will match start and end checkpoints to connect complete stories.
    Afterwards, duplicate stories will be removed and the data is augmented
    (if augmentation is enabled).

    Args:
        story_graph: Graph of story parts; it is stored with cycles removed.
        domain: Domain stored on the generator.
        remove_duplicates: Stored in the extractor config.
        unique_last_num_states: Stored in the extractor config.
        augmentation_factor: Stored in the extractor config; it also determines
            the maximum number of augmented trackers.
        tracker_limit: Stored in the extractor config.
        use_story_concatenation: Stored in the extractor config.
        debug_plots: If `True`, the cycle-free graph is visualized to
            `story_blocks_connections.html`.
    """
    acyclic_graph = story_graph.with_cycles_removed()
    self.story_graph = acyclic_graph
    if debug_plots:
        acyclic_graph.visualize("story_blocks_connections.html")

    self.domain = domain

    self.config = ExtractorConfig(
        remove_duplicates=remove_duplicates,
        unique_last_num_states=unique_last_num_states,
        augmentation_factor=augmentation_factor,
        # 10x factor is a heuristic for augmentation rounds
        max_number_of_augmented_trackers=augmentation_factor * 10,
        tracker_limit=tracker_limit,
        use_story_concatenation=use_story_concatenation,
        # Fixed seed for the random number generator.
        rand=random.Random(42),
    )

    # hashed featurization of all finished trackers
    self.hashed_featurizations = set()
def test_generating_trackers(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    config: Dict[Text, Any],
    expected_trackers: int,
):
    """Check number and type of the trackers a `TrainingTrackerProvider` generates.

    The provider is created from its default config overridden by `config` and
    run on the stories from `data/test_yaml_stories/stories.yml` with an empty
    domain.
    """
    story_steps = YAMLStoryReader().read_from_file(
        "data/test_yaml_stories/stories.yml"
    )

    # Entries of `config` take precedence over the component defaults.
    provider_config = {**TrainingTrackerProvider.get_default_config(), **config}
    provider = TrainingTrackerProvider.create(
        provider_config,
        default_model_storage,
        Resource("xy"),
        default_execution_context,
    )

    trackers = provider.generate_trackers(
        story_graph=StoryGraph(story_steps), domain=Domain.empty()
    )

    assert len(trackers) == expected_trackers
    for tracker in trackers:
        assert isinstance(tracker, TrackerWithCachedStates)
def create_zip_file(
    nlu: TrainingData, domain: Domain, stories: StoryGraph, config: Dict, bot: Text
) -> Text:
    """
    adds training files to zip

    The serialized NLU data, domain, stories and config are written to a
    directory by `Utility.save_files`. That directory is zipped into the system
    temp directory under the name `bot`, and is removed afterwards even if
    creating the archive fails.

    :param nlu: nlu data
    :param domain: domain data
    :param stories: stories data
    :param config: config data
    :param bot: bot id
    :return: path of the created zip archive
    """
    directory = Utility.save_files(
        nlu.nlu_as_markdown().encode(),
        domain.as_yaml().encode(),
        stories.as_story_string().encode(),
        yaml.dump(config).encode(),
    )
    zip_path = os.path.join(tempfile.gettempdir(), bot)
    try:
        return shutil.make_archive(zip_path, format="zip", root_dir=directory)
    finally:
        # Always clean up the staging directory so a failed archive does not leak it.
        shutil.rmtree(directory)
def get_stories(self) -> StoryGraph:
    """Return a `StoryGraph` built from an empty list of steps."""
    no_steps = []
    return StoryGraph(no_steps)
def get_conversation_tests(self) -> StoryGraph:
    """Retrieves conversation test stories (see parent class for full docstring)."""
    # This implementation has no conversation tests: the graph has no steps.
    no_steps = []
    return StoryGraph(no_steps)
def get_stories(self, exclusion_percentage: Optional[int] = None,) -> StoryGraph:
    """Retrieves training stories / rules (see parent class for full docstring)."""
    # `exclusion_percentage` is not used here: the returned graph has no steps.
    no_steps = []
    return StoryGraph(no_steps)
def test_is_empty():
    """A `StoryGraph` created without any steps reports itself as empty."""
    graph_without_steps = StoryGraph([])

    assert graph_without_steps.is_empty()
async def test_import_nlu_training_data_from_e2e_stories(project: Text):
    """Check that NLU training data is derived from the story training data.

    `get_stories` of the wrapped importer is replaced by a stub. Stories and
    config must be forwarded unchanged by the `E2EImporter`, while its NLU data
    must contain more examples than the wrapped importer's, including one
    message per user utterance and per executed action of the stubbed stories.
    """
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    importer = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )

    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(importer, E2EImporter)
    importer_without_e2e = importer.importer

    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    SlotSet("some slot", "doesn't matter"),
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            ),
            StoryStep(
                events=[
                    UserUttered("how are you doing?"),
                    ActionExecuted("utter_greet_from_stories", action_text="Hi Joey."),
                ]
            ),
        ]
    )

    # `asyncio.coroutine` was removed in Python 3.11, so the mock is a real
    # coroutine function. It also accepts keyword arguments, which the former
    # `lambda *args` did not.
    async def mocked_stories(*_, **__) -> StoryGraph:
        return stories

    # Patch to return our test stories
    importer_without_e2e.get_stories = mocked_stories

    # The wrapping `E2EImporter` simply forwards these method calls
    assert (await importer_without_e2e.get_stories()).as_story_string() == (
        await importer.get_stories()
    ).as_story_string()
    assert (await importer_without_e2e.get_config()) == (await importer.get_config())

    # Check additional NLU training data from stories was added
    nlu_data = await importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    assert len(nlu_data.training_examples) > len(
        (await importer_without_e2e.get_nlu_data()).training_examples
    )

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(data={TEXT: "greet_from_stories", INTENT_NAME: "greet_from_stories"}),
        Message(data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: ""}),
        Message(data={TEXT: "how are you doing?", INTENT_NAME: None}),
        Message(
            data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: "Hi Joey."}
        ),
    ]

    assert all(m in nlu_data.training_examples for m in expected_additional_messages)
def mocked_stories(*_: Any, **__: Any) -> StoryGraph:
    """Ignore all arguments and wrap `stories` in a `StoryGraph`.

    `stories` is not a parameter; it is resolved from the surrounding scope.
    """
    graph = StoryGraph(stories)
    return graph