async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
    tmpdir_factory: TempdirFactory,
):
    """With `use_e2e=True` the importer must return the conversation tests.

    A parent bot imports "Bot A", which contains a regular story file and an
    end-to-end conversation test file. The stories returned by the
    `MultiProjectImporter` are compared against the steps loaded directly
    from the conversation test file.
    """
    from rasa.core.interpreter import RegexInterpreter
    from rasa.core.training.structures import StoryGraph
    import rasa.core.training.loading as core_loading

    root = tmpdir_factory.mktemp("Parent Bot")
    bot_a_dir = root / "bots" / "Bot A"

    # The parent project only consists of a config importing "Bot A".
    config_path = str(root / "config.yml")
    utils.dump_obj_as_yaml_to_file(config_path, {"imports": ["bots/Bot A"]})

    regular_story_file = bot_a_dir / "data" / "stories.md"
    regular_story_file.write(
        """
        ## story
        * greet
            - utter_greet
        """,
        ensure=True,
    )

    e2e_story_test_file = bot_a_dir / DEFAULT_E2E_TESTS_PATH / "conversation_tests.md"
    e2e_story_test_file.write(
        """
        ## story test
        * greet : "hello"
            - utter_greet
        """,
        ensure=True,
    )

    selector = MultiProjectImporter(config_path)

    # Reference result: read the conversation test file directly.
    e2e_steps = await core_loading.load_data_from_resource(
        resource=str(e2e_story_test_file),
        domain=Domain.empty(),
        interpreter=RegexInterpreter(),
        template_variables=None,
        use_e2e=True,
        exclusion_percentage=None,
    )
    expected_graph = StoryGraph(e2e_steps)

    actual_graph = await selector.get_stories(use_e2e=True)

    assert expected_graph.as_story_string() == actual_graph.as_story_string()
async def test_adding_e2e_actions_to_domain(project: Text):
    """Bot utterances given as text in stories must show up as domain actions.

    The underlying importer is patched to return two story steps; the second
    one contains actions defined through `action_text`. After wrapping the
    importer in an `E2EImporter`, every one of these texts is expected in
    `domain.action_names`.
    """
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    existing = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )

    additional_actions = ["Hi Joey.", "it's sunny outside."]
    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            ),
            StoryStep(
                events=[
                    UserUttered("how are you doing?", {"name": "greet_from_stories"}),
                    ActionExecuted(
                        additional_actions[0], action_text=additional_actions[0]
                    ),
                    # The second text is used twice on purpose (duplicate action).
                    ActionExecuted(
                        additional_actions[1], action_text=additional_actions[1]
                    ),
                    ActionExecuted(
                        additional_actions[1], action_text=additional_actions[1]
                    ),
                ]
            ),
        ]
    )

    # Patch to return our test stories. `asyncio.coroutine` was removed in
    # Python 3.11, so a native coroutine function is used instead; it also
    # accepts keyword arguments, which the former lambda did not.
    async def _get_test_stories(*args, **kwargs):
        return stories

    existing.get_stories = _get_test_stories

    importer = E2EImporter(existing)
    domain = await importer.get_domain()

    assert all(
        action_name in domain.action_names for action_name in additional_actions
    )
def __init__(
    self,
    story_graph: StoryGraph,
    domain: Domain,
    remove_duplicates: bool = True,
    unique_last_num_states: Optional[int] = None,
    augmentation_factor: int = 20,
    tracker_limit: Optional[int] = None,
    use_story_concatenation: bool = True,
    debug_plots: bool = False,
):
    """Prepare the generation of all stories that the story parts allow.

    Story parts may begin and end with checkpoints; the generator connects
    matching start and end checkpoints to form complete stories. Afterwards
    duplicate stories are removed and the data is augmented (if augmentation
    is enabled).

    The story graph is stored with its cycles removed. When `debug_plots` is
    set, that graph is written to 'story_blocks_connections.html'.
    """
    self.story_graph = story_graph.with_cycles_removed()
    if debug_plots:
        self.story_graph.visualize('story_blocks_connections.html')

    self.domain = domain

    # Heuristic: allow ten augmented trackers per unit of augmentation factor.
    augmented_tracker_cap = augmentation_factor * 10

    extractor_settings = dict(
        remove_duplicates=remove_duplicates,
        unique_last_num_states=unique_last_num_states,
        augmentation_factor=augmentation_factor,
        max_number_of_augmented_trackers=augmented_tracker_cap,
        tracker_limit=tracker_limit,
        use_story_concatenation=use_story_concatenation,
        # Fixed seed keeps the random parts of the generation reproducible.
        rand=random.Random(42),
    )
    self.config = ExtractorConfig(**extractor_settings)

    # Hashed featurizations of all finished trackers.
    self.hashed_featurizations = set()
async def get_stories(
    self,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Return a story graph without any story steps.

    All arguments are accepted for interface compatibility and ignored.
    """
    no_story_steps = []
    return StoryGraph(no_story_steps)
async def get_stories(
    self,
    interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Read the story files and extend them with chitchat-augmented copies.

    Helper parameters read here:
        add_original: keep the unmodified stories in the result (default True).
        copys_per_story: number of augmented copies per story (default 1).

    Every copy is passed through `add_chitchat_to_story` together with the
    index that `helper.get_indexes` provides for that story and copy round.
    """
    domain = await self.get_domain()
    original_steps = await StoryFileReader.read_from_files(
        self._story_files,
        domain,
        interpreter,
        template_variables,
        use_e2e,
        exclusion_percentage,
    )

    keep_originals = self.helper.get_param('add_original', True)
    result_steps = original_steps.copy() if keep_originals else list()

    copies_per_story = self.helper.get_param('copys_per_story', 1)
    for copy_nr in range(copies_per_story):
        indexes = self.helper.get_indexes(original_steps, copy_nr)
        for position, original in enumerate(original_steps):
            augmented = await self.add_chitchat_to_story(
                original.create_copy(True), domain, indexes[position], interpreter
            )
            result_steps.append(augmented)

    return StoryGraph(result_steps)
async def get_stories(
    self,
    interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Read the story files and add one rotated copy of every story.

    For each story a pivot is drawn at random from `get_possible_indexes`;
    the copy's events start at the pivot and the events in front of it are
    moved to the end.
    """
    domain = await self.get_domain()
    story_steps = await StoryFileReader.read_from_files(
        self._story_files,
        domain,
        interpreter,
        template_variables,
        use_e2e,
        exclusion_percentage,
    )

    all_steps = story_steps.copy()
    for original in story_steps:
        pivot = random.choice(get_possible_indexes(original))

        rotated = original.create_copy(True)
        tail = rotated.events[pivot:]
        head = rotated.events[:pivot]
        rotated.events = tail + head

        all_steps.append(rotated)

    return StoryGraph(all_steps)
async def test_without_additional_e2e_examples(tmp_path: Path):
    """Stories without end-to-end text must not yield real NLU examples.

    The importer is built from an empty domain, an empty config and no data
    paths; its stories are patched to a single intent/action step. The NLU
    data derived by the `E2EImporter` then contains training examples, but it
    counts as empty and nothing is left after
    `without_empty_e2e_examples()`.
    """
    domain_path = tmp_path / "domain.yml"
    domain_path.write_text(Domain.empty().as_yaml())

    config_path = tmp_path / "config.yml"
    config_path.touch()

    existing = TrainingDataImporter.load_from_dict(
        {}, str(config_path), str(domain_path), []
    )

    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            )
        ]
    )

    # Patch to return our test stories. `asyncio.coroutine` was removed in
    # Python 3.11, so a native coroutine function is used instead; it also
    # accepts keyword arguments, which the former lambda did not.
    async def _get_test_stories(*args, **kwargs):
        return stories

    existing.get_stories = _get_test_stories

    importer = E2EImporter(existing)
    training_data = await importer.get_nlu_data()

    assert training_data.training_examples
    assert training_data.is_empty()
    assert not training_data.without_empty_e2e_examples().training_examples
async def get_stories(
    self,
    interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Read the story files and add copies whose event blocks are shuffled.

    Helper parameters read here:
        number_of_blocks: upper bound for the blocks a story is combined
            into (default 3); capped at the number of available blocks.
        number_of_copies: shuffled copies created per story (default 1).
    """
    domain = await self.get_domain()
    story_steps = await StoryFileReader.read_from_files(
        self._story_files,
        domain,
        interpreter,
        template_variables,
        use_e2e,
        exclusion_percentage,
    )

    all_steps = story_steps.copy()
    requested_blocks = self.helper.get_param('number_of_blocks', 3)
    copies_per_story = self.helper.get_param('number_of_copies', 1)

    for _ in range(copies_per_story):
        for original in story_steps:
            event_count = len(original.events)
            split_points = get_possible_indexes(original)
            blocks = get_all_blocks(event_count, split_points)
            # Never ask for more blocks than the story provides.
            blocks = combine_blocks(
                blocks,
                number_of_blocks=min(requested_blocks, len(blocks)),
                number_of_stories=event_count,
            )

            shuffled = original.create_copy(True)
            shuffled.events = shuffle_blocks(shuffled.events, blocks)
            all_steps.append(shuffled)

    return StoryGraph(all_steps)
async def get_stories(
    self,
    interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Return a story graph without any story steps.

    All arguments are accepted for interface compatibility and ignored.
    """
    no_story_steps = []
    return StoryGraph(no_story_steps)
async def visualize_stories(
    story_steps: List[StoryStep],
    domain: Domain,
    output_file: Optional[Text],
    max_history: int,
    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
    nlu_training_data: Optional["TrainingData"] = None,
    should_merge_nodes: bool = True,
    fontsize: int = 12,
    silent: bool = False,
):
    """Build a graph that visualizes the conversation flows of the stories.

    Visualization is a trade-off between keeping the graph as small as
    possible and not changing its meaning too much. Similar nodes are merged
    to compress the graph, so the result may contain paths that are not
    actually specified in the stories; this is kept to a minimum.

    `output_file` defines if and where the plotted graph is stored.

    `max_history` defines how much 'memory' the graph has: nodes are only
    merged if they are equal within that history, so a larger history makes
    merging less likely.

    `nlu_training_data` may hold a Rasa NLU training data instance. It is
    used to replace the user messages from the story file with actual
    messages from the training data.
    """
    # No concatenation and no augmentation: only the stories as written,
    # limited to 100 trackers.
    generator = TrainingDataGenerator(
        StoryGraph(story_steps),
        domain,
        use_story_concatenation=False,
        tracker_limit=100,
        augmentation_factor=0,
    )
    event_sequences = [tracker.events for tracker in generator.generate(silent)]

    return await visualize_neighborhood(
        None,
        event_sequences,
        output_file,
        max_history,
        interpreter,
        nlu_training_data,
        should_merge_nodes,
        max_distance=1,
        fontsize=fontsize,
    )
async def get_stories(
    self,
    interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Read this importer's story files and return them as a story graph."""
    domain = await self.get_domain()
    steps = await StoryFileReader.read_from_files(
        self.story_files,
        domain,
        interpreter,
        template_variables,
        use_e2e,
        exclusion_percentage,
    )
    return StoryGraph(steps)
async def test_only_getting_e2e_conversation_tests_if_e2e_enabled(
    tmp_path: Path, test_stories_filename: Text, test_story: Text
):
    """With `use_e2e=True` the importer must return the conversation tests.

    "Bot A" is imported by the parent project and contains a regular story
    file plus a test story file (name and content come from the test
    parameters). The stories returned by the `MultiProjectImporter` are
    compared against the steps loaded directly from the test story file.
    """
    from rasa.core.training.structures import StoryGraph
    import rasa.core.training.loading as core_loading

    bot_a_dir = tmp_path / "bots" / "Bot A"

    # The parent project only consists of a config importing "Bot A".
    config_path = str(tmp_path / "config.yml")
    utils.dump_obj_as_yaml_to_file(config_path, {"imports": ["bots/Bot A"]})

    story_file = bot_a_dir / "data" / "stories.md"
    story_file.parent.mkdir(parents=True)
    rasa.shared.utils.io.write_text_file(
        """
        ## story
        * greet
            - utter_greet
        """,
        story_file,
    )

    story_test_file = bot_a_dir / DEFAULT_E2E_TESTS_PATH / test_stories_filename
    story_test_file.parent.mkdir(parents=True)
    rasa.shared.utils.io.write_text_file(test_story, story_test_file)

    selector = MultiProjectImporter(config_path)

    # Reference result: read the test story file directly.
    test_story_steps = await core_loading.load_data_from_resource(
        resource=str(story_test_file),
        domain=Domain.empty(),
        template_variables=None,
        use_e2e=True,
        exclusion_percentage=None,
    )
    expected_graph = StoryGraph(test_story_steps)

    actual_graph = await selector.get_stories(use_e2e=True)

    assert expected_graph.as_story_string() == actual_graph.as_story_string()
def test_node_ordering_with_cycle():
    """Topological sorting must cope with graphs that contain cycles.

    The graph holds the cycles a -> d -> a (also via c) and e -> f -> e.
    """
    cyclic_graph = dict(
        a=["b", "c", "d"],
        b=[],
        c=["d"],
        d=["a"],
        e=["f"],
        f=["e"],
    )

    ordering, dropped_edges = StoryGraph.topological_sort(cyclic_graph)

    check_graph_is_sorted(cyclic_graph, ordering, dropped_edges)
async def train(self):
    """Train the NLU model and the dialogue (core) policies of the skill.

    Nothing is trained when the directory 'data/<skill-id>' already exists.
    Otherwise stories are generated via `GenerateStories.run`, an NLU model
    is trained and persisted below 'data/<skill-id>', and a policy ensemble
    plus the domain are persisted below 'data/<skill-id>/core'.
    """
    nltk.download('punkt')
    lang = self.config['language']
    # NOTE(review): the original indentation was lost; everything below is
    # assumed to belong to this `if` block - confirm against the real file.
    if not os.path.exists('data/' + self.config['skill-id']):
        _LOGGER.info("Starting Skill training.")
        _LOGGER.info("Generating stories.")
        data, domain_data, stories = await GenerateStories.run(
            self.config['skill-id'], self.config['language'], self.asm)
        training_data = TrainingData(training_examples=data)
        nlu_config = RasaNLUModelConfig({
            "language": lang,
            "pipeline": self.config['pipeline'],
            "data": None
        })
        trainer = Trainer(nlu_config, None, True)
        _LOGGER.info("Training Arcus NLU")
        trainer.train(training_data)
        trainer.persist("data/" + self.config['skill-id'], None, 'nlu')

        # Rasa core
        domain = Domain.from_dict(domain_data)
        # The generated stories are parsed line by line into story steps.
        reader = StoryFileReader(domain, RegexInterpreter(), None, False)
        story_steps = await reader.process_lines(stories)
        graph = StoryGraph(story_steps)

        g = TrainingDataGenerator(
            graph,
            domain,
            remove_duplicates=True,
            unique_last_num_states=None,
            augmentation_factor=20,
            tracker_limit=None,
            use_story_concatenation=True,
            debug_plots=False,
        )

        training_trackers = g.generate()
        # The configured policies are turned into policy objects first and
        # then wrapped into the ensemble that gets trained.
        policy_list = SimplePolicyEnsemble.from_dict(
            {"policies": self.config['policies']})
        policy_ensemble = SimplePolicyEnsemble(policy_list)

        _LOGGER.info("Training Arcus Core")
        policy_ensemble.train(training_trackers, domain)
        policy_ensemble.persist(
            "data/" + self.config['skill-id'] + "/core", False)
        domain.persist("data/" + self.config['skill-id'] + "/core/model")
        domain.persist_specification("data/" + self.config['skill-id'] +
                                     "/core")
def test_node_ordering():
    """Sorting an acyclic graph must not remove any edge."""
    acyclic_graph = dict(
        a=["b", "c", "d"],
        b=[],
        c=["d"],
        d=[],
        e=["f"],
        f=[],
    )

    ordering, dropped_edges = StoryGraph.topological_sort(acyclic_graph)

    assert dropped_edges == set()
    check_graph_is_sorted(acyclic_graph, ordering, dropped_edges)
async def story_graph_from_paths(
    files: List[Text],
    domain: Domain,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Load the story steps from `files` and wrap them in a story graph."""
    from rasa.core.training import loading

    steps = await loading.load_data_from_files(
        files,
        domain,
        template_variables,
        use_e2e,
        exclusion_percentage,
    )
    graph = StoryGraph(steps)
    return graph
def create_zip_file(nlu: TrainingData, domain: Domain, stories: StoryGraph,
                    config: Dict, bot: Text):
    """Bundle the training files of a bot into a zip archive.

    The NLU data (markdown), the domain (YAML), the stories (story string)
    and the config (YAML dump) are handed to `Utility.save_files` as encoded
    bytes. The directory it returns is zipped and removed again afterwards.

    Args:
        nlu: NLU training data of the bot.
        domain: Domain of the bot.
        stories: Story graph of the bot.
        config: Configuration that is dumped as YAML.
        bot: Bot name; used as the archive's base name in the system's
            temporary directory.

    Returns:
        Path of the created archive, as returned by `shutil.make_archive`.
    """
    directory = Utility.save_files(
        nlu.nlu_as_markdown().encode(),
        domain.as_yaml().encode(),
        stories.as_story_string().encode(),
        yaml.dump(config).encode(),
    )
    try:
        zip_path = os.path.join(tempfile.gettempdir(), bot)
        return shutil.make_archive(zip_path, format="zip", root_dir=directory)
    finally:
        # Remove the staging directory even when archiving fails, so that
        # failed exports do not pile up on disk.
        shutil.rmtree(directory)
async def get_stories(
    self,
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Collect the stories of all wrapped importers in one story graph.

    The importers are queried concurrently; their graphs are then merged, in
    importer order, into an initially empty graph.
    """
    pending = [
        importer.get_stories(template_variables, use_e2e, exclusion_percentage)
        for importer in self._importers
    ]
    graphs = await asyncio.gather(*pending)

    merged = StoryGraph([])
    for graph in graphs:
        merged = merged.merge(graph)
    return merged
async def extract_story_graph(
    resource_name: Text,
    domain: "Domain",
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> "StoryGraph":
    """Load the story steps of `resource_name` and return them as a graph."""
    from rasa.core.training.structures import StoryGraph
    import rasa.core.training.loading as core_loading

    loading_options = {
        "use_e2e": use_e2e,
        "exclusion_percentage": exclusion_percentage,
    }
    steps = await core_loading.load_data_from_resource(
        resource_name, domain, **loading_options
    )
    return StoryGraph(steps)
async def extract_rule_data(
    resource_name: Text,
    domain: "Domain",
    use_e2e: bool = False,
    exclusion_percentage: int = None,
) -> "StoryGraph":
    """Load the steps found in `resource_name` and return them as a graph."""
    from rasa.core.training import loading
    from rasa.core.training.structures import StoryGraph

    loading_options = {
        "use_e2e": use_e2e,
        "exclusion_percentage": exclusion_percentage,
    }
    steps = await loading.load_data_from_resource(
        resource_name, domain, **loading_options
    )
    return StoryGraph(steps)
async def extract_story_graph(
    resource_name: Text,
    domain: 'Domain',
    interpreter: Optional['NaturalLanguageInterpreter'] = None,
    use_e2e: bool = False,
    exclusion_percentage: int = None,
) -> 'StoryGraph':
    """Read the stories below `resource_name` and return them as a graph.

    A `RegexInterpreter` is used when no (truthy) interpreter is passed.
    """
    from rasa.core.interpreter import RegexInterpreter
    from rasa.core.training.dsl import StoryFileReader
    from rasa.core.training.structures import StoryGraph

    interpreter = interpreter or RegexInterpreter()

    steps = await StoryFileReader.read_from_folder(
        resource_name,
        domain,
        interpreter,
        use_e2e=use_e2e,
        exclusion_percentage=exclusion_percentage,
    )
    return StoryGraph(steps)
async def extract_story_graph(
    resource_name: Text,
    domain: "Domain",
    interpreter: Optional["NaturalLanguageInterpreter"] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> "StoryGraph":
    """Load the story steps of `resource_name` and return them as a graph.

    A `RegexInterpreter` is used when no (truthy) interpreter is passed.
    """
    from rasa.core.interpreter import RegexInterpreter
    from rasa.core.training.structures import StoryGraph
    import rasa.core.training.loading as core_loading

    interpreter = interpreter or RegexInterpreter()

    steps = await core_loading.load_data_from_resource(
        resource_name,
        domain,
        interpreter,
        use_e2e=use_e2e,
        exclusion_percentage=exclusion_percentage,
    )
    return StoryGraph(steps)
async def test_create_fingerprint_from_invalid_paths(project, project_files):
    """Fingerprinting the given project files must yield 'empty' values.

    Apart from the training timestamp, the fingerprint is expected to equal
    one built from empty configs and the hashes of an empty domain, an empty
    story graph and empty NLU training data.
    """
    from rasa.nlu.training_data import TrainingData
    from rasa.core.training.structures import StoryGraph

    importer = _project_files(project, *project_files)

    expected_fingerprint = _fingerprint(
        config="",
        config_nlu="",
        config_core="",
        domain=hash(Domain.empty()),
        rasa_version=rasa.__version__,
        stories=hash(StoryGraph([])),
        nlu=hash(TrainingData()),
    )

    actual_fingerprint = await model_fingerprint(importer)
    assert actual_fingerprint[FINGERPRINT_TRAINED_AT_KEY] is not None

    # The timestamps necessarily differ, so they are left out of the
    # comparison.
    for fingerprint in (actual_fingerprint, expected_fingerprint):
        del fingerprint[FINGERPRINT_TRAINED_AT_KEY]

    assert actual_fingerprint == expected_fingerprint
async def get_stories(
    self,
    interpreter: "NaturalLanguageInterpreter" = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
    """Read the story files and add randomly generated stories.

    A graph of the interactions is built from the stories that were read
    (`get_graph`) and stored in `self.graph`. Random stories are then written
    as story lines by walking from one interaction to a randomly chosen next
    one, and parsed with `StoryFileReader.process_lines`. A generated story
    is only kept if parsing yields at least one step and the first step has
    more than 75% of `story_length` events.

    Helper parameters read here:
        multiplication: factor applied to the number of read stories to get
            the number of stories to generate (default 1).
        story_length: maximum number of walk iterations per story
            (default 10).
        random_switch: percentage controlling random jumps (default 0).
        add_original: keep the stories read from file (default True).

    Returns:
        A story graph of the kept original and generated story steps.
    """
    domain = await self.get_domain()
    story_steps = await StoryFileReader.read_from_files(
        self._story_files,
        domain,
        interpreter,
        template_variables,
        use_e2e,
        exclusion_percentage,
    )
    self.graph = get_graph(domain, story_steps)

    number_of_storys = len(story_steps) * self.helper.get_param(
        'multiplication', 1)
    story_length = self.helper.get_param('story_length', 10)
    probability_of_random_switch = self.helper.get_param(
        'random_switch', 0) * 0.01

    if not self.helper.get_param('add_original', True):
        story_steps = []

    story_number = 0
    # NOTE(review): `<=` accepts one story more than `number_of_storys`, and
    # the loop only ends once enough stories passed the length check below.
    while story_number <= number_of_storys:
        interactions = get_all_interactions(domain)
        current_interaction = random.choice(interactions)
        end_of_story = False
        story_steps_lines = [
            '## Random Story ' + str(int(random.random() * 10000))
        ]

        index = 0
        while index <= story_length and not end_of_story:
            index += 1
            # NOTE(review): this jumps with probability 1 - p rather than p;
            # kept as is, confirm that this is intended.
            if (random.random() >= probability_of_random_switch
                    and probability_of_random_switch > 0.0):
                current_interaction = random.choice(interactions)

            # create and add current Event
            if current_interaction in domain.action_names:
                story_steps_lines.append('- ' + current_interaction)
            elif current_interaction in domain.intents:
                story_steps_lines.append('* ' + current_interaction)
            elif current_interaction in domain.entities:
                # Entities are attached to the previous line: either added
                # to an entity dict already present or as a new dict.
                last_line = story_steps_lines[-1]
                if ': "' in last_line:
                    story_steps_lines[-1] = (last_line[:-1] + ',"' +
                                             current_interaction + '": "XX"}')
                else:
                    story_steps_lines[-1] = (last_line + '{"' +
                                             current_interaction + '": "XX"}')

            possible_next_interactions = self.get_next_events(
                current_interaction)
            # `None` is treated like "no follow-up events". The former check
            # called `len()` on `None` and would have passed `None` on to
            # `random.choice`.
            if (possible_next_interactions is not None
                    and len(possible_next_interactions) > 0):
                current_interaction = random.choice(
                    possible_next_interactions)
            elif probability_of_random_switch > 0:
                current_interaction = random.choice(interactions)
            else:
                end_of_story = True

        story_step = await StoryFileReader(
            interpreter, domain, template_variables,
            use_e2e).process_lines(story_steps_lines)

        # Only sufficiently long stories are counted and kept.
        if len(story_step) > 0 and len(
                story_step[0].events) > story_length * 0.75:
            story_number += 1
            story_steps.extend(story_step)

    return StoryGraph(story_steps)
def test_is_empty():
    """A story graph built without story steps must report being empty."""
    graph_without_steps = StoryGraph([])
    assert graph_without_steps.is_empty()
def __prepare_training_story(self, bot: Text):
    """Build the story graph for `bot` from its prepared story steps."""
    story_steps = list(self.__prepare_training_story_step(bot))
    return StoryGraph(story_steps)
async def test_import_nlu_training_data_from_e2e_stories(project: Text):
    """NLU training data must be extended with messages found in stories.

    The importer wrapped by the `E2EImporter` is patched to return two story
    steps. The wrapper has to forward stories and config unchanged, while its
    NLU data must additionally contain one message per user utterance and per
    executed action of these stories.
    """
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)
    importer = TrainingDataImporter.load_from_dict(
        {}, config_path, domain_path, [default_data_path]
    )

    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(importer, E2EImporter)
    importer_without_e2e = importer.importer

    stories = StoryGraph(
        [
            StoryStep(
                events=[
                    SlotSet("some slot", "doesn't matter"),
                    UserUttered("greet_from_stories", {"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ]
            ),
            StoryStep(
                events=[
                    UserUttered("how are you doing?"),
                    ActionExecuted("utter_greet_from_stories", action_text="Hi Joey."),
                ]
            ),
        ]
    )

    # Patch to return our test stories. `asyncio.coroutine` was removed in
    # Python 3.11, so a native coroutine function is used instead; it also
    # accepts keyword arguments, which the former lambda did not.
    async def _get_test_stories(*args, **kwargs):
        return stories

    importer_without_e2e.get_stories = _get_test_stories

    # The wrapping `E2EImporter` simply forwards these method calls
    assert (await importer_without_e2e.get_stories()).as_story_string() == (
        await importer.get_stories()
    ).as_story_string()
    assert (await importer_without_e2e.get_config()) == (await importer.get_config())

    # Check additional NLU training data from stories was added
    nlu_data = await importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    assert len(nlu_data.training_examples) > len(
        (await importer_without_e2e.get_nlu_data()).training_examples
    )

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(data={TEXT: "greet_from_stories", INTENT_NAME: "greet_from_stories"}),
        Message(data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: ""}),
        Message(data={TEXT: "how are you doing?", INTENT_NAME: None}),
        Message(
            data={ACTION_NAME: "utter_greet_from_stories", ACTION_TEXT: "Hi Joey."}
        ),
    ]

    assert all(m in nlu_data.training_examples for m in expected_additional_messages)