def test_persist_and_read_test_story(tmpdir, default_domain):
    """Round-trip the test stories through ``Story.dump_to_file``.

    The story steps of the extracted graph are dumped to a temporary file
    and loaded again. Every story exported from the re-loaded trackers has
    to be among the stories exported from the original file.
    """
    source_file = "data/test_stories/stories.md"
    load_options = dict(use_story_concatenation=False,
                        tracker_limit=1000,
                        remove_duplicates=False)

    story_graph = training.extract_story_graph(source_file, default_domain)
    persisted = tmpdir.join("persisted_story.md").strpath
    Story(story_graph.story_steps).dump_to_file(persisted)

    reloaded = training.load_data(persisted, default_domain, **load_options)
    originals = training.load_data(source_file, default_domain,
                                   **load_options)

    unmatched = {tracker.export_stories() for tracker in originals}
    for tracker in reloaded:
        exported = tracker.export_stories()
        assert exported in unmatched
        # consume the match so the same original cannot be matched twice
        unmatched.discard(exported)
def load_data(
        resource_name,  # type: Text
        domain,  # type: Domain
        remove_duplicates=True,  # type: bool
        unique_last_num_states=None,  # type: Optional[int]
        augmentation_factor=20,  # type: int
        max_number_of_trackers=None,  # deprecated
        tracker_limit=None,  # type: Optional[int]
        use_story_concatenation=True,  # type: bool
        debug_plots=False  # type: bool
):
    # type: (...) -> List[DialogueStateTracker]
    """Generate trackers from the stories found at ``resource_name``.

    The story graph is extracted from the resource and handed, together
    with all generation options (in the order they are declared here), to a
    ``TrainingDataGenerator``; the result of its ``generate()`` is returned.
    A falsy ``resource_name`` yields an empty list.
    """
    # imported lazily, at call time
    from rasa_core.training import extract_story_graph
    from rasa_core.training.generator import TrainingDataGenerator

    if not resource_name:
        return []

    story_graph = extract_story_graph(resource_name, domain)
    generator = TrainingDataGenerator(story_graph, domain,
                                      remove_duplicates,
                                      unique_last_num_states,
                                      augmentation_factor,
                                      max_number_of_trackers,
                                      tracker_limit,
                                      use_story_concatenation,
                                      debug_plots)
    return generator.generate()
def test_persist_and_read_test_story_graph(tmpdir, default_domain):
    """Round-trip the test stories through ``graph.as_story_string()``.

    The story string of the extracted graph is written to a temporary
    UTF-8 file and loaded again. Every story exported from the re-loaded
    trackers has to be among the stories exported from the original file.
    """
    source_file = "data/test_stories/stories.md"
    load_options = dict(use_story_concatenation=False,
                        tracker_limit=1000,
                        remove_duplicates=False)

    story_graph = training.extract_story_graph(source_file, default_domain)
    persisted = tmpdir.join("persisted_story.md").strpath
    with io.open(persisted, "w", encoding="utf-8") as story_file:
        story_file.write(story_graph.as_story_string())

    reloaded = training.load_data(persisted, default_domain, **load_options)
    originals = training.load_data(source_file, default_domain,
                                   **load_options)

    unmatched = {tracker.export_stories() for tracker in originals}
    for tracker in reloaded:
        exported = tracker.export_stories()
        assert exported in unmatched
        # consume the match so the same original cannot be matched twice
        unmatched.discard(exported)
def load_data(
    resource_name: Text,
    domain: 'Domain',
    remove_duplicates: bool = True,
    unique_last_num_states: Optional[int] = None,
    augmentation_factor: int = 20,
    tracker_limit: Optional[int] = None,
    use_story_concatenation: bool = True,
    debug_plots: bool = False,
    exclusion_percentage: Optional[int] = None
) -> List['DialogueStateTracker']:
    """Generate trackers from the stories found at ``resource_name``.

    The story graph is extracted from the resource (``exclusion_percentage``
    is forwarded to the extraction step) and handed, together with the
    remaining generation options, to a ``TrainingDataGenerator``.

    Returns:
        The result of the generator's ``generate()`` call, or an empty list
        when ``resource_name`` is falsy.
    """
    if not resource_name:
        # nothing to load - skip importing the training machinery entirely
        return []

    # imported lazily, at call time
    from rasa_core.training import extract_story_graph
    from rasa_core.training.generator import TrainingDataGenerator

    graph = extract_story_graph(resource_name, domain,
                                exclusion_percentage=exclusion_percentage)

    g = TrainingDataGenerator(graph, domain,
                              remove_duplicates,
                              unique_last_num_states,
                              augmentation_factor,
                              tracker_limit,
                              use_story_concatenation,
                              debug_plots)
    return g.generate()
def load_data(
        resource_name,  # type: Text
        domain,  # type: Domain
        remove_duplicates=True,  # type: bool
        unique_last_num_states=None,  # type: Optional[int]
        augmentation_factor=20,  # type: int
        tracker_limit=None,  # type: Optional[int]
        use_story_concatenation=True,  # type: bool
        debug_plots=False  # type: bool
):
    # type: (...) -> List[DialogueStateTracker]
    """Generate trackers from the stories found at ``resource_name``.

    The story graph is extracted from the resource and handed, together
    with all generation options (in the order they are declared here), to a
    ``TrainingDataGenerator``; the result of its ``generate()`` is returned.
    A falsy ``resource_name`` yields an empty list.
    """
    # imported lazily, at call time
    from rasa_core.training import extract_story_graph
    from rasa_core.training.generator import TrainingDataGenerator

    if not resource_name:
        return []

    story_graph = extract_story_graph(resource_name, domain)
    generator = TrainingDataGenerator(story_graph, domain,
                                      remove_duplicates,
                                      unique_last_num_states,
                                      augmentation_factor,
                                      tracker_limit,
                                      use_story_concatenation,
                                      debug_plots)
    return generator.generate()
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Check that stories survive being dumped with ``Story.dump_to_file``.

    Trackers are loaded once from the dumped copy and once from the
    original story file; each story exported from the copy must be found
    among the stories exported from the original.
    """
    stories_md = "data/test_stories/stories.md"
    loader_kwargs = {
        "use_story_concatenation": False,
        "tracker_limit": 1000,
        "remove_duplicates": False,
    }

    extracted = training.extract_story_graph(stories_md, default_domain)
    dump_target = tmpdir.join("persisted_story.md").strpath
    Story(extracted.story_steps).dump_to_file(dump_target)

    from_dump = training.load_data(dump_target, default_domain,
                                   **loader_kwargs)
    from_source = training.load_data(stories_md, default_domain,
                                     **loader_kwargs)

    remaining = {tracker.export_stories() for tracker in from_source}
    for tracker in from_dump:
        as_text = tracker.export_stories()
        assert as_text in remaining
        # drop the matched story; a second identical story would now fail
        remaining.discard(as_text)
def _generate_trackers(resource_name, agent, max_stories=None, use_e2e=False):
    """Build trackers for the stories in ``resource_name``.

    The story graph is extracted with the agent's domain and interpreter
    (``use_e2e`` is forwarded to the extraction), then expanded by a
    ``TrainingDataGenerator`` with story concatenation disabled, an
    augmentation factor of 0 and ``max_stories`` as the tracker limit.
    """
    generator_options = dict(use_story_concatenation=False,
                             augmentation_factor=0,
                             tracker_limit=max_stories)

    graph = training.extract_story_graph(resource_name,
                                         agent.domain,
                                         agent.interpreter,
                                         use_e2e)
    generator = TrainingDataGenerator(graph, agent.domain,
                                      **generator_options)
    return generator.generate()
def test_read_story_file_with_cycles(tmpdir, default_domain):
    """Cycle removal must clear cyclic edges but keep all five story steps."""
    cyclic_graph = training.extract_story_graph(
        "data/test_stories/stories_with_cycle.md", default_domain)
    assert len(cyclic_graph.story_steps) == 5

    acyclic_graph = cyclic_graph.with_cycles_removed()

    # the source graph still reports its cycles, the derived one has none
    assert cyclic_graph.cyclic_edge_ids != set()
    assert acyclic_graph.cyclic_edge_ids == set()

    # removing cycles does not change the number of steps
    assert len(cyclic_graph.story_steps) == len(acyclic_graph.story_steps)
    assert len(acyclic_graph.story_steps) == 5

    assert len(acyclic_graph.story_end_checkpoints) == 2
def test_read_story_file_with_cycles(tmpdir, default_domain):
    """Read a cyclic story file and verify the graph before/after cleanup."""
    story_file = "data/test_stories/stories_with_cycle.md"
    original = training.extract_story_graph(story_file, default_domain)
    assert len(original.story_steps) == 5

    cleaned = original.with_cycles_removed()

    # only the cleaned copy is free of cyclic edges
    assert original.cyclic_edge_ids != set()
    assert cleaned.cyclic_edge_ids == set()

    # both graphs hold the same five steps
    assert len(original.story_steps) == len(cleaned.story_steps)
    assert len(cleaned.story_steps) == 5

    assert len(cleaned.story_end_checkpoints) == 2
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Round-trip the test stories through ``Story.dump_to_file``.

    Trackers are extracted (with a fresh ``BinaryFeaturizer`` each time)
    from the dumped copy and from the original file. Every story exported
    from the copy has to be among the stories exported from the original.
    """
    source_file = "data/test_stories/stories.md"

    story_graph = training.extract_story_graph(source_file, default_domain)
    persisted = tmpdir.join("persisted_story.md").strpath
    Story(story_graph.story_steps).dump_to_file(persisted)

    reloaded = training.extract_trackers(persisted, default_domain,
                                         BinaryFeaturizer())
    originals = training.extract_trackers(source_file, default_domain,
                                          BinaryFeaturizer())

    unmatched = {tracker.export_stories() for tracker in originals}
    for tracker in reloaded:
        exported = tracker.export_stories()
        assert exported in unmatched
        # consume the match so the same original cannot be matched twice
        unmatched.discard(exported)
def test_visualize_training_data_graph(tmpdir, default_domain):
    """Plot the cycle-free story graph and sanity-check the result."""
    story_graph = training.extract_story_graph(
        "data/test_stories/stories_with_cycle.md", default_domain)
    acyclic_graph = story_graph.with_cycles_removed()

    image_path = tmpdir.join("graph.png").strpath

    # ``visualize`` hands back the plotted networkx graph
    G = acyclic_graph.visualize(image_path)

    assert os.path.exists(image_path)

    # the exact topology can't be checked - node ids and the edge count
    # are enough to ensure the visualisation created a sane graph
    expected_nodes = set(range(-1, 14))
    assert set(G.nodes()) == expected_nodes
    assert len(G.edges()) == 16
def test_persist_and_read_test_story_graph(tmpdir, default_domain):
    """Round-trip the test stories through ``graph.as_story_string()``.

    The story string of the extracted graph is written to a temporary file
    and trackers are extracted from it again. Every story exported from
    those trackers has to be among the stories exported from the original
    file.
    """
    graph = training.extract_story_graph("data/test_stories/stories.md",
                                         default_domain)
    out_path = tmpdir.join("persisted_story.md")

    # Pin the encoding: without it ``io.open`` falls back to the platform's
    # preferred encoding, so the written file could differ between machines.
    with io.open(out_path.strpath, "w", encoding="utf-8") as f:
        f.write(graph.as_story_string())

    recovered_trackers = training.extract_trackers(out_path.strpath,
                                                   default_domain,
                                                   BinaryFeaturizer())
    existing_trackers = training.extract_trackers(
        "data/test_stories/stories.md",
        default_domain,
        BinaryFeaturizer())

    existing_stories = {t.export_stories() for t in existing_trackers}
    for t in recovered_trackers:
        story_str = t.export_stories()
        assert story_str in existing_stories
        # consume the match so the same original cannot be matched twice
        existing_stories.discard(story_str)
def test_visualize_training_data_graph(tmpdir, default_domain):
    """Plot the cycle-free story graph and sanity-check the result.

    Two graph layouts are accepted: node ids -1..12 with 14 edges, or
    node ids -1..13 with 16 edges.
    """
    story_graph = training.extract_story_graph(
        "data/test_stories/stories_with_cycle.md", default_domain)
    acyclic_graph = story_graph.with_cycles_removed()

    image_path = tmpdir.join("graph.png").strpath

    # ``visualize`` hands back the plotted networkx graph
    G = acyclic_graph.visualize(image_path)

    assert os.path.exists(image_path)

    # the exact topology can't be checked - node ids and the edge count
    # are enough to ensure the visualisation created a sane graph
    smaller_layout = set(range(-1, 13))
    larger_layout = set(range(-1, 14))
    node_ids = set(G.nodes())

    assert node_ids == smaller_layout or node_ids == larger_layout

    if node_ids == smaller_layout:
        assert len(G.edges()) == 14
    elif node_ids == larger_layout:
        assert len(G.edges()) == 16
def collect_story_predictions(resource_name, policy_model_path,
                              nlu_model_path, max_stories):
    """Test the stories from a file, running them through the stored model.

    Each story is replayed against the loaded agent under a fresh sender
    id: every user utterance is sent through ``agent.handle_message`` and
    the actions the agent took since that utterance are compared with the
    actions the story lists up to the next utterance.

    Args:
        resource_name: story resource passed to ``extract_story_graph``.
        policy_model_path: path the agent is loaded from.
        nlu_model_path: NLU model directory; when ``None`` a
            ``RegexInterpreter`` is used instead of a ``RasaNLUInterpreter``.
        max_stories: forwarded as ``tracker_limit`` to the generator.

    Returns:
        A tuple ``(actual, preds, failed_stories)``. ``actual`` and
        ``preds`` are flat, position-aligned lists of action names over all
        stories; ``failed_stories`` holds the per-story
        ``{"predicted": [...], "actual": [...]}`` records whose two lists
        differ.
    """
    if nlu_model_path is not None:
        interpreter = RasaNLUInterpreter(model_directory=nlu_model_path)
    else:
        interpreter = RegexInterpreter()

    agent = Agent.load(policy_model_path, interpreter=interpreter)
    story_graph = training.extract_story_graph(resource_name, agent.domain,
                                               interpreter)
    preds = []
    actual = []

    g = TrainingDataGenerator(story_graph, agent.domain,
                              use_story_concatenation=False,
                              tracker_limit=max_stories)
    completed_trackers = g.generate()

    failed_stories = []

    logger.info("Evaluating {} stories\nProgress:"
                "".format(len(completed_trackers)))

    for tracker in tqdm(completed_trackers):
        sender_id = "default-" + uuid.uuid4().hex

        story = {"predicted": [], "actual": []}
        events = list(tracker.events)
        actions_between_utterances = []
        last_prediction = []

        # the first event of the story tracker is skipped
        for event in events[1:]:
            if isinstance(event, UserUttered):
                # close the previous turn before starting the next one
                # (presumably align_lists equalises the two lengths -
                # confirm against its definition)
                p, a = align_lists(last_prediction,
                                   actions_between_utterances)
                story["predicted"].extend(p)
                story["actual"].extend(a)
                actions_between_utterances = []

                agent.handle_message(event.text, sender_id=sender_id)
                live_tracker = agent.tracker_store.retrieve(sender_id)
                last_prediction = actions_since_last_utterance(live_tracker)

            elif isinstance(event, ActionExecuted):
                actions_between_utterances.append(event.action_name)

        if last_prediction:
            # Close the final turn inside the story record first, padding
            # the shorter side with "None", and only then copy the whole
            # story into the global lists. Appending the final turn to the
            # global lists directly (ahead of the earlier turns) shifted
            # predictions against actuals whenever the final turn's lengths
            # differed, and left the final turn out of the failed-story
            # comparison.
            preds_padding = (len(actions_between_utterances) -
                             len(last_prediction))
            actual_padding = (len(last_prediction) -
                              len(actions_between_utterances))

            story["predicted"].extend(last_prediction)
            story["predicted"].extend(["None"] * preds_padding)
            story["actual"].extend(actions_between_utterances)
            story["actual"].extend(["None"] * actual_padding)

            preds.extend(story["predicted"])
            actual.extend(story["actual"])

        if story["predicted"] != story["actual"]:
            failed_stories.append(story)

    return actual, preds, failed_stories
def collect_story_predictions(resource_name, policy_model_path,
                              nlu_model_path, max_stories=None,
                              shuffle_stories=True):
    """Test the stories from a file, running them through the stored model.

    Each story is replayed against the loaded agent under a fresh sender
    id: every user utterance is sent through ``agent.handle_message`` and
    the actions the agent took since that utterance are compared with the
    actions the story lists up to the next utterance.

    Args:
        resource_name: story resource passed to ``extract_story_graph``.
        policy_model_path: path the agent is loaded from.
        nlu_model_path: NLU model directory; when ``None`` a
            ``RegexInterpreter`` is used instead of a ``RasaNLUInterpreter``.
        max_stories: upper bound on generated trackers; ``None`` keeps the
            previous fixed limit of 100.
        shuffle_stories: accepted for interface compatibility; not used by
            this function.

    Returns:
        A tuple ``(actual, preds)`` of flat lists of action names over all
        stories.
    """
    if nlu_model_path is not None:
        interpreter = RasaNLUInterpreter(model_directory=nlu_model_path)
    else:
        interpreter = RegexInterpreter()

    agent = Agent.load(policy_model_path, interpreter=interpreter)
    story_graph = training.extract_story_graph(resource_name, agent.domain,
                                               interpreter)
    preds = []
    actual = []

    max_history = agent.policy_ensemble.policies[0].max_history

    # Honour the caller's limit instead of silently ignoring it; fall back
    # to the historical limit of 100 when none is given.
    tracker_limit = 100 if max_stories is None else max_stories

    g = TrainingsDataGenerator(story_graph, agent.domain,
                               agent.featurizer,
                               max_history=max_history,
                               use_story_concatenation=False,
                               tracker_limit=tracker_limit)
    data = g.generate()

    completed_trackers = data.metadata["trackers"]
    logger.info("Evaluating {} stories\nProgress:".format(
        len(completed_trackers)))

    for tracker in tqdm(completed_trackers):
        sender_id = "default-" + uuid.uuid4().hex

        events = list(tracker.events)
        actions_between_utterances = []
        last_prediction = []

        # the first event of the story tracker is skipped
        for event in events[1:]:
            if isinstance(event, UserUttered):
                # close the previous turn before starting the next one
                # (presumably align_lists equalises the two lengths -
                # confirm against its definition)
                p, a = align_lists(last_prediction,
                                   actions_between_utterances)
                preds.extend(p)
                actual.extend(a)
                actions_between_utterances = []

                agent.handle_message(event.text, sender_id=sender_id)
                live_tracker = agent.tracker_store.retrieve(sender_id)
                last_prediction = actions_since_last_utterance(live_tracker)

            elif isinstance(event, ActionExecuted):
                actions_between_utterances.append(event.action_name)

        if last_prediction:
            # close the final turn, padding the shorter side with "None"
            # so both lists grow by the same amount
            preds.extend(last_prediction)
            preds_padding = (len(actions_between_utterances) -
                             len(last_prediction))
            preds.extend(["None"] * preds_padding)

            actual.extend(actions_between_utterances)
            actual_padding = (len(last_prediction) -
                              len(actions_between_utterances))
            actual.extend(["None"] * actual_padding)

    return actual, preds
def collect_story_predictions(resource_name, policy_model_path,
                              nlu_model_path, max_stories):
    """Test the stories from a file, running them through the stored model.

    Each story is replayed against the loaded agent under a fresh sender
    id: every user utterance is sent through ``agent.handle_message`` and
    the actions the agent took since that utterance are compared with the
    actions the story lists up to the next utterance.

    Args:
        resource_name: story resource passed to ``extract_story_graph``.
        policy_model_path: path the agent is loaded from.
        nlu_model_path: NLU model directory; when ``None`` a
            ``RegexInterpreter`` is used instead of a ``RasaNLUInterpreter``.
        max_stories: forwarded as ``tracker_limit`` to the generator.

    Returns:
        A tuple ``(actual, preds, failed_stories)``. ``actual`` and
        ``preds`` are flat, position-aligned lists of action names over all
        stories; ``failed_stories`` holds the per-story
        ``{"predicted": [...], "actual": [...]}`` records whose two lists
        differ.
    """
    if nlu_model_path is not None:
        interpreter = RasaNLUInterpreter(model_directory=nlu_model_path)
    else:
        interpreter = RegexInterpreter()

    agent = Agent.load(policy_model_path, interpreter=interpreter)
    story_graph = training.extract_story_graph(resource_name, agent.domain,
                                               interpreter)
    preds = []
    actual = []

    g = TrainingDataGenerator(story_graph, agent.domain,
                              use_story_concatenation=False,
                              tracker_limit=max_stories)
    completed_trackers = g.generate()

    failed_stories = []

    logger.info("Evaluating {} stories\nProgress:"
                "".format(len(completed_trackers)))

    for tracker in tqdm(completed_trackers):
        sender_id = "default-" + uuid.uuid4().hex

        story = {"predicted": [], "actual": []}
        events = list(tracker.events)
        actions_between_utterances = []
        last_prediction = []

        # the first event of the story tracker is skipped
        for event in events[1:]:
            if isinstance(event, UserUttered):
                # close the previous turn before starting the next one
                # (presumably align_lists equalises the two lengths -
                # confirm against its definition)
                p, a = align_lists(last_prediction,
                                   actions_between_utterances)
                story["predicted"].extend(p)
                story["actual"].extend(a)
                actions_between_utterances = []

                agent.handle_message(event.text, sender_id=sender_id)
                live_tracker = agent.tracker_store.retrieve(sender_id)
                last_prediction = actions_since_last_utterance(live_tracker)

            elif isinstance(event, ActionExecuted):
                actions_between_utterances.append(event.action_name)

        if last_prediction:
            # Close the final turn inside the story record first, padding
            # the shorter side with "None", and only then copy the whole
            # story into the global lists. Appending the final turn to the
            # global lists directly (ahead of the earlier turns) shifted
            # predictions against actuals whenever the final turn's lengths
            # differed, and left the final turn out of the failed-story
            # comparison.
            preds_padding = (len(actions_between_utterances) -
                             len(last_prediction))
            actual_padding = (len(last_prediction) -
                              len(actions_between_utterances))

            story["predicted"].extend(last_prediction)
            story["predicted"].extend(["None"] * preds_padding)
            story["actual"].extend(actions_between_utterances)
            story["actual"].extend(["None"] * actual_padding)

            preds.extend(story["predicted"])
            actual.extend(story["actual"])

        if story["predicted"] != story["actual"]:
            failed_stories.append(story)

    return actual, preds, failed_stories