def test_yaml_writer_stories_to_yaml(default_domain: Domain):
    """Check that ``stories_to_yaml`` returns an ordered mapping with one story."""
    from collections import OrderedDict

    story_reader = YAMLStoryReader(default_domain, None, False)
    story_writer = YAMLStoryWriter()

    story_steps = story_reader.read_from_file(
        "data/test_yaml_stories/simple_story_with_only_end.yml"
    )
    as_yaml = story_writer.stories_to_yaml(story_steps)

    # The writer is expected to hand back an ordered container with a
    # single entry under the "stories" key.
    assert isinstance(as_yaml, OrderedDict)
    assert "stories" in as_yaml
    assert len(as_yaml["stories"]) == 1
def test_yaml_writer_dumps_rules(input_yaml_file: Text, tmpdir: Path, domain: Domain):
    """Check that dumping the steps read from a file reproduces that file.

    The first line of the dump (the version string) is dropped before the
    comparison.
    """
    story_steps = YAMLStoryReader(domain, None, False).read_from_file(input_yaml_file)

    dumped = YAMLStoryWriter().dumps(story_steps)
    # Keep everything after the first newline, i.e. drop the version line.
    _, _, dumped_without_version = dumped.partition("\n")

    with open(input_yaml_file) as original_file:
        expected = original_file.read()
        assert dumped_without_version == expected
async def test_forms_are_converted(domain: Domain):
    """Check that stories containing loops are dumped without any warning.

    Warnings are recorded with the standard library instead of
    ``pytest.warns(None)``, which is deprecated in pytest 7 and rejected by
    pytest 8.
    """
    import warnings

    original_yaml_reader = YAMLStoryReader(domain, None)
    original_yaml_story_steps = original_yaml_reader.read_from_file(
        "data/test_yaml_stories/stories_form.yml"
    )

    assert YAMLStoryWriter.stories_contain_loops(original_yaml_story_steps)

    writer = YAMLStoryWriter()

    with warnings.catch_warnings(record=True) as record:
        # Record every warning, even ones already emitted earlier in the run.
        warnings.simplefilter("always")
        writer.dumps(original_yaml_story_steps)

    assert len(record) == 0
def _dump_rules(path: Path, new_rules: List[StoryStep]) -> None:
    """Write ``new_rules`` to ``path`` together with any rules already stored there.

    When ``path`` exists, its rules are read and the file is handed to
    ``_backup`` before it is rewritten. The user is informed if existing rules
    were found.

    Args:
        path: Output file for the rules.
        new_rules: Rules to add to the file.
    """
    previous_rules = []
    if path.exists():
        previous_rules = YAMLStoryReader().read_from_file(path)
        _backup(path)

    if previous_rules:
        rasa.shared.utils.cli.print_info(
            f"Found existing rules in the output file '{path}'. The new rules will "
            f"be appended to the existing rules."
        )

    YAMLStoryWriter().dump(path, previous_rules + new_rules)
def write_training_data(nlu, domain, config: dict, stories, rules=None, actions: dict = None):
    """
    convert mongo data to individual files

    The files are written below a freshly created temporary directory.
    Rules and actions files are only written when the respective argument
    is truthy.

    :param nlu: nlu data
    :param domain: domain data, either a ``Domain`` object or a dict
    :param stories: stories data
    :param config: config data
    :param rules: rules data
    :param actions: action configuration data
    :return: files path
    """
    from rasa.shared.core.training_data.story_writer.yaml_story_writer import YAMLStoryWriter
    from rasa.shared.constants import DEFAULT_CONFIG_PATH, DEFAULT_DATA_PATH, DEFAULT_DOMAIN_PATH
    from rasa.shared.importers.rasa import Domain

    temp_path = tempfile.mkdtemp()
    data_path = os.path.join(temp_path, DEFAULT_DATA_PATH)
    os.makedirs(data_path)
    nlu_path = os.path.join(data_path, "nlu.yml")
    domain_path = os.path.join(temp_path, DEFAULT_DOMAIN_PATH)
    stories_path = os.path.join(data_path, "stories.yml")
    config_path = os.path.join(temp_path, DEFAULT_CONFIG_PATH)
    rules_path = os.path.join(data_path, "rules.yml")
    actions_path = os.path.join(temp_path, "actions.yml")

    nlu_as_str = nlu.nlu_as_yaml().encode()
    config_as_str = yaml.dump(config).encode()

    if isinstance(domain, Domain):
        domain_as_str = domain.as_yaml().encode()
        Utility.write_to_file(domain_path, domain_as_str)
    elif isinstance(domain, Dict):
        # Use a context manager so the handle is flushed and closed
        # deterministically instead of being left to garbage collection.
        with open(domain_path, "w") as domain_file:
            yaml.safe_dump(domain, domain_file)

    Utility.write_to_file(nlu_path, nlu_as_str)
    Utility.write_to_file(config_path, config_as_str)
    YAMLStoryWriter().dump(stories_path, stories.story_steps)
    if rules:
        YAMLStoryWriter().dump(rules_path, rules.story_steps)
    if actions:
        actions_as_str = yaml.dump(actions).encode()
        Utility.write_to_file(actions_path, actions_as_str)
    return temp_path
async def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
    """Converts the given training data file and saves it to the output directory.

    Whether the source is a test stories file is determined once and reused,
    so the reader configuration, the output path and the writer's
    ``is_test_story`` flag always agree.

    Args:
        source_path: Path to the training data file.
        output_path: Path to the output directory.
    """
    from rasa.shared.core.training_data.story_reader.yaml_story_reader import (
        KEY_ACTIVE_LOOP,
    )

    # check if source file is test stories file
    is_test_story = MarkdownStoryReader.is_test_stories_file(source_path)

    if is_test_story:
        reader = MarkdownStoryReader(
            is_used_for_training=False,
            use_e2e=True,
            ignore_deprecation_warning=True,
        )
        output_core_path = cls._generate_path_for_converted_test_data_file(
            source_path, output_path
        )
    else:
        reader = MarkdownStoryReader(
            is_used_for_training=False, ignore_deprecation_warning=True
        )
        output_core_path = cls.generate_path_for_converted_training_data_file(
            source_path, output_path
        )

    steps = reader.read_from_file(source_path)

    if YAMLStoryWriter.stories_contain_loops(steps):
        print_warning(
            f"Training data file '{source_path}' contains forms. "
            f"Any 'form' events will be converted to '{KEY_ACTIVE_LOOP}' events. "
            f"Please note that in order for these stories to work you still "
            f"need the 'FormPolicy' to be active. However the 'FormPolicy' is "
            f"deprecated, please consider switching to the new 'RulePolicy', "
            f"for which you can find the documentation here: "
            f"{rasa.shared.constants.DOCS_URL_RULES}."
        )

    writer = YAMLStoryWriter()
    writer.dump(output_core_path, steps, is_test_story=is_test_story)

    print_success(f"Converted Core file: '{source_path}' >> '{output_core_path}'.")
def create(bot: str, use_test_stories: bool = False):
    """Write NLU data and stories for ``bot`` into ``testing_data/<bot>``.

    Stories and rules loaded through ``MongoProcessor`` are merged before
    being written.

    :param bot: bot identifier, also used as the output directory name
    :param use_test_stories: write ``test_stories.yml`` in test-story format
        instead of ``stories.yml``
    :return: tuple of the NLU file path and the stories file path
    :raises AppException: if either the stories or the NLU data are empty
    """
    from kairon import Utility
    from itertools import chain
    from rasa.shared.nlu.training_data.training_data import TrainingData

    bot_home = os.path.join('testing_data', bot)
    Utility.make_dirs(bot_home)
    processor = MongoProcessor()

    intents_and_training_examples = processor.get_intents_and_training_examples(bot)
    prepared_per_item = (
        TestDataGenerator.__prepare_nlu(key, value)
        for key, value in intents_and_training_examples.items()
    )
    nlu_data = TrainingData(
        training_examples=list(chain.from_iterable(prepared_per_item))
    )

    stories = processor.load_stories(bot)
    rules = processor.get_rules_for_training(bot)
    stories = stories.merge(rules)

    if stories.is_empty() or nlu_data.is_empty():
        raise AppException('Not enough training data exists. Please add some training data.')

    nlu_path = os.path.join(bot_home, "nlu.yml")
    Utility.write_to_file(nlu_path, nlu_data.nlu_as_yaml().encode())

    stories_file_name = "test_stories.yml" if use_test_stories else "stories.yml"
    stories_path = os.path.join(bot_home, stories_file_name)
    YAMLStoryWriter().dump(stories_path, stories.story_steps, is_test_story=use_test_stories)

    return nlu_path, stories_path
def _collect_action_executed_predictions(
    processor: "MessageProcessor",
    partial_tracker: DialogueStateTracker,
    event: ActionExecuted,
    fail_on_prediction_errors: bool,
    circuit_breaker_tripped: bool,
) -> Tuple[EvaluationStore, Optional[Text], Optional[float]]:
    """Compare the predicted next action with the action recorded in ``event``.

    The prediction and the expected action are added to a fresh
    ``EvaluationStore`` and ``partial_tracker`` is updated with either the
    predicted action or a ``WronglyPredictedAction``.

    Args:
        processor: Used to predict the next action and to access the domain.
        partial_tracker: Tracker holding the conversation so far; it is updated.
        event: The expected action from the story.
        fail_on_prediction_errors: Raise instead of continuing on a mismatch.
        circuit_breaker_tripped: If set, no prediction is made and a
            placeholder prediction without policy and confidence is used.

    Returns:
        The evaluation store, the policy and the confidence of the prediction.

    Raises:
        WrongPredictionException: On a mismatch when
            ``fail_on_prediction_errors`` is set.
    """
    from rasa.core.policies.form_policy import FormPolicy

    action_executed_eval_store = EvaluationStore()

    gold = event.action_name or event.action_text

    if circuit_breaker_tripped:
        predicted = "circuit breaker tripped"
        policy = None
        confidence = None
    else:
        action, policy, confidence = processor.predict_next_action(partial_tracker)
        predicted = action.name()

        if (
            policy
            and predicted != gold
            and _form_might_have_been_rejected(
                processor.domain, partial_tracker, predicted
            )
        ):
            # Wrong action was predicted,
            # but it might be Ok if form action is rejected.
            emulate_loop_rejection(partial_tracker)
            # try again
            action, policy, confidence = processor.predict_next_action(partial_tracker)

            # Even if the prediction is also wrong, we don't have to undo the emulation
            # of the action rejection as we know that the user explicitly specified
            # that something else than the form was supposed to run.
            predicted = action.name()

    action_executed_eval_store.add_to_store(
        action_predictions=[predicted], action_targets=[gold]
    )

    if action_executed_eval_store.has_prediction_target_mismatch():
        partial_tracker.update(
            WronglyPredictedAction(gold, predicted, policy, confidence, event.timestamp)
        )
        if fail_on_prediction_errors:
            story_dump = YAMLStoryWriter().dumps(partial_tracker.as_story().story_steps)
            error_msg = (
                f"Model predicted a wrong action. Failed Story: " f"\n\n{story_dump}"
            )
            # `policy` is None when the circuit breaker tripped; guard the
            # membership test so the intended exception below is raised
            # rather than a TypeError.
            if policy and FormPolicy.__name__ in policy:
                error_msg += (
                    "FormAction is not run during "
                    "evaluation therefore it is impossible to know "
                    "if validation failed or this story is wrong. \n"
                    "If the story is correct, add it to the "
                    "training stories and retrain."
                )
            raise WrongPredictionException(error_msg)
    else:
        partial_tracker.update(
            ActionExecuted(predicted, policy, confidence, event.timestamp)
        )

    return action_executed_eval_store, policy, confidence
async def test_forms_are_converted(default_domain: Domain):
    """Check that Markdown stories containing loops are dumped without warnings.

    Warnings are recorded with the standard library instead of
    ``pytest.warns(None)``, which is deprecated in pytest 7 and rejected by
    pytest 8.
    """
    import warnings

    original_md_reader = MarkdownStoryReader(
        default_domain, None, False, is_used_for_training=False
    )
    original_md_story_steps = original_md_reader.read_from_file(
        "data/test_stories/stories_form.md"
    )

    assert YAMLStoryWriter.stories_contain_loops(original_md_story_steps)

    writer = YAMLStoryWriter()

    with warnings.catch_warnings(record=True) as record:
        # Record every warning, even ones already emitted earlier in the run.
        warnings.simplefilter("always")
        writer.dumps(original_md_story_steps)

    assert len(record) == 0
def test_yaml_writer_avoids_dumping_not_existing_user_messages():
    """Check that the default dump contains the intent but no ``user`` entry."""
    expected_yaml = textwrap.dedent(
        """
        version: "2.0"
        stories:
        - story: default
          steps:
          - intent: greet
          - action: utter_greet
        """
    ).strip()

    conversation = DialogueStateTracker.from_events(
        "default",
        [UserUttered("greet", {"name": "greet"}), ActionExecuted("utter_greet")],
    )
    dumped = YAMLStoryWriter().dumps(conversation.as_story().story_steps)

    assert dumped.strip() == expected_yaml
def _collect_user_uttered_predictions(
    event: UserUttered,
    predicted: Dict[Text, Any],
    partial_tracker: DialogueStateTracker,
    fail_on_prediction_errors: bool,
) -> EvaluationStore:
    """Compare predicted intent and entities with those recorded in ``event``.

    Targets and predictions are added to a fresh ``EvaluationStore`` and
    ``partial_tracker`` is updated with either an end-to-end user utterance
    or a ``WronglyClassifiedUserUtterance``.

    Args:
        event: The user message from the test story.
        predicted: Prediction data for the message.
        partial_tracker: Tracker holding the conversation so far; it is updated.
        fail_on_prediction_errors: Raise instead of continuing on a mismatch.

    Returns:
        The evaluation store holding targets and predictions.

    Raises:
        WrongPredictionException: On a mismatch when
            ``fail_on_prediction_errors`` is set.
    """
    store = EvaluationStore()

    # intent from the test story, may either be base intent or full retrieval intent
    story_base_intent = event.intent.get(INTENT_NAME_KEY)
    story_retrieval_intent = event.intent.get(FULL_RETRIEVAL_INTENT_NAME_KEY)
    gold_intent = story_retrieval_intent or story_base_intent

    # predicted intent: note that this is only the base intent at this point
    predicted_intent = predicted.get(INTENT, {}).get(INTENT_NAME_KEY)

    # if the test story only provides the base intent AND the prediction was correct,
    # we are not interested in full retrieval intents and skip this section.
    # In any other case we are interested in the full retrieval intent (e.g. for report)
    if gold_intent != predicted_intent:
        predicted_intent = _get_full_retrieval_intent(predicted)

    store.add_to_store(
        intent_targets=[gold_intent], intent_predictions=[predicted_intent]
    )

    gold_entities = event.entities
    predicted_entities = predicted.get(ENTITIES)
    if gold_entities or predicted_entities:
        store.add_to_store(
            entity_targets=_clean_entity_results(event.text, gold_entities),
            entity_predictions=_clean_entity_results(event.text, predicted_entities),
        )

    if not store.check_prediction_target_mismatch():
        # Correct prediction: record the utterance, carrying over the response
        # selector output when the prediction has one.
        if RESPONSE_SELECTOR_PROPERTY_NAME in predicted:
            parse_data = {
                RESPONSE_SELECTOR_PROPERTY_NAME: predicted[
                    RESPONSE_SELECTOR_PROPERTY_NAME
                ]
            }
        else:
            parse_data = None

        partial_tracker.update(
            EndToEndUserUtterance(
                text=event.text,
                intent=event.intent,
                entities=event.entities,
                parse_data=parse_data,
            )
        )
        return store

    partial_tracker.update(WronglyClassifiedUserUtterance(event, store))
    if fail_on_prediction_errors:
        story_dump = YAMLStoryWriter().dumps(partial_tracker.as_story().story_steps)
        raise WrongPredictionException(
            f"NLU model predicted a wrong intent or entities. \nFailed Story:"
            f" \n\n{story_dump}"
        )

    return store
def test_reading_and_writing_end_to_end_stories_in_test_mode(default_domain: Domain):
    """Check the test-mode dump of end-to-end stories read from a YAML string."""
    story_name = "test_writing_end_to_end_stories_in_test_mode"

    source_yaml = f"""
stories:
- story: {story_name}
  steps:
  - intent: greet
    user: Hi
  - action: utter_greet
  - intent: greet
    user: |
      [Hi](test)
  - action: utter_greet
  - user: Hi
  - bot: Hi, I'm a bot.
  - user: |
      [Hi](test)
  - bot: Hi, I'm a bot.
"""

    expected_yaml = textwrap.dedent(
        f"""
        version: "2.0"
        stories:
        - story: {story_name}
          steps:
          - intent: greet
            user: |-
              Hi
          - action: utter_greet
          - intent: greet
            user: |-
              [Hi](test)
          - action: utter_greet
          - user: |-
              Hi
          - bot: Hi, I'm a bot.
          - user: |-
              [Hi](test)
          - bot: Hi, I'm a bot.
        """
    ).strip()

    parsed_steps = YAMLStoryReader().read_from_string(source_yaml)
    dumped = YAMLStoryWriter().dumps(parsed_steps, is_test_story=True)

    assert dumped.strip() == expected_yaml
def test_migrate_mapping_policy_to_rules(
    config: Dict[Text, Any],
    domain_dict: Dict[Text, Any],
    expected_results: Dict[Text, Any],
):
    """Check config, domain intents and rules produced by the migration."""
    source_domain = Domain.from_dict(domain_dict)

    (
        migrated_config,
        migrated_domain,
        migrated_rules,
    ) = rasa.core.config.migrate_mapping_policy_to_rules(config, source_domain)

    assert migrated_config == expected_results["config"]

    migrated_intents = migrated_domain.cleaned_domain()["intents"]
    assert migrated_intents == expected_results["domain_intents"]

    assert len(migrated_rules) == expected_results["rules_count"]

    # Round-trip the rules through the writer and compare the parsed YAML.
    rules_as_text = YAMLStoryWriter().dumps(migrated_rules)
    parsed_rules = rasa.shared.utils.io.read_yaml(rules_as_text).get("rules", [])
    assert parsed_rules == expected_results["rules"]
async def test_simple_story(tmpdir: Path, domain: Domain, input_yaml_file: Text):
    """Check that a write/read round trip keeps step and event counts."""
    steps_before = YAMLStoryReader(domain, None).read_from_file(input_yaml_file)

    round_trip_file = tmpdir / "test.yml"
    YAMLStoryWriter().dump(round_trip_file, steps_before)

    steps_after = YAMLStoryReader(domain, None).read_from_file(round_trip_file)

    assert len(steps_after) == len(steps_before)
    for after, before in zip(steps_after, steps_before):
        assert len(after.events) == len(before.events)
def test_writing_end_to_end_stories(domain: Domain):
    """Check how label-based and end-to-end events appear in the default dump."""
    story_name = "test_writing_end_to_end_stories"

    expected_yaml = textwrap.dedent(
        f"""
        version: "3.0"
        stories:
        - story: {story_name}
          steps:
          - intent: greet
          - action: utter_greet
          - intent: greet
          - action: utter_greet
          - user: |-
              Hi
          - bot: Hi, I'm a bot.
          - user: |-
              Hi
          - bot: Hi, I'm a bot.
        """
    ).strip()

    conversation_events = [
        # Training story story with intent and action labels
        ActionExecuted(ACTION_LISTEN_NAME),
        UserUttered(intent={"name": "greet"}),
        ActionExecuted("utter_greet"),
        ActionExecuted(ACTION_LISTEN_NAME),
        # Prediction story story with intent and action labels
        ActionExecuted(ACTION_LISTEN_NAME),
        UserUttered(text="Hi", intent={"name": "greet"}),
        DefinePrevUserUtteredFeaturization(use_text_for_featurization=False),
        ActionExecuted("utter_greet"),
        ActionExecuted(ACTION_LISTEN_NAME),
        # End-To-End Training Story
        UserUttered(text="Hi"),
        ActionExecuted(action_text="Hi, I'm a bot."),
        ActionExecuted(ACTION_LISTEN_NAME),
        # End-To-End Prediction Story
        UserUttered("Hi", intent={"name": "greet"}),
        DefinePrevUserUtteredFeaturization(use_text_for_featurization=True),
        ActionExecuted(action_text="Hi, I'm a bot."),
        ActionExecuted(ACTION_LISTEN_NAME),
    ]

    conversation = DialogueStateTracker.from_events(story_name, conversation_events)
    dumped = YAMLStoryWriter().dumps(conversation.as_story().story_steps)

    assert dumped.strip() == expected_yaml
def write_training_data(nlu: TrainingData, domain: Domain, config: dict,
                        stories: StoryGraph, rules: StoryGraph = None, http_action: dict = None):
    """
    convert mongo data to individual files

    The files are written below a freshly created temporary directory.
    Rules and http action files are only written when the respective
    argument is truthy.

    :param nlu: nlu data
    :param domain: domain data, either a ``Domain`` object or a dict
    :param stories: stories data
    :param config: config data
    :param rules: rules data
    :param http_action: http actions data
    :return: files path
    """
    temp_path = tempfile.mkdtemp()
    data_path = os.path.join(temp_path, DEFAULT_DATA_PATH)
    os.makedirs(data_path)
    nlu_path = os.path.join(data_path, "nlu.yml")
    domain_path = os.path.join(temp_path, DEFAULT_DOMAIN_PATH)
    stories_path = os.path.join(data_path, "stories.yml")
    config_path = os.path.join(temp_path, DEFAULT_CONFIG_PATH)
    rules_path = os.path.join(data_path, "rules.yml")
    http_path = os.path.join(temp_path, "http_action.yml")

    nlu_as_str = nlu.nlu_as_yaml().encode()
    config_as_str = yaml.dump(config).encode()

    if isinstance(domain, Domain):
        domain_as_str = domain.as_yaml().encode()
        Utility.write_to_file(domain_path, domain_as_str)
    elif isinstance(domain, Dict):
        # Use a context manager so the handle is flushed and closed
        # deterministically instead of being left to garbage collection.
        with open(domain_path, "w") as domain_file:
            yaml.safe_dump(domain, domain_file)

    Utility.write_to_file(nlu_path, nlu_as_str)
    Utility.write_to_file(config_path, config_as_str)
    YAMLStoryWriter().dump(stories_path, stories.story_steps)
    if rules:
        YAMLStoryWriter().dump(rules_path, rules.story_steps)
    if http_action:
        http_as_str = yaml.dump(http_action).encode()
        Utility.write_to_file(http_path, http_as_str)
    return temp_path
def _log_stories(trackers: List[DialogueStateTracker], file_path: Text) -> None:
    """Write the stories of ``trackers`` to ``file_path`` as YAML.

    If ``trackers`` is empty, a single comment line is written instead.
    """
    with open(file_path, "w", encoding=DEFAULT_ENCODING) as output_file:
        if not trackers:
            output_file.write("# None of the test stories failed - all good!")
            return

        # Collect the steps of every story in tracker order.
        collected_steps = []
        for story in [tracker.as_story(include_source=True) for tracker in trackers]:
            collected_steps.extend(story.story_steps)

        output_file.write(YAMLStoryWriter().dumps(collected_steps))
async def test_story_start_checkpoint_is_skipped(domain: Domain):
    """Check that the dumped YAML does not contain ``STORY_START``."""
    stories_file = "data/test_yaml_stories/stories.yml"

    story_steps = YAMLStoryReader(domain, None).read_from_file(stories_file)
    dumped = YAMLStoryWriter().dumps(story_steps)

    assert STORY_START not in dumped
def export_stories_to_file(self, export_path: Text = "debug_stories.yml") -> None:
    """Dump the tracker as a story to a file.

    The story is appended when ``export_path`` already exists; otherwise the
    file is written fresh.

    Args:
        export_path: Path of the file the story is written to.
    """
    from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
        YAMLStoryWriter,
    )

    # Append only to a file that is already there. The previous negated
    # check overwrote existing files and "appended" only to missing ones.
    append = os.path.exists(export_path)

    rasa.shared.utils.io.write_text_file(
        self.export_stories(YAMLStoryWriter()) + "\n", export_path, append=append
    )
def test_yaml_writer_dumps_user_messages():
    """Check that the test-story dump includes the user text next to the intent."""
    expected_yaml = textwrap.dedent(
        """
        version: "3.0"
        stories:
        - story: default
          steps:
          - intent: greet
            user: |-
              Hello
          - action: utter_greet
        """
    ).strip()

    conversation = DialogueStateTracker.from_events(
        "default",
        [UserUttered("Hello", {"name": "greet"}), ActionExecuted("utter_greet")],
    )
    dumped = YAMLStoryWriter().dumps(
        conversation.as_story().story_steps, is_test_story=True
    )

    assert dumped.strip() == expected_yaml
def fingerprint(self) -> Text:
    """Returns a unique hash for the stories which is stable across python runs.

    The story steps are converted with ``YAMLStoryWriter.stories_to_yaml`` and
    the resulting container is fingerprinted.

    Returns:
        fingerprint of the stories
    """
    from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
        YAMLStoryWriter,
    )

    yaml_writer = YAMLStoryWriter()
    return rasa.shared.utils.io.deep_container_fingerprint(
        yaml_writer.stories_to_yaml(self.story_steps)
    )
async def test_action_start_action_listen_are_not_dumped():
    """Check that session-start and listen actions are absent from the dump."""
    conversation = DialogueStateTracker.from_events(
        "default",
        [
            ActionExecuted(ACTION_SESSION_START_NAME),
            UserUttered("Hello", {"name": "greet"}),
            ActionExecuted("utter_greet"),
            ActionExecuted(ACTION_LISTEN_NAME),
        ],
    )

    dumped = YAMLStoryWriter().dumps(conversation.as_story().story_steps)

    assert ACTION_SESSION_START_NAME not in dumped
    assert ACTION_LISTEN_NAME not in dumped
def test_yaml_writer_stories_to_yaml_with_null_entities(domain: Domain):
    """Check how ``null`` and ``false`` entity values come out of ``stories_to_yaml``."""
    story_writer = YAMLStoryWriter()
    source_yaml = textwrap.dedent(
        """
        version: "3.0"
        stories:
        - story: happy path
          steps:
          - intent: test_intent
            entities:
            - test_entity: null
            - test_entity2: false
        """
    )

    parsed_steps = YAMLStoryReader().read_from_string(source_yaml)
    as_yaml = story_writer.stories_to_yaml(parsed_steps)

    assert isinstance(as_yaml, OrderedDict)
    assert "stories" in as_yaml
    assert len(as_yaml["stories"]) == 1

    # The null-valued entity is expected as a bare name, the false-valued one
    # as a mapping.
    first_step_entities = as_yaml["stories"][0]["steps"][0]["entities"]
    assert first_step_entities[0] == "test_entity"
    assert first_step_entities[1] == OrderedDict({"test_entity2": False})
def fingerprint(self) -> Text:
    """Returns a unique hash for the stories which is stable across python runs.

    The story steps are dumped to a YAML string and the text hash of that
    string is returned.

    Returns:
        fingerprint of the stories
    """
    from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
        YAMLStoryWriter,
    )

    yaml_writer = YAMLStoryWriter()
    return rasa.shared.utils.io.get_text_hash(yaml_writer.dumps(self.story_steps))
async def test_story_start_checkpoint_is_skipped(default_domain: Domain):
    """Check that dumping Markdown stories as YAML omits ``STORY_START``."""
    stories_file = "data/test_stories/stories.md"

    markdown_reader = MarkdownStoryReader(
        default_domain, None, False, stories_file, is_used_for_training=False
    )
    dumped = YAMLStoryWriter().dumps(markdown_reader.read_from_file(stories_file))

    assert STORY_START not in dumped
async def _convert_core_training_data(
    in_path: Text, out_path: Text,
):
    """Read the stories at ``in_path`` and write them to ``out_path`` as YAML.

    A Markdown reader is used when ``in_path`` looks like a Markdown file,
    otherwise a YAML reader.
    """
    from rasa.shared.core.training_data.story_reader.markdown_story_reader import (
        MarkdownStoryReader,
    )
    from rasa.shared.core.training_data.story_reader.yaml_story_reader import (
        YAMLStoryReader,
    )

    if rasa.shared.data.is_likely_markdown_file(in_path):
        story_reader = MarkdownStoryReader()
    else:
        story_reader = YAMLStoryReader()

    YAMLStoryWriter().dump(out_path, story_reader.read_from_file(in_path))
def get_test_stories(
    processor: "MessageProcessor",
    conversation_id: Text,
    until_time: Optional[float],
    fetch_all_sessions: bool = False,
) -> Text:
    """Retrieves test stories from `processor` for all conversation sessions for
    `conversation_id`.

    Args:
        processor: An instance of `MessageProcessor`.
        conversation_id: Conversation ID to fetch stories for.
        until_time: Timestamp up to which to include events.
        fetch_all_sessions: Whether to fetch stories for all conversation sessions.
            If `False`, only the last conversation session is retrieved.

    Returns:
        The stories for `conversation_id` in test format.
    """
    trackers: List[DialogueStateTracker]
    if fetch_all_sessions:
        trackers = processor.get_trackers_for_all_conversation_sessions(
            conversation_id
        )
    else:
        trackers = [processor.get_tracker(conversation_id)]

    if until_time is not None:
        trackers = [tracker.travel_back_in_time(until_time) for tracker in trackers]

    # keep only non-empty trackers
    trackers = [tracker for tracker in trackers if len(tracker.events)]

    logger.debug(
        f"Fetched trackers for {len(trackers)} conversation sessions "
        f"for conversation ID {conversation_id}."
    )

    # Stories only get a numbered name when there is more than one of them.
    number_stories = len(trackers) > 1
    collected_steps = []
    for position, tracker in enumerate(trackers, 1):
        if number_stories:
            tracker.sender_id = conversation_id + f", story {position}"
        else:
            tracker.sender_id = conversation_id
        collected_steps.extend(tracker.as_story().story_steps)

    return YAMLStoryWriter().dumps(collected_steps, is_test_story=True)
def test_yaml_writer_doesnt_dump_action_unlikely_intent():
    """Check that the unlikely-intent action is left out of the dump."""
    expected_yaml = textwrap.dedent(
        """
        version: "3.0"
        stories:
        - story: default
          steps:
          - intent: greet
            user: |-
              Hello
          - action: utter_hello
          - action: utter_bye
        """
    ).strip()

    conversation = DialogueStateTracker.from_events(
        "default",
        [
            UserUttered("Hello", {"name": "greet"}),
            ActionExecuted("utter_hello"),
            ActionExecuted(ACTION_UNLIKELY_INTENT_NAME, metadata={"key1": "value1"}),
            ActionExecuted("utter_bye"),
        ],
    )
    dumped = YAMLStoryWriter().dumps(
        conversation.as_story().story_steps, is_test_story=True
    )

    assert dumped.strip() == expected_yaml
async def _convert_core_training_data(
    in_path: Text, out_path: Text,
):
    """Convert the stories at ``in_path`` to YAML.

    Markdown input is handed to ``StoryMarkdownToYamlConverter`` with the
    input file's parent directory as output location; ``out_path`` is not used
    in that case. Any other input is read as YAML and written to ``out_path``.
    """
    from rasa.core.training.converters import StoryMarkdownToYamlConverter
    from rasa.shared.core.training_data.story_reader.yaml_story_reader import (
        YAMLStoryReader,
    )

    if not rasa.shared.data.is_likely_markdown_file(in_path):
        yaml_steps = YAMLStoryReader().read_from_file(in_path)
        YAMLStoryWriter().dump(out_path, yaml_steps)
        return

    markdown_file = Path(in_path)
    await StoryMarkdownToYamlConverter.convert_and_write(
        markdown_file, markdown_file.parent
    )
def _log_stories(trackers: List[DialogueStateTracker], file_path: Text,
                 message_if_no_trackers: Text) -> None:
    """Write the stories of ``trackers`` to ``file_path`` as YAML.

    Each story step is passed through ``_filter_step_events`` before dumping.
    If ``trackers`` is empty, ``message_if_no_trackers`` is written as a
    comment line instead.
    """
    with open(file_path, "w", encoding=DEFAULT_ENCODING) as output_file:
        if not trackers:
            output_file.write(f"# {message_if_no_trackers}")
            return

        # Collect the filtered steps of every story in tracker order.
        filtered_steps = []
        for story in [tracker.as_story(include_source=True) for tracker in trackers]:
            for step in story.story_steps:
                filtered_steps.append(_filter_step_events(step))

        output_file.write(YAMLStoryWriter().dumps(filtered_steps))