def test_encode_entities__with_entity_roles_and_groups():
    """Check that entities with roles/groups are encoded as per-token tag ids.

    A whitespace-tokenized message with two annotated entities is added to the
    precomputation container, then encoded with ``encode_entities``. The test
    verifies the tag-to-id mapping of the single tag spec as well as the
    encoded tag sequence.
    """
    # create fake message that has been tokenized and entities have been extracted
    text = "I am flying from London to Paris"
    tokens = [
        Token(text=match.group(), start=match.start())
        for match in re.finditer(r"\S+", text)
    ]
    entity_tags = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    entities = [
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[0],
            ENTITY_ATTRIBUTE_VALUE: "London",
            ENTITY_ATTRIBUTE_START: 17,
            ENTITY_ATTRIBUTE_END: 23,
        },
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[1],
            ENTITY_ATTRIBUTE_VALUE: "Paris",
            ENTITY_ATTRIBUTE_START: 27,
            ENTITY_ATTRIBUTE_END: 32,
        },
    ]
    message = Message({TEXT: text, TOKENS_NAMES[TEXT]: tokens, ENTITIES: entities})

    # create a lookup table that has seen this message
    precomputations = MessageContainerForCoreFeaturization()
    precomputations.add(message)

    # instantiate matching domain and single state featurizer
    domain = Domain(
        intents=[],
        entities=entity_tags,
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    f = SingleStateFeaturizer()
    f.prepare_for_training(domain)

    # encode!
    encoded = f.encode_entities(
        entity_data={TEXT: text, ENTITIES: entities},
        precomputations=precomputations,
    )

    # check
    assert len(f.entity_tag_specs) == 1
    tags_to_ids = f.entity_tag_specs[0].tags_to_ids
    for expected_id, entity_tag in enumerate(entity_tags, start=1):
        # Verify the mapping instead of writing into it:
        # city -> 1, city#to -> 2
        assert tags_to_ids[entity_tag] == expected_id
    assert sorted(encoded.keys()) == [ENTITY_TAGS]
    assert np.all(
        encoded[ENTITY_TAGS][0].features == [[0], [0], [0], [0], [1], [0], [2]]
    )
def test_container_derive_messages_from_domain_and_add():
    """The container size equals the number of intents plus action names/texts."""
    # Action names, action texts, response keys and forms must be unique or the
    # domain will complain about it. The response *texts*, however, could
    # overlap with e.g. action texts, and intent names can be anything.
    domain = Domain(
        intents=["a", "b"],
        action_names=["a", "b"],
        action_texts=["a2", "b2"],
        responses={"a3": {TEXT: "a2"}, "b3": {TEXT: "b2"}},
        entities=["e_a", "e_b", "e_c"],
        slots=[TextSlot(name="s", mappings=[{}])],
        forms={"a4": "a4"},
        data={},
    )

    container = MessageContainerForCoreFeaturization()
    container.derive_messages_from_domain_and_add(domain)

    expected_size = len(domain.intent_properties) + len(domain.action_names_or_texts)
    assert len(container) == expected_size
def test_encode_all_labels__encoded_all_action_names_and_texts():
    """Every domain action is encoded by name only (no action text).

    Note that "labels" means actions here.
    """
    domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        responses={},
        forms={},
        action_names=["a", "b", "c", "d"],
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain)

    precomputations = MessageContainerForCoreFeaturization()
    precomputations.derive_messages_from_domain_and_add(domain)

    encoded_actions = featurizer.encode_all_labels(
        domain, precomputations=precomputations
    )

    assert len(encoded_actions) == len(domain.action_names_or_texts)
    for encoded_action in encoded_actions:
        assert ACTION_NAME in encoded_action
        assert ACTION_TEXT not in encoded_action
def _get_domain_with_e2e_actions(self) -> Domain:
    """Return a domain whose action texts are the end-to-end actions in the stories.

    All non-empty ``action_text`` values of ``ActionExecuted`` events found in
    the story steps are collected (de-duplicated) and passed as
    ``action_texts``; everything else in the domain is left empty.
    """
    e2e_action_texts = {
        event.action_text
        for story_step in self.get_stories().story_steps
        for event in story_step.events
        if isinstance(event, ActionExecuted) and event.action_text
    }

    return Domain(
        [],
        [],
        [],
        {},
        action_names=[],
        forms={},
        action_texts=list(e2e_action_texts),
    )
async def test_unpack_regex_message_has_correct_entity_start_and_end():
    """Unpacking ``/greet{...}`` yields the entity with start/end of the JSON part."""
    entity = "name"
    entity_payload = json.dumps({entity: "Core"})
    message = Message(data={TEXT: f"/greet{entity_payload}"})

    domain = Domain(
        intents=["greet"],
        entities=[entity],
        slots=[],
        responses={},
        action_names=[],
        forms={},
    )

    unpacked = YAMLStoryReader.unpack_regex_message(
        message, domain, entity_extractor_name="RegexMessageHandler"
    )

    expected_entity = {
        "entity": "name",
        "value": "Core",
        "start": 6,
        "end": 22,
        EXTRACTOR: "RegexMessageHandler",
    }
    expected_data = {
        "text": '/greet{"name": "Core"}',
        "intent": {"name": "greet", "confidence": 1.0},
        "intent_ranking": [{"name": "greet", "confidence": 1.0}],
        "entities": [expected_entity],
    }
    assert unpacked.data == expected_data
def test_process_warns_if_intent_or_entities_not_in_domain(
    intent: Text,
    entities: Optional[Text],
    expected_intent: Text,
    domain_entities: List[Text],
):
    """Unpacking a regex message warns when intent/entities are not in the domain.

    Args:
        intent: Intent name that is written into the message text.
        entities: Entities that are JSON-dumped and appended to the text;
            ``None`` means that nothing is appended.
        expected_intent: The only intent known to the domain.
        domain_entities: The entities known to the domain.
    """
    # construct text according to pattern
    text = INTENT_MESSAGE_PREFIX + intent  # do not add a confidence value
    if entities is not None:
        text += json.dumps(entities)
    message = Message(data={TEXT: text})

    # construct domain from expected intent/entities
    domain = Domain(
        intents=[expected_intent],
        entities=domain_entities,
        slots=[],
        responses={},
        action_names=[],
        forms={},
    )

    # expect a warning
    with pytest.warns(UserWarning):
        unpacked_message = YAMLStoryReader.unpack_regex_message(message, domain)

    if "wrong" not in intent:
        assert unpacked_message.data[INTENT][INTENT_NAME_KEY] == intent
        # `entities` may be None (see the guard above); a membership test on
        # None would raise a TypeError instead of checking anything.
        if entities is not None and "wrong" in entities:
            assert unpacked_message.data[ENTITIES] is not None
            assert len(unpacked_message.data[ENTITIES]) == 0
    else:
        assert unpacked_message == message
def test_domain_action_instantiation():
    """Actions are instantiated for all default actions followed by custom ones."""
    domain = Domain(
        intents=[{"chitchat": {"is_retrieval_intent": True}}],
        entities=[],
        slots=[],
        templates={},
        action_names=["my_module.ActionTest", "utter_test", "utter_chitchat"],
        forms={},
    )

    instantiated_actions = [
        action.action_for_name_or_text(action_name, domain, None)
        for action_name in domain.action_names_or_texts
    ]

    # expected names, in the order in which the actions must be instantiated
    expected_names = [
        ACTION_LISTEN_NAME,
        ACTION_RESTART_NAME,
        ACTION_SESSION_START_NAME,
        ACTION_DEFAULT_FALLBACK_NAME,
        ACTION_DEACTIVATE_LOOP_NAME,
        ACTION_REVERT_FALLBACK_EVENTS_NAME,
        ACTION_DEFAULT_ASK_AFFIRMATION_NAME,
        ACTION_DEFAULT_ASK_REPHRASE_NAME,
        ACTION_TWO_STAGE_FALLBACK_NAME,
        ACTION_BACK_NAME,
        RULE_SNIPPET_ACTION_NAME,
        "my_module.ActionTest",
        "utter_test",
        "utter_chitchat",
    ]

    assert len(instantiated_actions) == 14
    for position, expected_name in enumerate(expected_names):
        assert instantiated_actions[position].name() == expected_name
def _get_domain_with_retrieval_intents(
    retrieval_intents: Set[Text],
    response_templates: Dict[Text, List[Dict[Text, Any]]],
    existing_domain: Domain,
) -> Domain:
    """Construct a domain consisting of retrieval intents listed in the NLU training data.

    Args:
        retrieval_intents: Set of retrieval intents defined in NLU training data.
        response_templates: Response templates that are passed on to the
            constructed domain.
        existing_domain: Domain which is already loaded from the domain file.
            It is only read, never modified.

    Returns:
        Domain with retrieval actions added to action names and properties
        for retrieval intents updated.
    """
    # Get all the properties already defined for each retrieval intent in
    # other domains and add the retrieval intent property to them.
    retrieval_intent_properties = []
    for intent in retrieval_intents:
        if intent in existing_domain.intent_properties:
            # Work on a copy: setting the flag on the original dict would
            # silently modify `existing_domain` as a side effect.
            intent_properties = dict(existing_domain.intent_properties[intent])
        else:
            intent_properties = {}
        intent_properties[IS_RETRIEVAL_INTENT_KEY] = True
        retrieval_intent_properties.append({intent: intent_properties})

    return Domain(
        retrieval_intent_properties,
        [],
        [],
        response_templates,
        RetrievalModelsDataImporter._construct_retrieval_action_names(
            retrieval_intents
        ),
        [],
    )
def test_process_does_not_do_anything(
    regex_message_handler: RegexMessageHandler, text: Text
):
    """Processing returns a message that is equal to the one passed in."""
    sentence_features = Features(
        features=np.zeros((1, 1)),
        feature_type=FEATURE_TYPE_SENTENCE,
        attribute=TEXT,
        origin="nlu-pipeline",
    )
    message = Message(
        data={TEXT: text, INTENT: "bla"},
        features=[sentence_features],
    )

    # construct domain from expected intent/entities
    domain = Domain(
        intents=["intent"],
        entities=["entity"],
        slots=[],
        responses={},
        action_names=[],
        forms={},
        data={},
    )

    first_parsed, *_ = regex_message_handler.process([message], domain)
    assert first_parsed == message
def test_check_domain_sanity_on_invalid_domain():
    """Duplicated actions, slots, entities or forms make the domain invalid."""

    def build_domain(**overrides):
        # Start from an otherwise empty domain and replace single sections.
        arguments = dict(
            intents={},
            entities=[],
            slots=[],
            templates={},
            action_names=[],
            forms=[],
        )
        arguments.update(overrides)
        return Domain(**arguments)

    # duplicated action names
    with pytest.raises(InvalidDomain):
        build_domain(action_names=["random_name", "random_name"])

    # duplicated slots
    with pytest.raises(InvalidDomain):
        build_domain(slots=[TextSlot("random_name"), TextSlot("random_name")])

    # duplicated entities
    with pytest.raises(InvalidDomain):
        build_domain(
            entities=["random_name", "random_name", "other_name", "other_name"]
        )

    # duplicated forms
    with pytest.raises(InvalidDomain):
        build_domain(forms=["random_name", "random_name"])
def test_verify_domain_with_duplicates(
    duplicates: Optional[Dict[Text, List[Text]]],
    is_valid: bool,
    warning_type: Any,
    messages: List[Text],
):
    """The validator reports duplicates with the expected warnings, in order."""
    validator = Validator(
        Domain([], [], [], {}, [], {}, duplicates=duplicates), None, None, None
    )

    with pytest.warns(warning_type) as warning:
        assert validator.verify_domain_duplicates() is is_valid

    assert len(warning) == len(messages)
    for position, expected_fragment in enumerate(messages):
        assert expected_fragment in warning[position].message.args[0]
def get_domain_nlu(state: StateMachineState, is_initial_state: bool):
    """Build the domain and the NLU data for a single state machine state.

    Args:
        state: The state whose entities, intents, actions and slots are used.
        is_initial_state: Stored under ``"is_initial_state"`` in the domain's
            ``state_machine_states`` entry for this state.

    Returns:
        A ``(domain, nlu_data)`` tuple.
    """
    entity_names = {name for name in state.all_entities()}
    intents: Set[IntentWithExamples] = {item for item in state.all_intents()}
    actions: Set[Action] = {item for item in state.all_actions()}
    utterances: Set[Utterance] = {
        item for item in actions if isinstance(item, Utterance)
    }
    slots: Set[Slot] = {item for item in state.all_slots()}

    # Write domain
    responses = {
        utterance.name: [{"text": utterance.text}] for utterance in utterances
    }
    state_entry = {
        "is_initial_state": is_initial_state,
        "state_yaml": yaml.dump(state),
    }
    domain = Domain(
        intents=[intent.name for intent in intents],
        entities=entity_names,  # entity names
        slots=[slot.as_rasa_slot() for slot in slots],
        responses=responses,
        action_names=[action.name for action in actions],
        forms={},
        action_texts=[],
        state_machine_states={state.name: state_entry},
    )

    # Write NLU
    nlu_examples = [
        intent.as_nlu_yaml()
        for intent in intents
        if isinstance(intent, IntentWithExamples)
    ]
    nlu_data = {"version": "2.0", "nlu": nlu_examples}

    return domain, nlu_data
def test_single_state_featurizer_creates_encoded_all_actions():
    """Each domain action is encoded with its name and without an action text."""
    domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        templates={},
        forms=[],
        action_names=["a", "b", "c", "d"],
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_from_domain(domain)
    encoded_actions = featurizer.encode_all_actions(domain, RegexInterpreter())

    assert len(encoded_actions) == len(domain.action_names)
    for encoded_action in encoded_actions:
        assert ACTION_NAME in encoded_action
        assert ACTION_TEXT not in encoded_action
async def _get_domain_with_e2e_actions(self) -> Domain:
    """Return a domain whose action names are the end-to-end actions in the stories.

    All non-empty ``action_text`` values of ``ActionExecuted`` events found in
    the story steps are collected (de-duplicated) and passed as
    ``action_names``; everything else in the domain is left empty.
    """
    from rasa.shared.core.events import ActionExecuted

    stories = await self.get_stories()

    e2e_action_texts = {
        event.action_text
        for story_step in stories.story_steps
        for event in story_step.events
        if isinstance(event, ActionExecuted) and event.action_text
    }

    return Domain([], [], [], {}, action_names=list(e2e_action_texts), forms=[])
def test_converter_for_training(input_converter: CoreFeaturizationInputConverter):
    """Training conversion creates messages from story events and from the domain."""
    # create domain and story graph
    domain = Domain(
        intents=["greet", "inform", "domain-only-intent"],
        entities=["entity_name"],
        slots=[],
        responses={},
        action_names=["action_listen", "utter_greet"],
        forms={},
        action_texts=["Hi how are you?"],
    )
    events = [
        ActionExecuted(action_name="action_listen"),
        UserUttered(
            text="hey this has some entities",
            intent={INTENT_NAME_KEY: "greet"},
            entities=[_create_entity(value="Bot", type="entity_name")],
        ),
        ActionExecuted(action_name="utter_greet", action_text="Hi how are you?"),
        ActionExecuted(action_name="action_listen"),
        UserUttered(
            text="some test with an intent!", intent={INTENT_NAME_KEY: "inform"}
        ),
        ActionExecuted(action_name="action_listen"),
    ]
    story_graph = StoryGraph([StoryStep(events=events)])

    # convert!
    training_data = input_converter.convert_for_training(
        domain=domain, story_graph=story_graph
    )
    messages = training_data.training_examples

    # check that messages were created from (story) events as expected
    _check_messages_created_from_events_as_expected(events=events, messages=messages)

    # check that messages were created from domain as expected
    for intent in domain.intent_properties:
        assert Message(data={INTENT: intent}) in messages
    for action_name_or_text in domain.action_names_or_texts:
        # action texts are stored under a different attribute than action names
        if action_name_or_text in domain.action_texts:
            attribute = ACTION_TEXT
        else:
            attribute = ACTION_NAME
        assert Message(data={attribute: action_name_or_text}) in messages

    # check that each message contains only one attribute, which must be a key
    # attribute
    _check_messages_contain_attribute_which_is_key_attribute(messages=messages)
def test_single_state_featurizer_uses_regex_interpreter(
    unpacked_trained_moodbot_path: Text,
):
    """No text features are extracted if the featurizer was prepared without NLU."""
    from rasa.core.agent import Agent

    empty_domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        responses={},
        forms=[],
        action_names=[],
    )

    # Simulate that core was trained separately by passing a RegexInterpreter
    # to `prepare_for_training` ...
    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(empty_domain, RegexInterpreter())

    # ... and that nlu and core models were manually combined for prediction
    # by passing the trained interpreter when extracting state features.
    trained_interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
    features = featurizer._extract_state_features(
        {TEXT: "some text"}, trained_interpreter
    )

    # RegexInterpreter cannot create features for text, therefore since the
    # featurizer was trained without nlu, features for text should be empty
    assert not features
def test_single_state_featurizer_with_entity_roles_and_groups(
    unpacked_trained_moodbot_path: Text,
):
    """Entities with roles/groups are encoded as per-token tag ids."""
    from rasa.core.agent import Agent

    interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

    # TODO roles and groups are not supported in e2e yet
    city_tag = "city"
    city_to_tag = f"city{ENTITY_LABEL_SEPARATOR}to"
    domain = Domain(
        intents=[],
        entities=[city_tag, city_to_tag],
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain, RegexInterpreter())

    london = {
        ENTITY_ATTRIBUTE_TYPE: city_tag,
        ENTITY_ATTRIBUTE_VALUE: "London",
        ENTITY_ATTRIBUTE_START: 17,
        ENTITY_ATTRIBUTE_END: 23,
    }
    paris = {
        ENTITY_ATTRIBUTE_TYPE: city_to_tag,
        ENTITY_ATTRIBUTE_VALUE: "Paris",
        ENTITY_ATTRIBUTE_START: 27,
        ENTITY_ATTRIBUTE_END: 32,
    }
    encoded = featurizer.encode_entities(
        {TEXT: "I am flying from London to Paris", ENTITIES: [london, paris]},
        interpreter=interpreter,
    )

    assert sorted(encoded.keys()) == [ENTITY_TAGS]
    expected_tag_ids = [[0], [0], [0], [0], [1], [0], [2]]
    assert np.all(encoded[ENTITY_TAGS][0].features == expected_tag_ids)
def test_domain_validation_with_valid_marker(
    depth: int, max_branches: int, seed: int
):
    """A random marker validates against a domain derived from that marker."""
    # We do this a bit backwards: the domain is constructed from the marker,
    # hence the two must match.
    marker, _ = generate_random_marker(
        depth=depth,
        max_branches=max_branches,
        rng=np.random.default_rng(seed=seed),
        possible_conditions=CONDITION_MARKERS,
        possible_operators=OPERATOR_MARKERS,
        constant_condition_text=None,
        constant_negated=None,
    )

    slot_names = _collect_parameters(marker, SlotSetMarker)
    action_names = _collect_parameters(marker, ActionExecutedMarker)
    intent_names = _collect_parameters(marker, IntentDetectedMarker)

    domain = Domain(
        intent_names,
        [],
        [Slot(slot_name, []) for slot_name in slot_names],
        {},
        list(action_names),
        {},
    )

    assert marker.validate_against_domain(domain)
def test_single_state_featurizer_prepare_for_training():
    """Preparing for training fills the default feature states from the domain."""
    domain = Domain(
        intents=["greet"],
        entities=["name"],
        slots=[Slot("name")],
        templates={},
        forms=[],
        action_names=["utter_greet", "action_check_weather"],
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain, RegexInterpreter())
    states = featurizer._default_feature_states

    # intents: more than just the single domain intent
    assert len(states[INTENT]) > 1
    assert "greet" in states[INTENT]

    # entities and slots: exactly the ones from the domain
    assert len(states[ENTITIES]) == 1
    assert states[ENTITIES]["name"] == 0
    assert len(states[SLOTS]) == 1
    assert states[SLOTS]["name_0"] == 0

    # actions: more than just the two domain actions
    assert len(states[ACTION_NAME]) > 2
    assert "utter_greet" in states[ACTION_NAME]
    assert "action_check_weather" in states[ACTION_NAME]

    # no forms were defined
    assert len(states[ACTIVE_LOOP]) == 0
def test_prepare_for_training():
    """Preparing for training fills the default feature states from the domain."""
    domain = Domain(
        intents=["greet"],
        entities=["name"],
        slots=[TextSlot("name", mappings=[{}])],
        responses={},
        forms={},
        action_names=["utter_greet", "action_check_weather"],
        data={},
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain)
    states = featurizer._default_feature_states

    # intents: more than just the single domain intent
    assert len(states[INTENT]) > 1
    assert "greet" in states[INTENT]

    # entities and slots: exactly the ones from the domain
    assert len(states[ENTITIES]) == 1
    assert states[ENTITIES]["name"] == 0
    assert len(states[SLOTS]) == 1
    assert states[SLOTS]["name_0"] == 0

    # actions: more than just the two domain actions
    assert len(states[ACTION_NAME]) > 2
    assert "utter_greet" in states[ACTION_NAME]
    assert "action_check_weather" in states[ACTION_NAME]

    # no forms were defined
    assert len(states[ACTIVE_LOOP]) == 0
async def test_logging_of_end_to_end_action():
    """An end-to-end prediction is logged as an ``ActionExecuted`` with action text."""
    end_to_end_action = "hi, how are you?"
    domain = Domain(
        intents=["greet"],
        entities=[],
        slots=[],
        templates={},
        action_names=[],
        forms={},
        action_texts=[end_to_end_action],
    )
    conversation_id = "test_logging_of_end_to_end_action"
    user_message = "/greet"

    class ConstantEnsemble(PolicyEnsemble):
        """Predicts the end-to-end action on the first call, then `action_listen`."""

        def __init__(self) -> None:
            super().__init__([])
            self.number_of_calls = 0

        def probabilities_using_best_policy(
            self,
            tracker: DialogueStateTracker,
            domain: Domain,
            interpreter: NaturalLanguageInterpreter,
            **kwargs: Any,
        ) -> PolicyPrediction:
            if self.number_of_calls != 0:
                return PolicyPrediction.for_action_name(domain, ACTION_LISTEN_NAME)

            first_prediction = PolicyPrediction.for_action_name(
                domain, end_to_end_action, "some policy"
            )
            first_prediction.is_end_to_end_prediction = True
            self.number_of_calls += 1
            return first_prediction

    tracker_store = InMemoryTrackerStore(domain)
    lock_store = InMemoryLockStore()
    processor = MessageProcessor(
        RegexInterpreter(),
        ConstantEnsemble(),
        domain,
        tracker_store,
        lock_store,
        NaturalLanguageGenerator.create(None, domain),
    )

    incoming = UserMessage(user_message, sender_id=conversation_id)
    await processor.handle_message(incoming)

    tracker = tracker_store.retrieve(conversation_id)
    expected_events = [
        ActionExecuted(ACTION_SESSION_START_NAME),
        SessionStarted(),
        ActionExecuted(ACTION_LISTEN_NAME),
        UserUttered(user_message, intent={"name": "greet"}),
        ActionExecuted(action_text=end_to_end_action),
        BotUttered("hi, how are you?", {}, {}, 123),
        ActionExecuted(ACTION_LISTEN_NAME),
    ]
    # compared pairwise; `zip` stops at the shorter of the two sequences
    for actual_event, expected_event in zip(tracker.events, expected_events):
        assert actual_event == expected_event
def test_process_unpacks_attributes_from_single_message_and_fallsback_if_needed(
    confidence: Optional[Text],
    entities: Optional[Text],
    expected_confidence: float,
    expected_entities: Optional[List[Dict[Text, Any]]],
    should_warn: bool,
):
    """Unpacking a regex message yields intent, confidence and entities.

    Args:
        confidence: Confidence appended to the text as ``@<confidence>``;
            ``None`` means that nothing is appended.
        entities: Raw entity string appended to the text; ``None`` means that
            nothing is appended.
        expected_confidence: Confidence expected in the unpacked message.
        expected_entities: Entities expected in the unpacked message; ``None``
            or an empty list means that no entities are expected.
        should_warn: Whether unpacking is expected to emit a ``UserWarning``.
    """
    # dummy intent
    expected_intent = "my-intent"

    # construct text according to pattern
    text = " \t  " + INTENT_MESSAGE_PREFIX + expected_intent
    if confidence is not None:
        text += f"@{confidence}"
    if entities is not None:
        text += entities
    text += " \t "

    # create a message with some dummy attributes and features
    message = Message(
        data={TEXT: text, INTENT: "extracted-from-the-pattern-text-via-nlu"},
        features=[
            Features(
                features=np.zeros((1, 1)),
                feature_type=FEATURE_TYPE_SENTENCE,
                attribute=TEXT,
                origin="nlu-pipeline",
            )
        ],
    )

    # construct domain from expected intent/entities
    # (`expected_entities` may be None, which is treated like "no entities")
    domain_entities = [
        item[ENTITY_ATTRIBUTE_TYPE] for item in (expected_entities or [])
    ]
    domain = Domain(
        intents=[expected_intent],
        entities=domain_entities,
        slots=[],
        responses={},
        action_names=[],
        forms={},
    )

    # extract information
    if should_warn:
        with pytest.warns(UserWarning):
            unpacked_message = YAMLStoryReader.unpack_regex_message(message, domain)
    else:
        unpacked_message = YAMLStoryReader.unpack_regex_message(message, domain)

    assert not unpacked_message.features

    assert set(unpacked_message.data.keys()) == {
        TEXT,
        INTENT,
        INTENT_RANKING_KEY,
        ENTITIES,
    }

    assert unpacked_message.data[TEXT] == message.data[TEXT].strip()

    assert set(unpacked_message.data[INTENT].keys()) == {
        INTENT_NAME_KEY,
        PREDICTED_CONFIDENCE_KEY,
    }
    assert unpacked_message.data[INTENT][INTENT_NAME_KEY] == expected_intent
    assert (
        unpacked_message.data[INTENT][PREDICTED_CONFIDENCE_KEY]
        == expected_confidence
    )

    intent_ranking = unpacked_message.data[INTENT_RANKING_KEY]
    assert len(intent_ranking) == 1
    assert intent_ranking[0] == {
        INTENT_NAME_KEY: expected_intent,
        PREDICTED_CONFIDENCE_KEY: expected_confidence,
    }

    if expected_entities:
        entity_data: List[Dict[Text, Any]] = unpacked_message.data[ENTITIES]
        assert all(
            set(item.keys())
            == {
                ENTITY_ATTRIBUTE_VALUE,
                ENTITY_ATTRIBUTE_TYPE,
                ENTITY_ATTRIBUTE_START,
                ENTITY_ATTRIBUTE_END,
            }
            for item in entity_data
        )
        # compare (type, value) pairs regardless of their order
        assert {
            (item[ENTITY_ATTRIBUTE_TYPE], item[ENTITY_ATTRIBUTE_VALUE])
            for item in expected_entities
        } == {
            (item[ENTITY_ATTRIBUTE_TYPE], item[ENTITY_ATTRIBUTE_VALUE])
            for item in entity_data
        }
    else:
        assert unpacked_message.data[ENTITIES] is not None
        assert len(unpacked_message.data[ENTITIES]) == 0
def get_domain_nlu(
    self, use_rules: bool
) -> Tuple[Domain, List[Dict], Set[Intent], Set[SlotWasSet]]:
    """Collect the domain and the story/rule definitions for this story.

    The elements in ``self.paths`` are walked in order. ``OrActions`` and
    ``Fork`` elements start new sub-stories, which are processed recursively
    and whose results are merged into the results of this story.

    Args:
        use_rules: If ``True``, the entries are emitted under the key
            ``"rule"`` (instead of ``"story"``) and ``wait_for_user_input``
            is set to ``False`` on each of them.

    Returns:
        A tuple ``(domain, story_nlu, all_intents, all_slot_was_sets)``.

    Raises:
        RuntimeError: If a ``Fork`` is the first element of the path.
        ValueError: If a ``Fork`` is not the last element of the path.
    """
    story_nlu_steps: List[str] = []
    last_element: Optional[Intent] = None
    sub_domains: List[Domain] = []
    sub_nlus: List[Dict] = []

    all_intents: Set[Intent] = set()
    all_utterances: Set[Utterance] = set()
    all_actions: Set[Action] = set()
    all_slot_was_sets: Set[SlotWasSet] = set()

    for element_index, element in enumerate(self.paths):
        # Add to current story
        story_nlu_steps.append(element.as_story_yaml())

        if isinstance(element, Intent):
            all_intents.add(element)
        elif isinstance(element, Utterance):
            all_utterances.add(element)
        elif isinstance(element, Or):
            all_intents.update(element.all_intents())
        elif isinstance(element, Action):
            all_actions.add(element)
        elif isinstance(element, SlotWasSet):
            all_slot_was_sets.add(element)
        elif isinstance(element, OrActions):
            # Start a new story for every fork
            for action_index, action in enumerate(element.actions):
                story = Story(
                    name=f"{self.name}_action_fork_{action_index}",
                    elements=[action] + self.paths[element_index + 1 :],
                )
                (
                    sub_domain,
                    sub_nlu,
                    sub_intents,
                    sub_slot_was_sets,
                ) = story.get_domain_nlu(use_rules=use_rules)

                sub_domains.append(sub_domain)
                sub_nlus += sub_nlu
                all_intents.update(sub_intents)
                all_slot_was_sets.update(sub_slot_was_sets)

            # All subsequent elements have been accounted for, so break
            break
        elif isinstance(element, Fork):
            if last_element:
                assert isinstance(last_element, Action)
            else:
                # Must be raised: asserting an exception instance is always
                # truthy and would let an invalid story pass silently.
                raise RuntimeError(
                    "Fork must not be the first element in a story path."
                )

            # Start a new story for every fork
            for index, path in enumerate(element.paths):
                story = Story(
                    name=f"{self.name}_fork_{index}",
                    elements=[last_element] + path,
                )
                (
                    sub_domain,
                    sub_nlu,
                    sub_intents,
                    sub_slot_was_sets,
                ) = story.get_domain_nlu(use_rules=use_rules)

                sub_domains.append(sub_domain)
                sub_nlus += sub_nlu
                all_intents.update(sub_intents)
                all_slot_was_sets.update(sub_slot_was_sets)

            if element_index != len(self.paths) - 1:
                raise ValueError(
                    "The fork must be the last element of its path."
                )

        last_element = element

    # Filter out empty dictionaries
    story_nlu_steps = [step for step in story_nlu_steps if bool(step)]

    # Persist domain
    domain = Domain(
        intents=list({intent.name for intent in all_intents}),
        entities=list(
            {entity for intent in all_intents for entity in intent.entities}
        ),  # List of entity names
        slots=[],
        responses={
            utterance.name: [{"text": utterance.text}]
            for utterance in all_utterances
        },
        action_names=[action.name for action in all_actions],
        forms={},
        action_texts=[],
    )

    # Merge sub-domains
    for sub_domain in sub_domains:
        domain = domain.merge(sub_domain)

    # Save current story
    story_nlu = (
        [
            {
                "rule" if use_rules else "story": self.name,
                "steps": story_nlu_steps,
            }
        ]
        if len(story_nlu_steps) > 1  # Omit all stories with less than 2 steps
        else []
    )
    story_nlu += sub_nlus

    # wait_for_user_input
    if use_rules:
        for nlu in story_nlu:
            nlu["wait_for_user_input"] = False

    return (domain, story_nlu, all_intents, all_slot_was_sets)
def persist(
    stories: List[Story],
    domain_filename: str,
    nlu_filename: str,
    additional_intents: List[Intent],
    additional_utterances: List[Utterance],
    slots: List[Slot],
    use_rules: bool = False,
):
    """Merge all stories into one domain and one NLU file and write both to disk.

    Args:
        stories: Stories whose domains, intents and story/rule entries are merged.
        domain_filename: Target path of the domain file (replaced if it exists).
        nlu_filename: Target path of the NLU file (replaced if it exists).
        additional_intents: Intents to include on top of those found in the stories.
        additional_utterances: Utterances added as responses to the domain.
        slots: Slots added to the domain; their names are also used as entities.
        use_rules: If ``True``, the entries are written under ``"rules"``
            instead of ``"stories"``.
    """

    def replace_target(filename: str) -> None:
        # Remove a previous file and make sure the parent folder exists.
        if os.path.exists(filename):
            os.remove(filename)
        Path(filename).parent.mkdir(parents=True, exist_ok=True)

    merged_domain = Domain.empty()
    collected_intents: Set[Intent] = set(additional_intents)
    collected_stories: List[Story] = []
    collected_slot_was_sets: Set[SlotWasSet] = set()

    for story in stories:
        story_domain, story_entries, story_intents, story_slot_was_sets = (
            story.get_domain_nlu(use_rules=use_rules)
        )
        merged_domain = merged_domain.merge(story_domain)
        collected_intents.update(story_intents)
        collected_stories.extend(story_entries)
        collected_slot_was_sets.update(story_slot_was_sets)

    # Append consolidated slots
    additional_responses = {
        utterance.name: [{"text": utterance.text}]
        for utterance in additional_utterances
    }
    slots_domain = Domain(
        intents={intent.name for intent in collected_intents},
        entities=[slot.name for slot in slots],
        slots=slots,
        responses=additional_responses,
        action_names=[],
        forms={},
    )
    merged_domain = merged_domain.merge(slots_domain)

    # Validate domain
    rasa.shared.utils.validation.validate_yaml_schema(
        merged_domain.as_yaml(), rasa.shared.constants.DOMAIN_SCHEMA_FILE
    )

    # Write domain
    replace_target(domain_filename)
    merged_domain.persist(domain_filename)

    # Write NLU
    nlu_examples = [
        intent.as_nlu_yaml()
        for intent in collected_intents
        if isinstance(intent, IntentWithExamples)
    ]
    nlu_data = {
        "version": "2.0",
        "nlu": nlu_examples,
        "rules" if use_rules else "stories": collected_stories,
    }
    nlu_data_yaml = dump_obj_as_yaml_to_string(
        nlu_data, should_preserve_key_order=True
    )
    RasaYAMLReader().validate(nlu_data_yaml)

    replace_target(nlu_filename)
    write_text_file(nlu_data_yaml, nlu_filename)
def test_encode_entities__with_bilou_entity_roles_and_groups():
    """Entities with roles/groups are encoded as BILOU tag ids per token."""
    # Instantiate domain and configure the single state featurizer for this
    # domain. Note that there are 2 entity tags here.
    entity_tags = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    domain = Domain(
        intents=[],
        entities=entity_tags,
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    f = SingleStateFeaturizer()
    f.prepare_for_training(domain, bilou_tagging=True)

    def encode(text, entities):
        # create message that has been tokenized and where entities have been
        # extracted
        tokens = [
            Token(text=match.group(), start=match.start())
            for match in re.finditer(r"\S+", text)
        ]
        message = Message(
            {TEXT: text, TOKENS_NAMES[TEXT]: tokens, ENTITIES: entities}
        )

        # create a lookup table that has seen this message
        precomputations = MessageContainerForCoreFeaturization()
        precomputations.add(message)

        # encode!
        return f.encode_entities(
            {TEXT: text, ENTITIES: entities},
            precomputations=precomputations,
            bilou_tagging=True,
        )

    # (1) example with both entities
    encoded = encode(
        "I am flying from London to Paris",
        [
            {
                ENTITY_ATTRIBUTE_TYPE: entity_tags[0],
                ENTITY_ATTRIBUTE_VALUE: "London",
                ENTITY_ATTRIBUTE_START: 17,
                ENTITY_ATTRIBUTE_END: 23,
            },
            {
                ENTITY_ATTRIBUTE_TYPE: entity_tags[1],
                ENTITY_ATTRIBUTE_VALUE: "Paris",
                ENTITY_ATTRIBUTE_START: 27,
                ENTITY_ATTRIBUTE_END: 32,
            },
        ],
    )
    assert sorted(encoded.keys()) == [ENTITY_TAGS]
    assert np.all(
        encoded[ENTITY_TAGS][0].features == [[0], [0], [0], [0], [4], [0], [8]]
    )

    # (2) example with only the "city" entity
    encoded = encode(
        "I am flying to Saint Petersburg",
        [
            {
                ENTITY_ATTRIBUTE_TYPE: "city",
                ENTITY_ATTRIBUTE_VALUE: "Saint Petersburg",
                ENTITY_ATTRIBUTE_START: 15,
                ENTITY_ATTRIBUTE_END: 31,
            },
        ],
    )
    assert sorted(encoded.keys()) == [ENTITY_TAGS]
    assert np.all(encoded[ENTITY_TAGS][0].features == [[0], [0], [0], [0], [1], [3]])