Exemple #1
0
def test_encode_entities__with_entity_roles_and_groups():
    """Check that entities carrying a role/group are encoded as per-token tag ids.

    A tokenized message with two annotated entities is registered in the
    precomputation container; the featurizer is prepared from a domain listing
    the same entity tags and must emit one tag id per token.
    """

    # create fake message that has been tokenized and entities have been extracted
    text = "I am flying from London to Paris"
    tokens = [
        Token(text=match.group(), start=match.start())
        for match in re.finditer(r"\S+", text)
    ]
    entity_tags = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    entities = [
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[0],
            ENTITY_ATTRIBUTE_VALUE: "London",
            ENTITY_ATTRIBUTE_START: 17,
            ENTITY_ATTRIBUTE_END: 23,
        },
        {
            ENTITY_ATTRIBUTE_TYPE: entity_tags[1],
            ENTITY_ATTRIBUTE_VALUE: "Paris",
            ENTITY_ATTRIBUTE_START: 27,
            ENTITY_ATTRIBUTE_END: 32,
        },
    ]
    message = Message({TEXT: text, TOKENS_NAMES[TEXT]: tokens, ENTITIES: entities})

    # create a lookup table that has seen this message
    precomputations = MessageContainerForCoreFeaturization()
    precomputations.add(message)

    # instantiate matching domain and single state featurizer
    domain = Domain(
        intents=[],
        entities=entity_tags,
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    f = SingleStateFeaturizer()
    f.prepare_for_training(domain)

    # encode!
    encoded = f.encode_entities(
        entity_data={TEXT: text, ENTITIES: entities},
        precomputations=precomputations,
    )

    # check
    assert len(f.entity_tag_specs) == 1
    tags_to_ids = f.entity_tag_specs[0].tags_to_ids
    # The ids must really be verified (not overwritten): the first tag maps
    # to 1 and the tag with the role/group suffix maps to 2.
    for expected_id, entity_tag in enumerate(entity_tags, start=1):
        assert tags_to_ids[entity_tag] == expected_id
    assert sorted(encoded.keys()) == [ENTITY_TAGS]
    # one row per token; only "London" (1) and "Paris" (2) carry a tag
    assert np.all(
        encoded[ENTITY_TAGS][0].features == [[0], [0], [0], [0], [1], [0], [2]]
    )
def test_container_derive_messages_from_domain_and_add():
    """Container size must equal #intents + #action names/texts of the domain."""
    # Action names, action texts, response keys and forms have to be unique,
    # otherwise the domain complains about it ...
    names_of_actions = ["a", "b"]
    texts_of_actions = ["a2", "b2"]
    # ... whereas response texts are allowed to overlap with e.g. action texts
    response_map = {"a3": {TEXT: "a2"}, "b3": {TEXT: "b2"}}
    form_map = {"a4": "a4"}
    # intent names, however, can be anything
    intent_names = ["a", "b"]

    domain = Domain(
        intents=intent_names,
        action_names=names_of_actions,
        action_texts=texts_of_actions,
        responses=response_map,
        entities=["e_a", "e_b", "e_c"],
        slots=[TextSlot(name="s", mappings=[{}])],
        forms=form_map,
        data={},
    )

    container = MessageContainerForCoreFeaturization()
    container.derive_messages_from_domain_and_add(domain)

    expected_size = len(domain.intent_properties) + len(
        domain.action_names_or_texts
    )
    assert len(container) == expected_size
def test_encode_all_labels__encoded_all_action_names_and_texts():
    """Every action of the domain is encoded via its name, never via its text.

    "Labels" in ``encode_all_labels`` refers to actions.
    """
    domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        responses={},
        forms={},
        action_names=["a", "b", "c", "d"],
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain)

    container = MessageContainerForCoreFeaturization()
    container.derive_messages_from_domain_and_add(domain)

    encoded_actions = featurizer.encode_all_labels(domain, precomputations=container)

    # one encoding per action known to the domain
    assert len(encoded_actions) == len(domain.action_names_or_texts)
    for encoding in encoded_actions:
        assert ACTION_NAME in encoding and ACTION_TEXT not in encoding
Exemple #4
0
    def _get_domain_with_e2e_actions(self) -> Domain:
        """Return a domain containing only the end-to-end action texts of the stories.

        Every ``ActionExecuted`` event with a non-empty ``action_text`` found in
        any story step contributes its text (de-duplicated) to ``action_texts``;
        all other domain sections are left empty.
        """
        e2e_action_texts = set()
        for step in self.get_stories().story_steps:
            texts_in_step = {
                event.action_text
                for event in step.events
                if isinstance(event, ActionExecuted) and event.action_text
            }
            e2e_action_texts |= texts_in_step

        return Domain(
            [],
            [],
            [],
            {},
            action_names=[],
            forms={},
            action_texts=list(e2e_action_texts),
        )
Exemple #5
0
async def test_unpack_regex_message_has_correct_entity_start_and_end():
    """Unpacking a ``/intent{json}`` message yields the entity span of the JSON payload."""
    entity = "name"
    payload = json.dumps({entity: "Core"})
    original_message = Message(data={TEXT: f"/greet{payload}"})

    domain = Domain(
        intents=["greet"],
        entities=[entity],
        slots=[],
        responses={},
        action_names=[],
        forms={},
    )

    unpacked = YAMLStoryReader.unpack_regex_message(
        original_message, domain, entity_extractor_name="RegexMessageHandler"
    )

    # start/end cover the JSON part that follows the "/greet" prefix
    expected_entity = {
        "entity": "name",
        "value": "Core",
        "start": 6,
        "end": 22,
        EXTRACTOR: "RegexMessageHandler",
    }
    expected_intent = {"name": "greet", "confidence": 1.0}
    assert unpacked.data == {
        "text": '/greet{"name": "Core"}',
        "intent": expected_intent,
        "intent_ranking": [{"name": "greet", "confidence": 1.0}],
        "entities": [expected_entity],
    }
Exemple #6
0
def test_process_warns_if_intent_or_entities_not_in_domain(
    intent: Text,
    entities: Optional[Text],
    expected_intent: Text,
    domain_entities: List[Text],
):
    """Unpacking warns when the intent or an entity is unknown to the domain.

    Args:
        intent: Intent name placed in the message text.
        entities: Entity payload appended (JSON-dumped) to the text, or ``None``
            to append nothing.
        expected_intent: The only intent registered in the domain.
        domain_entities: Entities registered in the domain.
    """
    # construct text according to pattern
    text = INTENT_MESSAGE_PREFIX + intent  # do not add a confidence value
    if entities is not None:
        text += json.dumps(entities)
    message = Message(data={TEXT: text})

    # construct domain from expected intent/entities
    domain = Domain(
        intents=[expected_intent],
        entities=domain_entities,
        slots=[],
        responses={},
        action_names=[],
        forms={},
    )

    # expect a warning
    with pytest.warns(UserWarning):
        unpacked_message = YAMLStoryReader.unpack_regex_message(message, domain)

    if "wrong" not in intent:
        assert unpacked_message.data[INTENT][INTENT_NAME_KEY] == intent
        # `entities` may be None (see above), so guard before the membership
        # test instead of failing with a TypeError.
        if entities is not None and "wrong" in entities:
            assert unpacked_message.data[ENTITIES] is not None
            assert len(unpacked_message.data[ENTITIES]) == 0
    else:
        # an unknown intent leaves the message untouched
        assert unpacked_message == message
Exemple #7
0
def test_domain_action_instantiation():
    """Actions are instantiated for all default actions followed by the custom ones."""
    retrieval_intent = {"chitchat": {"is_retrieval_intent": True}}
    domain = Domain(
        intents=[retrieval_intent],
        entities=[],
        slots=[],
        templates={},
        action_names=["my_module.ActionTest", "utter_test", "utter_chitchat"],
        forms={},
    )

    instantiated_actions = [
        action.action_for_name_or_text(name_or_text, domain, None)
        for name_or_text in domain.action_names_or_texts
    ]

    # default actions come first, then the three actions declared above
    expected_names = [
        ACTION_LISTEN_NAME,
        ACTION_RESTART_NAME,
        ACTION_SESSION_START_NAME,
        ACTION_DEFAULT_FALLBACK_NAME,
        ACTION_DEACTIVATE_LOOP_NAME,
        ACTION_REVERT_FALLBACK_EVENTS_NAME,
        ACTION_DEFAULT_ASK_AFFIRMATION_NAME,
        ACTION_DEFAULT_ASK_REPHRASE_NAME,
        ACTION_TWO_STAGE_FALLBACK_NAME,
        ACTION_BACK_NAME,
        RULE_SNIPPET_ACTION_NAME,
        "my_module.ActionTest",
        "utter_test",
        "utter_chitchat",
    ]

    assert len(instantiated_actions) == 14
    for position, expected_name in enumerate(expected_names):
        assert instantiated_actions[position].name() == expected_name
    def _get_domain_with_retrieval_intents(
        retrieval_intents: Set[Text],
        response_templates: Dict[Text, List[Dict[Text, Any]]],
        existing_domain: Domain,
    ) -> Domain:
        """Construct a domain consisting of retrieval intents listed in the NLU training data.

        Args:
            retrieval_intents: Set of retrieval intents defined in NLU training data.
            response_templates: Response templates forwarded unchanged to the
                new domain.
            existing_domain: Domain which is already loaded from the domain file.

        Returns: Domain with retrieval actions added to action names and properties
        for retrieval intents updated.
        """
        # Reuse whatever properties the existing domain already holds for a
        # retrieval intent and flag them as retrieval intent properties.
        # NOTE(review): the properties object obtained from `existing_domain`
        # is updated in place - confirm that this is intended.
        flagged_properties = []
        for intent_name in retrieval_intents:
            if intent_name in existing_domain.intent_properties:
                properties = existing_domain.intent_properties[intent_name]
            else:
                properties = {}
            properties[IS_RETRIEVAL_INTENT_KEY] = True
            flagged_properties.append({intent_name: properties})

        retrieval_action_names = (
            RetrievalModelsDataImporter._construct_retrieval_action_names(
                retrieval_intents
            )
        )
        return Domain(
            flagged_properties,
            [],
            [],
            response_templates,
            retrieval_action_names,
            [],
        )
def test_process_does_not_do_anything(
    regex_message_handler: RegexMessageHandler, text: Text
):
    """The handler must return the given message unchanged for the given text."""
    sentence_features = Features(
        features=np.zeros((1, 1)),
        feature_type=FEATURE_TYPE_SENTENCE,
        attribute=TEXT,
        origin="nlu-pipeline",
    )
    message = Message(
        data={TEXT: text, INTENT: "bla"},
        features=[sentence_features],
    )

    # construct domain from expected intent/entities
    domain = Domain(
        intents=["intent"],
        entities=["entity"],
        slots=[],
        responses={},
        action_names=[],
        forms={},
        data={},
    )

    processed = regex_message_handler.process([message], domain)

    first_processed = processed[0]
    assert first_processed == message
Exemple #10
0
def test_check_domain_sanity_on_invalid_domain():
    """Duplicated action names, slots, entities or forms make the domain invalid."""

    def build_domain(**overrides):
        # Start from an empty domain and replace only the section under test.
        kwargs = dict(
            intents={},
            entities=[],
            slots=[],
            templates={},
            action_names=[],
            forms=[],
        )
        kwargs.update(overrides)
        return Domain(**kwargs)

    # duplicated action names
    with pytest.raises(InvalidDomain):
        build_domain(action_names=["random_name", "random_name"])

    # duplicated slots
    with pytest.raises(InvalidDomain):
        build_domain(slots=[TextSlot("random_name"), TextSlot("random_name")])

    # duplicated entities
    with pytest.raises(InvalidDomain):
        build_domain(
            entities=["random_name", "random_name", "other_name", "other_name"]
        )

    # duplicated forms
    with pytest.raises(InvalidDomain):
        build_domain(forms=["random_name", "random_name"])
Exemple #11
0
def test_verify_domain_with_duplicates(
    duplicates: Optional[Dict[Text, List[Text]]],
    is_valid: bool,
    warning_type: Any,
    messages: List[Text],
):
    """The validator reports domain duplicates via warnings and its return value.

    Args:
        duplicates: Duplicates handed to the domain constructor.
        is_valid: Expected result of ``verify_domain_duplicates``.
        warning_type: Warning category passed to ``pytest.warns``.
        messages: Expected fragments of the emitted warnings, in order.
    """
    domain = Domain([], [], [], {}, [], {}, duplicates=duplicates)
    validator = Validator(domain, None, None, None)

    with pytest.warns(warning_type) as warning:
        assert validator.verify_domain_duplicates() is is_valid

    # exactly one recorded warning per expected message, in the same order
    assert len(warning) == len(messages)
    for position, expected_fragment in enumerate(messages):
        assert expected_fragment in warning[position].message.args[0]
def get_domain_nlu(state: StateMachineState, is_initial_state: bool):
    """Derive a domain and an NLU payload from one state-machine state.

    Args:
        state: State whose entities, intents, actions and slots are collected.
        is_initial_state: Stored alongside the YAML dump of ``state`` in the
            domain's ``state_machine_states`` entry.

    Returns:
        A ``(domain, nlu_data)`` pair; ``nlu_data`` is a dict with the keys
        ``"version"`` and ``"nlu"``.
    """
    # De-duplicate everything the state exposes.
    entity_names = {name for name in state.all_entities()}
    intents: Set[IntentWithExamples] = {item for item in state.all_intents()}
    actions: Set[Action] = {item for item in state.all_actions()}
    utterances: Set[Utterance] = {
        item for item in actions if isinstance(item, Utterance)
    }
    slots: Set[Slot] = {item for item in state.all_slots()}

    # Write domain
    intent_names = [item.name for item in intents]
    rasa_slots = [item.as_rasa_slot() for item in slots]
    responses = {item.name: [{"text": item.text}] for item in utterances}
    action_names = [item.name for item in actions]
    state_entry = {
        "is_initial_state": is_initial_state,
        "state_yaml": yaml.dump(state),
    }
    domain = Domain(
        intents=intent_names,
        entities=entity_names,  # collection of entity names
        slots=rasa_slots,
        responses=responses,
        action_names=action_names,
        forms={},
        action_texts=[],
        state_machine_states={state.name: state_entry},
    )

    # Write NLU: only intents that come with examples are exported
    nlu_entries = [
        item.as_nlu_yaml()
        for item in intents
        if isinstance(item, IntentWithExamples)
    ]
    nlu_data = {"version": "2.0", "nlu": nlu_entries}

    return domain, nlu_data
Exemple #13
0
def test_single_state_featurizer_creates_encoded_all_actions():
    """All domain actions are encoded by name only (no action text feature)."""
    domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        templates={},
        forms=[],
        action_names=["a", "b", "c", "d"],
    )
    featurizer = SingleStateFeaturizer()
    featurizer.prepare_from_domain(domain)

    encoded_actions = featurizer.encode_all_actions(domain, RegexInterpreter())

    # one encoding per action name of the domain
    assert len(encoded_actions) == len(domain.action_names)
    for encoding in encoded_actions:
        assert ACTION_NAME in encoding and ACTION_TEXT not in encoding
    async def _get_domain_with_e2e_actions(self) -> Domain:
        """Return a domain whose action names are the end-to-end action texts of the stories.

        The texts are taken from every ``ActionExecuted`` event with a non-empty
        ``action_text`` and are de-duplicated before being passed on.
        """
        from rasa.shared.core.events import ActionExecuted

        story_graph = await self.get_stories()

        e2e_action_texts = set()
        for step in story_graph.story_steps:
            texts_in_step = {
                event.action_text
                for event in step.events
                if isinstance(event, ActionExecuted) and event.action_text
            }
            e2e_action_texts |= texts_in_step

        return Domain(
            [],
            [],
            [],
            {},
            action_names=list(e2e_action_texts),
            forms=[],
        )
Exemple #15
0
def test_converter_for_training(
    input_converter: CoreFeaturizationInputConverter,
):
    """Training conversion yields messages for story events and for domain items."""
    # create domain and story graph
    domain = Domain(
        intents=["greet", "inform", "domain-only-intent"],
        entities=["entity_name"],
        slots=[],
        responses=dict(),
        action_names=["action_listen", "utter_greet"],
        forms=dict(),
        action_texts=["Hi how are you?"],
    )
    greeting = UserUttered(
        text="hey this has some entities",
        intent={INTENT_NAME_KEY: "greet"},
        entities=[_create_entity(value="Bot", type="entity_name")],
    )
    information = UserUttered(
        text="some test with an intent!", intent={INTENT_NAME_KEY: "inform"}
    )
    events = [
        ActionExecuted(action_name="action_listen"),
        greeting,
        ActionExecuted(action_name="utter_greet", action_text="Hi how are you?"),
        ActionExecuted(action_name="action_listen"),
        information,
        ActionExecuted(action_name="action_listen"),
    ]
    story_graph = StoryGraph([StoryStep(events=events)])

    # convert!
    training_data = input_converter.convert_for_training(
        domain=domain, story_graph=story_graph
    )
    messages = training_data.training_examples

    # check that messages were created from (story) events as expected
    _check_messages_created_from_events_as_expected(events=events, messages=messages)

    # check that messages were created from domain as expected
    for intent_name in domain.intent_properties:
        assert Message(data={INTENT: intent_name}) in messages
    for name_or_text in domain.action_names_or_texts:
        # action texts are stored under ACTION_TEXT, everything else under ACTION_NAME
        attribute = (
            ACTION_TEXT if name_or_text in domain.action_texts else ACTION_NAME
        )
        assert Message(data={attribute: name_or_text}) in messages

    # check that each message contains only one attribute, which must be a key attribute
    _check_messages_contain_attribute_which_is_key_attribute(messages=messages)
Exemple #16
0
def test_single_state_featurizer_uses_regex_interpreter(
    unpacked_trained_moodbot_path: Text,
):
    """No text features are produced when the featurizer was prepared without NLU."""
    from rasa.core.agent import Agent

    empty_domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        responses={},
        forms=[],
        action_names=[],
    )
    featurizer = SingleStateFeaturizer()
    # simulate that core was trained separately by passing
    # RegexInterpreter to prepare_for_training
    featurizer.prepare_for_training(empty_domain, RegexInterpreter())

    # simulate that nlu and core models were manually combined for prediction
    # by passing the trained interpreter to _extract_state_features
    trained_interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
    state = {TEXT: "some text"}
    features = featurizer._extract_state_features(state, trained_interpreter)

    # RegexInterpreter cannot create features for text, therefore since featurizer
    # was trained without nlu, features for text should be empty
    assert not features
Exemple #17
0
def test_single_state_featurizer_with_entity_roles_and_groups(
    unpacked_trained_moodbot_path: Text,
):
    """Entities with a role/group suffix are encoded as per-token tag ids."""
    from rasa.core.agent import Agent

    interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
    # TODO roles and groups are not supported in e2e yet
    plain_tag = "city"
    tag_with_role = f"city{ENTITY_LABEL_SEPARATOR}to"
    domain = Domain(
        intents=[],
        entities=[plain_tag, tag_with_role],
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain, RegexInterpreter())

    london = {
        ENTITY_ATTRIBUTE_TYPE: "city",
        ENTITY_ATTRIBUTE_VALUE: "London",
        ENTITY_ATTRIBUTE_START: 17,
        ENTITY_ATTRIBUTE_END: 23,
    }
    paris = {
        ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to",
        ENTITY_ATTRIBUTE_VALUE: "Paris",
        ENTITY_ATTRIBUTE_START: 27,
        ENTITY_ATTRIBUTE_END: 32,
    }
    entity_data = {
        TEXT: "I am flying from London to Paris",
        ENTITIES: [london, paris],
    }
    encoded = featurizer.encode_entities(entity_data, interpreter=interpreter)

    assert sorted(encoded.keys()) == sorted([ENTITY_TAGS])
    # one row per token; only "London" (1) and "Paris" (2) carry a tag
    expected_tag_ids = [[0], [0], [0], [0], [1], [0], [2]]
    assert np.all(encoded[ENTITY_TAGS][0].features == expected_tag_ids)
Exemple #18
0
def test_domain_validation_with_valid_marker(
    depth: int, max_branches: int, seed: int
):
    """A random marker validates against a domain built from its own parameters."""
    # We do this a bit backwards, we construct the domain from the marker
    # and assert they must match
    generator = np.random.default_rng(seed=seed)
    marker, expected_size = generate_random_marker(
        depth=depth,
        max_branches=max_branches,
        rng=generator,
        possible_conditions=CONDITION_MARKERS,
        possible_operators=OPERATOR_MARKERS,
        constant_condition_text=None,
        constant_negated=None,
    )

    slot_names = _collect_parameters(marker, SlotSetMarker)
    slots = [Slot(slot_name, []) for slot_name in slot_names]
    actions = list(_collect_parameters(marker, ActionExecutedMarker))
    intents = _collect_parameters(marker, IntentDetectedMarker)

    domain = Domain(intents, [], slots, {}, actions, {})

    assert marker.validate_against_domain(domain)
def test_single_state_featurizer_prepare_for_training():
    """Preparing the featurizer fills its default feature states from the domain."""
    domain = Domain(
        intents=["greet"],
        entities=["name"],
        slots=[Slot("name")],
        templates={},
        forms=[],
        action_names=["utter_greet", "action_check_weather"],
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain, RegexInterpreter())

    # intents: more than just the one declared above
    intent_states = featurizer._default_feature_states[INTENT]
    assert len(intent_states) > 1
    assert "greet" in intent_states

    # entities and slots: exactly the declared ones, indexed from 0
    entity_states = featurizer._default_feature_states[ENTITIES]
    assert len(entity_states) == 1
    assert entity_states["name"] == 0
    slot_states = featurizer._default_feature_states[SLOTS]
    assert len(slot_states) == 1
    assert slot_states["name_0"] == 0

    # actions: the declared ones plus further entries
    action_states = featurizer._default_feature_states[ACTION_NAME]
    assert len(action_states) > 2
    assert "utter_greet" in action_states
    assert "action_check_weather" in action_states

    # no forms were declared, so there are no active-loop states
    assert len(featurizer._default_feature_states[ACTIVE_LOOP]) == 0
def test_prepare_for_training():
    """Preparing the featurizer fills its default feature states from the domain."""
    domain = Domain(
        intents=["greet"],
        entities=["name"],
        slots=[TextSlot("name", mappings=[{}])],
        responses={},
        forms={},
        action_names=["utter_greet", "action_check_weather"],
        data={},
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_for_training(domain)

    # intents: more than just the one declared above
    intent_states = featurizer._default_feature_states[INTENT]
    assert len(intent_states) > 1
    assert "greet" in intent_states

    # entities and slots: exactly the declared ones, indexed from 0
    entity_states = featurizer._default_feature_states[ENTITIES]
    assert len(entity_states) == 1
    assert entity_states["name"] == 0
    slot_states = featurizer._default_feature_states[SLOTS]
    assert len(slot_states) == 1
    assert slot_states["name_0"] == 0

    # actions: the declared ones plus further entries
    action_states = featurizer._default_feature_states[ACTION_NAME]
    assert len(action_states) > 2
    assert "utter_greet" in action_states
    assert "action_check_weather" in action_states

    # no forms were declared, so there are no active-loop states
    assert len(featurizer._default_feature_states[ACTIVE_LOOP]) == 0
async def test_logging_of_end_to_end_action():
    """An end-to-end prediction is logged as ``ActionExecuted`` with an action text."""
    end_to_end_action = "hi, how are you?"
    conversation_id = "test_logging_of_end_to_end_action"
    user_message = "/greet"

    domain = Domain(
        intents=["greet"],
        entities=[],
        slots=[],
        templates={},
        action_names=[],
        forms={},
        action_texts=[end_to_end_action],
    )

    class ConstantEnsemble(PolicyEnsemble):
        """Predicts the end-to-end action on the first call, the listen action afterwards."""

        def __init__(self) -> None:
            super().__init__([])
            self.number_of_calls = 0

        def probabilities_using_best_policy(
            self,
            tracker: DialogueStateTracker,
            domain: Domain,
            interpreter: NaturalLanguageInterpreter,
            **kwargs: Any,
        ) -> PolicyPrediction:
            # every call after the first one just listens
            if self.number_of_calls != 0:
                return PolicyPrediction.for_action_name(domain, ACTION_LISTEN_NAME)

            first_prediction = PolicyPrediction.for_action_name(
                domain, end_to_end_action, "some policy"
            )
            first_prediction.is_end_to_end_prediction = True
            self.number_of_calls += 1
            return first_prediction

    tracker_store = InMemoryTrackerStore(domain)
    lock_store = InMemoryLockStore()
    processor = MessageProcessor(
        RegexInterpreter(),
        ConstantEnsemble(),
        domain,
        tracker_store,
        lock_store,
        NaturalLanguageGenerator.create(None, domain),
    )

    incoming = UserMessage(user_message, sender_id=conversation_id)
    await processor.handle_message(incoming)

    tracker = tracker_store.retrieve(conversation_id)
    expected_events = [
        ActionExecuted(ACTION_SESSION_START_NAME),
        SessionStarted(),
        ActionExecuted(ACTION_LISTEN_NAME),
        UserUttered(user_message, intent={"name": "greet"}),
        ActionExecuted(action_text=end_to_end_action),
        BotUttered("hi, how are you?", {}, {}, 123),
        ActionExecuted(ACTION_LISTEN_NAME),
    ]
    # pairwise comparison of logged and expected events
    for logged_event, expected_event in zip(tracker.events, expected_events):
        assert logged_event == expected_event
Exemple #22
0
def test_process_unpacks_attributes_from_single_message_and_fallsback_if_needed(
    confidence: Optional[Text],
    entities: Optional[Text],
    expected_confidence: float,
    expected_entities: Optional[List[Dict[Text, Any]]],
    should_warn: bool,
):
    """Unpacking a regex message extracts intent, confidence and entities.

    Args:
        confidence: Confidence appended to the intent as ``@<confidence>``, or
            ``None`` to append nothing.
        entities: Raw entity string appended to the text, or ``None``.
        expected_confidence: Confidence expected in the unpacked message.
        expected_entities: Entities expected in the unpacked message; ``None``
            or an empty list means no entities are expected.
        should_warn: Whether unpacking is expected to emit a ``UserWarning``.
    """
    # dummy intent
    expected_intent = "my-intent"

    # construct text according to pattern (surrounded by whitespace on purpose)
    text = " \t  " + INTENT_MESSAGE_PREFIX + expected_intent
    if confidence is not None:
        text += f"@{confidence}"
    if entities is not None:
        text += entities
    text += " \t "

    # create a message with some dummy attributes and features
    message = Message(
        data={TEXT: text, INTENT: "extracted-from-the-pattern-text-via-nlu"},
        features=[
            Features(
                features=np.zeros((1, 1)),
                feature_type=FEATURE_TYPE_SENTENCE,
                attribute=TEXT,
                origin="nlu-pipeline",
            )
        ],
    )

    # construct domain from expected intent/entities; `expected_entities` is
    # optional, so treat None like "no entities" instead of raising a TypeError
    domain_entities = [
        item[ENTITY_ATTRIBUTE_TYPE] for item in expected_entities or []
    ]
    domain = Domain(
        intents=[expected_intent],
        entities=domain_entities,
        slots=[],
        responses={},
        action_names=[],
        forms={},
    )

    # extract information
    if should_warn:
        with pytest.warns(UserWarning):
            unpacked_message = YAMLStoryReader.unpack_regex_message(message, domain)
    else:
        unpacked_message = YAMLStoryReader.unpack_regex_message(message, domain)

    # the dummy features must be gone after unpacking
    assert not unpacked_message.features

    assert set(unpacked_message.data.keys()) == {
        TEXT,
        INTENT,
        INTENT_RANKING_KEY,
        ENTITIES,
    }

    # surrounding whitespace is stripped from the text
    assert unpacked_message.data[TEXT] == message.data[TEXT].strip()

    assert set(unpacked_message.data[INTENT].keys()) == {
        INTENT_NAME_KEY,
        PREDICTED_CONFIDENCE_KEY,
    }
    assert unpacked_message.data[INTENT][INTENT_NAME_KEY] == expected_intent
    assert (
        unpacked_message.data[INTENT][PREDICTED_CONFIDENCE_KEY] == expected_confidence
    )

    intent_ranking = unpacked_message.data[INTENT_RANKING_KEY]
    assert len(intent_ranking) == 1
    assert intent_ranking[0] == {
        INTENT_NAME_KEY: expected_intent,
        PREDICTED_CONFIDENCE_KEY: expected_confidence,
    }
    if expected_entities:
        entity_data: List[Dict[Text, Any]] = unpacked_message.data[ENTITIES]
        expected_keys = {
            ENTITY_ATTRIBUTE_VALUE,
            ENTITY_ATTRIBUTE_TYPE,
            ENTITY_ATTRIBUTE_START,
            ENTITY_ATTRIBUTE_END,
        }
        assert all(set(item.keys()) == expected_keys for item in entity_data)
        # compare (type, value) pairs regardless of order
        assert {
            (item[ENTITY_ATTRIBUTE_TYPE], item[ENTITY_ATTRIBUTE_VALUE])
            for item in expected_entities
        } == {
            (item[ENTITY_ATTRIBUTE_TYPE], item[ENTITY_ATTRIBUTE_VALUE])
            for item in entity_data
        }
    else:
        assert unpacked_message.data[ENTITIES] is not None
        assert len(unpacked_message.data[ENTITIES]) == 0
    def get_domain_nlu(
        self, use_rules: bool
    ) -> Tuple[Domain, List[Dict], Set[Intent], Set["SlotWasSet"]]:
        """Build the domain and the story/rule entries for this story.

        Walks over ``self.paths``, collecting intents, utterances, actions and
        slot-was-set elements. ``OrActions`` and ``Fork`` elements spawn
        sub-stories that are processed recursively; their domains are merged
        into the returned domain and their entries appended to the result.

        Args:
            use_rules: If true, entries are keyed ``"rule"`` instead of
                ``"story"`` and get ``wait_for_user_input`` set to ``False``.

        Returns:
            A tuple ``(domain, story_nlu, all_intents, all_slot_was_sets)``.

        Raises:
            RuntimeError: If a ``Fork`` is the first element of the path.
            ValueError: If a ``Fork`` is not the last element of its path.
        """
        story_nlu_steps: List[str] = []
        # element seen in the previous iteration (None before the first one)
        last_element = None

        sub_domains: List[Domain] = []
        sub_nlus: List[Dict] = []

        all_intents: Set[Intent] = set()
        all_utterances: Set[Utterance] = set()
        all_actions: Set[Action] = set()
        all_slot_was_sets: Set[SlotWasSet] = set()

        def absorb_sub_story(sub_story) -> None:
            # Recurse into a forked story and fold its results into ours.
            (
                sub_domain,
                sub_nlu,
                sub_intents,
                sub_slot_was_sets,
            ) = sub_story.get_domain_nlu(use_rules=use_rules)
            sub_domains.append(sub_domain)
            sub_nlus.extend(sub_nlu)
            all_intents.update(sub_intents)
            all_slot_was_sets.update(sub_slot_was_sets)

        for element_index, element in enumerate(self.paths):
            # Add to current story
            story_nlu_steps.append(element.as_story_yaml())

            if isinstance(element, Intent):
                all_intents.add(element)
            elif isinstance(element, Utterance):
                all_utterances.add(element)
            elif isinstance(element, Or):
                all_intents.update(element.all_intents())
            elif isinstance(element, Action):
                all_actions.add(element)
            elif isinstance(element, SlotWasSet):
                all_slot_was_sets.add(element)
            elif isinstance(element, OrActions):
                # Start a new story for every alternative action; each one
                # continues with the remainder of this path.
                for action_index, action in enumerate(element.actions):
                    absorb_sub_story(
                        Story(
                            name=f"{self.name}_action_fork_{action_index}",
                            elements=[action] + self.paths[element_index + 1 :],
                        )
                    )

                # All subsequent elements have been accounted for, so break
                break
            elif isinstance(element, Fork):
                if last_element:
                    assert isinstance(last_element, Action)
                else:
                    # This used to be `assert RuntimeError(...)`, which never
                    # fires because an exception instance is always truthy.
                    raise RuntimeError(
                        "Fork must not be the first element in a story path."
                    )

                # Start a new story for every fork; each one starts with the
                # element that preceded the fork.
                for index, path in enumerate(element.paths):
                    absorb_sub_story(
                        Story(
                            name=f"{self.name}_fork_{index}",
                            elements=[last_element] + path,
                        )
                    )

                if element_index != len(self.paths) - 1:
                    raise ValueError(
                        "The fork must be the last element of its path."
                    )

            last_element = element

        # Filter out empty dictionaries
        story_nlu_steps = [step for step in story_nlu_steps if step]

        # Persist domain
        domain = Domain(
            intents=list({intent.name for intent in all_intents}),
            entities=list(
                {
                    entity
                    for intent in all_intents
                    for entity in intent.entities
                }
            ),  # List of entity names
            slots=[],
            responses={
                utterance.name: [{"text": utterance.text}]
                for utterance in all_utterances
            },
            action_names=[action.name for action in all_actions],
            forms={},
            action_texts=[],
        )

        # Merge sub-domains
        for sub_domain in sub_domains:
            domain = domain.merge(sub_domain)

        # Save current story
        story_nlu = (
            [
                {
                    "rule" if use_rules else "story": self.name,
                    "steps": story_nlu_steps,
                }
            ]
            if len(story_nlu_steps)
            > 1  # Omit all stories with less than 2 steps
            else []
        )
        story_nlu += sub_nlus

        # wait_for_user_input
        if use_rules:
            for nlu in story_nlu:
                nlu["wait_for_user_input"] = False

        return (domain, story_nlu, all_intents, all_slot_was_sets)
Exemple #24
0
def persist(
    stories: List[Story],
    domain_filename: str,
    nlu_filename: str,
    additional_intents: List[Intent],
    additional_utterances: List[Utterance],
    slots: List[Slot],
    use_rules: bool = False,
) -> None:
    """Merge the output of all stories and write a domain file and a data file.

    Every story is asked for its domain, its story/rule entries and its
    intents via ``get_domain_nlu``. The results are merged, extended with the
    additional intents, utterances and slots, validated, and written to disk.
    Existing target files are replaced and missing parent directories are
    created.

    Args:
        stories: Stories whose ``get_domain_nlu`` output is merged.
        domain_filename: Path the merged domain is persisted to.
        nlu_filename: Path the YAML training data is written to.
        additional_intents: Intents included on top of those collected from
            the stories.
        additional_utterances: Utterances added to the domain as text
            responses, keyed by utterance name.
        slots: Slots added to the domain. Each slot name is also registered
            as an entity name.
        use_rules: Forwarded to ``get_domain_nlu``; also selects the
            ``"rules"`` key instead of ``"stories"`` in the written data.

    Note:
        Both validation calls run before the corresponding file is touched.
        Presumably they raise on invalid content -- confirm against the
        validators' documentation.
    """
    all_domain = Domain.empty()
    all_intents: Set[Intent] = set(additional_intents)
    # Entries are the plain dicts produced by ``get_domain_nlu``.
    all_stories: List[dict] = []

    for story in stories:
        # The fourth return value (slot-was-set events) is not needed here.
        domain, sub_stories, intents, _ = story.get_domain_nlu(
            use_rules=use_rules)

        all_domain = all_domain.merge(domain)
        all_intents.update(intents)
        all_stories.extend(sub_stories)

    # Iterating a set of objects has no stable order between interpreter
    # runs, so sort by name once to keep the generated files reproducible.
    ordered_intents = sorted(all_intents, key=lambda intent: intent.name)

    # Append consolidated slots
    domain_slots = Domain(
        intents=sorted({intent.name for intent in all_intents}),
        entities=[slot.name for slot in slots],
        slots=slots,
        responses={
            utterance.name: [{
                "text": utterance.text
            }]
            for utterance in additional_utterances
        },
        action_names=[],
        forms={},
    )
    all_domain = all_domain.merge(domain_slots)

    # Validate domain
    rasa.shared.utils.validation.validate_yaml_schema(
        all_domain.as_yaml(), rasa.shared.constants.DOMAIN_SCHEMA_FILE)

    # Write domain
    if os.path.exists(domain_filename):
        os.remove(domain_filename)

    Path(domain_filename).parent.mkdir(parents=True, exist_ok=True)
    all_domain.persist(domain_filename)

    # Write NLU; only intents that carry examples contribute an NLU entry.
    nlu_data = {
        "version":
        "2.0",
        "nlu": [
            intent.as_nlu_yaml() for intent in ordered_intents
            if isinstance(intent, IntentWithExamples)
        ],
        "rules" if use_rules else "stories":
        all_stories,
    }

    nlu_data_yaml = dump_obj_as_yaml_to_string(nlu_data,
                                               should_preserve_key_order=True)

    RasaYAMLReader().validate(nlu_data_yaml)

    if os.path.exists(nlu_filename):
        os.remove(nlu_filename)

    Path(nlu_filename).parent.mkdir(parents=True, exist_ok=True)

    write_text_file(nlu_data_yaml, nlu_filename)
def test_encode_entities__with_bilou_entity_roles_and_groups():
    """Check the tag ids ``encode_entities`` yields when BILOU tagging is on.

    A featurizer is prepared once for a domain with two entity tags and is
    then run on two messages; for each, the per-token tag ids are compared
    with the expected column of ids.
    """

    # Domain with 2 entity tags: the plain tag and the tag extended with "to".
    entity_tags = ["city", f"city{ENTITY_LABEL_SEPARATOR}to"]
    plain_tag, extended_tag = entity_tags
    domain = Domain(
        intents=[],
        entities=entity_tags,
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    f = SingleStateFeaturizer()
    f.prepare_for_training(domain, bilou_tagging=True)

    # Each scenario: (message text, extracted entities, expected tag id per token).
    scenarios = [
        # (1) example with both entities, each covering a single token
        (
            "I am flying from London to Paris",
            [
                {
                    ENTITY_ATTRIBUTE_TYPE: plain_tag,
                    ENTITY_ATTRIBUTE_VALUE: "London",
                    ENTITY_ATTRIBUTE_START: 17,
                    ENTITY_ATTRIBUTE_END: 23,
                },
                {
                    ENTITY_ATTRIBUTE_TYPE: extended_tag,
                    ENTITY_ATTRIBUTE_VALUE: "Paris",
                    ENTITY_ATTRIBUTE_START: 27,
                    ENTITY_ATTRIBUTE_END: 32,
                },
            ],
            [[0], [0], [0], [0], [4], [0], [8]],
        ),
        # (2) example with only the "city" entity, spanning the last two tokens
        (
            "I am flying to Saint Petersburg",
            [
                {
                    ENTITY_ATTRIBUTE_TYPE: "city",
                    ENTITY_ATTRIBUTE_VALUE: "Saint Petersburg",
                    ENTITY_ATTRIBUTE_START: 15,
                    ENTITY_ATTRIBUTE_END: 31,
                },
            ],
            [[0], [0], [0], [0], [1], [3]],
        ),
    ]

    for text, entities, expected_tag_ids in scenarios:
        # Whitespace-tokenize the text, keeping each token's start offset.
        tokens = []
        for match in re.finditer(r"\S+", text):
            tokens.append(Token(text=match.group(), start=match.start()))

        message = Message(
            {TEXT: text, TOKENS_NAMES[TEXT]: tokens, ENTITIES: entities}
        )

        # A fresh lookup table that has seen exactly this message.
        precomputations = MessageContainerForCoreFeaturization()
        precomputations.add(message)

        # encode!
        entity_data = {TEXT: text, ENTITIES: entities}
        encoded = f.encode_entities(
            entity_data, precomputations=precomputations, bilou_tagging=True
        )

        # Only the entity-tag attribute is expected in the result.
        assert sorted(encoded.keys()) == [ENTITY_TAGS]
        tag_features = encoded[ENTITY_TAGS][0].features
        assert np.all(tag_features == expected_tag_ids)