Esempio n. 1
0
 def test_train_kwargs_are_set_on_model(
     self,
     default_domain: Domain,
     trackers: List[TrackerWithCachedStates],
     featurizer: Optional[TrackerFeaturizer],
     priority: int,
 ):
     """Extra keyword arguments given at creation must end up on the model.

     The policy is created with ``cv=None`` and ``C=123``; after training,
     ``policy.model.C`` is expected to equal that same value.
     """
     expected_c = 123
     policy = self.create_policy(
         featurizer=featurizer, priority=priority, cv=None, C=expected_c
     )
     policy.train(
         trackers, domain=default_domain, interpreter=RegexInterpreter()
     )

     fitted_model = policy.model
     assert fitted_model.C == expected_c
Esempio n. 2
0
    def test_normalization(
        self,
        trained_policy: Policy,
        tracker: DialogueStateTracker,
        default_domain: Domain,
        monkeypatch: MonkeyPatch,
    ):
        """Predicting with the trained policy must not call `train_utils.normalize`."""
        # Replace the real normalization function with a recording stand-in
        normalize_spy = Mock()
        monkeypatch.setattr(train_utils, "normalize", normalize_spy.normalize)

        interpreter = RegexInterpreter()
        trained_policy.predict_action_probabilities(
            tracker, default_domain, interpreter
        )

        # function should not get called for margin loss_type
        normalize_spy.normalize.assert_not_called()
Esempio n. 3
0
def test_policy_priority():
    """The policy with priority 2 must win wherever it sits in the ensemble.

    Two ensembles are built from the same pair of constant policies in
    opposite order; in both, the reported best policy and the returned
    probabilities must be those of the priority-2 policy.
    """
    domain = Domain.load("data/test_domains/default.yml")
    tracker = DialogueStateTracker.from_events("test", [UserUttered("hi")], [])

    low_priority = ConstantPolicy(priority=1, predict_index=0)
    high_priority = ConstantPolicy(priority=2, predict_index=1)

    # Each case pairs an ensemble with the index of `high_priority` inside it.
    cases = [
        (SimplePolicyEnsemble([low_priority, high_priority]), 1),
        (SimplePolicyEnsemble([high_priority, low_priority]), 0),
    ]

    expected_result = high_priority.predict_action_probabilities(
        tracker, domain, RegexInterpreter()
    )
    winner_class_name = type(high_priority).__name__

    for ensemble, position in cases:
        result, best_policy = ensemble.probabilities_using_best_policy(
            tracker, domain, RegexInterpreter()
        )
        assert best_policy == f"policy_{position}_{winner_class_name}"
        assert result == expected_result
    def _parse_message(self, message: Text, line_num: int) -> UserUttered:
        """Turn one raw story message into a `UserUttered` event.

        Args:
            message: The raw user message text taken from the story.
            line_num: Line number of the message; only used in the warning.

        Returns:
            The `UserUttered` event built from the parsed message. A
            `UserWarning` is raised via `raise_warning` when a domain is set
            and the intent name is not among the domain's intents.
        """
        if not self.use_e2e:
            # Without e2e the text is dropped and only the parse data is kept.
            parse_data = RegexInterpreter().synchronous_parse(message)
            text = None
            intent = parse_data.get("intent")
        else:
            parsed = self.parse_e2e_message(message,
                                            self._is_used_for_training)
            text = parsed.get("text")
            # Prefer the intent response key, falling back to the plain intent.
            fallback_intent = parsed.get("intent")
            intent = {
                INTENT_NAME_KEY:
                parsed.get("intent_response_key", default=fallback_intent)
            }
            parse_data = {
                "text": text,
                "intent": intent,
                "intent_ranking": [intent],
                "entities": parsed.get("entities"),
            }

        utterance = UserUttered(
            text, intent, parse_data.get("entities"), parse_data
        )
        intent_name = utterance.intent.get(INTENT_NAME_KEY)

        if self.domain and intent_name not in self.domain.intents:
            rasa.shared.utils.io.raise_warning(
                f"Found unknown intent '{intent_name}' on line {line_num}. "
                "Please, make sure that all intents are "
                "listed in your domain yaml.",
                UserWarning,
                docs=DOCS_URL_DOMAINS,
            )

        return utterance
Esempio n. 5
0
    async def test_memorise(self, trained_policy: MemoizationPolicy,
                            default_domain: Domain):
        """Check recall of trained states and that augmentation leaves the lookup unchanged.

        Covers three things: every non-augmented tracker is recalled as its
        first action, randomly valued states recall nothing, and training
        with augmentation factor 0 yields the same lookup as factor 20.
        """
        augmented_trackers = await train_trackers(default_domain,
                                                  augmentation_factor=20)
        trained_policy.train(augmented_trackers, default_domain,
                             RegexInterpreter())
        lookup_with_augmentation = trained_policy.lookup

        # Keep only the trackers that are not flagged as augmented.
        original_trackers = [
            candidate for candidate in augmented_trackers
            if not getattr(candidate, "is_augmented", False)
        ]

        state_featurizer = trained_policy.featurizer
        all_states, all_actions = state_featurizer.training_states_and_actions(
            original_trackers, default_domain)

        for tracker, states, actions in zip(original_trackers, all_states,
                                            all_actions):
            recalled = trained_policy.recall(states, tracker, default_domain)
            assert recalled == actions[0]

        # Randomly valued states are expected to have no memorised entry.
        nums = np.random.randn(default_domain.num_states)
        random_states = [dict(zip(default_domain.input_states, nums))]
        assert trained_policy._recall_states(random_states) is None

        # compare augmentation for augmentation_factor of 0 and 20:
        plain_trackers = await train_trackers(default_domain,
                                              augmentation_factor=0)
        trained_policy.train(plain_trackers, default_domain,
                             RegexInterpreter())
        lookup_no_augmentation = trained_policy.lookup

        assert lookup_no_augmentation == lookup_with_augmentation
Esempio n. 6
0
    def test_cv_not_none_param_grid_none_triggers_search_with_params(
            self, mock_search, default_domain, trackers, featurizer, priority):
        """With `cv=3` and a param grid, training must run the mocked search.

        The first search call is expected to receive both `cv` and the given
        `param_grid` as keyword arguments, and the policy's model must be the
        value `"mockmodel"`.
        """
        param_grid = {"n_estimators": 50}
        policy = self.create_policy(
            featurizer=featurizer, priority=priority, cv=3, param_grid=param_grid
        )
        policy.train(
            trackers, domain=default_domain, interpreter=RegexInterpreter()
        )

        assert mock_search.call_count > 0

        # Index 1 of a recorded call holds its keyword arguments.
        first_call_kwargs = mock_search.call_args_list[0][1]
        assert first_call_kwargs["cv"] == 3
        assert first_call_kwargs["param_grid"] == param_grid
        assert policy.model == "mockmodel"
def test_featurize_trackers_with_max_history_tracker_featurizer(
        moodbot_domain: Domain):
    """Featurize the moodbot dialogue with a `MaxHistoryTrackerFeaturizer`.

    Expects seven state-feature entries, seven labels, and no truthy
    entity tags.
    """
    tracker_featurizer = MaxHistoryTrackerFeaturizer(SingleStateFeaturizer())
    tracker = tracker_from_dialogue_file(
        "data/test_dialogues/moodbot.json", moodbot_domain
    )

    featurized = tracker_featurizer.featurize_trackers(
        [tracker], moodbot_domain, RegexInterpreter()
    )
    state_features, labels, entity_tags = featurized

    for produced in (state_features, labels):
        assert produced is not None
        assert len(produced) == 7

    # moodbot doesn't contain e2e entities
    has_tags_per_turn = [any(turn_tags) for turn_tags in entity_tags]
    assert not any(has_tags_per_turn)
Esempio n. 8
0
def _load_interpreter(agent: "Agent",
                      nlu_path: Optional[Text]) -> NaturalLanguageInterpreter:
    """Choose the NLU interpreter to use.

    Args:
        agent: Instance of `Agent` whose interpreter is used when `nlu_path`
            is falsy.
        nlu_path: NLU model path; takes precedence when it is truthy.

    Returns:
        The interpreter created from `nlu_path` when one is given. Otherwise
        the agent's interpreter, or a new `RegexInterpreter` if the agent's
        interpreter is falsy.
    """
    if not nlu_path:
        agent_interpreter = agent.interpreter
        return agent_interpreter if agent_interpreter else RegexInterpreter()

    return rasa.core.interpreter.create_interpreter(nlu_path)
Esempio n. 9
0
    def test_cv_not_none_param_grid_none_triggers_search_without_params(
        self,
        mock_search,
        default_domain: Domain,
        trackers: List[TrackerWithCachedStates],
        featurizer: Optional[TrackerFeaturizer],
        priority: int,
    ):
        """With `cv=3` and no param grid, the mocked search gets an empty grid.

        The first search call is expected to receive `cv=3` and an empty
        `param_grid`, and the policy's model must be the value `"mockmodel"`.
        """
        policy = self.create_policy(
            featurizer=featurizer,
            priority=priority,
            cv=3,
        )
        policy.train(
            trackers,
            domain=default_domain,
            interpreter=RegexInterpreter(),
        )

        assert mock_search.call_count > 0

        # Index 1 of a recorded call holds its keyword arguments.
        first_call_kwargs = mock_search.call_args_list[0][1]
        assert first_call_kwargs["cv"] == 3
        assert first_call_kwargs["param_grid"] == {}
        assert policy.model == "mockmodel"
Esempio n. 10
0
 def test_additional_train_args_do_not_raise(
     self,
     default_domain: Domain,
     trackers: List[TrackerWithCachedStates],
     featurizer: Optional[TrackerFeaturizer],
     priority: int,
 ):
     """Training must accept an unknown keyword argument without raising.

     There is no explicit assertion: the test passes as long as `train`
     returns normally.
     """
     policy = self.create_policy(
         featurizer=featurizer, priority=priority, cv=None
     )

     # `this_is_not_a_feature` is deliberately not a real training option.
     unknown_train_kwargs = {"this_is_not_a_feature": True}
     policy.train(
         trackers,
         domain=default_domain,
         interpreter=RegexInterpreter(),
         **unknown_train_kwargs,
     )
Esempio n. 11
0
 def test_predict_action_listen(self, priority, domain_with_mapping,
                                intent_mapping):
     """After the mapped action ran, the top-scored action must be listen.

     The tracker ends with the mapped action attributed to
     `policy_0_MappingPolicy`; the highest score is then expected at the
     index of `ACTION_LISTEN_NAME`, and the scores must not be all zeros.
     """
     policy = self.create_policy(None, priority)
     mapped_intent, mapped_action = intent_mapping[0], intent_mapping[1]
     tracker = get_tracker([
         ActionExecuted(ACTION_LISTEN_NAME),
         user_uttered(mapped_intent, 1),
         ActionExecuted(mapped_action, policy="policy_0_MappingPolicy"),
     ])

     scores = policy.predict_action_probabilities(
         tracker, domain_with_mapping, RegexInterpreter()
     )

     # Position of the first occurrence of the highest score.
     best_score = max(scores)
     best_index = scores.index(best_score)
     action_planned = domain_with_mapping.action_names[best_index]

     assert action_planned == ACTION_LISTEN_NAME
     assert scores != [0] * domain_with_mapping.num_actions
def test_single_state_featurizer_without_interpreter_state_with_action_listen(
):
    """Encode a state without a trained interpreter, after `action_listen`.

    The previous action is `action_listen`, so the intent is expected to be
    featurized alongside action name, active loop and slots, while the
    `*_text` attributes are expected to be left out.
    """
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {
        "c": 0,
        "d": 1,
        "action_listen": 2,
    }
    f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
    f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}

    state = {
        "user": {"intent": "a", "text": "blah blah blah"},
        "prev_action": {"action_name": "action_listen", "action_text": "boom"},
        "active_loop": {"name": "k"},
        "slots": {"e": (1.0, )},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    # Expected attributes, in order, each with its expected one-hot row.
    expected = [
        (INTENT, [[1, 0]]),
        (ACTION_NAME, [[0, 0, 1]]),
        (ACTIVE_LOOP, [[0, 0, 0, 1]]),
        (SLOTS, [[1, 0, 0]]),
    ]

    # we featurize all the features except for *_text ones because NLU wasn't trained
    assert list(encoded.keys()) == [attribute for attribute, _ in expected]
    for attribute, row in expected:
        # A sparse `!=` with no stored non-zero entries means the matrices match.
        difference = encoded[attribute][0].features != scipy.sparse.coo_matrix(row)
        assert difference.nnz == 0
Esempio n. 13
0
    def test_finetune_after_load(
        self,
        trained_policy: SklearnPolicy,
        trackers: List[TrackerWithCachedStates],
        default_domain: Domain,
        tmp_path: Path,
    ):
        """A policy loaded with `should_finetune=True` can be trained again.

        The persisted policy is reloaded in finetune mode, trained once more,
        and must still have a truthy model afterwards.
        """
        trained_policy.persist(tmp_path)
        reloaded = SklearnPolicy.load(tmp_path, should_finetune=True)
        assert reloaded.finetune_mode

        interpreter = RegexInterpreter()
        reloaded.train(trackers, default_domain, interpreter)
        assert reloaded.model
Esempio n. 14
0
def test_single_state_featurizer_creates_encoded_all_actions():
    """Every domain action is encoded by name and never by text."""
    action_names = ["a", "b", "c", "d"]
    domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        templates={},
        forms=[],
        action_names=action_names,
    )

    f = SingleStateFeaturizer()
    f.prepare_from_domain(domain)
    encoded_actions = f.encode_all_actions(domain, RegexInterpreter())

    assert len(encoded_actions) == len(domain.action_names)
    for encoded_action in encoded_actions:
        assert ACTION_NAME in encoded_action
        assert ACTION_TEXT not in encoded_action
Esempio n. 15
0
    def test_cv_none_does_not_trigger_search(
        self,
        mock_search,
        default_domain: Domain,
        trackers: List[TrackerWithCachedStates],
        featurizer: Optional[TrackerFeaturizer],
        priority: int,
    ):
        """With `cv=None`, training must never call the mocked search.

        The policy's model must also differ from the value `"mockmodel"`.
        """
        policy = self.create_policy(
            featurizer=featurizer, priority=priority, cv=None
        )
        policy.train(
            trackers, domain=default_domain, interpreter=RegexInterpreter()
        )

        search_calls = mock_search.call_count
        assert search_calls == 0
        assert policy.model != "mockmodel"
Esempio n. 16
0
 def test_do_not_follow_other_policy(
     self,
     priority: int,
     domain_with_mapping: Domain,
     intent_mapping: Tuple[Text, Text],
 ):
     """The policy must predict nothing after another policy ran the action.

     The mapped action in the tracker is attributed to `other_policy`, so
     every probability is expected to be zero.
     """
     policy = self.create_policy(None, priority)
     mapped_intent, mapped_action = intent_mapping[0], intent_mapping[1]
     tracker = get_tracker([
         ActionExecuted(ACTION_LISTEN_NAME),
         user_uttered(mapped_intent, 1),
         ActionExecuted(mapped_action, policy="other_policy"),
     ])

     prediction = policy.predict_action_probabilities(
         tracker, domain_with_mapping, RegexInterpreter()
     )
     scores = prediction.probabilities

     all_zero = [0] * domain_with_mapping.num_actions
     assert scores == all_zero
Esempio n. 17
0
def test_prediction_applies_must_have_policy_events(default_domain: Domain):
    """Events of a lower-priority policy must still be part of the prediction."""
    must_have_events = [ActionExecuted("my action")]

    winning_policy = ConstantPolicy(priority=10, predict_index=1)
    losing_policy = ConstantPolicy(
        priority=1, predict_index=2, events=must_have_events
    )
    ensemble = SimplePolicyEnsemble([winning_policy, losing_policy])

    tracker = DialogueStateTracker.from_events("test", evts=[])
    prediction = ensemble.probabilities_using_best_policy(
        tracker, default_domain, RegexInterpreter()
    )

    # Policy 0 won due to higher prio
    expected_policy_name = f"policy_0_{ConstantPolicy.__name__}"
    assert prediction.policy_name == expected_policy_name

    # Events of losing policy were applied nevertheless
    assert prediction.events == must_have_events
Esempio n. 18
0
async def test_training_script_with_max_history_set(tmp_path: Path):
    """Train with the max-history config and check each featurizer's setting.

    `FormPolicy` is expected to use 2, `RulePolicy` to use `None`, and any
    other policy whose featurizer has a `max_history` to use 5.
    """
    model_dir = str(tmp_path)

    await train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_STORIES_FILE,
        model_dir,
        interpreter=RegexInterpreter(),
        policy_config="data/test_config/max_hist_config.yml",
        additional_arguments={},
    )
    agent = Agent.load(model_dir)

    # Per-policy exceptions to the value used by all remaining policies.
    expected_max_history = {FormPolicy: 2, RulePolicy: None}
    fallback_max_history = 5

    for policy in agent.policy_ensemble.policies:
        if not hasattr(policy.featurizer, "max_history"):
            continue
        expected_history = expected_max_history.get(
            type(policy), fallback_max_history
        )
        assert policy.featurizer.max_history == expected_history
Esempio n. 19
0
def test_single_state_featurizer_correctly_encodes_non_existing_value():
    """An intent missing from the feature states is encoded as an all-zero row."""
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {"c": 0, "d": 1}

    # Intent "e" is not one of the intents registered above.
    state = {
        "user": {"intent": "e"},
        "prev_action": {"action_name": "action_listen"},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    assert list(encoded.keys()) == [INTENT, ACTION_NAME]

    expected_intent_row = scipy.sparse.coo_matrix([[0, 0]])
    difference = encoded[INTENT][0].features != expected_intent_row
    assert difference.nnz == 0
Esempio n. 20
0
def test_single_state_featurizer_uses_dtype_float():
    """Encoded action-name features must have dtype `np.float32`."""
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1}
    f._default_feature_states[ENTITIES] = {"c": 0}

    state = {
        "user": {"intent": "a", "entities": ["c"]},
        "prev_action": {"action_name": "d"},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    action_name_features = encoded[ACTION_NAME][0].features
    assert action_name_features.dtype == np.float32
def test_single_state_featurizer_without_interpreter_state_not_with_action_listen(
):
    """Encode a state without a trained interpreter, not after `action_listen`.

    The previous action is not `action_listen`, so the user attributes are
    expected to be left out and only action name, active loop and slots
    are expected to be featurized.
    """
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {
        "c": 0,
        "d": 1,
        "action_listen": 2,
    }
    f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
    f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}

    state = {
        "user": {"intent": "a", "text": "blah blah blah"},
        "prev_action": {"action_name": "d", "action_text": "boom"},
        "active_loop": {"name": "i"},
        "slots": {"g": (1.0, )},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    # Expected attributes, in order, each with its expected one-hot row.
    expected = [
        (ACTION_NAME, [[0, 1, 0]]),
        (ACTIVE_LOOP, [[0, 1, 0, 0]]),
        (SLOTS, [[0, 0, 1]]),
    ]

    # user input is ignored as prev action is not action_listen
    assert list(encoded.keys()) == [attribute for attribute, _ in expected]
    for attribute, row in expected:
        # A sparse `!=` with no stored non-zero entries means the matrices match.
        difference = encoded[attribute][0].features != scipy.sparse.coo_matrix(row)
        assert difference.nnz == 0
Esempio n. 22
0
def test_single_state_featurizer_uses_regex_interpreter(
    unpacked_trained_moodbot_path: Text,
):
    """A featurizer prepared with `RegexInterpreter` yields no text features.

    This holds even when a trained interpreter is supplied at
    feature-extraction time.
    """
    from rasa.core.agent import Agent

    empty_domain = Domain(
        intents=[], entities=[], slots=[], responses={}, forms=[], action_names=[],
    )

    # simulate that core was trained separately by passing
    # RegexInterpreter to prepare_for_training
    f = SingleStateFeaturizer()
    f.prepare_for_training(empty_domain, RegexInterpreter())

    # simulate that nlu and core models were manually combined for prediction
    # by passing trained interpreter to _extract_state_features
    trained_interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
    features = f._extract_state_features(
        {TEXT: "some text"}, trained_interpreter
    )

    # RegexInterpreter cannot create features for text, therefore since featurizer
    # was trained without nlu, features for text should be empty
    assert not features
Esempio n. 23
0
    def _get_prediction(
        policy: Policy,
        tracker: DialogueStateTracker,
        domain: Domain,
        interpreter: NaturalLanguageInterpreter,
    ) -> PolicyPrediction:
        """Ask `policy` for a prediction while tolerating legacy interfaces.

        Args:
            policy: The policy to query.
            tracker: The conversation tracker passed on to the policy.
            domain: The domain passed on to the policy.
            interpreter: Interpreter handed to the policy when its
                `predict_action_probabilities` declares an `interpreter`
                argument.

        Returns:
            The policy's prediction. A plain list result is wrapped in a
            `PolicyPrediction` after a deprecation warning is raised.
        """
        # No `self`/`cls` parameter: presumably decorated as a static method
        # outside this view -- TODO confirm.
        number_of_arguments_in_rasa_1_0 = 2
        arguments = rasa.shared.utils.common.arguments_of(
            policy.predict_action_probabilities
        )
        accepts_interpreter = (
            len(arguments) > number_of_arguments_in_rasa_1_0
            and "interpreter" in arguments
        )

        if accepts_interpreter:
            interpreter_to_pass = interpreter
        else:
            rasa.shared.utils.io.raise_warning(
                "The function `predict_action_probabilities` of "
                "the `Policy` interface was changed to support "
                "additional parameters. Please make sure to "
                "adapt your custom `Policy` implementation.",
                category=DeprecationWarning,
            )
            # NOTE(review): a third positional argument is still passed to a
            # policy that was just detected as not declaring `interpreter`;
            # verify this works for two-argument legacy policies.
            interpreter_to_pass = RegexInterpreter()

        prediction = policy.predict_action_probabilities(
            tracker, domain, interpreter_to_pass
        )

        if not isinstance(prediction, list):
            return prediction

        # Legacy return type: wrap the raw list in a `PolicyPrediction`.
        rasa.shared.utils.io.raise_deprecation_warning(
            f"The function `predict_action_probabilities` of "
            f"the `{Policy.__name__}` interface was changed to return "
            f"a `{PolicyPrediction.__name__}` object. Please make sure to "
            f"adapt your custom `{Policy.__name__}` implementation. Support for "
            f"returning a list of floats will be removed in Rasa Open Source 3.0.0"
        )
        return PolicyPrediction(
            prediction, policy.__class__.__name__, policy_priority=policy.priority
        )
Esempio n. 24
0
    def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUttered]:
        """Build a `UserUttered` event from a raw story step.

        Args:
            step: The story step; it may carry a user message under
                `KEY_USER_MESSAGE` or raw entities under `KEY_ENTITIES`.

        Returns:
            A `UserUttered` with an intent of confidence 1.0. Its text is the
            message with entity annotations replaced, or `None` when the step
            has no user message.
        """
        intent = {"name": self._user_intent_from_step(step), "confidence": 1.0}

        if KEY_USER_MESSAGE not in step:
            # text is None because only intent was provided in the stories
            entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
            return UserUttered(None, intent, entities)

        user_message = step[KEY_USER_MESSAGE].strip()
        entities = entities_parser.find_entities_in_training_example(user_message)
        plain_text = entities_parser.replace_entities(user_message)

        if plain_text.startswith(INTENT_MESSAGE_PREFIX):
            # Prefixed messages get their entities from the regex parser instead.
            parsed = RegexInterpreter().synchronous_parse(plain_text)
            entities = parsed.get(ENTITIES, [])

        return UserUttered(plain_text, intent, entities)
Esempio n. 25
0
    def parse_e2e_message(
        self, line: Text, is_used_for_training: bool = True
    ) -> Message:
        """Parse one md list item line of the form `<intent>:<example>`.

        For the syntax of `<example>` see the Rasa docs on NLU training data.

        Args:
            line: The raw line to parse.
            is_used_for_training: When false and e2e is not in use, the
                original message text is kept and the parsed entities are
                discarded.

        Returns:
            The training example built from the message and its intent.

        Raises:
            ValueError: If `line` does not match the expected format.
        """
        # Match four groups:
        # 1) Potential "form" annotation
        # 2) The correct intent
        # 3) Optional entities
        # 4) The message text
        form_group = fr"({FORM_PREFIX}\s*)*"
        item_regex = re.compile(r"\s*" + form_group + r"([^{}]+?)({.*})*:\s*(.*)")
        match = item_regex.match(line)

        if match is None:
            raise ValueError(
                "Encountered invalid test story format for message "
                "`{}`. Please visit the documentation page on "
                "end-to-end testing at {}/user-guide/testing-your-assistant/"
                "#end-to-end-testing/".format(line, LEGACY_DOCS_BASE_URL)
            )
        from rasa.shared.nlu.training_data import entities_parser

        intent, message = match.group(2), match.group(4)
        example = entities_parser.parse_training_example(message, intent)

        keeps_original_text = not (is_used_for_training or self.use_e2e)
        if keeps_original_text:
            # In case this is a simple conversion from Markdown we should copy over
            # the original text and not parse the entities
            example.data[rasa.shared.nlu.constants.TEXT] = message
            example.data[rasa.shared.nlu.constants.ENTITIES] = []

        # If the message starts with the `INTENT_MESSAGE_PREFIX` potential entities
        # are annotated in the json format (e.g. `/greet{"name": "Rasa"})
        if message.startswith(INTENT_MESSAGE_PREFIX):
            regex_parsed = RegexInterpreter().synchronous_parse(message)
            example.data["entities"] = regex_parsed["entities"]

        return example
Esempio n. 26
0
async def test_training_script_without_max_history_set(tmp_path: Path):
    """Train without a configured max history and check the values in use.

    `FormPolicy` is expected to use 2, `MemoizationPolicy` to use
    `OLD_DEFAULT_MAX_HISTORY`, and every other policy whose featurizer has
    a `max_history` to use `None`.
    """
    model_dir = str(tmp_path)
    await train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_STORIES_FILE,
        model_dir,
        interpreter=RegexInterpreter(),
        policy_config="data/test_config/no_max_hist_config.yml",
        additional_arguments={},
    )

    agent = Agent.load(model_dir)
    for policy in agent.policy_ensemble.policies:
        if not hasattr(policy.featurizer, "max_history"):
            continue

        # Exact type comparison is kept so subclasses take the last branch.
        policy_type = type(policy)
        if policy_type == FormPolicy:
            assert policy.featurizer.max_history == 2
        elif policy_type == MemoizationPolicy:
            assert policy.featurizer.max_history == OLD_DEFAULT_MAX_HISTORY
        else:
            assert policy.featurizer.max_history is None
Esempio n. 27
0
def test_end_to_end_prediction_supersedes_others(default_domain: Domain):
    """An end-to-end prediction must win over a higher-priority policy.

    The policy at index 1 has the lower priority but is flagged as an
    end-to-end prediction; its confidence, action index and name are
    expected in the ensemble's result.
    """
    expected_action_index = 2
    expected_confidence = 0.5

    regular_policy = ConstantPolicy(priority=100, predict_index=0)
    end_to_end_policy = ConstantPolicy(
        priority=1,
        predict_index=expected_action_index,
        confidence=expected_confidence,
        is_end_to_end_prediction=True,
    )
    ensemble = SimplePolicyEnsemble([regular_policy, end_to_end_policy])

    tracker = DialogueStateTracker.from_events("test", evts=[])
    prediction = ensemble.probabilities_using_best_policy(
        tracker, default_domain, RegexInterpreter()
    )

    assert prediction.max_confidence == expected_confidence
    assert prediction.max_confidence_index == expected_action_index
    assert prediction.policy_name == f"policy_1_{ConstantPolicy.__name__}"
Esempio n. 28
0
def test_fallback_wins_over_mapping():
    """The fallback policy must win over the mapping policy on low confidence."""
    domain = Domain.load("data/test_domains/default.yml")
    tracker = DialogueStateTracker.from_events(
        "test",
        [
            ActionExecuted(ACTION_LISTEN_NAME),
            # Low confidence should trigger fallback
            utilities.user_uttered(USER_INTENT_RESTART, 0.0001),
        ],
        [],
    )

    ensemble = SimplePolicyEnsemble([FallbackPolicy(), MappingPolicy()])
    prediction = ensemble.probabilities_using_best_policy(
        tracker, domain, RegexInterpreter()
    )

    next_action = rasa.core.actions.action.action_for_index(
        prediction.max_confidence_index, domain, None
    )

    # The fallback policy is the first entry of the ensemble.
    index_of_fallback_policy = 0
    expected_policy_name = (
        f"policy_{index_of_fallback_policy}_{FallbackPolicy.__name__}"
    )
    assert prediction.policy_name == expected_policy_name
    assert next_action.name() == ACTION_DEFAULT_FALLBACK_NAME
Esempio n. 29
0
def test_training_script_with_max_history_set(tmp_path: Path,
                                              domain_path: Text,
                                              stories_path: Text):
    """Train with the max-history config and check each featurizer's setting.

    `RulePolicy` is expected to use `None`; any other policy whose
    featurizer has a `max_history` is expected to use 5.
    """
    model_dir = str(tmp_path)

    train(
        domain_path,
        stories_path,
        model_dir,
        interpreter=RegexInterpreter(),
        policy_config="data/test_config/max_hist_config.yml",
        additional_arguments={},
    )
    agent = Agent.load(model_dir)

    # Per-policy exceptions to the value used by all remaining policies.
    expected_max_history = {RulePolicy: None}
    fallback_max_history = 5

    for policy in agent.policy_ensemble.policies:
        if not hasattr(policy.featurizer, "max_history"):
            continue
        expected_history = expected_max_history.get(
            type(policy), fallback_max_history
        )
        assert policy.featurizer.max_history == expected_history
Esempio n. 30
0
def test_single_state_featurizer_with_entity_roles_and_groups(
    unpacked_trained_moodbot_path: Text,
):
    """Encode entities where one entity type carries a label separator suffix.

    Only entity tags are expected in the result, with tag 1 at the fifth
    position and tag 2 at the seventh.
    """
    from rasa.core.agent import Agent

    interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

    plain_city = "city"
    city_with_role = f"city{ENTITY_LABEL_SEPARATOR}to"

    # TODO roles and groups are not supported in e2e yet
    domain = Domain(
        intents=[],
        entities=[plain_city, city_with_role],
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )
    f = SingleStateFeaturizer()
    f.prepare_for_training(domain, RegexInterpreter())

    london = {
        ENTITY_ATTRIBUTE_TYPE: plain_city,
        ENTITY_ATTRIBUTE_VALUE: "London",
        ENTITY_ATTRIBUTE_START: 17,
        ENTITY_ATTRIBUTE_END: 23,
    }
    paris = {
        ENTITY_ATTRIBUTE_TYPE: city_with_role,
        ENTITY_ATTRIBUTE_VALUE: "Paris",
        ENTITY_ATTRIBUTE_START: 27,
        ENTITY_ATTRIBUTE_END: 32,
    }
    encoded = f.encode_entities(
        {
            TEXT: "I am flying from London to Paris",
            ENTITIES: [london, paris],
        },
        interpreter=interpreter,
    )

    assert sorted(encoded.keys()) == [ENTITY_TAGS]

    expected_tags = [[0], [0], [0], [0], [1], [0], [2]]
    assert np.all(encoded[ENTITY_TAGS][0].features == expected_tags)