def test_train_kwargs_are_set_on_model(
    self,
    default_domain: Domain,
    trackers: List[TrackerWithCachedStates],
    featurizer: Optional[TrackerFeaturizer],
    priority: int,
):
    """Check that `C` given to `create_policy` is visible on the trained model.

    The policy is created with `cv=None` and `C=123`, trained on the provided
    trackers, and `policy.model.C` is then compared against the same value.
    """
    expected_c = 123
    policy = self.create_policy(
        featurizer=featurizer, priority=priority, cv=None, C=expected_c
    )

    regex_interpreter = RegexInterpreter()
    policy.train(trackers, domain=default_domain, interpreter=regex_interpreter)

    assert policy.model.C == expected_c
def test_normalization(
    self,
    trained_policy: Policy,
    tracker: DialogueStateTracker,
    default_domain: Domain,
    monkeypatch: MonkeyPatch,
):
    """Check that predicting does not call `train_utils.normalize`."""
    # Replace the real normalization function with a recording stand-in.
    normalize_spy = Mock()
    monkeypatch.setattr(train_utils, "normalize", normalize_spy.normalize)

    regex_interpreter = RegexInterpreter()
    trained_policy.predict_action_probabilities(
        tracker, default_domain, regex_interpreter
    )

    # The original author's note: the function should not get called for
    # margin loss_type.
    normalize_spy.normalize.assert_not_called()
def test_policy_priority():
    """Check that the priority-2 policy wins regardless of ensemble order.

    Two constant policies (priority 1 and 2) are put into two ensembles in
    opposite order. In both ensembles the reported best policy name and the
    returned result must match the priority-2 policy.
    """
    domain = Domain.load("data/test_domains/default.yml")
    tracker = DialogueStateTracker.from_events("test", [UserUttered("hi")], [])

    lower_priority = ConstantPolicy(priority=1, predict_index=0)
    higher_priority = ConstantPolicy(priority=2, predict_index=1)

    ensemble_low_first = SimplePolicyEnsemble([lower_priority, higher_priority])
    ensemble_high_first = SimplePolicyEnsemble([higher_priority, lower_priority])

    expected_result = higher_priority.predict_action_probabilities(
        tracker, domain, RegexInterpreter()
    )

    # Each entry pairs an ensemble with the index the priority-2 policy has in it.
    cases = (
        (ensemble_low_first, 1),
        (ensemble_high_first, 0),
    )
    for ensemble, winner_index in cases:
        result, best_policy = ensemble.probabilities_using_best_policy(
            tracker, domain, RegexInterpreter()
        )
        assert best_policy == "policy_{}_{}".format(
            winner_index, type(higher_priority).__name__
        )
        assert result == expected_result
def _parse_message(self, message: Text, line_num: int) -> UserUttered:
    """Build a `UserUttered` event from a story message.

    Args:
        message: The raw message text taken from the story line.
        line_num: Line number of the message; only used in the warning text.

    Returns:
        The `UserUttered` event created from the parsed message.

    When `self.use_e2e` is set the message is parsed with
    `parse_e2e_message`; otherwise `RegexInterpreter` parses it and the
    event text is left as `None`. A warning is raised if a domain is set and
    the intent name is not among its intents.
    """
    if not self.use_e2e:
        parse_data = RegexInterpreter().synchronous_parse(message)
        text = None
        intent = parse_data.get("intent")
    else:
        parsed = self.parse_e2e_message(message, self._is_used_for_training)
        text = parsed.get("text")
        # Use the response key when present, else fall back to the plain intent.
        intent_value = parsed.get(
            "intent_response_key", default=parsed.get("intent")
        )
        intent = {INTENT_NAME_KEY: intent_value}
        parse_data = {
            "text": text,
            "intent": intent,
            "intent_ranking": [intent],
            "entities": parsed.get("entities"),
        }

    utterance = UserUttered(text, intent, parse_data.get("entities"), parse_data)

    intent_name = utterance.intent.get(INTENT_NAME_KEY)
    is_unknown_intent = self.domain and intent_name not in self.domain.intents
    if is_unknown_intent:
        rasa.shared.utils.io.raise_warning(
            f"Found unknown intent '{intent_name}' on line {line_num}. "
            "Please, make sure that all intents are "
            "listed in your domain yaml.",
            UserWarning,
            docs=DOCS_URL_DOMAINS,
        )

    return utterance
async def test_memorise(
    self, trained_policy: MemoizationPolicy, default_domain: Domain
):
    """Check recall of trained states and that augmentation does not alter the lookup.

    The policy is trained with `augmentation_factor=20`, every non-augmented
    tracker's states must be recalled as its first action, a randomly valued
    state must not be recalled, and a retrain with `augmentation_factor=0`
    must give an equal lookup.
    """
    augmented_trackers = await train_trackers(
        default_domain, augmentation_factor=20
    )
    trained_policy.train(augmented_trackers, default_domain, RegexInterpreter())
    lookup_with_augmentation = trained_policy.lookup

    # Keep only trackers that are not flagged as augmented.
    plain_trackers = [
        candidate
        for candidate in augmented_trackers
        if not (hasattr(candidate, "is_augmented") and candidate.is_augmented)
    ]

    state_featurizer = trained_policy.featurizer
    all_states, all_actions = state_featurizer.training_states_and_actions(
        plain_trackers, default_domain
    )

    for tracker, states, actions in zip(plain_trackers, all_states, all_actions):
        assert trained_policy.recall(states, tracker, default_domain) == actions[0]

    # A state with random feature values is expected to be unknown.
    nums = np.random.randn(default_domain.num_states)
    random_states = [dict(zip(default_domain.input_states, nums))]
    assert trained_policy._recall_states(random_states) is None

    # Compare the lookups for augmentation_factor 0 and 20.
    trackers_no_augmentation = await train_trackers(
        default_domain, augmentation_factor=0
    )
    trained_policy.train(
        trackers_no_augmentation, default_domain, RegexInterpreter()
    )
    assert trained_policy.lookup == lookup_with_augmentation
def test_cv_not_none_param_grid_none_triggers_search_with_params(
    self, mock_search, default_domain, trackers, featurizer, priority
):
    """Check that `cv=3` with a param grid calls the search with both values.

    The first recorded call of `mock_search` must carry `cv == 3` and the
    given `param_grid`, and the trained model must equal `"mockmodel"`.
    """
    param_grid = {"n_estimators": 50}
    policy = self.create_policy(
        featurizer=featurizer, priority=priority, cv=3, param_grid=param_grid
    )

    regex_interpreter = RegexInterpreter()
    policy.train(trackers, domain=default_domain, interpreter=regex_interpreter)

    assert mock_search.call_count > 0

    # Index 1 of a recorded call holds its keyword arguments.
    first_call_kwargs = mock_search.call_args_list[0][1]
    assert first_call_kwargs["cv"] == 3
    assert first_call_kwargs["param_grid"] == param_grid
    assert policy.model == "mockmodel"
def test_featurize_trackers_with_max_history_tracker_featurizer(
    moodbot_domain: Domain,
):
    """Featurize the moodbot dialogue and check the number of features and labels.

    Seven state features and seven labels are expected, and no entity tag
    entry may be truthy.
    """
    tracker_featurizer = MaxHistoryTrackerFeaturizer(SingleStateFeaturizer())
    tracker = tracker_from_dialogue_file(
        "data/test_dialogues/moodbot.json", moodbot_domain
    )

    featurized = tracker_featurizer.featurize_trackers(
        [tracker], moodbot_domain, RegexInterpreter()
    )
    state_features, labels, entity_tags = featurized

    assert state_features is not None
    assert len(state_features) == 7
    assert labels is not None
    assert len(labels) == 7

    # moodbot doesn't contain e2e entities
    turns_with_tags = [any(turn_tags) for turn_tags in entity_tags]
    assert not any(turns_with_tags)
def _load_interpreter(
    agent: "Agent", nlu_path: Optional[Text]
) -> NaturalLanguageInterpreter:
    """Return the NLU interpreter to use.

    Args:
        agent: `Agent` whose `interpreter` attribute is used when no
            `nlu_path` is given.
        nlu_path: Path to an NLU model; may be `None` or empty.

    Returns:
        The interpreter created from `nlu_path` if it is given, otherwise the
        agent's interpreter, or a new `RegexInterpreter` when the agent's
        interpreter is falsy.
    """
    if not nlu_path:
        return agent.interpreter or RegexInterpreter()

    return rasa.core.interpreter.create_interpreter(nlu_path)
def test_cv_not_none_param_grid_none_triggers_search_without_params(
    self,
    mock_search,
    default_domain: Domain,
    trackers: List[TrackerWithCachedStates],
    featurizer: Optional[TrackerFeaturizer],
    priority: int,
):
    """Check that `cv=3` without a param grid calls the search with an empty grid.

    The first recorded call of `mock_search` must carry `cv == 3` and
    `param_grid == {}`, and the trained model must equal `"mockmodel"`.
    """
    policy = self.create_policy(featurizer=featurizer, priority=priority, cv=3)

    regex_interpreter = RegexInterpreter()
    policy.train(trackers, domain=default_domain, interpreter=regex_interpreter)

    assert mock_search.call_count > 0

    # Index 1 of a recorded call holds its keyword arguments.
    first_call_kwargs = mock_search.call_args_list[0][1]
    assert first_call_kwargs["cv"] == 3
    assert first_call_kwargs["param_grid"] == {}
    assert policy.model == "mockmodel"
def test_additional_train_args_do_not_raise(
    self,
    default_domain: Domain,
    trackers: List[TrackerWithCachedStates],
    featurizer: Optional[TrackerFeaturizer],
    priority: int,
):
    """Train with an unknown keyword argument; the test passes if nothing raises."""
    policy = self.create_policy(featurizer=featurizer, priority=priority, cv=None)

    # `this_is_not_a_feature` is an intentionally unrecognised keyword.
    extra_train_kwargs = {"this_is_not_a_feature": True}
    policy.train(
        trackers,
        domain=default_domain,
        interpreter=RegexInterpreter(),
        **extra_train_kwargs,
    )
def test_predict_action_listen(self, priority, domain_with_mapping, intent_mapping):
    """Check that `action_listen` is predicted after the mapped action ran.

    The tracker ends with the mapped action attributed to
    `policy_0_MappingPolicy`; the highest scored action must then be
    `ACTION_LISTEN_NAME` and the scores must not be all zero.
    """
    policy = self.create_policy(None, priority)

    tracker = get_tracker(
        [
            ActionExecuted(ACTION_LISTEN_NAME),
            user_uttered(intent_mapping[0], 1),
            ActionExecuted(intent_mapping[1], policy="policy_0_MappingPolicy"),
        ]
    )

    scores = policy.predict_action_probabilities(
        tracker, domain_with_mapping, RegexInterpreter()
    )

    best_index = scores.index(max(scores))
    assert domain_with_mapping.action_names[best_index] == ACTION_LISTEN_NAME

    all_zero = [0] * domain_with_mapping.num_actions
    assert scores != all_zero
def test_single_state_featurizer_without_interpreter_state_with_action_listen():
    """Encode a state without a trained interpreter after `action_listen`.

    The previous action is `action_listen`, so the intent is expected among
    the encoded keys, while the text attributes are not because only a
    `RegexInterpreter` is available.
    """
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {"c": 0, "d": 1, "action_listen": 2}
    f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
    f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}

    state = {
        "user": {"intent": "a", "text": "blah blah blah"},
        "prev_action": {"action_name": "action_listen", "action_text": "boom"},
        "active_loop": {"name": "k"},
        "slots": {"e": (1.0,)},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    # we featurize all the features except for *_text ones because NLU wasn't trained
    assert list(encoded.keys()) == [INTENT, ACTION_NAME, ACTIVE_LOOP, SLOTS]

    # Each pair is an attribute and its expected one-hot row.
    expected_rows = (
        (INTENT, [[1, 0]]),
        (ACTION_NAME, [[0, 0, 1]]),
        (ACTIVE_LOOP, [[0, 0, 0, 1]]),
        (SLOTS, [[1, 0, 0]]),
    )
    for attribute, row in expected_rows:
        mismatch = encoded[attribute][0].features != scipy.sparse.coo_matrix(row)
        assert mismatch.nnz == 0
def test_finetune_after_load(
    self,
    trained_policy: SklearnPolicy,
    trackers: List[TrackerWithCachedStates],
    default_domain: Domain,
    tmp_path: Path,
):
    """Persist a policy, reload it with `should_finetune=True` and train again.

    The reloaded policy must report `finetune_mode` and must still have a
    truthy `model` after the additional training.
    """
    trained_policy.persist(tmp_path)

    reloaded = SklearnPolicy.load(tmp_path, should_finetune=True)
    assert reloaded.finetune_mode

    reloaded.train(trackers, default_domain, RegexInterpreter())
    assert reloaded.model
def test_single_state_featurizer_creates_encoded_all_actions():
    """Check that `encode_all_actions` returns one name-only encoding per action.

    Every encoded action must contain `ACTION_NAME` and must not contain
    `ACTION_TEXT`.
    """
    domain = Domain(
        intents=[],
        entities=[],
        slots=[],
        templates={},
        forms=[],
        action_names=["a", "b", "c", "d"],
    )

    f = SingleStateFeaturizer()
    f.prepare_from_domain(domain)
    encoded_actions = f.encode_all_actions(domain, RegexInterpreter())

    assert len(encoded_actions) == len(domain.action_names)

    name_only = [
        ACTION_NAME in encoded_action and ACTION_TEXT not in encoded_action
        for encoded_action in encoded_actions
    ]
    assert all(name_only)
def test_cv_none_does_not_trigger_search(
    self,
    mock_search,
    default_domain: Domain,
    trackers: List[TrackerWithCachedStates],
    featurizer: Optional[TrackerFeaturizer],
    priority: int,
):
    """Check that training with `cv=None` never calls the mocked search.

    `mock_search` must record zero calls and the trained model must differ
    from `"mockmodel"`.
    """
    policy = self.create_policy(featurizer=featurizer, priority=priority, cv=None)

    regex_interpreter = RegexInterpreter()
    policy.train(trackers, domain=default_domain, interpreter=regex_interpreter)

    assert mock_search.call_count == 0
    assert policy.model != "mockmodel"
def test_do_not_follow_other_policy(
    self,
    priority: int,
    domain_with_mapping: Domain,
    intent_mapping: Tuple[Text, Text],
):
    """Check that all scores are zero when another policy ran the mapped action.

    The mapped action in the tracker is attributed to `"other_policy"`, and
    the predicted probabilities must then be a list of zeros.
    """
    policy = self.create_policy(None, priority)

    tracker = get_tracker(
        [
            ActionExecuted(ACTION_LISTEN_NAME),
            user_uttered(intent_mapping[0], 1),
            ActionExecuted(intent_mapping[1], policy="other_policy"),
        ]
    )

    prediction = policy.predict_action_probabilities(
        tracker, domain_with_mapping, RegexInterpreter()
    )
    scores = prediction.probabilities

    all_zero = [0] * domain_with_mapping.num_actions
    assert scores == all_zero
def test_prediction_applies_must_have_policy_events(default_domain: Domain):
    """Check that events from the lower-priority policy appear in the prediction.

    The priority-10 policy is expected to win, yet the prediction's events
    must equal the events configured on the priority-1 policy.
    """
    must_have_events = [ActionExecuted("my action")]

    winning_policy = ConstantPolicy(priority=10, predict_index=1)
    losing_policy = ConstantPolicy(
        priority=1, predict_index=2, events=must_have_events
    )
    ensemble = SimplePolicyEnsemble([winning_policy, losing_policy])

    tracker = DialogueStateTracker.from_events("test", evts=[])
    prediction = ensemble.probabilities_using_best_policy(
        tracker, default_domain, RegexInterpreter()
    )

    # Policy 0 won due to higher prio
    expected_name = f"policy_0_{ConstantPolicy.__name__}"
    assert prediction.policy_name == expected_name

    # Events of losing policy were applied nevertheless
    assert prediction.events == must_have_events
async def test_training_script_with_max_history_set(tmp_path: Path):
    """Train with `max_hist_config.yml` and check each featurizer's `max_history`.

    `FormPolicy` is expected to use 2, `RulePolicy` `None`, and every other
    policy 5. Policies whose featurizer has no `max_history` are skipped.
    """
    model_dir = str(tmp_path)

    await train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_STORIES_FILE,
        model_dir,
        interpreter=RegexInterpreter(),
        policy_config="data/test_config/max_hist_config.yml",
        additional_arguments={},
    )
    agent = Agent.load(model_dir)

    # Policies listed here deviate from the fallback value of 5 used below.
    expected_max_history = {FormPolicy: 2, RulePolicy: None}

    for policy in agent.policy_ensemble.policies:
        if not hasattr(policy.featurizer, "max_history"):
            continue

        expected_history = expected_max_history.get(type(policy), 5)
        assert policy.featurizer.max_history == expected_history
def test_single_state_featurizer_correctly_encodes_non_existing_value():
    """Check that an intent missing from the feature states encodes as all zeros.

    Intent `"e"` is not among the registered intents `"a"` and `"b"`, so the
    intent features must equal `[[0, 0]]`.
    """
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {"c": 0, "d": 1}

    state = {
        "user": {"intent": "e"},
        "prev_action": {"action_name": "action_listen"},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    assert list(encoded.keys()) == [INTENT, ACTION_NAME]

    mismatch = encoded[INTENT][0].features != scipy.sparse.coo_matrix([[0, 0]])
    assert mismatch.nnz == 0
def test_single_state_featurizer_uses_dtype_float():
    """Check that the encoded action-name features have dtype `np.float32`."""
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1}
    f._default_feature_states[ENTITIES] = {"c": 0}

    state = {
        "user": {"intent": "a", "entities": ["c"]},
        "prev_action": {"action_name": "d"},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    action_name_features = encoded[ACTION_NAME][0].features
    assert action_name_features.dtype == np.float32
def test_single_state_featurizer_without_interpreter_state_not_with_action_listen():
    """Encode a state without a trained interpreter after a non-listen action.

    The previous action is `"d"` rather than `action_listen`, so no user
    attribute (intent, text, entities) is expected among the encoded keys.
    """
    f = SingleStateFeaturizer()
    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
    f._default_feature_states[ACTION_NAME] = {"c": 0, "d": 1, "action_listen": 2}
    f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
    f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}

    state = {
        "user": {"intent": "a", "text": "blah blah blah"},
        "prev_action": {"action_name": "d", "action_text": "boom"},
        "active_loop": {"name": "i"},
        "slots": {"g": (1.0,)},
    }
    encoded = f.encode_state(state, interpreter=RegexInterpreter())

    # user input is ignored as prev action is not action_listen
    assert list(encoded.keys()) == [ACTION_NAME, ACTIVE_LOOP, SLOTS]

    # Each pair is an attribute and its expected one-hot row.
    expected_rows = (
        (ACTION_NAME, [[0, 1, 0]]),
        (ACTIVE_LOOP, [[0, 1, 0, 0]]),
        (SLOTS, [[0, 0, 1]]),
    )
    for attribute, row in expected_rows:
        mismatch = encoded[attribute][0].features != scipy.sparse.coo_matrix(row)
        assert mismatch.nnz == 0
def test_single_state_featurizer_uses_regex_interpreter(
    unpacked_trained_moodbot_path: Text,
):
    """Check that text yields no features when training used `RegexInterpreter`.

    The featurizer is prepared with a `RegexInterpreter`, but feature
    extraction is given the interpreter of the loaded moodbot agent. The
    extracted features for a text-only state must be empty.
    """
    from rasa.core.agent import Agent

    empty_domain = Domain(
        intents=[], entities=[], slots=[], responses={}, forms=[], action_names=[],
    )
    f = SingleStateFeaturizer()

    # Simulate core being trained separately: prepare the featurizer with a
    # RegexInterpreter.
    f.prepare_for_training(empty_domain, RegexInterpreter())

    # Simulate nlu and core models being combined manually for prediction:
    # hand the loaded agent's interpreter to feature extraction.
    trained_interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
    features = f._extract_state_features({TEXT: "some text"}, trained_interpreter)

    # RegexInterpreter cannot create features for text, therefore since featurizer
    # was trained without nlu, features for text should be empty
    assert not features
def _get_prediction(
    policy: Policy,
    tracker: DialogueStateTracker,
    domain: Domain,
    interpreter: NaturalLanguageInterpreter,
) -> PolicyPrediction:
    """Ask `policy` for a prediction, tolerating older custom policy signatures.

    Args:
        policy: The policy to query.
        tracker: Tracker passed through to the policy.
        domain: Domain passed through to the policy.
        interpreter: Interpreter passed to the policy when its
            `predict_action_probabilities` accepts an `interpreter` argument.

    Returns:
        The policy's prediction. A plain list returned by the policy is
        wrapped in a `PolicyPrediction` after a deprecation warning.
    """
    number_of_arguments_in_rasa_1_0 = 2
    arguments = rasa.shared.utils.common.arguments_of(
        policy.predict_action_probabilities
    )
    accepts_interpreter = (
        len(arguments) > number_of_arguments_in_rasa_1_0
        and "interpreter" in arguments
    )

    if accepts_interpreter:
        chosen_interpreter = interpreter
    else:
        rasa.shared.utils.io.raise_warning(
            "The function `predict_action_probabilities` of "
            "the `Policy` interface was changed to support "
            "additional parameters. Please make sure to "
            "adapt your custom `Policy` implementation.",
            category=DeprecationWarning,
        )
        # The caller's interpreter is replaced by a RegexInterpreter here.
        chosen_interpreter = RegexInterpreter()

    prediction = policy.predict_action_probabilities(
        tracker, domain, chosen_interpreter
    )

    if not isinstance(prediction, list):
        return prediction

    # Legacy return type: wrap the list in a PolicyPrediction.
    rasa.shared.utils.io.raise_deprecation_warning(
        f"The function `predict_action_probabilities` of "
        f"the `{Policy.__name__}` interface was changed to return "
        f"a `{PolicyPrediction.__name__}` object. Please make sure to "
        f"adapt your custom `{Policy.__name__}` implementation. Support for "
        f"returning a list of floats will be removed in Rasa Open Source 3.0.0"
    )
    return PolicyPrediction(
        prediction, policy.__class__.__name__, policy_priority=policy.priority
    )
def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUttered]:
    """Create a `UserUttered` event from a story step.

    Args:
        step: The story step dictionary to read the intent, the optional
            user message and the optional entities from.

    Returns:
        A `UserUttered` event with confidence 1.0 for the step's intent. Its
        text is `None` when the step has no `KEY_USER_MESSAGE` entry.
    """
    intent = {"name": self._user_intent_from_step(step), "confidence": 1.0}

    if KEY_USER_MESSAGE not in step:
        # Only the intent was provided in the stories, so there is no text and
        # the entities come from the step's entity list.
        plain_text = None
        entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
        return UserUttered(plain_text, intent, entities)

    user_message = step[KEY_USER_MESSAGE].strip()
    entities = entities_parser.find_entities_in_training_example(user_message)
    plain_text = entities_parser.replace_entities(user_message)

    if plain_text.startswith(INTENT_MESSAGE_PREFIX):
        # Messages with the intent prefix get their entities from the
        # RegexInterpreter instead.
        regex_parse = RegexInterpreter().synchronous_parse(plain_text)
        entities = regex_parse.get(ENTITIES, [])

    return UserUttered(plain_text, intent, entities)
def parse_e2e_message(
    self, line: Text, is_used_for_training: bool = True
) -> Message:
    """Parse one end-to-end story line of the form `<intent>:<example>`.

    Args:
        line: The line to parse.
        is_used_for_training: When false and `self.use_e2e` is also false,
            the original message text is kept and entities are reset.

    Returns:
        The training example created from the message and intent.

    Raises:
        ValueError: If `line` does not match the expected format.
    """
    # The pattern has four groups:
    # 1) Potential "form" annotation
    # 2) The correct intent
    # 3) Optional entities
    # 4) The message text
    form_group = fr"({FORM_PREFIX}\s*)*"
    item_regex = re.compile(r"\s*" + form_group + r"([^{}]+?)({.*})*:\s*(.*)")

    match = item_regex.match(line)
    if match is None:
        raise ValueError(
            "Encountered invalid test story format for message "
            "`{}`. Please visit the documentation page on "
            "end-to-end testing at {}/user-guide/testing-your-assistant/"
            "#end-to-end-testing/".format(line, LEGACY_DOCS_BASE_URL)
        )

    from rasa.shared.nlu.training_data import entities_parser

    intent, message = match.group(2), match.group(4)
    example = entities_parser.parse_training_example(message, intent)

    if not (is_used_for_training or self.use_e2e):
        # In case this is a simple conversion from Markdown we should copy over
        # the original text and not parse the entities
        example.data[rasa.shared.nlu.constants.TEXT] = message
        example.data[rasa.shared.nlu.constants.ENTITIES] = []

    # If the message starts with the `INTENT_MESSAGE_PREFIX` potential entities
    # are annotated in the json format (e.g. `/greet{"name": "Rasa"})
    if message.startswith(INTENT_MESSAGE_PREFIX):
        regex_parse = RegexInterpreter().synchronous_parse(message)
        example.data["entities"] = regex_parse["entities"]

    return example
async def test_training_script_without_max_history_set(tmp_path: Path):
    """Train with `no_max_hist_config.yml` and check each featurizer's `max_history`.

    `FormPolicy` is expected to use 2, `MemoizationPolicy`
    `OLD_DEFAULT_MAX_HISTORY`, and every other policy `None`. Policies whose
    featurizer has no `max_history` are skipped.
    """
    model_dir = str(tmp_path)

    await train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_STORIES_FILE,
        model_dir,
        interpreter=RegexInterpreter(),
        policy_config="data/test_config/no_max_hist_config.yml",
        additional_arguments={},
    )
    agent = Agent.load(model_dir)

    for policy in agent.policy_ensemble.policies:
        if not hasattr(policy.featurizer, "max_history"):
            continue

        # The comparison is on the exact type, so subclasses take the
        # final branch.
        if type(policy) == FormPolicy:
            assert policy.featurizer.max_history == 2
        elif type(policy) == MemoizationPolicy:
            assert policy.featurizer.max_history == OLD_DEFAULT_MAX_HISTORY
        else:
            assert policy.featurizer.max_history is None
def test_end_to_end_prediction_supersedes_others(default_domain: Domain):
    """Check that an end-to-end prediction wins over a higher-priority policy.

    The priority-1 policy is flagged as an end-to-end prediction; its
    confidence, action index and name must appear in the ensemble prediction
    even though the other policy has priority 100.
    """
    expected_action_index = 2
    expected_confidence = 0.5

    high_priority_policy = ConstantPolicy(priority=100, predict_index=0)
    end_to_end_policy = ConstantPolicy(
        priority=1,
        predict_index=expected_action_index,
        confidence=expected_confidence,
        is_end_to_end_prediction=True,
    )
    ensemble = SimplePolicyEnsemble([high_priority_policy, end_to_end_policy])

    tracker = DialogueStateTracker.from_events("test", evts=[])
    prediction = ensemble.probabilities_using_best_policy(
        tracker, default_domain, RegexInterpreter()
    )

    assert prediction.max_confidence == expected_confidence
    assert prediction.max_confidence_index == expected_action_index

    # The end-to-end policy sits at index 1 of the ensemble.
    expected_name = f"policy_1_{ConstantPolicy.__name__}"
    assert prediction.policy_name == expected_name
def test_fallback_wins_over_mapping():
    """Check that `FallbackPolicy` beats `MappingPolicy` on a low-confidence intent.

    The restart intent is uttered with confidence 0.0001; the ensemble must
    name the fallback policy and the chosen action must be
    `ACTION_DEFAULT_FALLBACK_NAME`.
    """
    domain = Domain.load("data/test_domains/default.yml")

    low_confidence = 0.0001  # Low confidence should trigger fallback
    events = [
        ActionExecuted(ACTION_LISTEN_NAME),
        utilities.user_uttered(USER_INTENT_RESTART, low_confidence),
    ]
    tracker = DialogueStateTracker.from_events("test", events, [])

    ensemble = SimplePolicyEnsemble([FallbackPolicy(), MappingPolicy()])
    prediction = ensemble.probabilities_using_best_policy(
        tracker, domain, RegexInterpreter()
    )

    index_of_fallback_policy = 0
    next_action = rasa.core.actions.action.action_for_index(
        prediction.max_confidence_index, domain, None
    )

    expected_name = f"policy_{index_of_fallback_policy}_{FallbackPolicy.__name__}"
    assert prediction.policy_name == expected_name
    assert next_action.name() == ACTION_DEFAULT_FALLBACK_NAME
def test_training_script_with_max_history_set(
    tmp_path: Path, domain_path: Text, stories_path: Text
):
    """Train with `max_hist_config.yml` and check each featurizer's `max_history`.

    `RulePolicy` is expected to use `None` and every other policy 5. Policies
    whose featurizer has no `max_history` are skipped.
    """
    model_dir = str(tmp_path)

    train(
        domain_path,
        stories_path,
        model_dir,
        interpreter=RegexInterpreter(),
        policy_config="data/test_config/max_hist_config.yml",
        additional_arguments={},
    )
    agent = Agent.load(model_dir)

    # Policies listed here deviate from the fallback value of 5 used below.
    expected_max_history = {RulePolicy: None}

    for policy in agent.policy_ensemble.policies:
        if not hasattr(policy.featurizer, "max_history"):
            continue

        expected_history = expected_max_history.get(type(policy), 5)
        assert policy.featurizer.max_history == expected_history
def test_single_state_featurizer_with_entity_roles_and_groups(
    unpacked_trained_moodbot_path: Text,
):
    """Encode entities, one of which carries a label separator, into tag ids.

    Only `ENTITY_TAGS` is expected among the encoded keys; the expected tag
    column marks "London" with 1 and "Paris" with 2.
    """
    from rasa.core.agent import Agent

    interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

    # TODO roles and groups are not supported in e2e yet
    city_to_label = f"city{ENTITY_LABEL_SEPARATOR}to"
    domain = Domain(
        intents=[],
        entities=["city", city_to_label],
        slots=[],
        responses={},
        forms={},
        action_names=[],
    )

    f = SingleStateFeaturizer()
    f.prepare_for_training(domain, RegexInterpreter())

    london = {
        ENTITY_ATTRIBUTE_TYPE: "city",
        ENTITY_ATTRIBUTE_VALUE: "London",
        ENTITY_ATTRIBUTE_START: 17,
        ENTITY_ATTRIBUTE_END: 23,
    }
    paris = {
        ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to",
        ENTITY_ATTRIBUTE_VALUE: "Paris",
        ENTITY_ATTRIBUTE_START: 27,
        ENTITY_ATTRIBUTE_END: 32,
    }
    message_data = {
        TEXT: "I am flying from London to Paris",
        ENTITIES: [london, paris],
    }
    encoded = f.encode_entities(message_data, interpreter=interpreter)

    assert sorted(encoded.keys()) == [ENTITY_TAGS]

    expected_tags = [[0], [0], [0], [0], [1], [0], [2]]
    assert np.all(encoded[ENTITY_TAGS][0].features == expected_tags)