Example 1
    def _extract_state_features(
        self,
        sub_state: SubState,
        interpreter: NaturalLanguageInterpreter,
        sparse: bool = False,
    ) -> Dict[Text, List["Features"]]:
        # This method is called during both prediction and training.
        # `self._use_regex_interpreter == True` means that Core was trained
        # separately; in that case, replace an interpreter based on a trained
        # NLU model with the default RegexInterpreter, to make sure that
        # prediction-time and training-time features are the same.
        if self._use_regex_interpreter and not isinstance(
                interpreter, RegexInterpreter):
            interpreter = RegexInterpreter()

        message = Message(data=sub_state)
        # remove entities from possible attributes
        attributes = set(attribute for attribute in sub_state.keys()
                         if attribute != ENTITIES)

        parsed_message = interpreter.featurize_message(message)
        output = self._get_features_from_parsed_message(
            parsed_message, attributes)

        # check that name attributes have features
        name_attribute = self._get_name_attribute(attributes)
        if name_attribute and name_attribute not in output:
            # nlu pipeline didn't create features for user or action
            # this might happen, for example, when we have action_name in the state
            # but it did not get featurized because only character level
            # CountVectorsFeaturizer was included in the config.
            output[name_attribute] = self._create_features(
                sub_state, name_attribute, sparse)

        return output
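The guard at the top of this variant can be exercised on its own. Below is a minimal sketch, assuming `RegexInterpreter` is importable from `rasa.shared.nlu.interpreter`; the dummy interpreter class is invented for the illustration:

from rasa.shared.nlu.interpreter import RegexInterpreter

class DummyNLUInterpreter:  # hypothetical stand-in for an NLU-based interpreter
    pass

use_regex_interpreter = True  # i.e. Core was trained separately
interpreter = DummyNLUInterpreter()

# same substitution as above: fall back to RegexInterpreter at prediction
# time so that features match the ones produced during training
if use_regex_interpreter and not isinstance(interpreter, RegexInterpreter):
    interpreter = RegexInterpreter()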
Example 2
    def _extract_state_features(
        self,
        sub_state: SubState,
        interpreter: NaturalLanguageInterpreter,
        sparse: bool = False,
    ) -> Dict[Text, List["Features"]]:

        message = Message(data=sub_state)
        # remove entities from possible attributes
        attributes = set(
            attribute for attribute in sub_state.keys() if attribute != ENTITIES
        )

        parsed_message = interpreter.featurize_message(message)
        output = self._get_features_from_parsed_message(parsed_message, attributes)

        # check that name attributes have features
        name_attribute = self._get_name_attribute(attributes)
        if name_attribute and name_attribute not in output:
            # nlu pipeline didn't create features for user or action
            # this might happen, for example, when we have action_name in the state
            # but it did not get featurized because only character level
            # CountVectorsFeaturizer was included in the config.
            output[name_attribute] = self._create_features(
                sub_state, name_attribute, sparse
            )

        return output
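Both variants filter entities out of the attribute set before featurizing. A self-contained sketch of that step, with an illustrative sub-state and a plain string standing in for Rasa's `ENTITIES` constant:

ENTITIES = "entities"  # stand-in for Rasa's ENTITIES constant

sub_state = {"intent": "greet", "text": "hello", ENTITIES: [{"entity": "name"}]}

# the same comprehension as above: every attribute except entities
attributes = {attribute for attribute in sub_state if attribute != ENTITIES}
assert attributes == {"intent", "text"}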
Example 3
def test_ensemble_prevents_multiple_action_unlikely_intents(
    monkeypatch: MonkeyPatch,
    tmp_path: Path,
    unexpected_intent_policy_agent: Agent,
    moodbot_domain: Domain,
):
    monkeypatch.setattr(
        UnexpecTEDIntentPolicy,
        "predict_action_probabilities",
        _action_unlikely_intent_for("greet"),
    )

    tracker = DialogueStateTracker.from_events(
        "rule triggering tracker",
        evts=[
            ActionExecuted(ACTION_LISTEN_NAME),
            UserUttered(text="hello", intent={"name": "greet"}),
            ActionExecuted(ACTION_UNLIKELY_INTENT_NAME),
        ],
    )

    policy_ensemble = unexpected_intent_policy_agent.policy_ensemble
    prediction = policy_ensemble.probabilities_using_best_policy(
        tracker, moodbot_domain, NaturalLanguageInterpreter())

    # The prediction cannot be action_unlikely_intent, because the last
    # event is not of type UserUttered, which is the first condition for
    # `UnexpecTEDIntentPolicy` to make a prediction.
    assert (
        moodbot_domain.action_names_or_texts[np.argmax(prediction.probabilities)]
        != ACTION_UNLIKELY_INTENT_NAME
    )
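The final assertion is easy to check in isolation. A minimal sketch with a hypothetical action list and probability vector in place of the real domain and prediction:

import numpy as np

action_names = ["action_listen", "utter_greet", "action_unlikely_intent"]
probabilities = np.array([0.1, 0.7, 0.2])

# argmax picks the most probable action; its name must not be
# action_unlikely_intent for the test to pass
assert action_names[np.argmax(probabilities)] != "action_unlikely_intent"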
Example 4
def test_rule_action_wins_over_action_unlikely_intent(
    monkeypatch: MonkeyPatch,
    tmp_path: Path,
    unexpected_intent_policy_agent: Agent,
    moodbot_domain: Domain,
):
    # The original training data consists of a rule for `goodbye` intent.
    # We monkey-patch UnexpecTEDIntentPolicy to always predict action_unlikely_intent
    # if last user intent was goodbye. The predicted action from ensemble
    # should be utter_goodbye and not action_unlikely_intent.
    monkeypatch.setattr(
        UnexpecTEDIntentPolicy,
        "predict_action_probabilities",
        _action_unlikely_intent_for("goodbye"),
    )

    tracker = DialogueStateTracker.from_events(
        "rule triggering tracker",
        evts=[
            ActionExecuted(ACTION_LISTEN_NAME),
            UserUttered(text="goodbye", intent={"name": "goodbye"}),
        ],
    )
    policy_ensemble = unexpected_intent_policy_agent.policy_ensemble
    prediction = policy_ensemble.probabilities_using_best_policy(
        tracker, moodbot_domain, NaturalLanguageInterpreter())

    test_utils.assert_predicted_action(prediction, moodbot_domain,
                                       "utter_goodbye")
Example 5
    def _create_optional_event_for_entities(
        self,
        prediction_output: Dict[Text, tf.Tensor],
        is_e2e_prediction: bool,
        interpreter: NaturalLanguageInterpreter,
        tracker: DialogueStateTracker,
    ) -> Optional[List[Event]]:
        if tracker.latest_action_name != ACTION_LISTEN_NAME or not is_e2e_prediction:
            # Entities belong only to the last user message, and only if the
            # user text was used for the prediction; a user message always
            # comes after action_listen.
            return None

        if not self.config[ENTITY_RECOGNITION]:
            # entity recognition is not turned on, no entities can be predicted
            return None

        # The batch dimension of the entity prediction is not the batch size;
        # it is the number of text inputs in the batch (only the last one if a
        # max-history featurizer is used, otherwise all of them). To pick the
        # entities of the latest user message we therefore take the last index
        # of the entity prediction's batch dimension.
        predicted_tags, confidence_values = rasa.utils.train_utils.entity_label_to_tags(
            prediction_output,
            self._entity_tag_specs,
            self.config[BILOU_FLAG],
            prediction_index=-1,
        )

        if ENTITY_ATTRIBUTE_TYPE not in predicted_tags:
            # no entities detected
            return None

        # entities belong to the last message of the tracker
        # convert the predicted tags to actual entities
        text = tracker.latest_message.text
        parsed_message = interpreter.featurize_message(
            Message(data={TEXT: text}))
        tokens = parsed_message.get(TOKENS_NAMES[TEXT])
        entities = EntityExtractor.convert_predictions_into_entities(
            text,
            tokens,
            predicted_tags,
            self.split_entities_config,
            confidences=confidence_values,
        )

        # add the extractor name
        for entity in entities:
            entity[EXTRACTOR] = "TEDPolicy"

        return [EntitiesAdded(entities)]
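A small NumPy sketch of why `prediction_index=-1` is used above; the tag-id matrix is made up, with one row per text input in the batch:

import numpy as np

# hypothetical entity tag ids: rows are text inputs, columns are tokens
tag_ids = np.array([
    [0, 0, 3],  # older user message
    [0, 2, 0],  # older user message
    [1, 0, 0],  # latest user message
])

# entities belong to the latest user message, i.e. the last batch row
latest_message_tags = tag_ids[-1]
assert latest_message_tags.tolist() == [1, 0, 0]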
Example 6
    def encode_entities(
        self, entity_data: Dict[Text, Any], interpreter: NaturalLanguageInterpreter
    ) -> Dict[Text, List["Features"]]:
        """Encode the given entity data with the help of the given interpreter.

        Produce numeric entity tags for tokens.

        Args:
            entity_data: The dict containing the text and entity labels and locations
            interpreter: The interpreter used to encode the state

        Returns:
            A dictionary of entity type to list of features.
        """
        from rasa.nlu.test import determine_token_labels

        # TODO
        #  The entity states used to create the tag-idx-mapping contain the
        #  entities and the concatenated entity and roles/groups. We do not
        #  distinguish between entities and roles/groups right now.
        # TODO
        #  Should we support BILOU tagging?

        if TEXT not in entity_data or len(self.entity_tag_id_mapping) < 2:
            # we cannot build a classifier with fewer than 2 classes
            return {}

        parsed_text = interpreter.featurize_message(
            Message({TEXT: entity_data[TEXT]}))
        if not parsed_text:
            return {}
        entities = entity_data.get(ENTITIES, [])

        _tags = []
        for token in parsed_text.get(TOKENS_NAMES[TEXT], []):
            _tag = determine_token_labels(token,
                                          entities,
                                          attribute_key=ENTITY_ATTRIBUTE_TYPE)
            # TODO handle if tag is not in mapping
            _tags.append(self.entity_tag_id_mapping[_tag])

        # transpose to have seq_len x 1
        return {
            ENTITY_TAGS: [
                Features(np.array([_tags]).T, IDS, ENTITY_TAGS, TAG_ID_ORIGIN)
            ]
        }
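The closing transpose can be verified in isolation: with four hypothetical tag ids, `np.array([_tags]).T` yields a seq_len x 1 matrix:

import numpy as np

_tags = [0, 2, 2, 1]          # one tag id per token
matrix = np.array([_tags]).T  # shape (4, 1), i.e. seq_len x 1
assert matrix.shape == (4, 1)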
Example 7
    def encode_entities(
        self,
        entity_data: Dict[Text, Any],
        interpreter: NaturalLanguageInterpreter,
        bilou_tagging: bool = False,
    ) -> Dict[Text, List["Features"]]:
        """Encode the given entity data with the help of the given interpreter.

        Produce numeric entity tags for tokens.

        Args:
            entity_data: The dict containing the text and entity labels and locations
            interpreter: The interpreter used to encode the state
            bilou_tagging: indicates whether BILOU tagging should be used or not

        Returns:
            A dictionary of entity type to list of features.
        """
        # TODO
        #  The entity states used to create the tag-idx-mapping contain the
        #  entities and the concatenated entity and roles/groups. We do not
        #  distinguish between entities and roles/groups right now.
        if (not entity_data or not self.entity_tag_specs
                or self.entity_tag_specs[0].num_tags < 2):
            # we cannot build a classifier with fewer than 2 classes
            return {}

        message = interpreter.featurize_message(Message(entity_data))

        if not message:
            return {}

        if bilou_tagging:
            bilou_utils.apply_bilou_schema_to_message(message)

        return {
            ENTITY_TAGS: [
                model_data_utils.get_tag_ids(message, self.entity_tag_specs[0],
                                             bilou_tagging)
            ]
        }
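For reference, a hand-rolled illustration of the BILOU scheme that `bilou_tagging` switches on; the tokens and entity types are invented and no Rasa code is involved:

# B = first token of a multi-token entity, I = inside, L = last token,
# U = a single-token entity, O = outside any entity
tokens = ["fly", "to", "New", "York", "City", "tomorrow"]
bilou_tags = ["O", "O", "B-city", "I-city", "L-city", "U-time"]
assert len(tokens) == len(bilou_tags)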