def test_label_data_assembly(
        self, trained_policy: UnexpecTEDIntentPolicy, default_domain: Domain
    ):

        # Construct input data
        state_featurizer = trained_policy.featurizer.state_featurizer
        encoded_all_labels = state_featurizer.encode_all_labels(
            default_domain, precomputations=None
        )
        attribute_data, _ = model_data_utils.convert_to_data_format(encoded_all_labels)

        assembled_label_data = trained_policy._assemble_label_data(
            attribute_data, default_domain
        )
        assembled_label_data_signature = assembled_label_data.get_signature()

        assert list(assembled_label_data_signature.keys()) == [
            f"{LABEL}_{INTENT}",
            LABEL,
        ]
        assert assembled_label_data.num_examples == len(default_domain.intents)
        assert list(assembled_label_data_signature[f"{LABEL}_{INTENT}"].keys()) == [
            MASK,
            SENTENCE,
        ]
        assert list(assembled_label_data_signature[LABEL].keys()) == [IDS]
        assert assembled_label_data_signature[f"{LABEL}_{INTENT}"][SENTENCE][
            0
        ].units == len(default_domain.intents)
Beispiel #2
0
    def train(
        self,
        training_trackers: List[TrackerWithCachedStates],
        domain: Domain,
        interpreter: NaturalLanguageInterpreter,
        **kwargs: Any,
    ) -> None:
        tracker_state_features, label_ids = self.featurize_for_training(
            training_trackers, domain, interpreter, **kwargs)
        training_data, zero_state_features = model_data_utils.convert_to_data_format(
            tracker_state_features)
        self.zero_state_features = zero_state_features

        self._train_params.update(kwargs)
        model = self.model_architecture(**self._train_params)
        score = None
        # Note: clone is called throughout to avoid mutating default arguments.
        self.label_encoder = clone(self.label_encoder).fit(label_ids)
        X = self._preprocess_data(training_data)
        y = self.label_encoder.transform(label_ids)

        if self.shuffle:
            X, y = sklearn_shuffle(X, y)

        if self.cv is None:
            model = clone(model).fit(X, y)
        else:
            param_grid = self.param_grid or {}
            model, score = self._search_and_score(model, X, y, param_grid)

        self.model = model
        logger.info("Done fitting sklearn policy model")
        if score is not None:
            logger.info(f"Cross validation score: {score:.5f}")
Beispiel #3
0
    def test_label_data_assembly(self, trained_policy: TEDPolicy,
                                 default_domain: Domain):
        state_featurizer = trained_policy.featurizer.state_featurizer
        encoded_all_labels = state_featurizer.encode_all_labels(
            default_domain, precomputations=None)

        attribute_data, _ = model_data_utils.convert_to_data_format(
            encoded_all_labels)
        assembled_label_data = trained_policy._assemble_label_data(
            attribute_data, default_domain)
        assembled_label_data_signature = assembled_label_data.get_signature()

        assert list(assembled_label_data_signature.keys()) == [
            f"{LABEL}_{ACTION_NAME}",
            f"{LABEL}",
        ]
        assert assembled_label_data.num_examples == default_domain.num_actions
        assert list(assembled_label_data_signature[f"{LABEL}_{ACTION_NAME}"].
                    keys()) == [
                        MASK,
                        SENTENCE,
                    ]
        assert list(assembled_label_data_signature[LABEL].keys()) == [IDS]
        assert (assembled_label_data_signature[f"{LABEL}_{ACTION_NAME}"]
                [SENTENCE][0].units == default_domain.num_actions)
Beispiel #4
0
    def predict_action_probabilities(
        self,
        tracker: DialogueStateTracker,
        domain: Domain,
        interpreter: NaturalLanguageInterpreter,
        **kwargs: Any,
    ) -> PolicyPrediction:
        """Predicts the next action the bot should take after seeing the tracker.

        Args:
            tracker: the :class:`rasa.core.trackers.DialogueStateTracker`
            domain: the :class:`rasa.shared.core.domain.Domain`
            interpreter: Interpreter which may be used by the policies to create
                additional features.

        Returns:
             The policy's prediction (e.g. the probabilities for the actions).
        """
        X = self._featurize_for_prediction(tracker, domain, interpreter)
        training_data, _ = model_data_utils.convert_to_data_format(
            X, self.zero_state_features
        )
        Xt = self._preprocess_data(training_data)
        y_proba = self.model.predict_proba(Xt)
        return self._prediction(self._postprocess_prediction(y_proba, domain))
Beispiel #5
0
    def _create_model_data(
        self,
        tracker_state_features: List[List[Dict[Text, List["Features"]]]],
        label_ids: Optional[np.ndarray] = None,
        encoded_all_labels: Optional[List[Dict[Text, List["Features"]]]] = None,
    ) -> RasaModelData:
        """Combine all model related data into RasaModelData.

        Args:
            tracker_state_features: a dictionary of attributes (INTENT, TEXT, ACTION_NAME, ACTION_TEXT,
                ENTITIES, SLOTS, ACTIVE_LOOP) to a list of features for all dialogue
                turns in all training trackers
            label_ids: the label ids (e.g. action ids) for every dialogue turn in all
                training trackers
            encoded_all_labels: a list of dictionaries containing attribute features for labels ids

        Returns:
            RasaModelData
        """
        model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY)

        if label_ids is not None and encoded_all_labels is not None:

            label_ids = np.array(
                [np.expand_dims(seq_label_ids, -1) for seq_label_ids in label_ids]
            )
            model_data.add_features(LABEL_KEY, LABEL_SUB_KEY, [label_ids])

            attribute_data, self.zero_state_features = convert_to_data_format(
                tracker_state_features
            )
        else:
            # method is called during prediction
            attribute_data, _ = convert_to_data_format(
                tracker_state_features, self.zero_state_features
            )

        model_data.add_data(attribute_data)
        model_data.add_lengths(
            DIALOGUE, LENGTH, next(iter(list(attribute_data.keys()))), MASK
        )

        return model_data
Beispiel #6
0
 def predict_action_probabilities(
     self,
     tracker: DialogueStateTracker,
     domain: Domain,
     interpreter: NaturalLanguageInterpreter,
     **kwargs: Any,
 ) -> List[float]:
     X = self.featurizer.create_state_features([tracker], domain,
                                               interpreter)
     training_data, _ = model_data_utils.convert_to_data_format(
         X, self.zero_state_features)
     Xt = self._preprocess_data(training_data)
     y_proba = self.model.predict_proba(Xt)
     return self._postprocess_prediction(y_proba, domain)
Beispiel #7
0
    def _create_label_data(
        self, domain: Domain, interpreter: NaturalLanguageInterpreter
    ) -> Tuple[RasaModelData, List[Dict[Text, List["Features"]]]]:
        # encode all label_ids with policies' featurizer
        state_featurizer = self.featurizer.state_featurizer
        encoded_all_labels = state_featurizer.encode_all_labels(
            domain, interpreter)

        attribute_data, _ = convert_to_data_format(
            encoded_all_labels, featurizers=self.config[FEATURIZERS])

        label_data = self._assemble_label_data(attribute_data, domain)

        return label_data, encoded_all_labels
Beispiel #8
0
    def _create_label_data(
        self, domain: Domain, interpreter: NaturalLanguageInterpreter
    ) -> Tuple[RasaModelData, List[Dict[Text, List["Features"]]]]:
        # encode all label_ids with policies' featurizer
        state_featurizer = self.featurizer.state_featurizer
        encoded_all_labels = state_featurizer.encode_all_actions(domain, interpreter)

        attribute_data, _ = convert_to_data_format(encoded_all_labels)

        label_data = RasaModelData()
        label_data.add_data(attribute_data, key_prefix=f"{LABEL_KEY}_")

        label_ids = np.arange(domain.num_actions)
        label_data.add_features(
            LABEL_KEY, LABEL_SUB_KEY, [np.expand_dims(label_ids, -1)]
        )

        return label_data, encoded_all_labels
Beispiel #9
0
    def _create_data_for_entities(
        self, entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]]
    ) -> Optional[Data]:
        if not self.config[ENTITY_RECOGNITION]:
            return None

        # check that there are real entity tags
        if entity_tags and self._should_extract_entities(entity_tags):
            entity_tags_data, _ = convert_to_data_format(entity_tags)
            return entity_tags_data

        # there are no "real" entity tags
        logger.debug(
            f"Entity recognition cannot be performed, "
            f"set '{ENTITY_RECOGNITION}' config parameter to 'False'.")
        self.config[ENTITY_RECOGNITION] = False

        return None
Beispiel #10
0
    def _create_model_data(
        self,
        tracker_state_features: List[List[Dict[Text, List["Features"]]]],
        label_ids: Optional[np.ndarray] = None,
        entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]] = None,
        encoded_all_labels: Optional[List[Dict[Text,
                                               List["Features"]]]] = None,
    ) -> RasaModelData:
        """Combine all model related data into RasaModelData.

        Args:
            tracker_state_features: a dictionary of attributes
                (INTENT, TEXT, ACTION_NAME, ACTION_TEXT, ENTITIES, SLOTS, ACTIVE_LOOP)
                to a list of features for all dialogue turns in all training trackers
            label_ids: the label ids (e.g. action ids) for every dialogue turn in all
                training trackers
            entity_tags: a dictionary of entity type (ENTITY_TAGS) to a list of features
                containing entity tag ids for text user inputs otherwise empty dict
                for all dialogue turns in all training trackers
            encoded_all_labels: a list of dictionaries containing attribute features
                for label ids

        Returns:
            RasaModelData
        """
        model_data = RasaModelData(label_key=LABEL_KEY,
                                   label_sub_key=LABEL_SUB_KEY)

        if label_ids is not None and encoded_all_labels is not None:
            label_ids = np.array([
                np.expand_dims(seq_label_ids, -1)
                for seq_label_ids in label_ids
            ])
            model_data.add_features(
                LABEL_KEY,
                LABEL_SUB_KEY,
                [FeatureArray(label_ids, number_of_dimensions=3)],
            )

            attribute_data, self.fake_features = convert_to_data_format(
                tracker_state_features, featurizers=self.config[FEATURIZERS])

            entity_tags_data = self._create_data_for_entities(entity_tags)
            if entity_tags_data is not None:
                model_data.add_data(entity_tags_data)
        else:
            # method is called during prediction
            attribute_data, _ = convert_to_data_format(
                tracker_state_features,
                self.fake_features,
                featurizers=self.config[FEATURIZERS],
            )

        model_data.add_data(attribute_data)
        model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE)
        model_data.add_lengths(ACTION_TEXT, SEQUENCE_LENGTH, ACTION_TEXT,
                               SEQUENCE)

        # add the dialogue lengths
        attribute_present = next(iter(list(attribute_data.keys())))
        dialogue_lengths = np.array([
            np.size(np.squeeze(f, -1))
            for f in model_data.data[attribute_present][MASK][0]
        ])
        model_data.data[DIALOGUE][LENGTH] = [
            FeatureArray(dialogue_lengths, number_of_dimensions=1)
        ]

        # make sure all keys are in the same order during training and prediction
        model_data.sort()

        return model_data