def _extract_and_add_features(
        message: "Message", attribute: Text
    ) -> Tuple[Optional[scipy.sparse.spmatrix], Optional[np.ndarray]]:
        """Collect the sparse and dense features stored on a message.

        Args:
            message: Message whose features are looked up via ``message.get``.
            attribute: Key used to index ``SPARSE_FEATURE_NAMES`` and
                ``DENSE_FEATURE_NAMES``.

        Returns:
            A ``(sparse, dense)`` pair. An entry is ``None`` when the message
            holds no value under the corresponding feature name. For every
            attribute other than ``INTENT_ATTRIBUTE`` both entries are passed
            through ``sequence_to_sentence_features`` before being returned.

        Raises:
            ValueError: If both feature sets are present and their first
                dimensions differ.
        """
        sparse = message.get(SPARSE_FEATURE_NAMES[attribute])
        dense = message.get(DENSE_FEATURE_NAMES[attribute])

        # The two feature sets can only be compared when both exist.
        both_present = sparse is not None and dense is not None
        if both_present and sparse.shape[0] != dense.shape[0]:
            raise ValueError(
                f"Sequence dimensions for sparse and dense features "
                f"don't coincide in '{message.text}' for attribute '{attribute}'."
            )

        if attribute == INTENT_ATTRIBUTE:
            return sparse, dense

        # Use only the CLS token vector as features
        return (
            sequence_to_sentence_features(sparse),
            sequence_to_sentence_features(dense),
        )
Beispiel #2
0
def test_sequence_to_sentence_features(features, expected):
    """Check that ``sequence_to_sentence_features(features)`` equals ``expected``.

    Sparse expectations are compared through their dense array form; every
    other expectation is compared element-wise as is.
    """
    result = sequence_to_sentence_features(features)

    if not isinstance(expected, scipy.sparse.spmatrix):
        assert np.all(expected == result)
        return

    # Sparse matrices: compare the dense representations of both sides.
    assert np.all(expected.toarray() == result.toarray())
Beispiel #3
0
    def process(self, message: Message, **kwargs: Any) -> None:
        """Set the most likely intent and the intent ranking on a message.

        The results are written to ``message`` under ``"intent"`` and
        ``"intent_ranking"``. When no prediction can be made (no classifier,
        or the message carries no dense text features) ``"intent"`` is set to
        ``None`` and ``"intent_ranking"`` to an empty list.

        Args:
            message: Message to classify; read via ``message.get`` and updated
                via ``message.set``.
            **kwargs: Accepted for interface compatibility; not used here.
        """

        dense_features = None
        if self.clf:
            dense_features = message.get(DENSE_FEATURE_NAMES[TEXT_ATTRIBUTE])

        if dense_features is None:
            # Either the component is not trained / didn't receive enough
            # training data, or the message has no dense text features.
            # The latter used to blow up further down instead of simply
            # reporting that no intent could be predicted.
            intent = None
            intent_ranking = []
        else:
            X = sequence_to_sentence_features(dense_features).reshape(1, -1)
            intent_ids, probabilities = self.predict(X)
            intents = self.transform_labels_num2str(np.ravel(intent_ids))
            # `predict` returns a matrix as it is supposed
            # to work for multiple examples as well, hence we need to flatten
            probabilities = probabilities.flatten()

            if intents.size > 0 and probabilities.size > 0:
                ranking = list(zip(list(intents),
                                   list(probabilities)))[:LABEL_RANKING_LENGTH]

                intent = {"name": intents[0], "confidence": probabilities[0]}

                intent_ranking = [{
                    "name": intent_name,
                    "confidence": score
                } for intent_name, score in ranking]
            else:
                intent = {"name": None, "confidence": 0.0}
                intent_ranking = []

        message.set("intent", intent, add_to_output=True)
        message.set("intent_ranking", intent_ranking, add_to_output=True)
Beispiel #4
0
    def train(self, training_data: TrainingData, cfg: RasaNLUModelConfig,
              **kwargs: Any) -> None:
        """Fit the intent classifier on the intent examples of a data set.

        Training is skipped with a warning when fewer than two distinct
        intent labels are present.

        Args:
            training_data: Source of ``intent_examples``.
            cfg: Model configuration; not read by this method.
            **kwargs: ``num_threads`` (default ``1``) is forwarded to
                ``_create_classifier``.
        """

        thread_count = kwargs.get("num_threads", 1)

        examples = training_data.intent_examples
        labels = [example.get("intent") for example in examples]

        if len(set(labels)) < 2:
            raise_warning(
                "Can not train an intent classifier as there are not "
                "enough intents. Need at least 2 different intents. "
                "Skipping training of intent classifier.",
                docs=DOCS_URL_TRAINING_DATA_NLU,
            )
            return

        y = self.transform_labels_str2num(labels)
        sentence_vectors = [
            sequence_to_sentence_features(
                example.get(DENSE_FEATURE_NAMES[TEXT_ATTRIBUTE]))
            for example in examples
        ]
        # reduce dimensionality: one flat feature row per example
        X = np.reshape(np.stack(sentence_vectors),
                       (len(sentence_vectors), -1))

        self.clf = self._create_classifier(thread_count, y)

        with warnings.catch_warnings():
            # sklearn raises lots of
            # "UndefinedMetricWarning: F - score is ill - defined"
            # if there are few intent examples, this is needed to prevent it
            warnings.simplefilter("ignore")
            self.clf.fit(X, y)
Beispiel #5
0
    def train(self, training_data: TrainingData, cfg: RasaNLUModelConfig,
              **kwargs: Any) -> None:
        """Train the intent classifier on a data set.

        Training is skipped with a warning when fewer than two distinct
        intent labels are present.

        Args:
            training_data: Source of ``intent_examples``.
            cfg: Model configuration; not read by this method.
            **kwargs: ``num_threads`` (default ``1``) is forwarded to
                ``_create_classifier``.
        """

        num_threads = kwargs.get("num_threads", 1)

        labels = [e.get("intent") for e in training_data.intent_examples]

        if len(set(labels)) < 2:
            warnings.warn("Can not train an intent classifier. "
                          "Need at least 2 different classes. "
                          "Skipping training of intent classifier.")
        else:
            y = self.transform_labels_str2num(labels)
            X = np.stack([
                sequence_to_sentence_features(
                    example.get(DENSE_FEATURE_NAMES[TEXT_ATTRIBUTE]))
                for example in training_data.intent_examples
            ])
            # reduce dimensionality: one flat feature row per example
            X = np.reshape(X, (len(X), -1))

            self.clf = self._create_classifier(num_threads, y)

            with warnings.catch_warnings():
                # With only a few intent examples the fit can emit a flood of
                # "metric is ill-defined" warnings; silence warnings for the
                # duration of the fit only, the previous filters are restored
                # when the context manager exits.
                warnings.simplefilter("ignore")
                self.clf.fit(X, y)