Example #1
0
    def encode(self, state: Dict[Text, float]) -> np.ndarray:
        """Returns a binary vector indicating which features are active.

        Given a dictionary of states (e.g. 'intent_greet',
        'prev_action_listen',...) return a binary vector indicating which
        features of `self.input_features` are in the bag. NB it's a
        regular double precision float array type.

        For example with two active features out of five possible features
        this would return a vector like `[0 0 1 0 1]`

        If intent features are given with a probability, for example
        with two active features and two uncertain intents out
        of five possible features this would return a vector
        like `[0.3, 0.7, 1.0, 0, 1.0]`.

        If this is just a padding vector we set all values to `-1`.
        padding vectors are specified by a `None` or `[None]`
        value for states.
        """

        if not self.num_features:
            raise Exception(
                "BinarySingleStateFeaturizer was not prepared before encoding."
            )

        if state is None or None in state:
            return np.ones(self.num_features, dtype=np.int32) * -1

        # we are going to use floats and convert to int later if possible
        used_features = np.zeros(self.num_features, dtype=np.float)
        using_only_ints = True
        for state_name, prob in state.items():
            if state_name in self.input_state_map:
                idx = self.input_state_map[state_name]
                used_features[idx] = prob
                using_only_ints = using_only_ints and utils.is_int(prob)
            else:
                logger.debug(
                    "Feature '{}' (value: '{}') could not be found in "
                    "feature map. Make sure you added all intents and "
                    "entities to the domain".format(state_name, prob))

        if using_only_ints:
            # this is an optimization - saves us a bit of memory
            return used_features.astype(np.int32)
        else:
            return used_features
Example #2
0
    def encode(self, state: Dict[Text, float]) -> np.ndarray:
        """Returns a binary vector indicating which tokens are present."""

        if not self.num_features:
            raise Exception("LabelTokenizerSingleStateFeaturizer "
                            "was not prepared before encoding.")

        if state is None or None in state:
            return np.ones(self.num_features, dtype=np.int32) * -1

        # we are going to use floats and convert to int later if possible
        used_features = np.zeros(self.num_features, dtype=np.float)
        using_only_ints = True
        for state_name, prob in state.items():
            using_only_ints = using_only_ints and utils.is_int(prob)
            if state_name in self.user_labels:
                if PREV_PREFIX + ACTION_LISTEN_NAME in state:
                    # else we predict next action from bot action and memory
                    for t in state_name.split(self.split_symbol):
                        used_features[self.user_vocab[t]] += prob

            elif state_name in self.slot_labels:
                offset = len(self.user_vocab)
                idx = self.slot_labels.index(state_name)
                used_features[offset + idx] += prob

            elif state_name[len(PREV_PREFIX):] in self.bot_labels:
                action_name = state_name[len(PREV_PREFIX):]
                for t in action_name.split(self.split_symbol):
                    offset = len(self.user_vocab) + len(self.slot_labels)
                    idx = self.bot_vocab[t]
                    used_features[offset + idx] += prob

            else:
                logger.warning(
                    f"Feature '{state_name}' could not be found in feature map."
                )

        if using_only_ints:
            # this is an optimization - saves us a bit of memory
            return used_features.astype(np.int32)
        else:
            return used_features
Example #3
0
def test_is_int():
    assert utils.is_int(1)
    assert utils.is_int(1.0)
    assert not utils.is_int(None)
    assert not utils.is_int(1.2)
    assert not utils.is_int("test")