def encode(self, state: Dict[Text, float]) -> np.ndarray:
    """Returns a binary vector indicating which features are active.

    Given a dictionary of states (e.g. 'intent_greet',
    'prev_action_listen',...) return a vector with one entry per known
    feature, where the entry of every feature found in
    `self.input_state_map` is set to the value given for it in `state`.

    For example with two active features out of five possible features
    this would return a vector like `[0 0 1 0 1]`.

    If intent features are given with a probability, for example
    with two active features and two uncertain intents out
    of five possible features this would return a vector
    like `[0.3, 0.7, 1.0, 0, 1.0]`.

    If this is just a padding vector we set all values to `-1`.
    Padding vectors are specified by a `None` or `[None]`
    value for states.

    Args:
        state: Mapping of feature name to its value (1 / 0 or a
            probability). `None`, or a container holding `None`, requests
            a padding vector.

    Returns:
        A 1-d array of length `self.num_features`. The dtype is `int32`
        for padding vectors and when every stored value is integral
        (as judged by `utils.is_int`); otherwise it is `float64`.

    Raises:
        Exception: If `self.num_features` is falsy, i.e. the featurizer
            was not prepared before encoding.
    """
    if not self.num_features:
        raise Exception(
            "BinarySingleStateFeaturizer was not prepared before encoding."
        )

    if state is None or None in state:
        return np.full(self.num_features, -1, dtype=np.int32)

    # We are going to use floats and convert to int later if possible.
    # NOTE: the builtin `float` is used because the `np.float` alias was
    # removed in NumPy 1.24; both denote float64.
    used_features = np.zeros(self.num_features, dtype=float)
    using_only_ints = True
    for state_name, prob in state.items():
        if state_name in self.input_state_map:
            idx = self.input_state_map[state_name]
            used_features[idx] = prob
            using_only_ints = using_only_ints and utils.is_int(prob)
        else:
            logger.debug(
                "Feature '{}' (value: '{}') could not be found in "
                "feature map. Make sure you added all intents and "
                "entities to the domain".format(state_name, prob)
            )

    if using_only_ints:
        # this is an optimization - saves us a bit of memory
        return used_features.astype(np.int32)
    return used_features
def encode(self, state: Dict[Text, float]) -> np.ndarray:
    """Returns a binary vector indicating which tokens are present.

    The vector is laid out in three consecutive sections:

    1. one entry per token of `self.user_vocab`,
    2. one entry per label of `self.slot_labels`
       (offset `len(self.user_vocab)`),
    3. one entry per token of `self.bot_vocab`
       (offset `len(self.user_vocab) + len(self.slot_labels)`).

    User and bot labels are split into tokens on `self.split_symbol` and
    the feature's value is added to the entry of every token, so a token
    shared by several active features accumulates their values. User
    label tokens are only counted when the state also contains the
    `PREV_PREFIX + ACTION_LISTEN_NAME` feature.

    Args:
        state: Mapping of feature name to its value (1 / 0 or a
            probability). `None`, or a container holding `None`, requests
            a padding vector.

    Returns:
        A 1-d array of length `self.num_features`. Padding vectors are
        all `-1` with dtype `int32`. Otherwise the dtype is `int32` when
        every value in `state` is integral (as judged by `utils.is_int`,
        including values of features that were not found) and `float64`
        if not.

    Raises:
        Exception: If `self.num_features` is falsy, i.e. the featurizer
            was not prepared before encoding.
    """
    if not self.num_features:
        raise Exception("LabelTokenizerSingleStateFeaturizer "
                        "was not prepared before encoding.")

    if state is None or None in state:
        return np.full(self.num_features, -1, dtype=np.int32)

    # We are going to use floats and convert to int later if possible.
    # NOTE: the builtin `float` is used because the `np.float` alias was
    # removed in NumPy 1.24; both denote float64.
    used_features = np.zeros(self.num_features, dtype=float)
    using_only_ints = True
    for state_name, prob in state.items():
        using_only_ints = using_only_ints and utils.is_int(prob)
        if state_name in self.user_labels:
            if PREV_PREFIX + ACTION_LISTEN_NAME in state:
                # else we predict next action from bot action and memory
                for t in state_name.split(self.split_symbol):
                    used_features[self.user_vocab[t]] += prob

        elif state_name in self.slot_labels:
            offset = len(self.user_vocab)
            idx = self.slot_labels.index(state_name)
            used_features[offset + idx] += prob

        elif state_name[len(PREV_PREFIX):] in self.bot_labels:
            action_name = state_name[len(PREV_PREFIX):]
            # The bot section starts after the user and slot sections;
            # the offset does not depend on the token, so compute it once.
            offset = len(self.user_vocab) + len(self.slot_labels)
            for t in action_name.split(self.split_symbol):
                idx = self.bot_vocab[t]
                used_features[offset + idx] += prob

        else:
            logger.warning(
                f"Feature '{state_name}' could not be found in feature map."
            )

    if using_only_ints:
        # this is an optimization - saves us a bit of memory
        return used_features.astype(np.int32)
    return used_features
def test_is_int():
    """Check `utils.is_int` against integral and non-integral inputs."""
    # An int and a float without a fractional part both count as ints.
    for integral in (1, 1.0):
        assert utils.is_int(integral)

    # None, a float with a fractional part and a string do not.
    for non_integral in (None, 1.2, "test"):
        assert not utils.is_int(non_integral)