def encode(self, active_features, input_feature_map):
    """Return a vector marking which known features are active.

    `active_features` maps feature names (e.g. 'intent_greet',
    'prev_action_listen', ...) to a value, `input_feature_map` maps
    feature names to their index in the returned vector.

    Features whose name starts with 'intent_' are treated specially:
    only the intent with the highest value is kept and its slot is
    set to `1` (on a tie the intent seen last wins). Every other
    feature found in `input_feature_map` writes its non-zero value
    into its slot. Features missing from the map are logged and skipped.

    For example with two active features out of five possible
    features this would return a vector like `[0 0 1 0 1]`.

    The result is an `int32` array when every written value is
    integral according to `utils.is_int`, otherwise a float array.

    If this is just a padding vector we set all values to `-1`.
    Padding vectors are specified by a `None` or `[None]`
    value for active_features.
    """
    num_features = len(input_feature_map)

    if active_features is None or None in active_features:
        return np.ones(num_features, dtype=np.int32) * -1

    # we are going to use floats and convert to int later if possible
    used_features = np.zeros(num_features, dtype=float)
    using_only_ints = True
    best_intent = None
    best_intent_prob = 0.0

    for feature_name, prob in active_features.items():
        if feature_name.startswith('intent_'):
            # intents are not written directly; only remember the
            # most probable one and set it after the loop
            if prob >= best_intent_prob:
                best_intent = feature_name
                best_intent_prob = prob
        elif feature_name in input_feature_map:
            if prob != 0.0:
                idx = input_feature_map[feature_name]
                used_features[idx] = prob
                using_only_ints = using_only_ints and utils.is_int(prob)
        else:
            logger.debug(
                "Feature '{}' (value: '{}') could not be found in "
                "feature map. Make sure you added all intents and "
                "entities to the domain".format(feature_name, prob))

    if best_intent is not None:
        # the maximum confidence intent is encoded as a hard `1`
        index_in_feature_list = input_feature_map.get(best_intent)
        if index_in_feature_list is not None:
            used_features[index_in_feature_list] = 1
        else:
            # `Logger.warn` is a deprecated alias of `Logger.warning`
            logger.warning(
                "Couldn't set most probable feature '{}', "
                "it wasn't found in the feature list of the domain."
                " Make sure you added all intents and "
                "entities to the domain.".format(best_intent))

    if using_only_ints:
        # this is an optimization - saves us a bit of memory
        return used_features.astype(np.int32)
    return used_features
def encode(self, state):
    # type: (Dict[Text, float]) -> np.ndarray
    """Returns a vector indicating which features are active.

    Given a dictionary of states (e.g. 'intent_greet',
    'prev_action_listen',...) return a vector with the value of each
    state written at the index given by `self.input_state_map`.
    States missing from the map are logged and skipped.

    For example with two active features out of five possible
    features this would return a vector like `[0 0 1 0 1]`.

    If intent features are given with a probability, for example
    with two active features and two uncertain intents out
    of five possible features this would return a vector
    like `[0.3, 0.7, 1.0, 0, 1.0]`.

    The result is an `int32` array when every written value is
    integral according to `utils.is_int`, otherwise a float array.

    If this is just a padding vector we set all values to `-1`.
    Padding vectors are specified by a `None` or `[None]`
    value for states.

    Raises:
        Exception: if `self.num_features` is falsy, i.e. the
            featurizer was not prepared before encoding.
    """
    if not self.num_features:
        raise Exception("BinarySingleStateFeaturizer "
                        "was not prepared "
                        "before encoding.")

    if state is None or None in state:
        return np.ones(self.num_features, dtype=np.int32) * -1

    # we are going to use floats and convert to int later if possible.
    # The builtin `float` is used because the `np.float` alias was
    # removed from NumPy.
    used_features = np.zeros(self.num_features, dtype=float)
    using_only_ints = True
    for state_name, prob in state.items():
        if state_name in self.input_state_map:
            idx = self.input_state_map[state_name]
            used_features[idx] = prob
            using_only_ints = using_only_ints and utils.is_int(prob)
        else:
            logger.debug(
                "Feature '{}' (value: '{}') could not be found in "
                "feature map. Make sure you added all intents and "
                "entities to the domain".format(state_name, prob))

    if using_only_ints:
        # this is an optimization - saves us a bit of memory
        return used_features.astype(np.int32)
    return used_features
def encode(self, state):
    # type: (Dict[Text, float]) -> np.ndarray
    """Returns a vector indicating which features are active.

    Given a dictionary of states (e.g. 'intent_greet',
    'prev_action_listen',...) return a vector with the value of each
    state written at the index given by `self.input_state_map`.
    States missing from the map are logged and skipped.

    For example with two active features out of five possible
    features this would return a vector like `[0 0 1 0 1]`.

    If intent features are given with a probability, for example
    with two active features and two uncertain intents out
    of five possible features this would return a vector
    like `[0.3, 0.7, 1.0, 0, 1.0]`.

    The result is an `int32` array when every written value is
    integral according to `utils.is_int`, otherwise a float array.

    If this is just a padding vector we set all values to `-1`.
    Padding vectors are specified by a `None` or `[None]`
    value for states.

    Raises:
        Exception: if `self.num_features` is falsy, i.e. the
            featurizer was not prepared before encoding.
    """
    if not self.num_features:
        raise Exception("BinarySingleStateFeaturizer "
                        "was not prepared "
                        "before encoding.")

    if state is None or None in state:
        return np.ones(self.num_features, dtype=np.int32) * -1

    # we are going to use floats and convert to int later if possible.
    # The builtin `float` is used because the `np.float` alias was
    # removed from NumPy.
    used_features = np.zeros(self.num_features, dtype=float)
    using_only_ints = True
    for state_name, prob in state.items():
        if state_name in self.input_state_map:
            idx = self.input_state_map[state_name]
            used_features[idx] = prob
            using_only_ints = using_only_ints and utils.is_int(prob)
        else:
            logger.debug(
                "Feature '{}' (value: '{}') could not be found in "
                "feature map. Make sure you added all intents and "
                "entities to the domain".format(state_name, prob))

    if using_only_ints:
        # this is an optimization - saves us a bit of memory
        return used_features.astype(np.int32)
    return used_features
def encode(self, state):
    # type: (Dict[Text, float]) -> np.ndarray
    """Encode a state as a bag-of-tokens feature vector.

    The returned vector has `self.num_features` entries laid out as
    three consecutive segments:

    * user tokens, indexed by `self.user_vocab`,
    * slots, indexed by their position in `self.slot_labels`,
    * bot action tokens, indexed by `self.bot_vocab`.

    User and bot labels are split on `self.split_symbol` and the
    state's value is added to the entry of every resulting token.
    User labels are only encoded when the state also contains the
    key `PREV_PREFIX + ACTION_LISTEN_NAME`; otherwise they are
    skipped silently. State names matching none of the three groups
    are logged with a warning and skipped.

    The result is an `int32` array when every value in `state` is
    integral according to `utils.is_int`, otherwise a float array.

    If this is just a padding vector we set all values to `-1`.
    Padding vectors are specified by a `None` or `[None]`
    value for state.

    Raises:
        Exception: if `self.num_features` is falsy, i.e. the
            featurizer was not prepared before encoding.
    """
    if not self.num_features:
        raise Exception("LabelTokenizerSingleStateFeaturizer "
                        "was not prepared before encoding.")

    if state is None or None in state:
        return np.ones(self.num_features, dtype=np.int32) * -1

    # we are going to use floats and convert to int later if possible.
    # The builtin `float` is used because the `np.float` alias was
    # removed from NumPy.
    used_features = np.zeros(self.num_features, dtype=float)
    using_only_ints = True
    for state_name, prob in state.items():
        using_only_ints = using_only_ints and utils.is_int(prob)
        if state_name in self.user_labels:
            if PREV_PREFIX + ACTION_LISTEN_NAME in state:
                # else we predict next action from bot action and memory
                for t in state_name.split(self.split_symbol):
                    used_features[self.user_vocab[t]] += prob

        elif state_name in self.slot_labels:
            offset = len(self.user_vocab)
            idx = self.slot_labels.index(state_name)
            used_features[offset + idx] += prob

        elif state_name[len(PREV_PREFIX):] in self.bot_labels:
            # NOTE: the leading len(PREV_PREFIX) characters are dropped
            # without checking that they actually equal PREV_PREFIX.
            action_name = state_name[len(PREV_PREFIX):]
            # the bot segment starts after the user and slot segments
            offset = len(self.user_vocab) + len(self.slot_labels)
            for t in action_name.split(self.split_symbol):
                idx = self.bot_vocab[t]
                used_features[offset + idx] += prob

        else:
            logger.warning(
                "Feature '{}' could not be found in "
                "feature map.".format(state_name))

    if using_only_ints:
        # this is an optimization - saves us a bit of memory
        return used_features.astype(np.int32)
    return used_features
def encode(self, state):
    # type: (Dict[Text, float]) -> np.ndarray
    """Encode a state as a bag-of-tokens feature vector.

    The returned vector has `self.num_features` entries laid out as
    three consecutive segments:

    * user tokens, indexed by `self.user_vocab`,
    * slots, indexed by their position in `self.slot_labels`,
    * bot action tokens, indexed by `self.bot_vocab`.

    User and bot labels are split on `self.split_symbol` and the
    state's value is added to the entry of every resulting token.
    User labels are only encoded when the state also contains the
    key `PREV_PREFIX + ACTION_LISTEN_NAME`; otherwise they are
    skipped silently. State names matching none of the three groups
    are logged with a warning and skipped.

    The result is an `int32` array when every value in `state` is
    integral according to `utils.is_int`, otherwise a float array.

    If this is just a padding vector we set all values to `-1`.
    Padding vectors are specified by a `None` or `[None]`
    value for state.

    Raises:
        Exception: if `self.num_features` is falsy, i.e. the
            featurizer was not prepared before encoding.
    """
    if not self.num_features:
        raise Exception("LabelTokenizerSingleStateFeaturizer "
                        "was not prepared before encoding.")

    if state is None or None in state:
        return np.ones(self.num_features, dtype=np.int32) * -1

    # we are going to use floats and convert to int later if possible.
    # The builtin `float` is used because the `np.float` alias was
    # removed from NumPy.
    used_features = np.zeros(self.num_features, dtype=float)
    using_only_ints = True
    for state_name, prob in state.items():
        using_only_ints = using_only_ints and utils.is_int(prob)
        if state_name in self.user_labels:
            if PREV_PREFIX + ACTION_LISTEN_NAME in state:
                # else we predict next action from bot action and memory
                for t in state_name.split(self.split_symbol):
                    used_features[self.user_vocab[t]] += prob

        elif state_name in self.slot_labels:
            offset = len(self.user_vocab)
            idx = self.slot_labels.index(state_name)
            used_features[offset + idx] += prob

        elif state_name[len(PREV_PREFIX):] in self.bot_labels:
            # NOTE: the leading len(PREV_PREFIX) characters are dropped
            # without checking that they actually equal PREV_PREFIX.
            action_name = state_name[len(PREV_PREFIX):]
            # the bot segment starts after the user and slot segments
            offset = len(self.user_vocab) + len(self.slot_labels)
            for t in action_name.split(self.split_symbol):
                idx = self.bot_vocab[t]
                used_features[offset + idx] += prob

        else:
            logger.warning("Feature '{}' could not be found in "
                           "feature map.".format(state_name))

    if using_only_ints:
        # this is an optimization - saves us a bit of memory
        return used_features.astype(np.int32)
    return used_features
def test_is_int():
    """Check `utils.is_int` on integral and non-integral inputs."""
    # an int and a float without a fractional part are both accepted
    for integral in (1, 1.0):
        assert utils.is_int(integral)

    # None, a fractional float and a string are all rejected
    for non_integral in (None, 1.2, "test"):
        assert not utils.is_int(non_integral)
def test_is_int():
    """Check `is_int` accepts integral numbers and rejects the rest."""
    accepted = (1, 1.0)
    rejected = (None, 1.2, "test")

    # every integral value has to be recognised ...
    assert all(is_int(value) for value in accepted)
    # ... and none of the other values may be
    assert not any(is_int(value) for value in rejected)
def test_is_int():
    """Check `utils.is_int` against a table of inputs and expectations."""
    # (input, whether it should be treated as an integer)
    cases = [
        (1, True),
        (1.0, True),
        (None, False),
        (1.2, False),
        ("test", False),
    ]
    for candidate, should_be_int in cases:
        if should_be_int:
            assert utils.is_int(candidate)
        else:
            assert not utils.is_int(candidate)