Example 1
    def _extract_state_features(
        self,
        sub_state: SubState,
        interpreter: NaturalLanguageInterpreter,
        sparse: bool = False,
    ) -> Dict[Text, List["Features"]]:

        message = Message(data=sub_state)
        # remove entities from possible attributes
        attributes = set(
            attribute for attribute in sub_state.keys() if attribute != ENTITIES
        )

        parsed_message = interpreter.featurize_message(message)
        output = self._get_features_from_parsed_message(parsed_message, attributes)

        # check that name attributes have features
        name_attribute = self._get_name_attribute(attributes)
        if name_attribute and name_attribute not in output:
            # The NLU pipeline didn't create features for the user or the action.
            # This might happen, for example, when we have `action_name` in the state
            # but it did not get featurized because only a character-level
            # CountVectorsFeaturizer was included in the config.
            output[name_attribute] = self._create_features(
                sub_state, name_attribute, sparse
            )

        return output
Example 2
    def _extract_state_features(
        self,
        sub_state: SubState,
        interpreter: NaturalLanguageInterpreter,
        sparse: bool = False,
    ) -> Dict[Text, List["Features"]]:
        # This method is called during both prediction and training.
        # `self._use_regex_interpreter == True` means that core was trained
        # separately, so we substitute an interpreter that is based on some
        # trained NLU model with the default RegexInterpreter to make sure
        # that prediction-time and training-time features are the same.
        if self._use_regex_interpreter and not isinstance(
                interpreter, RegexInterpreter):
            interpreter = RegexInterpreter()

        message = Message(data=sub_state)
        # remove entities from possible attributes
        attributes = set(attribute for attribute in sub_state.keys()
                         if attribute != ENTITIES)

        parsed_message = interpreter.featurize_message(message)
        output = self._get_features_from_parsed_message(
            parsed_message, attributes)

        # check that name attributes have features
        name_attribute = self._get_name_attribute(attributes)
        if name_attribute and name_attribute not in output:
            # The NLU pipeline didn't create features for the user or the action.
            # This might happen, for example, when we have `action_name` in the state
            # but it did not get featurized because only a character-level
            # CountVectorsFeaturizer was included in the config.
            output[name_attribute] = self._create_features(
                sub_state, name_attribute, sparse)

        return output
Example 3
    def collect_features(self,
                         sub_state: SubState,
                         attributes: Optional[Iterable[Text]] = None
                         ) -> Dict[Text, List[Features]]:
        """Collects features for all attributes in the given substate.

        There might be multiple messages in the container that contain features
        relevant for the given substate, e.g. this is the case if `TEXT` and
        `INTENT` are present in the given substate. All of those messages will be
        collected and their features combined.

        Args:
          sub_state: substate for which we want to extract the relevant features
          attributes: if not `None`, this specifies the list of the attributes of the
            `Features` that we're interested in (i.e. all other `Features` contained
            in the relevant messages will be ignored)

        Returns:
          a dictionary that maps all the (requested) attributes to a list of `Features`

        Raises:
          `ValueError`: if some key pair (i.e. key attribute and corresponding
            value) from the given substate cannot be found
          `RuntimeError`: if features for the same attribute are found in two
            different messages that are associated with the given substate
        """
        # If we specify a list of attributes, then we want a dict with one entry
        # for each attribute back - even if the corresponding list of features is empty.
        features: Dict[Text, List[Features]] = (
            dict()
            if attributes is None
            else {attribute: [] for attribute in attributes}
        )
        # collect all relevant key attributes
        key_attributes = set(sub_state.keys()).intersection(
            self.KEY_ATTRIBUTES)
        for key_attribute in key_attributes:
            key_value = str(sub_state[key_attribute])
            message = self._table[key_attribute].get(key_value)
            if not message:
                raise ValueError(
                    f"Unknown key ({key_attribute},{key_value}). Cannot retrieve "
                    f"features for substate {sub_state}")
            features_from_message = Features.groupby_attribute(
                message.features, attributes=attributes)
            for feat_attribute, feat_value in features_from_message.items():
                existing_values = features.get(feat_attribute)
                # Note: the following checks are needed because, if we specify a list
                # of attributes, `features_from_message` will contain one entry per
                # attribute even if the corresponding feature list is empty.
                if feat_value and existing_values:
                    raise RuntimeError(
                        f"Feature for attribute {feat_attribute} has already been "
                        f"extracted from a different message stored under a key "
                        f"in {key_attributes} "
                        f"that is different from {key_attribute}. This means there's a "
                        f"redundancy in the message container.")
                if feat_value:
                    features[feat_attribute] = feat_value
        return features
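
To make the lookup-and-merge behaviour described in the docstring above concrete, here is a minimal standalone sketch. It is illustrative only: the real container stores `Message` objects and delegates grouping to `Features.groupby_attribute`, while this sketch assumes a plain nested dictionary `table` mapping key attribute to key value to per-attribute feature lists. All names in the sketch are hypothetical.

    from typing import Dict, Iterable, List, Optional, Text

    KEY_ATTRIBUTES = {"intent", "text", "action_name", "action_text"}  # assumed

    def collect_features_sketch(
        table: Dict[Text, Dict[Text, Dict[Text, List]]],
        sub_state: Dict[Text, Text],
        attributes: Optional[Iterable[Text]] = None,
    ) -> Dict[Text, List]:
        # Pre-seed one (possibly empty) entry per requested attribute.
        features: Dict[Text, List] = (
            {} if attributes is None else {attribute: [] for attribute in attributes}
        )
        for key_attribute in set(sub_state).intersection(KEY_ATTRIBUTES):
            key_value = str(sub_state[key_attribute])
            stored = table.get(key_attribute, {}).get(key_value)
            if stored is None:
                raise ValueError(f"Unknown key ({key_attribute},{key_value}).")
            for feat_attribute, feat_value in stored.items():
                if attributes is not None and feat_attribute not in features:
                    continue  # the caller did not ask for this attribute
                if feat_value and features.get(feat_attribute):
                    # The same attribute was already filled from another key message.
                    raise RuntimeError(f"Redundant features for '{feat_attribute}'.")
                if feat_value:
                    features[feat_attribute] = feat_value
        return features
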
Example 4
 def _state_features_for_attribute(self, sub_state: SubState,
                                   attribute: Text) -> Dict[Text, int]:
     if attribute in {INTENT, ACTION_NAME}:
         return {sub_state[attribute]: 1}
     elif attribute == ENTITIES:
         return {entity: 1 for entity in sub_state.get(ENTITIES, [])}
     elif attribute == ACTIVE_LOOP:
         return {sub_state["name"]: 1}
     elif attribute == SLOTS:
         return {
             f"{slot_name}_{i}": value
             for slot_name, slot_as_feature in sub_state.items()
             for i, value in enumerate(slot_as_feature)
         }
     else:
         raise ValueError(
             f"Given attribute '{attribute}' is not supported. "
             f"It must be one of '{self._default_feature_states.keys()}'.")
Example 5
    def _extract_state_features(
        self,
        sub_state: SubState,
        precomputations: Optional[MessageContainerForCoreFeaturization],
        sparse: bool = False,
    ) -> Dict[Text, List[Features]]:

        # Remove entities from possible attributes
        attributes = set(
            attribute for attribute in sub_state.keys() if attribute != ENTITIES
        )

        if precomputations is not None:

            # Collect features for all those attributes
            attributes_to_features = precomputations.collect_features(
                sub_state, attributes=attributes
            )
            # If features for INTENT or ACTION_NAME exist,
            # they are always sparse sequence features;
            # transform them into sparse sentence features.
            if attributes_to_features.get(INTENT):
                attributes_to_features[INTENT] = self._to_sparse_sentence_features(
                    attributes_to_features[INTENT]
                )
            if attributes_to_features.get(ACTION_NAME):
                attributes_to_features[ACTION_NAME] = self._to_sparse_sentence_features(
                    attributes_to_features[ACTION_NAME]
                )

            # Combine and sort the features:
            # Per attribute, combine features of the same type and level into one
            # Feature and (if there are any such features) store the results in a
            # list where
            # - all the sparse features are listed first, and
            # - a sequence feature is always listed before the sentence feature of
            #   the same type (sparse/not sparse).
            output = {
                attribute: Features.reduce(
                    features_list=features_list, expected_origins=None
                )
                for attribute, features_list in attributes_to_features.items()
                if len(features_list) > 0  # otherwise, `Features.reduce` would fail
            }
        else:
            output = {}

        # Check that the name attribute has features
        name_attribute = self._get_name_attribute(attributes)
        if name_attribute and name_attribute not in output:
            # The NLU pipeline didn't create features for the user or the action.
            # This might happen, for example, when we have `action_name` in the state
            # but it did not get featurized because only a character-level
            # CountVectorsFeaturizer was included in the config.
            output[name_attribute] = self._create_features(
                sub_state, name_attribute, sparse
            )
        return output
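
The comment about INTENT and ACTION_NAME refers to collapsing sparse sequence features into a single sentence-level feature. Below is a minimal sketch of that idea, assuming a sequence feature is a (sequence_length x feature_dim) sparse matrix and the sentence feature is obtained by summing over the sequence axis; this illustrates the concept and is not necessarily the exact implementation.

    import scipy.sparse

    # Hypothetical sparse sequence feature: 2 tokens x 5-dimensional vocabulary.
    sequence_feature = scipy.sparse.coo_matrix(
        [[0, 1, 0, 0, 1],
         [1, 0, 0, 0, 0]]
    )
    # Collapse the sequence axis into a single sentence-level row (shape 1 x 5).
    sentence_feature = scipy.sparse.coo_matrix(sequence_feature.sum(axis=0))
    assert sentence_feature.shape == (1, 5)
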
Example 6
 def _state_features_for_attribute(self, sub_state: SubState,
                                   attribute: Text) -> Dict[Text, int]:
     # FIXME: the code below is not type-safe, but fixing it
     #        would require more refactoring, for instance using
     #        data classes in our states
     if attribute in {INTENT, ACTION_NAME}:
         return {sub_state[attribute]: 1}  # type: ignore[dict-item]
     elif attribute == ENTITIES:
         return {entity: 1 for entity in sub_state.get(ENTITIES, [])}
     elif attribute == ACTIVE_LOOP:
         return {sub_state["name"]: 1}  # type: ignore[dict-item]
     elif attribute == SLOTS:
         return {
             f"{slot_name}_{i}": value
             for slot_name, slot_as_feature in sub_state.items()
             for i, value in enumerate(slot_as_feature)
         }
     else:
         raise ValueError(
             f"Given attribute '{attribute}' is not supported. "
             f"It must be one of '{self._default_feature_states.keys()}'.")
Example 7
    def _extract_state_features(
        self,
        sub_state: SubState,
        interpreter: NaturalLanguageInterpreter,
        sparse: bool = False,
    ) -> Dict[Text, List["Features"]]:
        # Note: this implementation intentionally does not use the passed interpreter.
        name_attribute = self._get_name_attribute(set(sub_state.keys()))
        if name_attribute:
            return {
                name_attribute: self._create_features(sub_state, name_attribute, sparse)
            }

        return {}
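
This variant ignores the passed interpreter and only produces features for the name-like attribute. A minimal standalone sketch of that short-circuit follows, where the set of name attributes and the `create_features` callback are assumptions made for illustration.

    from typing import Any, Callable, Dict, List, Optional, Text

    NAME_ATTRIBUTES = ("intent", "action_name")  # assumed name-like attributes

    def extract_name_only_features(
        sub_state: Dict[Text, Any],
        create_features: Callable[[Dict[Text, Any], Text], List[Any]],
    ) -> Dict[Text, List[Any]]:
        # Pick whichever name-like attribute is present in the substate, if any.
        name_attribute: Optional[Text] = next(
            (attribute for attribute in NAME_ATTRIBUTES if attribute in sub_state), None
        )
        if name_attribute:
            return {name_attribute: create_features(sub_state, name_attribute)}
        return {}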