コード例 #1
0
    def _most_likely_tag(
        self,
        predictions: List[Dict[Text,
                               float]]) -> Tuple[List[Text], List[float]]:
        """Get the entity tags with the highest confidence.

        Args:
            predictions: list of mappings from entity tag to confidence value

        Returns:
            List of entity tags and list of confidence values.
        """
        _tags = []
        _confidences = []

        for token_predictions in predictions:
            tag = max(token_predictions,
                      key=lambda key: token_predictions[key])
            _tags.append(tag)

            if self.component_config[BILOU_FLAG]:
                # if we are using BILOU flags, we will sum up the prob
                # of the B, I, L and U tags for an entity
                _confidences.append(
                    sum(_confidence
                        for _tag, _confidence in token_predictions.items()
                        if bilou_utils.tag_without_prefix(tag) ==
                        bilou_utils.tag_without_prefix(_tag)))
            else:
                _confidences.append(token_predictions[tag])

        return _tags, _confidences
コード例 #2
0
    def convert_predictions_into_entities(
        text: Text,
        tokens: List[Token],
        tags: Dict[Text, List[Text]],
        split_entities_config: Dict[Text, bool] = None,
        confidences: Optional[Dict[Text, List[float]]] = None,
    ) -> List[Dict[Text, Any]]:
        """Turn per-token tag predictions into a list of entities.

        Tokens are scanned in order. A token tagged ``NO_ENTITY_TAG`` is
        skipped; every other token either opens a new entity or extends the
        end offset of the most recently created one.

        Args:
            text: The text message.
            tokens: Message tokens without CLS token.
            tags: Predicted tags.
            split_entities_config: Config for handling splitting a list of
                entities; handed unchanged to ``_check_is_single_entity``.
            confidences: Confidences of predicted tags.

        Returns:
            The entities, each with its value set to the slice of ``text``
            between the entity's start and end offsets.
        """
        import rasa.nlu.utils.bilou_utils as bilou_utils

        entities = []

        prev_raw_tag = NO_ENTITY_TAG
        prev_role = NO_ENTITY_TAG
        prev_group = NO_ENTITY_TAG
        prev_end = -1

        for idx, token in enumerate(tokens):
            raw_tag = EntityExtractor.get_tag_for(tags, ENTITY_ATTRIBUTE_TYPE,
                                                  idx)

            if raw_tag == NO_ENTITY_TAG:
                # not part of an entity: only the entity tag and the end
                # offset are refreshed, group and role are left as they were
                prev_raw_tag = NO_ENTITY_TAG
                prev_end = token.end
                continue

            group = bilou_utils.tag_without_prefix(
                EntityExtractor.get_tag_for(tags, ENTITY_ATTRIBUTE_GROUP, idx))
            role = bilou_utils.tag_without_prefix(
                EntityExtractor.get_tag_for(tags, ENTITY_ATTRIBUTE_ROLE, idx))

            group_or_role_differs = (prev_group != group or prev_role != role)

            prefix = bilou_utils.bilou_prefix_from_tag(raw_tag)
            if prefix:
                # a changed tag opens a new entity unless it carries an I- or
                # L- prefix
                changed_to_opening_tag = (
                    prev_raw_tag != raw_tag
                    and prefix not in (bilou_utils.LAST, bilou_utils.INSIDE))
                # covers I-/L- tags that show up without a preceding B- tag,
                # and several U- tags in a row
                follows_gap_or_is_unit = (prev_raw_tag == NO_ENTITY_TAG
                                          or prefix == bilou_utils.UNIT)

                starts_entity = (changed_to_opening_tag
                                 or follows_gap_or_is_unit
                                 or group_or_role_differs)
                entity_type = bilou_utils.tag_without_prefix(raw_tag)
            else:
                starts_entity = (prev_raw_tag != raw_tag
                                 or group_or_role_differs)
                entity_type = raw_tag

            # the tag is remembered in its raw form, i.e. including any
            # BILOU prefix
            prev_raw_tag = raw_tag

            # The token is merged into the previous entity only when it does
            # not start a new one AND the helper agrees that both belong to
            # a single entity.
            # NOTE(review): per the original comment the helper looks at the
            # symbols separating the two tokens -- confirm against
            # `_check_is_single_entity`.
            extends_previous = (
                not starts_entity
                and EntityExtractor._check_is_single_entity(
                    text, token, prev_end, split_entities_config,
                    entity_type))

            if extends_previous:
                entities[-1][ENTITY_ATTRIBUTE_END] = token.end
                if confidences is not None:
                    EntityExtractor._update_confidence_values(
                        entities, confidences, idx)
            else:
                entities.append(
                    EntityExtractor._create_new_entity(
                        list(tags),
                        entity_type,
                        group,
                        role,
                        token,
                        idx,
                        confidences,
                    ))

            prev_group = group
            prev_role = role
            prev_end = token.end

        # end offsets are final now, so the surface text can be cut out
        for entity in entities:
            start = entity[ENTITY_ATTRIBUTE_START]
            end = entity[ENTITY_ATTRIBUTE_END]
            entity[ENTITY_ATTRIBUTE_VALUE] = text[start:end]

        return entities
コード例 #3
0
    def convert_predictions_into_entities(
        self,
        text: Text,
        tokens: List[Token],
        tags: Dict[Text, List[Text]],
        confidences: Optional[Dict[Text, List[float]]] = None,
    ) -> List[Dict[Text, Any]]:
        """Turn per-token tag predictions into a list of entities.

        Tokens are scanned in order. A token tagged ``NO_ENTITY_TAG`` is
        skipped; every other token either opens a new entity or, when it
        starts at most one character after the previous token ended,
        extends the most recently created one.

        Args:
            text: The text message.
            tokens: Message tokens without CLS token.
            tags: Predicted tags.
            confidences: Confidences of predicted tags.

        Returns:
            The entities, each with its value set to the slice of ``text``
            between the entity's start and end offsets.
        """
        entities = []

        prev_raw_tag = NO_ENTITY_TAG
        prev_role = NO_ENTITY_TAG
        prev_group = NO_ENTITY_TAG
        prev_end = -1

        for idx, token in enumerate(tokens):
            raw_tag = self.get_tag_for(tags, ENTITY_ATTRIBUTE_TYPE, idx)

            if raw_tag == NO_ENTITY_TAG:
                # not part of an entity: only the entity tag and the end
                # offset are refreshed, group and role are left as they were
                prev_raw_tag = NO_ENTITY_TAG
                prev_end = token.end
                continue

            group = bilou_utils.tag_without_prefix(
                self.get_tag_for(tags, ENTITY_ATTRIBUTE_GROUP, idx))
            role = bilou_utils.tag_without_prefix(
                self.get_tag_for(tags, ENTITY_ATTRIBUTE_ROLE, idx))

            group_or_role_differs = (prev_group != group or prev_role != role)

            prefix = bilou_utils.bilou_prefix_from_tag(raw_tag)
            if prefix:
                # a changed tag opens a new entity unless it carries an I- or
                # L- prefix
                changed_to_opening_tag = (
                    prev_raw_tag != raw_tag
                    and prefix not in (bilou_utils.LAST, bilou_utils.INSIDE))
                # covers I-/L- tags that show up without a preceding B- tag,
                # and several U- tags in a row
                follows_gap_or_is_unit = (prev_raw_tag == NO_ENTITY_TAG
                                          or prefix == bilou_utils.UNIT)

                starts_entity = (changed_to_opening_tag
                                 or follows_gap_or_is_unit
                                 or group_or_role_differs)
                entity_type = bilou_utils.tag_without_prefix(raw_tag)
            else:
                starts_entity = (prev_raw_tag != raw_tag
                                 or group_or_role_differs)
                entity_type = raw_tag

            # the tag is remembered in its raw form, i.e. including any
            # BILOU prefix
            prev_raw_tag = raw_tag

            # Same tag as the token before: merge only if the two tokens are
            # separated by at most one symbol (e.g. space, dash). A wider gap
            # (multiple spaces, a comma and a space, ...) yields a separate
            # entity.
            extends_previous = (not starts_entity
                                and token.start - prev_end <= 1)

            if extends_previous:
                entities[-1][ENTITY_ATTRIBUTE_END] = token.end
                if confidences is not None:
                    self._update_confidence_values(entities, confidences, idx)
            else:
                entities.append(
                    self._create_new_entity(
                        list(tags),
                        entity_type,
                        group,
                        role,
                        token,
                        idx,
                        confidences,
                    ))

            prev_group = group
            prev_role = role
            prev_end = token.end

        # end offsets are final now, so the surface text can be cut out
        for entity in entities:
            start = entity[ENTITY_ATTRIBUTE_START]
            end = entity[ENTITY_ATTRIBUTE_END]
            entity[ENTITY_ATTRIBUTE_VALUE] = text[start:end]

        return entities
コード例 #4
0
ファイル: test_bilou_utils.py プロジェクト: zylhub/rasa
def test_entity_name_from_tag(tag, expected):
    """Check that ``bilou_utils.tag_without_prefix`` maps ``tag`` to ``expected``.

    NOTE(review): ``tag`` and ``expected`` are presumably supplied by a
    parametrize decorator that is not visible in this excerpt -- confirm.
    """
    assert bilou_utils.tag_without_prefix(tag) == expected