def _most_likely_tag(
    self, predictions: List[Dict[Text, float]]
) -> Tuple[List[Text], List[float]]:
    """Pick the highest-scoring entity tag for every token.

    Args:
        predictions: One mapping per token, from entity tag to the
            confidence predicted for that tag.

    Returns:
        A pair ``(tags, confidences)``. ``tags`` holds the best tag of each
        token. ``confidences`` holds the matching confidence: the plain
        value of the best tag, or, when ``self.component_config[BILOU_FLAG]``
        is truthy, the sum over every predicted tag that has the same
        prefix-free name as the best tag.
    """
    best_tags = []
    best_confidences = []

    for scores in predictions:
        best_tag = max(scores, key=scores.get)
        best_tags.append(best_tag)

        if not self.component_config[BILOU_FLAG]:
            best_confidences.append(scores[best_tag])
            continue

        # With BILOU flags the B, I, L and U variants of one entity split the
        # probability mass between them, so their confidences are added up.
        entity_name = bilou_utils.tag_without_prefix(best_tag)
        best_confidences.append(
            sum(
                score
                for candidate, score in scores.items()
                if bilou_utils.tag_without_prefix(candidate) == entity_name
            )
        )

    return best_tags, best_confidences
def convert_predictions_into_entities(
    text: Text,
    tokens: List[Token],
    tags: Dict[Text, List[Text]],
    split_entities_config: Optional[Dict[Text, bool]] = None,
    confidences: Optional[Dict[Text, List[float]]] = None,
) -> List[Dict[Text, Any]]:
    """Convert predictions into entities.

    Walks over the tokens once, opening a new entity whenever the predicted
    entity tag, group or role changes (or a BILOU prefix marks a new start),
    and otherwise either extending the previous entity or starting a new one,
    depending on ``EntityExtractor._check_is_single_entity``.

    Args:
        text: The text message.
        tokens: Message tokens without CLS token.
        tags: Predicted tags.
        split_entities_config: Config for handling splitting a list of
            entities. May be ``None``.
        confidences: Confidences of predicted tags. When ``None``, confidence
            values of extended entities are not updated.

    Returns:
        Entities, each with its value filled in from the slice of ``text``
        between the entity's start and end.
    """
    # Kept as a function-level import, exactly as in the original block.
    import rasa.nlu.utils.bilou_utils as bilou_utils

    entities = []

    last_entity_tag = NO_ENTITY_TAG
    last_role_tag = NO_ENTITY_TAG
    last_group_tag = NO_ENTITY_TAG
    last_token_end = -1

    for idx, token in enumerate(tokens):
        current_entity_tag = EntityExtractor.get_tag_for(
            tags, ENTITY_ATTRIBUTE_TYPE, idx
        )

        if current_entity_tag == NO_ENTITY_TAG:
            # A token outside of any entity ends the running entity; only the
            # entity tag and the token end are tracked for it.
            last_entity_tag = NO_ENTITY_TAG
            last_token_end = token.end
            continue

        current_group_tag = bilou_utils.tag_without_prefix(
            EntityExtractor.get_tag_for(tags, ENTITY_ATTRIBUTE_GROUP, idx)
        )
        current_role_tag = bilou_utils.tag_without_prefix(
            EntityExtractor.get_tag_for(tags, ENTITY_ATTRIBUTE_ROLE, idx)
        )

        group_or_role_changed = (
            last_group_tag != current_group_tag
            or last_role_tag != current_role_tag
        )

        # Looked up once; the original evaluated this up to four times.
        bilou_prefix = bilou_utils.bilou_prefix_from_tag(current_entity_tag)

        if bilou_prefix:
            # A tag that differs from the previous one opens a new entity,
            # unless it is an inside (I-) or last (L-) tag.
            new_bilou_tag_starts = (
                last_entity_tag != current_entity_tag
                and bilou_prefix not in (bilou_utils.LAST, bilou_utils.INSIDE)
            )
            # Covers I-/L- tags that show up without a preceding B- tag and
            # several U- tags in a row.
            new_unigram_bilou_tag_starts = (
                last_entity_tag == NO_ENTITY_TAG
                or bilou_prefix == bilou_utils.UNIT
            )
            new_tag_found = (
                new_bilou_tag_starts
                or new_unigram_bilou_tag_starts
                or group_or_role_changed
            )
            # The prefixed tag is remembered for the next comparison; the
            # entity itself is created with the prefix-free tag.
            last_entity_tag = current_entity_tag
            current_entity_tag = bilou_utils.tag_without_prefix(
                current_entity_tag
            )
        else:
            new_tag_found = (
                last_entity_tag != current_entity_tag or group_or_role_changed
            )
            last_entity_tag = current_entity_tag

        # The helper is only consulted when no new tag was found, i.e. when
        # the token carries the same tag as the one before it.
        # NOTE(review): the exact merge rule (allowed gap between the tokens,
        # effect of `split_entities_config`) lives in
        # `_check_is_single_entity` - confirm there.
        extends_previous_entity = (
            not new_tag_found
            and EntityExtractor._check_is_single_entity(
                text,
                token,
                last_token_end,
                split_entities_config,
                current_entity_tag,
            )
        )

        if extends_previous_entity:
            entities[-1][ENTITY_ATTRIBUTE_END] = token.end
            if confidences is not None:
                EntityExtractor._update_confidence_values(
                    entities, confidences, idx
                )
        else:
            # Either a new tag started, or the tag repeats but the token
            # should not be merged into the previous entity.
            entities.append(
                EntityExtractor._create_new_entity(
                    list(tags.keys()),
                    current_entity_tag,
                    current_group_tag,
                    current_role_tag,
                    token,
                    idx,
                    confidences,
                )
            )

        last_group_tag = current_group_tag
        last_role_tag = current_role_tag
        last_token_end = token.end

    for entity in entities:
        entity[ENTITY_ATTRIBUTE_VALUE] = text[
            entity[ENTITY_ATTRIBUTE_START] : entity[ENTITY_ATTRIBUTE_END]
        ]

    return entities
def convert_predictions_into_entities(
    self,
    text: Text,
    tokens: List[Token],
    tags: Dict[Text, List[Text]],
    confidences: Optional[Dict[Text, List[float]]] = None,
) -> List[Dict[Text, Any]]:
    """Turn per-token tag predictions into a list of entities.

    Args:
        text: The text message.
        tokens: Message tokens without CLS token.
        tags: Predicted tags.
        confidences: Confidences of predicted tags. When ``None``, confidence
            values of extended entities are not updated.

    Returns:
        The entities, each with its value set to the slice of ``text``
        between the entity's start and end.
    """
    found = []

    prev_entity_tag = NO_ENTITY_TAG
    prev_role_tag = NO_ENTITY_TAG
    prev_group_tag = NO_ENTITY_TAG
    prev_token_end = -1

    for idx, token in enumerate(tokens):
        entity_tag = self.get_tag_for(tags, ENTITY_ATTRIBUTE_TYPE, idx)

        if entity_tag == NO_ENTITY_TAG:
            # Token outside of any entity: it interrupts the running entity.
            prev_entity_tag = NO_ENTITY_TAG
            prev_token_end = token.end
            continue

        group_tag = bilou_utils.tag_without_prefix(
            self.get_tag_for(tags, ENTITY_ATTRIBUTE_GROUP, idx)
        )
        role_tag = bilou_utils.tag_without_prefix(
            self.get_tag_for(tags, ENTITY_ATTRIBUTE_ROLE, idx)
        )

        group_or_role_changed = (
            prev_group_tag != group_tag or prev_role_tag != role_tag
        )
        tag_changed = prev_entity_tag != entity_tag

        prefix = bilou_utils.bilou_prefix_from_tag(entity_tag)
        if prefix:
            # A changed tag opens a new entity unless it is an I- or L- tag.
            opens_with_new_tag = (
                tag_changed
                and bilou_utils.LAST != prefix
                and bilou_utils.INSIDE != prefix
            )
            # I-/L- tags without a preceding B- tag, and consecutive U- tags,
            # open a new entity as well.
            opens_without_begin = (
                prev_entity_tag == NO_ENTITY_TAG or bilou_utils.UNIT == prefix
            )
            is_new_entity = (
                opens_with_new_tag
                or opens_without_begin
                or group_or_role_changed
            )
            # Remember the prefixed tag for the next token, but build the
            # entity from the prefix-free one.
            prev_entity_tag = entity_tag
            entity_tag = bilou_utils.tag_without_prefix(entity_tag)
        else:
            is_new_entity = tag_changed or group_or_role_changed
            prev_entity_tag = entity_tag

        # Same tag as the token before, with at most one character (e.g. a
        # space or a dash) between the two tokens: grow the previous entity.
        if not is_new_entity and token.start - prev_token_end <= 1:
            found[-1][ENTITY_ATTRIBUTE_END] = token.end
            if confidences is not None:
                self._update_confidence_values(found, confidences, idx)
        else:
            # Either a new tag started, or the tag repeats but the tokens
            # are two or more characters apart (e.g. multiple spaces, a comma
            # followed by a space).
            found.append(
                self._create_new_entity(
                    list(tags.keys()),
                    entity_tag,
                    group_tag,
                    role_tag,
                    token,
                    idx,
                    confidences,
                )
            )

        prev_group_tag = group_tag
        prev_role_tag = role_tag
        prev_token_end = token.end

    for item in found:
        start = item[ENTITY_ATTRIBUTE_START]
        end = item[ENTITY_ATTRIBUTE_END]
        item[ENTITY_ATTRIBUTE_VALUE] = text[start:end]

    return found
def test_entity_name_from_tag(tag, expected):
    """Check that ``bilou_utils.tag_without_prefix`` maps ``tag`` to ``expected``."""
    assert bilou_utils.tag_without_prefix(tag) == expected