def _get_wordpiece_detokenized_text(
    token_span: _SpanType,
    raw_prediction: _RawPredictionType,
    tokenizer: tokenization.FullTokenizer) -> str:
  """Recovers normalized answer text for a WordPiece token span.

  Args:
    token_span: Inclusive (start, end) indices into the long token sequence.
    raw_prediction: Prediction dict providing the "long_token_ids" sequence.
    tokenizer: WordPiece tokenizer used to map ids back to token strings.

  Returns:
    The detokenized, normalized answer text for the span.
  """
  start = token_span[0]
  end = token_span[1]
  # Slice is end-inclusive, hence the +1.
  span_ids = raw_prediction["long_token_ids"][start:end + 1]
  span_tokens = tokenizer.convert_ids_to_tokens(span_ids)
  return data_utils.wordpiece_tokens_to_normalized_text(span_tokens)
def _get_sentencepiece_detokenized_text(
    token_span: _SpanType,
    raw_prediction: _RawPredictionType,
    tokenizer: tokenization.FullTokenizer) -> str:
  """Recovers final answer text for a SentencePiece token span.

  Mirrors `_get_wordpiece_detokenized_text` but detokenizes via
  SentencePiece rules. The return annotation was added for consistency
  with that sibling function.

  Args:
    token_span: Inclusive (start, end) indices into the long token sequence.
    raw_prediction: Prediction dict providing the "long_token_ids" sequence.
      NOTE(review): the `.tolist()` call implies this is a NumPy array here
      (unlike the WordPiece variant) — confirm against the producer.
    tokenizer: SentencePiece tokenizer used to map ids back to token strings.

  Returns:
    The detokenized answer text for the span.
  """
  long_token_ids = raw_prediction["long_token_ids"]
  # Slice is end-inclusive, hence the +1; converted to a plain list because
  # convert_ids_to_tokens is fed Python ints rather than array scalars.
  answer_tokens = tokenizer.convert_ids_to_tokens(
      long_token_ids[token_span[0]:token_span[1] + 1].tolist())
  return data_utils.sentencepiece_detokenize(answer_tokens)