Example #1
def _get_wordpiece_detokenized_text(
    token_span: _SpanType, raw_prediction: _RawPredictionType,
    tokenizer: tokenization.FullTokenizer) -> str:
  """Gets the normalized answer token text given the token span."""
  # The span end index is inclusive, hence the +1 in the slice.
  answer_tokens = tokenizer.convert_ids_to_tokens(
      raw_prediction["long_token_ids"][token_span[0]:token_span[1] + 1])
  # Merge "##" continuation pieces and normalize them into the final text.
  return data_utils.wordpiece_tokens_to_normalized_text(answer_tokens)
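A minimal usage sketch, assuming a BERT-style tokenization module whose FullTokenizer takes a vocabulary file; the vocab path, prediction dict, span, and token ids below are hypothetical and purely illustrative:

# Hypothetical setup: the vocab path and token ids are illustrative only.
tokenizer = tokenization.FullTokenizer(
    vocab_file="vocab.txt", do_lower_case=True)
raw_prediction = {"long_token_ids": [101, 7592, 1010, 2088, 102]}
# Span (1, 3) selects the middle three ids (the end index is inclusive).
answer = _get_wordpiece_detokenized_text((1, 3), raw_prediction, tokenizer)
print(answer)  # e.g. "hello, world" once "##" pieces are merged and normalized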
Example #2
def _get_sentencepiece_detokenized_text(
    token_span: _SpanType, raw_prediction: _RawPredictionType,
    tokenizer: tokenization.FullTokenizer) -> str:
  """Gets the final answer text from SentencePiece tokens for the span."""
  long_token_ids = raw_prediction["long_token_ids"]
  # `long_token_ids` is assumed to be a NumPy array here, hence .tolist();
  # the span end index is inclusive, hence the +1 in the slice.
  answer_tokens = tokenizer.convert_ids_to_tokens(
      long_token_ids[token_span[0]:token_span[1] + 1].tolist())
  return data_utils.sentencepiece_detokenize(answer_tokens)
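A matching sketch for the SentencePiece variant. Here long_token_ids must support .tolist(), i.e. it is assumed to be a NumPy array; the spm_model_file argument mirrors the ALBERT-style FullTokenizer constructor and, like the model path and ids, is an assumption for illustration:

import numpy as np

# Hypothetical setup: the model path and token ids are illustrative only,
# and spm_model_file assumes an ALBERT-style FullTokenizer constructor.
tokenizer = tokenization.FullTokenizer(
    vocab_file=None, do_lower_case=True, spm_model_file="spiece.model")
raw_prediction = {"long_token_ids": np.array([13, 2334, 1362, 9, 3])}
# Span (1, 2) selects ids 2334 and 1362 (the end index is inclusive).
answer = _get_sentencepiece_detokenized_text(
    (1, 2), raw_prediction, tokenizer)
print(answer)  # pieces such as "▁hello" are rejoined into plain text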