# Module-level setup for T5 open-domain QA tasks: registry aliases,
# vocabulary/split constants, and the default seqio output features.
#
# NOTE(review): the original was collapsed onto a single physical line
# (invalid Python); line structure below is reconstructed. Statements and
# string literals are unchanged.
from t5.data import preprocessors as t5_preprocessors
from t5.evaluation import metrics as t5_metrics

# NOTE(review): `seqio` and `get_default_vocabulary` are used below but not
# imported in this chunk — presumably imported earlier in the file; confirm.
MixtureRegistry = seqio.MixtureRegistry
TaskRegistry = seqio.TaskRegistry

DEFAULT_SPM_PATH = "gs://t5-data/vocabs/cc_all.32000/sentencepiece.model"  # GCS
DEFAULT_EXTRA_IDS = 100

# Boundaries used to carve train/validation splits out of each dataset's
# train set (indices into the TFDS train split).
NQ_TRAIN_SPLIT_START = 7830
NQ_TRAIN_SPLIT_END = 79168
NQO_TRAIN_SPLIT_END = 79168
WQ_TRAIN_SPLIT_END = 3417
TQA_TRAIN_SPLIT_END = 78785

# Default features: EOS appended to both inputs and targets, shared
# default SentencePiece vocabulary.
DEFAULT_OUTPUT_FEATURES = {
    "inputs": seqio.Feature(vocabulary=get_default_vocabulary(), add_eos=True),
    "targets": seqio.Feature(vocabulary=get_default_vocabulary(), add_eos=True)
}

# ========================== Natural Questions =================================
# Natural Questions open domain variant that most closely matches the official
# evaluation procedure.
# The model is trained to predict all ground-truth answers
# and is only considered correct if it predicts all answers for any one of the
# annotators. As in the official evaluation, we consider questions with fewer
# than two non-null annotations unanswerable (given the context) but because we
# cannot predict unanswerability without the context, we only compute the recall
# metric. Further, because our model does not have access to the oracle context,
# we also normalize predicted and ground-truth answers when comparing them.
# Module-level setup and CoS-E task registration.
#
# NOTE(review): the original was collapsed onto a single physical line
# (invalid Python); line structure below is reconstructed. Statements and
# string literals are unchanged.
from . import metrics
from . import postprocessors
from . import preprocessors
import t5.data
from t5.data import get_default_vocabulary
from t5.data import postprocessors as t5_postprocessors
from t5.data import preprocessors as t5_preprocessors
from t5.data.glue_utils import get_glue_postprocess_fn
from t5.evaluation import metrics as t5_metrics
import tensorflow_datasets as tfds

TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

# Default features: EOS appended to both inputs and targets, shared
# default vocabulary.
DEFAULT_OUTPUT_FEATURES = {
    "inputs": t5.data.Feature(vocabulary=get_default_vocabulary(), add_eos=True),
    "targets": t5.data.Feature(vocabulary=get_default_vocabulary(), add_eos=True)
}

# ======================== CoS-E Corpus Task ==================================
# CoS-E with abstractive explanations, scored with the e-SNLI metric.
TaskRegistry.add(
    "cos_e_v001",
    TfdsTask,
    tfds_name="cos_e:0.0.1",
    text_preprocessor=preprocessors.cos_e,
    postprocess_fn=postprocessors.abstractive_explanations,
    output_features=DEFAULT_OUTPUT_FEATURES,
    metric_fns=[metrics.esnli_metric])

# CoS-E with no explanations, and modified prefixes like e-SNLI.