Example no. 1
0
from t5.data import preprocessors as t5_preprocessors
from t5.evaluation import metrics as t5_metrics

# Local aliases for the seqio registries used throughout this module.
MixtureRegistry = seqio.MixtureRegistry
TaskRegistry = seqio.TaskRegistry

# Default SentencePiece vocabulary (hosted on GCS) and its sentinel count.
DEFAULT_SPM_PATH = "gs://t5-data/vocabs/cc_all.32000/sentencepiece.model"  # GCS
DEFAULT_EXTRA_IDS = 100

# Train-split boundaries for the open-domain QA datasets.
# NOTE(review): NQ and NQO share the same end index — presumably intentional
# (same underlying Natural Questions split); confirm against the data pipeline.
NQ_TRAIN_SPLIT_START = 7830
NQ_TRAIN_SPLIT_END = 79168
NQO_TRAIN_SPLIT_END = 79168
WQ_TRAIN_SPLIT_END = 3417
TQA_TRAIN_SPLIT_END = 78785

# Both "inputs" and "targets" use the default vocabulary and append EOS.
DEFAULT_OUTPUT_FEATURES = {
    key: seqio.Feature(vocabulary=get_default_vocabulary(), add_eos=True)
    for key in ("inputs", "targets")
}

# ========================== Natural Questions =================================

# Natural Questions open domain variant that most closely matches the official
# evaluation procedure.
# The model is trained to predict all ground-truth answers
# and is only considered correct if it predicts all answers for any one of the
# annotators. As in the official evaluation, we consider questions with fewer
# than two non-null annotations unanswerable (given the context) but because we
# cannot predict unanswerability without the context, we only compute the recall
# metric. Further, because our model does not have access to the oracle context,
# we also normalize predicted and ground-truth answers when comparing them.
Example no. 2
0
from . import metrics
from . import postprocessors
from . import preprocessors
import t5.data
from t5.data import get_default_vocabulary
from t5.data import postprocessors as t5_postprocessors
from t5.data import preprocessors as t5_preprocessors
from t5.data.glue_utils import get_glue_postprocess_fn
from t5.evaluation import metrics as t5_metrics
import tensorflow_datasets as tfds

# Local aliases into the t5.data task-registration API.
TaskRegistry = t5.data.TaskRegistry
TfdsTask = t5.data.TfdsTask

# Inputs and targets both tokenize with the default vocabulary and get EOS.
DEFAULT_OUTPUT_FEATURES = {
    feature_name: t5.data.Feature(vocabulary=get_default_vocabulary(),
                                  add_eos=True)
    for feature_name in ("inputs", "targets")
}

# ======================== CoS-E Corpus Task ==================================
# Register CoS-E v0.0.1 from TFDS: preprocessed with preprocessors.cos_e,
# post-processed as abstractive explanations, and scored with the e-SNLI
# metric.
TaskRegistry.add(
    "cos_e_v001",
    TfdsTask,
    tfds_name="cos_e:0.0.1",
    text_preprocessor=preprocessors.cos_e,
    postprocess_fn=postprocessors.abstractive_explanations,
    metric_fns=[metrics.esnli_metric],
    output_features=DEFAULT_OUTPUT_FEATURES)

# CoS-E with no explanations, and modified prefixes like e-SNLI.