def test_glue(self):
    """Check `prep.glue` output for a QQP-style example.

    Three scenarios are asserted:
      1. No `feature_names` given: the expected inputs contain q1, q2, q3.
      2. `feature_names=['q3', 'q1']`: only those features, in that order.
      3. A label of -1: the expected target is '<unk>'.
    """
    example_idx = 10
    benchmark = 'qqp'
    labels = ['not_duplicate', 'duplicate']
    raw_example = {
        'q1': 'How so?',
        'q2': 'Why not?',
        'q3': 'Who?',
        'idx': example_idx,
        'label': 0,
    }
    source_ds = tf.data.Dataset.from_tensors(raw_example)

    # Scenario 1: default feature selection.
    expected_default = {
        'inputs': 'qqp q1: How so? q2: Why not? q3: Who?',
        'targets': 'not_duplicate',
        'idx': example_idx,
    }
    assert_dataset(prep.glue(source_ds, benchmark, labels), expected_default)

    # Scenario 2: test `feature_names` argument.
    expected_subset = {
        'inputs': 'qqp q3: Who? q1: How so?',
        'targets': 'not_duplicate',
        'idx': example_idx,
    }
    subset_ds = prep.glue(
        source_ds, benchmark, labels, feature_names=['q3', 'q1']
    )
    assert_dataset(subset_ds, expected_subset)

    # Scenario 3: test target is <unk> when label is -1.
    unlabeled_example = {**raw_example, 'label': -1}
    unlabeled_ds = tf.data.Dataset.from_tensors(unlabeled_example)
    expected_unlabeled = {
        'inputs': 'qqp q1: How so? q2: Why not? q3: Who?',
        'targets': '<unk>',
        'idx': example_idx,
    }
    assert_dataset(
        prep.glue(unlabeled_ds, benchmark, labels), expected_unlabeled
    )
def test_multirc(self):
    """Check `prep.glue` on a MultiRC-style example with a nested `idx`.

    The expected output asserted here has the paragraph's `<b>`/`</b>` tags
    gone, `<br>` appearing as a single space, and the nested `idx` dict
    exposed as flat 'idx/<name>' keys.
    """
    raw_example = {
        'paragraph': '<b>Sent 1: </b>Once upon a time, there was a squirrel named Joey.<br><b>Sent 2: </b>Joey loved to go outside and play with his cousin Jimmy.',
        'question': 'Why was Joey surprised the morning he woke up for breakfast?',
        'answer': 'There was only pie to eat',
        'label': 1,
        'idx': {
            'paragraph': 5,
            'question': 1,
            'answer': 3,
        },
    }
    expected = {
        'inputs': 'multirc question: Why was Joey surprised the morning he woke up for breakfast? answer: There was only pie to eat paragraph: Sent 1: Once upon a time, there was a squirrel named Joey. Sent 2: Joey loved to go outside and play with his cousin Jimmy.',
        'targets': 'True',
        'idx/paragraph': 5,
        'idx/question': 1,
        'idx/answer': 3,
    }

    source_ds = tf.data.Dataset.from_tensors(raw_example)
    processed_ds = prep.glue(
        source_ds,
        'multirc',
        label_names=['False', 'True'],
        feature_names=('question', 'answer', 'paragraph'),
    )

    assert_dataset(processed_ds, expected)