def test_natural_questions_open(self):
    """Check `natural_questions_open` on one question with two answers.

    The expected record carries the question prefixed with 'nq question: '
    as 'inputs', the first listed answer as 'targets', and the complete
    answer list under 'answers'.
    """
    raw_example = {
        'question': ['What are the names of the Olsen Twins?'],
        'answer': ['Mary-Kate', 'Ashley']
    }
    expected = {
        'inputs': 'nq question: What are the names of the Olsen Twins?',
        'targets': 'Mary-Kate',
        'answers': ['Mary-Kate', 'Ashley'],
    }

    # A single-element dataset wrapping the raw example dict.
    processed = preprocessors.natural_questions_open(
        tf.data.Dataset.from_tensors(raw_example))
    t5.data.assert_dataset(processed, expected)
Example #2
0
    def test_natural_questions(self):
        """Exercise the Natural Questions preprocessors on three raw examples.

        The fixture holds one example with empty short answers and a
        `yes_no_answer` of all -1, one that mixes non-negative yes/no values
        with short answers, and one with short answers only. Every
        short-answer field is written as a `(values, row_lengths)` pair and
        turned into a ragged tensor before preprocessing.

        `natural_questions_nocontext` is checked with its defaults and with
        `drop_yes_no`, `max_tokens` and `max_answers`; afterwards
        `natural_questions_open` is checked with `max_tokens` and
        `sample_answer`. The first fixture example never appears in any
        expected output.
        """

        def _raw_example(question, starts, ends, texts, row_lengths, yes_no):
            # All three short-answer fields of an example use the same row
            # lengths, so they are passed once and paired with each field.
            return {
                'question': {
                    'text': question,
                },
                'annotations': {
                    'short_answers': {
                        'start_token': (starts, row_lengths),
                        'end_token': (ends, row_lengths),
                        'text': (texts, row_lengths),
                    },
                    'yes_no_answer': yes_no,
                },
            }

        raw_examples = [
            _raw_example(
                'is the answer to this question no',
                starts=[],
                ends=[],
                texts=[],
                row_lengths=[0, 0],
                yes_no=[-1, -1]),
            _raw_example(
                'is the answer to this question yes',
                starts=[3, 3],
                ends=[7, 5],
                texts=['not sure sir', 'not sure'],
                row_lengths=[1, 0, 1, 0],
                yes_no=[-1, 0, -1, 1]),
            _raw_example(
                'what are the names of the olsen twins',
                starts=[0, 3],
                ends=[3, 4],
                texts=['Mary-Kate', 'Ashley'],
                row_lengths=[2, 0],
                yes_no=[-1, -1]),
        ]

        def _to_ragged(example):
            # Swap each (values, row_lengths) pair for the RaggedTensor it
            # describes; the example dict is updated in place and returned.
            short_answers = example['annotations']['short_answers']
            for name in ('start_token', 'end_token', 'text'):
                flat_values, lengths = short_answers[name]
                short_answers[name] = tf.RaggedTensor.from_row_lengths(
                    flat_values, lengths)
            return example

        int_pair = (tf.int64, tf.int64)
        vector_pair = ([None], [None])
        source_ds = tf.data.Dataset.from_generator(
            lambda: iter(raw_examples),
            output_types={
                'question': {
                    'text': tf.string
                },
                'annotations': {
                    'short_answers': {
                        'start_token': int_pair,
                        'end_token': int_pair,
                        'text': (tf.string, tf.int64)
                    },
                    'yes_no_answer': tf.int64
                }
            },
            output_shapes={
                'question': {
                    'text': []
                },
                'annotations': {
                    'short_answers': {
                        'start_token': vector_pair,
                        'end_token': vector_pair,
                        'text': vector_pair,
                    },
                    'yes_no_answer': [None]
                }
            },
        ).map(_to_ragged)

        def _expect(preprocess_fn, expected, **kwargs):
            # Run one preprocessor over the shared source dataset and compare
            # the resulting records with `expected`.
            test_utils.assert_dataset(
                preprocess_fn(source_ds, **kwargs), expected)

        nocontext = preprocessors.natural_questions_nocontext
        open_domain = preprocessors.natural_questions_open

        yes_inputs = 'nq question: is the answer to this question yes'
        twins_inputs = 'nq question: what are the names of the olsen twins'

        # Defaults: the expected targets list the yes/no answers followed by
        # every short answer.
        _expect(nocontext, [
            {
                'inputs': yes_inputs,
                'targets': ('answer: no answer: yes '
                            'answer: not sure sir answer: not sure'),
                'short_answers/values': ['not sure sir', 'not sure'],
                'short_answers/row_starts': [0, 1, 1, 2],
                'yes_no_answers': [-1, 0, -1, 1],
            },
            {
                'inputs': twins_inputs,
                'targets': 'answer: Mary-Kate answer: Ashley',
                'short_answers/values': ['Mary-Kate', 'Ashley'],
                'short_answers/row_starts': [0, 2],
                'yes_no_answers': [-1, -1],
            },
        ])

        # drop_yes_no=True: expected yes_no_answers are all -1 and the
        # targets hold short answers only.
        _expect(nocontext, [
            {
                'inputs': yes_inputs,
                'targets': 'answer: not sure sir answer: not sure',
                'short_answers/values': ['not sure sir', 'not sure'],
                'short_answers/row_starts': [0, 1, 1, 2],
                'yes_no_answers': [-1, -1, -1, -1],
            },
            {
                'inputs': twins_inputs,
                'targets': 'answer: Mary-Kate answer: Ashley',
                'short_answers/values': ['Mary-Kate', 'Ashley'],
                'short_answers/row_starts': [0, 2],
                'yes_no_answers': [-1, -1],
            },
        ], drop_yes_no=True)

        # max_tokens=2: 'not sure sir' and 'Mary-Kate' are absent from the
        # expected short answers.
        _expect(nocontext, [
            {
                'inputs': yes_inputs,
                'targets': 'answer: no answer: yes answer: not sure',
                'short_answers/values': ['not sure'],
                'short_answers/row_starts': [0, 0, 0, 1],
                'yes_no_answers': [-1, 0, -1, 1],
            },
            {
                'inputs': twins_inputs,
                'targets': 'answer: Ashley',
                'short_answers/values': ['Ashley'],
                'short_answers/row_starts': [0, 1],
                'yes_no_answers': [-1, -1],
            },
        ], max_tokens=2)

        # max_answers=1: the expected targets hold a single answer while the
        # other features keep every answer.
        _expect(nocontext, [
            {
                'inputs': yes_inputs,
                'targets': 'answer: no',
                'short_answers/values': ['not sure sir', 'not sure'],
                'short_answers/row_starts': [0, 1, 1, 2],
                'yes_no_answers': [-1, 0, -1, 1],
            },
            {
                'inputs': twins_inputs,
                'targets': 'answer: Mary-Kate',
                'short_answers/values': ['Mary-Kate', 'Ashley'],
                'short_answers/row_starts': [0, 2],
                'yes_no_answers': [-1, -1],
            },
        ], max_answers=1)

        # All three options combined.
        _expect(nocontext, [
            {
                'inputs': yes_inputs,
                'targets': 'answer: not sure',
                'short_answers/values': ['not sure'],
                'short_answers/row_starts': [0, 0, 0, 1],
                'yes_no_answers': [-1, -1, -1, -1],
            },
            {
                'inputs': twins_inputs,
                'targets': 'answer: Ashley',
                'short_answers/values': ['Ashley'],
                'short_answers/row_starts': [0, 1],
                'yes_no_answers': [-1, -1],
            },
        ], drop_yes_no=True, max_tokens=2, max_answers=1)

        # drop_yes_no=True with max_tokens=1: only the olsen twins example is
        # expected in the output.
        _expect(nocontext, [
            {
                'inputs': twins_inputs,
                'targets': 'answer: Ashley',
                'short_answers/values': ['Ashley'],
                'short_answers/row_starts': [0, 1],
                'yes_no_answers': [-1, -1],
            },
        ], drop_yes_no=True, max_tokens=1)

        # Open-domain variant with max_tokens=3.
        _expect(open_domain, [
            {
                'inputs': yes_inputs,
                'targets': 'not sure',
                'answers': ['not sure'],
            },
            {
                'inputs': twins_inputs,
                'targets': 'Mary-Kate',
                'answers': ['Mary-Kate', 'Ashley'],
            },
        ], max_tokens=3)

        # Open-domain variant with max_tokens=1 and sample_answer=True.
        _expect(open_domain, [
            {
                'inputs': twins_inputs,
                'targets': 'Ashley',
                'answers': ['Ashley'],
            },
        ], max_tokens=1, sample_answer=True)