def test_lm_token_preprocessing(self):
  """lm_token_preprocessing joins inputs/targets into one sequence with a 0
  boundary token; the mask is 1 only over the target portion."""
  raw_ds = _test_dataset_ints([1, 2, 3], [3, 2, 1])
  processed_ds = tf_inputs.lm_token_preprocessing(raw_ds, True)
  # pylint: disable=bad-whitespace
  expected_examples = [
      {
          'inputs': np.array([1, 0, 1, 1, 1], dtype=np.int64),
          'targets': np.array([1, 0, 1, 1, 1], dtype=np.int64),
          'mask': np.array([0, 0, 1, 1, 1], dtype=np.int64),
      },
      {
          'inputs': np.array([1, 1, 0, 1, 1], dtype=np.int64),
          'targets': np.array([1, 1, 0, 1, 1], dtype=np.int64),
          'mask': np.array([0, 0, 0, 1, 1], dtype=np.int64),
      },
      {
          'inputs': np.array([1, 1, 1, 0, 1], dtype=np.int64),
          'targets': np.array([1, 1, 1, 0, 1], dtype=np.int64),
          'mask': np.array([0, 0, 0, 0, 1], dtype=np.int64),
      },
  ]
  # pylint: enable=bad-whitespace
  t5_test_utils.assert_dataset(processed_ds, expected_examples)
def test_sample_answer(self):
  """sample_answer picks 'targets' from 'answers'; the choice is seed-dependent."""
  example = {
      'inputs': ['What are the names of the Olsen Twins?'],
      'targets': ['Mary-Kate'],
      'answers': ['Mary-Kate', 'Ashley']
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  # With seed 42 the second answer is sampled and the answers are reordered.
  tf.set_random_seed(42)
  test_utils.assert_dataset(
      preprocessors.sample_answer(source_ds), {
          'inputs': 'What are the names of the Olsen Twins?',
          'targets': 'Ashley',
          'answers': ['Ashley', 'Mary-Kate'],
      })
  # With seed 420 the original target and ordering are kept.
  tf.set_random_seed(420)
  test_utils.assert_dataset(
      preprocessors.sample_answer(source_ds), {
          'inputs': ['What are the names of the Olsen Twins?'],
          'targets': ['Mary-Kate'],
          'answers': ['Mary-Kate', 'Ashley']
      })
def test_eraser_multi_rc_drop_examples(self):
  """eraser_multi_rc with drop_explanations=True keeps only the label target."""
  example = {
      'passage': 'This is a multi line passage. \nIt is about multiple things. '
                 '\nThere is more than one thing in it.',
      'query_and_answer': 'Is it about one thing? || Nope.',
      'label': 1,
      'evidences': [
          'It is about multiple things.',
          'There is more than one thing in it.'
      ]
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  processed_ds = preprocessors.eraser_multi_rc(
      source_ds, drop_explanations=True)
  # The '||' separator in query_and_answer is split into query/answer fields.
  test_utils.assert_dataset(
      processed_ds, {
          'inputs':
              'explain multirc passage: This is a multi line passage. \nIt is '
              'about multiple things. \nThere is more than one thing in it. '
              'query: Is it about one thing? answer: Nope.',
          'targets': 'True'
      })
def test_amazon_reviews_neutral(self):
  """A 3-star review is dropped in binary mode but kept with target '3'
  when binary_output=False."""
  example = {
      'data': {
          'review_headline': 'okay headphones',
          'review_body': 'the sound quality of these headphones is not bad',
          'star_rating': 3,
      }
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  # Default (binary) mode: the neutral example is filtered out entirely.
  test_utils.assert_dataset(preprocessors.amazon_reviews(source_ds), [])
  # Star-rating mode: the example survives with the raw rating as target.
  test_utils.assert_dataset(
      preprocessors.amazon_reviews(source_ds, binary_output=False), {
          'inputs': 'sentiment review: okay headphones the sound quality of '
                    'these headphones is not bad',
          'targets': '3'
      })
def test_process_xquad(self):
  """xquad builds a question/context input; punctuation in the expected
  output is space-separated with a trailing space."""
  source_ds = tf.data.Dataset.from_tensors({
      'id': '123',
      'context': 'Some context.',
      'question': 'Whose portrait by François Clouet was included'
                  ' in the Jones bequest of 1882?',
      'answers': {
          'text': ['The answer.', 'Another answer.'],
      }
  })
  processed_ds = preprocessors.xquad(source_ds)
  test_utils.assert_dataset(
      processed_ds, {
          'id': '123',
          'inputs': 'question: Whose portrait by François Clouet was'
                    ' included in the Jones bequest of 1882 ? context: Some'
                    ' context . ',
          'targets': 'The answer . ',
          'context': 'Some context . ',
          'question': 'Whose portrait by François Clouet was included'
                      ' in the Jones bequest of 1882 ? ',
          'answers': ['The answer . ', 'Another answer . '],
      })
def test_process_xnl_multiple_langs(self):
  """process_xnli fans one example out into one example per target language.

  NOTE(review): the method name reads 'xnl'; presumably 'xnli' was meant.
  Left unchanged to keep test discovery stable.
  """
  source_ds = tf.data.Dataset.from_tensors({
      'hypothesis': {
          'language': ['lang1', 'lang2', 'lang3'],
          'translation': ['translation1', 'translation2', 'translation3'],
      },
      'label': 1,
      'premise': {
          'lang1': 'premise1',
          'lang2': 'premise2',
          'lang3': 'premise3'
      }
  })
  processed_ds = preprocessors.process_xnli(
      source_ds, target_languages=['lang1', 'lang2', 'lang3'])
  # One output example per language, all sharing the same label.
  expected_examples = [{
      'inputs': 'xnli: premise: premise1 hypothesis: translation1',
      'targets': '1'
  }, {
      'inputs': 'xnli: premise: premise2 hypothesis: translation2',
      'targets': '1'
  }, {
      'inputs': 'xnli: premise: premise3 hypothesis: translation3',
      'targets': '1'
  }]
  test_utils.assert_dataset(processed_ds, expected_examples)
def test_pad_dataset_to_length(self):
  """pad_dataset_to_length zero-pads each feature up to its len_map length."""
  raw_ds = _test_dataset_ints([5, 6, 7], [6, 7, 8])
  padded_ds = tf_inputs.pad_dataset_to_length(
      raw_ds, True, len_map={
          'inputs': 7,
          'targets': 10
      })
  expected_examples = [
      {
          'inputs': np.array([1, 1, 1, 1, 1, 0, 0], dtype=np.int64),
          'targets': np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0], dtype=np.int64),
      },
      {
          'inputs': np.array([1, 1, 1, 1, 1, 1, 0], dtype=np.int64),
          'targets': np.array([1, 1, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64),
      },
      {
          'inputs': np.array([1, 1, 1, 1, 1, 1, 1], dtype=np.int64),
          'targets': np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0], dtype=np.int64),
      },
  ]
  t5_test_utils.assert_dataset(padded_ds, expected_examples)
def test_amazon_reviews(self):
  """A 5-star review maps to 'positive' (binary) or '5' (star-rating mode)."""
  example = {
      'data': {
          'review_headline': 'Great headphones',
          'review_body': 'Loved the sound quality of these headphones',
          'star_rating': 5,
      }
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  # Default binary sentiment output.
  test_utils.assert_dataset(
      preprocessors.amazon_reviews(source_ds), {
          'inputs': 'sentiment review: Great headphones Loved the '
                    'sound quality of these headphones',
          'targets': 'positive'
      })
  # Raw star-rating output.
  test_utils.assert_dataset(
      preprocessors.amazon_reviews(source_ds, binary_output=False), {
          'inputs': 'sentiment review: Great headphones Loved the '
                    'sound quality of these headphones',
          'targets': '5'
      })
def test_trim_tokens_at_front(self):
  """trim_tokens_at_front removes leading tokens, keeping the trailing ones."""
  sequence_length = {'inputs': 4}
  source_ds = tf.data.Dataset.from_tensors(
      {'inputs': tf.constant([10, 11, 12, 13, 14, 15])})
  trimmed_ds = prep.trim_tokens_at_front(
      source_ds, sequence_length=sequence_length)
  # Six input tokens are trimmed down to the last three.
  test_utils.assert_dataset(
      trimmed_ds, [{'inputs': tf.constant([13, 14, 15])}])
def test_mask_salient_spans(self):
  """mask_salient_spans emits one example per span with the span masked as _X_.

  An input example with no spans contributes no output examples.
  """
  span_examples = [
      {
          'text': 'He was confident that it would be well received.',
          'spans': {
              'start': [],
              'limit': [],
          }
      },
      {
          'text': 'The episode was filmed over three days at the end of October '
                  'and beginning of November 2002.',
          'spans': {
              'start': [53, 78],
              'limit': [60, 91],
          }
      },
  ]
  source_ds = tf.data.Dataset.from_generator(
      lambda: iter(span_examples),
      output_types={
          'text': tf.string,
          'spans': {
              'start': tf.int64,
              'limit': tf.int64,
          },
      },
      output_shapes={
          'text': [],
          'spans': {
              'start': [None],
              'limit': [None],
          },
      })
  masked_ds = preprocessors.mask_salient_spans(source_ds)
  test_utils.assert_dataset(
      masked_ds,
      [
          {
              'inputs':
                  'nem: The episode was filmed over three days at the end of '
                  '_X_ and beginning of November 2002.',
              'targets': 'October'
          },
          {
              'inputs':
                  'nem: The episode was filmed over three days at the end of '
                  'October and beginning of _X_.',
              'targets': 'November 2002'
          },
      ])
def test_process_mnli(self):
  """process_mnli builds an 'xnli:'-prefixed input and stringifies the label."""
  source_ds = tf.data.Dataset.from_tensors({
      'hypothesis': 'hypothesis1',
      'label': 1,
      'premise': 'premise1'
  })
  processed_ds = preprocessors.process_mnli(source_ds)
  test_utils.assert_dataset(
      processed_ds, {
          'inputs': 'xnli: premise: premise1 hypothesis: hypothesis1',
          'targets': '1'
      })
def test_natural_questions_open(self):
  """natural_questions_open targets the first answer and keeps all answers."""
  example = {
      'question': ['What are the names of the Olsen Twins?'],
      'answer': ['Mary-Kate', 'Ashley']
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  processed_ds = preprocessors.natural_questions_open(source_ds)
  test_utils.assert_dataset(
      processed_ds, {
          'inputs': 'nq question: What are the names of the Olsen Twins?',
          'targets': 'Mary-Kate',
          'answers': ['Mary-Kate', 'Ashley'],
      })
def test_get_t5_preprocessor_by_name(self):
  """A gin-configured 'rekey' preprocessor swaps the configured feature keys."""
  gin.clear_config()
  # NOTE(review): internal line breaks of the original config string were lost
  # in formatting; each gin binding is assumed to sit on its own line.
  gin.parse_config("""
      get_t5_preprocessor_by_name.name = 'rekey'
      get_t5_preprocessor_by_name.fn_kwargs = {'key_map': {'inputs': 'other', 'targets': 'text'}}
  """)
  rekey_fn = tf_inputs.get_t5_preprocessor_by_name()
  source_ds = tf.data.Dataset.from_tensors({
      'text': 'That is good.',
      'other': 'That is bad.'
  })
  rekeyed_ds = rekey_fn(source_ds, True)  # True = training mode.
  t5_test_utils.assert_dataset(
      rekeyed_ds, {'inputs': 'That is bad.', 'targets': 'That is good.'})
def test_imdb_movie_reviews(self):
  """imdb_reviews maps label 1 to 'positive'; label -1 becomes '<unk>'."""
  examples = {
      'text': ['great movie', 'terrible movie'],
      'label': [1, -1],
  }
  source_ds = tf.data.Dataset.from_tensor_slices(examples)
  processed_ds = preprocessors.imdb_reviews(source_ds)
  expected_examples = [{
      'inputs': 'sentiment: great movie',
      'targets': 'positive'
  }, {
      'inputs': 'sentiment: terrible movie',
      'targets': '<unk>'
  }]
  test_utils.assert_dataset(processed_ds, expected_examples)
def test_esnli_drop_explanations(self):
  """esnli with drop_explanations=True emits only the label word as target."""
  example = {
      'premise': 'It is hot.',
      'hypothesis': 'It is sunny.',
      'label': 0,
      'explanation_1': 'hot implies that it is sunny.',
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  processed_ds = preprocessors.esnli(
      source_ds, prefix='nli', drop_explanations=True)
  test_utils.assert_dataset(
      processed_ds, {
          'inputs': 'nli hypothesis: It is sunny. premise: It is hot.',
          'targets': 'entailment'
      })
def test_cos_e(self):
  """cos_e lists question and choices; the target is the answer followed by
  the abstractive explanation (the extractive one is unused)."""
  example = {
      'question': 'Question?',
      'choices': ['First', 'Second', 'Third'],
      'abstractive_explanation': 'Abstractive explanation.',
      'extractive_explanation': 'Not currently used.',
      'answer': 'First',
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  processed_ds = preprocessors.cos_e(source_ds)
  test_utils.assert_dataset(
      processed_ds, {
          'inputs': 'explain cos_e question: Question? choice: First choice: '
                    'Second choice: Third',
          'targets': 'First explanation: Abstractive explanation.'
      })
def test_rationales_preprocessor_no_explanations(self):
  """extractive_explanations with drop_explanations=True keeps only the label."""
  example = {
      'review': 'This was a terrible movie. Complete waste of time.',
      'label': 0,
      'evidences': ['terrible movie', 'waste of time']
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  processed_ds = preprocessors.extractive_explanations(
      source_ds, drop_explanations=True)
  test_utils.assert_dataset(
      processed_ds, {
          'inputs': 'explain sentiment review: This was a terrible movie. '
                    'Complete waste of time.',
          'targets': 'negative'
      })
def test_cos_e_zero_shot_like_esnli_functools(self):
  """cos_e bound via functools.partial accepts esnli-style prefix overrides."""
  example = {
      'question': 'Question?',
      'choices': ['First', 'Second', 'Third'],
      'abstractive_explanation': 'Abstractive explanation.',
      'extractive_explanation': 'Not currently used.',
      'answer': 'First',
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  cos_e_as_nli = functools.partial(
      preprocessors.cos_e, prefix='explain nli', question_prefix='premise:')
  test_utils.assert_dataset(
      cos_e_as_nli(source_ds), {
          'inputs': 'explain nli premise: Question? choice: First choice: '
                    'Second choice: Third',
          'targets': 'First explanation: Abstractive explanation.'
      })
def test_esnli_with_choices_like_cos_e(self):
  """esnli with add_choices=True appends the three NLI label choices to inputs.

  Also fixes a copy-paste defect in the original: the assignment was written
  as `dataset = dataset = ...` (a duplicated assignment target).
  """
  example = {
      'premise': 'It is hot.',
      'hypothesis': 'It is sunny.',
      'label': 0,
      'explanation_1': 'hot implies that it is sunny.'
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  dataset = functools.partial(preprocessors.esnli, add_choices=True)(source_ds)
  test_utils.assert_dataset(
      dataset, {
          'inputs': ('explain nli hypothesis: It is sunny. premise: It is hot. '
                     'choice: entailment choice: neutral choice: contradiction'),
          'targets': 'entailment explanation: hot implies that it is sunny.'
      })
def test_trivia_qa_open(self):
  """trivia_qa_open targets the canonical answer value and keeps all aliases."""
  example = {
      'question': ['What are the names of the Olsen Twins?'],
      'answer': {
          'value': 'Mary-Kate and Ashley',
          'aliases': ['Mary-Kate and Ashley', 'Ashley and Mary-Kate']
      }
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  processed_ds = preprocessors.trivia_qa_open(source_ds)
  test_utils.assert_dataset(
      processed_ds, {
          'inputs':
              'trivia_qa question: What are the names of the Olsen Twins?',
          'targets': 'Mary-Kate and Ashley',
          'answers': ['Mary-Kate and Ashley', 'Ashley and Mary-Kate'],
      })
def test_truncate_dataset_on_len(self):
  """truncate_dataset_on_len truncates in training; in eval only when asked."""
  raw_ds = _test_dataset_ints([5, 6, 7], [8, 9, 10])
  truncated = _test_dataset_ints([5, 6, 6], [4, 4, 4])
  # Training: lengths are capped at the len_map values.
  train_ds = tf_inputs.truncate_dataset_on_len(
      raw_ds, True, len_map={
          'inputs': 6,
          'targets': 4
      })
  t5_test_utils.assert_dataset(train_ds, list(truncated.as_numpy_iterator()))
  # Not training: the dataset passes through untouched.
  eval_ds = tf_inputs.truncate_dataset_on_len(
      raw_ds, False, len_map={
          'inputs': 6,
          'targets': 4
      })
  t5_test_utils.assert_dataset(eval_ds, list(raw_ds.as_numpy_iterator()))
  # Not training but truncate_on_eval=True: truncation still happens.
  eval_truncated_ds = tf_inputs.truncate_dataset_on_len(
      raw_ds, False, len_map={
          'inputs': 6,
          'targets': 4
      }, truncate_on_eval=True)
  t5_test_utils.assert_dataset(
      eval_truncated_ds, list(truncated.as_numpy_iterator()))
def test_esnli_multiple_explanations(self):
  """esnli concatenates every provided explanation after the label."""
  example = {
      'premise': 'It is hot.',
      'hypothesis': 'It is sunny.',
      'label': 0,
      'explanation_1': 'hot implies that it is sunny.',
      'explanation_2': 'sunny equals hot.',
      'explanation_3': 'hot means sunny.',
  }
  source_ds = tf.data.Dataset.from_tensors(example)
  processed_ds = preprocessors.esnli(source_ds)
  test_utils.assert_dataset(
      processed_ds, {
          'inputs': 'explain nli hypothesis: It is sunny. premise: It is hot.',
          'targets': 'entailment explanation: hot implies that it is sunny. '
                     'explanation: sunny equals hot. '
                     'explanation: hot means sunny.'
      })
def test_assert_dataset(self):
  """assert_dataset passes on an exact match and raises on value/key diffs."""
  ds = tf.data.Dataset.from_tensor_slices(
      {'key1': ['val1'], 'key2': ['val2']})
  # Exact match: no error.
  assert_dataset(ds, {'key1': [b'val1'], 'key2': [b'val2']})
  # A differing value raises.
  with self.assertRaises(AssertionError):
    assert_dataset(ds, {'key1': [b'val1'], 'key2': [b'val2x']})
  # An extra expected key raises.
  with self.assertRaises(AssertionError):
    assert_dataset(
        ds, {'key1': [b'val1'], 'key2': [b'val2'], 'key3': [b'val3']})
def test_natural_questions_nocontext(self):
  """Exercises natural_questions_nocontext across its filtering options.

  Covers the default behavior plus the drop_yes_no, max_tokens and
  max_answers options, individually and combined. The first input example
  (no short answers, yes_no_answer all -1) never appears in any expected
  output — it is dropped by the preprocessor in every configuration.
  """
  input_examples = [{
      'question': {
          'text': 'is the answer to this question no',
      },
      'annotations': {
          # Each short-answer field is a (values, row_lengths) pair that
          # _short_ans_to_ragged below converts into a tf.RaggedTensor.
          'short_answers': {
              'start_token': ([], [0, 0]),
              'end_token': ([], [0, 0]),
              'text': ([], [0, 0])
          },
          'yes_no_answer': [-1, -1]
      }
  }, {
      'question': {
          'text': 'is the answer to this question yes',
      },
      'annotations': {
          'short_answers': {
              'start_token': ([3, 3], [1, 0, 1, 0]),
              'end_token': ([7, 5], [1, 0, 1, 0]),
              'text': (['not sure sir', 'not sure'], [1, 0, 1, 0]),
          },
          # -1 entries mean "no yes/no answer"; 0 and 1 presumably encode
          # no/yes respectively (matches 'answer: no answer: yes' below).
          'yes_no_answer': [-1, 0, -1, 1]
      }
  }, {
      'question': {
          'text': 'what are the names of the olsen twins',
      },
      'annotations': {
          'short_answers': {
              'start_token': ([0, 3], [2, 0]),
              'end_token': ([3, 4], [2, 0]),
              'text': (['Mary-Kate', 'Ashley'], [2, 0])
          },
          'yes_no_answer': [-1, -1]
      }
  }]

  def _short_ans_to_ragged(ex):
    # Rebuild each ragged short-answer field from its (values, row_lengths)
    # pair, since from_generator cannot emit RaggedTensors directly.
    for field in ['start_token', 'end_token', 'text']:
      values, row_lengths = ex['annotations']['short_answers'][field]
      ex['annotations']['short_answers'][field] = (
          tf.RaggedTensor.from_row_lengths(values, row_lengths))
    return ex

  og_dataset = tf.data.Dataset.from_generator(
      lambda: (x for x in input_examples),
      output_types={
          'question': {
              'text': tf.string
          },
          'annotations': {
              'short_answers': {
                  'start_token': (tf.int64, tf.int64),
                  'end_token': (tf.int64, tf.int64),
                  'text': (tf.string, tf.int64)
              },
              'yes_no_answer': tf.int64
          }
      },
      output_shapes={
          'question': {
              'text': []
          },
          'annotations': {
              'short_answers': {
                  'start_token': ([None], [None]),
                  'end_token': ([None], [None]),
                  'text': ([None], [None]),
              },
              'yes_no_answer': [None]
          }
      }).map(_short_ans_to_ragged)

  # Default: yes/no answers and all short answers appear in targets.
  dataset = preprocessors.natural_questions_nocontext(og_dataset)
  test_utils.assert_dataset(
      dataset,
      [{
          'inputs': 'nq question: is the answer to this question yes',
          'targets': 'answer: no answer: yes answer: not sure sir '
                     'answer: not sure',
          'short_answers/values': ['not sure sir', 'not sure'],
          'short_answers/row_starts': [0, 1, 1, 2],
          'yes_no_answers': [-1, 0, -1, 1],
      }, {
          'inputs': 'nq question: what are the names of the olsen twins',
          'targets': 'answer: Mary-Kate answer: Ashley',
          'short_answers/values': ['Mary-Kate', 'Ashley'],
          'short_answers/row_starts': [0, 2],
          'yes_no_answers': [-1, -1],
      }])

  # drop_yes_no=True: yes/no answers are removed from targets and the
  # yes_no_answers feature is all -1.
  dataset = preprocessors.natural_questions_nocontext(
      og_dataset, drop_yes_no=True)
  test_utils.assert_dataset(
      dataset,
      [{
          'inputs': 'nq question: is the answer to this question yes',
          'targets': 'answer: not sure sir answer: not sure',
          'short_answers/values': ['not sure sir', 'not sure'],
          'short_answers/row_starts': [0, 1, 1, 2],
          'yes_no_answers': [-1, -1, -1, -1],
      }, {
          'inputs': 'nq question: what are the names of the olsen twins',
          'targets': 'answer: Mary-Kate answer: Ashley',
          'short_answers/values': ['Mary-Kate', 'Ashley'],
          'short_answers/row_starts': [0, 2],
          'yes_no_answers': [-1, -1],
      }])

  # max_tokens=2: short answers longer than 2 tokens ('not sure sir',
  # 'Mary-Kate'?) are excluded from both targets and short_answers/values.
  dataset = preprocessors.natural_questions_nocontext(
      og_dataset, max_tokens=2)
  test_utils.assert_dataset(
      dataset,
      [{
          'inputs': 'nq question: is the answer to this question yes',
          'targets': 'answer: no answer: yes answer: not sure',
          'short_answers/values': ['not sure'],
          'short_answers/row_starts': [0, 0, 0, 1],
          'yes_no_answers': [-1, 0, -1, 1],
      }, {
          'inputs': 'nq question: what are the names of the olsen twins',
          'targets': 'answer: Ashley',
          'short_answers/values': ['Ashley'],
          'short_answers/row_starts': [0, 1],
          'yes_no_answers': [-1, -1],
      }])

  # max_answers=1: only the first answer appears in targets; the
  # short_answers/* features are unaffected.
  dataset = preprocessors.natural_questions_nocontext(
      og_dataset, max_answers=1)
  test_utils.assert_dataset(
      dataset,
      [{
          'inputs': 'nq question: is the answer to this question yes',
          'targets': 'answer: no',
          'short_answers/values': ['not sure sir', 'not sure'],
          'short_answers/row_starts': [0, 1, 1, 2],
          'yes_no_answers': [-1, 0, -1, 1],
      }, {
          'inputs': 'nq question: what are the names of the olsen twins',
          'targets': 'answer: Mary-Kate',
          'short_answers/values': ['Mary-Kate', 'Ashley'],
          'short_answers/row_starts': [0, 2],
          'yes_no_answers': [-1, -1],
      }])

  # All three options combined.
  dataset = preprocessors.natural_questions_nocontext(
      og_dataset, drop_yes_no=True, max_tokens=2, max_answers=1)
  test_utils.assert_dataset(
      dataset,
      [{
          'inputs': 'nq question: is the answer to this question yes',
          'targets': 'answer: not sure',
          'short_answers/values': ['not sure'],
          'short_answers/row_starts': [0, 0, 0, 1],
          'yes_no_answers': [-1, -1, -1, -1],
      }, {
          'inputs': 'nq question: what are the names of the olsen twins',
          'targets': 'answer: Ashley',
          'short_answers/values': ['Ashley'],
          'short_answers/row_starts': [0, 1],
          'yes_no_answers': [-1, -1],
      }])

  # drop_yes_no with max_tokens=1: the second question loses all its answers
  # and is dropped entirely; only the olsen-twins example survives.
  dataset = preprocessors.natural_questions_nocontext(
      og_dataset, drop_yes_no=True, max_tokens=1)
  test_utils.assert_dataset(dataset, [{
      'inputs': 'nq question: what are the names of the olsen twins',
      'targets': 'answer: Ashley',
      'short_answers/values': ['Ashley'],
      'short_answers/row_starts': [0, 1],
      'yes_no_answers': [-1, -1],
  }])
def test_rank_classification(self):
  """Exercises prep.rank_classification in its three modes.

  Per the expected outputs: 'eval' emits one example per choice; 'train'
  emits only the labeled choice; 'fewshot_train' emits the labeled choice
  once per choice (repeated), matching the 'eval' dataset size.
  """
  input_examples = [
      {
          'premise': 'The farmland needed irrigation.',
          'question': 'effect',
          'choice1': 'a canal was constructed',
          'choice2': 'the crops grew tall',
          'label': 0,
      },
      {
          'premise': 'I decided to stay home last night.',
          'question': 'cause',
          'choice1': 'I wanted to see people',
          'choice2': 'I was too tired',
          'label': 1,
      },
  ]
  input_ds = tf.data.Dataset.from_generator(
      lambda: (x for x in input_examples),
      output_types={
          'premise': tf.string,
          'question': tf.string,
          'choice1': tf.string,
          'choice2': tf.string,
          'label': tf.int32,
      },
      output_shapes={
          'premise': [],
          'question': [],
          'choice1': [],
          'choice2': [],
          'label': [],
      })

  # all options: 'eval' mode formats every choice, sharing idx and label.
  dataset = prep.rank_classification(
      input_ds,
      inputs_format='{premise} What is the {question}? X',
      targets_formats=['I think {choice1}.', 'I think {choice2}.'],
      mode='eval')
  test_utils.assert_dataset(dataset, [
      {
          'idx': 0,
          'inputs': 'The farmland needed irrigation. What is the effect? X',
          'targets': 'I think a canal was constructed.',
          'label': 0
      },
      {
          'idx': 0,
          'inputs': 'The farmland needed irrigation. What is the effect? X',
          'targets': 'I think the crops grew tall.',
          'label': 0
      },
      {
          'idx': 1,
          'inputs': 'I decided to stay home last night. What is the cause? X',
          'targets': 'I think I wanted to see people.',
          'label': 1
      },
      {
          'idx': 1,
          'inputs': 'I decided to stay home last night. What is the cause? X',
          'targets': 'I think I was too tired.',
          'label': 1
      },
  ])

  # label option only: 'train' mode keeps just the choice selected by label.
  dataset = prep.rank_classification(
      input_ds,
      inputs_format='{premise} What is the {question}? X',
      targets_formats=['I think {choice1}.', 'I think {choice2}.'],
      mode='train')
  test_utils.assert_dataset(dataset, [
      {
          'idx': 0,
          'inputs': 'The farmland needed irrigation. What is the effect? X',
          'targets': 'I think a canal was constructed.',
          'label': 0
      },
      {
          'idx': 1,
          'inputs': 'I decided to stay home last night. What is the cause? X',
          'targets': 'I think I was too tired.',
          'label': 1
      },
  ])

  # label option only, repeated: 'fewshot_train' repeats the labeled choice
  # once per available choice.
  dataset = prep.rank_classification(
      input_ds,
      inputs_format='{premise} What is the {question}? X',
      targets_formats=['I think {choice1}.', 'I think {choice2}.'],
      mode='fewshot_train')
  test_utils.assert_dataset(dataset, [
      {
          'idx': 0,
          'inputs': 'The farmland needed irrigation. What is the effect? X',
          'targets': 'I think a canal was constructed.',
          'label': 0
      },
      {
          'idx': 0,
          'inputs': 'The farmland needed irrigation. What is the effect? X',
          'targets': 'I think a canal was constructed.',
          'label': 0
      },
      {
          'idx': 1,
          'inputs': 'I decided to stay home last night. What is the cause? X',
          'targets': 'I think I was too tired.',
          'label': 1
      },
      {
          'idx': 1,
          'inputs': 'I decided to stay home last night. What is the cause? X',
          'targets': 'I think I was too tired.',
          'label': 1
      },
  ])
def test_assert_dataset(self):
  """assert_dataset: exact matches pass; value, dtype, or key diffs raise.

  Fixes a copy-paste defect in the original: the 'Additional key, value'
  assertion block was duplicated verbatim; the redundant copy is removed.
  """
  first_dataset = tf.data.Dataset.from_tensor_slices({
      'key1': ['val1'],
      'key2': ['val2']
  })
  # Equal
  assert_dataset(first_dataset, {'key1': [b'val1'], 'key2': [b'val2']})
  assert_dataset(
      first_dataset, {
          'key1': [b'val1'],
          'key2': [b'val2']
      },
      expected_dtypes={'key1': tf.string})
  # Unequal value
  with self.assertRaises(AssertionError):
    assert_dataset(first_dataset, {
        'key1': [b'val1'],
        'key2': [b'val2x']
    })
  # Wrong dtype
  with self.assertRaises(AssertionError):
    assert_dataset(
        first_dataset, {
            'key1': [b'val1'],
            'key2': [b'val2']
        },
        expected_dtypes={'key1': tf.int32})
  # Additional key, value
  with self.assertRaises(AssertionError):
    assert_dataset(first_dataset, {
        'key1': [b'val1'],
        'key2': [b'val2'],
        'key3': [b'val3']
    })