def test_encoder_decoder_pretokenized_field(self):
  """Smoke-tests the packing converter when an extra string feature exists.

  Every example carries a scalar `targets_pretokenized` string next to the
  integer `inputs`/`targets` sequences, while the requested feature lengths
  only mention `inputs` and `targets`. Nothing is asserted about the output:
  the test passes as long as invoking the converter does not raise.
  """
  examples = [
      {
          "inputs": [7, 8, 5, 1],
          "targets": [3, 9, 1],
          "targets_pretokenized": "abc",
      },
      {
          "inputs": [8, 4, 9, 3, 1],
          "targets": [4, 1],
          "targets_pretokenized": "def",
      },
  ]
  # (dtype, shape) for each feature; the pretokenized text is a scalar string.
  feature_spec = {
      "inputs": (tf.int32, [None]),
      "targets": (tf.int32, [None]),
      "targets_pretokenized": (tf.string, []),
  }
  dataset = tf.data.Dataset.from_generator(
      lambda: examples,
      output_types={name: dtype for name, (dtype, _) in feature_spec.items()},
      output_shapes={name: shape for name, (_, shape) in feature_spec.items()},
  )
  pack_converter = feature_converters.EncDecFeatureConverter(pack=True)
  # `targets_pretokenized` is present in the dataset but absent from the
  # feature lengths; the call below must tolerate that without an error.
  pack_converter(dataset, {"inputs": 10, "targets": 7})
def test_get_dataset_enc_dec_sharded_and_packed(self):
  """Checks `get_dataset` output when sharding and packing are both enabled.

  Three examples are registered under a dummy task and shard 0 of 2 is
  requested with a packing converter. The expected result is one packed
  example built from source examples 0 and 2, i.e. sharding is applied
  before packing.
  """
  task_name = "enc_dec_sharded_and_packed"
  examples = [
      {"inputs": [7, 8], "targets": [3, 9]},
      {"inputs": [8, 4], "targets": [4]},
      {"inputs": [5, 6, 7], "targets": [6]},
  ]
  source_ds = create_default_dataset(examples)
  register_dummy_task(
      task_name, dataset_fn=lambda split, shuffle_files: source_ds)

  pack_converter = feature_converters.EncDecFeatureConverter(pack=True)
  first_of_two_shards = dataset_providers.ShardInfo(index=0, num_shards=2)
  sharded_ds = dataset_providers.get_dataset(
      mixture_or_task_name=task_name,
      task_feature_lengths={"inputs": 7, "targets": 5},
      dataset_split="train",
      shuffle=False,
      feature_converter=pack_converter,
      shard_info=first_of_two_shards)

  # Packing should be done after the sharding: only examples 0 and 2 show up,
  # packed into a single row.
  encoder_side = {
      "encoder_input_tokens": [7, 8, 1, 5, 6, 7, 1],
      "encoder_segment_ids": [1, 1, 1, 2, 2, 2, 2],
      "encoder_positions": [0, 1, 2, 0, 1, 2, 3],
  }
  decoder_side = {
      "decoder_target_tokens": [3, 9, 1, 6, 1],
      "decoder_input_tokens": [0, 3, 9, 0, 6],
      "decoder_loss_weights": [1, 1, 1, 1, 1],
      "decoder_segment_ids": [1, 1, 1, 2, 2],
      "decoder_positions": [0, 1, 2, 0, 1],
  }
  expected = {**encoder_side, **decoder_side}
  # Every converted feature is expected to be int32.
  assert_dataset(
      sharded_ds, expected, expected_dtypes=dict.fromkeys(expected, tf.int32))
def test_get_dataset_enc_dec_sharded(self):
  """Checks `get_dataset` output for shard 0 of 2 without packing.

  Three examples are registered under a dummy task. With two shards and
  shard index 0 requested, the expected output holds the converted versions
  of source examples 0 and 2 only.
  """
  task_name = "enc_dec_sharded"
  examples = [
      {"inputs": [7, 8, 5, 6, 9, 4, 3], "targets": [3, 9]},
      {"inputs": [8, 4], "targets": [4]},
      {"inputs": [5, 6, 7], "targets": [6, 5]},
  ]
  source_ds = create_default_dataset(examples)
  register_dummy_task(
      task_name, dataset_fn=lambda split, shuffle_files: source_ds)

  unpacked_converter = feature_converters.EncDecFeatureConverter(pack=False)
  first_of_two_shards = dataset_providers.ShardInfo(index=0, num_shards=2)
  sharded_ds = dataset_providers.get_dataset(
      mixture_or_task_name=task_name,
      task_feature_lengths={"inputs": 7, "targets": 5},
      dataset_split="train",
      shuffle=False,
      feature_converter=unpacked_converter,
      shard_info=first_of_two_shards)

  # Example index 1 should not be present in the sharded dataset.
  from_example_0 = {
      "encoder_input_tokens": [7, 8, 5, 6, 9, 4, 1],
      "decoder_target_tokens": [3, 9, 1, 0, 0],
      "decoder_input_tokens": [0, 3, 9, 1, 0],
      "decoder_loss_weights": [1, 1, 1, 0, 0],
  }
  from_example_2 = {
      "encoder_input_tokens": [5, 6, 7, 1, 0, 0, 0],
      "decoder_target_tokens": [6, 5, 1, 0, 0],
      "decoder_input_tokens": [0, 6, 5, 1, 0],
      "decoder_loss_weights": [1, 1, 1, 0, 0],
  }
  expected = [from_example_0, from_example_2]
  # Every converted feature is expected to be int32.
  assert_dataset(
      sharded_ds,
      expected,
      expected_dtypes=dict.fromkeys(expected[0], tf.int32))
def test_postprocessing(
    task_name,
    raw_data,
    predict_output=None,
    score_output=None,
    feature_encoder=feature_converters.EncDecFeatureConverter()):
  """Runs a task's evaluation with canned model outputs.

  `raw_data` is injected into the task through `DataInjector`, an
  `evaluation.Evaluator` is built for `task_name`, and
  `Evaluator.evaluate()` is invoked with stub predict/score callables that
  ignore their arguments and simply hand back `predict_output` and
  `score_output`. Whatever `evaluate()` returns is returned unchanged.
  Because evaluation reads the task data, the task preprocessing code is
  exercised as well.

  Typical usage is `metrics, _, _ = test_postprocessing(...)`, since the
  second and third returned values should match the `predict_output` and
  `score_output` that were passed in.

  Args:
    task_name: A SeqIO task name.
    raw_data: A string-keyed dict of string-keyed dicts. The outer dict is
      keyed by dataset split and the inner dict holds that split's data.
    predict_output: A list of (int, [value]) tuples standing in for the model
      predictions. Optional.
    score_output: A list of (int, [value]) tuples standing in for the output
      of the model scoring code. Optional.
    feature_encoder: Feature converter handed to the `Evaluator`. Defaults to
      an `EncDecFeatureConverter` instance that is created once, when this
      function is defined.

  Returns:
    The result of `Evaluator.evaluate()`, described by callers as:
    metrics: a mapping from task name to computed metrics.
    predicted_tokens: a mapping from task name to the output tokens from
      `predict_fn`, for tasks that have `predict_metric_fns`.
    scores: a mapping from task name to the output scores from `score_fn`,
      for tasks with score-based metric functions.
  """

  class PredictCallable(evaluation.PredictFnCallable):
    """Stub predict function that always yields `predict_output`."""

    def __call__(self,
                 dataset: tf.data.Dataset = None,
                 model_feature_lengths: Mapping[str, int] = None):
      return predict_output

  class ScoreCallable(evaluation.PredictFnCallable):
    """Stub score function that always yields `score_output`."""

    def __call__(self,
                 dataset: tf.data.Dataset = None,
                 model_feature_lengths: Mapping[str, int] = None):
      return score_output

  # The evaluator is built and run while the injected data is active.
  with DataInjector(task_name, raw_data):
    evaluator = evaluation.Evaluator(
        task_name, feature_converter=feature_encoder)
    stub_predict_fn = PredictCallable()
    stub_score_fn = ScoreCallable()
    return evaluator.evaluate(
        compute_metrics=True,
        predict_fn=stub_predict_fn,
        score_fn=stub_score_fn)
def test_encoder_decoder_extra_long_inputs(self):
  """Expects an error when `inputs` exceeds its declared feature length.

  The single example has 7 input tokens while the allowed length is 5.
  Building the converter, converting the dataset and iterating over it all
  happen inside the `assertRaisesRegex` context, so the
  `InvalidArgumentError` may surface at any of those steps.
  """
  examples = [{"inputs": [9, 4, 3, 8, 4, 5, 1], "targets": [3, 9, 4, 7, 8, 1]}]
  dataset = create_default_dataset(examples)
  feature_lengths = {"inputs": 5, "targets": 8}
  error_pattern = (
      r".*Feature \\'inputs\\' has length not less than or equal to the "
      r"expected length of 5 during input_validation.*")

  with self.assertRaisesRegex(tf.errors.InvalidArgumentError, error_pattern):
    unpacked_converter = feature_converters.EncDecFeatureConverter(pack=False)
    converted = unpacked_converter(dataset, feature_lengths)
    # Drain the dataset so that every element is actually produced.
    for _ in converted.as_numpy_iterator():
      pass
def test_get_dataset_enc_dec_packed(self):
  """Checks `get_dataset` output with a packing converter and no sharding.

  Three examples are registered under a dummy task. The expected output has
  two rows: the first source example on its own, and the second and third
  source examples packed together.
  """
  task_name = "enc_dec_packed"
  examples = [
      {"inputs": [7, 8, 5, 6, 9, 4, 3], "targets": [3, 9]},
      {"inputs": [8, 4], "targets": [4]},
      {"inputs": [5, 6, 7], "targets": [6, 5]},
  ]
  source_ds = create_default_dataset(examples)
  register_dummy_task(
      task_name, dataset_fn=lambda split, shuffle_files: source_ds)

  pack_converter = feature_converters.EncDecFeatureConverter(pack=True)
  packed_ds = dataset_providers.get_dataset(
      mixture_or_task_name=task_name,
      task_feature_lengths={"inputs": 7, "targets": 5},
      dataset_split="train",
      shuffle=False,
      feature_converter=pack_converter)

  # Example 1 is trimmed and fills the encoder side by itself.
  trimmed_row = {
      "encoder_input_tokens": [7, 8, 5, 6, 9, 4, 1],
      "encoder_segment_ids": [1, 1, 1, 1, 1, 1, 1],
      "encoder_positions": [0, 1, 2, 3, 4, 5, 6],
      "decoder_target_tokens": [3, 9, 1, 0, 0],
      "decoder_input_tokens": [0, 3, 9, 0, 0],
      "decoder_loss_weights": [1, 1, 1, 0, 0],
      "decoder_segment_ids": [1, 1, 1, 0, 0],
      "decoder_positions": [0, 1, 2, 0, 0],
  }
  # Example 2 and 3 are packed together.
  packed_row = {
      "encoder_input_tokens": [8, 4, 1, 5, 6, 7, 1],
      "encoder_segment_ids": [1, 1, 1, 2, 2, 2, 2],
      "encoder_positions": [0, 1, 2, 0, 1, 2, 3],
      "decoder_target_tokens": [4, 1, 6, 5, 1],
      "decoder_input_tokens": [0, 4, 0, 6, 5],
      "decoder_loss_weights": [1, 1, 1, 1, 1],
      "decoder_segment_ids": [1, 1, 2, 2, 2],
      "decoder_positions": [0, 1, 0, 1, 2],
  }
  expected = [trimmed_row, packed_row]
  # Every converted feature is expected to be int32.
  assert_dataset(
      packed_ds,
      expected,
      expected_dtypes=dict.fromkeys(expected[0], tf.int32))
def test_encoder_decoder_targets_max_length(self):
  """Converts an example whose features exactly fill their lengths.

  Both `inputs` and `targets` hold 5 tokens and both feature lengths are 5,
  so the expected output contains no padding and every loss weight is 1. The
  expected decoder inputs are the targets shifted right by one position with
  a leading 0.
  """
  examples = [{"inputs": [9, 4, 3, 8, 1], "targets": [3, 9, 4, 5, 1]}]
  dataset = create_default_dataset(examples)

  unpacked_converter = feature_converters.EncDecFeatureConverter(pack=False)
  converted = unpacked_converter(dataset, {"inputs": 5, "targets": 5})

  assert_dataset(
      converted,
      {
          "encoder_input_tokens": [9, 4, 3, 8, 1],
          "decoder_target_tokens": [3, 9, 4, 5, 1],
          "decoder_input_tokens": [0, 3, 9, 4, 5],
          "decoder_loss_weights": [1, 1, 1, 1, 1],
      })
def test_encoder_decoder_unpacked(self):
  """Converts one example without packing and checks padding behaviour.

  The feature lengths (7 for inputs, 5 for targets) are larger than the
  example, so the expected encoder and decoder-target features are padded
  with zeros and the padded target position has loss weight 0.
  """
  examples = [{"inputs": [9, 4, 3, 8, 1], "targets": [3, 9, 4, 1]}]
  dataset = create_default_dataset(examples)

  unpacked_converter = feature_converters.EncDecFeatureConverter(pack=False)
  converted = unpacked_converter(dataset, {"inputs": 7, "targets": 5})

  # The trailing 1 (EOS) is kept in the decoder inputs on purpose:
  # mtf.transformer.autoregressive_inputs does not zero out the last eos when
  # the data is not packed, and this test mimics that behavior.
  shifted_decoder_inputs = [0, 3, 9, 4, 1]
  assert_dataset(
      converted,
      {
          "encoder_input_tokens": [9, 4, 3, 8, 1, 0, 0],
          "decoder_target_tokens": [3, 9, 4, 1, 0],
          "decoder_input_tokens": shifted_decoder_inputs,
          "decoder_loss_weights": [1, 1, 1, 1, 0],
      })
def test_task(
    task_name: str,
    raw_data: Mapping[str, Any],
    output_feature_name="targets",
    feature_encoder=feature_converters.EncDecFeatureConverter(pack=False)
) -> Tuple[Mapping[str, Any], Mapping[str, Any]]:
  """Runs a task's preprocessing and evaluation on injected raw data.

  The raw data is first pushed through `test_preprocessing`. The value stored
  under `output_feature_name` in that result is then used as the "model
  prediction" for `test_postprocessing`, so the evaluation is run against
  the expected target itself. Both results are returned.

  `raw_data` is expected to be a nested dict of the form
  {'split_name': {'data_key': data}}.

  Note that testing metrics that use score_outputs from this API is
  currently unsupported: no score output is forwarded.

  Args:
    task_name: A SeqIO task name.
    raw_data: A string-keyed dict of string-keyed dicts. The outer dict is
      keyed by dataset split and the inner dict holds that split's data.
    output_feature_name: Key of the output feature; used to pull the expected
      target out of the preprocessing output.
    feature_encoder: Feature converter forwarded to `test_postprocessing`.
      Defaults to an `EncDecFeatureConverter(pack=False)` instance that is
      created once, when this function is defined.

  Returns:
    A tuple (preprocessing_output, evaluation_output). The first element is
    the result of `test_preprocessing`; the second is whatever
    `test_postprocessing` returns, passed through unchanged.
  """
  preprocessed = test_preprocessing(task_name, raw_data)
  expected_target = preprocessed[output_feature_name]
  # NOTE(review): the return annotation implies the second element is a
  # metrics mapping; confirm that `test_postprocessing` returns only metrics
  # here rather than a (metrics, predictions, scores) triple.
  evaluation_result = test_postprocessing(
      task_name,
      raw_data,
      predict_output=expected_target,
      feature_encoder=feature_encoder)
  return preprocessed, evaluation_result
def test_get_dataset_both_train_and_validation_splits(self):
  """Checks `get_dataset` for a task that serves two different splits.

  The dummy task's `dataset_fn` picks the dataset by split name. Each split
  is fetched with its own non-packing converter instance, and both outputs
  are then compared against their expected converted examples.
  """
  task_name = "both_train_and_validation_splits"
  train_examples = [{"inputs": [7, 8, 5, 6, 9, 4, 3], "targets": [3, 9]}]
  val_examples = [{"inputs": [8, 4], "targets": [4]}]
  datasets_by_split = {
      "train": create_default_dataset(train_examples),
      "validation": create_default_dataset(val_examples),
  }
  register_dummy_task(
      task_name,
      dataset_fn=lambda split, shuffle_files: datasets_by_split[split])

  feature_lengths = {"inputs": 7, "targets": 5}
  # A fresh converter is created for every split, before `get_dataset` runs.
  outputs_by_split = {
      split: dataset_providers.get_dataset(
          mixture_or_task_name=task_name,
          task_feature_lengths=feature_lengths,
          dataset_split=split,
          shuffle=False,
          feature_converter=feature_converters.EncDecFeatureConverter(
              pack=False))
      for split in ("train", "validation")
  }

  expected_by_split = {
      "train": {
          "encoder_input_tokens": [7, 8, 5, 6, 9, 4, 1],
          "decoder_target_tokens": [3, 9, 1, 0, 0],
          "decoder_input_tokens": [0, 3, 9, 1, 0],
          "decoder_loss_weights": [1, 1, 1, 0, 0],
      },
      "validation": {
          "encoder_input_tokens": [8, 4, 1, 0, 0, 0, 0],
          "decoder_target_tokens": [4, 1, 0, 0, 0],
          "decoder_input_tokens": [0, 4, 1, 0, 0],
          "decoder_loss_weights": [1, 1, 0, 0, 0],
      },
  }
  # Both splits share the same feature names, all expected to be int32.
  int32_dtypes = dict.fromkeys(expected_by_split["train"], tf.int32)
  for split, expected in expected_by_split.items():
    assert_dataset(
        outputs_by_split[split], expected, expected_dtypes=int32_dtypes)
def test_task(task_name,
              raw_data,
              predict_output=None,
              score_output=None,
              feature_encoder=feature_converters.EncDecFeatureConverter()):
  """Runs a task's preprocessing and evaluation on injected raw data.

  The raw data is pushed through `test_preprocessing`, and separately through
  `test_postprocessing` together with the canned `predict_output` and
  `score_output`. Of the three values `test_postprocessing` returns, only the
  first (the metrics) is kept.

  Args:
    task_name: A SeqIO task name.
    raw_data: A string-keyed dict of string-keyed dicts. The outer dict is
      keyed by dataset split and the inner dict holds that split's data.
    predict_output: A list of (int, [value]) tuples standing in for the model
      predictions. Optional.
    score_output: A list of (int, [value]) tuples standing in for the output
      of the model scoring code. Optional.
    feature_encoder: Feature converter forwarded to `test_postprocessing`.
      Defaults to an `EncDecFeatureConverter` instance that is created once,
      when this function is defined.

  Returns:
    A tuple (preprocessing_output, metrics), where `preprocessing_output` is
    the result of `test_preprocessing` on `raw_data` and `metrics` is the
    first value returned by `test_postprocessing`.
  """
  preprocessed = test_preprocessing(task_name, raw_data)
  # The helper must return exactly three values; the echoed predictions and
  # scores are discarded.
  metrics, _unused_predictions, _unused_scores = test_postprocessing(
      task_name,
      raw_data,
      predict_output=predict_output,
      score_output=score_output,
      feature_encoder=feature_encoder)
  return preprocessed, metrics
def test_encoder_decoder_packed_long_sequences(self):
  """Checks packing output when no two examples fit into one row.

  The inputs have 7 and 6 tokens against an input length of 7, so the
  expected output keeps the two examples in separate rows. The packing
  specific `*_segment_ids` and `*_positions` features are still expected.
  """
  examples = [
      {"inputs": [7, 8, 5, 6, 9, 4, 1], "targets": [3, 9, 1]},
      {"inputs": [8, 4, 9, 3, 5, 1], "targets": [4, 1]},
  ]
  dataset = create_default_dataset(examples)

  pack_converter = feature_converters.EncDecFeatureConverter(pack=True)
  converted = pack_converter(dataset, {"inputs": 7, "targets": 3})

  # Corner case: packing is requested, but the examples are too long relative
  # to the feature lengths for any packing to happen. Each row therefore has
  # a single segment, and the second row is padded with zeros.
  first_row = {
      "encoder_input_tokens": [7, 8, 5, 6, 9, 4, 1],
      "encoder_segment_ids": [1, 1, 1, 1, 1, 1, 1],
      "encoder_positions": [0, 1, 2, 3, 4, 5, 6],
      "decoder_target_tokens": [3, 9, 1],
      "decoder_input_tokens": [0, 3, 9],
      "decoder_loss_weights": [1, 1, 1],
      "decoder_segment_ids": [1, 1, 1],
      "decoder_positions": [0, 1, 2],
  }
  second_row = {
      "encoder_input_tokens": [8, 4, 9, 3, 5, 1, 0],
      "encoder_segment_ids": [1, 1, 1, 1, 1, 1, 0],
      "encoder_positions": [0, 1, 2, 3, 4, 5, 0],
      "decoder_target_tokens": [4, 1, 0],
      "decoder_input_tokens": [0, 4, 0],
      "decoder_loss_weights": [1, 1, 0],
      "decoder_segment_ids": [1, 1, 0],
      "decoder_positions": [0, 1, 0],
  }
  assert_dataset(converted, [first_row, second_row])
def test_encoder_decoder_packed(self):
  """Checks that two short examples are packed into a single row.

  With feature lengths of 10 (inputs) and 7 (targets) both examples fit
  together. The expected row marks them as segments 1 and 2, restarts the
  positions at 0 for the second segment, and pads the remainder with zeros.
  """
  examples = [
      {"inputs": [7, 8, 5, 1], "targets": [3, 9, 1]},
      {"inputs": [8, 4, 9, 3, 1], "targets": [4, 1]},
  ]
  dataset = create_default_dataset(examples)

  pack_converter = feature_converters.EncDecFeatureConverter(pack=True)
  converted = pack_converter(dataset, {"inputs": 10, "targets": 7})

  encoder_side = {
      "encoder_input_tokens": [7, 8, 5, 1, 8, 4, 9, 3, 1, 0],
      "encoder_segment_ids": [1, 1, 1, 1, 2, 2, 2, 2, 2, 0],
      "encoder_positions": [0, 1, 2, 3, 0, 1, 2, 3, 4, 0],
  }
  decoder_side = {
      "decoder_target_tokens": [3, 9, 1, 4, 1, 0, 0],
      "decoder_input_tokens": [0, 3, 9, 0, 4, 0, 0],
      "decoder_loss_weights": [1, 1, 1, 1, 1, 0, 0],
      "decoder_segment_ids": [1, 1, 1, 2, 2, 0, 0],
      "decoder_positions": [0, 1, 2, 0, 1, 0, 0],
  }
  assert_dataset(converted, {**encoder_side, **decoder_side})