def test_padding_seq2seq(self):
    """Test padding for sequence-to-sequence models."""
    max_input_len = 25
    true_input_len = 22  # true input_seq_length for each example in batch
    max_target_len = 25
    true_target_len = 21  # true target_seq_length for each example in batch

    batch = {
        'inputs': np.ones((batch_size, max_input_len)),
        'targets': np.ones((batch_size, max_target_len)),
    }
    # Zero out the token positions beyond each true sequence length.
    batch['inputs'][:, true_input_len:] = 0
    batch['targets'][:, true_target_len:] = 0

    padded_batch = data_utils.maybe_pad_batch(
        batch, desired_batch_size, data_format=None, mask_key='targets')

    self.assertEqual(padded_batch['inputs'].shape,
                     (desired_batch_size, max_input_len))
    self.assertEqual(padded_batch['targets'].shape,
                     (desired_batch_size, max_target_len))
    self.assertEqual(padded_batch['weights'].shape,
                     (desired_batch_size, max_target_len))

    # Weights should be 1 for real tokens, 0 for padded examples (batch
    # axis) and for padded token positions (sequence-length axis).
    num_padded_rows = desired_batch_size - batch_size
    expected_weights = np.ones((desired_batch_size, max_target_len))
    expected_weights[-num_padded_rows:] = 0  # pad at the batch axis
    expected_weights[:, true_target_len:] = 0  # pad at the sequence_len axis
    self.assertTrue(
        np.array_equal(padded_batch['weights'], expected_weights))
def valid_epoch(num_batches=None):
    """Yield padded numpy eval batches, optionally capped at num_batches.

    num_batches=None (islice's default) iterates the full epoch.
    """
    for tf_batch in itertools.islice(iter(eval_ds), num_batches):
        np_batch = data_utils.tf_to_numpy(tf_batch)
        yield data_utils.maybe_pad_batch(
            np_batch,
            per_host_eval_batch_size,
            data_format=None,
            mask_key='targets')
def valid_epoch(num_batches=None):
    """Yield padded numpy eval batches; defaults to max_eval_steps batches."""
    batch_limit = max_eval_steps if num_batches is None else num_batches
    np_iter = data_utils.iterator_as_numpy(
        itertools.islice(iter(eval_ds), batch_limit))
    for np_batch in np_iter:
        yield data_utils.maybe_pad_batch(np_batch, eval_host_batch_size)
def eval_train_epoch(num_batches=None):
    """Yield padded numpy batches of training data for evaluation.

    NOTE(review): when num_batches is None it is replaced with 0, so
    islice yields nothing and the default epoch is empty — confirm this
    is intentional (the sibling valid_epoch defaults to max_eval_steps).
    """
    batch_limit = 0 if num_batches is None else num_batches
    np_iter = data_utils.iterator_as_numpy(
        itertools.islice(iter(eval_train_ds), batch_limit))
    for np_batch in np_iter:
        yield data_utils.maybe_pad_batch(np_batch, eval_host_batch_size)
def test_padding(self, image_format, batch_axis, input_shape):
    """Test that the shape is the expected padded shape."""
    batch = {'inputs': np.ones(input_shape)}
    padded = data_utils.maybe_pad_batch(
        batch, desired_batch_size, image_format)

    # Only the batch axis should grow to desired_batch_size.
    expected_shape = list(input_shape)
    expected_shape[batch_axis] = desired_batch_size
    self.assertEqual(padded['inputs'].shape, tuple(expected_shape))
    self.assertEqual(padded['weights'].shape, (desired_batch_size,))
def valid_epoch(num_batches=None):
    """Yield padded eval batches with one-hot encoded targets."""
    for raw_batch in itertools.islice(iter(eval_ds), num_batches):
        batch = {
            'inputs': raw_batch['image'],
            # One-hot encode the integer class labels.
            'targets': np.eye(num_classes)[raw_batch['label']],
        }
        if hps.get('include_example_keys'):
            batch['example_key'] = raw_batch['example_key']
        yield data_utils.maybe_pad_batch(batch, per_host_eval_batch_size)
def _eval_batches(images,
                  labels,
                  per_host_batch_size,
                  num_batches=None,
                  valid_example_keys=None):
    """Produce a stream of batches for a single evaluation epoch."""
    starts = range(0, images.shape[0], per_host_batch_size)
    for start in itertools.islice(starts, num_batches):
        stop = start + per_host_batch_size
        inputs = jnp.array(images[start:stop])
        batch = {
            'inputs': inputs,
            'targets': jnp.array(labels[start:stop]),
            # Every slot counts as a real example here; maybe_pad_batch
            # handles any final short batch.
            'weights': jnp.ones(per_host_batch_size, dtype=inputs.dtype),
        }
        if valid_example_keys is not None:
            batch['example_key'] = valid_example_keys[start:stop]
        yield data_utils.maybe_pad_batch(batch, per_host_batch_size)
def valid_epoch(num_batches=None):
    """Yield padded validation batches; None means the full epoch."""
    limited = itertools.islice(eval_ds, num_batches)
    yield from (
        data_utils.maybe_pad_batch(b, per_host_eval_batch_size)
        for b in limited)
def eval_train_epoch(num_batches=None):
    """Yield padded batches of training data for evaluation.

    NOTE(review): the original comment claimed this uses
    per_host_batch_size and not per_host_eval_batch_size, but the code
    passes per_host_eval_batch_size — confirm whether the comment or the
    code is stale.
    """
    limited = itertools.islice(eval_train_ds, num_batches)
    yield from (
        data_utils.maybe_pad_batch(b, per_host_eval_batch_size)
        for b in limited)
def train_iterator_fn():
    """Convert each train_ds batch to numpy and pad it to the host batch size."""
    for tf_batch in iter(train_ds):
        np_batch = data_utils.tf_to_numpy(tf_batch)
        yield data_utils.maybe_pad_batch(
            np_batch,
            per_host_batch_size,
            data_format=None,
            mask_key='targets')