Example #1
    def test_padding_seq2seq(self):
        """Test padding for sequence-to-sequence models."""
        input_len_max = 25
        input_len_true = 22  # true input_seq_length for each example in batch.
        target_len_max = 25
        target_len_true = 21  # true target_seq_length for each example in batch.

        inputs_shape = (batch_size, input_len_max)
        targets_shape = (batch_size, target_len_max)
        batch = {
            'inputs': np.ones(inputs_shape),
            'targets': np.ones(targets_shape)
        }
        batch['inputs'][:, input_len_true:] = 0  # zero-pad extra inputs tokens
        batch['targets'][:, target_len_true:] = 0  # zero-pad extra targets tokens
        expected_inputs_shape = (desired_batch_size, input_len_max)
        expected_targets_shape = (desired_batch_size, target_len_max)
        expected_weights_shape = (desired_batch_size, target_len_max)
        padded_batch = data_utils.maybe_pad_batch(batch,
                                                  desired_batch_size,
                                                  data_format=None,
                                                  mask_key='targets')
        self.assertEqual(padded_batch['inputs'].shape, expected_inputs_shape)
        self.assertEqual(padded_batch['targets'].shape, expected_targets_shape)
        self.assertEqual(padded_batch['weights'].shape, expected_weights_shape)

        batch_pad = desired_batch_size - batch_size
        expected_weights_array = np.ones((desired_batch_size, target_len_max))
        # pad at batch axis
        expected_weights_array[-batch_pad:] = 0
        # pad at sequence_len axis
        expected_weights_array[:, target_len_true:] = 0
        self.assertTrue(
            np.array_equal(padded_batch['weights'], expected_weights_array))
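The test above pins down the expected semantics: arrays are zero-padded along the batch axis up to desired_batch_size, and the returned 'weights' mask is zero both for the padded rows and for the positions where the mask_key array ('targets') is already zero. A minimal numpy sketch of that behavior, as an illustrative stand-in and not the library's actual maybe_pad_batch implementation:

import numpy as np


def pad_batch_sketch(batch, desired_batch_size, mask_key='targets'):
  """Illustrative stand-in: pad along the batch axis and build a weights mask."""
  batch_size = batch[mask_key].shape[0]
  batch_pad = desired_batch_size - batch_size
  padded = {
      k: np.pad(v, [(0, batch_pad)] + [(0, 0)] * (v.ndim - 1))
      for k, v in batch.items()
  }
  # Weight 1 wherever mask_key is nonzero in the real rows; padded rows get 0.
  weights = (batch[mask_key] != 0).astype(np.float32)
  padded['weights'] = np.pad(weights,
                             [(0, batch_pad)] + [(0, 0)] * (weights.ndim - 1))
  return padded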
Example #2
 def valid_epoch(num_batches=None):
     valid_iter = iter(eval_ds)
     for batch in itertools.islice(valid_iter, num_batches):
         yield data_utils.maybe_pad_batch(data_utils.tf_to_numpy(batch),
                                          per_host_eval_batch_size,
                                          data_format=None,
                                          mask_key='targets')
Example #3
 def valid_epoch(num_batches=None):
   if num_batches is None:
     num_batches = max_eval_steps
   valid_iter = iter(eval_ds)
   np_iter = data_utils.iterator_as_numpy(
       itertools.islice(valid_iter, num_batches))
   for batch in np_iter:
     yield data_utils.maybe_pad_batch(batch, eval_host_batch_size)
Example #4
 def eval_train_epoch(num_batches=None):
   if num_batches is None:
     num_batches = 0
   eval_train_iter = iter(eval_train_ds)
   np_iter = data_utils.iterator_as_numpy(
       itertools.islice(eval_train_iter, num_batches))
   for batch in np_iter:
     yield data_utils.maybe_pad_batch(batch, eval_host_batch_size)
Example #5
 def test_padding(self, image_format, batch_axis, input_shape):
     """Test that the shape is the expected padded shape."""
     batch = {'inputs': np.ones(input_shape)}
     padded_batch = data_utils.maybe_pad_batch(batch, desired_batch_size,
                                               image_format)
     expected_shapes = list(input_shape)
     expected_shapes[batch_axis] = desired_batch_size
     self.assertEqual(padded_batch['inputs'].shape, tuple(expected_shapes))
     self.assertEqual(padded_batch['weights'].shape, (desired_batch_size, ))
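Here batch_axis is tied to image_format: the batch dimension is padded wherever 'N' sits in the layout string (for example axis 0 for 'NHWC'). A small hypothetical helper showing that relationship; the exact set of format strings data_utils accepts is an assumption:

def batch_axis_from_format(image_format):
  """Hypothetical helper: the batch axis is the position of 'N' in the layout."""
  return image_format.index('N')


# batch_axis_from_format('NHWC') == 0
# batch_axis_from_format('HWCN') == 3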
Example #6
 def valid_epoch(num_batches=None):
   valid_iter = iter(eval_ds)
   for batch in itertools.islice(valid_iter, num_batches):
     batch_dict = {
         'inputs': batch['image'],
         'targets': np.eye(num_classes)[batch['label']],
     }
     if hps.get('include_example_keys'):
       batch_dict['example_key'] = batch['example_key']
     yield data_utils.maybe_pad_batch(batch_dict, per_host_eval_batch_size)
Example #7
def _eval_batches(images,
                  labels,
                  per_host_batch_size,
                  num_batches=None,
                  valid_example_keys=None):
    """Produce a stream of batches for a single evaluation epoch."""
    for idx in itertools.islice(range(0, images.shape[0], per_host_batch_size),
                                num_batches):
        inputs = jnp.array(images[idx:idx + per_host_batch_size])
        targets = jnp.array(labels[idx:idx + per_host_batch_size])
        data_dict = {
            'inputs': inputs,
            'targets': targets,
            'weights': jnp.ones(per_host_batch_size, dtype=inputs.dtype),
        }
        if valid_example_keys is not None:
            data_dict['example_key'] = valid_example_keys[idx:idx +
                                                          per_host_batch_size]
        yield data_utils.maybe_pad_batch(data_dict, per_host_batch_size)
Example #8
 def valid_epoch(num_batches=None):
     for batch in itertools.islice(eval_ds, num_batches):
         yield data_utils.maybe_pad_batch(batch, per_host_eval_batch_size)
Example #9
 def eval_train_epoch(num_batches=None):
     # This uses per_host_batch_size and not per_host_eval_batch_size.
     for batch in itertools.islice(eval_train_ds, num_batches):
         yield data_utils.maybe_pad_batch(batch, per_host_eval_batch_size)
Example #10
 def train_iterator_fn():
     for batch in iter(train_ds):
         yield data_utils.maybe_pad_batch(data_utils.tf_to_numpy(batch),
                                          per_host_batch_size,
                                          data_format=None,
                                          mask_key='targets')
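Most of these wrappers follow the same pattern: convert a tf.data batch to numpy, then pad it so every yielded batch shares the same leading dimension. A hedged sketch of how a consumer might reshape those fixed-size batches for per-device execution; the pmap-style sharding shown here is an assumption about the downstream training loop, not part of data_utils:

import jax


def shard_for_devices(batch, num_devices):
  """Reshape each array's leading axis into (num_devices, per_device_batch)."""
  def _shard(x):
    return x.reshape((num_devices, -1) + x.shape[1:])
  return jax.tree_util.tree_map(_shard, batch)


# for padded_batch in train_iterator_fn():
#   sharded = shard_for_devices(padded_batch, jax.local_device_count())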