# Example 1
 def test_load_dataset_with_label_mapping(self):
     """Checks that a two-entry label_name yields both label tensors.

     Writes a fake int-labeled TFRecord, loads it through
     SentencePredictionDataLoader with label_name mapping to both
     'label_ids' and 'next_sentence_labels', and verifies keys, shapes,
     and dtypes of the resulting batch.
     """
     batch_size = 10
     seq_length = 128
     input_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
     _create_fake_preprocessed_dataset(input_path, seq_length, 'int')
     config = loader.SentencePredictionDataConfig(
         input_path=input_path,
         seq_length=seq_length,
         global_batch_size=batch_size,
         label_type='int',
         label_name=('label_ids', 'next_sentence_labels'))
     features = next(
         iter(loader.SentencePredictionDataLoader(config).load()))
     expected_keys = [
         'input_word_ids', 'input_mask', 'input_type_ids',
         'next_sentence_labels', 'label_ids'
     ]
     self.assertCountEqual(expected_keys, features.keys())
     # All token-level inputs are padded/truncated to seq_length.
     for key in ('input_word_ids', 'input_mask', 'input_type_ids'):
         self.assertEqual(features[key].shape, (batch_size, seq_length))
     # Both label tensors are per-example int32 scalars.
     for key in ('label_ids', 'next_sentence_labels'):
         self.assertEqual(features[key].shape, (batch_size,))
         self.assertEqual(features[key].dtype, tf.int32)
# Example 2
    def build_inputs(self, params, input_context=None):
        """Returns tf.data.Dataset for sentence_prediction task.

        If `params.input_path` is the sentinel string 'dummy', produces an
        infinite synthetic dataset of all-zero token ids with all-ones
        labels; otherwise delegates to SentencePredictionDataLoader.
        """
        # Normal path: real data loaded by the project data loader.
        if params.input_path != 'dummy':
            return sentence_prediction_dataloader.SentencePredictionDataLoader(
                params).load(input_context)

        def _make_dummy(_):
            # One example per element: zeroed ids for every input feature
            # and a single int32 label of 1.
            ids = tf.zeros((1, params.seq_length), dtype=tf.int32)
            features = {
                'input_word_ids': ids,
                'input_mask': ids,
                'input_type_ids': ids,
            }
            labels = tf.ones((1, 1), dtype=tf.int32)
            return features, labels

        # Infinite stream of identical dummy batches.
        return tf.data.Dataset.range(1).repeat().map(
            _make_dummy, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# Example 3
 def test_load_dataset(self, label_type, expected_label_type):
     """Loads a fake preprocessed dataset and validates the batch.

     Parameterized over label_type; checks feature keys, tensor shapes,
     and that the labels come back with expected_label_type.
     """
     batch_size = 10
     seq_length = 128
     input_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
     _create_fake_preprocessed_dataset(input_path, seq_length, label_type)
     config = loader.SentencePredictionDataConfig(
         input_path=input_path,
         seq_length=seq_length,
         global_batch_size=batch_size,
         label_type=label_type)
     features, labels = next(
         iter(loader.SentencePredictionDataLoader(config).load()))
     self.assertCountEqual(
         ['input_word_ids', 'input_mask', 'input_type_ids'],
         features.keys())
     # Every token-level feature is batched to (batch, seq_length).
     for name in ('input_word_ids', 'input_mask', 'input_type_ids'):
         self.assertEqual(features[name].shape, (batch_size, seq_length))
     # Labels are one scalar per example, with the parameterized dtype.
     self.assertEqual(labels.shape, (batch_size,))
     self.assertEqual(labels.dtype, expected_label_type)