def test_images_decoding(self, np_data_type, tf_data_type):
  """Decodes a png-encoded image feature and verifies dtype and shape."""
  record_path = os.path.join(self.create_tempdir().full_path,
                             'test.tfrecord')
  width, height = 640, 512
  # Largest representable value for the requested integer pixel type.
  max_pixel_value = np.iinfo(np_data_type).max
  raw_image = np.random.uniform(
      size=(height, width), high=max_pixel_value).astype(np.int32)
  encoded_png = image.numpy_to_image_string(raw_image, 'png', np_data_type)
  self._write_test_images_examples([[encoded_png]], record_path)

  feature_spec = tensorspec_utils.TensorSpecStruct()
  feature_spec.images = tensorspec_utils.ExtendedTensorSpec(
      shape=(height, width, 1),
      dtype=tf_data_type,
      name='image/encoded',
      data_format='png')
  dataset = tfdata.parallel_read(file_patterns=record_path)
  dataset = dataset.batch(1, drop_remainder=True)
  if np_data_type == np.uint32:
    # NOTE(review): here uint32 decoding is expected to raise
    # InvalidArgumentError, while test_images_decoding_raises expects a
    # ValueError for the same dtype — confirm which contract is intended.
    with self.assertRaises(tf.errors.InvalidArgumentError):
      dataset = tfdata.serialized_to_parsed(dataset, feature_spec, None)
  else:
    dataset = tfdata.serialized_to_parsed(dataset, feature_spec, None)
    features = dataset.make_one_shot_iterator().get_next()
    # Check tensor shapes.
    self.assertAllEqual([1, height, width, 1],
                        features.images.get_shape().as_list())
    with self.session() as session:
      np_features = session.run(features)
      self.assertEqual(np_features['images'].dtype, np_data_type)
def test_varlen_images_feature_spec_raises(self, batch_size):
  """Decoding fails at runtime when a varlen image has the wrong size."""
  record_path = os.path.join(self.create_tempdir().full_path,
                             'test.tfrecord')
  width, height = 640, 512
  padded_varlen_size = 3
  maxval = 255  # Maximum value for byte-encoded image.
  valid_image = np.random.uniform(
      size=(height, width), high=maxval).astype(np.int32)
  oversized_image = np.ones((1024, 1280)) * 255
  valid_png = image.numpy_to_image_string(valid_image, 'png')
  oversized_png = image.numpy_to_image_string(oversized_image, 'png')
  # Both records contain at least one image that does not fit the spec.
  test_data = [[oversized_png], [valid_png, oversized_png]]
  self._write_test_varlen_images_examples(test_data, record_path)

  feature_spec = tensorspec_utils.TensorSpecStruct()
  feature_spec.varlen_images = tensorspec_utils.ExtendedTensorSpec(
      shape=(padded_varlen_size, height, width, 1),
      dtype=tf.uint8,
      name='varlen_images',
      data_format='png',
      varlen_default_value=0)
  dataset = tfdata.parallel_read(file_patterns=record_path)
  dataset = dataset.batch(batch_size, drop_remainder=True)
  dataset = tfdata.serialized_to_parsed(dataset, feature_spec, None)
  features = dataset.make_one_shot_iterator().get_next()
  # The static shape is known at graph-construction time even though
  # evaluating the tensors below will fail.
  self.assertAllEqual([None, padded_varlen_size, height, width, 1],
                      features.varlen_images.get_shape().as_list())
  with self.session() as session:
    # Decoding a 1024x1280 image into a 512x640 slot fails at runtime.
    with self.assertRaises(tf.errors.InvalidArgumentError):
      session.run(features)
def test_parsing(self):
  """Parses a recorded pose-env example into features and labels."""
  base_dir = 'tensor2robot'
  file_pattern = os.path.join(FLAGS.test_srcdir, base_dir,
                              'test_data/pose_env_test_data.tfrecord')
  dataset = tfdata.parallel_read(file_patterns=file_pattern)
  state_spec = TSPEC(
      shape=(64, 64, 3),
      dtype=tf.uint8,
      name='state/image',
      data_format='jpeg')
  action_spec = TSPEC(shape=(2), dtype=tf.bfloat16, name='pose')
  reward_spec = TSPEC(shape=(), dtype=tf.float32, name='reward')
  feature_tspec = PoseEnvFeature(state=state_spec, action=action_spec)
  label_tspec = PoseEnvLabel(reward=reward_spec)

  batched_dataset = dataset.batch(batch_size=1)
  dataset = tfdata.serialized_to_parsed(batched_dataset, feature_tspec,
                                        label_tspec)
  features, labels = dataset.make_one_shot_iterator().get_next()
  # The parsed structures must match the specs (modulo the batch dim).
  tensorspec_utils.assert_equal(feature_tspec, features, ignore_batch=True)
  tensorspec_utils.assert_equal(label_tspec, labels, ignore_batch=True)
  with self.session() as session:
    features_, labels_ = session.run([features, labels])
    self.assertAllEqual([1, 64, 64, 3], features_.state.shape)
    self.assertAllEqual([1, 2], features_.action.shape)
    self.assertAllEqual((1,), labels_.reward.shape)
def test_sequence_parsing(self, batch_size):
  """Parses sequence examples and checks shapes, lengths, and content."""
  file_pattern = os.path.join(FLAGS.test_tmpdir, 'test.tfrecord')
  sequence_length = 3
  # Only write the fixture once; later parameterized runs reuse it.
  if not os.path.exists(file_pattern):
    self._write_test_sequence_examples(sequence_length, file_pattern)
  dataset = tfdata.parallel_read(file_patterns=file_pattern)
  # Features.
  image_spec = tensorspec_utils.ExtendedTensorSpec(
      shape=(TEST_IMAGE_SHAPE),
      dtype=tf.uint8,
      is_sequence=True,
      name='image_sequence_feature',
      data_format='JPEG')
  vector_spec = tensorspec_utils.ExtendedTensorSpec(
      shape=(2), dtype=tf.float32, is_sequence=True, name='sequence_feature')
  feature_tspec = PoseEnvFeature(state=image_spec, action=vector_spec)
  feature_tspec = tensorspec_utils.add_sequence_length_specs(feature_tspec)
  # Labels.
  reward_spec = tensorspec_utils.ExtendedTensorSpec(
      shape=(), dtype=tf.int64, is_sequence=False, name='context_feature')
  label_tspec = PoseEnvLabel(reward=reward_spec)
  label_tspec = tensorspec_utils.add_sequence_length_specs(label_tspec)

  dataset = dataset.batch(batch_size, drop_remainder=True)
  dataset = tfdata.serialized_to_parsed(dataset, feature_tspec, label_tspec)
  features, labels = dataset.make_one_shot_iterator().get_next()
  # Static shapes: the sequence dimension is unknown at graph time.
  self.assertAllEqual([batch_size, None] + TEST_IMAGE_SHAPE,
                      features.state.shape.as_list())
  self.assertAllEqual([batch_size, None, 2], features.action.shape.as_list())
  self.assertAllEqual([batch_size], features.state_length.shape.as_list())
  self.assertAllEqual([batch_size], features.action_length.shape.as_list())
  self.assertAllEqual([batch_size], labels.reward.shape.as_list())
  with self.session() as session:
    features_, labels_ = session.run([features, labels])
    # Check that images are equal.
    for step in range(3):
      self.assertAllEqual(TEST_IMAGE * step, features_.state[0, step])
    # Check that numpy shapes are equal.
    self.assertAllEqual([batch_size, sequence_length] + TEST_IMAGE_SHAPE,
                        features_.state.shape)
    self.assertAllEqual([sequence_length] * batch_size,
                        features_.state_length)
    self.assertAllEqual([batch_size, sequence_length, 2],
                        features_.action.shape)
    self.assertAllEqual([batch_size], labels_.reward.shape)
def test_compress_decompress_fn(self):
  """A compress/decompress round trip approximately preserves images.

  Reads the same records twice: once as a plain parse (reference) and once
  with a compress-then-decompress map applied, then compares the decoded
  image tensors. JPEG is lossy even at quality=100, so the comparison is
  only to one decimal place in [0, 1] space.
  """
  batch_size = 5
  base_dir = 'tensor2robot'
  file_pattern = os.path.join(FLAGS.test_srcdir, base_dir,
                              'test_data/pose_env_test_data.tfrecord')
  state_spec = TSPEC(
      shape=(64, 64, 3),
      dtype=tf.uint8,
      name='state/image',
      data_format='jpeg')
  action_spec = TSPEC(shape=(2), dtype=tf.bfloat16, name='pose')
  reward_spec = TSPEC(shape=(), dtype=tf.float32, name='reward')
  feature_spec = tensorspec_utils.TensorSpecStruct(
      state=state_spec, action=action_spec)
  label_spec = tensorspec_utils.TensorSpecStruct(reward=reward_spec)

  # Reference pass: parse the records without any compression.
  # (The original built and discarded an extra parallel_read pipeline
  # before this point; that dead construction is removed.)
  dataset = tfdata.parallel_read(file_patterns=file_pattern)
  dataset = dataset.batch(batch_size, drop_remainder=True)
  dataset = tfdata.serialized_to_parsed(dataset, feature_spec, label_spec)
  features, _ = dataset.make_one_shot_iterator().get_next()
  # Check tensor shapes.
  self.assertAllEqual((batch_size,) + feature_spec.state.shape,
                      features.state.get_shape().as_list())
  with self.session() as session:
    original_features = session.run(features)

  # Second pass: compress then decompress before fetching.
  dataset = tfdata.parallel_read(file_patterns=file_pattern)
  dataset = dataset.batch(batch_size, drop_remainder=True)
  dataset = tfdata.serialized_to_parsed(dataset, feature_spec, label_spec)
  dataset = dataset.map(
      tfdata.create_compress_fn(feature_spec, label_spec, quality=100))
  dataset = dataset.map(
      tfdata.create_decompress_fn(feature_spec, label_spec))
  features, _ = dataset.make_one_shot_iterator().get_next()
  # Check tensor shapes.
  self.assertAllEqual((batch_size,) + feature_spec.state.shape,
                      features.state.get_shape().as_list())
  with self.session() as session:
    compressed_decompressed_features = session.run(features)

  # Normalize to [0, 1] and compare with loose tolerance (JPEG is lossy).
  ref_state = original_features.state.astype(np.float32) / 255
  state = compressed_decompressed_features.state.astype(np.float32) / 255
  np.testing.assert_almost_equal(ref_state, state, decimal=1)
def test_images_decoding_raises(self):
  """serialized_to_parsed rejects a uint32 image spec with a ValueError."""
  record_path = os.path.join(self.create_tempdir().full_path,
                             'test.tfrecord')
  width, height = 640, 512
  maxval = np.iinfo(np.uint32).max  # Maximum value for byte-encoded image.
  raw_image = np.random.uniform(
      size=(height, width), high=maxval).astype(np.int32)
  encoded_png = image.numpy_to_image_string(raw_image, 'png', np.uint32)
  self._write_test_images_examples([[encoded_png]], record_path)

  feature_spec = tensorspec_utils.TensorSpecStruct()
  feature_spec.images = tensorspec_utils.ExtendedTensorSpec(
      shape=(height, width, 1),
      dtype=tf.uint32,
      name='image/encoded',
      data_format='png')
  dataset = tfdata.parallel_read(file_patterns=record_path)
  dataset = dataset.batch(1, drop_remainder=True)
  # uint32 is not a supported image-decoding dtype.
  with self.assertRaises(ValueError):
    tfdata.serialized_to_parsed(dataset, feature_spec, None)
def create_dataset(self, mode, params, **unused_kwargs):
  """This abstract function is not required for default input generators.

  Builds a tf.data pipeline that interleaves the configured file patterns,
  optionally sampling between them with `self._weights`, then parses,
  preprocesses, and prefetches batches.

  Args:
    mode: A tf.estimator.ModeKeys value; TRAIN enables shuffling and sloppy
      parallel reads.
    params: Params dict consulted by tfdata.get_batch_size for the batch
      size override.
    **unused_kwargs: Ignored.

  Returns:
    A batched, parsed (and optionally preprocessed/prefetched) tf.data.Dataset.

  Raises:
    ValueError: If `self._weights` is set but its length does not match the
      number of file patterns.
  """
  batch_size = tfdata.get_batch_size(params, self.batch_size)
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  data_format, filenames_list = tfdata.get_data_format_and_filenames_list(
      self._file_patterns)
  # Validate up front, before building any per-pattern pipelines.
  if self._weights is not None and len(filenames_list) != len(self._weights):
    raise ValueError(
        'Weights need to be same length as number of filenames.')

  datasets = []
  for filenames in filenames_list:
    filenames_dataset = tf.data.Dataset.list_files(
        filenames, shuffle=is_training)
    # Read shards in parallel only while training; evaluation stays
    # deterministic with a single reader and no sloppy interleaving.
    if is_training:
      cycle_length = min(self._parallel_shards, len(filenames))
    else:
      cycle_length = 1
    dataset = filenames_dataset.apply(
        tf.data.experimental.parallel_interleave(
            tfdata.DATA_FORMAT[data_format],
            cycle_length=cycle_length,
            sloppy=is_training))
    if is_training:
      dataset = dataset.shuffle(
          buffer_size=self._shuffle_buffer_size).repeat()
    else:
      dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)
    datasets.append(dataset)

  if self._weights is None:
    # Sample uniformly across all file patterns.
    weights = [1.0] * len(datasets)
  else:
    weights = [float(w) for w in self._weights]
  dataset = tf.data.experimental.sample_from_datasets(
      datasets=datasets, weights=weights, seed=self._seed)
  # Parse all datasets together.
  dataset = tfdata.serialized_to_parsed(
      dataset,
      self._feature_spec,
      self._label_spec,
      num_parallel_calls=self._num_parallel_calls)
  if self._preprocess_fn is not None:
    dataset = dataset.map(
        self._preprocess_fn, num_parallel_calls=self._parallel_shards)
  if self._prefetch_buffer_size is not None:
    dataset = dataset.prefetch(self._prefetch_buffer_size)
  return dataset
def test_varlen_feature_spec(self, batch_size):
  """Variable-length int64 features are padded up to the spec's shape."""
  record_path = os.path.join(self.create_tempdir().full_path,
                             'test.tfrecord')
  test_data = [[1], [1, 2]]
  self._write_test_varlen_examples(test_data, record_path)

  feature_spec = tensorspec_utils.TensorSpecStruct()
  feature_spec.varlen = tensorspec_utils.ExtendedTensorSpec(
      shape=(3,), dtype=tf.int64, name='varlen', varlen_default_value=3.0)
  dataset = tfdata.parallel_read(file_patterns=record_path)
  dataset = dataset.batch(batch_size, drop_remainder=True)
  dataset = tfdata.serialized_to_parsed(dataset, feature_spec, None)
  features = dataset.make_one_shot_iterator().get_next()
  # The batch dimension remains unknown until runtime.
  self.assertAllEqual([None, 3], features.varlen.get_shape().as_list())
  with self.session() as session:
    np_features = session.run(features)
    # Short rows are padded with the default value 3.
    self.assertAllEqual(np_features.varlen,
                        np.array([[1, 3, 3], [1, 2, 3]][:batch_size]))
    self.assertAllEqual([batch_size, 3], np_features.varlen.shape)