def test_image_dataset_from_directory_color_modes(self):
    """Check that `color_mode` determines the channel count of loaded images.

    For each color mode a fresh directory is prepared in that mode, loaded
    back with the same mode, and the first batch is inspected.
    """
    if PIL is None:
        return  # Skip test if PIL is not available.

    # (color_mode, expected size of the trailing channel axis)
    mode_to_channels = (('rgba', 4), ('grayscale', 1))
    for mode, channels in mode_to_channels:
        directory = self._prepare_directory(num_classes=4, color_mode=mode)
        dataset = image_dataset.image_dataset_from_directory(
            directory,
            batch_size=8,
            image_size=(18, 18),
            color_mode=mode)
        first_batch = next(iter(dataset))
        self.assertLen(first_batch, 2)
        self.assertEqual(first_batch[0].shape, (8, 18, 18, channels))
        self.assertEqual(first_batch[0].dtype.name, 'float32')
def test_image_dataset_from_directory_validation_split(self):
    """Check the first-batch size of each subset for a 0.2 validation split.

    The directory is prepared with `count=10`; the training subset is
    expected to yield 8 images and the validation subset 2.
    """
    if PIL is None:
        return  # Skip test if PIL is not available.

    directory = self._prepare_directory(num_classes=2, count=10)

    # (subset name, expected number of images in the first batch)
    for subset_name, expected_images in (('training', 8), ('validation', 2)):
        dataset = image_dataset.image_dataset_from_directory(
            directory,
            batch_size=10,
            image_size=(18, 18),
            validation_split=0.2,
            subset=subset_name,
            seed=1337)
        first_batch = next(iter(dataset))
        self.assertLen(first_batch, 2)
        self.assertEqual(first_batch[0].shape, (expected_images, 18, 18, 3))
def test_image_dataset_from_directory_binary(self):
    """Two-class directory: check image and label shape/dtype per `label_mode`."""
    if PIL is None:
        return  # Skip test if PIL is not available.

    directory = self._prepare_directory(num_classes=2)

    # (label_mode, expected label shape, expected label dtype name)
    expectations = (
        ('int', (8,), 'int32'),
        ('binary', (8, 1), 'float32'),
        ('categorical', (8, 2), 'float32'),
    )
    for mode, label_shape, label_dtype in expectations:
        dataset = image_dataset.image_dataset_from_directory(
            directory, batch_size=8, image_size=(18, 18), label_mode=mode)
        first_batch = next(iter(dataset))
        self.assertLen(first_batch, 2)
        # Images are checked before labels, for every label mode.
        self.assertEqual(first_batch[0].shape, (8, 18, 18, 3))
        self.assertEqual(first_batch[0].dtype.name, 'float32')
        self.assertEqual(first_batch[1].shape, label_shape)
        self.assertEqual(first_batch[1].dtype.name, label_dtype)
def test_image_dataset_from_directory_multiclass(self):
    """Four-class directory: check unlabeled, 'int' and 'categorical' loading.

    Covers, in order:
      * `label_mode=None` yields plain image batches;
      * iterating the whole unlabeled dataset visits all 15 images;
      * `label_mode='int'` yields int32 labels of shape (8,);
      * `label_mode='categorical'` yields float32 labels of shape (8, 4).
    """
    if PIL is None:
        return  # Skip test if PIL is not available.

    directory = self._prepare_directory(num_classes=4, count=15)

    dataset = image_dataset.image_dataset_from_directory(
        directory, batch_size=8, image_size=(18, 18), label_mode=None)
    batch = next(iter(dataset))
    self.assertEqual(batch.shape, (8, 18, 18, 3))

    dataset = image_dataset.image_dataset_from_directory(
        directory, batch_size=8, image_size=(18, 18), label_mode=None)
    # Count from the batches this loop actually yields. The previous version
    # drew each batch from a second, independent iterator while ignoring the
    # loop variable, which read the dataset twice for no benefit.
    sample_count = 0
    for batch in dataset:
        sample_count += batch.shape[0]
    self.assertEqual(sample_count, 15)

    dataset = image_dataset.image_dataset_from_directory(
        directory, batch_size=8, image_size=(18, 18), label_mode='int')
    batch = next(iter(dataset))
    self.assertLen(batch, 2)
    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
    self.assertEqual(batch[0].dtype.name, 'float32')
    self.assertEqual(batch[1].shape, (8,))
    self.assertEqual(batch[1].dtype.name, 'int32')

    dataset = image_dataset.image_dataset_from_directory(
        directory, batch_size=8, image_size=(18, 18),
        label_mode='categorical')
    batch = next(iter(dataset))
    self.assertLen(batch, 2)
    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
    self.assertEqual(batch[0].dtype.name, 'float32')
    self.assertEqual(batch[1].shape, (8, 4))
    self.assertEqual(batch[1].dtype.name, 'float32')
def test_image_dataset_from_directory_errors(self):
    """Check that invalid arguments raise `ValueError` with the expected text.

    Each case pairs a regex for the error message with the keyword
    arguments that should trigger it; cases run in the listed order.
    """
    if PIL is None:
        return  # Skip test if PIL is not available.

    directory = self._prepare_directory(num_classes=3, count=5)

    # (expected message regex, keyword arguments passed alongside `directory`)
    invalid_calls = (
        ('`labels` argument should be',
         dict(labels=None)),
        ('`label_mode` argument must be',
         dict(label_mode='other')),
        ('`color_mode` must be one of',
         dict(color_mode='other')),
        ('only pass `class_names` if the labels are inferred',
         dict(labels=[0, 0, 1, 1, 1],
              class_names=['class_0', 'class_1', 'class_2'])),
        ('Expected the lengths of `labels` to match the number of images',
         dict(labels=[0, 0, 1, 1])),
        ('`class_names` passed did not match',
         dict(class_names=['class_0', 'class_2'])),
        ('there must exactly 2 classes',
         dict(label_mode='binary')),
        ('`validation_split` must be between 0 and 1',
         dict(validation_split=2)),
        ('`subset` must be either "training" or',
         dict(validation_split=0.2, subset='other')),
    )
    for message_regex, bad_kwargs in invalid_calls:
        with self.assertRaisesRegex(ValueError, message_regex):
            _ = image_dataset.image_dataset_from_directory(
                directory, **bad_kwargs)
def get_dataset(
    dataset_path: str,
    augmentation_pipeline: a.Compose,
    batch_size: int = 32,
    image_size: Tuple[int, int] = (300, 300),
    scaling: bool = True,
    seed: int = 42,
) -> tf.data.Dataset:
    """Build an augmented, shuffled and prefetched image dataset.

    Args:
        dataset_path: Directory handed to `image_dataset_from_directory`.
        augmentation_pipeline: Pipeline forwarded to `augment_image`.
        batch_size: Batch size used when loading the directory.
        image_size: Target image size used when loading the directory.
        scaling: When true, `scale_images` is mapped over the dataset after
            augmentation.
        seed: Seed passed to the loader, to `augment_image` and to the final
            shuffle.

    Returns:
        The resulting `tf.data.Dataset`.
    """
    # `class_names` is not a parameter; it is resolved from the enclosing
    # (module) scope.
    batches = image_dataset_from_directory(
        dataset_path,
        class_names=class_names,
        image_size=image_size,
        batch_size=batch_size,
        seed=seed,
    )

    augment = partial(
        augment_image,
        augmentation_pipeline=augmentation_pipeline,
        seed=seed,
    )
    batches = batches.map(augment, num_parallel_calls=AUTOTUNE)

    if scaling:
        batches = batches.map(scale_images, num_parallel_calls=AUTOTUNE)

    # NOTE(review): this shuffle runs on the loader's output, so the buffer
    # presumably holds up to 512 whole batches rather than 512 images --
    # confirm that this memory footprint is intended.
    shuffled = batches.shuffle(buffer_size=512, seed=seed)
    return shuffled.prefetch(AUTOTUNE)
def test_image_dataset_from_directory_standalone(self):
    """Retrieve unlabeled images from a directory and its subdirectories."""
    if PIL is None:
        return  # Skip test if PIL is not available.

    # Class subdirectories are prepared with count=7; three more images are
    # then saved directly in the parent directory, for 10 images in total.
    directory = self._prepare_directory(count=7, num_classes=2)
    for index, extra_image in enumerate(self._get_images(3)):
        extra_path = os.path.join(directory, 'image_%s.jpg' % (index, ))
        extra_image.save(extra_path)

    dataset = image_dataset.image_dataset_from_directory(
        directory, batch_size=5, image_size=(18, 18), labels=None)

    # With `labels=None` each element is a plain image batch, not a tuple.
    first_batch = next(iter(dataset))
    self.assertEqual(first_batch.shape, (5, 18, 18, 3))
    self.assertEqual(first_batch.dtype.name, 'float32')

    # One full pass: record the leading dimension of every batch.
    batch_sizes = [images.shape[0] for images in dataset]
    self.assertEqual(len(batch_sizes), 2)
    self.assertEqual(sum(batch_sizes), 10)
def predict(config: ConfigManager) -> None:
    """Run the checkpointed model over an image directory and print the result.

    Seeds the RNG from `config.seed`, prints the configuration, loads the
    images under `config.img_dir_path` without labels and without shuffling,
    rebuilds the model via `get_model`, restores weights from
    `config.checkpoint_path`, and prints the output of `model.predict`.

    Args:
        config: Run configuration; the fields read here are `seed`,
            `img_dir_path`, `batch_size`, `image_size`, `feature_extractor`,
            `num_classes`, `l2_strength`, `checkpoint_path` and
            `learning_rate`.
    """
    set_random_seed(config.seed)
    print(config)

    images = image_dataset_from_directory(
        config.img_dir_path,
        label_mode=None,
        batch_size=config.batch_size,
        image_size=config.image_size,
        shuffle=False,
        seed=config.seed,
    )

    classifier = get_model(
        config.feature_extractor,
        config.num_classes,
        config.image_size,
        config.l2_strength,
    )
    classifier.load_weights(config.checkpoint_path)
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=config.learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    print(classifier.predict(images))
def test_image_dataset_from_directory_smart_resize(self):
    """Check that `smart_resize=True` still yields the requested image size."""
    if PIL is None:
        return  # Skip test if PIL is not available.

    image_dir = self._prepare_directory(num_classes=2, count=5)
    resized = image_dataset.image_dataset_from_directory(
        image_dir,
        batch_size=5,
        image_size=(18, 18),
        smart_resize=True)

    first_batch = next(iter(resized))
    self.assertLen(first_batch, 2)
    self.assertEqual(first_batch[0].shape, (5, 18, 18, 3))
def test_image_dataset_from_directory_manual_labels(self):
    """Check that an explicit `labels` list is returned as the batch labels."""
    if PIL is None:
        return  # Skip test if PIL is not available.

    image_dir = self._prepare_directory(num_classes=2, count=2)
    # Shuffling is disabled so the labels come back in the order given.
    labeled = image_dataset.image_dataset_from_directory(
        image_dir,
        batch_size=8,
        image_size=(18, 18),
        labels=[0, 1],
        shuffle=False)

    first_batch = next(iter(labeled))
    self.assertLen(first_batch, 2)
    self.assertAllClose(first_batch[1], [0, 1])
def test_sample_count(self):
    """Check that a full pass over an unlabeled dataset visits all 15 images."""
    if PIL is None:
        return  # Skip test if PIL is not available.

    image_dir = self._prepare_directory(num_classes=4, count=15)
    unlabeled = image_dataset.image_dataset_from_directory(
        image_dir, batch_size=8, image_size=(18, 18), label_mode=None)

    # Add up the leading (batch) dimension of every batch.
    total_images = sum(images.shape[0] for images in unlabeled)
    self.assertEqual(total_images, 15)
def get_dataset(self):
    """Load the configured subset of the dataset with categorical labels.

    Class names come from `get_classes_info()`; the split fraction and the
    subset come from `self.args`. Images are loaded at 224x224 with a fixed
    seed of 0 and a batch size of 10000.

    Returns:
        The dataset produced by `image_dataset_from_directory`.
    """
    # Only the first element of the pair (the class names) is used here.
    class_names, _ = self.get_classes_info()
    return image_dataset_from_directory(
        self.get_dataset_path(),
        label_mode='categorical',
        class_names=class_names,
        image_size=(224, 224),
        validation_split=self.args.validation_split,
        subset=self.args.subset,
        seed=0,
        batch_size=10000)
def test_image_dataset_from_directory_follow_links(self):
    """Check that all 25 images are found when using `follow_links=True`.

    The directory is prepared with `nested_dirs=True`.
    """
    if PIL is None:
        return  # Skip test if PIL is not available.

    image_dir = self._prepare_directory(
        num_classes=2, count=25, nested_dirs=True)
    unlabeled = image_dataset.image_dataset_from_directory(
        image_dir,
        batch_size=8,
        image_size=(18, 18),
        label_mode=None,
        follow_links=True)

    # Add up the leading (batch) dimension of every batch.
    total_images = sum(images.shape[0] for images in unlabeled)
    self.assertEqual(total_images, 25)
def test_static_shape_in_graph(self):
    """Check the static image shape seen inside a `def_function.function`.

    The batch dimension is expected to be unknown (`None`) while height,
    width and channels are fixed at 18, 18 and 3.
    """
    if PIL is None:
        return  # Skip test if PIL is not available.

    image_dir = self._prepare_directory(num_classes=2)
    labeled = image_dataset.image_dataset_from_directory(
        image_dir, batch_size=8, image_size=(18, 18), label_mode='int')

    # The inner function has no `self`; keep a handle on this test case so
    # the assertion can be made from inside it.
    outer_test = self

    @def_function.function
    def symbolic_fn(ds):
        for images, _ in ds.take(1):
            outer_test.assertListEqual(
                images.shape.as_list(), [None, 18, 18, 3])

    symbolic_fn(labeled)
def get_test_dataset(
    dataset_path: str,
    batch_size: int = 32,
    image_size: Tuple[int, int] = (300, 300),
    scaling: bool = True,
    seed: int = 42,
) -> tf.data.Dataset:
    """Build an unshuffled, prefetched image dataset for evaluation.

    Args:
        dataset_path: Directory handed to `image_dataset_from_directory`.
        batch_size: Batch size used when loading the directory.
        image_size: Target image size used when loading the directory.
        scaling: When true, `scale_images` is mapped over the dataset.
        seed: Seed forwarded to the loader (shuffling itself is disabled).

    Returns:
        The resulting `tf.data.Dataset`.
    """
    # `class_names` is not a parameter; it is resolved from the enclosing
    # (module) scope.
    batches = image_dataset_from_directory(
        dataset_path,
        class_names=class_names,
        batch_size=batch_size,
        image_size=image_size,
        shuffle=False,
        seed=seed,
    )
    if not scaling:
        return batches.prefetch(AUTOTUNE)

    scaled = batches.map(scale_images, num_parallel_calls=AUTOTUNE)
    return scaled.prefetch(AUTOTUNE)
def channel_redundancy(self):
    """Profile per-layer channel activations for 'diamondback' and 'wool'.

    Loads the two classes as a categorical dataset, then for each class
    gathers its images, runs them through a freshly built profiler model,
    and averages whatever each entry of `ProfileLayer.activated_channels`
    recorded during that run.

    Returns:
        A 2-tuple `(indices, {'': subplots})`: `indices` is `0..k-1` for the
        `k` entries left in `ProfileLayer.activated_channels` after the last
        class was profiled; `subplots` holds one `[means, zeros]` pair per
        class.
    """
    names, titles = self.get_classes_info()
    self.variants = ('diamondback', 'wool')
    # Look up each variant's position by title, then map it to a class name.
    classes = [titles.index('diamondback'), titles.index('wool')]
    class_names = [names[c] for c in classes]
    dataset = image_dataset_from_directory(
        self.get_dataset_path(),
        label_mode='categorical',
        class_names=class_names,
        image_size=(224, 224),
        validation_split=self.args.validation_split,
        subset=self.args.subset,
        seed=0,
        batch_size=2000)
    # From here on `classes` holds label indices within the loaded dataset
    # (presumably following the order of `class_names` -- TODO confirm).
    classes = [0, 1]
    subplots = []
    m = self.get_model()
    for c in classes:
        data = []
        subplot = [[], []]
        # Collect every image whose label vector has its maximum at index c.
        # NOTE(review): the dataset is fully re-iterated for each class.
        for x, y in dataset:
            for x_, y_ in zip(x, y):
                if c == np.argmax(y_):
                    data.append(x_)
        profiler_model = self.get_variant_profiler(m)
        self.compile_model(profiler_model)
        profiler_model.run_eagerly = True
        # Reset the shared record so it only reflects the predict call below.
        ProfileLayer.activated_channels.clear()
        # All collected images go through in a single batch.
        # NOTE(review): if no image matched, `data` is empty and the batch
        # size is 0 -- confirm this cannot happen for the configured subset.
        profiler_model.predict(np.array(data), batch_size=len(data))
        for _, r in ProfileLayer.activated_channels.items():
            # First series: mean of the recorded values; second: all zeros.
            subplot[0].append(sum(r) / len(r))
            subplot[1].append(0)
        subplots.append(subplot)
    return [
        l for l, _ in enumerate(ProfileLayer.activated_channels.items())
    ], {
        '': subplots
    }
def test_image_dataset_from_directory_no_images(self):
    """Check that a directory prepared with `count=0` raises `ValueError`."""
    empty_dir = self._prepare_directory(num_classes=2, count=0)
    expected_message = 'No images found.'
    with self.assertRaisesRegex(ValueError, expected_message):
        _ = image_dataset.image_dataset_from_directory(empty_dir)
# Script: load a saved Keras model and, for every test image, report where the
# true subject lands in the model's score ranking.
import matplotlib.pyplot as plt
from tensorflow.keras import models
# NOTE(review): `tensorflow.python.*` is a private import path; consider the
# public `tf.keras` location of `image_dataset_from_directory`.
from tensorflow.python.keras.preprocessing.image_dataset import image_dataset_from_directory

# Size every test image is loaded at.
HEIGHT = 224
WIDTH = 224
model_path = "models/model.h5"
model = models.load_model(model_path)
model.summary()
test_dir = "data/test"
# One image per batch, in a fixed (unshuffled) order, with integer labels
# inferred from the directory structure.
test_dataset = image_dataset_from_directory(test_dir,
                                            labels="inferred",
                                            shuffle=False,
                                            batch_size=1,
                                            label_mode='int',
                                            image_size=(HEIGHT, WIDTH))
# NOTE(review): `rank` is not read or updated in the visible part of the
# script -- presumably a histogram filled in by code further down.
rank = [0] * 100
for x, y in test_dataset:
    # Labels are 0-based; subject ids are treated as 1-based.
    subject_id = y.numpy()[0] + 1
    prediction = model.predict(x)
    # Pair each score with a 1-based id, then order by score, best first.
    prediction_list = list(enumerate(prediction[0], 1))
    # NOTE(review): the lambda parameter `x` shadows the loop variable `x`
    # only inside the lambda.
    prediction_list.sort(key=lambda x: x[1], reverse=True)
    # NOTE(review): `found` is never set to True in the visible code.
    found = False
    # `id` is the 0-based position in the sorted list.
    # NOTE(review): it shadows the builtin `id`.
    for id, predicted in enumerate(prediction_list):
        if predicted[0] == subject_id:
            print(
                f"Found subject {subject_id} with score {predicted[1]} on place {id}"
            )