def visualize(self, train_data, test_data, info):
    """
    Visualizes the data set giving 9 samples (the tfds default grid) from
    each of the training and testing data sets and their respective labels.

    :param train_data: tf.data.Dataset object containing training data
    :param test_data: tf.data.Dataset object containing the testing data
    :param info: dataset.info for getting information about the dataset
        (number of classes, samples, etc.)
    :return: n/a
    """
    # tfds.show_examples expects the dataset first and the DatasetInfo
    # second; the reversed (info, ds) order is deprecated.
    for split in (train_data, test_data):
        tfds.show_examples(split, info)
def visualize(data_train, data_test, info):
    """
    Plot a grid of example images with their labels, first for the training
    dataset and then for the test dataset.

    :param data_train: A tf.data.Dataset object containing the training data
    :param data_test: A tf.data.Dataset object containing the test data
    :param info: dataset.info describing the dataset (number of classes,
        samples, etc.)
    :return: n/a
    """
    # Training split is rendered first, then the test split.
    for split in (data_train, data_test):
        tfds.show_examples(split, info)
def view_dataset_example(self, dataset='train'):
    """Display example images from the train or test dataset.

    The datasets are loaded first if either of them has not been loaded yet.

    :param dataset: which dataset to show, ``'train'`` or ``'test'``
    :raises ValueError: if ``dataset`` is neither ``'train'`` nor ``'test'``
    """
    # Reject unknown names up front instead of printing a message and then
    # silently falling through to the test dataset.
    if dataset not in ('train', 'test'):
        raise ValueError(
            f"dataset must be 'train' or 'test', got {dataset!r}")

    if self.train_dataset is None or self.test_dataset is None:
        self.__load_datasets()

    chosen = self.train_dataset if dataset == 'train' else self.test_dataset
    tfds.show_examples(chosen, self.dataset_info)
    plt.show()
def test_augmentation(self):
    """Render one row of augmented train/val examples and save them as JPEGs.

    Loads the distributed train and validation datasets for ``tf_flowers``
    with the parsed command-line options, merges each (x, y) pair of dicts
    into a single dict, and writes one figure per split under ``out/``.
    """
    data_id = 'tf_flowers'
    cli = f'--data-id={data_id} --autoaugment --bsz=8 --loss=supcon '
    args = utils.parser.parse_args(cli.split())
    strategy = utils.setup(args)

    _, ds_info = tfds.load(
        args.data_id,
        try_gcs=True,
        data_dir='gs://aigagror/datasets',
        with_info=True,
    )
    train_augment_config, val_augment_config = utils.load_augment_configs(args)
    ds_train, ds_val = data.load_distributed_datasets(
        args, strategy, ds_info, train_augment_config, val_augment_config)

    def merge_dicts(x, y):
        # show_examples needs a single feature dict per element.
        return {**x, **y}

    ds_train = ds_train.map(merge_dicts)
    ds_val = ds_val.map(merge_dicts)

    # The datasets are batched, so unbatch before plotting single examples.
    train_fig = tfds.show_examples(ds_train.unbatch(), ds_info, rows=1)
    val_fig = tfds.show_examples(ds_val.unbatch(), ds_info, rows=1)

    train_fig.savefig(f'out/{data_id}_train_examples.jpg')
    val_fig.savefig(f'out/{data_id}_val_examples.jpg')
    logging.info("dataset examples saved to './out'")
def show_dataset_info(dataset_builder, plot=False):
    """
    Get information about the dataset using the tfds.core.DatasetBuilder.

    :param dataset_builder: builder exposing ``info`` and ``as_dataset``
    :param plot: when true, also display the first 9 training examples
    :return: the builder's ``info`` object
    """
    # Get information about the dataset
    dataset_info = dataset_builder.info

    if plot:
        # Grab 9 examples from the training data and display them
        dataset = dataset_builder.as_dataset(split='train[:9]')
        # show_examples takes (ds, info); plt.show() takes no figure argument.
        tfds.show_examples(dataset, dataset_info)
        plt.show()

    return dataset_info
def info(self):
    """Print the MNIST metadata and preview the data.

    Prints ``self.mnist_info``, draws the first training example with its
    digit label as the title, prints one slice of that example's image
    tensor, then plots a grid of examples from ``self.mnist_test``.
    """
    print(self.mnist_info)

    # Only take a single example
    for mnist_example in self.mnist_train.take(1):
        image, label = mnist_example["image"], mnist_example["label"]
        # Plot channel 0 of the image as a 2-D array.
        plt.imshow(image.numpy()[:, :, 0].astype(np.float32),
                   cmap=plt.get_cmap("Blues"))
        plt.title("Digit: %d" % label.numpy())
        print(mnist_example["image"][0][4])

    # show_examples expects the dataset first, then the DatasetInfo.
    tfds.show_examples(self.mnist_test, self.mnist_info)
    plt.show()
def load_ds(args, ds_id, split, augment=False):
    """Load one split of a TFDS dataset, preprocess it and return it with its info.

    :param args: options object; ``data_dir`` and ``log_level`` are read
    :param ds_id: name of the TFDS dataset to load
    :param split: split specification forwarded to ``tfds.load``
    :param augment: forwarded to ``preprocess``
    :return: ``(class_ds, info)`` where ``class_ds`` is the preprocessed
        dataset mapped through ``class_supervise``
    """
    # One load call yields both the requested split and the dataset info;
    # there is no need to build every split just to obtain the info.
    ds, info = tfds.load(ds_id, split=split, data_dir=args.data_dir,
                         try_gcs=True, with_info=True)

    # Preprocess
    processed_ds = preprocess(ds, augment)

    # Show examples if debug level is log
    if args.log_level == 'DEBUG':
        for image_key in ['image', 'segmentation_mask']:
            if image_key in info.features:
                tfds.show_examples(processed_ds, info, image_key=image_key,
                                   rows=1, cols=5)

    class_ds = processed_ds.map(class_supervise, tf.data.AUTOTUNE)
    return class_ds, info
def _generate_single_visualization(full_name: str, dst_dir: str) -> None:
    """Save the generated figures for the dataset in dst_dir.

    Nothing is written when the destination image already exists, when the
    dataset has no splits, or when `tfds.show_examples` fails for it.

    Args:
      full_name: Name of the dataset to build `dataset`, `dataset/config`.
      dst_dir: Destination where the dataset will be saved (as
        `dataset-config-version`)
    """
    dst_filename = full_name.replace('/', '-') + '.png'
    dst_path = os.path.join(dst_dir, dst_filename)

    # If the image already exists, skip the image generation
    if tf.io.gfile.exists(dst_path):
        logging.info(f'Skiping visualization for {full_name} (already exists)')
        return

    logging.info(f'Generating visualization for {full_name}...')

    # Load the dataset.
    builder_name, _, version = full_name.rpartition('/')
    builder = tfds.builder(f'{builder_name}:{version}')
    split_names = list(builder.info.splits.keys())
    if not split_names:
        logging.info(f'Dataset `{full_name}` not generated.')
        return
    # Prefer the train split; otherwise fall back to the first one listed.
    split = 'train' if 'train' in split_names else split_names[0]
    ds = builder.as_dataset(split=split, shuffle_files=False)

    if not tf.io.gfile.exists(dst_dir):
        tf.io.gfile.makedirs(dst_dir)
    try:
        figure = tfds.show_examples(ds, builder.info)
    except Exception:  # pylint: disable=broad-except
        logging.info(f'Visualisation not supported for dataset `{full_name}`')
        return

    try:
        # `savefig` do not support GCS, so first save the image locally.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, dst_filename)
            figure.savefig(tmp_path)
            tf.io.gfile.copy(tmp_path, dst_path)
    finally:
        # Release the figure even when saving or copying fails.
        plt.close(figure)
def generate_visualization(ds_name):
    """
    Method used to generate examples figures of a dataset.

    Loads the train split of ``ds_name``, saves the example figure as
    ``<ds_name>.png`` in the directory resolved from ``FIG_DIR``, calls
    ``optimize_image(ds_name)`` and then deletes the saved PNG. A
    ``RuntimeError`` raised along the way is reported with a printed message.

    :param ds_name: name of the TFDS dataset to visualize
    """
    try:
        ds, ds_info = tfds.load(name=ds_name, split='train', with_info=True)
        path_dir = tfds.core.get_tfds_path(FIG_DIR)
        print(path_dir)

        # show_examples expects the dataset first, then the DatasetInfo.
        fig = tfds.show_examples(ds, ds_info, plot_scale=2)

        suffix = '.png'
        fpath = os.path.join(path_dir, ds_name + suffix)
        fig.savefig(fname=fpath)

        # Optimizing figures
        optimize_image(ds_name)
        # NOTE(review): presumably optimize_image writes its own output
        # before the PNG is removed - confirm.
        os.remove(fpath)
    except RuntimeError:
        print('The selected dataset is not supported')
ds, ds_info = tfds.load('colorectal_histology', data_dir=".",
                        shuffle_files=True, split='train',
                        with_info=True, as_supervised=True)
assert isinstance(ds, tf.data.Dataset)
print(ds_info)

# ## Display a few examples from the dataset
# The dataset was loaded with as_supervised=True, so its elements are
# (x, y) tuples; rebuild the feature dict that show_examples works with.
x_key, y_key = ds_info.supervised_keys
ds_temp = ds.map(lambda x, y: {x_key: x, y_key: y})
# show_examples expects the dataset first, then the DatasetInfo.
tfds.show_examples(ds_temp, ds_info)

# Bare expression: only displays the class names in an interactive session.
ds_info.features['label'].names

# ## Define the data loaders
# n = ds_info.splits['train'].num_examples
train_split_percentage = 0.80
train_batch_size = 128
test_batch_size = 16


def normalize_img(image, label):
    """Normalizes images: `uint8` -> `float32`."""
    return tf.cast(image, tf.float32) / 255., label
""" #plt.show() ### investigating data batches ### ### DATA ### """ NWPU-RESISC45 This dataset requires you to download the source data manually into download_config.manual_dir (defaults to ~/tensorflow_datasets/manual/): Note: this dataset does not have a test/train split. """ # load data # data, info = tfds.load('resisc45', split="train", with_info=True) # visualize data # tfds.show_examples(data, info) # size of entire dataset # ds_size = info.splits["train"].num_examples image_shape = info.features['image'].shape print(image_shape) # manually split ds into 80:20, train & test respectively # test_ds_size = int(ds_size * 0.20) train_ds_size = ds_size - test_ds_size # split # test_ds = data.take(test_ds_size) train_ds = data.skip(test_ds_size) print("size of test: {}, size of train: {}".format(test_ds_size, train_ds_size)) # num features
import tensorflow as tf
import tensorflow_datasets as tfds

# List every dataset builder known to tfds.
print(tfds.list_builders())

ds, info = tfds.load(
    "mnist",
    split="train",
    shuffle_files=True,
    with_info=True,
)
assert isinstance(ds, tf.data.Dataset)

# Batches of 32, prefetching one batch ahead.
prefetched_ds = ds.batch(32).prefetch(1)

# Inspect the feature keys and tensors of the first batch only.
examples = prefetched_ds.take(1)
for example in examples:
    print(list(example.keys()))
    image = example["image"]
    label = example["label"]
    print(image.shape, label)

# Benchmark two consecutive passes over the pipeline.
tfds.benchmark(prefetched_ds, batch_size=32)
# Second epoch should be much faster due to auto-caching
tfds.benchmark(prefetched_ds, batch_size=32)

tfds.show_examples(ds, info)
import tensorflow as tf
import tensorflow_hub as hub
import os
import numpy as np
import tensorflow_datasets as tfds
import warnings

warnings.filterwarnings('ignore')

# Supervised (image, label) datasets, in the order train, test, validation.
datasets, info = tfds.load(name='beans', with_info=True, as_supervised=True,
                           split=['train', 'test', 'validation'])
# Bare expression: only displays the info in an interactive session.
info

# NOTE: despite the variable names, this loads the 'test' split. It is
# loaded without as_supervised so its elements are feature dicts.
train, info_train = tfds.load(name='beans', with_info=True, split='test')
# show_examples expects the dataset first, then the DatasetInfo.
tfds.show_examples(train, info_train)


def scale(image, label):
    """Scale pixels by 1/255, resize to 224x224 and one-hot the label (depth 3)."""
    image = tf.cast(image, tf.float32)
    image /= 255.0
    return tf.image.resize(image, [224, 224]), tf.one_hot(label, 3)


def get_dataset(batch_size=32):
    """Return the scaled and batched (train, test, validation) datasets.

    Only the training dataset is shuffled (buffer of 1000) before batching.

    :param batch_size: number of examples per batch
    :return: tuple ``(train, test, validation)`` of batched datasets
    """
    train_dataset_scaled = datasets[0].map(scale).shuffle(1000).batch(
        batch_size)
    test_dataset_scaled = datasets[1].map(scale).batch(batch_size)
    val_dataset_scaled = datasets[2].map(scale).batch(batch_size)
    return train_dataset_scaled, test_dataset_scaled, val_dataset_scaled
* as_supervised=True---download the target labels for that data. * split---- 3 splits for that data. """ datasets, info = tfds.load(name='beans', with_info=True, as_supervised=True, split=['train', 'test', 'validation']) """# look at the info about the dataset completely.""" info """# lets visualise the dataset.""" test, info_test = tfds.load(name='beans', with_info=True, split='test') tfds.show_examples(info_test, test) """# lets create a function to scale the dataset so that my training converges faster. here i convert the output pixel value between 0 and 1. The model i am going to use is the mobilenet model. and mobilenet expects the input image to be of shape (224,224). Also one hot encoding the target variable """ def scale(image, label): image = tf.cast(image, tf.float32) image /= 255.0 return tf.image.resize(image, [224, 224]), tf.one_hot(label, 3)
def displayTFDatasetImages(self, amountOfImages, typeOfData):
    """Plot an ``amountOfImages`` x ``amountOfImages`` grid of dataset examples.

    :param amountOfImages: number of rows and of columns in the grid
    :param typeOfData: forwarded to
        ``Visualization().findCorrespondingDataType`` to pick the dataset
    :raises TypeError: if ``amountOfImages`` is not an int
    """
    # Raise explicitly: an assert is stripped under -O and would surface as
    # AssertionError rather than the intended TypeError.
    if not isinstance(amountOfImages, int):
        raise TypeError(
            "amountOfImages must be an int, got "
            f"{type(amountOfImages).__name__}")

    data = Visualization().findCorrespondingDataType(typeOfData)
    # show_examples expects the dataset first, then the DatasetInfo.
    tfds.show_examples(
        data, self.metaData, rows=amountOfImages, cols=amountOfImages)
def show(self):
    """Plot example images from ``self.ds`` using the metadata in ``self.info``."""
    tfds.show_examples(self.ds, self.info)
import tensorflow_datasets as tfds
import logging

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S')

# tfds works in both Eager and Graph modes
# tf.enable_eager_execution()

# See available datasets
print(tfds.list_builders())

# Construct a tf.data.Dataset
train_data, train_info = tfds.load(name="lfw", as_supervised=True,
                                   with_info=True, split=tfds.Split.TRAIN)
print(train_data)

# Version wildcard: any 1.x.x release of mnist.
mnist = tfds.load("mnist:1.*.*")

# NOTE(review): confirm that "lfw" actually provides a TEST split.
lfw_test, test_info = tfds.load("lfw", split=tfds.Split.TEST, with_info=True)
print(test_info)

# show_examples expects the dataset first, then the DatasetInfo.
fig = tfds.show_examples(lfw_test, test_info)
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np

# NOTE: despite the variable name, this loads the 'test' split.
ds_train, ds_info = tfds.load('celeb_a', split='test', shuffle_files=False,
                              with_info=True)
# show_examples expects the dataset first, then the DatasetInfo.
fig = tfds.show_examples(ds_train, ds_info)

# Pull one batch of up to `sample_size` examples.
sample_size = 2000
ds_train = ds_train.batch(sample_size)
features = next(iter(ds_train.take(1)))
n, h, w, c = features['image'].shape

# Average the images over the batch axis and display the result.
figure = plt.figure(figsize=(8, 6))
sample_images = features['image']
new_image = np.mean(sample_images, axis=0)
plt.imshow(new_image.astype(np.uint8))
plt.axis('off')
plt.show()
def show_examples():
    """Plot a grid of example images from the ``cats_vs_dogs`` train split."""
    # Without `split`, tfds.load returns a dict of all splits, which
    # tfds.show_examples cannot plot; request a single split explicitly.
    dataset, ds_info = tfds.load("cats_vs_dogs", split="train", with_info=True)
    tfds.show_examples(dataset, ds_info)
def test_intro(self):
    """Plot training examples when ``SHOW_PLOTS`` is enabled; otherwise do nothing."""
    if not SHOW_PLOTS:
        return

    tfds.show_examples(self.ds_train, self.ds_info)
    plt.show()
    # Clear the current figure after it has been shown.
    plt.clf()
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds  # pip install tensorflow_datasets, have to downgrade absl-py==0.8

(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=False,  # true will be (img,label), false will be a dict
    with_info=True,
)
print(ds_info)

fig = tfds.show_examples(ds_train, ds_info, rows=4, cols=4)  # need as_supervised=False


def normalize_img(image, label):
    """Normalize images: cast to float32 and scale by 1/255; label is passed through."""
    return tf.cast(image, tf.float32) / 255.0, label


# The dataset is `ds_train` (there is no `ds` object), and because it was
# loaded with as_supervised=False each element is a feature dict that must
# be unpacked before calling normalize_img.
df_train = ds_train.map(
    lambda example: normalize_img(example['image'], example['label']))
import numpy as np # Necessary for dealing with https urls import ssl ssl._create_default_https_context = ssl._create_unverified_context # We read only the first 10 training samples ds, ds_info = tfds.load('colorectal_histology', split='train', shuffle_files=True, with_info=True, download=True) assert isinstance(ds, tf.data.Dataset) print(ds_info) # Visualizing images fig = tfds.show_examples(ds, ds_info) # Reading all images (remove break point to read all) for example in tfds.as_numpy(ds): image, label = example['image'], example['label'] break # take one sample from data one_sample = ds.take(1) one_sample = list(one_sample.as_numpy_iterator()) image = one_sample[0]['image'] label = one_sample[0]['label'] print(image.shape, label.shape) # Side by side visualization def visualize(im, imAgmented, operation):