Example #1
0
 def visualize(self, train_data, test_data, info):
     """
     Visualizes the data set by plotting a grid of samples (with their labels) from each of the
     training and testing data sets.

     :param train_data: tf.data.Dataset object containing training data
     :param test_data: tf.data.Dataset object containing the testing data
     :param info: dataset.info for getting information about the dataset (number of classes, samples, etc.)
     :return: n/a
     """
     # `tfds.show_examples` expects (ds, ds_info); the former (info, ds) order is deprecated
     # and only still works through a compatibility shim that prints a warning.
     tfds.show_examples(train_data, info)
     tfds.show_examples(test_data, info)
Example #2
0
def visualize(data_train, data_test, info):
    """
    Plot a grid of labelled samples for the training split, then for the test split.

    :param data_train: A tf.data.Dataset object containing the training data
    :param data_test: A tf.data.Dataset object containing the test data
    :param info: dataset.info describing the dataset (number of classes, samples, etc.)
    :return: n/a
    """
    # Training data is shown first, test data second.
    for split_data in (data_train, data_test):
        tfds.show_examples(split_data, info)
 def view_dataset_example(self, dataset='train'):
     """
     Show a grid of examples from either the training or the test dataset.

     :param dataset: which dataset to display; must be 'train' or 'test'
     :raises ValueError: if ``dataset`` is neither 'train' nor 'test'
     :return: n/a
     """
     if dataset not in ('train', 'test'):
         # Fail fast: merely printing "error" let execution fall through and
         # silently display the test dataset for any unknown name.
         raise ValueError(f"dataset must be 'train' or 'test', got {dataset!r}")

     # Load lazily; presumably __load_datasets populates both attributes -- verify.
     if self.train_dataset is None or self.test_dataset is None:
         self.__load_datasets()

     selected = self.train_dataset if dataset == "train" else self.test_dataset
     tfds.show_examples(selected, self.dataset_info)

     plt.show()
Example #4
0
    def test_augmentation(self):
        """Build the augmented tf_flowers train/val datasets and save one row of examples from each.

        The figures are written to ``out/<data_id>_train_examples.jpg`` and
        ``out/<data_id>_val_examples.jpg``.
        """
        data_id = 'tf_flowers'
        cli = f'--data-id={data_id} --autoaugment --bsz=8 --loss=supcon '
        args = utils.parser.parse_args(cli.split())
        strategy = utils.setup(args)

        # Only the dataset info is needed here; the datasets themselves come from the project loader.
        _, ds_info = tfds.load(args.data_id, try_gcs=True, data_dir='gs://aigagror/datasets', with_info=True)
        train_augment_config, val_augment_config = utils.load_augment_configs(args)
        ds_train, ds_val = data.load_distributed_datasets(
            args, strategy, ds_info, train_augment_config, val_augment_config)

        def merge_features(x, y):
            # Each element is a pair of dicts; flatten it into one dict for show_examples.
            return {**x, **y}

        ds_train = ds_train.map(merge_features)
        ds_val = ds_val.map(merge_features)

        train_fig = tfds.show_examples(ds_train.unbatch(), ds_info, rows=1)
        val_fig = tfds.show_examples(ds_val.unbatch(), ds_info, rows=1)

        train_fig.savefig(f'out/{data_id}_train_examples.jpg')
        val_fig.savefig(f'out/{data_id}_val_examples.jpg')
        logging.info("dataset examples saved to './out'")
Example #5
0
def show_dataset_info(dataset_builder, plot=False):
    """
    Get information about the dataset using the tfds.core.DatasetBuilder.

    :param dataset_builder: builder exposing ``.info`` and ``.as_dataset(split=...)``
    :param plot: when True, also display the first 9 training examples
    :return: the builder's ``info`` object
    """
    # Get information about the dataset
    dataset_info = dataset_builder.info

    if plot:
        # Grab 9 examples from the training data and display them
        dataset = dataset_builder.as_dataset(split='train[:9]')
        # (ds, ds_info) is the current argument order; (info, ds) is deprecated.
        tfds.show_examples(dataset, dataset_info)
        # pyplot.show() displays all open figures; it does not take a figure argument.
        plt.show()

    return dataset_info
Example #6
0
    def info(self):
        """Print the dataset info, plot one training digit, and show a grid of test examples.

        Reads ``self.mnist_info``, ``self.mnist_train`` and ``self.mnist_test``.
        """
        print(self.mnist_info)

        # Only take a single example
        for mnist_example in self.mnist_train.take(1):
            image, label = mnist_example["image"], mnist_example["label"]

            # Plot index 0 of the last axis (assumes an (H, W, C) image -- TODO confirm).
            plt.imshow(image.numpy()[:, :, 0].astype(np.float32),
                       cmap=plt.get_cmap("Blues"))
            plt.title("Digit: %d" % label.numpy())

            print(image[0][4])

            # (ds, ds_info) is the current argument order; (info, ds) is deprecated.
            tfds.show_examples(self.mnist_test, self.mnist_info)
            plt.show()
Example #7
0
def load_ds(args, ds_id, split, augment=False):
    """Load one split of a TFDS dataset, preprocess it and return it with its info.

    Args:
      args: parsed options; ``args.data_dir`` and ``args.log_level`` are read.
      ds_id: name of the TFDS dataset to load.
      split: split specification forwarded to ``tfds.load``.
      augment: forwarded to ``preprocess``.

    Returns:
      A ``(class_ds, info)`` tuple: the preprocessed dataset mapped through
      ``class_supervise``, and the dataset info.
    """
    # A single call returns both the dataset and its info; loading once more
    # without `split` just to obtain `info` is redundant.
    ds, info = tfds.load(ds_id,
                         split=split,
                         data_dir=args.data_dir,
                         try_gcs=True,
                         with_info=True)

    # Preprocess
    processed_ds = preprocess(ds, augment)

    # Show examples if debug level is log
    if args.log_level == 'DEBUG':
        for image_key in ('image', 'segmentation_mask'):
            if image_key in info.features:
                tfds.show_examples(processed_ds,
                                   info,
                                   image_key=image_key,
                                   rows=1,
                                   cols=5)

    class_ds = processed_ds.map(class_supervise, tf.data.AUTOTUNE)

    return class_ds, info
Example #8
0
def _generate_single_visualization(full_name: str, dst_dir: str) -> None:
    """Save the generated figures for the dataset in dst_dir.

    Nothing is written when the destination image already exists, when the
    dataset has no splits, or when `tfds.show_examples` fails for the dataset.

    Args:
      full_name: Name of the dataset to build `dataset`, `dataset/config`.
      dst_dir: Destination where the dataset will be saved (as
        `dataset-config-version`)
    """
    dst_filename = full_name.replace('/', '-') + '.png'
    dst_path = os.path.join(dst_dir, dst_filename)
    # If the image already exists, skip the image generation
    if tf.io.gfile.exists(dst_path):
        logging.info(f'Skiping visualization for {full_name} (already exists)')
        return

    logging.info(f'Generating visualization for {full_name}...')
    # Load the dataset.
    builder_name, _, version = full_name.rpartition('/')
    builder = tfds.builder(f'{builder_name}:{version}')
    split_names = list(builder.info.splits.keys())
    if not split_names:
        logging.info(f'Dataset `{full_name}` not generated.')
        return
    # Prefer the train split; otherwise fall back to the first one available.
    split = 'train' if 'train' in split_names else split_names[0]
    ds = builder.as_dataset(split=split, shuffle_files=False)

    if not tf.io.gfile.exists(dst_dir):
        tf.io.gfile.makedirs(dst_dir)
    try:
        figure = tfds.show_examples(ds, builder.info)
    except Exception:  # pylint: disable=broad-except
        logging.info(f'Visualisation not supported for dataset `{full_name}`')
        return

    try:
        # `savefig` do not support GCS, so first save the image locally.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, dst_filename)
            figure.savefig(tmp_path)
            tf.io.gfile.copy(tmp_path, dst_path)
    finally:
        # Release the figure even when saving or copying fails.
        plt.close(figure)
Example #9
0
def generate_visualization(ds_name):
    """
    Method used to generate examples figures of a dataset

    The figure for the dataset's train split is saved as ``<ds_name>.png`` under
    the TFDS path for ``FIG_DIR``, passed to ``optimize_image`` and the
    unoptimized file is then removed. A ``RuntimeError`` is reported as an
    unsupported dataset.

    :param ds_name: name of the TFDS dataset to visualize
    """

    try:
        ds, ds_info = tfds.load(name=ds_name, split='train', with_info=True)
        path_dir = tfds.core.get_tfds_path(FIG_DIR)
        print(path_dir)
        # (ds, ds_info) is the current argument order; (info, ds) is deprecated.
        fig = tfds.show_examples(ds, ds_info, plot_scale=2)
        suffix = '.png'
        fpath = os.path.join(path_dir, ds_name + suffix)
        fig.savefig(fname=fpath)

        # Optimizing figures
        optimize_image(ds_name)
        os.remove(fpath)

    except RuntimeError:
        print('The selected dataset is not supported')
Example #10
0
# Load the train split as (image, label) tuples together with the dataset info.
ds, ds_info = tfds.load('colorectal_histology',
                        data_dir=".",
                        shuffle_files=True,
                        split='train',
                        with_info=True,
                        as_supervised=True)

assert isinstance(ds, tf.data.Dataset)
print(ds_info)

# ## Display a few examples from the dataset

# Rebuild feature dicts from the (x, y) tuples before handing them to show_examples.
x_key, y_key = ds_info.supervised_keys
ds_temp = ds.map(lambda x, y: {x_key: x, y_key: y})
# (ds, ds_info) is the current argument order; (info, ds) is deprecated.
tfds.show_examples(ds_temp, ds_info)

# Notebook-style expression: evaluates the label names without printing them in a script.
ds_info.features['label'].names

# ## Define the data loaders
#

n = ds_info.splits['train'].num_examples
train_split_percentage = 0.80
train_batch_size = 128
test_batch_size = 16


def normalize_img(image, label):
    """Cast the image to `float32` and divide it by 255; the label passes through unchanged."""
    scaled = tf.cast(image, tf.float32) / 255.
    return scaled, label
Example #11
0
"""
#plt.show()

### investigating data batches ###
### DATA ###
"""
NWPU-RESISC45
This dataset requires you to download the source data manually 
into download_config.manual_dir (defaults to ~/tensorflow_datasets/manual/):

Note: this dataset does not have a test/train split.
"""
# Load the only available split ("train") together with the dataset info.
data, info = tfds.load('resisc45', split="train", with_info=True)
# Plot a grid of example images from the loaded data.
tfds.show_examples(data, info)

# Number of examples in the whole dataset and the declared image shape.
ds_size = info.splits["train"].num_examples
image_shape = info.features['image'].shape
print(image_shape)
# Manual 80:20 split: 20% of the examples for test, the remainder for train.
test_ds_size = int(ds_size * 0.20)
train_ds_size = ds_size - test_ds_size
# The first `test_ds_size` elements become the test set; everything after them is train.
test_ds = data.take(test_ds_size)
train_ds = data.skip(test_ds_size)
print("size of test: {}, size of train: {}".format(test_ds_size,
                                                   train_ds_size))

# num features
Example #12
0
import tensorflow as tf
import tensorflow_datasets as tfds

# List the names of all available dataset builders.
print(tfds.list_builders())

# Load the MNIST train split along with its dataset info.
ds, info = tfds.load("mnist", split="train", shuffle_files=True, with_info=True)
assert isinstance(ds, tf.data.Dataset)
# Batch into groups of 32 and prefetch one batch ahead.
prefetched_ds = ds.batch(32).prefetch(1)

examples = prefetched_ds.take(1)  # Take a single batch (the dataset is batched by 32)
for example in examples:
    print(list(example.keys()))
    image = example["image"]
    label = example["label"]
    print(image.shape, label)

tfds.benchmark(prefetched_ds, batch_size=32)
tfds.benchmark(prefetched_ds, batch_size=32)  # Second epoch should be much faster due to auto-caching

# Plot examples from the unbatched dataset.
tfds.show_examples(ds, info)
import tensorflow_hub as hub
import os
import numpy as np
import tensorflow_datasets as tfds
import warnings
warnings.filterwarnings('ignore')

# Load the three splits as (image, label) tuples; `datasets` is ordered train, test, validation.
datasets, info = tfds.load(name='beans',
                           with_info=True,
                           as_supervised=True,
                           split=['train', 'test', 'validation'])

# Notebook-style expression: displays the info in a notebook, no effect in a script.
info

# NOTE: despite the variable name, this loads the 'test' split (as feature dicts) for plotting.
train, info_train = tfds.load(name='beans', with_info=True, split='test')
# (ds, ds_info) is the current argument order; (info, ds) is deprecated.
tfds.show_examples(train, info_train)


def scale(image, label):
    """Return the image as float32 divided by 255 and resized to 224x224, with a depth-3 one-hot label."""
    as_float = tf.cast(image, tf.float32)
    as_float /= 255.0
    resized = tf.image.resize(as_float, [224, 224])
    encoded_label = tf.one_hot(label, 3)
    return resized, encoded_label


def get_dataset(batch_size=32):
    """Build the scaled, batched train/test/validation datasets from the module-level `datasets`.

    Only the training dataset is shuffled (buffer of 1000) before batching.

    :param batch_size: number of examples per batch
    :return: tuple ``(train, test, validation)`` of batched datasets
    """
    scaled_train = datasets[0].map(scale)
    train_dataset_scaled = scaled_train.shuffle(1000).batch(batch_size)

    scaled_test = datasets[1].map(scale)
    test_dataset_scaled = scaled_test.batch(batch_size)

    scaled_val = datasets[2].map(scale)
    val_dataset_scaled = scaled_val.batch(batch_size)

    return train_dataset_scaled, test_dataset_scaled, val_dataset_scaled
Example #14
0
* as_supervised=True---download the target labels for that data.
* split---- 3 splits for that data.

"""

# Load the three splits as (image, label) tuples; `datasets` is ordered train, test, validation.
datasets, info = tfds.load(name='beans',
                           with_info=True,
                           as_supervised=True,
                           split=['train', 'test', 'validation'])
"""# look at the info about the dataset completely."""

info
"""# lets visualise the dataset."""

# Load the test split as feature dicts for plotting.
test, info_test = tfds.load(name='beans', with_info=True, split='test')
# (ds, ds_info) is the current argument order; (info, ds) is deprecated.
tfds.show_examples(test, info_test)
"""# lets create a function to scale the dataset so that my training converges faster.
here i convert the output pixel value between 0 and 1.

The model i am going to use is the mobilenet model. and mobilenet expects the input image to be of shape (224,224).
Also one hot encoding the target variable
"""


def scale(image, label):
    """Prepare one (image, label) pair for training.

    The image is cast to float32, divided by 255 and resized to 224x224;
    the label is one-hot encoded with depth 3.
    """
    pixels = tf.cast(image, tf.float32)
    pixels /= 255.0
    one_hot_label = tf.one_hot(label, 3)

    return tf.image.resize(pixels, [224, 224]), one_hot_label

Example #15
0
 def displayTFDatasetImages(self, amountOfImages, typeOfData):
     """
     Show an ``amountOfImages`` x ``amountOfImages`` grid of examples from the selected data.

     :param amountOfImages: number of rows and of columns in the grid; must be an int
     :param typeOfData: selector passed to ``Visualization().findCorrespondingDataType``
     :raises TypeError: if ``amountOfImages`` is not an int
     :return: n/a
     """
     # Validate explicitly: an `assert` is stripped under `python -O` and would
     # raise AssertionError rather than the intended TypeError.
     if not isinstance(amountOfImages, int):
         raise TypeError(
             f"amountOfImages must be an int, got {type(amountOfImages).__name__}")
     data = Visualization().findCorrespondingDataType(typeOfData)
     # (ds, ds_info) is the current argument order; (info, ds) is deprecated.
     tfds.show_examples(
         data, self.metaData, rows=amountOfImages, cols=amountOfImages)
 def show(self):
     """Plot a grid of examples from ``self.ds`` using ``self.info``."""
     tfds.show_examples(self.ds, self.info)
Example #17
0
import tensorflow_datasets as tfds
import logging

# Log everything from DEBUG upwards, with timestamp, file name and line number.
logging.basicConfig(
    level=logging.DEBUG,
    format=
    '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S')

# tfds works in both Eager and Graph modes
# tf.enable_eager_execution()

# See available datasets
print(tfds.list_builders())

# Construct a tf.data.Dataset
train_data, train_info = tfds.load(name="lfw",
                                   as_supervised=True,
                                   with_info=True,
                                   split=tfds.Split.TRAIN)

print(train_data)

# The "1.*.*" suffix is a version pattern in the dataset name.
mnist = tfds.load("mnist:1.*.*")

lfw_test, test_info = tfds.load("lfw", split=tfds.Split.TEST, with_info=True)
print(test_info)

# (ds, ds_info) is the current argument order; (info, ds) is deprecated.
fig = tfds.show_examples(lfw_test, test_info)
Example #18
0
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np

# NOTE: despite the variable name, this loads the 'test' split.
ds_train, ds_info = tfds.load('celeb_a',
                              split='test',
                              shuffle_files=False,
                              with_info=True)
# (ds, ds_info) is the current argument order; (info, ds) is deprecated.
fig = tfds.show_examples(ds_train, ds_info)

# Pull one batch of `sample_size` examples.
sample_size = 2000
ds_train = ds_train.batch(sample_size)
features = next(iter(ds_train.take(1)))
n, h, w, c = features['image'].shape

# Average the batch over axis 0 and display the resulting mean image.
figure = plt.figure(figsize=(8, 6))
sample_images = features['image']
new_image = np.mean(sample_images, axis=0)
plt.imshow(new_image.astype(np.uint8))
plt.axis('off')
plt.show()
Example #19
0
def show_examples():
    """Plot a grid of example images from the cats_vs_dogs train split."""
    # Without `split`, tfds.load returns a dict of splits, which
    # tfds.show_examples cannot plot; request a single tf.data.Dataset instead.
    dataset, ds_info = tfds.load("cats_vs_dogs", split="train", with_info=True)
    tfds.show_examples(dataset, ds_info)
 def test_intro(self):
     """Display the training examples when plotting is enabled via SHOW_PLOTS; otherwise do nothing."""
     if not SHOW_PLOTS:
         return
     tfds.show_examples(self.ds_train, self.ds_info)
     plt.show()
     # Clear the current figure after it has been shown.
     plt.clf()
Example #21
0
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds   # pip install tesnforflow_datasets, have to downgrade absl-py==0.8

(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=False,    # true will be (img,label), false will be a dict
    with_info=True
)

print(ds_info)
fig = tfds.show_examples(ds_train, ds_info, rows=4, cols=4)   # need as_supervised=False


def normalize_img(image, label):
    """Cast the image to float32 and divide it by 255; the label is returned unchanged."""
    # normalize images
    return tf.cast(image, tf.float32) / 255.0, label


# `ds.train` was an undefined name. The training split is `ds_train`, and with
# as_supervised=False each element is a dict, so unpack it before normalizing.
df_train = ds_train.map(
    lambda example: normalize_img(example['image'], example['label']))







Example #22
0
import numpy as np
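# Workaround for HTTPS download errors: the next line disables TLS certificate
# verification for default HTTPS contexts (insecure; use only for local experiments)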
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# Load the full 'train' split (no sub-split slicing is applied), downloading it if needed
ds, ds_info = tfds.load('colorectal_histology',
                        split='train',
                        shuffle_files=True,
                        with_info=True,
                        download=True)
assert isinstance(ds, tf.data.Dataset)
print(ds_info)

# Visualizing images
fig = tfds.show_examples(ds, ds_info)

# Iterate the dataset as NumPy values; the `break` stops after the first example
# (remove it to read all images)
for example in tfds.as_numpy(ds):
    image, label = example['image'], example['label']
    break
# Take one sample from the data; this overwrites `image` and `label` from the loop above
one_sample = ds.take(1)
one_sample = list(one_sample.as_numpy_iterator())
image = one_sample[0]['image']
label = one_sample[0]['label']
print(image.shape, label.shape)


# Side by side visualization
def visualize(im, imAgmented, operation):