def test_generator_datashape(self, n, bs):
    """Verify the shape of every batch yielded by ``SVHNDataset.generator``.

    A dataset of ``n`` random 32x32x3 images with ``(n, 1)`` labels is
    batched with ``batch_size=bs``. Every batch must hold ``bs`` samples,
    except the final one, which holds ``n % bs`` samples when ``n`` is not
    a multiple of ``bs``.

    ``n`` and ``bs`` are supplied by the caller (presumably a pytest
    parametrization that is not visible here).
    """
    dataset = SVHNDataset("test")
    dataset._images = np.random.randint(low=0, high=255, size=(n, 32, 32, 3))
    dataset.labels = np.random.randint(low=1, high=10, size=(n, 1))
    batches = dataset.generator(batch_size=bs, flatten=False, ae=False)

    n_batches = len(batches)
    remainder = n % bs
    for idx in range(n_batches):
        # Only the last batch may be short, and only when bs does not
        # divide n evenly.
        is_last = idx == n_batches - 1
        rows = remainder if (is_last and remainder != 0) else bs
        assert (rows, 32, 32, 3) == batches[idx][0].shape
        assert (rows, 1) == batches[idx][1].shape
def dataset_to_image_dir(config):
    """Export the training set as image files plus a CSV index.

    Reads two keys from ``config["general"]``: ``dataset_all`` (path of the
    CSV index to write) and ``train_mat`` (path of the training ``.mat``
    file). If the CSV already exists, nothing is done. Otherwise the
    training images are saved under ``dataset_split/images/training`` and
    a CSV with the columns ``labels`` and ``file_names`` is written.
    """
    general = config["general"]
    file_name = general.get("dataset_all")

    # The CSV acts as the "already exported" marker.
    if os.path.exists(file_name):
        return

    os.makedirs("dataset_split", exist_ok=True)
    train_set = SVHNDataset.from_mat(general.get("train_mat"))
    image_writer = SVHNPlotter(output_dir="dataset_split/images/training")
    saved_files = image_writer.save_images(train_set)

    index = pd.DataFrame()
    index["labels"] = train_set.labels.flatten()
    index["file_names"] = saved_files
    index.to_csv(file_name, index=False)
def dataset_to_npy(mat_file):
    """Convert a ``.mat`` dataset into a single ``uint8`` array saved with ``np.save``.

    The saved array has shape ``(len(dataset), 32, 32, 4)``:

    * ``[..., 0:3]`` holds ``dataset.images``;
    * ``[:, 0, 0, 3]`` holds the label of each sample; the rest of that
      fourth slice stays zero.

    The array is written to ``dataset_split/arrays/<name>``, where ``<name>``
    is ``split_path(mat_file)[0]``. The target directory is created when it
    does not exist yet, so the function also works on a fresh checkout.

    Args:
        mat_file: Path of the ``.mat`` file to convert.
    """
    # Local import keeps this block self-contained.
    import os

    dataset = SVHNDataset.from_mat(mat_file)
    print(f"{mat_file} set has {len(dataset)} samples")

    # One extra slice on the last axis is reserved for the label, which is
    # stored in element (0, 0) of that slice.
    batch = np.zeros((len(dataset), 32, 32, 4), dtype=np.uint8)
    batch[:, 0, 0, 3] = dataset.labels.squeeze()
    batch[:, :, :, 0:3] = dataset.images
    print(f"{np.max(batch)}/{np.min(batch)}/{np.mean(batch)}/{np.std(batch)}")

    out_path = f"dataset_split/arrays/{split_path(mat_file)[0]}"
    # np.save does not create missing directories; make sure the parent
    # exists so the write cannot fail with FileNotFoundError.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    np.save(out_path, batch)
def plot_ae(config: cp.ConfigParser, tag=None):
    """Save a comparison plot of original images and autoencoder outputs.

    Loads the model architecture (``autoencoder.json``) and weights
    (``autoencoder_final.h5``) from ``experiments/<tag>``, runs the first
    10 training images through the autoencoder, and writes a two-row
    figure (originals on top, reconstructions below) to
    ``experiments/<tag>/ae_compare.png``.

    Args:
        config: Parsed configuration. ``general.ae_model`` selects the input
            layout (``"cnn"`` uses ``images``, anything else uses
            ``images_flatten``), ``general.color_mode`` triggers a grayscale
            conversion when equal to ``"grayscale"``, and ``plot.tag`` is the
            fallback experiment tag.
        tag: Experiment tag; when ``None`` it is read from
            ``config["plot"]["tag"]``.
    """
    ae_model = config["general"].get("ae_model")
    color_mode = config["general"].get("color_mode")
    tag = config["plot"].get("tag") if tag is None else tag
    exp_dir = f"experiments/{tag}"
    print(f"loading experiment results from {exp_dir}")

    train_set = SVHNDataset.from_mat("dataset/train_32x32.mat")
    if color_mode == "grayscale":
        converter = ColorConverter(color_mode)
        train_set = converter.transform(train_set)

    with open(os.path.join(exp_dir, "autoencoder.json"), "r") as f:
        autoencoder = model_from_json(f.read())  # type: Model
    autoencoder.load_weights(os.path.join(exp_dir, "autoencoder_final.h5"))

    n = 10  # number of images shown per row
    # Inputs are scaled by 1/255 before prediction.
    if ae_model == "cnn":
        decoded = autoencoder.predict(train_set.images[:n] / 255)
    else:
        decoded = autoencoder.predict(train_set.images_flatten[:n] / 255)

    fig = plt.figure(figsize=(20, 4))
    try:
        plt.gray()
        for i in range(n):
            # Top row: original image.
            ax = plt.subplot(2, n, i + 1)
            plt.imshow(train_set.images[i].squeeze() / 255)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

            # Bottom row: reconstruction.
            ax = plt.subplot(2, n, i + 1 + n)
            decoded_img = decoded[i]
            if ae_model == "mlp":
                # The MLP output is flat; restore the image layout first.
                decoded_img = decoded_img.reshape(
                    32, 32, 3 if train_set.color_mode == "rgb" else 1)
            decoded_img = decoded_img.squeeze()
            plt.imshow(decoded_img)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
        plt.savefig(os.path.join(exp_dir, "ae_compare.png"))
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
from keras import optimizers
import configparser as cp
from datetime import datetime as dt
import os
from shutil import copyfile
from plot.plot_autoencoder import plot_ae

# Script: load the training and dev sets named in config.ini, optionally
# convert them to grayscale, dump a few sample images and a mosaic, and
# build a dataset from the first batch of a noisy generator.
if __name__ == "__main__":
    config = cp.ConfigParser()
    config.read("config.ini")

    # Settings from the [general] section of config.ini.
    batch_size = config["general"].getint("batch_size")
    ae_model = config["general"].get("ae_model")
    color_mode = config["general"].get("color_mode")
    noise_ratio = config["general"].getfloat("noise_ratio")

    train_set = SVHNDataset.from_npy(config["general"].get("training_set"))
    dev_set = SVHNDataset.from_npy(config["general"].get("dev_set"))

    print(f"Training Set Color_Mode is {train_set.color_mode}")
    # Report the dev set's own color mode (previously printed train_set's).
    print(f"Dev Set Color_Mode is {dev_set.color_mode}")

    if color_mode == "grayscale":
        converter = ColorConverter(color_mode)
        train_set = converter.transform(train_set)
        dev_set = converter.transform(dev_set)

    plotter = SVHNPlotter(output_dir=f"images/{train_set.name}")
    plotter.save_images(train_set, n=10)
    plotter.save_mosaic(train_set, row=10, col=10)

    # NOTE(review): batch size and noise are hard-coded here although
    # batch_size / noise_ratio are read from the config above - confirm
    # whether that is intentional.
    trn_gen = train_set.generator(batch_size=100, flatten=False, noise=0.05)
    print(trn_gen[0][0].shape)
    print(trn_gen[0][1].shape)

    # NOTE(review): images come from the first batch only while labels is
    # the full label array of train_set - verify this pairing is intended.
    train_set_gen = SVHNDataset("trn_generator",
                                images=trn_gen[0][0],
                                labels=train_set.labels)
from preprocessing.dataset import SVHNDataset
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Script: plot the pixel-value distribution of a random 10 % sample of the
# training set, first in its original color mode and then after conversion
# to grayscale. Both curves are drawn on the same figure.
if __name__ == "__main__":
    train_set = SVHNDataset.from_mat("dataset/train_32x32.mat")
    print(train_set)

    total = len(train_set)
    n = int(0.1 * total)
    shuffle_idx = np.random.permutation(range(total))
    sample_idx = shuffle_idx[:n]

    # NOTE(review): the sample indices select along the LAST axis of
    # `images` - confirm that this is the sample axis for datasets loaded
    # with from_mat.
    pixels = train_set.images[:, :, :, sample_idx].flatten()
    sns.distplot(pixels, label=f"{train_set.color_mode}")
    plt.savefig(f"images/distplot_train_{train_set.color_mode}.png")

    # The figure is not cleared, so the grayscale curve is overlaid on the
    # one drawn above.
    train_set.set_gray_scale()
    pixels = train_set.images[:, :, :, sample_idx].flatten()
    sns.distplot(pixels, label=f"{train_set.color_mode}")

    plt.grid()
    plt.xlabel("Pixel Value")
    plt.ylabel("Ratio of observations")
    plt.legend()
    plt.savefig(f"images/distplot_{train_set.name}.png")
    plt.close()
from keras import optimizers
import configparser as cp
from datetime import datetime as dt
import os
from shutil import copyfile
from plot.plot_autoencoder import plot_ae

# Script: load the training and dev sets named in config.ini, optionally
# convert them to grayscale, and create the per-run log and experiment
# directories.
if __name__ == "__main__":
    config = cp.ConfigParser()
    config.read("config.ini")

    # Settings from the [general] section of config.ini.
    batch_size = config["general"].getint("batch_size")
    ae_model = config["general"].get("ae_model")
    color_mode = config["general"].get("color_mode")
    noise_ratio = config["general"].getfloat("noise_ratio")

    train_set = SVHNDataset.from_npy(config["general"].get("training_set"))
    dev_set = SVHNDataset.from_npy(config["general"].get("dev_set"))

    print(f"Training Set Color_Mode is {train_set.color_mode}")
    # Report the dev set's own color mode (previously printed train_set's).
    print(f"Dev Set Color_Mode is {dev_set.color_mode}")

    if color_mode == "grayscale":
        converter = ColorConverter(color_mode)
        train_set = converter.transform(train_set)
        dev_set = converter.transform(dev_set)

    # Print again so the effect of the optional conversion is visible.
    print(f"Training Set Color_Mode is {train_set.color_mode}")
    print(f"Dev Set Color_Mode is {dev_set.color_mode}")

    # The tag is a timestamp plus the color mode and model type.
    tag = dt.now().strftime("%m_%d_%H%M%S") + f"_{color_mode}_{ae_model}"
    log_dir = f"logs/{tag}"
    os.makedirs(log_dir, exist_ok=True)
    exp_dir = f"experiments/{tag}"
    os.makedirs(exp_dir, exist_ok=True)
def test_name(self):
    """The name given to the constructor is exposed through ``name``."""
    expected = "test"
    dataset = SVHNDataset(expected)
    assert dataset.name == expected
def test_generator_batch(self, n, bs):
    """The generator length must equal ``ceil(n / bs)``.

    ``n`` random samples are batched with ``batch_size=bs``; ``n`` and
    ``bs`` are supplied by the caller (presumably a pytest parametrization
    that is not visible here).
    """
    dataset = SVHNDataset("test")
    dataset._images = np.random.randint(low=0, high=255, size=(n, 32, 32, 3))
    dataset.labels = np.random.randint(low=1, high=10, size=(n, 1))

    expected_batches = np.ceil(n / bs)
    batches = dataset.generator(batch_size=bs)
    assert expected_batches == len(batches)
def test_from_npy(self):
    """Loading the RGB training array yields the expected name and shapes."""
    npy_path = "dataset_split/arrays/training/rgb_all.npy"
    n_samples = 71791

    dataset = SVHNDataset.from_npy(npy_path)

    assert dataset.name == "rgb_all"
    assert dataset.labels.shape == (n_samples, )
    assert dataset.images.shape == (n_samples, 32, 32, 3)
def test_from_mat(self):
    """A dataset loaded from a ``.mat`` file has the name ``test_32x32``."""
    mat_path = "dataset/test_32x32.mat"
    dataset = SVHNDataset.from_mat(mat_path)
    assert dataset.name == "test_32x32"