예제 #1
0
 def test_generator_datashape(self, n, bs):
     """Check the shape of every batch produced by the dataset generator.

     A dataset is filled with ``n`` random 32x32x3 images and ``(n, 1)``
     labels. Every batch must hold ``bs`` samples, except the final one,
     which holds the ``n % bs`` leftover samples when ``bs`` does not divide
     ``n`` evenly.
     """
     ds = SVHNDataset("test")
     ds._images = np.random.randint(low=0, high=255, size=(n, 32, 32, 3))
     ds.labels = np.random.randint(low=1, high=10, size=(n, 1))
     ds_gen = ds.generator(batch_size=bs, flatten=False, ae=False)
     for batch_idx in range(len(ds_gen)):
         expected = bs
         # Only a trailing partial batch may be smaller than ``bs``.
         if batch_idx == len(ds_gen) - 1 and n % bs != 0:
             expected = n % bs
         assert (expected, 32, 32, 3) == ds_gen[batch_idx][0].shape
         assert (expected, 1) == ds_gen[batch_idx][1].shape
예제 #2
0
def dataset_to_image_dir(config):
    """Export the SVHN training set as image files plus a CSV index.

    Does nothing when the path stored under ``config["general"]["dataset_all"]``
    already exists. Otherwise the training ``.mat`` file named by
    ``config["general"]["train_mat"]`` is loaded, its images are saved under
    ``dataset_split/images/training`` and a CSV with the columns ``labels``
    and ``file_names`` is written to the ``dataset_all`` path.

    Args:
        config: Mapping of sections (e.g. a ``ConfigParser``) with a
            ``general`` section providing ``dataset_all`` and ``train_mat``.
    """
    csv_path = config["general"].get("dataset_all")
    if os.path.exists(csv_path):
        # The index was already generated on an earlier run.
        return

    os.makedirs("dataset_split", exist_ok=True)
    mat_path = config["general"].get("train_mat")
    train_set = SVHNDataset.from_mat(mat_path)
    image_writer = SVHNPlotter(output_dir="dataset_split/images/training")
    # presumably one entry per saved image -- confirm against SVHNPlotter
    saved_names = image_writer.save_images(train_set)

    index_frame = pd.DataFrame()
    index_frame["labels"] = train_set.labels.flatten()
    index_frame["file_names"] = saved_names
    index_frame.to_csv(csv_path, index=False)
예제 #3
0
def dataset_to_npy(mat_file):
    """Pack an SVHN ``.mat`` split into a single uint8 array and save it.

    The saved array has shape ``(len(dataset), 32, 32, 4)``: channels 0-2
    carry the image data and the extra fourth channel carries each sample's
    label in its ``(0, 0)`` pixel (the rest of that channel stays zero).
    The array is written with ``np.save`` under ``dataset_split/arrays/``,
    named after the first element returned by ``split_path(mat_file)``.

    Args:
        mat_file: Path of the ``.mat`` file to convert.
    """
    dataset = SVHNDataset.from_mat(mat_file)
    sample_count = len(dataset)
    print(f"{mat_file} set has {sample_count} samples")

    # One extra slot on the channel axis is reserved for the label.
    packed = np.zeros((sample_count, 32, 32, 4), dtype=np.uint8)
    packed[:, 0, 0, 3] = dataset.labels.squeeze()
    packed[:, :, :, 0:3] = dataset.images

    # Quick sanity summary: max/min/mean/std over the whole packed array.
    highest = np.max(packed)
    lowest = np.min(packed)
    average = np.mean(packed)
    spread = np.std(packed)
    print(f"{highest}/{lowest}/{average}/{spread}")

    target_stem = split_path(mat_file)[0]
    np.save(f"dataset_split/arrays/{target_stem}", packed)
예제 #4
0
def plot_ae(config: cp.ConfigParser, tag=None):
    """Save a side-by-side comparison of SVHN images and their reconstructions.

    Loads the autoencoder architecture (``autoencoder.json``) and weights
    (``autoencoder_final.h5``) from ``experiments/<tag>``, runs the first ten
    training images through it and writes ``ae_compare.png`` into the same
    directory: originals on the top row, reconstructions on the bottom row.

    Args:
        config: Parsed configuration. ``[general]`` supplies ``ae_model`` and
            ``color_mode``; ``[plot]`` supplies ``tag`` when the ``tag``
            argument is omitted.
        tag: Experiment tag naming the directory under ``experiments/``.
            Falls back to ``config["plot"]["tag"]`` when ``None``.
    """
    ae_model = config["general"].get("ae_model")
    color_mode = config["general"].get("color_mode")
    tag = config["plot"].get("tag") if tag is None else tag
    exp_dir = f"experiments/{tag}"
    print(f"loading experiment results from {exp_dir}")

    train_set = SVHNDataset.from_mat("dataset/train_32x32.mat")
    if color_mode == "grayscale":
        converter = ColorConverter(color_mode)
        train_set = converter.transform(train_set)

    with open(os.path.join(exp_dir, "autoencoder.json"), "r") as f:
        autoencoder = model_from_json(f.read())  # type: Model
    autoencoder.load_weights(os.path.join(exp_dir, "autoencoder_final.h5"))

    n = 10
    # The CNN model is fed image-shaped input; any other model gets the
    # flattened images. Pixel values are scaled by 1/255 in both cases.
    if ae_model == "cnn":
        decoded = autoencoder.predict(train_set.images[:n] / 255)
    else:
        decoded = autoencoder.predict(train_set.images_flatten[:n] / 255)

    fig = plt.figure(figsize=(20, 4))
    try:
        plt.gray()
        for i in range(n):
            # Top row: original image.
            ax = plt.subplot(2, n, i + 1)
            plt.imshow(train_set.images[i].squeeze() / 255)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

            # Bottom row: reconstruction of the same image.
            ax = plt.subplot(2, n, i + 1 + n)
            decoded_img = decoded[i]

            if ae_model == "mlp":
                # The MLP output is flat; restore the image layout first.
                decoded_img = decoded_img.reshape(
                    32, 32, 3 if train_set.color_mode == "rgb" else 1)
            decoded_img = decoded_img.squeeze()
            plt.imshow(decoded_img)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
        plt.savefig(os.path.join(exp_dir, "ae_compare.png"))
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
예제 #5
0
from keras import optimizers
import configparser as cp
from datetime import datetime as dt
import os
from shutil import copyfile
from plot.plot_autoencoder import plot_ae

if __name__ == "__main__":
    # Read the run settings from config.ini in the working directory.
    config = cp.ConfigParser()
    config.read("config.ini")

    batch_size = config["general"].getint("batch_size")
    ae_model = config["general"].get("ae_model")
    color_mode = config["general"].get("color_mode")
    noise_ratio = config["general"].getfloat("noise_ratio")
    train_set = SVHNDataset.from_npy(config["general"].get("training_set"))
    dev_set = SVHNDataset.from_npy(config["general"].get("dev_set"))
    print(f"Training Set Color_Mode is {train_set.color_mode}")
    # Report the dev set's own colour mode (previously printed train_set's).
    print(f"Dev Set Color_Mode is {dev_set.color_mode}")
    if color_mode == "grayscale":
        # Convert both splits so they stay in the same colour mode.
        converter = ColorConverter(color_mode)
        train_set = converter.transform(train_set)
        dev_set = converter.transform(dev_set)
    plotter = SVHNPlotter(output_dir=f"images/{train_set.name}")
    plotter.save_images(train_set, n=10)
    plotter.save_mosaic(train_set, row=10, col=10)

    # NOTE(review): batch_size and noise_ratio are read from the config above,
    # but the generator below uses the literals 100 and 0.05 -- confirm intent.
    trn_gen = train_set.generator(batch_size=100, flatten=False, noise=0.05)
    print(trn_gen[0][0].shape)
    print(trn_gen[0][1].shape)
    # NOTE(review): images come from the first batch only while labels cover
    # the whole training set -- verify the lengths are meant to differ.
    train_set_gen = SVHNDataset("trn_generator", images=trn_gen[0][0], labels=train_set.labels)
from preprocessing.dataset import SVHNDataset
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Plot the pixel-value distribution of a random 10% index sample of the
    # SVHN training data, first in its original colour mode and then again
    # after set_gray_scale(), with both curves drawn on the same axes.
    train_set = SVHNDataset.from_mat("dataset/train_32x32.mat")
    print(train_set)
    n = int(0.1 * len(train_set))
    shuffle_idx = np.random.permutation(range(len(train_set)))
    picked = shuffle_idx[:n]

    # NOTE(review): the sample indices are applied to the LAST axis of
    # ``images`` -- confirm that axis enumerates samples in this layout.
    original_pixels = train_set.images[:, :, :, picked].flatten()
    sns.distplot(original_pixels, label=f"{train_set.color_mode}")
    plt.savefig(f"images/distplot_train_{train_set.color_mode}.png")

    # Same indices again, after the in-place grayscale switch.
    train_set.set_gray_scale()
    gray_pixels = train_set.images[:, :, :, picked].flatten()
    sns.distplot(gray_pixels, label=f"{train_set.color_mode}")

    plt.grid()
    plt.xlabel("Pixel Value")
    plt.ylabel("Ratio of observations")
    plt.legend()
    plt.savefig(f"images/distplot_{train_set.name}.png")
    plt.close()
from keras import optimizers
import configparser as cp
from datetime import datetime as dt
import os
from shutil import copyfile
from plot.plot_autoencoder import plot_ae

if __name__ == "__main__":
    # Read the run settings from config.ini in the working directory.
    config = cp.ConfigParser()
    config.read("config.ini")

    batch_size = config["general"].getint("batch_size")
    ae_model = config["general"].get("ae_model")
    color_mode = config["general"].get("color_mode")
    noise_ratio = config["general"].getfloat("noise_ratio")
    train_set = SVHNDataset.from_npy(config["general"].get("training_set"))
    dev_set = SVHNDataset.from_npy(config["general"].get("dev_set"))
    print(f"Training Set Color_Mode is {train_set.color_mode}")
    # Report the dev set's own colour mode (previously printed train_set's).
    print(f"Dev Set Color_Mode is {dev_set.color_mode}")
    if color_mode == "grayscale":
        # Convert both splits so they stay in the same colour mode.
        converter = ColorConverter(color_mode)
        train_set = converter.transform(train_set)
        dev_set = converter.transform(dev_set)
    # Print again so the effect of the optional conversion is visible.
    print(f"Training Set Color_Mode is {train_set.color_mode}")
    print(f"Dev Set Color_Mode is {dev_set.color_mode}")

    # A timestamped tag keeps the logs and artefacts of each run separate.
    tag = dt.now().strftime("%m_%d_%H%M%S") + f"_{color_mode}_{ae_model}"
    log_dir = f"logs/{tag}"
    os.makedirs(log_dir, exist_ok=True)
    exp_dir = f"experiments/{tag}"
    os.makedirs(exp_dir, exist_ok=True)
예제 #8
0
 def test_name(self):
     """A dataset exposes the name it was constructed with."""
     expected_name = "test"
     dataset = SVHNDataset(expected_name)
     assert dataset.name == expected_name
예제 #9
0
 def test_generator_batch(self, n, bs):
     """The generator length equals ``ceil(n / bs)`` for ``n`` samples."""
     dataset = SVHNDataset("test")
     dataset._images = np.random.randint(low=0, high=255, size=(n, 32, 32, 3))
     dataset.labels = np.random.randint(low=1, high=10, size=(n, 1))
     # A trailing partial batch still counts as one batch.
     expected_batches = np.ceil(n / bs)
     batches = dataset.generator(batch_size=bs)
     assert expected_batches == len(batches)
예제 #10
0
 def test_from_npy(self):
     """Loading the packed RGB training array yields the expected name and shapes."""
     npy_path = "dataset_split/arrays/training/rgb_all.npy"
     sample_count = 71791
     dataset = SVHNDataset.from_npy(npy_path)
     assert dataset.name == "rgb_all"
     assert dataset.labels.shape == (sample_count, )
     assert dataset.images.shape == (sample_count, 32, 32, 3)
예제 #11
0
 def test_from_mat(self):
     """Load the SVHN test split from its ``.mat`` file and check the dataset name."""
     ds = SVHNDataset.from_mat("dataset/test_32x32.mat")
     assert ds.name == "test_32x32"