Example #1
def test_cifar():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
Example #2
def test_cifar():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000
        (x_train, y_train), (x_test, y_test) = cifar100.load_data('fine')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000
        (x_train, y_train), (x_test, y_test) = cifar100.load_data('coarse')
        assert len(x_train) == len(y_train) == 50000
        assert len(x_test) == len(y_test) == 10000
def load_data(name):
    if name == 'cifar10':
        (X_train, y_train), (X_test, y_test), nout = load_cifar10(path=args.data_dir)
        nout = 16
    elif name == 'cifar100':
        (X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='fine')
        X_train = X_train.reshape(50000, 3072)
        X_test = X_test.reshape(10000, 3072)
        nout = 128
    elif name == 'svhn':
        from scipy.io import loadmat
        train = loadmat('../data/svhm_train.mat')
        test = loadmat('../data/svhn_test.mat')
        (X_train, y_train), (X_test, y_test) = (train['X'], train['y']), (test['X'], test['y'])
        s = X_train.shape
        X_train = X_train.reshape(-1, s[-1]).transpose()
        s = X_test.shape
        X_test = X_test.reshape(-1, s[-1]).transpose()
        temp = np.empty(X_train.shape, dtype=np.uint)
        np.copyto(temp, X_train)
        X_train = temp
        temp = np.empty(X_test.shape, dtype=np.uint)
        np.copyto(temp, X_test)
        X_test = temp
        nout = 16
    return (X_train, y_train), (X_test, y_test), nout
Example #4
 def data_mix(self):
     
     # randomly choose dataset
     dataset = random.choice(['mnist', 'cifar10', 'cifar100'])#
     
     n_labels = 10
     
     if dataset == "mnist":
         data = mnist.load_data()
     
     if dataset == "cifar10":
         data = cifar10.load_data()
     
     if dataset == "cifar100":
         data = cifar100.load_data()
         n_labels = 100
     
     # Choose dataset size. This affects regularization needed
     r = np.random.rand()
     
     # not using full dataset to make regularization more important and 
     # speed up testing a little bit
     data_size = int( 2000 * (1-r) + 40000 * r )
     
     # I do not use test data for validation, but last 10000 instances in dataset 
     # so that trained models can be compared to results in literature
     (CX, CY), (CXt, CYt) = data
     
     if dataset == "mnist":
         CX = np.expand_dims(CX, axis=1)
     
     data = CX[:data_size], CY[:data_size], CX[-10000:], CY[-10000:];
      
     return data, n_labels
Example #5
def construct_split_cifar100(num_tasks=3, num_classes=10):
    """Split CIFAR100 dataset and relabel classes num_classes

        Args:
            num_tasks: the number of tasks
            num_classes: the number of classes per task

        Returns:
            List of (X, y) tuples representing each dataset
    """
    # Load CIFAR100 data and normalize
    (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    m = np.max( (np.max(X_train), np.max(X_test) ) )
    X_train /= m
    X_test /= m

    X, y = X_train, y_train

    # split dataset by labels
    # here we also flatten the labels of cifar100 to match num_classes via modulus operation
    task_labels = [ range(num_classes*i,num_classes*(i+1)) for i in range(num_tasks) ]
    datasets = [] 
    for labels in task_labels:
        idx = np.in1d(y, labels)
        data = X[idx], np_utils.to_categorical(y[idx]%num_classes, num_classes)
        datasets.append(data)

    return datasets
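A minimal usage sketch (not part of the original snippet), assuming the function above and its Keras imports are in scope; with the default CIFAR-100 split, each 10-class task holds roughly 5000 training images:

# hedged usage sketch for construct_split_cifar100
tasks = construct_split_cifar100(num_tasks=3, num_classes=10)
for i, (X_task, Y_task) in enumerate(tasks):
    # e.g. (5000, 32, 32, 3) images and (5000, 10) one-hot labels per task
    print(i, X_task.shape, Y_task.shape)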
Example #6
 def get_cifar100():
     (X_train, y_train), (X_test, y_test) = cifar100.load_data()
     Y_train = np_utils.to_categorical(y_train, 100).astype("float32")
     Y_test = np_utils.to_categorical(y_test, 100).astype("float32")
     X_train = X_train.astype("float32") / 255
     X_test = X_test.astype("float32") / 255
     return (X_train, Y_train), (X_test, Y_test)
Example #7
def get_cifar100():
    """Get cifar100 data."""
    (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    Y_train = np_utils.to_categorical(y_train, 100)
    Y_test = np_utils.to_categorical(y_test, 100)
    X_train = X_train.astype('float32') / 255
    X_test = X_test.astype('float32') / 255
    return X_train, X_test, Y_train, Y_test
Example #8
    def train(self,model):

        #training parameters
        batch_size = 128
        maxepoches = 250
        learning_rate = 0.1
        lr_decay = 1e-6

        # The data, shuffled and split between train and test sets:
        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train, x_test = self.normalize(x_train, x_test)

        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        y_test = keras.utils.to_categorical(y_test, self.num_classes)

        lrf = learning_rate


        #data augmentation
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)



        #optimization details
        sgd = optimizers.SGD(lr=lrf, decay=lr_decay, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])


        # training process in a for loop with a learning rate drop every 25 epochs.

        for epoch in range(1,maxepoches):

            if epoch%25==0 and epoch>0:
                lrf/=2
                sgd = optimizers.SGD(lr=lrf, decay=lr_decay, momentum=0.9, nesterov=True)
                model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

            historytemp = model.fit_generator(datagen.flow(x_train, y_train,
                                             batch_size=batch_size),
                                steps_per_epoch=x_train.shape[0] // batch_size,
                                epochs=epoch,
                                validation_data=(x_test, y_test),initial_epoch=epoch-1)
        model.save_weights('cifar100vgg.h5')
        return model
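The loop above recompiles the model each time the learning rate drops; a hedged alternative sketch (not from the original code) expresses the same halve-every-25-epochs schedule with a Keras callback and a single fit_generator call:

from keras.callbacks import LearningRateScheduler

def lr_schedule(epoch, lr=None):
    # halve the initial 0.1 learning rate every 25 epochs, mirroring the manual loop above
    return 0.1 * (0.5 ** (epoch // 25))

# model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
#                     steps_per_epoch=x_train.shape[0] // batch_size,
#                     epochs=maxepoches,
#                     validation_data=(x_test, y_test),
#                     callbacks=[LearningRateScheduler(lr_schedule)])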
def data():

    nb_classes_fine = 100
    nb_classes_coarse = 20

    (X_train, y_train_fine), (X_test, y_test_fine) = cifar100.load_data(label_mode='fine')
    (_, y_train_coarse), (_, y_test_coarse) = cifar100.load_data(label_mode='coarse')
    
    Y_train = np_utils.to_categorical(y_train_coarse, nb_classes_coarse)
    Y_test = np_utils.to_categorical(y_test_coarse, nb_classes_coarse)
    
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    return X_train, Y_train, X_test, Y_test
    """   
Example #10
def generateSpecializedData():
        (x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')


        index0 = list(np.where(y_train[:,0] == 0)[0])
        index1 = list(np.where(y_train[:,0] == 1)[0])
        index2 = list(np.where(y_train[:,0] == 2)[0])
        index3 = list(np.where(y_train[:,0] == 3)[0])
        index4 = list(np.where(y_train[:,0] == 4)[0])
        index5 = list(np.where(y_train[:,0] == 5)[0])
        index6 = list(np.where(y_train[:,0] == 6)[0])
        index7 = list(np.where(y_train[:,0] == 7)[0])
        index8 = list(np.where(y_train[:,0] == 8)[0])
        index9 = list(np.where(y_train[:,0] == 9)[0])

        index_others = list(range(500))

        train_index = index0[:450]
        train_index += index1[:450]
        train_index += index2[:450]
        train_index += index3[:450]
        train_index += index4[:450]
        train_index += index5[:450]
        train_index += index6[:450]
        train_index += index7[:450]
        train_index += index8[:450]
        train_index += index9[:450]
        train_index += index_others[:450]

        test_index = index0[-50:]
        test_index += index1[-50:]
        test_index += index2[-50:]
        test_index += index3[-50:]
        test_index += index4[-50:]
        test_index += index5[-50:]
        test_index += index6[-50:]
        test_index += index7[-50:]
        test_index += index8[-50:]
        test_index += index9[-50:]
        test_index += index_others[-50:]

        
        y_train[index_others] = 10


        sp_y_train = y_train[train_index]
        sp_y_test = y_train[test_index]
        sp_x_train = x_train[train_index]
        sp_x_test = x_train[test_index]

        sp_x_train = sp_x_train.astype('float32')
        sp_x_test = sp_x_test.astype('float32')

        sp_y_train = keras.utils.to_categorical(sp_y_train, 11)
        sp_y_test = keras.utils.to_categorical(sp_y_test, 11)
        return sp_x_train, sp_x_test, sp_y_train, sp_y_test
    def test_cifar(self):
        print('cifar10')
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        print(X_train.shape)
        print(X_test.shape)
        print(y_train.shape)
        print(y_test.shape)

        print('cifar100 fine')
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
        print(X_train.shape)
        print(X_test.shape)
        print(y_train.shape)
        print(y_test.shape)

        print('cifar100 coarse')
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
        print(X_train.shape)
        print(X_test.shape)
        print(y_train.shape)
        print(y_test.shape)
Example #12
def construct_transfer_cifar10_cifar100(nb_tasks=4, split='train'):
    """
    Returns a multi-task dataset in which the first task is the full CIFAR10 dataset and each
    subsequent task is a 10-class subset drawn from the CIFAR100 dataset.

    params:
        nb_tasks The total number of tasks 
        split Whether to return training or validation data

    returns:
        A list with two tuples containing the two data sets
    """
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    # X_train = X_train.reshape(-1, 3, 32, 32)
    # X_test = X_test.reshape(-1, 32**2)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    no = X_train.max()
    X_train /= no
    X_test /= no

    if split == 'train':
        X, y = X_train, y_train
    else:
        X, y = X_test, y_test

    nb_classes = nb_tasks*10
    datasets = [(X,np_utils.to_categorical(y, nb_classes))]

    # Load CIFAR100 data and normalize
    (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    m = np.max( (np.max(X_train), np.max(X_test) ) )
    X_train /= m
    X_test /= m

    if split == 'train':
        X, y = X_train, y_train
    else:
        X, y = X_test, y_test

    # split dataset by labels
    task_labels = [ range(10*i,10*(i+1)) for i in range(1,nb_tasks) ]
    for labels in task_labels:
        idx = np.in1d(y+10, labels)
        data = X[idx], np_utils.to_categorical(y[idx]+10, nb_classes)
        datasets.append(data)


    all_task_labels = [range(10)]
    all_task_labels.extend(task_labels)
    return all_task_labels, datasets
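A hedged usage sketch (not in the original source), assuming the function and np_utils are importable; the first task covers CIFAR10 and the later ones are 10-class CIFAR100 slices:

task_labels, tasks = construct_transfer_cifar10_cifar100(nb_tasks=4, split='train')
for labels, (X_task, Y_task) in zip(task_labels, tasks):
    # labels is a range of 10 class ids; Y_task is one-hot over all nb_tasks*10 classes
    print(list(labels)[:3], X_task.shape, Y_task.shape)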
def load_cifar_100():
    from keras.datasets import cifar100
    num_classes = 100
    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0    
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    return (x_train,y_train),(x_test,y_test)
def data():

    nb_classes_fine = 100
    nb_classes_coarse = 20

    (X_train, y_train_fine), (X_test, y_test_fine) = cifar100.load_data(label_mode='fine')
    (_, y_train_coarse), (_, y_test_coarse) = cifar100.load_data(label_mode='coarse')

    # convert class vectors to binary class matrices
    Y_train_fine = np_utils.to_categorical(y_train_fine, nb_classes_fine)
    Y_train_coarse = np_utils.to_categorical(y_train_coarse, nb_classes_coarse)
    Y_test_fine = np_utils.to_categorical(y_test_fine, nb_classes_fine)
    Y_test_coarse = np_utils.to_categorical(y_test_coarse, nb_classes_coarse)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    #Y_train = np.concatenate((Y_train_coarse, Y_train_fine), axis=1)
    #Y_test = np.concatenate((Y_test_coarse, Y_test_fine), axis=1)

    return X_train, Y_train_fine, Y_train_coarse, X_test, Y_test_fine, Y_test_coarse
Example #15
    def train(self,model):

        #training parameters
        batch_size = 128
        maxepoches = 30
        learning_rate = 0.1
        lr_decay = 1e-6

        # The data, shuffled and split between train and test sets:


        #x_train, x_test, y_train, y_test = generateSpecializedData()
        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train, x_test = self.normalize(x_train, x_test)


        originalModel = cifar100vgg(False)
        
        y_train = originalModel.model.predict(x_train)
        y_test = originalModel.model.predict(x_test)


        #y_train = keras.utils.to_categorical(y_train, self.num_classes)
        #y_test = keras.utils.to_categorical(y_test, self.num_classes)

        lrf = learning_rate



        #optimization details
        sgd = optimizers.SGD(lr=lrf, decay=lr_decay, momentum=0.9, nesterov=True)
        model.compile(loss='mean_squared_error', optimizer=sgd,metrics=['accuracy'])


        # training process in a for loop with a learning rate drop every 25 epochs.

        for epoch in range(1,maxepoches):

            if epoch%25==0 and epoch>0:
                lrf/=2
                sgd = optimizers.SGD(lr=lrf, decay=lr_decay, momentum=0.9, nesterov=True)
                model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])

            historytemp = model.fit(x = x_train, y = y_train, epochs = epoch, verbose =2, validation_data=(x_test, y_test))

        model.save_weights('half2.h5')
        return model
Example #16
def loadCifar100():
    train_set, test_set = cifar100.load_data()
    X, Y = train_set
    X = X.reshape((X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
    Y = Y.reshape((Y.shape[0],))

    trainingData = MakeData.shared((X[0:40000, :], Y[0:40000]))
    validationData = MakeData.shared((X[40000:50000, :], Y[40000:50000]))

    X, Y = test_set
    X = X.reshape((X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
    Y = Y.reshape((Y.shape[0],))
    testData = MakeData.shared((X, Y))

    return trainingData, validationData, testData
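MakeData.shared is not shown in this snippet; a hedged sketch of the Theano idiom such a helper usually wraps (this is an assumption, not the original implementation):

import numpy as np
import theano
import theano.tensor as T

def shared(data_xy, borrow=True):
    # assumed helper: wrap an (X, Y) pair as Theano shared variables
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    # labels are used as indices downstream, so expose an int32 view
    return shared_x, T.cast(shared_y, 'int32')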
def data():

    nb_dim=50
    (X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='fine')

    # convert class vectors to w2v class matrices
    Y_train = get_w2v_labels(y_train, dim=nb_dim)
    Y_test = get_w2v_labels(y_test, dim=nb_dim)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    return X_train, Y_train, X_test, Y_test
Example #18
def cifar100(*args, **kwargs):
    dataset = cx.Dataset()
    from keras.datasets import cifar100
    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    inputs = np.concatenate((x_train, x_test))
    labels = np.concatenate((y_train, y_test))
    targets = to_categorical(labels, 100)
    labels = np.array([str(label[0]) for label in labels], dtype=str)
    inputs = inputs.astype('float32')
    inputs /= 255
    dataset.name = "CIFAR-100"
    dataset.description = """
Original source: https://www.cs.toronto.edu/~kriz/cifar.html

This dataset is just like the CIFAR-10, except it has 100 classes
containing 600 images each. The 100 classes in the CIFAR-100 are grouped
into 20 superclasses. Each image comes with a "fine" label (the class
to which it belongs) and a "coarse" label (the superclass to which it
belongs).  Here is the list of classes in the CIFAR-100:

Superclass                     | Classes
-------------------------------|-----------------------------------------------------
aquatic mammals                | beaver, dolphin, otter, seal, whale
fish                           | aquarium fish, flatfish, ray, shark, trout
flowers                        | orchids, poppies, roses, sunflowers, tulips
food containers                | bottles, bowls, cans, cups, plates
fruit and vegetables           | apples, mushrooms, oranges, pears, sweet peppers
household electrical devices   | clock, computer keyboard, lamp, telephone, television
household furniture            | bed, chair, couch, table, wardrobe
insects                        | bee, beetle, butterfly, caterpillar, cockroach
large carnivores               | bear, leopard, lion, tiger, wolf
large man-made outdoor things  | bridge, castle, house, road, skyscraper
large natural outdoor scenes   | cloud, forest, mountain, plain, sea
large omnivores and herbivores | camel, cattle, chimpanzee, elephant, kangaroo
medium-sized mammals           | fox, porcupine, possum, raccoon, skunk
non-insect invertebrates       | crab, lobster, snail, spider, worm
people                         | baby, boy, girl, man, woman
reptiles                       | crocodile, dinosaur, lizard, snake, turtle
small mammals                  | hamster, mouse, rabbit, shrew, squirrel
trees                          | maple, oak, palm, pine, willow
vehicles 1                     | bicycle, bus, motorcycle, pickup truck, train
vehicles 2                     | lawn-mower, rocket, streetcar, tank, tractor

"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
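The fine/coarse grouping described above can be verified directly from the Keras loaders; a small hedged check (independent of the dataset wrapper in this example):

import numpy as np
from keras.datasets import cifar100

(_, y_fine), _ = cifar100.load_data(label_mode='fine')
(_, y_coarse), _ = cifar100.load_data(label_mode='coarse')

# each of the 20 superclasses should cover exactly 5 of the 100 fine classes
for c in range(20):
    assert len(np.unique(y_fine[y_coarse == c])) == 5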
def load_cifar100() :
    (train_data, train_labels), (test_data, test_labels) = cifar100.load_data()
    # train_data = train_data / 255.0
    # test_data = test_data / 255.0
    train_data, test_data = normalize(train_data, test_data)

    train_labels = to_categorical(train_labels, 100)
    test_labels = to_categorical(test_labels, 100)

    seed = 777
    np.random.seed(seed)
    np.random.shuffle(train_data)
    np.random.seed(seed)
    np.random.shuffle(train_labels)


    return train_data, train_labels, test_data, test_labels
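Re-seeding the generator before each shuffle keeps images and labels aligned; a hedged equivalent (not from the original) uses a single permutation index, which avoids relying on identical generator state:

# equivalent shuffle with one permutation so data and labels cannot drift apart
perm = np.random.RandomState(777).permutation(len(train_data))
train_data, train_labels = train_data[perm], train_labels[perm]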
def generateSpecializedData():
	(x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')

	index0 = np.where(y_train[:,0] == 0)[0]
	index1 = np.where(y_train[:,0] == 1)[0]
	index2 = np.where(y_train[:,0] == 2)[0]
	index3 = np.where(y_train[:,0] == 3)[0]
	index4 = np.where(y_train[:,0] == 4)[0]
	index5 = np.where(y_train[:,0] == 5)[0]
	index6 = np.where(y_train[:,0] == 6)[0]
	index7 = np.where(y_train[:,0] == 7)[0]
	index8 = np.where(y_train[:,0] == 8)[0]
	index9 = np.where(y_train[:,0] == 9)[0]
	index_others = y_train[:500,0]
	index = np.concatenate((index0, index1), axis = 0)
	index = np.concatenate((index, index2), axis = 0)
	index = np.concatenate((index, index3), axis = 0)
	index = np.concatenate((index, index4), axis = 0)
	index = np.concatenate((index, index5), axis = 0)
	index = np.concatenate((index, index6), axis = 0)
	index = np.concatenate((index, index7), axis = 0)
	index = np.concatenate((index, index8), axis = 0)
	index = np.concatenate((index, index9), axis = 0)
	index = np.concatenate((index, index_others), axis = 0)

	np.random.shuffle(index)


	train_index = index[:5000]
	test_index = index[-5000:]

	y_train[index_others] = 10


	sp_y_train = y_train[train_index]
	sp_y_test = y_train[test_index]
	sp_x_train = x_train[train_index]
	sp_x_test = x_train[test_index]

	sp_x_train = sp_x_train.astype('float32')
	sp_x_test = sp_x_test.astype('float32')

	sp_y_train = keras.utils.to_categorical(sp_y_train, 11)
	sp_y_test = keras.utils.to_categorical(sp_y_test, 11)
	return sp_x_train, sp_x_test, sp_y_train, sp_y_test
Example #21
def load_cifar100(label_mode='coarse'):
    (X_train, y_train), (X_test,
                         y_test) = cifar100.load_data(label_mode=label_mode)
    X_train = normalize_minus1_1(cast_to_floatx(X_train))
    X_test = normalize_minus1_1(cast_to_floatx(X_test))
    return (X_train, y_train), (X_test, y_test)
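normalize_minus1_1 is not defined in this snippet; a hedged sketch of what such a helper presumably does (an assumption: scale [0, 255] pixel values to [-1, 1]):

def normalize_minus1_1(x):
    # assumed behaviour: map uint8-range pixel values to the [-1, 1] interval
    return x / 127.5 - 1.0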
Example #22
from keras import backend as K
from visualize import plot_cm, get_accuracy
try:
    to_unicode = unicode
except NameError:
    to_unicode = str

n_classes = 100


# Load model
model = load_model('cifar100.h5')


# Load validation data
(X, y), (X_test, y_test) = cifar100.load_data()

X_train, X_val, y_train, y_val = train_test_split(X, y,
                                                  test_size=0.20,
                                                  random_state=42)

print("image_dim_ordering: %s" % K.image_dim_ordering())
print("shape=%s" % str(X_train.shape))

X_train = X_train.astype('float32')
X_val = X_val.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255.0
X_val /= 255.0
X_test /= 255.0
from __future__ import print_function
from keras.datasets import cifar10, cifar100

print('cifar10')
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
print(y_test[:20])

print('cifar100 fine')
(X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
print(y_test[:20])

print('cifar100 coarse')
(X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
print(y_test[:20])


# Load the data
from keras.datasets import cifar100
(x_train, y_train), (x_test,
                     y_test) = cifar100.load_data()  # (50000, 32, 32, 3)

# Preprocessing
# Scale x
x_train = x_train / 99
x_test = x_test / 99
# One-hot encode y
from keras.utils import np_utils
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
#----------------------------------------------------------------------------------------------------------------------
# 3-D model (LSTM)
from keras.models import Sequential, Model
from keras.layers import Input, LSTM, Dense, Conv2D, Dropout, MaxPooling2D, Flatten
x_train = x_train.reshape(-1, 32 * 32, 3)
x_test = x_test.reshape(-1, 32 * 32, 3)

input1 = Input(shape=(32 * 32, 3))
layer1 = LSTM(32)(input1)
layer2 = Dense(64)(layer1)
layer3 = Dropout(0.2)(layer2)

layer4 = Dense(128)(layer3)
layer5 = Dense(64)(layer4)
layer6 = Dropout(0.4)(layer5)

output1 = Dense(100, activation='softmax')(layer6)
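The snippet stops at the output layer; a hedged sketch (settings are illustrative, not from the original) of how the functional model would typically be assembled and trained:

model = Model(inputs=input1, outputs=output1)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# illustrative run: an LSTM over 32*32 = 1024 timesteps is slow, so keep epochs small
model.fit(x_train, y_train, epochs=1, batch_size=256, validation_split=0.1)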
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils

batch_size = 32
nb_classes = 100
nb_epoch = 200
data_augmentation = True

# input image dimensions
img_rows, img_cols = 32, 32
# the CIFAR10 images are RGB
img_channels = 3

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = cifar100.load_data()
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()

model.add(Convolution2D(32, 3, 3, border_mode='same',
                        input_shape=(img_channels, img_rows, img_cols)))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
Example #26
# 20-06-01 / Mon / main file for saving
# cifar100.hamsu.CNN
''' <Files produced by this script>
 1. 'dataset'_model_save.h5
 2. 'dataset'_save_weights.h5
 3. 'dataset'_checkpoint_best.h5
'''

### 1. Data
import numpy as np
from keras.datasets import cifar100

(x_train, y_train), (x_test, y_test) = cifar100.load_data()
print(x_train.shape)  # (50000, 32, 32, 3)
print(x_test.shape)  # (10000, 32, 32, 3)
print(y_train.shape)  # (50000, 1)
print(y_test.shape)  # (10000, 1)

# Preprocessing 1: One-hot encoding
from keras.utils import np_utils

y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
print(y_train.shape)  # (50000, 100)

# Preprocessing 2: Reshape & normalize
x_train = x_train.reshape(50000, 32, 32, 3).astype('float32') / 255
x_test = x_test.reshape(10000, 32, 32, 3).astype('float32') / 255

### 2. Model
from keras.models import Model
Example #27
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import regularizers
from keras.regularizers import l2
from keras.callbacks import ModelCheckpoint, EarlyStopping

import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

currentPath = os.getcwd()
os.chdir('D:/workspace/Deep_Learning/0.미니프로젝트_CIFAR10')

# Load and inspect the CIFAR data

(X_train, y_train0), (X_test, y_test0) = cifar100.load_data()
print(X_train.shape, X_train.dtype)
print(y_train0.shape, y_train0.dtype)
print(X_test.shape, X_test.dtype)
print(y_test0.shape, y_test0.dtype)

# Inspect the data / interpolation = "bicubic" -> uses the 16 neighboring pixels, weighting each pixel value by its distance
plt.subplot(141)
plt.imshow(X_train[0], interpolation="bicubic")
plt.grid(False)
plt.subplot(142)
plt.imshow(X_train[4], interpolation="bicubic")
plt.grid(False)
plt.subplot(143)
plt.imshow(X_train[8], interpolation="bicubic")
plt.grid(False)
def train(d):
    #
    # Log important data about how we were invoked.
    #

    L.getLogger("entry").info("INVOCATION:     " + " ".join(sys.argv))
    L.getLogger("entry").info("HOSTNAME:       " + socket.gethostname())
    L.getLogger("entry").info("PWD:            " + os.getcwd())

    summary = "\n"
    summary += "Environment:\n"
    summary += summarizeEnvvar("THEANO_FLAGS") + "\n"
    summary += "\n"
    summary += "Software Versions:\n"
    summary += "Theano:                  " + T.__version__ + "\n"
    summary += "Keras:                   " + keras.__version__ + "\n"
    summary += "\n"
    summary += "Arguments:\n"
    summary += "Path to Datasets:        " + str(d.datadir) + "\n"
    summary += "Path to Workspace:       " + str(d.workdir) + "\n"
    summary += "Model:                   " + str(d.model) + "\n"
    summary += "Dataset:                 " + str(d.dataset) + "\n"
    summary += "Number of Epochs:        " + str(d.num_epochs) + "\n"
    summary += "Batch Size:              " + str(d.batch_size) + "\n"
    summary += "Number of Start Filters: " + str(d.start_filter) + "\n"
    summary += "Number of Blocks/Stage:  " + str(d.num_blocks) + "\n"
    summary += "Optimizer:               " + str(d.optimizer) + "\n"
    summary += "Learning Rate:           " + str(d.lr) + "\n"
    summary += "Learning Rate Decay:     " + str(d.decay) + "\n"
    summary += "Learning Rate Schedule:  " + str(d.schedule) + "\n"
    summary += "Clipping Norm:           " + str(d.clipnorm) + "\n"
    summary += "Clipping Value:          " + str(d.clipval) + "\n"
    summary += "Dropout Probability:     " + str(d.dropout) + "\n"
    if d.optimizer in ["adam"]:
        summary += "Beta 1:                  " + str(d.beta1) + "\n"
        summary += "Beta 2:                  " + str(d.beta2) + "\n"
    else:
        summary += "Momentum:                " + str(d.momentum) + "\n"
    L.getLogger("entry").info(summary[:-1])

    #
    # Load dataset
    #

    L.getLogger("entry").info("Loading dataset {:s} ...".format(d.dataset))
    np.random.seed(d.seed % 2**32)
    if d.dataset == 'cifar10':
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        nb_classes = 10
        n_train = 45000
    elif d.dataset == 'cifar100':
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()
        nb_classes = 100
        n_train = 45000
    elif d.dataset == 'svhn':
        (X_train, y_train), (X_test, y_test) = svhn2.load_data()
        nb_classes = 10
        # Make classes 0 - 9 instead of 1 - 10
        y_train = y_train - 1
        y_test = y_test - 1
        n_train = 65000

    #
    # Compute and Shuffle Training/Validation/Test Split
    #

    shuf_inds = np.arange(len(y_train))
    np.random.seed(0xDEADBEEF)
    np.random.shuffle(shuf_inds)
    train_inds = shuf_inds[:n_train]
    val_inds = shuf_inds[n_train:]

    X_train = X_train.astype('float32') / 255.0
    X_test = X_test.astype('float32') / 255.0

    X_train_split = X_train[train_inds]
    X_val_split = X_train[val_inds]
    y_train_split = y_train[train_inds]
    y_val_split = y_train[val_inds]

    pixel_mean = np.mean(X_train_split, axis=0)

    X_train = X_train_split.astype(np.float32) - pixel_mean
    X_val = X_val_split.astype(np.float32) - pixel_mean
    X_test = X_test.astype(np.float32) - pixel_mean

    Y_train = to_categorical(y_train_split, nb_classes)
    Y_val = to_categorical(y_val_split, nb_classes)
    Y_test = to_categorical(y_test, nb_classes)

    if d.no_validation:
        X_train = np.concatenate([X_train, X_val], axis=0)
        Y_train = np.concatenate([Y_train, Y_val], axis=0)

    L.getLogger("entry").info("Training   set shape: " + str(X_train.shape))
    L.getLogger("entry").info("Validation set shape: " + str(X_val.shape))
    L.getLogger("entry").info("Test       set shape: " + str(X_test.shape))
    L.getLogger("entry").info("Loaded  dataset {:s}.".format(d.dataset))

    #
    # Initial Entry or Resume?
    #

    initialEpoch = 0
    chkptFilename = os.path.join(d.workdir, "chkpts", "ModelChkpt.hdf5")
    isResuming = os.path.isfile(chkptFilename)
    if isResuming:
        # Reload Model and Optimizer
        L.getLogger("entry").info("Reloading a model from " + chkptFilename +
                                  " ...")
        np.random.seed(d.seed % 2**32)
        model = KM.load_model(chkptFilename,
                              custom_objects={
                                  "ComplexConv2D": ComplexConv2D,
                                  "ComplexBatchNormalization": ComplexBN,
                                  "GetReal": GetReal,
                                  "GetImag": GetImag
                              })
        L.getLogger("entry").info("... reloading complete.")

        with H.File(chkptFilename, "r") as f:
            initialEpoch = int(f["initialEpoch"][...])
        L.getLogger("entry").info(
            "Training will restart at epoch {:5d}.".format(initialEpoch + 1))
        L.getLogger("entry").info("Compilation Started.")
    else:
        # Model
        L.getLogger("entry").info("Creating new model from scratch.")
        np.random.seed(d.seed % 2**32)
        model = getResnetModel(d)

        # Optimizer
        if d.optimizer in ["sgd", "nag"]:
            opt = SGD(lr=d.lr,
                      momentum=d.momentum,
                      decay=d.decay,
                      nesterov=(d.optimizer == "nag"),
                      clipnorm=d.clipnorm)
        elif d.optimizer == "rmsprop":
            opt = RMSprop(lr=d.lr, decay=d.decay, clipnorm=d.clipnorm)
        elif d.optimizer == "adam":
            opt = Adam(lr=d.lr,
                       beta_1=d.beta1,
                       beta_2=d.beta2,
                       decay=d.decay,
                       clipnorm=d.clipnorm)
        else:
            raise ValueError("Unknown optimizer " + d.optimizer)

        # Compile the model with that optimizer.
        L.getLogger("entry").info("Compilation Started.")
        model.compile(opt, 'categorical_crossentropy', metrics=['accuracy'])

    #
    # Precompile several backend functions
    #

    if d.summary:
        model.summary()
    L.getLogger("entry").info("# of Parameters:              {:10d}".format(
        model.count_params()))
    L.getLogger("entry").info("Compiling Train   Function...")
    t = -time.time()
    model._make_train_function()
    t += time.time()
    L.getLogger("entry").info(
        "                              {:10.3f}s".format(t))
    L.getLogger("entry").info("Compiling Predict Function...")
    t = -time.time()
    model._make_predict_function()
    t += time.time()
    L.getLogger("entry").info(
        "                              {:10.3f}s".format(t))
    L.getLogger("entry").info("Compiling Test    Function...")
    t = -time.time()
    model._make_test_function()
    t += time.time()
    L.getLogger("entry").info(
        "                              {:10.3f}s".format(t))
    L.getLogger("entry").info("Compilation Ended.")

    #
    # Create Callbacks
    #

    newLineCb = PrintNewlineAfterEpochCallback()
    lrSchedCb = LearningRateScheduler(schedule)
    testErrCb = TestErrorCallback((X_test, Y_test))
    saveLastCb = SaveLastModel(d.workdir, period=10)
    saveBestCb = SaveBestModel(d.workdir)
    trainValHistCb = TrainValHistory()

    callbacks = []
    callbacks += [newLineCb]
    if d.schedule == "default":
        callbacks += [lrSchedCb]
    callbacks += [testErrCb]
    callbacks += [saveLastCb]
    callbacks += [saveBestCb]
    callbacks += [trainValHistCb]

    #
    # Create training data generator
    #

    datagen = ImageDataGenerator(height_shift_range=0.125,
                                 width_shift_range=0.125,
                                 horizontal_flip=True)

    #
    # Enter training loop.
    #

    L.getLogger("entry").info("**********************************************")
    if isResuming:
        L.getLogger("entry").info(
            "*** Reentering Training Loop @ Epoch {:5d} ***".format(
                initialEpoch + 1))
    else:
        L.getLogger("entry").info(
            "***  Entering Training Loop  @ First Epoch ***")
    L.getLogger("entry").info("**********************************************")

    model.fit_generator(generator=datagen.flow(X_train,
                                               Y_train,
                                               batch_size=d.batch_size),
                        steps_per_epoch=(len(X_train) + d.batch_size - 1) //
                        d.batch_size,
                        epochs=d.num_epochs,
                        verbose=1,
                        callbacks=callbacks,
                        validation_data=(X_val, Y_val),
                        initial_epoch=initialEpoch)

    #
    # Dump histories.
    #

    np.savetxt(os.path.join(d.workdir, 'test_loss.txt'),
               np.asarray(testErrCb.loss_history))
    np.savetxt(os.path.join(d.workdir, 'test_acc.txt'),
               np.asarray(testErrCb.acc_history))
    np.savetxt(os.path.join(d.workdir, 'train_loss.txt'),
               np.asarray(trainValHistCb.train_loss))
    np.savetxt(os.path.join(d.workdir, 'train_acc.txt'),
               np.asarray(trainValHistCb.train_acc))
    np.savetxt(os.path.join(d.workdir, 'val_loss.txt'),
               np.asarray(trainValHistCb.val_loss))
    np.savetxt(os.path.join(d.workdir, 'val_acc.txt'),
               np.asarray(trainValHistCb.val_acc))
def get_mask(is_cifar100, hierarchy, num_pri):
    num_aux = hierarchy*num_pri
    if (is_cifar100):
	    (_, y_train_aux), (_, y_test_aux) = cifar100.load_data("coarse")

	    y_train = class_20_to_10(y_train_aux)
	    y_test = class_20_to_10(y_test_aux)

	    psi = np.asarray([hierarchy]*num_pri)

	    def mask_matrix(psi, num_aux):
	        index = np.zeros([psi.shape[0], num_aux])

	        for i in range(psi.shape[0]):
		        for j in range(psi[0]): 
		            index[i, np.sum(psi[:i])+j] =1
	        return index

	    index = mask_matrix(psi,num_aux)
	    M_tr = np.zeros([y_train.shape[0],num_aux], dtype='float32')

	    i = 0
	    for val in y_train:
	        M_tr[i] = index[int(val)]
	        i += 1


	    M_ts = np.zeros([y_test.shape[0],num_aux], dtype='float32')

	    i = 0
	    for val in y_test:
	        M_ts[i] = index[int(val)]
	        i += 1

	    np.save('train_mask_'+str(hierarchy)+'_100', M_tr)
	    np.save('test_mask_'+str(hierarchy)+'_100', M_ts)

    else:
	    (_, y_train), (_, y_test) = cifar10.load_data()

	    psi = np.asarray([hierarchy]*num_pri)

	    def mask_matrix(psi, num_aux):
	        index = np.zeros([psi.shape[0], num_aux])

	        for i in range(psi.shape[0]):
		        for j in range(psi[0]): 
		            index[i, np.sum(psi[:i])+j] =1
	        return index

	    index = mask_matrix(psi,num_aux)
	    M_tr = np.zeros([y_train.shape[0],num_aux], dtype='float32')

	    i = 0
	    for val in y_train:
	        M_tr[i] = index[int(val)]
	        i += 1

	    M_ts = np.zeros([y_test.shape[0],num_aux], dtype='float32')

	    i = 0
	    for val in y_test:
	        M_ts[i] = index[int(val)]
	        i += 1

	    np.save('train_mask_'+str(hierarchy), M_tr)
	    np.save('test_mask_'+str(hierarchy), M_ts)

    return
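For intuition, mask_matrix assigns each primary class its own block of hierarchy auxiliary units; a small hedged illustration (derived from the code above) with num_pri=3 and hierarchy=2:

# psi = [2, 2, 2], num_aux = 6; row i has ones in columns 2*i and 2*i + 1
# mask_matrix(np.asarray([2, 2, 2]), 6) ->
# [[1. 1. 0. 0. 0. 0.]
#  [0. 0. 1. 1. 0. 0.]
#  [0. 0. 0. 0. 1. 1.]]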
import sys
from IPython.core import ultratb

sys.excepthook = ultratb.FormattedTB(mode="Verbose", color_scheme="Linux", call_pdb=1)

batch_size = 32
nb_classes_fine = 100
nb_classes_coarse = 20

# input image dimensions
img_rows, img_cols = 32, 32
# the CIFAR10 images are RGB
img_channels = 3

# the data, shuffled and split between train and test sets
(X_train, y_train_fine), (X_test, y_test_fine) = cifar100.load_data(label_mode="fine")
(_, y_train_coarse), (_, y_test_coarse) = cifar100.load_data(label_mode="coarse")
print("X_train shape:", X_train.shape)
print(X_train.shape[0], "train samples")
print(X_test.shape[0], "test samples")
print("y_train_fine shape:", y_train_fine.shape)
print("y_train_coarse shape:", y_train_coarse.shape)

# convert class vectors to binary class matrices
Y_train_fine = np_utils.to_categorical(y_train_fine, nb_classes_fine)
Y_train_coarse = np_utils.to_categorical(y_train_coarse, nb_classes_coarse)
Y_test_fine = np_utils.to_categorical(y_test_fine, nb_classes_fine)
Y_test_coarse = np_utils.to_categorical(y_test_coarse, nb_classes_coarse)
print("Y_train_fine shape:", Y_train_fine.shape)
print("Y_train_coarse shape:", Y_train_coarse.shape)
def single_trial(use_tpu, batch_size, use_validation, use_augment, from_storage, parallel_workers):
    K.clear_session()
    model = create_wideresnet(7, 4, use_tpu)

    train_gen = ImageDataGenerator(
        rescale=1.0/255,
        width_shift_range=4.0/32,
        height_shift_range=4.0/32,
        horizontal_flip=True)
    val_gen = ImageDataGenerator(
        rescale=1.0/255)

    if not from_storage:
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()
        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)
        if not use_augment:
            X_train = (X_train / 255.0).astype(np.float32)
            X_test = (X_test / 255.0).astype(np.float32)

    timer = Timer()
    hist = History()

    n_train_examples, n_test_examples = 50000, 10000
    n_epochs = 1
    multiprocess = False if parallel_workers <= 1 else True

    print("Start training...")
    print(f"use_tpu:{use_tpu}, batch_size:{batch_size}, use_validation:{use_validation}, use_augment:{use_augment}, from_storage:{from_storage}, workers:{parallel_workers}")

    if from_storage:
        if use_augment:
            if use_validation:
                model.fit_generator(train_gen.flow_from_directory("cifar100-raw/train", target_size=(32, 32), 
                                                                  class_mode="categorical", shuffle=True,
                                                                  batch_size=batch_size), 
                                    steps_per_epoch=n_train_examples//batch_size, epochs=n_epochs,
                                    callbacks=[timer, hist],
                                    workers=parallel_workers, use_multiprocessing=multiprocess,
                                    validation_data=val_gen.flow_from_directory("cifar100-raw/test", target_size=(32, 32),
                                                                                class_mode="categorical", shuffle=True,
                                                                                batch_size=batch_size),
                                    validation_steps=n_test_examples//batch_size)
            else:
                model.fit_generator(train_gen.flow_from_directory("cifar100-raw/train", target_size=(32, 32), 
                                                                  class_mode="categorical", shuffle=True,
                                                                  batch_size=batch_size), 
                                    steps_per_epoch=n_train_examples//batch_size, epochs=n_epochs,
                                    callbacks=[timer, hist],
                                    workers=parallel_workers, use_multiprocessing=multiprocess)
        else:
            if use_validation:
                model.fit_generator(val_gen.flow_from_directory("cifar100-raw/train", target_size=(32, 32),
                                                                class_mode="categorical", shuffle=True,
                                                                batch_size=batch_size),
                                    steps_per_epoch=n_train_examples//batch_size, epochs=n_epochs,
                                    callbacks=[timer, hist],
                                    workers=parallel_workers, use_multiprocessing=multiprocess,
                                    validation_data=val_gen.flow_from_directory("cifar100-raw/test", target_size=(32, 32),
                                                                                class_mode="categorical", shuffle=True,
                                                                                batch_size=batch_size),
                                    validation_steps=n_test_examples//batch_size)
            else:
                model.fit_generator(val_gen.flow_from_directory("cifar100-raw/train", target_size=(32, 32),
                                                                class_mode="categorical", shuffle=True,
                                                                batch_size=batch_size),
                                    steps_per_epoch=n_train_examples//batch_size, epochs=n_epochs,
                                    callbacks=[timer, hist],
                                    workers=parallel_workers, use_multiprocessing=multiprocess)
    else:
        if use_augment:
            if use_validation:
                model.fit_generator(train_gen.flow(X_train, y_train, batch_size=batch_size, shuffle=True),
                                    steps_per_epoch=n_train_examples//batch_size,
                                    epochs=n_epochs, callbacks=[timer, hist],
                                    workers=parallel_workers, use_multiprocessing=multiprocess,
                                    validation_data=val_gen.flow(X_test, y_test), validation_steps=n_test_examples//batch_size)
            else:
                model.fit_generator(train_gen.flow(X_train, y_train, batch_size=batch_size, shuffle=True),
                                    steps_per_epoch=n_train_examples//batch_size,
                                    epochs=n_epochs, callbacks=[timer, hist],
                                    workers=parallel_workers, use_multiprocessing=multiprocess)
        else:
            # fit cannot be parallelized
            if use_validation:
                model.fit(X_train, y_train, batch_size=batch_size, epochs=n_epochs, callbacks=[timer, hist],
                          validation_data=(X_test, y_test))
            else:
                model.fit(X_train, y_train, batch_size=batch_size, epochs=n_epochs, callbacks=[timer, hist])

    history = hist.history
    history["initial_time"] = timer.inital_time
    history["times"] = timer.times

    result = {
        "device": "tpu" if use_tpu else "gpu",
        "batch_size" : batch_size,
        "use_validation" : use_validation,
        "use_augmentation" : use_augment,
        "from_storage": from_storage,
        "result" : history,
        "num_workers" : parallel_workers
    }

    return result
Example #32
                          nb_filter=nb_filter,
                          dropout_rate=dropout_rate,
                          bottleneck=bottleneck,
                          reduction=reduction,
                          weights=None)
func = K.function([model.layers[0].input], [model.layers[-2].output])

print("Model created")

optimizer = Adam(lr=1e-3,
                 decay=0.)  # Using Adam instead of SGD to speed up training
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=["accuracy"])

(trainX, trainY), (testX, testY) = cifar100.load_data()

trainX = trainX.astype('float32') / 255.
testX = testX.astype('float32') / 255.

trainX_mean = np.mean(trainX, axis=0)
trainX -= trainX_mean
testX -= trainX_mean

Y_train = np_utils.to_categorical(trainY, nb_classes)
Y_test = np_utils.to_categorical(testY, nb_classes)

generator = ImageDataGenerator(width_shift_range=0.1,
                               height_shift_range=0.1,
                               horizontal_flip=True)
Example #33
def get_cifar(dataset="cifar10", data_format="channels_first", augmented=False, batch_size=128, preprocessing="center", seed=777):
    """
    Returns train iterator and test X, y.
    """

    N = 1280

    # the data, shuffled and split between train and test sets
    if dataset == 'cifar10':
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    elif dataset == 'cifar100':
        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    else:
        raise NotImplementedError()

    if x_train.shape[3] == 3:
        logging.info("Transposing")
        x_train = x_train.transpose((0, 3, 1, 2))[0:N] # For speed
        x_test = x_test.transpose((0, 3, 1, 2))[0:N] # For speed
    assert x_train.shape[1] == 3

    if preprocessing == "center":
        mean = np.mean(x_train, axis=0, keepdims=True)
        std = np.std(x_train)
        x_train = (x_train - mean) / std
        x_test = (x_test - mean) / std
    elif preprocessing == "01": # Required by scatnet
        x_train = x_train / 255.0
        x_test = x_test / 255.0
    else:
        raise NotImplementedError("Not implemented preprocessing " + preprocessing)

    logging.info('x_train shape:' + str(x_train.shape))
    logging.info(str(x_train.shape[0]) + 'train samples')
    logging.info(str(x_test.shape[0]) + 'test samples')

    # convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(y_train)[0:N]
    y_test = np_utils.to_categorical(y_test)[0:N]

    # float32
    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")

    train, test = None, [x_test, y_test]
    if augmented:
        datagen_train = ImageDataGenerator(
            featurewise_center=False,
            samplewise_center=False,
            featurewise_std_normalization=False,
            samplewise_std_normalization=False,
            zca_whitening=False,
            rotation_range=0,
            data_format=data_format,
            width_shift_range=0.125,
            height_shift_range=0.125,
            horizontal_flip=True,
            vertical_flip=False)
        datagen_train.fit(x_train)
        train = datagen_train.flow(x_train, y_train, batch_size=batch_size, shuffle=True)
    else:
        train = _to_gen_with_shuffling([x_train, y_train], batch_size, seed)

    test = _to_gen_with_shuffling(test, batch_size, seed)

    return train, test, {"x_train": x_train, "y_train": y_train, "x_test": x_test, "y_test": y_test}
Example #34
def train(params, model):
    if params.dataset == 'cifar10':
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        nb_classes = 10
        n_train = 45000
    elif params.dataset == 'cifar100':
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()
        nb_classes = 100
        n_train = 45000

    X_train = X_train.astype('float32') / 255.0
    X_test = X_test.astype('float32') / 255.0

    shuf_inds = np.arange(len(y_train))
    np.random.seed(424242)
    np.random.shuffle(shuf_inds)
    train_inds = shuf_inds[:n_train]
    val_inds = shuf_inds[n_train:]


    X_train_split = X_train[train_inds]
    X_val_split = X_train[val_inds]
    y_train_split = y_train[train_inds]
    y_val_split = y_train[val_inds]

    pixel_mean = np.mean(X_train_split, axis=0)

    X_train = X_train_split.astype(np.float32) - pixel_mean
    X_val = X_val_split.astype(np.float32) - pixel_mean
    X_test = X_test.astype(np.float32) - pixel_mean

    Y_train = to_categorical(y_train_split, nb_classes)
    Y_val = to_categorical(y_val_split, nb_classes)
    Y_test = to_categorical(y_test, nb_classes)

    datagen = ImageDataGenerator(height_shift_range=0.125,
                                 width_shift_range=0.125,
                                 horizontal_flip=True)

    testErrCb = TestErrorCallback((X_test, Y_test))
    trainValHistCb = TrainValHistory()
    lrSchedCb = LearningRateScheduler(schedule)
    callbacks = [ModelCheckpoint('{}_weights.hd5'.format(params.mode), monitor='val_loss', verbose=0, save_best_only=True),
                 testErrCb,
                 lrSchedCb,
                 trainValHistCb]

    model.fit_generator(generator=datagen.flow(X_train, Y_train, batch_size=params.batch_size),
                        steps_per_epoch=(len(X_train)+params.batch_size-1) // params.batch_size,
                        epochs=params.num_epochs,
                        verbose=1,
                        callbacks=callbacks,
                        validation_data=(X_val, Y_val))

    # Dump histories.
    np.savetxt('{}_test_loss.txt'.format(params.mode), np.asarray(testErrCb.loss_history))
    np.savetxt('{}_test_acc.txt'.format(params.mode), np.asarray(testErrCb.acc_history))
    np.savetxt('{}_train_loss.txt'.format(params.mode), np.asarray(trainValHistCb.train_loss))
    np.savetxt('{}_train_acc.txt'.format(params.mode), np.asarray(trainValHistCb.train_acc))
    np.savetxt('{}_val_loss.txt'.format(params.mode), np.asarray(trainValHistCb.val_loss))
    np.savetxt('{}_val_acc.txt'.format(params.mode), np.asarray(trainValHistCb.val_acc))
Example #35
def train_data(ds_idx):
    time.sleep(5000 * random())
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar100.load_data(test_split=0.15)

    train_idx = np.where((y_train >= ds_idx * 10) & (y_train < (1 + ds_idx) * 10))[0]
    test_idx = np.where((y_test >= ds_idx * 10) & (y_test < (1 + ds_idx) * 10))[0]

    X_train = np.array([X_train[i] for i in train_idx])
    y_train  = np.array([y_train[i] for i in train_idx])
    X_test = np.array([X_test[i] for i in test_idx])
    y_test = np.array([y_test[i] for i in test_idx])

    print X_train.shape[0], 'train samples'
    print X_test.shape[0], 'test samples'

    y_train -= ds_idx * 10
    y_test -= ds_idx * 10

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    model = Sequential()

    model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
    model.add(Activation('relu'))
    model.add(Dropout(0.8))
    model.add(Convolution2D(32, 32, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(2, 2)))
    model.add(Dropout(0.75))

    model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
    model.add(Activation('relu'))
    model.add(Dropout(0.7))
    model.add(Convolution2D(64, 64, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(2, 2)))
    model.add(Dropout(0.6))

    model.add(Flatten(64*8*8))
    model.add(Dense(64*8*8, 512, init='normal'))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(512, nb_classes, init='normal'))
    model.add(Activation('softmax'))

    # let's train the model using SGD + momentum (how original).
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd)

    if not data_augmentation:
        print "Not using data augmentation or normalization"

        X_train = X_train.astype("float32")
        X_test = X_test.astype("float32")
        X_train /= 255
        X_test /= 255
        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=10)
        score = model.evaluate(X_test, Y_test, batch_size=batch_size)
        print 'Test score:', score

    else:
        print "Using real time data augmentation"

        # this will do preprocessing and realtime data augmentation
        datagen = ImageDataGenerator(
            featurewise_center=True, # set input mean to 0 over the dataset
            samplewise_center=False, # set each sample mean to 0
            featurewise_std_normalization=True, # divide inputs by std of the dataset
            samplewise_std_normalization=False, # divide each input by its std
            zca_whitening=False, # apply ZCA whitening
            rotation_range=20, # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.3, # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.3, # randomly shift images vertically (fraction of total height)
            horizontal_flip=True, # randomly flip images
            vertical_flip=False) # randomly flip images

        # compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied)
        datagen.fit(X_train)
        best_score = 0.0
        best_epoch = 0

        for e in range(nb_epoch):
            print '-'*40
            print 'Epoch', e
            print '-'*40
            print "Training..."
            # batch train with realtime data augmentation
            progbar = generic_utils.Progbar(X_train.shape[0])
            for X_batch, Y_batch in datagen.flow(X_train, Y_train):
                loss = model.train(X_batch, Y_batch)
                progbar.add(X_batch.shape[0], values=[("train loss", loss)])

            print "Testing..."
            # test time!
            progbar = generic_utils.Progbar(X_test.shape[0])
            pred = model.predict_classes(X_test, batch_size=batch_size)
            score = np_utils.accuracy(pred, Y_test)
            best_epoch, best_score = (best_epoch, best_score) if best_score >= score else (e, score)
            print 'Score: ', score
            print 'Best: ', best_score, ' at epoch: ', best_epoch
            #for X_batch, Y_batch in datagen.flow(X_test, Y_test):
                #score = model.test(X_batch, Y_batch)
                #progbar.add(X_batch.shape[0], values=[("test loss", score)])
        all_time_best.append((best_epoch, best_score))
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils, generic_utils
from keras.callbacks import ModelCheckpoint
import cPickle as pickle
from sklearn.metrics import confusion_matrix


batch_size = 50
nb_classes = 20
nb_epoch = 25
img_rows, img_cols = 32, 32
img_channels = 3

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='coarse')
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
print (Y_test.shape)
print ("binary classes done...")

model = Sequential()
#first layer convolution -> relu -> maxpool
model.add(Convolution2D(32, 3, 3, border_mode='full',
                        input_shape=(img_channels, img_rows, img_cols)))
model.add(Activation('relu'))
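The model definition above is cut off after the first convolution; a minimal, hedged completion in the same old-Keras style (layer sizes here are illustrative guesses, not the original author's code) could look like this:

# Hedged completion sketch: the original model is truncated above, so the
# layers below are illustrative, not the author's.
from keras.layers.core import Dense, Dropout, Flatten

model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)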
Exemple #37
0
def init_eval_environment():
    """Prepares the dataset and datagenerators used to train a model."""
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    logging.set_verbosity(logging.ERROR)  # Suppress all kinds of deprecation warnings

    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # Preprocess
    mean = np.mean(x_train, axis=(0, 1, 2))  # Per channel normalization
    std = np.std(x_train, axis=(0, 1, 2))
    x_train = (x_train - mean) / (std + 1e-7)
    x_test = (x_test - mean) / (std + 1e-7)

    y_train = np_utils.to_categorical(y_train, params.NUM_CLASSES)
    y_test = np_utils.to_categorical(y_test, params.NUM_CLASSES)

    # Data input
    class Dataset:
        """Class required for retrieving and preprocessing a single image."""
        def __init__(self, img_data, label_data, augmentation=False):
            self.img_data = img_data
            self.label_data = label_data
            self.augmentation = augmentation

        def __getitem__(self, i):
            label = self.label_data[i]
            image = self.img_data[i]
            if self.augmentation:
                image = preprocess_img.augment_img(image)
            return image, label

        def __len__(self):
            return len(self.label_data)

    class Dataloader(K.utils.Sequence):
        """Class required for iterating over batches of processed images."""
        def __init__(self, dataset, batch_size, shuffle=False):
            self.dataset = dataset
            self.batch_size = batch_size
            self.shuffle = shuffle
            self.indexes = np.arange(len(dataset))
            self.on_epoch_end()

        def __getitem__(self, i):
            start = i * self.batch_size
            stop = (i + 1) * self.batch_size
            data = []
            for j in range(start, stop):
                data.append(self.dataset[j])

            # Transpose list of lists
            batch = [np.stack(samples, axis=0) for samples in zip(*data)]
            return batch

        def __len__(self):
            """Denotes the number of batches per epoch"""
            return len(self.indexes) // self.batch_size

        def on_epoch_end(self):
            """Callback function to shuffle indexes each epoch"""
            if self.shuffle:
                self.indexes = np.random.permutation(self.indexes)

    train_dataset = Dataset(x_train, y_train, augmentation=True)
    val_dataset = Dataset(x_test, y_test)

    train_dataloader = Dataloader(train_dataset,
                                  batch_size=params.BATCH_SIZE,
                                  shuffle=True)
    val_dataloader = Dataloader(val_dataset,
                                batch_size=params.BATCH_SIZE,
                                shuffle=False)
    return train_dataloader, val_dataloader
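A hedged usage sketch of `init_eval_environment()`: Keras `Sequence` objects can be passed straight to `fit()`, but the compiled `model` and the `params.EPOCHS` constant below are assumptions, not names from the snippet above.

# Hedged usage sketch (not part of the original function); `model` and
# params.EPOCHS are assumed to be defined elsewhere.
train_dataloader, val_dataloader = init_eval_environment()
model.fit(train_dataloader,
          validation_data=val_dataloader,
          epochs=params.EPOCHS)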
Exemple #38
0
    #img_size = 512
    if x.shape[0] > img_size and x.shape[1] > img_size:
        xrand, yrand = 0, 0
        if rand != 0:
            xrand = np.random.randint(-rand, rand)
            yrand = np.random.randint(-rand, rand)
        xm = int(x.shape[0]//2-(img_size/2)+xrand)
        ym = int(x.shape[1]//2-(img_size/2)+yrand)
        return x[xm:xm+img_size, ym:ym+img_size, :]
    else:
        raise ResourceWarning('Image too small ({}, {}), passing'.format(x.shape[0], x.shape[1]))

data_dir = 'data'
    
if dataset == 'cifar100':
    (y_train, temp), (y_test, temp2) = cifar100.load_data()
    del(temp)
    del(temp2)
    y_train = y_train[:100]
    y_test = y_test[:10]
    
else:
    if os.path.isdir(dataset):
        data_dir = dataset
        val_dir = dataset+'_val'
    
    if os.path.isdir(args.valdir):
        val_dir = args.valdir

#print("Loading images into memory...", end='', flush=True)
#y_train = []
Exemple #39
0
def load_data(dataset,
              trans_type=TRANSFORMATION.clean,
              channel_first=False,
              trans_set='both'):
    assert dataset in DATA.get_supported_datasets()
    assert trans_set is None or trans_set in ['none', 'train', 'test', 'both']

    X_train = None
    Y_train = None
    X_test = None
    Y_test = None
    img_rows = 0
    img_cols = 0
    nb_channels = 0
    nb_classes = 0

    if DATA.mnist == dataset:
        """
        Dataset of 60,000 28x28 grayscale images of the 10 digits,
        along with a test set of 10,000 images.
        """
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()

        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.fation_mnist == dataset:
        """
        Dataset of 60,000 28x28 grayscale images of 10 fashion categories,
        along with a test set of 10,000 images. The class labels are:
        Label   Description
        0       T-shirt/top
        1       Trouser
        2       Pullover
        3       Dress
        4       Coat
        5       Sandal
        6       Shirt
        7       Sneaker
        8       Bag
        9       Ankle boot
        """
        (X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()

        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.cifar_10 == dataset:
        """
        Dataset of 50,000 32x32 color training images, labeled over 10 categories, and 10,000 test images.
        """
        (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 10
    elif DATA.cifar_100 == dataset:
        (X_train, Y_train), (X_test, Y_test) = cifar100.load_data(label_mode='fine')
        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 100

    X_train = X_train.reshape(-1, img_rows, img_cols, nb_channels)
    X_test = X_test.reshape(-1, img_rows, img_cols, nb_channels)
    """
    cast pixels to floats, normalize to [0, 1] range
    """
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    X_train = data_utils.rescale(X_train, range=(0., 1.))
    X_test = data_utils.rescale(X_test, range=(0., 1.))
    """
    one-hot-encode the labels
    """
    Y_train = keras.utils.to_categorical(Y_train, nb_classes)
    Y_test = keras.utils.to_categorical(Y_test, nb_classes)
    """
    transform images
    """
    if trans_set is not None:
        if trans_set in ['train', 'both']:
            X_train = transform(X_train, trans_type)
            X_train = data_utils.rescale(X_train, range=(0., 1.))

        if trans_set in ['test', 'both']:
            X_test = transform(X_test, trans_type)
            X_test = data_utils.rescale(X_test, range=(0., 1.))

    if channel_first:
        X_train = data_utils.set_channels_first(X_train)
        X_test = data_utils.set_channels_first(X_test)
    """
    summarize data set
    """
    print('Dataset({}) Summary:'.format(dataset.upper()))
    print('Train set: {}, {}'.format(X_train.shape, Y_train.shape))
    print('Test set: {}, {}'.format(X_test.shape, Y_test.shape))
    return (X_train, Y_train), (X_test, Y_test)
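A hedged usage sketch of `load_data()`, using only names that already appear in the function (`DATA.cifar_100`, `TRANSFORMATION.clean`):

# Hedged usage sketch of load_data(); DATA and TRANSFORMATION are the same
# objects referenced inside the function above.
(X_train, Y_train), (X_test, Y_test) = load_data(DATA.cifar_100,
                                                 trans_type=TRANSFORMATION.clean,
                                                 channel_first=False,
                                                 trans_set='both')
print(X_train.shape, Y_train.shape)  # expected: (50000, 32, 32, 3) (50000, 100)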
        Y = np_utils.to_categorical(y, nb_classes_coarse)
    else:
        Y = np_utils.to_categorical(y, nb_classes_fine)
    
    # Test the model
    Y_predict = model.predict(X, batch_size=batch_size, verbose=1)
    
    # Convert floating point vector to a clean binary vector with only two 1's
    Y_predict_clean = clean_vec(Y_predict)
    
    acc = accuracy(Y_predict_clean, Y)
    print("%s accuracy: %f" % (prefix_string, acc))

if 'hierarchy' in model_name:
    # Load and format data
    (X_train, y_train_fine), (X_test, y_test_fine) = cifar100.load_data(label_mode='fine')
    (_, y_train_coarse), (_, y_test_coarse) = cifar100.load_data(label_mode='coarse')

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    
    Y_train_fine = np_utils.to_categorical(y_train_fine, nb_classes_fine)
    Y_train_coarse = np_utils.to_categorical(y_train_coarse, nb_classes_coarse)
    Y_test_fine = np_utils.to_categorical(y_test_fine, nb_classes_fine)
    Y_test_coarse = np_utils.to_categorical(y_test_coarse, nb_classes_coarse)
    
    Y_train = np.concatenate((Y_train_coarse, Y_train_fine), axis=1)
    Y_test = np.concatenate((Y_test_coarse, Y_test_fine), axis=1)
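    # Hedged evaluation sketch (not in the original fragment): given a trained
    # `model` whose output width matches the concatenated coarse+fine target
    # above, split its predictions back into the two label blocks. `model` and
    # `batch_size` are assumed to be defined elsewhere in the script.
    Y_pred = model.predict(X_test, batch_size=batch_size)
    coarse_pred = np.argmax(Y_pred[:, :nb_classes_coarse], axis=1)
    fine_pred = np.argmax(Y_pred[:, nb_classes_coarse:], axis=1)
    print('coarse accuracy:', np.mean(coarse_pred == y_test_coarse.flatten()))
    print('fine accuracy:', np.mean(fine_pred == y_test_fine.flatten()))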
Exemple #41
0
    def train(self, model):

        #training parameters
        batch_size = 128
        maxepoches = 250
        learning_rate = 0.1
        lr_decay = 1e-6
        lr_drop = 20

        # The data, shuffled and split between train and test sets:
        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train, x_test = self.normalize(x_train, x_test)

        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        y_test = keras.utils.to_categorical(y_test, self.num_classes)

        def lr_scheduler(epoch):
            return learning_rate * (0.5**(epoch // lr_drop))

        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)

        #data augmentation
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images horizontally
            vertical_flip=False)  # randomly flip images vertically
        # compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)

        #optimization details
        sgd = optimizers.SGD(lr=learning_rate,
                             decay=lr_decay,
                             momentum=0.9,
                             nesterov=True)
        model.compile(loss='categorical_crossentropy',
                      optimizer=sgd,
                      metrics=['accuracy'])

        # training process with a learning rate drop every lr_drop (20) epochs.

        historytemp = model.fit_generator(datagen.flow(x_train,
                                                       y_train,
                                                       batch_size=batch_size),
                                          steps_per_epoch=x_train.shape[0] //
                                          batch_size,
                                          epochs=maxepoches,
                                          validation_data=(x_test, y_test),
                                          callbacks=[reduce_lr],
                                          verbose=2)
        model.save_weights('cifar100vgg.h5')
        return model
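For clarity, a minimal standalone check of the step schedule implemented by `lr_scheduler` above, with the same parameter values copied from the method:

# The learning rate halves every lr_drop (20) epochs.
learning_rate, lr_drop = 0.1, 20
for epoch in (0, 19, 20, 39, 40, 60):
    print(epoch, learning_rate * (0.5 ** (epoch // lr_drop)))
# -> 0.1, 0.1, 0.05, 0.05, 0.025, 0.0125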
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

batch_size = 128
num_classes = 100
epochs = 12

# input image dimensions
img_rows, img_cols = 32, 32

# the data, split between train and test sets

# fine labels (100 classes)
(x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')

input_shape = (img_rows, img_cols, 3)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
def get_data():
	(X_train, y_train), (X_test, y_test) = cifar100.load_data()
	return X_test, y_test
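A hedged usage note for `get_data()`: it returns only the CIFAR-100 test split, whose shapes can be checked directly.

# Hedged usage of get_data(): fetch the test split and inspect its shape.
X_test, y_test = get_data()
print(X_test.shape, y_test.shape)  # expected: (10000, 32, 32, 3) and (10000, 1)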