Example #1
def train(cnn, epochs=80, learn_rate=0.001, batch_size=100, gpu=True):
    """
    Train a regression CNN. Note that you do not need this function.
    Included for reference.
    """
    if gpu:
        cnn.cuda()

    # Set up L2 loss
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=learn_rate)

    # Loading & transforming data
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    train_rgb, train_grey = process(x_train, y_train)
    test_rgb, test_grey = process(x_test, y_test)
    # Debug inspection of the processed data, commented out so training is not
    # interrupted by the blocking input() call:
    # print(train_rgb)
    # print(len(train_rgb[0]))
    # print(len(train_rgb[0][0]))
    # print(len(train_rgb[0][0][0]))
    # input()

    print("Beginning training ...")

    for epoch in range(epochs):
        # Train the Model
        cnn.train() # Change model to 'train' mode
        for i, (xs, ys) in enumerate(get_batch(train_grey,
                                               train_rgb,
                                               batch_size)):
            images, labels = get_torch_vars(xs, ys, gpu)
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = cnn(images)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
        print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, epochs, loss.item()))

        # Evaluate the model
        cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
        losses = []
        for i, (xs, ys) in enumerate(get_batch(test_grey,
                                               test_rgb,
                                               batch_size)):
            images, labels = get_torch_vars(xs, ys, gpu)
            outputs = cnn(images)

            val_loss = criterion(outputs, labels)
            losses.append(val_loss.item())

        val_loss = np.mean(losses)
        print('Epoch [%d/%d], Val Loss: %.4f' % (epoch+1, epochs, val_loss))

    # Save the Trained Model
    torch.save(cnn.state_dict(), 'regression_cnn_k%d_f%d.pkl' % (
        args.kernel, args.num_filters))
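The function above relies on helpers (load_cifar10, process, get_batch, get_torch_vars) defined elsewhere in the repository, and on a module-level args for the checkpoint name. As a rough, hypothetical sketch (the name is reused from the call sites above; the exact behaviour is assumed), get_torch_vars for this regression setup could look like:

import torch

def get_torch_vars(xs, ys, gpu=False):
    # Hypothetical helper: wrap numpy batches as float tensors for the MSE loss
    # and move them to the GPU when requested.
    xs = torch.from_numpy(xs).float()
    ys = torch.from_numpy(ys).float()
    if gpu:
        xs, ys = xs.cuda(), ys.cuda()
    return xs, ys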
Example #2
def plot_activation(args, cnn, reg=True):
    # LOAD THE COLOURS CATEGORIES
    colours = np.load(args.colours)[0]
    num_colours = np.shape(colours)[0]

    (x_train, y_train), (x_test, y_test) = load_cifar10()
    test_rgb, test_grey = process_cls(x_test,
                                      y_test,
                                      downsize_input=args.downsize_input)
    test_rgb_cat = get_rgb_cat(test_rgb, colours)

    # Take the index of the test image
    id = args.index
    outdir = "outputs/" + args.experiment_name + '/act' + str(id)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    images, labels = get_torch_vars(np.expand_dims(test_grey[id], 0),
                                    np.expand_dims(test_rgb_cat[id], 0),
                                    args.gpu, reg)
    cnn.cpu()
    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1, keepdim=True)
    predcolor = get_cat_rgb(predicted.cpu().numpy()[0, 0, :, :], colours)
    img = predcolor
    toimage(predcolor, cmin=0, cmax=1) \
        .save(os.path.join(outdir, "output_%d.png" % id))

    if not args.downsize_input:
        img = np.tile(np.transpose(test_grey[id], [1, 2, 0]), [1, 1, 3])
    else:
        img = np.transpose(test_grey[id], [1, 2, 0])
    toimage(img, cmin=0, cmax=1) \
        .save(os.path.join(outdir, "input_%d.png" % id))

    img = np.transpose(test_rgb[id], [1, 2, 0])
    toimage(img, cmin=0, cmax=1) \
        .save(os.path.join(outdir, "input_%d_gt.png" % id))

    def add_border(img):
        return np.pad(img, 1, "constant", constant_values=1.0)

    def draw_activations(path, activation, imgwidth=4):
        img = np.vstack([
            np.hstack([
                add_border(filter)
                for filter in activation[i * imgwidth:(i + 1) * imgwidth, :, :]
            ]) for i in range(activation.shape[0] // imgwidth)
        ])
        scipy.misc.imsave(path, img)

    for i, tensor in enumerate(
        [cnn.out1, cnn.out2, cnn.out3, cnn.out4, cnn.out5]):
        draw_activations(
            os.path.join(outdir, "conv%d_out_%d.png" % (i + 1, id)),
            tensor.data.cpu().numpy()[0])
    print("visualization results are saved to %s" % outdir)
Example #3
    def train(self, epochs, batch_size=128, sample_interval=50):
        x_train_public, y_train_public, _, _, \
        x_train_secret, y_train_secret, _, _ = load_data.load_cifar10()

        label_secret = np.ones(shape=(batch_size, 1))
        label_public = np.zeros(shape=(batch_size, 1))

        for epoch in range(epochs):
            start = time.time()
            print("In the epoch ", epoch, "/", epochs)

            ####### generate images for the public set #######
            idx_public = random.sample(range(0, x_train_public.shape[0]),
                                       batch_size)
            image_batch_public = x_train_public[idx_public, :, :, :]
            label_batch_public = y_train_public[idx_public, :]
            generated_images_public = self.ae.predict(image_batch_public)

            ####### generate images for the secret set #######
            idx_secret = random.sample(range(0, x_train_secret.shape[0]),
                                       batch_size)
            image_batch_secret = x_train_secret[idx_secret, :, :, :]
            label_batch_secret = y_train_secret[idx_secret, :]
            generated_images_secret = self.ae.predict(image_batch_secret)

            l1 = self.attack.train_on_batch(image_batch_public,
                                            [label_public, label_batch_public])
            l2 = self.attack.train_on_batch(generated_images_public,
                                            [label_public, label_batch_public])
            l3 = self.attack.train_on_batch(image_batch_secret,
                                            [label_secret, label_batch_secret])
            l4 = self.attack.train_on_batch(generated_images_secret,
                                            [label_secret, label_batch_secret])

            g_loss1 = self.combined_model.train_on_batch(
                image_batch_public,
                [label_public, image_batch_public, label_batch_public])
            g_loss2 = self.combined_model.train_on_batch(
                image_batch_secret,
                [label_public, image_batch_secret, label_batch_secret])

            print("Epoch ", epoch, "took time", time.time() - start)
            if epoch % 20 == 0:
                self.save_model(epoch)
                self.sample_images(image_batch_secret[0], epoch, 'secret')
                self.sample_images(image_batch_public[0], epoch, 'public')
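sample_images and save_model are defined elsewhere in the class. A purely hypothetical sketch of what a sample_images helper could do (the name is reused from the call above; the layout, output path, and the assumption of pixel values in [0, 1] are all illustrative):

    def sample_images(self, image, epoch, tag):
        # Hypothetical helper: save one input image next to its autoencoder
        # reconstruction for visual inspection.
        import matplotlib.pyplot as plt
        recon = self.ae.predict(image[np.newaxis, ...])[0]
        fig, axes = plt.subplots(1, 2)
        axes[0].imshow(image)
        axes[0].set_title(tag)
        axes[1].imshow(recon)
        axes[1].set_title('reconstruction')
        fig.savefig('images/%s_%d.png' % (tag, epoch))
        plt.close(fig)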
Example #4
"""
K-means clustering of colors in RGB space.

You do not need this file for this assignment; it is included for completeness
to show how the color categories were generated.
"""

from __future__ import print_function

import numpy as np
import scipy.misc
import scipy.cluster

from load_data import load_cifar10

HORSE_CATEGORY = 7
k = 24

(x_train, y_train), (x_test, y_test) = load_cifar10()
MAX_PIXEL = 256.0
x_train = x_train / MAX_PIXEL
x_train = x_train[np.where(y_train == HORSE_CATEGORY)[0], :, :, :]

train_rgb = np.reshape(x_train, [-1, 3])
result = scipy.cluster.vq.kmeans(train_rgb, k)

np.save("colors/color_kmeans%d_horse.npy" % k, result)
Example #5
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt
plt.ion()

import load_data

from theano.tensor.nnet import conv
from theano.tensor.signal import downsample
# MULTIVERSO: import multiverso
import multiverso as mv
# MULTIVERSO: the sharedvar in theano_ext behaves just like Theano's shared
# variables, but uses multiverso as the backend.
from multiverso.theano_ext import sharedvar

x_train, t_train, x_test, t_test = load_data.load_cifar10()
labels_test = np.argmax(t_test, axis=1)

# reshape data
x_train = x_train.reshape((x_train.shape[0], 3, 32, 32))
x_test = x_test.reshape((x_test.shape[0], 3, 32, 32))

# define symbolic Theano variables
x = T.tensor4()
t = T.matrix()


# define model: neural network
def floatX(x):
    return np.asarray(x, dtype=theano.config.floatX)
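floatX simply casts an array to Theano's configured float type; a typical, purely illustrative use is initialising shared weights:

import theano

# Illustrative only: a small conv filter bank and bias in the configured float type.
w = theano.shared(floatX(np.random.randn(32, 3, 3, 3) * 0.01))
b = theano.shared(floatX(np.zeros(32)))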
Example #6
                        valid_batch_acc.data.cpu().numpy()),  #[0]),
                    'avgacc:{:.3f}'.format(np.mean(prev_accs_valid)))
                start = time.time()


if __name__ == "__main__":

    load_ = 0
    save_ = 0
    save_file = home + '/Documents/tmp/model.pt'

    #Load data

    # train_x, train_y, valid_x, valid_y = load_mnist()

    train_x, train_y, valid_x, valid_y = load_cifar10()
    train_x = np.reshape(train_x, [train_x.shape[0], 3, 32, 32])
    valid_x = np.reshape(valid_x, [valid_x.shape[0], 3, 32, 32])

    print(train_x.shape)
    print(train_y.shape)
    print(valid_x.shape)
    print(valid_y.shape)
    print()

    #Init model
    print('Loading model')
    use_cuda = True  # torch.cuda.is_available()
    n_gpus = 1  #2 #torch.cuda.device_count()
    if n_gpus < 2:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # '1' #which gpu
Example #7
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt
plt.ion()

import load_data

from theano.tensor.nnet import conv
from theano.tensor.signal import downsample
# MULTIVERSO: import multiverso
import multiverso as mv
# MULTIVERSO: the sharedvar in theano_ext behaves just like Theano's shared
# variables, but uses multiverso as the backend.
from multiverso.theano_ext import sharedvar


x_train, t_train, x_test, t_test = load_data.load_cifar10()
labels_test = np.argmax(t_test, axis=1)


# reshape data
x_train = x_train.reshape((x_train.shape[0], 3, 32, 32))
x_test = x_test.reshape((x_test.shape[0], 3, 32, 32))


# define symbolic Theano variables
x = T.tensor4()
t = T.matrix()


# define model: neural network
def floatX(x):
    return np.asarray(x, dtype=theano.config.floatX)
Example #8
import numpy as np
import matplotlib.pyplot as plt
import random
import imageio
import pickle
import os
from load_data import load_cifar10
from PIL import Image
import time

cifar10_dir = 'D:\\assignment1\\cifar-10-batches-py'
x_train, y_train, x_test, y_test = load_cifar10(cifar10_dir)

x_train = np.reshape(x_train, (x_train.shape[0], -1))

x_test = np.reshape(x_test, (x_test.shape[0], -1))

#print(mean_image[:10])
#plt.figure(figsize=(4,4))
#plt.imshow(mean_image.reshape((32,32,3)).astype('uint8'))
#plt.show()

#x_train = np.hstack([x_train, np.ones((x_train.shape[0], 1))])
#x_val = np.hstack([x_val, np.ones((x_val.shape[0], 1))])
#x_test = np.hstack([x_test, np.ones((x_test.shape[0], 1))])
#x_dev = np.hstack([x_dev, np.ones((x_dev.shape[0], 1))])

num_sample = x_train.shape[0]
num_class = 10
din = x_train.shape[1]
dout = 10
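The commented-out lines above hint at the usual preprocessing for a linear classifier on CIFAR-10. A short sketch of what they look like when enabled (only x_train/x_test are handled here; the x_val/x_dev splits mentioned in the comments are not created in this snippet):

# Subtract the mean training image so the features are roughly zero-centred.
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

# Append a constant 1 so the bias can be folded into the weight matrix.
x_train = np.hstack([x_train, np.ones((x_train.shape[0], 1))])
x_test = np.hstack([x_test, np.ones((x_test.shape[0], 1))])
din = x_train.shape[1]  # 3072 + 1 once the bias column is appended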
Example #9


if __name__ == "__main__":

    load_ = 0
    save_ = 0
    save_file = home+'/Documents/tmp/model.pt'



    #Load data

    # train_x, train_y, valid_x, valid_y = load_mnist()

    train_x, train_y, valid_x, valid_y = load_cifar10()
    train_x = np.reshape(train_x, [train_x.shape[0], 3, 32, 32])
    valid_x = np.reshape(valid_x, [valid_x.shape[0], 3, 32, 32])

    print(train_x.shape)
    print(train_y.shape)
    print(valid_x.shape)
    print(valid_y.shape)
    print()


    #Init model
    print('Loading model')
    use_cuda = True  # torch.cuda.is_available()
    n_gpus = 1  # 2 #torch.cuda.device_count()
    if n_gpus < 2:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # '1' #which gpu
Example #10
def train(args, cnn=None):
    # Set the maximum number of threads to prevent crash in Teaching Labs
    torch.set_num_threads(5)
    # Numpy random seed
    np.random.seed(args.seed)

    # Save directory
    save_dir = "outputs/" + args.experiment_name

    # LOAD THE COLOURS CATEGORIES
    colours = np.load(args.colours, encoding='bytes')[0]
    num_colours = np.shape(colours)[0]
    # INPUT CHANNEL
    num_in_channels = 1 if not args.downsize_input else 3
    # LOAD THE MODEL
    if cnn is None:
        if args.model == "CNN":
            cnn = CNN(args.kernel, args.num_filters, num_colours,
                      num_in_channels)
        elif args.model == "UNet":
            cnn = UNet(args.kernel, args.num_filters, num_colours,
                       num_in_channels)

    # LOSS FUNCTION
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=args.learn_rate)

    # DATA
    print("Loading data...")
    (x_train, y_train), (x_test, y_test) = load_cifar10()

    print("Transforming data...")
    train_rgb, train_grey = process_cls(x_train,
                                        y_train,
                                        downsize_input=args.downsize_input)
    train_rgb_cat = get_rgb_cat(train_rgb, colours)
    test_rgb, test_grey = process_cls(x_test,
                                      y_test,
                                      downsize_input=args.downsize_input)
    test_rgb_cat = get_rgb_cat(test_rgb, colours)

    # Create the outputs folder if not created already
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Beginning training ...")
    if args.gpu:
        cnn.cuda()
    start = time.time()

    train_losses = []
    valid_losses = []
    valid_accs = []
    for epoch in range(args.epochs):
        # Train the Model
        cnn.train()  # change model to 'train' mode
        losses = []
        for i, (xs, ys) in enumerate(
                get_batch(train_grey, train_rgb_cat, args.batch_size)):
            images, labels = get_torch_vars(xs, ys, args.gpu, False)
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = cnn(images)
            loss = compute_loss(criterion,
                                outputs,
                                labels,
                                batch_size=args.batch_size,
                                num_colours=num_colours)
            loss.backward()
            optimizer.step()
            losses.append(loss.data.item())

        # plot training images
        if args.plot:
            _, predicted = torch.max(outputs.data, 1, keepdim=True)
            plot_cls(xs, ys,
                     predicted.cpu().numpy(), colours,
                     save_dir + '/train_%d.png' % epoch, args.visualize,
                     args.downsize_input)

        # Report the average training loss for this epoch
        avg_loss = np.mean(losses)
        train_losses.append(avg_loss)
        time_elapsed = time.time() - start
        print('Epoch [%d/%d], Loss: %.4f, Time (s): %d' %
              (epoch + 1, args.epochs, avg_loss, time_elapsed))

        # Evaluate the model
        cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
        val_loss, val_acc = run_validation_step(
            cnn, criterion, test_grey, test_rgb_cat, args.batch_size, colours,
            save_dir + '/test_%d.png' % epoch, args.visualize,
            args.downsize_input, args.gpu, False)

        time_elapsed = time.time() - start
        valid_losses.append(val_loss)
        valid_accs.append(val_acc)
        print('Epoch [%d/%d], Val Loss: %.4f, Val Acc: %.1f%%, Time(s): %d' %
              (epoch + 1, args.epochs, val_loss, val_acc, time_elapsed))

    # Plot training curve
    plt.figure()
    plt.plot(train_losses, "ro-", label="Train")
    plt.plot(valid_losses, "go-", label="Validation")
    plt.legend()
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.savefig(save_dir + "/training_curve.png")

    if args.checkpoint:
        print('Saving model...')
        torch.save(cnn.state_dict(), args.checkpoint)

    return cnn
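train expects an argparse-style namespace. A hypothetical invocation (the attribute names come from the function above; every value is illustrative):

from argparse import Namespace

args = Namespace(
    seed=0,
    experiment_name="colourization_cnn",
    colours="colors/color_kmeans24_horse.npy",
    downsize_input=False,
    model="CNN",
    kernel=3,
    num_filters=32,
    learn_rate=0.001,
    batch_size=100,
    epochs=25,
    gpu=False,
    plot=False,
    visualize=False,
    checkpoint="outputs/colourization_cnn/weights.pth",
)
cnn = train(args)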
Example #11
def main(args):
    input_shape = (32, 32, 3)
    num_classes = 10
    batch_size = int(args.batch_size)
    epochs = int(args.epochs)

    # Load cifar10 data
    (X_train, y_train), (X_test, y_test) = load_cifar10()

    # Define model
    model = MobileNetV2(input_shape=input_shape,
                        nb_class=num_classes,
                        include_top=True).build()
    MODEL_NAME = "mobilenetv2__" + datetime.now().strftime("%Y-%m%d-%H%M%S")

    # Path & Env. settings -------------------------------------------------------------
    LOG_DIR = os.path.join("./log", MODEL_NAME)
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)

    shutil.copyfile(os.path.join(os.getcwd(), 'train.sh'),
                    os.path.join(LOG_DIR, 'train.sh'))
    shutil.copyfile(os.path.join(os.getcwd(), 'train.py'),
                    os.path.join(LOG_DIR, 'train.py'))
    shutil.copyfile(os.path.join(os.getcwd(), 'models.py'),
                    os.path.join(LOG_DIR, 'models.py'))

    MODEL_WEIGHT_CKP_PATH = os.path.join(LOG_DIR, "best_weights.h5")
    MODEL_TRAIN_LOG_CSV_PATH = os.path.join(LOG_DIR, "train_log.csv")
    # ----------------------------------------------------------------------------------

    # Compile model
    model.summary()
    model.compile(
        optimizer=keras.optimizers.SGD(lr=2e-2,
                                       momentum=0.9,
                                       decay=0.0,
                                       nesterov=False),
        loss='categorical_crossentropy',
        loss_weights=[
            1.0
        ],  # Loss weight for the model output (excluding regularization loss); set to 0.0 to validate only the regularization term.
        metrics=['accuracy'])

    # Load initial weights from pre-trained model
    if args.trans_learn:
        model.load_weights(str(args.weights_path), by_name=False)
        print("Load model init weights from", MODEL_INIT_WEIGHTS_PATH)

    print("Produce training results in", LOG_DIR)

    # Set the learning rate schedule (300 values, one per epoch)
    learning_rates = ([2e-2] * 5 + [1e-2] * 45 + [8e-3] * 50 +
                      [4e-3] * 50 + [2e-3] * 50 + [1e-3] * 100)

    # Set model callbacks
    callbacks = []
    callbacks.append(
        ModelCheckpoint(MODEL_WEIGHT_CKP_PATH,
                        monitor='val_loss',
                        save_best_only=True,
                        save_weights_only=True))
    callbacks.append(CSVLogger(MODEL_TRAIN_LOG_CSV_PATH))
    callbacks.append(
        LearningRateScheduler(lambda epoch: float(learning_rates[epoch])))

    # Data generator with data augmentation
    datagen = keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,
        featurewise_std_normalization=False,
        rotation_range=0.0,
        width_shift_range=0.2,
        height_shift_range=0.2,
        vertical_flip=False,
        horizontal_flip=True)
    datagen.fit(X_train)

    # Train model
    history = model.fit_generator(datagen.flow(X_train,
                                               y_train,
                                               batch_size=batch_size),
                                  steps_per_epoch=len(X_train) // batch_size,
                                  epochs=epochs,
                                  verbose=1,
                                  callbacks=callbacks,
                                  validation_data=(X_test, y_test))

    # Validation
    val_loss, val_acc = model.evaluate(X_test, y_test, verbose=1)
    print("--------------------------------------")
    print("model name : ", MODEL_NAME)
    print("validation loss     : {:.5f}".format(val_loss))
    print("validation accuracy : {:.5f}".format(val_acc))

    # Save model as "instance"
    ins_name = 'model_instance'
    ins_path = os.path.join(LOG_DIR, ins_name) + '.h5'
    model.save(ins_path)

    # Save model as "architechture"
    arch_name = 'model_fin_architechture'
    arch_path = os.path.join(LOG_DIR, arch_name) + '.json'
    json_string = model.to_json()
    with open(arch_path, 'w') as f:
        f.write(json_string)
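Reloading the artifacts written above later only needs the standard Keras calls, for example (custom layers, if any, would have to be passed via custom_objects):

from keras.models import model_from_json

# Rebuild the architecture from the saved JSON and restore the best checkpoint weights.
with open(arch_path) as f:
    reloaded = model_from_json(f.read())
reloaded.load_weights(MODEL_WEIGHT_CKP_PATH)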
Example #12
def run_ResNet(dataset,
               depth,
               n_epochs,
               batch_size,
               lookahead,
               alpha0,
               experiment_dir,
               epsilon,
               random_seed,
               output_file_base_name,
               gradient_clipping=None,
               force=False,
               n_validation_resamples=3.,
               n_test_resamples=5.):

    # LOAD DATA
    if "mnist_plus_rot" in dataset:
        datasets = load_mnist_w_rotations(dataset,
                                          flatten=False,
                                          split=(70000, 10000, 20000))
        dataset_name = "mnist_w_rotation"
        input_layer = InputLayer(shape=(None, 1, 28, 28))
        output_size = 10

    elif "mnist" in dataset:
        # We follow the approach used in [2] to split the MNIST dataset.
        datasets = load_mnist(dataset,
                              flatten=False,
                              split=(45000, 5000, 10000))
        dataset_name = "mnist"
        input_layer = InputLayer(shape=(None, 1, 28, 28))
        output_size = 10

    elif "cifar10" in dataset:
        # We split the Cifar-10 dataset according to [2].
        datasets = load_cifar10(dataset,
                                flatten=False,
                                split=(45000, 5000, 10000))
        dataset_name = "cifar10"
        input_layer = InputLayer(shape=(None, 3, 32, 32))
        output_size = 10

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    train_set_size = int(train_set_y.shape[0].eval())
    valid_set_size = int(valid_set_y.shape[0].eval())
    test_set_size = int(test_set_y.shape[0].eval())
    print 'Dataset {} loaded ({:,}|{:,}|{:,})'.format(dataset_name,
                                                      train_set_size,
                                                      valid_set_size,
                                                      test_set_size)

    # compute number of minibatches for training, validation and testing
    n_train_batches = int(np.ceil(train_set_size / batch_size))
    n_valid_batches = int(np.ceil(valid_set_size / batch_size))
    n_test_batches = int(np.ceil(test_set_size / batch_size))

    # BUILD MODEL
    print 'Building the model ...'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    index.tag.test_value = 0

    # epoch = T.scalar()
    x = T.tensor4('x')  # the data is presented as rasterized images
    y = T.vector(
        'y')  # the labels are presented as 1D vector of [floatX] labels.

    # Test values are useful for debugging with THEANO_FLAGS="compute_test_value=warn"
    x.tag.test_value = train_set_x[:batch_size].eval()
    y.tag.test_value = train_set_y[:batch_size].eval()

    input_layer.input_var = x
    layers_per_phase = ((depth - 2) // 9) * 3
    network, infos = build_sb_resnet(input_layer, depth, output_size)
    print "Number of parameters in model: {:,}".format(
        lasagne.layers.count_params(network, trainable=True))

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    ll_term = lasagne.objectives.categorical_crossentropy(
        prediction, T.cast(y, dtype="int32"))
    kl_term_1 = calc_kl_divergence(infos[0], alpha=1., beta=alpha0)
    kl_term_2 = calc_kl_divergence(infos[1], alpha=1., beta=alpha0)
    kl_term_3 = calc_kl_divergence(infos[2], alpha=1., beta=alpha0)
    kl_term = kl_term_1 + kl_term_2 + kl_term_3
    cost = T.mean(ll_term + kl_term)

    # Compute average number of layers that have a stick length >= 1% in each phase.
    avg_n_layers_phase1 = calc_avg_n_layers(infos[0])
    avg_n_layers_phase2 = calc_avg_n_layers(infos[1])
    avg_n_layers_phase3 = calc_avg_n_layers(infos[2])
    avg_kl_term_1 = T.mean(kl_term_1)
    avg_kl_term_2 = T.mean(kl_term_2)
    avg_kl_term_3 = T.mean(kl_term_3)

    # Build the expression for the cost function.
    params = lasagne.layers.get_all_params(network, trainable=True)

    # If params already exist and 'force' is False, reload parameters.
    params_pkl_filename = pjoin(
        experiment_dir,
        'conv_sb-resnet_params_' + output_file_base_name + '.pkl')
    print "Checking if '{}' already exists.".format(params_pkl_filename)
    if os.path.isfile(params_pkl_filename) and not force:
        print "Yes! Reloading existing parameters and resuming training (use --force to overwrite)."
        last_params = cPickle.load(open(params_pkl_filename, 'rb'))
        for param, last_param in zip(params, last_params):
            param.set_value(last_param)
    elif force:
        print "Yes! but --force was used. Starting from scratch."
    else:
        print "No! Starting from scratch."

    gradients = dict(zip(params, T.grad(cost, params)))

    if gradient_clipping is not None:
        grad_norm = T.sqrt(
            sum(map(lambda d: T.sqr(d).sum(), gradients.values())))
        # Note that rescaling is one if grad_norm <= threshold.
        rescaling = gradient_clipping / T.maximum(grad_norm, gradient_clipping)

        new_gradients = OrderedDict()
        for param, gparam in gradients.items():
            gparam_clipped = gparam * rescaling
            new_gradients[param] = gparam_clipped

        gradients = new_gradients

    updates = utils.get_adam_updates_from_gradients(gradients)

    # Compile theano function for training. This updates the model parameters and
    # returns the training nll term, kl term, and the avg. nb. of layers used in each phase.
    print 'Compiling train function ...'
    compiling_start = time.time()
    train_model = theano.function(
        inputs=[index],
        outputs=[
            ll_term.mean(),
            kl_term.mean(), avg_n_layers_phase1, avg_n_layers_phase2,
            avg_n_layers_phase3, avg_kl_term_1, avg_kl_term_2, avg_kl_term_3
        ],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print "{:.2f}".format((time.time() - compiling_start) / 60.)

    # Create a loss expression for validation/testing
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, T.cast(y, dtype="int32"))
    test_loss = test_loss.mean()
    test_error = T.sum(T.neq(T.argmax(test_prediction, axis=1), y),
                       dtype=theano.config.floatX)

    print 'Compiling valid function ...'
    compiling_start = time.time()
    valid_model = theano.function(
        inputs=[index],
        outputs=[test_loss, test_error],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print "{:.2f}".format((time.time() - compiling_start) / 60.)

    print 'Compiling test function ...'
    compiling_start = time.time()
    test_model = theano.function(
        inputs=[index],
        outputs=[test_loss, test_error],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    print "{:.2f}".format((time.time() - compiling_start) / 60.)

    ###############
    # TRAIN MODEL #
    ###############
    print 'Training for {} epochs ...'.format(n_epochs)

    best_params = None
    best_valid_error = np.inf
    best_iter = 0
    start_time = time.clock()

    results_filename = pjoin(
        experiment_dir,
        "conv_sb-resnet_results_" + output_file_base_name + ".txt")
    if os.path.isfile(results_filename) and not force:
        last_result = open(results_filename, 'rb').readlines()[-1]
        idx_start = len("epoch ")
        idx_end = last_result.find(",", idx_start + 1)
        start_epoch = int(last_result[idx_start:idx_end]) + 1
        results_file = open(results_filename, 'ab')
    else:
        start_epoch = 0
        results_file = open(results_filename, 'wb')

    stop_training = False
    for epoch_counter in range(start_epoch, n_epochs):
        if stop_training:
            break

        # Train this epoch
        epoch_start_time = time.time()
        avg_training_loss_tracker = 0.
        avg_training_kl_tracker = 0.
        avg_n_layers_phase1_tracker = 0.
        avg_n_layers_phase2_tracker = 0.
        avg_n_layers_phase3_tracker = 0.
        avg_kl_term_1_tracker = 0.
        avg_kl_term_2_tracker = 0.
        avg_kl_term_3_tracker = 0.

        for minibatch_index in xrange(n_train_batches):
            avg_training_loss, avg_training_kl, avg_n_layers_phase1, avg_n_layers_phase2, avg_n_layers_phase3, avg_kl_term_1, avg_kl_term_2, avg_kl_term_3 = train_model(
                minibatch_index)
            if minibatch_index % 1 == 0:
                results = "batch #{}-{}, avg n_layers per phase ({:.2f}|{:.2f}|{:.2f})/{}, training loss (nll) {:.4f}, training kl-div {:.4f} ({:.4f}|{:.4f}|{:.4f}), time {:.2f}m"
                results = results.format(epoch_counter, minibatch_index,
                                         float(avg_n_layers_phase1),
                                         float(avg_n_layers_phase2),
                                         float(avg_n_layers_phase3),
                                         layers_per_phase,
                                         float(avg_training_loss),
                                         float(avg_training_kl),
                                         float(avg_kl_term_1),
                                         float(avg_kl_term_2),
                                         float(avg_kl_term_3),
                                         (time.time() - epoch_start_time) /
                                         60.)
                print results

            if np.isnan(avg_training_loss):
                msg = "NaN detected! Stopping."
                print msg
                results_file.write(msg + "\n")
                results_file.flush()
                sys.exit(1)

            avg_training_loss_tracker += avg_training_loss
            avg_training_kl_tracker += avg_training_kl
            avg_n_layers_phase1_tracker += avg_n_layers_phase1
            avg_n_layers_phase2_tracker += avg_n_layers_phase2
            avg_n_layers_phase3_tracker += avg_n_layers_phase3
            avg_kl_term_1_tracker += avg_kl_term_1
            avg_kl_term_2_tracker += avg_kl_term_2
            avg_kl_term_3_tracker += avg_kl_term_3

        epoch_end_time = time.time()

        # Compute some infos about training.
        avg_training_loss_tracker /= n_train_batches
        avg_training_kl_tracker /= n_train_batches
        avg_n_layers_phase1_tracker /= n_train_batches
        avg_n_layers_phase2_tracker /= n_train_batches
        avg_n_layers_phase3_tracker /= n_train_batches
        avg_kl_term_1_tracker /= n_train_batches
        avg_kl_term_2_tracker /= n_train_batches
        avg_kl_term_3_tracker /= n_train_batches

        # Compute validation error --- sample multiple times to simulate posterior predictive distribution
        valid_errors = np.zeros((n_valid_batches, ))
        valid_loss = np.zeros((n_valid_batches, ))
        for idx in xrange(int(n_validation_resamples)):
            temp_valid_loss, temp_valid_errors = zip(
                *[valid_model(i) for i in xrange(n_valid_batches)])
            valid_errors += temp_valid_errors
            valid_loss += temp_valid_loss
        valid_loss = np.sum(
            valid_loss / n_validation_resamples) / n_valid_batches
        valid_nb_errors = np.sum(valid_errors / n_validation_resamples)
        valid_error = valid_nb_errors / valid_set_size

        results = (
            "epoch {}, avg n_layers per phase ({:.2f}|{:.2f}|{:.2f})/{}, train loss (nll) {:.4f}, "
            "train kl-div {:.4f}, train kl-div per phase ({:.4f}|{:.4f}|{:.4f}), "
            "valid loss {:.4f}, valid error {:.2%} ({:,}), time {:.2f}m")

        if valid_error < best_valid_error:
            best_iter = epoch_counter
            best_valid_error = valid_error
            results += " **"
            # Save progression
            best_params = [param.get_value().copy() for param in params]
            cPickle.dump(best_params,
                         open(params_pkl_filename, 'wb'),
                         protocol=cPickle.HIGHEST_PROTOCOL)
        elif epoch_counter - best_iter > lookahead:
            stop_training = True

        # Report and save progress.
        results = results.format(epoch_counter, avg_n_layers_phase1_tracker,
                                 avg_n_layers_phase2_tracker,
                                 avg_n_layers_phase3_tracker, layers_per_phase,
                                 avg_training_loss_tracker,
                                 avg_training_kl_tracker,
                                 avg_kl_term_1_tracker, avg_kl_term_2_tracker,
                                 avg_kl_term_3_tracker, valid_loss,
                                 valid_error, valid_nb_errors,
                                 (epoch_end_time - epoch_start_time) / 60)
        print results

        results_file.write(results + "\n")
        results_file.flush()

    end_time = time.clock()

    # Reload best model.
    for param, best_param in zip(params, best_params):
        param.set_value(best_param)

    # Compute test error --- sample multiple times to simulate posterior predictive distribution
    test_errors = np.zeros((n_test_batches, ))
    test_loss = np.zeros((n_test_batches, ))
    for idx in xrange(int(n_test_resamples)):
        temp_test_loss, temp_test_errors = zip(
            *[test_model(i) for i in xrange(n_test_batches)])
        test_errors += temp_test_errors
        test_loss += temp_test_loss
    test_loss = np.sum(test_loss / n_test_resamples) / n_test_batches
    test_nb_errors = np.sum(test_errors / n_test_resamples)
    test_error = test_nb_errors / test_set_size

    results = "Done! best epoch {}, test loss {:.4f}, test error {:.2%} ({:,}), training time {:.2f}m"
    results = results.format(best_iter, test_loss, test_error, test_nb_errors,
                             (end_time - start_time) / 60)
    print results

    results_file.write(results + "\n")
    results_file.close()

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
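A hypothetical call to run_ResNet (only the parameter names come from the signature above; every value, including the dataset path, is illustrative):

run_ResNet(dataset="data/cifar10.pkl",
           depth=110,
           n_epochs=200,
           batch_size=128,
           lookahead=20,
           alpha0=5.,
           experiment_dir="experiments/sb_resnet",
           epsilon=1e-6,
           random_seed=1234,
           output_file_base_name="cifar10_d110",
           gradient_clipping=10.)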
Example #13
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers import Input, Conv2D, MaxPooling2D
from keras.layers.advanced_activations import LeakyReLU
import load_data
from keras.models import load_model
import numpy as np
import os 

def generated_images(data):
    model = load_model('./models_vaegan/1980_autoencoder.h5')
    generated_images = model.predict(data)
    return generated_images

x_train_public, y_train_public, x_test_public, y_test_public, \
    x_train_secret, y_train_secret, x_test_secret, y_test_secret = load_data.load_cifar10()
x_train_public_generated = generated_images(x_train_public)
x_test_public_generated = generated_images(x_test_public)
x_train_secret_generated = generated_images(x_train_secret)
x_test_secret_generated = generated_images(x_test_secret)


def cnn_model():
    d0 = Input((x_train_public.shape[1:]))
    # x0 = Dense(img_rows*img_cols*1, activation = 'relu')(d0)
    # x0 = Reshape((img_rows,img_cols,1))(x0)
    x = Conv2D(32, (5, 5), padding='same', name='id_conv1')(d0)
    x = LeakyReLU(0.2)(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((3, 3), padding='same', strides=(2, 2))(x)