def test(depth, p, dataset, num_epochs=200, seed=None, batch_size=128):
    # NOTE: batch_size is added here as a keyword argument with an assumed
    # default; the original snippet relied on a module-level `batch_size`
    # that is not defined in this excerpt.
    if seed is None:
        seed = 0

    np.random.seed(seed)

    data = None
    if dataset == "mnist":
        data = mnist.load().astype(np.float32)
    elif dataset == "cifar10":
        data = cifar10.load().astype(np.float32)
    else:
        raise ValueError("unknown dataset: {}".format(dataset))

    num_observations, input_dim = data.shape
    data_split_index = int(num_observations * 0.9)
    training_data_iterator = DataIterator(batch_size, data[:data_split_index],
                                          data[:data_split_index])
    validation_data_iterator = DataIterator(batch_size,
                                            data[data_split_index:],
                                            data[data_split_index:])

    # make net
    net = Network(input_dim, input_dim, hidden_layers=([
        1000,
    ] * depth), p=p)
    losses = net.train(training_data_iterator,
                       validation_data_iterator,
                       num_epochs=num_epochs)
    net.close()

    return losses
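
# A minimal usage sketch (assumption: these hyperparameter values are purely
# illustrative and not taken from the original experiments):
# losses = test(depth=4, p=0.9, dataset="mnist", num_epochs=50, seed=0)
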
def mnist():
    # NOTE: reconstructed wrapper; the original snippet starts mid-function.
    # The gzip/pickle loading step and the 'mnist.pkl.gz' path are assumptions
    # (cf. the commented-out `mnist()` call below); `one_hot` is defined elsewhere.
    import gzip
    import pickle
    f = gzip.open('mnist.pkl.gz', 'rb')
    train, valid, test = pickle.load(f)
    train_ = {
        'data': train[0].reshape(-1, 28, 28, 1),
        'labels': one_hot(train[1], 10)
    }
    test_ = {
        'data': test[0].reshape(-1, 28, 28, 1),
        'labels': one_hot(test[1], 10)
    }
    valid_ = {
        'data': valid[0].reshape(-1, 28, 28, 1),
        'labels': one_hot(valid[1], 10)
    }
    f.close()
    return train_, test_, valid_

train = cifar10.load(full=True)

# train, test, valid = mnist()
cursor = 0


def next_batch(nB):
    global cursor
    if cursor + nB < train.shape[0]:
        begin = cursor
    else:
        begin = cursor = 0
    end = cursor + nB
    cursor += nB
    # assumption: the original `return {` was truncated; returning the raw
    # slice of `train` is the simplest reconstruction of a batch
    return train[begin:end]


# NOTE: the two divisions below belong to the preprocessing hook used by the
# siamese feeders further down; the `def` line is a reconstruction.
def cifar_preprocess(data):
    data['data0'] /= 255.
    data['data1'] /= 255.
    return data
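
# Usage sketch (assumption: `train` is the (N, D) array loaded above):
# batch = next_batch(128)   # rows train[0:128]
# batch = next_batch(128)   # rows train[128:256]; the cursor wraps back to 0 near the end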

# Logger setup
logger = Logger('CIFAR SIAMESE',
        train_log_mode='TRAIN_LOSS_ONLY',
        test_log_mode='TEST_LOSS_ONLY')

# Configure GPU Device
if args.gpu >= 0:
    cuda.check_cuda_available()
xp = cuda.cupy if args.gpu >= 0 else np

# loading dataset
dataset = cifar10.load()

dim = dataset['train']['data'][0].size
N_train = len(dataset['train']['target'])
N_test = len(dataset['test']['target'])
train_data_dict = {'data':dataset['train']['data'].astype(np.float32),
                   'target':dataset['train']['target'].astype(np.int32)}
test_data_dict = {'data':dataset['test']['data'].astype(np.float32),
                  'target':dataset['test']['target'].astype(np.int32)}
train_data = datafeeders.SiameseFeeder(train_data_dict, batchsize=args.batch)
test_data = datafeeders.SiameseFeeder(test_data_dict, batchsize=args.valbatch)

train_data.hook_preprocess(cifar_preprocess)
test_data.hook_preprocess(cifar_preprocess)

def test_init(weight_sigma_square):
    p = 0.6
    max_num_layers = 1000
    num_hidden_layers = np.floor(
        np.min(
            [max_num_layers,
             depth("Dropout", weight_sigma_square, p) * 1.1])).astype(int)

    print("Testing sigma_w = {: 2.2f}; Using {: 4d} layers...".format(
        weight_sigma_square, num_hidden_layers))

    batch_size = 128

    input_data = None
    if dataset == "mnist":
        input_data = mnist.load().astype(np.float32)
    elif dataset == "cifar10":
        input_data = cifar10.load().astype(np.float32)

    # batch data for memory purposes
    input_data_iterator = DataIterator(batch_size, input_data, input_data)
    num_batches = input_data_iterator.size()

    input_size = input_data.shape[-1]
    net = Network(input_size,
                  input_size, [
                      1000,
                  ] * num_hidden_layers,
                  activation="relu",
                  weight_sigma=np.sqrt(weight_sigma_square),
                  dist="bern",
                  p=p)

    variances = np.empty((num_batches, max_num_layers))
    variances.fill(np.nan)

    with tqdm(desc="batches", total=num_batches) as progress_bar:
        for i, batch in enumerate(input_data_iterator):
            variance = net.get_acts(batch.input,
                                    early_stopping=True,
                                    return_variance=True)
            variances[i, :len(variance)] = variance
            progress_bar.update(1)

    bad_indices = np.isnan(variances) | np.isinf(variances)
    variances = np.ma.array(variances, mask=bad_indices)
    means = np.mean(variances, axis=0)
    mask = np.ma.getmask(means)
    means[mask] = np.nan
    means = np.array(means)

    if os.path.exists(data_save_path):
        previous_sims = np.load(data_save_path)
        previous_sims = np.vstack([previous_sims, means])
        np.save(data_save_path, previous_sims)

        previous_sims = np.load(sigma_save_path)
        previous_sims = np.append(previous_sims, [
            weight_sigma_square,
        ],
                                  axis=0)
        np.save(sigma_save_path, previous_sims)
    else:
        np.save(sigma_save_path, np.array([
            weight_sigma_square,
        ]))
        np.save(data_save_path, means)
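
# A minimal usage sketch (assumption: the sigma grid is illustrative; `dataset`,
# `data_save_path` and `sigma_save_path` must be defined at module level):
# for sigma_w_sq in np.linspace(1.0, 4.0, 13):
#     test_init(sigma_w_sq)
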
Example #5
def JL_reconstruction(data='mnist',
                      JL_dim=32 * 32 / 2,
                      batch_size=100,
                      seed=None):
    # -------------------------------------------------------
    # get the dataset as infinite generator
    if seed is not None:
        np.random.seed(seed)

    if data == 'cifar10':
        data_dir = settings.filepath_cifar10
        train_gen, dev_gen = cifar10.load(batch_size, data_dir=data_dir)
        picture_size = 32 * 32 * 3
    elif data == 'celebA32':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size,
                                         data_dir=data_dir,
                                         black_white=False)
        picture_size = 32 * 32 * 3
    elif data == 'mnist':
        filename = '../data/MNIST/mnist32_zoom_1'
        train_gen, n_samples_train, dev_gen, n_samples_test = preprocessing_mnist.load(
            filename, batch_size, npy=True)
        picture_size = 32 * 32
    elif data == 'celebA32_bw':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size,
                                         data_dir=data_dir,
                                         black_white=True)
        picture_size = 32 * 32

    # -------------------------------------------------------
    # make directories
    dir1 = 'JL_reconstruction/'
    path = dir1 + data + '/'
    if not os.path.isdir(dir1):
        call(['mkdir', dir1])
    if not os.path.isdir(path):
        call(['mkdir', path])

    # -------------------------------------------------------
    # JL mapping
    A = np.random.randn(JL_dim, picture_size) / np.sqrt(picture_size)
    ATA = np.matmul(np.transpose(A), A)

    # JL error
    JL_error = np.round(np.sqrt(8 * np.log(2 * batch_size) / JL_dim),
                        decimals=4)
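    # worked example with the defaults (batch_size=100, JL_dim=32*32/2=512):
    #   JL_error = sqrt(8 * ln(200) / 512) = sqrt(42.387 / 512) ≈ 0.2877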
    print '\ndata dimension: {}'.format(picture_size)
    print 'JL dimension:   {}'.format(JL_dim)
    print 'batch size:     {}'.format(batch_size)
    print 'JL error:       {}\n'.format(JL_error)

    # -------------------------------------------------------
    # encode and decode data
    im = train_gen().next()[0]
    im1 = im / 255.99

    reconstruction = np.matmul(im1, ATA)  #/ float(picture_size)
    reconstruction = (255.99 * np.clip(reconstruction, 0, 1)).astype('uint8')

    # reconstruction = np.matmul(im, ATA)  # / float(picture_size)
    # reconstruction = (np.clip(reconstruction, 0, 255)).astype('uint8')

    save_images.save_images(im, save_path=path + 'true_images.png')
    save_images.save_images(reconstruction,
                            save_path=path + 'JL_reconstructed_image.png')

    im_d = np.zeros((batch_size, picture_size))
    for i in range(batch_size):
        A = np.random.randn(JL_dim, picture_size) / np.sqrt(picture_size)
        ATA = np.matmul(np.transpose(A), A)
        reconstruction = np.matmul(im1[i].reshape((1, picture_size)),
                                   ATA)  # / float(picture_size)
        reconstruction = (255.99 *
                          np.clip(reconstruction, 0, 1)).astype('uint8')
        im_d[i] = reconstruction.reshape((picture_size, ))
    im_d = im_d.astype('uint8')
    save_images.save_images(im_d,
                            save_path=path +
                            'different_JL_reconstructed_image.png')
Example #6
    if use_cuda:
        fixed_noise_128 = fixed_noise_128.cuda(gpu)
    with torch.no_grad():
        noisev = autograd.Variable(fixed_noise_128)
    samples = netG(noisev)
    samples = samples.view(-1, 3, 32, 32)
    samples = samples.mul(0.5).add(0.5)
    samples = samples.cpu().data.numpy()
    save_images(samples,
                'results_2/' + str(name) + '/samples_' + str(frame) + '.png')
    # save_images(samples, './samples_{}.jpg'.format(frame))


# Dataset iterator
# train_gen = load(BATCH_SIZE, data_dir=DATA_DIR)
train_gen, dev_gen = cifar10.load(BATCH_SIZE, data_dir=DATA_DIR)


def inf_train_gen():
    while True:
        for images in train_gen():
            yield images


gen = inf_train_gen()
preprocess = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

criterion = nn.BCEWithLogitsLoss()
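
# A minimal sketch of consuming the infinite generator above (assumption: each
# yielded batch is a flat uint8 array of shape (BATCH_SIZE, 3*32*32), as the
# WGAN-GP CIFAR-10 loader typically provides; names below are illustrative):
# _data = next(gen)
# real_data = torch.from_numpy(_data).float().view(-1, 3, 32, 32).div(255.)
# real_data = real_data.mul(2).sub(1)   # map [0, 1] -> [-1, 1], matching Normalize(0.5, 0.5)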
Example #7
import keras
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from optimizers import MaSS
from cifar10 import load
from resnet import resnet_v1, resnet_v2
import os

batch_size = 64
epochs = 200
data_augmentation = True
num_classes = 10

# Load Cifar-10 data
(x_train, y_train), (x_test, y_test) = load()
input_shape = x_train.shape[1:]

# Model parameters
n = 5
version = 1
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2

# Model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

################################################################
# Whenever learning rate reduces, restart the MaSS optimizer at the latest learned weights.
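# A minimal sketch of that restart idea (assumptions: `lr_schedule(epoch)` is the
# step-decay schedule used with this script, and MaSS accepts a Keras-style `lr`
# argument; neither is confirmed by the excerpt above):
class RestartMaSSOnLRDrop(keras.callbacks.Callback):
    def __init__(self, lr_schedule):
        super(RestartMaSSOnLRDrop, self).__init__()
        self.lr_schedule = lr_schedule
        self.prev_lr = None

    def on_epoch_begin(self, epoch, logs=None):
        lr = self.lr_schedule(epoch)
        if self.prev_lr is not None and lr < self.prev_lr:
            # re-compiling swaps in a fresh MaSS instance (optimizer state is
            # reset) while the latest learned weights are kept untouched
            self.model.compile(loss='categorical_crossentropy',
                               optimizer=MaSS(lr=lr),
                               metrics=['accuracy'])
        self.prev_lr = lr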
Example #8

def cifar_preprocess(data):
    data['data'] /= 255.
    return data

# Logger setup
logger = Logger('CIFAR10 AllConvNet')

# Configure GPU Device
if args.gpu >= 0:
    cuda.check_cuda_available()
xp = cuda.cupy if args.gpu >= 0 else np

# loading dataset
dataset = cifar10.load()

dim = dataset['train']['data'][0].size
N_train = len(dataset['train']['target'])
N_test = len(dataset['test']['target'])
train_data_dict = {'data':dataset['train']['data'].astype(np.float32),
                   'target':dataset['train']['target'].astype(np.int32)}
test_data_dict = {'data':dataset['test']['data'].astype(np.float32),
                  'target':dataset['test']['target'].astype(np.int32)}
train_data = DataFeeder(train_data_dict, batchsize=args.batch)
test_data = DataFeeder(test_data_dict, batchsize=args.valbatch)

train_data.hook_preprocess(cifar_preprocess)
test_data.hook_preprocess(cifar_preprocess)

def train(input_dim=INPUT_DIM,
          batch_size=BATCH_SIZE,
          n_features_first=N_FEATURES_FIRST,
          learning_rate=1e-4,
          epochs=ITERS,
          fixed_noise_size=FIXED_NOISE_SIZE,
          n_features_reduction_factor=2,
          architecture='JLSWGN',
          init_method='He',
          BN=True,
          JL_dim=None,
          JL_error=0.5,
          n_projections=10000,
          data='cifar10',
          load_saved=True):
    """
    - this is the function to use to train a Johnson-Lindenstrauss Generative Network model which uses the sliced
      Wasserstein-2 distance as objective funtion (JLSWGN) for CIFAR10, with the configuration given by the parameters
    - the function computes losses and auto-saves the model every 100 steps and automatically resumes training where it
      stopped (when load_saved=True)

    :param input_dim: the dimension of the latent space -> Z
    :param batch_size: the batch size, should be a divisor of 50k
    :param n_features_first: the number of feature maps in the first step of the generator
    :param epochs: the number of epochs to train for (in fact this number should be 50k/batch_size*true_epochs)
    :param fixed_noise_size: the number of pictures that is generated during training for visual progress
    :param n_features_reduction_factor: integer, e.g.: 1: use same number of feature-maps everywhere, 2: half the number
           of feature-maps in every step
    :param architecture: right now only supports 'JLSWGN', 'SWGN', defaults to 'JLSWGN'
    :param init_method: the method with which the variables are initialized, support: 'uniform', 'He', defaults to 'He'
    :param BN: shall batch normalization be used
    :param JL_dim: the target dimension of the JL mapping
    :param JL_error: the max pairwise distance deviation error of the JL mapping, only applies when JL_dim=None
    :param n_projections: number of random projections in sliced Wasserstein-2 distance
    :param data: the data set which shall be used for training: celebA32, celebA32_bw, cifar10, mnist
    :param load_saved: whether an already existing training progress shall be loaded to continue there (if one exists)
    :return:
    """

    # -------------------------------------------------------
    # setting for sending emails and getting statistics
    send = settings.send_email

    # -------------------------------------------------------
    # architecture default
    use_JL = True
    if architecture not in ['SWGN']:
        architecture = 'JLSWGN'
    if architecture == 'SWGN':
        use_JL = False
        JL_error = None
        JL_dim = None

    # -------------------------------------------------------
    # data set default
    if data not in ['cifar10', 'celebA32', 'celebA32_bw', 'mnist', 'celebA64']:
        data = 'cifar10'
    if data in ['celebA32_bw', 'mnist']:
        picture_size = 32 * 32
        picture_dim = [-1, 32, 32]
        power = 5
        n_features_image = 1
    elif data in ['cifar10', 'celebA32']:
        picture_size = 32 * 32 * 3
        picture_dim = [-1, 32, 32, 3]
        power = 5
        n_features_image = 3
    elif data in ['celebA64']:
        picture_size = 64 * 64 * 3
        picture_dim = [-1, 64, 64, 3]
        power = 6
        n_features_image = 3
    print 'data set: {}'.format(data)
    print

    # -------------------------------------------------------
    # init_method default
    if init_method not in ['uniform']:
        init_method = 'He'

    # -------------------------------------------------------
    # JL_dim:
    if JL_dim is None:
        if JL_error is None and use_JL:
            use_JL = False
            architecture = 'SWGN'
            print
            print 'architecture changed to SWGN, since JL_dim and JL_error were None'
            print
        elif JL_error is not None:
            JL_dim = int(math.ceil(8 * np.log(2 * batch_size) / (JL_error**2)))
            # this uses the constant given on the Wikipedia page of "Johnson-Lindenstrauss Lemma"
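            # e.g. (illustrative numbers): batch_size=100, JL_error=0.5 gives
            #   JL_dim = ceil(8 * ln(200) / 0.25) = ceil(169.55) = 170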
    else:
        JL_error = np.round(np.sqrt(8 * np.log(2 * batch_size) / JL_dim),
                            decimals=4)

    if use_JL and JL_dim >= picture_size:
        use_JL = False
        architecture = 'SWGN'
        JL_error = None
        JL_dim = None
        print
        print 'JL mapping is not used, since the target dimension was chosen bigger than the input dimension'
        print

    print 'JL_dim = {}'.format(JL_dim)
    print 'JL_error = {}'.format(JL_error)
    print

    # -------------------------------------------------------
    # create unique folder name
    dir1 = 'JLSWGN/'
    directory = dir1+str(data)+'_'+str(input_dim)+'_'+str(batch_size)+'_'+str(n_features_first)+'_'+\
                str(learning_rate)+'_'+str(n_features_reduction_factor)+'_'+\
                str(architecture)+'_'+str(init_method)+'_'+str(BN)+'_'+str(JL_dim)+'_'+str(JL_error)+'_'+\
                str(n_projections)+'/'
    samples_dir = directory + 'samples/'
    model_dir = directory + 'model/'

    # create directories if they don't exist
    if not os.path.isdir(dir1):
        call(['mkdir', dir1])

    if not os.path.isdir(directory):
        load_saved = False
        print 'make new directory:', directory
        print
        call(['mkdir', directory])
        call(['mkdir', samples_dir])
        call(['mkdir', model_dir])

    # if directories already exist, but model wasn't saved so far, set load_saved to False
    if 'training_progress.csv' not in os.listdir(directory):
        load_saved = False

    # -------------------------------------------------------
    # initialize a TF session
    config = tf.ConfigProto()
    if N_CPUS_TF is None:
        number_cpus_tf = settings.number_cpus
    else:
        number_cpus_tf = N_CPUS_TF
    config.intra_op_parallelism_threads = number_cpus_tf
    config.inter_op_parallelism_threads = number_cpus_tf
    session = tf.Session(config=config)

    # -------------------------------------------------------
    # convenience function to build the model
    def build_model():
        """
        - function to build the model
        """
        with tf.name_scope('placeholders'):
            real_data_int = tf.placeholder(tf.int32,
                                           shape=[None, picture_size])
            x_true = 2 * ((tf.cast(real_data_int, tf.float32) / 255.) - .5)
            z = tf.placeholder(tf.float32, [None, input_dim])
            if use_JL:
                JL = tf.placeholder(tf.float32, [picture_size, JL_dim])
                P_non_normalized = tf.placeholder(tf.float32,
                                                  [JL_dim, n_projections])
                P_non_normalized_SWD = tf.placeholder(
                    tf.float32, [picture_size, n_projections])
            else:
                JL = None
                P_non_normalized = tf.placeholder(
                    tf.float32, [picture_size, n_projections])
                P_non_normalized_SWD = None

        x_generated = generator(
            z,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            min_features=64,
            BN=BN,
            power=power,
            init_method=init_method,
            n_features_image=n_features_image)

        # define loss (big part taken from SWG)
        with tf.name_scope('loss'):
            # apply the Johnson-Lindenstrauss map, if wanted, to the flattened array
            if use_JL:
                JL_true = tf.matmul(x_true, JL)
                JL_gen = tf.matmul(x_generated, JL)
            else:
                JL_true = x_true
                JL_gen = x_generated

            # next project the samples (images). After being transposed, we have tensors
            # of the format: [[projected_image1_proj1, projected_image2_proj1, ...],
            #                 [projected_image1_proj2, projected_image2_proj2, ...],...]
            # Each row has the projections along one direction. This makes it easier for the sorting that follows.
            # first normalize the random normal vectors to lie in the sphere
            P = tf.nn.l2_normalize(P_non_normalized, axis=0)

            projected_true = tf.transpose(tf.matmul(JL_true, P))
            projected_fake = tf.transpose(tf.matmul(JL_gen, P))

            sorted_true, true_indices = tf.nn.top_k(input=projected_true,
                                                    k=batch_size)
            sorted_fake, fake_indices = tf.nn.top_k(input=projected_fake,
                                                    k=batch_size)

            # For faster gradient computation, we do not use sorted_fake to compute
            # loss. Instead we re-order the sorted_true so that the samples from the
            # true distribution go to the correct sample from the fake distribution.

            # It is less expensive (memory-wise) to rearrange arrays in TF.
            # Flatten the sorted_true from dim [n_projections, batch_size].
            flat_true = tf.reshape(sorted_true, [-1])

            # Modify the indices to reflect this transition to an array.
            # new index = row + index
            rows = np.asarray([
                batch_size * np.floor(i * 1.0 / batch_size)
                for i in range(n_projections * batch_size)
            ])
            rows = rows.astype(np.int32)
            flat_idx = tf.reshape(fake_indices, [-1, 1]) + np.reshape(
                rows, [-1, 1])
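            # illustrative example (batch_size=3, n_projections=2):
            #   rows                   = [0, 0, 0, 3, 3, 3]
            #   flattened fake_indices = [2, 0, 1, 1, 2, 0]
            #   flat_idx               = [2, 0, 1, 4, 5, 3]
            # so the k-th largest true projection is scattered into the slot of
            # the fake sample that holds the k-th largest fake projection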

            # The scatter operation takes care of reshaping to the rearranged matrix
            shape = tf.constant([batch_size * n_projections])
            rearranged_true = tf.reshape(
                tf.scatter_nd(flat_idx, flat_true, shape),
                [n_projections, batch_size])

            generator_loss = tf.reduce_mean(
                tf.square(projected_fake - rearranged_true))

            # for JLSWGN, additionally compute the plain sliced Wasserstein distance (SWD),
            # since SWD and the JL-projected SWD are not directly comparable
            if use_JL:
                P_SWD = tf.nn.l2_normalize(P_non_normalized_SWD, axis=0)

                projected_true_SWD = tf.transpose(tf.matmul(x_true, P_SWD))
                projected_fake_SWD = tf.transpose(tf.matmul(
                    x_generated, P_SWD))

                sorted_true_SWD, true_indices_SWD = tf.nn.top_k(
                    input=projected_true_SWD, k=batch_size)
                sorted_fake_SWD, fake_indices_SWD = tf.nn.top_k(
                    input=projected_fake_SWD, k=batch_size)

                flat_true_SWD = tf.reshape(sorted_true_SWD, [-1])
                flat_idx_SWD = tf.reshape(fake_indices_SWD,
                                          [-1, 1]) + np.reshape(rows, [-1, 1])

                rearranged_true_SWD = tf.reshape(
                    tf.scatter_nd(flat_idx_SWD, flat_true_SWD, shape),
                    [n_projections, batch_size])

                SWD = tf.reduce_mean(
                    tf.square(projected_fake_SWD - rearranged_true_SWD))
            else:
                SWD = generator_loss

        with tf.name_scope('optimizer'):
            generator_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
            g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
            g_train = g_optimizer.minimize(generator_loss,
                                           var_list=generator_vars)

        # initialize variables using init_method
        session.run(tf.global_variables_initializer())

        return real_data_int, z, x_generated, JL, P_non_normalized, P_non_normalized_SWD, SWD, g_train

    # -------------------------------------------------------
    # build the model
    real_data_int, z, x_generated, JL, P_non_normalized, P_non_normalized_SWD, SWD, g_train = build_model(
    )

    # -------------------------------------------------------
    # For creating and saving samples (taken from IWGAN)
    fixed_noise = np.random.normal(size=(fixed_noise_size,
                                         input_dim)).astype('float32')

    def generate_image(frame):
        samples = session.run(x_generated, feed_dict={z: fixed_noise})
        samples = ((samples + 1.) * (255. / 2)).astype(
            'uint8')  # transform linearly from [-1,1] to [0,255]
        samples = samples.reshape(picture_dim)
        save_images.save_images(samples,
                                samples_dir + 'iteration_{}.png'.format(frame))

    # -------------------------------------------------------
    # For calculating inception score
    softmax = None

    def get_inception_score(n=N_IS, softmax=softmax):
        all_samples = []
        for i in xrange(n / 100):
            z_input = np.random.randn(100, input_dim)
            all_samples.append(session.run(x_generated, feed_dict={z:
                                                                   z_input}))
        all_samples = np.concatenate(all_samples, axis=0)
        all_samples = ((all_samples + 1.) * (255. / 2)).astype('int32')
        all_samples = all_samples.reshape((-1, 32, 32, 3))
        return inception_score.get_inception_score(list(all_samples),
                                                   softmax=softmax)

    # -------------------------------------------------------
    # get the dataset as infinite generator
    if data == 'cifar10':
        data_dir = settings.filepath_cifar10
        train_gen, dev_gen = cifar10.load(batch_size, data_dir=data_dir)
        n_dev_samples = 10000
    elif data == 'celebA32':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size,
                                         data_dir=data_dir,
                                         black_white=False)
        n_dev_samples = 10000
    elif data == 'mnist':
        filename = '../data/MNIST/mnist32_zoom_1'
        train_gen, n_samples_train, dev_gen, n_samples_test = preprocessing_mnist.load(
            filename, batch_size, npy=True)
        n_dev_samples = 10000
    elif data == 'celebA32_bw':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size,
                                         data_dir=data_dir,
                                         black_white=True)
        n_dev_samples = 10000
    elif data == 'celebA64':
        if settings.euler:
            data_dir = settings.filepath_celebA64_euler
        else:
            data_dir = settings.filepath_celebA64
        train_gen, dev_gen = celeba.load(batch_size,
                                         data_dir=data_dir,
                                         black_white=False)
        n_dev_samples = 10000

    def inf_train_gen():
        while True:
            for images, _ in train_gen():
                yield images

    gen = inf_train_gen()

    # -------------------------------------------------------
    # for saving the model create a saver
    saver = tf.train.Saver(max_to_keep=1)
    epochs_trained = 0
    if data == 'cifar10' and COMPUTE_IS:
        tp_columns = [
            'iteration', 'time_for_iterations', 'SWD_approximation',
            'time_for_SWD', 'IS', 'time_for_IS'
        ]
    else:
        tp_columns = [
            'iteration', 'time_for_iterations', 'SWD_approximation',
            'time_for_SWD'
        ]
    training_progress = pd.DataFrame(data=None, index=None, columns=tp_columns)

    # restore the model:
    if load_saved:
        saver.restore(sess=session, save_path=model_dir + 'saved_model')
        epochs_trained = int(np.loadtxt(fname=model_dir + 'epochs.csv'))
        tp_app = pd.read_csv(filepath_or_buffer=directory +
                             'training_progress.csv',
                             index_col=0,
                             header=0)
        training_progress = pd.concat([training_progress, tp_app],
                                      axis=0,
                                      ignore_index=True)
        print 'loaded training progress, and the model, which was already trained for {} epochs'.format(
            epochs_trained)
        print training_progress
        print

    # if the network is already trained completely, set send to false
    if epochs_trained == epochs:
        send = False

    # -------------------------------------------------------
    # print and get model summary
    n_params_gen = model_summary(scope='generator')[0]
    print

    # -------------------------------------------------------
    # FK: print model config to file
    model_config = [[
        'input_dim', 'batch_size', 'n_features_first', 'learning_rate',
        'fixed_noise_size', 'n_features_reduction_factor', 'architecture',
        'init_method', 'BN', 'JL_dim', 'JL_error', 'n_projections', 'data_set',
        'n_trainable_params_gen'
    ],
                    [
                        input_dim, batch_size, n_features_first, learning_rate,
                        fixed_noise_size, n_features_reduction_factor,
                        architecture, init_method, BN, JL_dim, JL_error,
                        n_projections, data, n_params_gen
                    ]]
    model_config = np.transpose(model_config)
    model_config = pd.DataFrame(data=model_config)
    model_config.to_csv(path_or_buf=directory + 'model_config.csv')
    print 'saved model configuration'
    print

    # -------------------------------------------------------
    # training loop
    print 'train model with config:'
    print model_config
    print

    t = time.time()  # get start time

    for i in xrange(epochs - epochs_trained):
        # print the current epoch
        print('iteration={}/{}'.format(i + epochs_trained + 1, epochs))

        images = gen.next()
        z_train = np.random.randn(batch_size, input_dim)
        if use_JL:
            JL_train = np.random.randn(picture_size, JL_dim)
            P_train = np.random.randn(JL_dim, n_projections)
            session.run(g_train,
                        feed_dict={
                            real_data_int: images,
                            z: z_train,
                            JL: JL_train,
                            P_non_normalized: P_train
                        })
        else:
            P_train = np.random.randn(picture_size, n_projections)
            session.run(g_train,
                        feed_dict={
                            real_data_int: images,
                            z: z_train,
                            P_non_normalized: P_train
                        })

        if not settings.euler:
            mem = memory()
            print 'memory use (GB): {}'.format(mem)

        # every STEP_SIZE_LOSS_COMPUTATION steps: compute the losses and elapsed times, generate images, and save the model
        if (i + epochs_trained) % STEP_SIZE_LOSS_COMPUTATION == (
                STEP_SIZE_LOSS_COMPUTATION - 1):
            # get the elapsed time for the last STEP_SIZE_LOSS_COMPUTATION iterations
            elapsed_time = time.time() - t

            # generate sample images from fixed noise
            generate_image(i + epochs_trained + 1)
            print 'generated images'

            # compute and save losses on the dev set, starting after START_COMPUTING_LOSS iterations
            if i + epochs_trained + 1 >= START_COMPUTING_LOSS:
                t = time.time()
                dev_d_loss = []
                print 'compute loss'
                j = 0
                for images_dev, _ in dev_gen():
                    if not settings.euler:
                        # progress bar
                        sys.stdout.write(
                            '\r>> Compute SWD %.1f%%' %
                            (float(j) / float(n_dev_samples / batch_size) *
                             100.0))
                        sys.stdout.flush()
                        j += 1
                    z_train_dev = np.random.randn(batch_size, input_dim)
                    P_train_dev = np.random.randn(picture_size, n_projections)
                    if use_JL:
                        _dev_d_loss = session.run(SWD,
                                                  feed_dict={
                                                      real_data_int:
                                                      images_dev,
                                                      z:
                                                      z_train_dev,
                                                      P_non_normalized_SWD:
                                                      P_train_dev
                                                  })
                    else:
                        _dev_d_loss = session.run(SWD,
                                                  feed_dict={
                                                      real_data_int:
                                                      images_dev,
                                                      z: z_train_dev,
                                                      P_non_normalized:
                                                      P_train_dev
                                                  })
                    dev_d_loss.append(_dev_d_loss)
                dev_loss = np.mean(dev_d_loss)
                t_loss = time.time() - t

                # compute inception score (IS)
                if data == 'cifar10' and COMPUTE_IS:
                    if (i + epochs_trained) % IS_FREQ == (IS_FREQ - 1):
                        print 'compute inception score'
                        t = time.time()
                        IS_mean, IS_std, softmax = get_inception_score(
                            N_IS, softmax=softmax)
                        IS = (IS_mean, IS_std)
                        t_IS = time.time() - t
                    else:
                        IS = None
                        t_IS = None
            else:
                dev_loss = None
                t_loss = None
                IS = None
                t_IS = None

            if data == 'cifar10' and COMPUTE_IS:
                tp_app = pd.DataFrame(data=[[
                    i + epochs_trained + 1, elapsed_time, dev_loss, t_loss, IS,
                    t_IS
                ]],
                                      index=None,
                                      columns=tp_columns)
            else:
                tp_app = pd.DataFrame(data=[[
                    i + epochs_trained + 1, elapsed_time, dev_loss, t_loss
                ]],
                                      index=None,
                                      columns=tp_columns)
            training_progress = pd.concat([training_progress, tp_app],
                                          axis=0,
                                          ignore_index=True)

            # save model
            saver.save(sess=session, save_path=model_dir + 'saved_model')
            # save number of epochs trained
            np.savetxt(fname=model_dir + 'epochs.csv',
                       X=[i + epochs_trained + 1])
            print 'saved model after training epoch {}'.format(i +
                                                               epochs_trained +
                                                               1)
            # save training progress
            training_progress.to_csv(path_or_buf=directory +
                                     'training_progress.csv')
            print 'saved training progress'
            print

            # fix new start time
            t = time.time()

    # -------------------------------------------------------
    # after training close the session
    session.close()
    tf.reset_default_graph()

    # -------------------------------------------------------
    # when training is done send email
    if send:
        subject = 'JL-SWG ({}) training finished'.format(data)
        body = 'to download the results of this model use (in the terminal):\n\n'
        body += 'scp -r [email protected]:/cluster/home/fkrach/MasterThesis/MTCode1/' + directory + ' .'
        files = [
            directory + 'model_config.csv',
            directory + 'training_progress.csv',
            samples_dir + 'iteration_{}.png'.format(epochs)
        ]
        send_email.send_email(subject=subject, body=body, file_names=files)

    return directory
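
# A minimal usage sketch (assumption: the argument values below are illustrative
# only; the module-level constants above provide the real defaults):
# out_dir = train(data='cifar10', batch_size=100, JL_error=0.5, load_saved=True)
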
Example #10
# truck, class-id: 9

# Necessary Imports and obtaining the training and testing data.
import cifar10
import numpy as np
from bwsi_grader.cogworks.nearest_neighbors import grade_distances
from bwsi_grader.cogworks.nearest_neighbors import grade_predict
from bwsi_grader.cogworks.nearest_neighbors import grade_make_folds
# "cifar10" must be a subfolder in the current directory for this part to work.
if not cifar10.get_path().is_file():
    cifar10.download()
else:
    print("cifar10 is already downloaded at:\n{}".format(cifar10.get_path()))
# Loading in the training data and converting them into floats.
x_train, y_train, x_test, y_test = (i.astype("float32")
                                    for i in cifar10.load())
x_train = x_train.transpose([0, 2, 3, 1])
x_test = x_test.transpose([0, 2, 3, 1])

# Limiting the data to make the program run faster.
x_train, y_train = x_train[:5000], y_train[:5000]
x_test, y_test = x_test[:500], y_test[:500]
print("\n")
print('Training data shape: ', x_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', x_test.shape)
print('Test labels shape: ', y_test.shape)
# Flattening the data values
print("\n")
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
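
# A minimal sketch of the pairwise-distance step that the imported grader checks
# (assumptions: the exercise asks for Euclidean distances between every test and
# every training point; the name `compute_distances` is illustrative):
def compute_distances(x, y):
    """Return the (M, N) matrix of Euclidean distances between rows of x (M, D) and y (N, D)."""
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, computed without explicit Python loops
    sq = -2.0 * np.matmul(x, y.T)
    sq += np.sum(x ** 2, axis=1, keepdims=True)
    sq += np.sum(y ** 2, axis=1)
    return np.sqrt(np.maximum(sq, 0.0))

# e.g. dists = compute_distances(x_test, x_train)  ->  shape (500, 5000)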