Example #1
def test_vs_A2(batch_normalize):

    np.random.seed(42)
    X, Y, y = dataset.load_cifar10(batch='data_batch_1', limit_N=None)
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch',
                                                  limit_N=None)
    K, d = (Y.shape[0], X.shape[0])

    net_sizes = [d, 50, K]
    gd_params = {
        'eta': 0.024749,
        'batch_size': 100,
        'epochs': 10,
        'gamma': 0.9,
        'decay_rate': 0.80,
        'lambda': 0.000242,
        'batch_normalize': batch_normalize
    }

    net = Net(net_sizes, gd_params)
    r = net.train(X, Y, X_test, Y_test, silent=False)

    losses, test_losses, accuracies, test_accuracies = (r['losses'],
                                                        r['test_losses'],
                                                        r['accuracies'],
                                                        r['test_accuracies'])

    print("Final accuracy: {}".format(accuracies[-1]))
    print("Final accuracy (test): {}".format(test_accuracies[-1]))

    return r
Example #2
def params_search(min_eta, max_eta, min_lambda, max_lambda, silent=False, limit_N=None, combs=100):

    if not silent:
        print("Running parameters search...")

    np.random.seed(42)

    X, Y, y = dataset.load_cifar10(batch='data_batch_1', limit_N=limit_N)
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch', limit_N=limit_N)
    K, d = (Y.shape[0], X.shape[0])

    net_sizes = [d, 50, 30, K]

    default_params = {
        'eta': 0.020,
        'batch_size': 100,
        'epochs': 10,
        'gamma': 0.9,
        'decay_rate': 0.98,
        'lambda': 0.000001,
        'batch_normalize': True
    }

    configs = _get_configs(default_params, combs, min_eta, max_eta, min_lambda, max_lambda)

    net = Net(net_sizes, default_params)
    Ws, bs = net.Ws, net.bs

    parallel = Parallel(n_jobs=8, backend='multiprocessing', verbose=5)
    results = parallel(delayed(_search_worker)(net_sizes, c, Ws, bs, X, Y, X_test, Y_test) for c in configs)

    if not silent:
        print("Parameters search done.")

    return configs, results
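
The helper _get_configs is not shown on this page; below is a minimal sketch of what it might do, assuming it draws combs random (eta, lambda) pairs within the given bounds (log-uniform sampling is my assumption) and copies the remaining defaults unchanged.

import numpy as np

def _get_configs(default_params, combs, min_eta, max_eta, min_lambda, max_lambda):
    """Hypothetical sketch: sample `combs` random hyperparameter configurations."""
    configs = []
    for _ in range(combs):
        c = dict(default_params)  # keep batch_size, epochs, gamma, ... as-is
        # log-uniform sampling is an assumption, not taken from the original code
        c['eta'] = 10 ** np.random.uniform(np.log10(min_eta), np.log10(max_eta))
        c['lambda'] = 10 ** np.random.uniform(np.log10(min_lambda), np.log10(max_lambda))
        configs.append(c)
    return configs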
Example #3
def test_four_layers(batch_normalize):

    np.random.seed(42)
    X, Y, y = dataset.load_multibatch_cifar10()
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch')
    K, d = (Y.shape[0], X.shape[0])

    net_sizes = [d, 50, 30, 10, K]

    gd_params = {
        'eta': 0.03,
        'batch_size': 100,
        'epochs': 20,
        'gamma': 0.9,
        'decay_rate': 0.95,
        'lambda': 0.0,
        'batch_normalize': batch_normalize
    }

    net = Net(net_sizes, gd_params)
    r = net.train(X, Y, X_test, Y_test, silent=False)

    # costs, test_costs = (r['costs'], r['test_costs'])
    losses, test_losses, accuracies, test_accuracies = (r['losses'],
                                                        r['test_losses'],
                                                        r['accuracies'],
                                                        r['test_accuracies'])

    print("Final accuracy: {}".format(accuracies[-1]))
    print("Final accuracy (test): {}".format(test_accuracies[-1]))

    return r
Example #4
def test_final_model():

    np.random.seed(42)
    X, Y, y = dataset.load_multibatch_cifar10()
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch')
    K, d = (Y.shape[0], X.shape[0])

    net_sizes = [d, 50, 30, K]

    gd_params = {
        'eta': 0.0169,
        'batch_size': 100,
        'epochs': 20,
        'gamma': 0.6,
        'decay_rate': 0.93,
        'lambda': 5e-5,
        'plateau_guard': 0.0002,
        'batch_normalize': True
    }

    net = Net(net_sizes, gd_params)
    r = net.train(X, Y, X_test, Y_test, silent=False)

    losses, test_losses, accuracies, test_accuracies = (r['losses'],
                                                        r['test_losses'],
                                                        r['accuracies'],
                                                        r['test_accuracies'])

    print("Final accuracy: {}".format(accuracies[-1]))
    print("Final accuracy (test): {}".format(test_accuracies[-1]))

    plot_results(r, '../Report/Figs/final_model.eps')

    return r
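
plot_results is defined elsewhere in the project; a minimal sketch under the assumption that it only plots the loss and accuracy curves stored in the result dict and writes the figure to the given path.

import matplotlib.pyplot as plt

def plot_results(r, save_path):
    """Hypothetical sketch: plot training/test curves from the result dict."""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    ax1.plot(r['losses'], label='train')
    ax1.plot(r['test_losses'], label='test')
    ax1.set_xlabel('epoch')
    ax1.set_ylabel('loss')
    ax1.legend()
    ax2.plot(r['accuracies'], label='train')
    ax2.plot(r['test_accuracies'], label='test')
    ax2.set_xlabel('epoch')
    ax2.set_ylabel('accuracy')
    ax2.legend()
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)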
Example #5
def get_cifar10_data(n_train=49000,
                     n_val=1000,
                     n_test=10000,
                     subtract_mean=True):

    X_train, y_train, X_test, y_test = load_cifar10()

    # Subsample the data
    mask = list(range(n_train, n_train + n_val))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(n_train))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(n_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
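
For illustration, a typical call; the shapes assume load_cifar10 returns the standard 50 000 training and 10 000 test images as (N, 32, 32, 3) arrays.

X_train, y_train, X_val, y_val, X_test, y_test = get_cifar10_data()
print(X_train.shape)  # (49000, 32, 32, 3) with the default split
print(X_val.shape)    # (1000, 32, 32, 3)
print(X_test.shape)   # (10000, 32, 32, 3)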
Example #6
def test_import():

    filepath = './model_epoch_20.pkl'
    net = Net.import_model(filepath)

    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch')
    acc = net.compute_accuracy(X_test, y_test)
    print('test acc', acc)
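
Net.import_model belongs to the custom Net class and is not shown here; a minimal sketch, assuming models are simply pickled to disk by a matching export_model method (the method names mirror the call above, everything else is an assumption).

import pickle

class Net:
    # ... constructor, forward/backward and training code omitted ...

    def export_model(self, filepath):
        """Hypothetical sketch: serialize the whole network with pickle."""
        with open(filepath, 'wb') as f:
            pickle.dump(self, f)

    @staticmethod
    def import_model(filepath):
        """Hypothetical sketch: load a previously pickled Net instance."""
        with open(filepath, 'rb') as f:
            return pickle.load(f)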
Example #7
def load_image_data(data, n_xl, n_channels, output_batch_size):
    if data == 'mnist':
        # Load MNIST
        data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'data', 'mnist.pkl.gz')
        x_train, t_train, x_valid, t_valid, _, _ = \
            dataset.load_mnist_realval(data_path)
        x_train = np.vstack([x_train, x_valid]).astype('float32')
        x_train = np.reshape(x_train, [-1, n_xl, n_xl, n_channels])

        x_train2 = x_train[:output_batch_size]
        t_train2 = t_train[:output_batch_size]
        t_train2 = np.nonzero(t_train2)[1]
        order = np.argsort(t_train2)
        sorted_x_train = x_train2[order]
    elif data == 'svhn':
        # Load SVHN data
        print('Reading svhn...')
        time_read = -time.time()
        print('Train')
        x_train = np.load('data/svhn_train1_x.npy')
        y_train = np.load('data/svhn_train1_y.npy')
        print('Test')
        x_test = np.load('data/svhn_test_x.npy')
        y_test = np.load('data/svhn_test_y.npy')
        time_read += time.time()
        print('Finished in {:.4f} seconds'.format(time_read))

        x_train2 = x_train[:output_batch_size]
        y_train2 = y_train[:output_batch_size]
        order = np.argsort(y_train2)
        sorted_x_train = x_train2[order]
    elif data == 'lfw':
        # Load LFW data
        print('Reading lfw...')
        time_read = -time.time()
        x_train = np.load('data/lfw.npy').astype(np.float32)
        print(x_train.shape)
        x_train = np.reshape(x_train, [-1, n_xl, n_xl, n_channels])
        time_read += time.time()
        print('Finished in {:.4f} seconds'.format(time_read))

        sorted_x_train = x_train[:output_batch_size]
    else:
        x_train, t_train, x_test, t_test = \
            dataset.load_cifar10('data/cifar10/cifar-10-python.tar.gz', normalize=True, one_hot=True)
        x = np.vstack((x_train, x_test))
        t = np.vstack((t_train, t_test))

        x2 = x[:output_batch_size]
        t2 = np.argmax(t[:output_batch_size], 1)
        order = np.argsort(t2)

        x_train = x
        sorted_x_train = x2[order]

    return x_train, sorted_x_train
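
A hypothetical call for the CIFAR-10 branch (any value other than 'mnist', 'svhn' or 'lfw' falls through to it); the archive path is the one hard-coded above.

# 32x32 RGB images; keep the first 100 label-sorted samples for visualization
x_train, sorted_x_train = load_image_data('cifar10', n_xl=32, n_channels=3,
                                           output_batch_size=100)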
Example #8
def test_gradients(batch_normalize):

    np.random.seed(42)
    X, Y, y = dataset.load_cifar10(batch='data_batch_1',
                                   limit_N=100,
                                   limit_d=100)
    K, d = (Y.shape[0], X.shape[0])

    net_sizes = [d, 50, 30, 20, K]
    gd_params = {'lambda': 0.0, 'batch_normalize': batch_normalize}

    net = Net(net_sizes, gd_params)

    print('\nComputing gradients (analytical methods)...')

    if batch_normalize:
        ss, s_means, s_vars, Hs, P = net._forward_bn(X)
        dtheta = net._backward_bn(X, Y, P, Hs, ss, s_means, s_vars)
    else:
        Hs, P = net._forward(X)
        dtheta = net._backward(X, Y, P, Hs)

    dummy_net = Net(net.network_sizes, gd_params, init_theta=False)

    def dummy_cost_fn(_X, _Y, _W, _b, _lamb):
        dummy_net.Ws = _W
        dummy_net.bs = _b
        return dummy_net.compute_cost(_X, _Y)

    print('Computing gradients (fast numerical method)...')
    dtheta_num = compute_gradients_num(X, Y, net.Ws, net.bs, net.lamb, 1e-5,
                                       dummy_cost_fn)

    print('Computing gradients (slow numerical method)...')
    dtheta_num_slow = compute_gradients_num_slow(X, Y, net.Ws, net.bs,
                                                 net.lamb, 1e-5, dummy_cost_fn)

    print('\nDone\n')

    print('Mean relative errors between numerical methods:\n{}\n'.format(
        compare_dthetas(dtheta_num, dtheta_num_slow)))

    print('Mean relative errors between analytical and slow numerical:\n{}\n'.
          format(compare_dthetas(dtheta, dtheta_num_slow)))

    print('Mean relative errors between analytical and fast numerical:\n{}\n'.
          format(compare_dthetas(dtheta, dtheta_num)))
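
compare_dthetas is assumed to report the mean relative error between two sets of gradients, layer by layer; a minimal sketch using the usual relative-error formula |a - b| / max(eps, |a| + |b|). The (dWs, dbs) structure of dtheta is an assumption based on how the gradients are built above.

import numpy as np

def compare_dthetas(dtheta_a, dtheta_b, eps=1e-9):
    """Hypothetical sketch: mean relative error per gradient array."""
    errors = []
    for grads_a, grads_b in zip(dtheta_a, dtheta_b):  # e.g. (dWs, dbs)
        for ga, gb in zip(grads_a, grads_b):          # one array per layer
            rel = np.abs(ga - gb) / np.maximum(eps, np.abs(ga) + np.abs(gb))
            errors.append(rel.mean())
    return errors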
Example #9
def test_model(model_path=None):

    # Load the dataset with augmentations
    start_time = time.time()
    ((generator_train, generator_test),
     (x_train, y_train), (x_test, y_test),
     (x_val, y_val)) = load_cifar10()

    model = load_model(model_path)

    optimizer = SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    loss = model.evaluate_generator(generator_test.flow(x_test, y_test))
    print('Loss was: %s' % loss)
    return loss
Example #10
def main():

    seed = 1234
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    client.notify("==> Loading the dataset...")
    dataset = load_cifar10(batch=args.batch)
    train_dl = dataset['train']
    test_dl = dataset['test']

    client.notify("==> Loading the model...")
    net = Resnet50(output_dim=10).to(device)
    if args.weight_file is not None:
        weights = torch.load(args.weight_file)
        net.load_state_dict(weights, strict=False)

    if not os.path.exists('./models'):
        os.makedirs('./models')

    optimizer = optimizers.Adam(net.parameters(), lr=1e-4)
    lr_scheduler = optimizers.lr_scheduler.StepLR(optimizer, 5, 0.1)

    history = {
        'epochs': np.arange(1, args.epochs+1),
        'train_loss': [],
        'train_acc': [],
        'test_loss': [],
        'test_acc': []
    }

    client.notify('==> Start training...')
    for epoch in range(args.epochs):
        train(net, optimizer, train_dl, epoch, history)
        lr_scheduler.step()
        test(net, test_dl, epoch, history)

    client.notify("==> Training Done")

    plot_result(history)
    client.notify('==> Saved plot')
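
The train and test helpers are defined elsewhere; a minimal sketch of what a matching train epoch could look like, assuming history['train_loss'] and history['train_acc'] collect per-epoch averages.

import torch
import torch.nn.functional as F

def train(net, optimizer, train_dl, epoch, history):
    """Hypothetical sketch: one training epoch over the CIFAR-10 train loader."""
    device = next(net.parameters()).device  # reuse whatever device the model is on
    net.train()
    total_loss, correct, total = 0.0, 0, 0
    for x, t in train_dl:
        x, t = x.to(device), t.to(device)
        optimizer.zero_grad()
        out = net(x)
        loss = F.cross_entropy(out, t)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * x.size(0)
        correct += (out.argmax(dim=1) == t).sum().item()
        total += x.size(0)
    history['train_loss'].append(total_loss / total)
    history['train_acc'].append(correct / total)
    print('epoch {}: train loss {:.4f}, train acc {:.4f}'.format(
        epoch, total_loss / total, correct / total))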
Example #11
def run_experiment(args):
    import os
    # set environment variables for tensorflow
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    import inspect
    import shutil
    import numpy as np
    import tensorflow as tf

    from collections import OrderedDict
    import matplotlib.pyplot as plt
    plt.switch_backend('Agg')

    import utils
    import paramgraphics
    import nn
    from tensorflow.contrib.framework.python.ops import arg_scope
    # import tensorflow.contrib.layers as layers

    # ----------------------------------------------------------------
    # Arguments and Settings
    args.message = 'LBT-GAN-cifar10_' + args.message
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # copy file for reproducibility
    logger, dirname = utils.setup_logging(args)
    script_fn = inspect.getfile(inspect.currentframe())
    script_src = os.path.abspath(script_fn)
    script_dst = os.path.abspath(os.path.join(dirname, script_fn))
    shutil.copyfile(script_src, script_dst)
    logger.info("script copied from %s to %s" % (script_src, script_dst))

    # print arguments
    for k, v in sorted(vars(args).items()):
        logger.info("  %20s: %s" % (k, v))

    # get arguments
    batch_size = args.batch_size
    batch_size_est = args.batch_size_est
    gen_lr = args.gen_lr
    dis_lr = args.dis_lr
    est_lr = args.est_lr
    lambda_gan = args.lambda_gan
    beta1 = 0.5
    epsilon = 1e-8
    max_iter = args.max_iter
    viz_every = args.viz_every
    z_dim, vae_z_dim = utils.get_ints(args.z_dims)
    unrolling_steps = args.unrolling_steps
    assert unrolling_steps > 0
    n_viz = args.n_viz

    # ----------------------------------------------------------------
    # Dataset
    from dataset import load_cifar10, DataSet
    train_x, train_y, test_x, test_y = load_cifar10()
    train_x = train_x * 2. - 1.
    test_x = test_x * 2. - 1.

    dtrain = DataSet(train_x, train_y)
    dtest = DataSet(test_x, test_y)

    # data_channel = 3
    x_dim = 32 * 32 * 3
    dim_input = (32, 32)

    # ----------------------------------------------------------------
    # Model setup
    logger.info("Setting up model ...")

    def discriminator(x, Reuse=tf.AUTO_REUSE, is_training=True):
        def leaky_relu(x, alpha=0.2):
            return tf.maximum(alpha * x, x)

        with tf.variable_scope("discriminator", reuse=Reuse):

            x = tf.reshape(x, [batch_size, 32, 32, 3])
            lx = tf.layers.dropout(x, 0.2, training=is_training)

            conv1 = tf.layers.conv2d(lx,
                                     64,
                                     5,
                                     2,
                                     use_bias=True,
                                     padding='same')
            conv1 = leaky_relu(conv1)

            conv2 = tf.layers.conv2d(conv1,
                                     128,
                                     5,
                                     2,
                                     use_bias=False,
                                     padding='same')
            conv2 = tf.layers.batch_normalization(conv2, training=is_training)
            conv2 = leaky_relu(conv2)

            conv3 = tf.layers.conv2d(conv2,
                                     256,
                                     5,
                                     2,
                                     use_bias=False,
                                     padding='same')
            conv3 = tf.layers.batch_normalization(conv3, training=is_training)
            conv3 = leaky_relu(conv3)
            conv3 = tf.layers.flatten(conv3)

            fc2 = tf.layers.dense(conv3, 1)
            return fc2

    def generator(z, Reuse=tf.AUTO_REUSE, flatten=True, is_training=True):
        if args.g_nonlin == 'relu':
            # print("Use Relu in G")
            nonlin = tf.nn.relu
        else:
            # print("Use tanh in G")
            nonlin = tf.nn.tanh
        # nonlin = tf.nn.relu if args.g_nonlin == 'relu' else tf.nn.tanh

        # norm_prms = {'is_training': is_training, 'decay': 0.9, 'scale': False}
        with tf.variable_scope("generator", reuse=Reuse):
            # x = layers.fully_connected(x, 4 * 4 * 512)
            lx = tf.layers.dense(z, 4 * 4 * 512)
            lx = tf.reshape(lx, [-1, 4, 4, 512])
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)

            # x = tf.reshape(x, (-1, 4, 4, 512))
            # x = conv_concate_onehot(x, y)
            # x = layers.conv2d_transpose(x, 256, 5, 2)
            lx = tf.layers.conv2d_transpose(lx,
                                            256,
                                            5,
                                            2,
                                            use_bias=False,
                                            padding='same')
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)
            # x = conv_concate_onehot(x, y)
            # x = layers.conv2d_transpose(x, 128, 5, 2)
            lx = tf.layers.conv2d_transpose(lx,
                                            128,
                                            5,
                                            2,
                                            use_bias=False,
                                            padding='same')
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)

            # x = conv_concate_onehot(x, y)
            # x = layers.conv2d_transpose(
            #     x, 3, 5, 2, normalizer_fn=None, activation_fn=nn.tanh)
            lx = tf.layers.conv2d_transpose(lx, 3, 5, 2, padding='same')
            lx = tf.nn.tanh(lx)

            if flatten is True:
                lx = tf.layers.flatten(lx)
            return lx

    nonlin = tf.nn.relu

    def compute_est_samples(z, params=None, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("estimator"):
            with arg_scope([nn.dense], params=params):
                with tf.variable_scope("decoder", reuse=reuse):
                    h_dec_1 = nn.dense(z,
                                       vae_z_dim,
                                       200 * 2,
                                       "dense1",
                                       nonlinearity=nonlin)
                    h_dec_2 = nn.dense(h_dec_1,
                                       200 * 2,
                                       500 * 2,
                                       "dense2",
                                       nonlinearity=nonlin)
                    x_mean = nn.dense(h_dec_2,
                                      500 * 2,
                                      x_dim,
                                      "dense3",
                                      nonlinearity=None)
                    x_mean = tf.nn.tanh(x_mean)
                    return x_mean

    def compute_est_ll(x, params=None, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("estimator", reuse=reuse):
            logvae_x_var = tf.get_variable(
                "logvae_x_var", (),
                tf.float32,
                trainable=True,
                initializer=tf.constant_initializer(-1))

            with arg_scope([nn.dense], params=params):
                with tf.variable_scope("encoder", reuse=reuse):
                    h_enc_1 = nn.dense(x,
                                       x_dim,
                                       500 * 2,
                                       "dense1",
                                       nonlinearity=nonlin)
                    # h_enc_1 = nn.batch_norm(h_enc_1, "bn1", 129, 2)
                    h_enc_2 = nn.dense(h_enc_1,
                                       500 * 2,
                                       200 * 2,
                                       "dense2",
                                       nonlinearity=nonlin)
                    # h_enc_2 = nn.batch_norm(h_enc_2, "bn2", 128, 2)
                    z_mean = nn.dense(h_enc_2,
                                      200 * 2,
                                      vae_z_dim,
                                      "dense3",
                                      nonlinearity=None)
                    z_logvar = nn.dense(h_enc_2,
                                        200 * 2,
                                        vae_z_dim,
                                        "dense4",
                                        nonlinearity=None)
                epsilon = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)
                z = z_mean + tf.exp(0.5 * z_logvar) * epsilon

                with tf.variable_scope("decoder", reuse=reuse):
                    h_dec_1 = nn.dense(z,
                                       vae_z_dim,
                                       200 * 2,
                                       "dense1",
                                       nonlinearity=nonlin)
                    # h_dec_1 = nn.batch_norm(h_dec_1, "bn1", 127, 2)
                    h_dec_2 = nn.dense(h_dec_1,
                                       200 * 2,
                                       500 * 2,
                                       "dense2",
                                       nonlinearity=nonlin)
                    # h_dec_2 = nn.batch_norm(h_dec_2, "bn2", 128, 2)
                    x_mean = nn.dense(h_dec_2,
                                      500 * 2,
                                      x_dim,
                                      "dense3",
                                      nonlinearity=None)
                    x_mean = tf.nn.tanh(x_mean)

        # elbo = tf.reduce_mean(
        #     tf.reduce_sum(
        #         -tf.nn.sigmoid_cross_entropy_with_logits(
        #             logits=x_mean, labels=x),
        #         axis=1) -
        #     tf.reduce_sum(
        #         -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar)),
        #         axis=1))
        vae_x_var = tf.exp(logvae_x_var)
        elbo = tf.reduce_mean(
            tf.reduce_sum(-0.5 * np.log(2 * np.pi) - 0.5 * tf.log(vae_x_var) -
                          tf.layers.flatten(tf.square(x - x_mean)) /
                          (2 * vae_x_var),
                          axis=1) -
            tf.reduce_sum(-0.5 * (1 + z_logvar - tf.square(z_mean) -
                                  tf.exp(z_logvar)),
                          axis=1))
        return elbo, x_mean

    def compute_est_updated_with_SGD(x, lr=0.001, params=None):
        elbo, _ = compute_est_ll(x, params=params)
        grads = tf.gradients(elbo, params.values())
        new_params = params.copy()
        for key, g in zip(params, grads):
            new_params[key] += lr * g
        return elbo, new_params

    def compute_est_updated_with_Adam(x,
                                      lr=0.001,
                                      beta_1=0.9,
                                      beta_2=0.999,
                                      epsilon=1e-7,
                                      decay=0.,
                                      params=None,
                                      adam_params=None):
        elbo, _ = compute_est_ll(x, params=params)
        grads = tf.gradients(elbo, params.values())
        new_params = params.copy()
        new_adam_params = adam_params.copy()
        new_adam_params['iterations'] += 1
        lr = lr * \
            (1. / (1. + decay *
                   tf.cast(adam_params['iterations'], tf.float32)))
        t = tf.cast(new_adam_params['iterations'], tf.float32)
        lr_t = lr * (tf.sqrt(1. - tf.pow(beta_2, t)) /
                     (1. - tf.pow(beta_1, t)))
        for key, g in zip(params, grads):
            new_adam_params['m_' + key] = (
                beta_1 * adam_params['m_' + key]) + (1. - beta_1) * g
            new_adam_params['v_' + key] = tf.stop_gradient(
                (beta_2 * adam_params['v_' + key]) +
                (1. - beta_2) * tf.square(g))
            new_params[key] = params[key] + lr_t * new_adam_params[
                'm_' + key] / tf.sqrt(new_adam_params['v_' + key] + epsilon)
        return elbo, new_params, new_adam_params

    lr = tf.placeholder(tf.float32)
    data = tf.placeholder(tf.float32, shape=(batch_size, x_dim))

    # Construct generator and estimator nets
    est_params_dict = OrderedDict()
    _, _ = compute_est_ll(data, params=est_params_dict)
    gen_noise = tf.random_normal((batch_size_est, z_dim), dtype=tf.float32)
    samples_gen = generator(gen_noise)
    vae_noise = tf.random_normal((batch_size_est, vae_z_dim), dtype=tf.float32)
    samples_est = compute_est_samples(z=vae_noise, params=est_params_dict)
    # for key in est_params_dict:
    #    print(key, est_params_dict[key])

    adam_params_dict = OrderedDict()
    with tf.variable_scope("adam"):
        adam_params_dict['iterations'] = tf.Variable(0,
                                                     dtype=tf.int64,
                                                     name='iterations')
        for key in est_params_dict:
            adam_params_dict['m_' + key] = tf.Variable(tf.zeros_like(
                est_params_dict[key]),
                                                       name='m_' + key)
            adam_params_dict['v_' + key] = tf.Variable(tf.zeros_like(
                est_params_dict[key]),
                                                       name='v_' + key)

    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
    est_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "estimator")
    adam_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "adam")

    # unrolling estimator updates
    cur_params = est_params_dict
    cur_adam_params = adam_params_dict
    elbo_genx_at_steps = []
    for _ in range(unrolling_steps):
        samples_gen = generator(
            tf.random_normal((batch_size_est, z_dim), dtype=tf.float32))
        elbo_genx_step, cur_params, cur_adam_params = compute_est_updated_with_Adam(
            samples_gen,
            lr=lr,
            beta_1=beta1,
            epsilon=epsilon,
            params=cur_params,
            adam_params=cur_adam_params)
        elbo_genx_at_steps.append(elbo_genx_step)

    # estimator update
    updates = []
    for key in est_params_dict:
        updates.append(tf.assign(est_params_dict[key], cur_params[key]))
    for key in adam_params_dict:
        updates.append(tf.assign(adam_params_dict[key], cur_adam_params[key]))
    e_train_op = tf.group(*updates, name="e_train_op")

    # Optimize the generator on the unrolled ELBO loss
    unrolled_elbo_data, _ = compute_est_ll(data, params=cur_params)
    # unrolled_elbo_samp, _ = compute_est_ll(
    #     tf.stop_gradient(samples_gen), params=cur_params)

    # GAN-loss for discriminator and generator
    samples_gen_gan = generator(
        tf.random_normal((batch_size_est, z_dim), dtype=tf.float32))
    fake_D_output = discriminator(samples_gen_gan)
    real_D_output = discriminator(data)
    # print(fake_D_output, real_D_output)
    ganloss_g = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(fake_D_output), logits=fake_D_output))
    ganloss_D_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(fake_D_output), logits=fake_D_output))
    ganloss_D_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(real_D_output), logits=real_D_output))

    use_e_sym = tf.placeholder(tf.float32, shape=(), name="use_E")
    if args.lbt:
        logger.info("Using lbt")
        object_g = lambda_gan * ganloss_g - use_e_sym * unrolled_elbo_data
    else:
        logger.info("Using GAN")
        object_g = lambda_gan * ganloss_g  # - use_e_sym * unrolled_elbo_data

    # object_g = -1 * unrolled_elbo_data
    object_d = ganloss_D_fake + ganloss_D_real
    dis_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 "discriminator")

    g_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, "generator")
    g_train_opt = tf.train.AdamOptimizer(learning_rate=gen_lr,
                                         beta1=beta1,
                                         epsilon=epsilon)
    # g_train_opt = tf.train.RMSPropOptimizer(learning_rate=gen_lr, epsilon=epsilon)
    g_grads = g_train_opt.compute_gradients(object_g, var_list=gen_vars)
    # g_grads_clipped = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in g_grads]
    g_grads_, g_vars_ = zip(*g_grads)
    g_grads_clipped_, g_grads_norm_ = tf.clip_by_global_norm(g_grads_, 5.)
    g_grads_clipped = zip(g_grads_clipped_, g_vars_)
    if args.clip_grad:
        logger.info("Clipping gradients of generator parameters.")
        with tf.control_dependencies(g_update_ops):
            g_train_op = g_train_opt.apply_gradients(g_grads_clipped)
    else:
        with tf.control_dependencies(g_update_ops):
            g_train_op = g_train_opt.apply_gradients(g_grads)
        # g_train_op = g_train_opt.apply_gradients(g_grads)

    d_train_opt = tf.train.AdamOptimizer(learning_rate=dis_lr,
                                         beta1=beta1,
                                         epsilon=epsilon)
    d_train_op = d_train_opt.minimize(object_d, var_list=dis_vars)

    # ----------------------------------------------------------------
    # Training
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    if args.model_path:
        saver.restore(sess, args.model_path)

    # # print variables
    # logger.info("Generator parameters:")
    # for p in gen_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))
    # logger.info("Estimator parameters:")
    # for p in est_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))
    # logger.info("Adam parameters:")
    # for p in adam_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))

    elbo_vals = []
    ganloss_vals = []
    tgan_g, tgan_d_fake, tgan_d_real = 0., 0., 0.
    elbo_genx_val, elbo_data_val, gradients_nrom = -np.inf, -np.inf, 0
    use_e_flag = 0.

    for i in range(max_iter + 1):

        x_mini_batch = dtrain.next_batch(batch_size)[0].reshape(
            [batch_size, x_dim])

        if i > 3000:
            use_e_flag = 1.
            for _ in range(args.n_est):
                elbo_genx_val, _ = sess.run(
                    [elbo_genx_at_steps[-1], e_train_op],
                    feed_dict={lr: 3. * est_lr})

        for _ in range(args.n_dis):
            _, tgan_g, tgan_d_real, tgan_d_fake = sess.run(
                [d_train_op, ganloss_g, ganloss_D_real, ganloss_D_fake],
                feed_dict={data: x_mini_batch})

        elbo_data_val, gradients_nrom, _ = sess.run(
            [unrolled_elbo_data, g_grads_norm_, g_train_op],
            feed_dict={
                data: x_mini_batch,
                lr: est_lr,
                use_e_sym: use_e_flag
            })
        elbo_vals.append([elbo_genx_val, elbo_data_val])
        ganloss_vals.append([tgan_g, tgan_d_real, tgan_d_fake])

        # visualization
        if i % viz_every == 0:
            np_samples_gen, np_samples_est, np_data = sess.run(
                [samples_gen, samples_est, data],
                feed_dict={data: x_mini_batch})
            np_samples_est = np_samples_est.reshape([-1, 32, 32, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 32 * 32 * 3])
            np_samples_gen = np_samples_gen.reshape([-1, 32, 32, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 32 * 32 * 3])
            np_data = np_data.reshape([-1, 32, 32, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 32 * 32 * 3])

            np_samples_est = np_samples_est / 2. + 0.5
            np_samples_gen = np_samples_gen / 2. + 0.5
            np_data = np_data / 2. + 0.5

            paramgraphics.mat_to_img(np_samples_gen[:n_viz],
                                     dim_input,
                                     colorImg=True,
                                     save_path=os.path.join(
                                         dirname,
                                         'sample_' + str(i) + '_gen.png'))
            paramgraphics.mat_to_img(np_data[:n_viz],
                                     dim_input,
                                     colorImg=True,
                                     save_path=os.path.join(
                                         dirname,
                                         'sample_' + str(i) + '_dat.png'))
            paramgraphics.mat_to_img(np_samples_est[:n_viz],
                                     dim_input,
                                     colorImg=True,
                                     save_path=os.path.join(
                                         dirname,
                                         'sample_' + str(i) + '_est.png'))

            fig = plt.figure(figsize=(6, 4))
            plt.plot(elbo_vals,
                     '.',
                     markersize=2,
                     markeredgecolor='none',
                     linestyle='none',
                     alpha=min(1.0, 0.01 * max_iter / (i + 1)))
            plt.ylim((-200.0, 0.0))
            legend = plt.legend(('elbo_genx', 'elbo_data'), markerscale=6)
            for lh in legend.legendHandles:
                lh._legmarker.set_alpha(1.)
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(os.path.join(dirname, 'curve.png'),
                        bbox_inches='tight')
            plt.close(fig)

        # training log
        if i % viz_every == 0:
            elbo_genx_ma_val, elbo_data_ma_val = np.mean(elbo_vals[-200:],
                                                         axis=0)
            logger.info(
                "Iter %d: gradients norm = %.4f. samples LL = %.4f, data LL = %.4f."
                % (i, gradients_nrom, elbo_genx_ma_val, elbo_data_ma_val))
            logger.info(
                "Iter %d: gan_g = %.4f. gan_d_real = %.4f, gan_d_fake = %.4f."
                % (i, tgan_g, tgan_d_real, tgan_d_fake))

        if i % args.model_every == 0:
            saver.save(sess, os.path.join(dirname, 'model_' + str(i)))
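
The DataSet wrapper imported from dataset is not shown; a minimal sketch, assuming next_batch returns an (x, y) tuple and reshuffles after each pass, which is all the training loop above relies on.

import numpy as np

class DataSet:
    """Hypothetical sketch of the DataSet wrapper used above."""

    def __init__(self, x, y):
        self.x, self.y = x, y
        self._order = np.random.permutation(len(x))
        self._pos = 0

    def next_batch(self, batch_size):
        if self._pos + batch_size > len(self.x):
            self._order = np.random.permutation(len(self.x))  # reshuffle
            self._pos = 0
        idx = self._order[self._pos:self._pos + batch_size]
        self._pos += batch_size
        return self.x[idx], self.y[idx]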
Example #12
    updates = []
    for (var, var_eval) in zip(train_var, eval_var):
        var_avg = ema.average(var)
        updates.append(var_eval.assign(var_avg))
    return tf.group(*updates)


if __name__ == "__main__":

    # fix random seed for reproducibility
    np.random.seed(flgs.seed)
    tf.set_random_seed(flgs.seed)


    data_path = os.path.join('./data/cifar10', 'cifar-10-python.tar.gz')
    x_train, y_train, x_test, y_test = dataset.load_cifar10(data_path, normalize=True, one_hot=False)
    num_classes = len(set(y_train))
    n_data, n_xl, _, n_channels = x_train.shape
    n_x = n_xl * n_xl * n_channels
    
    # prepare data
    x_train, y_train, mask_train, x_test, y_test = prepare_dataset(flgs.save_dir, x_train, y_train, x_test, y_test, num_classes)

    # Build the computation graph
    is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    adam_beta1_ph = tf.placeholder(tf.float32, shape=[], name='beta1')
    weight_ph = tf.placeholder(tf.float32, shape=[], name='wght')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, beta1=adam_beta1_ph)

    # data placeholders
Example #13
def train_model(max_epochs=300, start_lr=0.1,
                dense_layers=[20, 20, 20], growth_rate=60, compression=0.5,
                dropout=0.0, weight_decay=1e-4, batch_size=64, logdir='./logs',
                weightsdir='./weights', lr_decrease_factor=0.5, lr_patience=10,
                nbr_gpus=1, model_path=None, initial_epoch=0):
    # Create a dir in the logs catalog and dump info
    run_dir = datetime.today().strftime('%Y%m%d-%H%M%S-%f')

    # Load the dataset with augmentations
    start_time = time.time()
    ((generator_train, generator_test),
     (x_train, y_train), (x_test, y_test),
     (x_val, y_val)) = load_cifar10()

    # Create model using supplied params
    # Load model from file if the argument model_path is supplied.
    # Use mutli_gpu setup if enabled
    if nbr_gpus > 1:
        with tf.device('/cpu:0'):
            if model_path is not None:
                orig_model = load_model(model_path)
            else:
                orig_model = create_densenet(
                    input_shape=(32, 32, 3), dense_layers=dense_layers,
                    growth_rate=growth_rate, nbr_classes=10, weight_decay=weight_decay,
                    compression=compression, dropout=dropout
                )
        model = multi_gpu_model(orig_model, nbr_gpus)

    else:
        if model_path is not None:
            orig_model = load_model(model_path)
        else:
            orig_model = create_densenet(
                input_shape=(32, 32, 3), dense_layers=dense_layers,
                growth_rate=growth_rate, nbr_classes=10, weight_decay=weight_decay,
                compression=compression, dropout=dropout
            )
        model = orig_model

    # Write model info to file
    dump_infomation(os.path.join(logdir, run_dir), orig_model, dense_layers,
                    growth_rate, compression, dropout, weight_decay,
                    batch_size)

    # Setup optimizer
    optimizer = SGD(lr=start_lr, momentum=0.9, nesterov=True)

    cbs = create_callbacks(max_epochs, run_dir, start_lr, lr_decrease_factor,
                           lr_patience, orig_model)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    history = model.fit_generator(
        generator_train.flow(x_train, y_train, batch_size=batch_size, seed=0),
        callbacks=cbs, epochs=max_epochs,
        validation_data=generator_test.flow(x_val, y_val, seed=0),
        use_multiprocessing=True, workers=2, max_queue_size=batch_size,
        verbose=1, initial_epoch=initial_epoch
    )

    best_val_acc = max(history.history['val_acc'])
    best_acc = max(history.history['acc'])
    return {
        'loss': -1 * best_acc,
        'true_loss': -1 * best_val_acc,
        'status': 'ok',
        'eval_time': time.time() - start_time,
    }
Example #14
def train_model(max_epochs=600,
                start_lr=0.025,
                drop_path_keep=0.6,
                nbr_blocks=2,
                weight_decay=1e-4,
                nbr_filters=32,
                batch_size=32,
                logdir='./logs',
                weightsdir='./weights_nasnet',
                lr_decrease_factor=0.5,
                lr_patience=10,
                nbr_gpus=1,
                model_path=None,
                initial_epoch=0):

    # Create a dir in the logs catalog and dump info
    run_dir = 'nasnet_%s' % datetime.today().strftime('%Y%m%d-%H%M%S-%f')

    # Load the dataset with augmentations
    start_time = time.time()
    ((generator_train, generator_test), (x_train, y_train), (x_test, y_test),
     (x_val, y_val)) = load_cifar10()

    # Create current epoch holding tensor
    epoch_tensor = tf.Variable(initial_epoch, dtype=tf.int32, trainable=False)

    # Create model using supplied params
    # Load model from file if the argument model_path is supplied.
    # Use mutli_gpu setup if enabled
    if nbr_gpus > 1:
        with tf.device('/cpu:0'):
            if model_path is not None:
                orig_model = load_model(model_path)
            else:
                orig_model = create_nasnet(input_shape=(32, 32, 3),
                                           nbr_normal_cells=6,
                                           nbr_blocks=nbr_blocks,
                                           weight_decay=weight_decay,
                                           nbr_classes=10,
                                           nbr_filters=nbr_filters,
                                           stem_multiplier=3,
                                           filter_multiplier=2,
                                           dimension_reduction=2,
                                           final_filters=768,
                                           dropout_prob=0.0,
                                           drop_path_keep=drop_path_keep,
                                           max_epochs=max_epochs,
                                           epoch_tensor=epoch_tensor)
        model = multi_gpu_model(orig_model, nbr_gpus)

    else:
        if model_path is not None:
            orig_model = load_model(model_path)
        else:
            orig_model = create_nasnet(input_shape=(32, 32, 3),
                                       nbr_normal_cells=6,
                                       nbr_blocks=nbr_blocks,
                                       weight_decay=weight_decay,
                                       nbr_classes=10,
                                       nbr_filters=nbr_filters,
                                       stem_multiplier=3,
                                       filter_multiplier=2,
                                       dimension_reduction=2,
                                       final_filters=768,
                                       dropout_prob=0.0,
                                       drop_path_keep=drop_path_keep,
                                       max_epochs=max_epochs,
                                       epoch_tensor=epoch_tensor)
        model = orig_model

    # Setup optimizer
    optimizer = SGD(lr=start_lr, momentum=0.9, nesterov=True, clipnorm=5.0)

    cbs = create_callbacks(max_epochs, run_dir, start_lr, lr_decrease_factor,
                           lr_patience, orig_model, epoch_tensor)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        loss_weights=[1, 0.4],  # Weight the auxiliary head by 0.4
        metrics=['accuracy'])

    # Write model info to file
    dump_infomation(os.path.join(logdir, run_dir), orig_model, start_lr,
                    drop_path_keep, nbr_blocks, nbr_filters, batch_size)

    # Setup the multi output generators
    train = generator_train.flow(x_train,
                                 y_train,
                                 batch_size=batch_size,
                                 seed=0)
    test = generator_test.flow(x_val, y_val, batch_size=batch_size, seed=0)
    mul_train = multi_generator(train)
    mul_test = multi_generator(test)
    steps_per_epoch = len(train)
    validation_steps = len(test)

    # Start training
    history = model.fit_generator(mul_train,
                                  callbacks=cbs,
                                  epochs=max_epochs,
                                  validation_data=mul_test,
                                  use_multiprocessing=False,
                                  max_queue_size=batch_size,
                                  verbose=1,
                                  initial_epoch=initial_epoch,
                                  steps_per_epoch=steps_per_epoch,
                                  validation_steps=validation_steps)

    best_val_acc = max(history.history['val_acc'])
    best_acc = max(history.history['acc'])
    return {
        'loss': -1 * best_acc,
        'true_loss': -1 * best_val_acc,
        'status': 'ok',
        'eval_time': time.time() - start_time,
    }
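
multi_generator is not shown on this page; since the NASNet model is compiled with two outputs (the main classifier plus an auxiliary head weighted 0.4), a minimal sketch that simply duplicates the labels for both heads, assuming the wrapped Keras iterator yields (x, y) batches.

def multi_generator(generator):
    """Hypothetical sketch: adapt a single-output generator to a two-head model."""
    while True:
        x_batch, y_batch = next(generator)
        # same labels for the main classifier and the auxiliary head
        yield x_batch, [y_batch, y_batch]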
Example #15
def load_fname(version, suffix=None, with_ext=False):
    suffix = "." + suffix if suffix is not None else ""
    prefix = "./data/cifar_resnet%s%s" % (version, suffix)
    return utils.extend_fname(prefix, with_ext=with_ext)


batch_size = 16
input_size = 32
inputs_ext = {'data': {'shape': (batch_size, 3, input_size, input_size)}}
inputs = [mx.sym.var(n) for n in inputs_ext]
calib_ctx = mx.gpu(2)
ctx = [mx.gpu(int(i)) for i in "1,2,3,4,5".split(',') if i.strip()]

utils.log_init()

val_data = ds.load_cifar10(batch_size, input_size)
data_iter = iter(val_data)


def data_iter_func():
    data, label = next(data_iter)
    return data, label


data, _ = next(data_iter)

sym_file, param_file = load_fname(version)
net1 = utils.load_model(sym_file, param_file, inputs, ctx=ctx)
acc_top1 = mx.metric.Accuracy()
acc_top5 = mx.metric.TopKAccuracy(5)
acc_top1.reset()
    print("Loading all models in %s" % path)

    models = []
    for model_file in os.listdir(path):

        try:
            print('Loading %s' % model_file)
            model = init_model(os.path.join(path, model_file))
            models.append(model)

        except RuntimeError:
            print('Some error occurred!')

    # Evaluate using ensemble:
    ((generator_train, generator_test), (x_train, y_train), (x_test, y_test),
     (x_val, y_val)) = load_cifar10()

    # Evaluate the models
    correct = 0
    total = 0
    for x_batch, y_batch in generator_test.flow(x_test, y_test, batch_size=32):
        print('%d/%d' % (total, len(y_test)))
        total += len(y_batch)

        y = predict_models(models, x_batch)
        correct += np.sum(y.flatten() == y_batch.flatten())

        if total >= len(y_test):
            break

    print('Correct: %d/%d (%f)' % (correct, total, correct / total))
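
predict_models is assumed to average the class probabilities of the given models and return the predicted labels; a minimal sketch of that ensemble step, assuming each model's predict returns per-class probabilities.

import numpy as np

def predict_models(models, x_batch):
    """Hypothetical sketch: average softmax outputs over an ensemble."""
    probs = np.mean([m.predict(x_batch) for m in models], axis=0)
    return np.argmax(probs, axis=1)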