def test_vs_A2(batch_normalize):
    np.random.seed(42)
    X, Y, y = dataset.load_cifar10(batch='data_batch_1', limit_N=None)
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch', limit_N=None)
    K, d = (Y.shape[0], X.shape[0])
    net_sizes = [d, 50, K]
    gd_params = {
        'eta': 0.024749,
        'batch_size': 100,
        'epochs': 10,
        'gamma': 0.9,
        'decay_rate': 0.80,
        'lambda': 0.000242,
        'batch_normalize': batch_normalize
    }
    net = Net(net_sizes, gd_params)
    r = net.train(X, Y, X_test, Y_test, silent=False)
    losses, test_losses, accuracies, test_accuracies = (
        r['losses'], r['test_losses'], r['accuracies'], r['test_accuracies'])
    print("Final accuracy: {}".format(accuracies[-1]))
    print("Final accuracy (test): {}".format(test_accuracies[-1]))
    return r
def params_search(min_eta, max_eta, min_lambda, max_lambda, silent=False,
                  limit_N=None, combs=100):
    if not silent:
        print("Running parameters search...")
    np.random.seed(42)
    X, Y, y = dataset.load_cifar10(batch='data_batch_1', limit_N=limit_N)
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch', limit_N=limit_N)
    K, d = (Y.shape[0], X.shape[0])
    net_sizes = [d, 50, 30, K]
    default_params = {
        'eta': 0.020,
        'batch_size': 100,
        'epochs': 10,
        'gamma': 0.9,
        'decay_rate': 0.98,
        'lambda': 0.000001,
        'batch_normalize': True
    }
    configs = _get_configs(default_params, combs, min_eta, max_eta,
                           min_lambda, max_lambda)
    net = Net(net_sizes, default_params)
    Ws, bs = net.Ws, net.bs
    parallel = Parallel(n_jobs=8, backend='multiprocessing', verbose=5)
    results = parallel(
        delayed(_search_worker)(net_sizes, c, Ws, bs, X, Y, X_test, Y_test)
        for c in configs)
    if not silent:
        print("Parameters search done.")
    return configs, results
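# Hedged usage sketch for params_search (illustrative, not part of the original
# script). Each entry in `results` is produced by _search_worker, which is
# defined elsewhere, so this only pairs every sampled config with its result.
# The ranges, limit_N and combs values are arbitrary, and the 'eta'/'lambda'
# keys are assumed to be carried over from default_params by _get_configs.
def example_params_search():
    configs, results = params_search(min_eta=0.001, max_eta=0.1,
                                     min_lambda=1e-6, max_lambda=1e-2,
                                     limit_N=1000, combs=20)
    for config, result in zip(configs, results):
        print("eta={:.6f}  lambda={:.6f}  ->  {}".format(
            config['eta'], config['lambda'], result))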
def test_four_layers(batch_normalize):
    np.random.seed(42)
    X, Y, y = dataset.load_multibatch_cifar10()
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch')
    K, d = (Y.shape[0], X.shape[0])
    net_sizes = [d, 50, 30, 10, K]
    gd_params = {
        'eta': 0.03,
        'batch_size': 100,
        'epochs': 20,
        'gamma': 0.9,
        'decay_rate': 0.95,
        'lambda': 0.0,
        'batch_normalize': batch_normalize
    }
    net = Net(net_sizes, gd_params)
    r = net.train(X, Y, X_test, Y_test, silent=False)
    # costs, test_costs = (r['costs'], r['test_costs'])
    losses, test_losses, accuracies, test_accuracies = (
        r['losses'], r['test_losses'], r['accuracies'], r['test_accuracies'])
    print("Final accuracy: {}".format(accuracies[-1]))
    print("Final accuracy (test): {}".format(test_accuracies[-1]))
    return r
def test_final_model():
    np.random.seed(42)
    X, Y, y = dataset.load_multibatch_cifar10()
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch')
    K, d = (Y.shape[0], X.shape[0])
    net_sizes = [d, 50, 30, K]
    gd_params = {
        'eta': 0.0169,
        'batch_size': 100,
        'epochs': 20,
        'gamma': 0.6,
        'decay_rate': 0.93,
        'lambda': 5e-5,
        'plateau_guard': 0.0002,
        'batch_normalize': True
    }
    net = Net(net_sizes, gd_params)
    r = net.train(X, Y, X_test, Y_test, silent=False)
    losses, test_losses, accuracies, test_accuracies = (
        r['losses'], r['test_losses'], r['accuracies'], r['test_accuracies'])
    print("Final accuracy: {}".format(accuracies[-1]))
    print("Final accuracy (test): {}".format(test_accuracies[-1]))
    plot_results(r, '../Report/Figs/final_model.eps')
    return r
def get_cifar10_data(n_train=49000, n_val=1000, n_test=10000, subtract_mean=True):
    X_train, y_train, X_test, y_test = load_cifar10()

    # Subsample the data
    mask = list(range(n_train, n_train + n_val))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(n_train))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(n_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
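# Hedged usage sketch for get_cifar10_data (illustrative). It assumes the
# underlying load_cifar10() returns 50,000 training and 10,000 test images as
# float arrays; the exact per-image shape depends on that loader.
def example_data_splits():
    X_train, y_train, X_val, y_val, X_test, y_test = get_cifar10_data()
    print(X_train.shape)  # 49,000 rows with the default splits
    print(X_val.shape)    # 1,000 rows
    print(X_test.shape)   # 10,000 rows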
def test_import():
    filepath = './model_epoch_20.pkl'
    net = Net.import_model(filepath)
    X_test, Y_test, y_test = dataset.load_cifar10(batch='test_batch')
    acc = net.compute_accuracy(X_test, y_test)
    print('test acc', acc)
def load_image_data(data, n_xl, n_channels, output_batch_size):
    if data == 'mnist':
        # Load MNIST
        data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'data', 'mnist.pkl.gz')
        x_train, t_train, x_valid, t_valid, _, _ = \
            dataset.load_mnist_realval(data_path)
        x_train = np.vstack([x_train, x_valid]).astype('float32')
        x_train = np.reshape(x_train, [-1, n_xl, n_xl, n_channels])
        x_train2 = x_train[:output_batch_size]
        t_train2 = t_train[:output_batch_size]
        t_train2 = np.nonzero(t_train2)[1]
        order = np.argsort(t_train2)
        sorted_x_train = x_train2[order]
    elif data == 'svhn':
        # Load SVHN data
        print('Reading svhn...')
        time_read = -time.time()
        print('Train')
        x_train = np.load('data/svhn_train1_x.npy')
        y_train = np.load('data/svhn_train1_y.npy')
        print('Test')
        x_test = np.load('data/svhn_test_x.npy')
        y_test = np.load('data/svhn_test_y.npy')
        time_read += time.time()
        print('Finished in {:.4f} seconds'.format(time_read))
        x_train2 = x_train[:output_batch_size]
        y_train2 = y_train[:output_batch_size]
        order = np.argsort(y_train2)
        sorted_x_train = x_train2[order]
    elif data == 'lfw':
        # Load LFW data
        print('Reading lfw...')
        time_read = -time.time()
        x_train = np.load('data/lfw.npy').astype(np.float32)
        print(x_train.shape)
        x_train = np.reshape(x_train, [-1, n_xl, n_xl, n_channels])
        time_read += time.time()
        print('Finished in {:.4f} seconds'.format(time_read))
        sorted_x_train = x_train[:output_batch_size]
    else:
        # Default: CIFAR-10
        x_train, t_train, x_test, t_test = \
            dataset.load_cifar10('data/cifar10/cifar-10-python.tar.gz',
                                 normalize=True, one_hot=True)
        x = np.vstack((x_train, x_test))
        t = np.vstack((t_train, t_test))
        x2 = x[:output_batch_size]
        t2 = np.argmax(t[:output_batch_size], 1)
        order = np.argsort(t2)
        x_train = x
        sorted_x_train = x2[order]
    return x_train, sorted_x_train
def test_gradients(batch_normalize):
    np.random.seed(42)
    X, Y, y = dataset.load_cifar10(batch='data_batch_1', limit_N=100, limit_d=100)
    K, d = (Y.shape[0], X.shape[0])
    net_sizes = [d, 50, 30, 20, K]
    gd_params = {'lambda': 0.0, 'batch_normalize': batch_normalize}
    net = Net(net_sizes, gd_params)

    print('\nComputing gradients (analytical methods)...')
    if batch_normalize:
        ss, s_means, s_vars, Hs, P = net._forward_bn(X)
        dtheta = net._backward_bn(X, Y, P, Hs, ss, s_means, s_vars)
    else:
        Hs, P = net._forward(X)
        dtheta = net._backward(X, Y, P, Hs)

    dummy_net = Net(net.network_sizes, gd_params, init_theta=False)

    def dummy_cost_fn(_X, _Y, _W, _b, _lamb):
        dummy_net.Ws = _W
        dummy_net.bs = _b
        return dummy_net.compute_cost(_X, _Y)

    print('Computing gradients (fast numerical method)...')
    dtheta_num = compute_gradients_num(X, Y, net.Ws, net.bs, net.lamb, 1e-5,
                                       dummy_cost_fn)
    print('Computing gradients (slow numerical method)...')
    dtheta_num_slow = compute_gradients_num_slow(X, Y, net.Ws, net.bs,
                                                 net.lamb, 1e-5, dummy_cost_fn)
    print('\nDone\n')
    print('Mean relative errors between numerical methods:\n{}\n'.format(
        compare_dthetas(dtheta_num, dtheta_num_slow)))
    print('Mean relative errors between analytical and slow numerical:\n{}\n'.format(
        compare_dthetas(dtheta, dtheta_num_slow)))
    print('Mean relative errors between analytical and fast numerical:\n{}\n'.format(
        compare_dthetas(dtheta, dtheta_num)))
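# For reference: the relative-error metric commonly used for this kind of
# gradient check, sketched as a stand-alone helper. compare_dthetas is defined
# elsewhere in the repo and may aggregate per-layer or use a different
# denominator; treat this as an illustrative stand-in, not its implementation.
def relative_error(g_analytical, g_numerical, eps=1e-9):
    """Mean of |ga - gn| / max(eps, |ga| + |gn|) over all entries."""
    num = np.abs(g_analytical - g_numerical)
    den = np.maximum(eps, np.abs(g_analytical) + np.abs(g_numerical))
    return np.mean(num / den)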
def test_model(model_path=None):
    # Load the dataset with augmentations
    start_time = time.time()
    ((generator_train, generator_test),
     (x_train, y_train), (x_test, y_test), (x_val, y_val)) = load_cifar10()

    model = load_model(model_path)
    optimizer = SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    loss = model.evaluate_generator(generator_test.flow(x_test, y_test))
    print('Loss was: %s' % loss)
    return loss
def main():
    seed = 1234
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    client.notify("==> Loading the dataset...")
    dataset = load_cifar10(batch=args.batch)
    train_dl = dataset['train']
    test_dl = dataset['test']

    client.notify("==> Loading the model...")
    net = Resnet50(output_dim=10).to(device)
    if args.weight_file is not None:
        weights = torch.load(args.weight_file)
        net.load_state_dict(weights, strict=False)

    if not os.path.exists('./models'):
        os.makedirs('./models')

    optimizer = optimizers.Adam(net.parameters(), lr=1e-4)
    lr_scheduler = optimizers.lr_scheduler.StepLR(optimizer, 5, 0.1)
    history = {
        'epochs': np.arange(1, args.epochs + 1),
        'train_loss': [],
        'train_acc': [],
        'test_loss': [],
        'test_acc': []
    }

    client.notify('==> Start training...')
    for epoch in range(args.epochs):
        train(net, optimizer, train_dl, epoch, history)
        lr_scheduler.step()
        test(net, test_dl, epoch, history)
    client.notify("==> Training Done")

    plot_result(history)
    client.notify('==> Saved plot')
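# Hedged sketch of the CLI arguments main() relies on. The real parser lives
# elsewhere in the repo; the option names below are inferred from the usages
# above (args.batch, args.epochs, args.weight_file), and the defaults are
# illustrative assumptions only.
def build_argparser():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch', type=int, default=128)          # batch size passed to load_cifar10
    parser.add_argument('--epochs', type=int, default=30)          # number of training epochs
    parser.add_argument('--weight_file', type=str, default=None)   # optional checkpoint to warm-start from
    return parser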
def run_experiment(args):
    import os
    # set environment variables for tensorflow
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    import inspect
    import shutil
    import numpy as np
    import tensorflow as tf
    from collections import OrderedDict
    import matplotlib.pyplot as plt
    plt.switch_backend('Agg')

    import utils
    import paramgraphics
    import nn
    from tensorflow.contrib.framework.python.ops import arg_scope
    # import tensorflow.contrib.layers as layers

    # ----------------------------------------------------------------
    # Arguments and Settings
    args.message = 'LBT-GAN-cifar10_' + args.message
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # copy file for reproducibility
    logger, dirname = utils.setup_logging(args)
    script_fn = inspect.getfile(inspect.currentframe())
    script_src = os.path.abspath(script_fn)
    script_dst = os.path.abspath(os.path.join(dirname, script_fn))
    shutil.copyfile(script_src, script_dst)
    logger.info("script copied from %s to %s" % (script_src, script_dst))

    # print arguments
    for k, v in sorted(vars(args).items()):
        logger.info(" %20s: %s" % (k, v))

    # get arguments
    batch_size = args.batch_size
    batch_size_est = args.batch_size_est
    gen_lr = args.gen_lr
    dis_lr = args.dis_lr
    est_lr = args.est_lr
    lambda_gan = args.lambda_gan
    beta1 = 0.5
    epsilon = 1e-8
    max_iter = args.max_iter
    viz_every = args.viz_every
    z_dim, vae_z_dim = utils.get_ints(args.z_dims)
    unrolling_steps = args.unrolling_steps
    assert unrolling_steps > 0
    n_viz = args.n_viz

    # ----------------------------------------------------------------
    # Dataset
    from dataset import load_cifar10, DataSet
    train_x, train_y, test_x, test_y = load_cifar10()
    train_x = train_x * 2. - 1.
    test_x = test_x * 2. - 1.
    dtrain = DataSet(train_x, train_y)
    dtest = DataSet(test_x, test_y)
    # data_channel = 3
    x_dim = 32 * 32 * 3
    dim_input = (32, 32)

    # ----------------------------------------------------------------
    # Model setup
    logger.info("Setting up model ...")

    def discriminator(x, Reuse=tf.AUTO_REUSE, is_training=True):

        def leaky_relu(x, alpha=0.2):
            return tf.maximum(alpha * x, x)

        with tf.variable_scope("discriminator", reuse=Reuse):
            x = tf.reshape(x, [batch_size, 32, 32, 3])
            lx = tf.layers.dropout(x, 0.2, training=is_training)
            conv1 = tf.layers.conv2d(lx, 64, 5, 2, use_bias=True, padding='same')
            conv1 = leaky_relu(conv1)
            conv2 = tf.layers.conv2d(conv1, 128, 5, 2, use_bias=False, padding='same')
            conv2 = tf.layers.batch_normalization(conv2, training=is_training)
            conv2 = leaky_relu(conv2)
            conv3 = tf.layers.conv2d(conv2, 256, 5, 2, use_bias=False, padding='same')
            conv3 = tf.layers.batch_normalization(conv3, training=is_training)
            conv3 = leaky_relu(conv3)
            conv3 = tf.layers.flatten(conv3)
            fc2 = tf.layers.dense(conv3, 1)
            return fc2

    def generator(z, Reuse=tf.AUTO_REUSE, flatten=True, is_training=True):
        if args.g_nonlin == 'relu':
            # print("Use Relu in G")
            nonlin = tf.nn.relu
        else:
            # print("Use tanh in G")
            nonlin = tf.nn.tanh
        # nonlin = tf.nn.relu if args.g_nonlin == 'relu' else tf.nn.tanh
        # norm_prms = {'is_training': is_training, 'decay': 0.9, 'scale': False}
        with tf.variable_scope("generator", reuse=Reuse):
            # x = layers.fully_connected(x, 4 * 4 * 512)
            lx = tf.layers.dense(z, 4 * 4 * 512)
            lx = tf.reshape(lx, [-1, 4, 4, 512])
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)
            # x = tf.reshape(x, (-1, 4, 4, 512))
            # x = conv_concate_onehot(x, y)
            # x = layers.conv2d_transpose(x, 256, 5, 2)
            lx = tf.layers.conv2d_transpose(lx, 256, 5, 2, use_bias=False, padding='same')
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)
            # x = conv_concate_onehot(x, y)
            # x = layers.conv2d_transpose(x, 128, 5, 2)
            lx = tf.layers.conv2d_transpose(lx, 128, 5, 2, use_bias=False, padding='same')
            lx = tf.layers.batch_normalization(lx, training=is_training)
            lx = nonlin(lx)
            # x = conv_concate_onehot(x, y)
            # x = layers.conv2d_transpose(
            #     x, 3, 5, 2, normalizer_fn=None, activation_fn=nn.tanh)
            lx = tf.layers.conv2d_transpose(lx, 3, 5, 2, padding='same')
            lx = tf.nn.tanh(lx)
            if flatten is True:
                lx = tf.layers.flatten(lx)
            return lx

    nonlin = tf.nn.relu

    def compute_est_samples(z, params=None, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("estimator"):
            with arg_scope([nn.dense], params=params):
                with tf.variable_scope("decoder", reuse=reuse):
                    h_dec_1 = nn.dense(z, vae_z_dim, 200 * 2, "dense1",
                                       nonlinearity=nonlin)
                    h_dec_2 = nn.dense(h_dec_1, 200 * 2, 500 * 2, "dense2",
                                       nonlinearity=nonlin)
                    x_mean = nn.dense(h_dec_2, 500 * 2, x_dim, "dense3",
                                      nonlinearity=None)
                    x_mean = tf.nn.tanh(x_mean)
                    return x_mean

    def compute_est_ll(x, params=None, reuse=tf.AUTO_REUSE):
        with tf.variable_scope("estimator", reuse=reuse):
            logvae_x_var = tf.get_variable(
                "logvae_x_var", (), tf.float32, trainable=True,
                initializer=tf.constant_initializer(-1))
            with arg_scope([nn.dense], params=params):
                with tf.variable_scope("encoder", reuse=reuse):
                    h_enc_1 = nn.dense(x, x_dim, 500 * 2, "dense1",
                                       nonlinearity=nonlin)
                    # h_enc_1 = nn.batch_norm(h_enc_1, "bn1", 129, 2)
                    h_enc_2 = nn.dense(h_enc_1, 500 * 2, 200 * 2, "dense2",
                                       nonlinearity=nonlin)
                    # h_enc_2 = nn.batch_norm(h_enc_2, "bn2", 128, 2)
                    z_mean = nn.dense(h_enc_2, 200 * 2, vae_z_dim, "dense3",
                                      nonlinearity=None)
                    z_logvar = nn.dense(h_enc_2, 200 * 2, vae_z_dim, "dense4",
                                        nonlinearity=None)
                epsilon = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)
                z = z_mean + tf.exp(0.5 * z_logvar) * epsilon
                with tf.variable_scope("decoder", reuse=reuse):
                    h_dec_1 = nn.dense(z, vae_z_dim, 200 * 2, "dense1",
                                       nonlinearity=nonlin)
                    # h_dec_1 = nn.batch_norm(h_dec_1, "bn1", 127, 2)
                    h_dec_2 = nn.dense(h_dec_1, 200 * 2, 500 * 2, "dense2",
                                       nonlinearity=nonlin)
                    # h_dec_2 = nn.batch_norm(h_dec_2, "bn2", 128, 2)
                    x_mean = nn.dense(h_dec_2, 500 * 2, x_dim, "dense3",
                                      nonlinearity=None)
                    x_mean = tf.nn.tanh(x_mean)

        # elbo = tf.reduce_mean(
        #     tf.reduce_sum(
        #         -tf.nn.sigmoid_cross_entropy_with_logits(
        #             logits=x_mean, labels=x),
        #         axis=1) -
        #     tf.reduce_sum(
        #         -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar)),
        #         axis=1))
        vae_x_var = tf.exp(logvae_x_var)
        elbo = tf.reduce_mean(
            tf.reduce_sum(
                -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(vae_x_var) -
                tf.layers.flatten(tf.square(x - x_mean)) / (2 * vae_x_var),
                axis=1) -
            tf.reduce_sum(
                -0.5 * (1 + z_logvar - tf.square(z_mean) - tf.exp(z_logvar)),
                axis=1))
        return elbo, x_mean

    def compute_est_updated_with_SGD(x, lr=0.001, params=None):
        elbo, _ = compute_est_ll(x, params=params)
        grads = tf.gradients(elbo, params.values())
        new_params = params.copy()
        for key, g in zip(params, grads):
            new_params[key] += lr * g
        return elbo, new_params

    def compute_est_updated_with_Adam(x, lr=0.001, beta_1=0.9, beta_2=0.999,
                                      epsilon=1e-7, decay=0., params=None,
                                      adam_params=None):
        elbo, _ = compute_est_ll(x, params=params)
        grads = tf.gradients(elbo, params.values())
        new_params = params.copy()
        new_adam_params = adam_params.copy()
        new_adam_params['iterations'] += 1
        lr = lr * \
            (1. / (1. + decay * tf.cast(adam_params['iterations'], tf.float32)))
        t = tf.cast(new_adam_params['iterations'], tf.float32)
        lr_t = lr * (tf.sqrt(1. - tf.pow(beta_2, t)) / (1. - tf.pow(beta_1, t)))
        for key, g in zip(params, grads):
            new_adam_params['m_' + key] = (
                beta_1 * adam_params['m_' + key]) + (1. - beta_1) * g
            new_adam_params['v_' + key] = tf.stop_gradient(
                (beta_2 * adam_params['v_' + key]) + (1. - beta_2) * tf.square(g))
            new_params[key] = params[key] + lr_t * new_adam_params[
                'm_' + key] / tf.sqrt(new_adam_params['v_' + key] + epsilon)
        return elbo, new_params, new_adam_params

    lr = tf.placeholder(tf.float32)
    data = tf.placeholder(tf.float32, shape=(batch_size, x_dim))

    # Construct generator and estimator nets
    est_params_dict = OrderedDict()
    _, _ = compute_est_ll(data, params=est_params_dict)
    gen_noise = tf.random_normal((batch_size_est, z_dim), dtype=tf.float32)
    samples_gen = generator(gen_noise)
    vae_noise = tf.random_normal((batch_size_est, vae_z_dim), dtype=tf.float32)
    samples_est = compute_est_samples(z=vae_noise, params=est_params_dict)
    # for key in est_params_dict:
    #     print(key, est_params_dict[key])

    adam_params_dict = OrderedDict()
    with tf.variable_scope("adam"):
        adam_params_dict['iterations'] = tf.Variable(0, dtype=tf.int64,
                                                     name='iterations')
        for key in est_params_dict:
            adam_params_dict['m_' + key] = tf.Variable(
                tf.zeros_like(est_params_dict[key]), name='m_' + key)
            adam_params_dict['v_' + key] = tf.Variable(
                tf.zeros_like(est_params_dict[key]), name='v_' + key)

    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
    est_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "estimator")
    adam_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "adam")

    # unrolling estimator updates
    cur_params = est_params_dict
    cur_adam_params = adam_params_dict
    elbo_genx_at_steps = []
    for _ in range(unrolling_steps):
        samples_gen = generator(
            tf.random_normal((batch_size_est, z_dim), dtype=tf.float32))
        elbo_genx_step, cur_params, cur_adam_params = compute_est_updated_with_Adam(
            samples_gen, lr=lr, beta_1=beta1, epsilon=epsilon,
            params=cur_params, adam_params=cur_adam_params)
        elbo_genx_at_steps.append(elbo_genx_step)

    # estimator update
    updates = []
    for key in est_params_dict:
        updates.append(tf.assign(est_params_dict[key], cur_params[key]))
    for key in adam_params_dict:
        updates.append(tf.assign(adam_params_dict[key], cur_adam_params[key]))
    e_train_op = tf.group(*updates, name="e_train_op")

    # Optimize the generator on the unrolled ELBO loss
    unrolled_elbo_data, _ = compute_est_ll(data, params=cur_params)
    # unrolled_elbo_samp, _ = compute_est_ll(
    #     tf.stop_gradient(samples_gen), params=cur_params)

    # GAN-loss for discriminator and generator
    samples_gen_gan = generator(
        tf.random_normal((batch_size_est, z_dim), dtype=tf.float32))
    fake_D_output = discriminator(samples_gen_gan)
    real_D_output = discriminator(data)
    # print(fake_D_output, real_D_output)
    ganloss_g = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(fake_D_output), logits=fake_D_output))
    ganloss_D_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(fake_D_output), logits=fake_D_output))
    ganloss_D_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(real_D_output), logits=real_D_output))

    use_e_sym = tf.placeholder(tf.float32, shape=(), name="use_E")
    if args.lbt:
        logger.info("Using lbt")
        object_g = lambda_gan * ganloss_g - use_e_sym * unrolled_elbo_data
    else:
        logger.info("Using GAN")
        object_g = lambda_gan * ganloss_g  # - use_e_sym * unrolled_elbo_data
    # object_g = -1 * unrolled_elbo_data
    object_d = ganloss_D_fake + ganloss_D_real

    dis_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
    g_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, "generator")
    g_train_opt = tf.train.AdamOptimizer(learning_rate=gen_lr, beta1=beta1,
                                         epsilon=epsilon)
    # g_train_opt = tf.train.RMSPropOptimizer(learning_rate=gen_lr, epsilon=epsilon)
    g_grads = g_train_opt.compute_gradients(object_g, var_list=gen_vars)
    # g_grads_clipped = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in g_grads]
    g_grads_, g_vars_ = zip(*g_grads)
    g_grads_clipped_, g_grads_norm_ = tf.clip_by_global_norm(g_grads_, 5.)
    g_grads_clipped = zip(g_grads_clipped_, g_vars_)
    if args.clip_grad:
        logger.info("Clipping gradients of generator parameters.")
        with tf.control_dependencies(g_update_ops):
            g_train_op = g_train_opt.apply_gradients(g_grads_clipped)
    else:
        with tf.control_dependencies(g_update_ops):
            g_train_op = g_train_opt.apply_gradients(g_grads)
    # g_train_op = g_train_opt.apply_gradients(g_grads)

    d_train_opt = tf.train.AdamOptimizer(learning_rate=dis_lr, beta1=beta1,
                                         epsilon=epsilon)
    d_train_op = d_train_opt.minimize(object_d, var_list=dis_vars)

    # ----------------------------------------------------------------
    # Training
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    if args.model_path:
        saver.restore(sess, args.model_path)

    # # print variables
    # logger.info("Generator parameters:")
    # for p in gen_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))
    # logger.info("Estimator parameters:")
    # for p in est_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))
    # logger.info("Adam parameters:")
    # for p in adam_vars:
    #     logger.debug("%s: %s" % (p.name, sess.run(tf.shape(p))))

    elbo_vals = []
    ganloss_vals = []
    tgan_g, tgan_d_fake, tgan_d_real = 0., 0., 0.
    elbo_genx_val, elbo_data_val, gradients_nrom = -np.inf, -np.inf, 0
    use_e_flag = 0.
    for i in range(max_iter + 1):
        x_mini_batch = dtrain.next_batch(batch_size)[0].reshape(
            [batch_size, x_dim])

        if i > 3000:
            use_e_flag = 1.
            for _ in range(args.n_est):
                elbo_genx_val, _ = sess.run(
                    [elbo_genx_at_steps[-1], e_train_op],
                    feed_dict={lr: 3. * est_lr})

        for _ in range(args.n_dis):
            _, tgan_g, tgan_d_real, tgan_d_fake = sess.run(
                [d_train_op, ganloss_g, ganloss_D_real, ganloss_D_fake],
                feed_dict={data: x_mini_batch})

        elbo_data_val, gradients_nrom, _ = sess.run(
            [unrolled_elbo_data, g_grads_norm_, g_train_op],
            feed_dict={data: x_mini_batch, lr: est_lr, use_e_sym: use_e_flag})

        elbo_vals.append([elbo_genx_val, elbo_data_val])
        ganloss_vals.append([tgan_g, tgan_d_real, tgan_d_fake])

        # visualization
        if i % viz_every == 0:
            np_samples_gen, np_samples_est, np_data = sess.run(
                [samples_gen, samples_est, data],
                feed_dict={data: x_mini_batch})
            np_samples_est = np_samples_est.reshape([-1, 32, 32, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 32 * 32 * 3])
            np_samples_gen = np_samples_gen.reshape([-1, 32, 32, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 32 * 32 * 3])
            np_data = np_data.reshape([-1, 32, 32, 3]).transpose(
                [0, 3, 1, 2]).reshape([-1, 32 * 32 * 3])
            np_samples_est = np_samples_est / 2. + 0.5
            np_samples_gen = np_samples_gen / 2. + 0.5
            np_data = np_data / 2. + 0.5

            paramgraphics.mat_to_img(
                np_samples_gen[:n_viz], dim_input, colorImg=True,
                save_path=os.path.join(dirname, 'sample_' + str(i) + '_gen.png'))
            paramgraphics.mat_to_img(
                np_data[:n_viz], dim_input, colorImg=True,
                save_path=os.path.join(dirname, 'sample_' + str(i) + '_dat.png'))
            paramgraphics.mat_to_img(
                np_samples_est[:n_viz], dim_input, colorImg=True,
                save_path=os.path.join(dirname, 'sample_' + str(i) + '_est.png'))

            fig = plt.figure(figsize=(6, 4))
            plt.plot(elbo_vals, '.', markersize=2, markeredgecolor='none',
                     linestyle='none', alpha=min(1.0, 0.01 * max_iter / (i + 1)))
            plt.ylim((-200.0, 0.0))
            legend = plt.legend(('elbo_genx', 'elbo_data'), markerscale=6)
            for lh in legend.legendHandles:
                lh._legmarker.set_alpha(1.)
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(os.path.join(dirname, 'curve.png'), bbox_inches='tight')
            plt.close(fig)

        # training log
        if i % viz_every == 0:
            elbo_genx_ma_val, elbo_data_ma_val = np.mean(elbo_vals[-200:], axis=0)
            logger.info(
                "Iter %d: gradients norm = %.4f. samples LL = %.4f, data LL = %.4f."
                % (i, gradients_nrom, elbo_genx_ma_val, elbo_data_ma_val))
            logger.info(
                "Iter %d: gan_g = %.4f. gan_d_real = %.4f, gan_d_fake = %.4f."
                % (i, tgan_g, tgan_d_real, tgan_d_fake))

        if i % args.model_every == 0:
            saver.save(sess, os.path.join(dirname, 'model_' + str(i)))
    # (tail of an EMA-assignment helper; its def line precedes this excerpt)
    updates = []
    for (var, var_eval) in zip(train_var, eval_var):
        var_avg = ema.average(var)
        updates.append(var_eval.assign(var_avg))
    return tf.group(*updates)


if __name__ == "__main__":
    # fix random seed for reproducibility
    np.random.seed(flgs.seed)
    tf.set_random_seed(flgs.seed)

    data_path = os.path.join('./data/cifar10', 'cifar-10-python.tar.gz')
    x_train, y_train, x_test, y_test = dataset.load_cifar10(
        data_path, normalize=True, one_hot=False)
    num_classes = len(set(y_train))
    n_data, n_xl, _, n_channels = x_train.shape
    n_x = n_xl * n_xl * n_channels

    # prepare data
    x_train, y_train, mask_train, x_test, y_test = prepare_dataset(
        flgs.save_dir, x_train, y_train, x_test, y_test, num_classes)

    # Build the computation graph
    is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    adam_beta1_ph = tf.placeholder(tf.float32, shape=[], name='beta1')
    weight_ph = tf.placeholder(tf.float32, shape=[], name='wght')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph, beta1=adam_beta1_ph)

    # data placeholders
def train_model(max_epochs=300, start_lr=0.1, dense_layers=[20, 20, 20],
                growth_rate=60, compression=0.5, dropout=0.0, weight_decay=1e-4,
                batch_size=64, logdir='./logs', weightsdir='./weights',
                lr_decrease_factor=0.5, lr_patience=10, nbr_gpus=1,
                model_path=None, initial_epoch=0):
    # Create a dir in the logs catalog and dump info
    run_dir = datetime.today().strftime('%Y%m%d-%H%M%S-%f')

    # Load the dataset with augmentations
    start_time = time.time()
    ((generator_train, generator_test),
     (x_train, y_train), (x_test, y_test), (x_val, y_val)) = load_cifar10()

    # Create model using supplied params.
    # Load model from file if the argument model_path is supplied.
    # Use multi_gpu setup if enabled.
    if nbr_gpus > 1:
        with tf.device('/cpu:0'):
            if model_path is not None:
                orig_model = load_model(model_path)
            else:
                orig_model = create_densenet(
                    input_shape=(32, 32, 3),
                    dense_layers=dense_layers,
                    growth_rate=growth_rate,
                    nbr_classes=10,
                    weight_decay=weight_decay,
                    compression=compression,
                    dropout=dropout
                )
        model = multi_gpu_model(orig_model, nbr_gpus)
    else:
        if model_path is not None:
            orig_model = load_model(model_path)
        else:
            orig_model = create_densenet(
                input_shape=(32, 32, 3),
                dense_layers=dense_layers,
                growth_rate=growth_rate,
                nbr_classes=10,
                weight_decay=weight_decay,
                compression=compression,
                dropout=dropout
            )
        model = orig_model

    # Write model info to file
    dump_infomation(os.path.join(logdir, run_dir), orig_model, dense_layers,
                    growth_rate, compression, dropout, weight_decay, batch_size)

    # Setup optimizer
    optimizer = SGD(lr=start_lr, momentum=0.9, nesterov=True)
    cbs = create_callbacks(max_epochs, run_dir, start_lr, lr_decrease_factor,
                           lr_patience, orig_model)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    history = model.fit_generator(
        generator_train.flow(x_train, y_train, batch_size=batch_size, seed=0),
        callbacks=cbs,
        epochs=max_epochs,
        validation_data=generator_test.flow(x_val, y_val, seed=0),
        use_multiprocessing=True,
        workers=2,
        max_queue_size=batch_size,
        verbose=1,
        initial_epoch=initial_epoch
    )

    best_val_acc = max(history.history['val_acc'])
    best_acc = max(history.history['acc'])
    return {
        'loss': -1 * best_acc,
        'true_loss': -1 * best_val_acc,
        'status': 'ok',
        'eval_time': time.time() - start_time,
    }
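# Hedged usage sketch (illustrative): the dict returned by train_model carries
# negated accuracies under 'loss'/'true_loss' plus 'status' and 'eval_time',
# i.e. the shape a Hyperopt-style tuner would minimize. The hyperparameter
# values below are arbitrary examples, not tuned settings from this repo.
def example_train_once():
    result = train_model(max_epochs=1, dense_layers=[6, 6, 6], growth_rate=12,
                         batch_size=64, nbr_gpus=1)
    print('best train acc: %.4f' % (-result['loss']))
    print('best val acc:   %.4f' % (-result['true_loss']))
    print('wall time (s):  %.1f' % result['eval_time'])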
def train_model(max_epochs=600, start_lr=0.025, drop_path_keep=0.6, nbr_blocks=2,
                weight_decay=1e-4, nbr_filters=32, batch_size=32,
                logdir='./logs', weightsdir='./weights_nasnet',
                lr_decrease_factor=0.5, lr_patience=10, nbr_gpus=1,
                model_path=None, initial_epoch=0):
    # Create a dir in the logs catalog and dump info
    run_dir = 'nasnet_%s' % datetime.today().strftime('%Y%m%d-%H%M%S-%f')

    # Load the dataset with augmentations
    start_time = time.time()
    ((generator_train, generator_test),
     (x_train, y_train), (x_test, y_test), (x_val, y_val)) = load_cifar10()

    # Create current epoch holding tensor
    epoch_tensor = tf.Variable(initial_epoch, dtype=tf.int32, trainable=False)

    # Create model using supplied params.
    # Load model from file if the argument model_path is supplied.
    # Use multi_gpu setup if enabled.
    if nbr_gpus > 1:
        with tf.device('/cpu:0'):
            if model_path is not None:
                orig_model = load_model(model_path)
            else:
                orig_model = create_nasnet(input_shape=(32, 32, 3),
                                           nbr_normal_cells=6,
                                           nbr_blocks=nbr_blocks,
                                           weight_decay=weight_decay,
                                           nbr_classes=10,
                                           nbr_filters=nbr_filters,
                                           stem_multiplier=3,
                                           filter_multiplier=2,
                                           dimension_reduction=2,
                                           final_filters=768,
                                           dropout_prob=0.0,
                                           drop_path_keep=drop_path_keep,
                                           max_epochs=max_epochs,
                                           epoch_tensor=epoch_tensor)
        model = multi_gpu_model(orig_model, nbr_gpus)
    else:
        if model_path is not None:
            orig_model = load_model(model_path)
        else:
            orig_model = create_nasnet(input_shape=(32, 32, 3),
                                       nbr_normal_cells=6,
                                       nbr_blocks=nbr_blocks,
                                       weight_decay=weight_decay,
                                       nbr_classes=10,
                                       nbr_filters=nbr_filters,
                                       stem_multiplier=3,
                                       filter_multiplier=2,
                                       dimension_reduction=2,
                                       final_filters=768,
                                       dropout_prob=0.0,
                                       drop_path_keep=drop_path_keep,
                                       max_epochs=max_epochs,
                                       epoch_tensor=epoch_tensor)
        model = orig_model

    # Setup optimizer
    optimizer = SGD(lr=start_lr, momentum=0.9, nesterov=True, clipnorm=5.0)
    cbs = create_callbacks(max_epochs, run_dir, start_lr, lr_decrease_factor,
                           lr_patience, orig_model, epoch_tensor)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        loss_weights=[1, 0.4],  # Weight the auxiliary head by 0.4
        metrics=['accuracy'])

    # Write model info to file
    dump_infomation(os.path.join(logdir, run_dir), orig_model, start_lr,
                    drop_path_keep, nbr_blocks, nbr_filters, batch_size)

    # Setup the multi output generators
    train = generator_train.flow(x_train, y_train, batch_size=batch_size, seed=0)
    test = generator_test.flow(x_val, y_val, batch_size=batch_size, seed=0)
    mul_train = multi_generator(train)
    mul_test = multi_generator(test)
    steps_per_epoch = len(train)
    validation_steps = len(test)

    # Start training
    history = model.fit_generator(mul_train,
                                  callbacks=cbs,
                                  epochs=max_epochs,
                                  validation_data=mul_test,
                                  use_multiprocessing=False,
                                  max_queue_size=batch_size,
                                  verbose=1,
                                  initial_epoch=initial_epoch,
                                  steps_per_epoch=steps_per_epoch,
                                  validation_steps=validation_steps)

    best_val_acc = max(history.history['val_acc'])
    best_acc = max(history.history['acc'])
    return {
        'loss': -1 * best_acc,
        'true_loss': -1 * best_val_acc,
        'status': 'ok',
        'eval_time': time.time() - start_time,
    }
def load_fname(version, suffix=None, with_ext=False):
    suffix = "." + suffix if suffix is not None else ""
    prefix = "./data/cifar_resnet%s%s" % (version, suffix)
    return utils.extend_fname(prefix, with_ext=with_ext)


batch_size = 16
input_size = 32
inputs_ext = {'data': {'shape': (batch_size, 3, input_size, input_size)}}
inputs = [mx.sym.var(n) for n in inputs_ext]
calib_ctx = mx.gpu(2)
ctx = [mx.gpu(int(i)) for i in "1,2,3,4,5".split(',') if i.strip()]
utils.log_init()

val_data = ds.load_cifar10(batch_size, input_size)
data_iter = iter(val_data)


def data_iter_func():
    data, label = next(data_iter)
    return data, label


data, _ = next(data_iter)

sym_file, param_file = load_fname(version)
net1 = utils.load_model(sym_file, param_file, inputs, ctx=ctx)
acc_top1 = mx.metric.Accuracy()
acc_top5 = mx.metric.TopKAccuracy(5)
acc_top1.reset()
print("Loading all models in %s" % path)
models = []
for model_file in os.listdir(path):
    try:
        print('Loading %s' % model_file)
        model = init_model(os.path.join(path, model_file))
        models.append(model)
    except RuntimeError:
        print('Some error occurred!')

# Evaluate using ensemble:
((generator_train, generator_test),
 (x_train, y_train), (x_test, y_test), (x_val, y_val)) = load_cifar10()

# Evaluate the models
correct = 0
total = 0
for x_batch, y_batch in generator_test.flow(x_test, y_test, batch_size=32):
    print('%d/%d' % (total, len(y_test)))
    total += len(y_batch)
    # Ensemble prediction over all loaded models
    y = predict_models(models, x_batch)
    correct += np.sum(y.flatten() == y_batch.flatten())
    if total >= len(y_test):
        break

print('Correct: %d/%d (%f)' % (correct, total, correct / total))