AlexNet.load_model(x_net, layer=args.layer)
x_f = lasagne.layers.get_output(x_net[args.layer], deterministic=True)

gx_t = AlexNet.transform_im(gx, npx=npx, nc=nc)
gx_net = AlexNet.build_model(gx_t, layer=args.layer, shape=(None, 3, npx, npx))
AlexNet.load_model(gx_net, layer=args.layer)
gx_f = lasagne.layers.get_output(gx_net[args.layer], deterministic=True)

ftr_loss = costs.L2Loss(gx_f, x_f)

# add the two losses together
cost = pixel_loss + ftr_loss * sharedX(args.alpha)
output = [cost, z]

lrt = sharedX(args.lr)
b1t = sharedX(args.b1)
p_updater = updates.Adam(lr=lrt, b1=b1t, regularizer=updates.Regularizer(l2=args.weight_decay))
p_updates = p_updater(predict_params, cost)

print('COMPILING')
t = time()
_train_p = theano.function([x], cost, updates=p_updates)
_train_p_cost = theano.function([x], [cost, gx])
_predict_z = theano.function([x], z)
_gen = theano.function([z], gx)
print('%.2f seconds to compile theano functions' % (time() - t))


def rec_test(test_data, n_epochs=0, batch_size=128, output_dir=None):
    print('computing reconstruction loss on test images')
    rec_imgs = []
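# --- usage sketch (not in the original script): how the compiled functions
# above fit together; `x_batch` is an assumed float32 image batch in the
# network's expected layout.
def project_and_reconstruct(x_batch):
    z_batch = _predict_z(x_batch)                 # predicted latent codes for the images
    rec_batch = _gen(z_batch)                     # reconstructions decoded from z
    cost_val, gx_batch = _train_p_cost(x_batch)   # loss + outputs, no parameter update
    return z_batch, rec_batch, cost_val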
d_cost_real = sum([bce(p, T.ones(p.shape)).mean() for p in p_real])
d_cost_gen = sum([bce(p, T.zeros(p.shape)).mean() for p in p_gen])
g_cost_d = sum([bce(p, T.ones(p.shape)).mean() for p in p_gen])
# d_cost_real = bce(p_real[-1], T.ones(p_real[-1].shape)).mean()
# d_cost_gen = bce(p_gen[-1], T.zeros(p_gen[-1].shape)).mean()
# g_cost_d = bce(p_gen[-1], T.ones(p_gen[-1].shape)).mean()

# BCE terms plus a small L2 penalty on the parameters
d_cost = d_cost_real + d_cost_gen + (1e-5 * sum([T.sum(p ** 2.0) for p in discrim_params]))
g_cost = g_cost_d + (1e-5 * sum([T.sum(p ** 2.0) for p in gen_params]))

cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
d_updates = d_updater(discrim_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates  # note: this shadows the `updates` module from here on

print('COMPILING')
t = time()
_train_g = theano.function([X, Z0], cost, updates=g_updates)
_train_d = theano.function([X, Z0], cost, updates=d_updates)
_gen = theano.function([Z0], gX)
print('%.2f seconds to compile theano functions' % (time() - t))

f_log = open("{}/{}.ndjson".format(log_dir, desc), 'w')  # text mode: JSON lines are written as str
log_fields = [
    'n_epochs',
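# --- usage sketch (not in the original; assumes `np` is numpy and that
# `trX`, `nbatch`, and `nz` exist as in the surrounding script): the usual
# alternating GAN schedule using the two compiled step functions above.
def train_epoch(trX, nbatch=128, nz=100):
    for i in range(0, len(trX) - nbatch + 1, nbatch):
        xmb = trX[i:i + nbatch]
        zmb = np.random.uniform(-1., 1., size=(nbatch, nz)).astype('float32')
        _train_d(xmb, zmb)   # discriminator step on real + fake
        _train_g(xmb, zmb)   # generator step on fresh noise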
X = T.fmatrix()
y = T.fvector()
theta = T.fmatrix()
deltaX = T.fmatrix()  # svgd gradient
data_N = T.scalar('data_N')
block = T.fmatrix()

gX_1 = langevin_sampler(X, y, theta, data_N, *net_params)
cost_1 = -1 * T.mean(T.sum(gX_1 * deltaX, axis=1))

lrt = sharedX(lr)
# pass the shared learning rate created above (the original passed the raw lr,
# leaving lrt unused)
g_updater_1 = updates.Adagrad(lr=lrt, regularizer=updates.Regularizer(l2=l2))
g_updates_1 = g_updater_1(net_params, cost_1)

print('COMPILING')
t = time()
_gen_1 = theano.function([X, y, theta, data_N], gX_1)
_train_g_1 = theano.function([X, y, theta, deltaX, data_N], cost_1, updates=g_updates_1)
_svgd_gradient = theano.function([X, y, theta, data_N], svgd_gradient(X, y, theta, data_N))
_score_bayes_lr = theano.function([X, y, theta, data_N], score_bayes_lr(X, y, theta, data_N))
_evaluate = theano.function([X, y, theta], evaluate(X, y, theta))
print('%.2f seconds to compile theano functions' % (time() - t))

n_iter = 10000
n_particle = 100
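# --- reference sketch (not from the original file): the SVGD direction that
# a `svgd_gradient` routine like the one above computes, written in plain
# NumPy with an RBF kernel and the median-distance bandwidth heuristic.
import numpy as np
from scipy.spatial.distance import pdist, squareform

def svgd_direction(theta, score):
    # theta: (n, d) particles; score: (n, d) gradients of log p at the particles
    n = theta.shape[0]
    sq_dists = squareform(pdist(theta, 'sqeuclidean'))
    h = np.median(sq_dists) / np.log(n + 1.0) + 1e-8   # median heuristic
    K = np.exp(-sq_dists / h)                          # RBF kernel matrix
    # sum_j grad_{theta_j} k(theta_j, theta_i), using the symmetry of K
    grad_K = (2.0 / h) * (K.sum(axis=1, keepdims=True) * theta - K.dot(theta))
    return (K.dot(score) + grad_K) / n                 # phi(theta_i) for each i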
# def gen_Z(dist):
#     mu = dist[:Nz]
#     sigma = dist[Nz:]

X = T.tensor5()
encodeZ = encoder(X, *encode_params)
decodeX = decoder(encodeZ, *decode_params)

cost = bce(T.flatten(decodeX, 2), T.flatten(X, 2)).mean()

lrt = sharedX(lrate)
AutoEnc_parameter = encode_params + decode_params
updater = updates.Adam(lr=lrt, b1=0.8, regularizer=updates.Regularizer(l2=l2))
updates = updater(AutoEnc_parameter, cost)  # note: this shadows the `updates` module from here on

print('COMPILING')
t = time()
_train_ = theano.function([X], cost, updates=updates)
print('%.2f seconds to compile theano functions' % (time() - t))

mat = scipy.io.loadmat('models_stats.mat')
mat = mat['models']
num = np.array(mat[0][0][1])
names = mat[0][0][0][0]

objname = []
for j in range(len(objectNumber)):
    objname.append(names[objectNumber[j]][0])
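# --- usage sketch (not in the original; `train_X` and the batch size are
# assumptions): a minimal minibatch loop over the compiled _train_ step.
def fit_autoencoder(train_X, n_epochs=50, nbatch=32):
    for epoch in range(n_epochs):
        total, nb = 0.0, 0
        for i in range(0, len(train_X) - nbatch + 1, nbatch):
            total += _train_(train_X[i:i + nbatch].astype('float32'))
            nb += 1
        print('epoch %d: mean bce %.4f' % (epoch, total / max(nb, 1)))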
    return vgd_grad


gX = gen(Z, *gen_params)

# update the generator model by minimizing the reconstruction MSE against deltaX
g_cost = -1 * T.sum(T.sum(T.mul(gX, deltaX), axis=1))

d_lr = 1e-4
g_lr = 1e-3
d_lrt = sharedX(d_lr)
g_lrt = sharedX(g_lr)

d_updater = updates.Adam(lr=d_lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=g_lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
d_updates = d_updater(discrim_params, d_cost)
g_updates = g_updater(gen_params, g_cost)

print('COMPILING')
t = time()
_gen = theano.function([Z], gX)
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_vgd_gradient = theano.function([X0, X1], vgd_gradient(X0, X1))
_reconstruction_cost = theano.function([X], T.mean(mse_data))
print('%.2f seconds to compile theano functions' % (time() - t))
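# --- usage sketch (assumptions: `np` is numpy, nbatch/nz values, and the
# exact roles of X0/X1 in vgd_gradient, which are not shown here): one
# amortized-SVGD generator step. The generator's samples are moved along the
# Stein variational direction, and g_cost = -sum(gX * deltaX) regresses the
# generator onto that motion.
def generator_step(nbatch=64, nz=100):
    zmb = np.random.uniform(-1., 1., size=(nbatch, nz)).astype('float32')
    xmb = _gen(zmb)                          # current generator "particles"
    delta = _vgd_gradient(xmb, xmb)          # SVGD direction at the particles
    return _train_g(zmb, delta.astype('float32'))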
gX = gen(Z, *gen_params)

p_real = discrim(X, *discrim_params)
p_gen = discrim(gX, *discrim_params)

d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

d_cost = d_cost_real + d_cost_gen
g_cost = g_cost_d
cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=weightdecay))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=weightdecay))
d_updates = d_updater(discrim_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates  # note: this shadows the `updates` module from here on

print('COMPILING')
t = time.time()
_train_g = theano.function([X, Z], cost, updates=g_updates)
_train_d = theano.function([X, Z], cost, updates=d_updates)
_gen = theano.function([Z], gX)
print('%.2f seconds to compile theano functions' % (time.time() - t))

########
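# --- reference note (assumption: this matches the import used above): `bce`
# is theano's elementwise binary cross-entropy,
#   bce(p, t) = -t*log(p) - (1 - t)*log(1 - p),
# so d_cost_real pushes p_real -> 1, d_cost_gen pushes p_gen -> 0, and
# g_cost_d pushes p_gen -> 1 (the non-saturating generator loss).
from theano.tensor.nnet import binary_crossentropy as bce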
def get_updater(optimizer, **kwargs):
    opt_map = dict(adam='Adam', sgd='Momentum', rms='RMSprop')
    if optimizer not in opt_map:
        raise ValueError('Unknown optimizer: %s' % (optimizer,))
    if optimizer == 'adam':
        kwargs.update(b1=b1)
    elif optimizer == 'sgd':
        kwargs.update(momentum=args.sgd_momentum)
    opt_func = getattr(updates, opt_map[optimizer])
    return opt_func(**kwargs)


ignored_prefixes = []
if args.no_decay_bias:
    ignored_prefixes.append('Bias')
if args.no_decay_gain:
    ignored_prefixes.append('Gain')

reg = updates.Regularizer(l2=args.decay, ignored_prefixes=ignored_prefixes)
updater = get_updater(args.optimizer, lr=lrt, regularizer=reg)

if args.discrim_decay is None:
    args.discrim_decay = args.decay
discrim_reg = updates.Regularizer(l2=args.discrim_decay, ignored_prefixes=ignored_prefixes)

if args.discrim_optimizer is None:
    args.discrim_optimizer = args.optimizer
discrim_updater = get_updater(args.discrim_optimizer, lr=lrt, regularizer=discrim_reg)


def featurizer(x=None, gx=None, args=args, **kwargs):
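# --- usage sketch (not in the original; the parameter list and cost are
# assumed inputs): get_updater returns an optimizer object that, applied to
# (params, cost), yields the theano update pairs.
def make_updates(params, cost, l2=1e-5):
    sketch_reg = updates.Regularizer(l2=l2)
    sketch_updater = get_updater('adam', lr=lrt, regularizer=sketch_reg)
    return sketch_updater(params, cost)   # list of (shared_var, new_value) pairs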
def load_model():
    [e_params, g_params, d_params] = pickle.load(open("faces_dcgan.pkl", "rb"))
    gwx = g_params[-1]
    dwy = d_params[-1]

    # inputs
    X = T.tensor4()

    ## encode layer
    e_layer_sizes = [128, 64, 32, 16, 8]
    e_filter_sizes = [3, 256, 256, 512, 1024]
    eX, e_params, e_layers = make_conv_set(X, e_layer_sizes, e_filter_sizes, "e", weights=e_params)

    ## generative layer
    g_layer_sizes = [8, 16, 32, 64, 128]
    g_num_filters = [1024, 512, 256, 256, 128]
    g_out, g_params, g_layers = make_conv_set(eX, g_layer_sizes, g_num_filters, "g", weights=g_params)
    g_params += [gwx]
    gX = tanh(deconv(g_out, gwx, subsample=(1, 1), border_mode=(2, 2)))

    ## discrim layer(s)
    df1 = 128
    d_layer_sizes = [128, 64, 32, 16, 8]
    d_filter_sizes = [3, df1, 2 * df1, 4 * df1, 8 * df1]

    def discrim(input, name, weights=None):
        d_out, disc_params, d_layers = make_conv_set(input, d_layer_sizes, d_filter_sizes, name, weights=weights)
        d_flat = T.flatten(d_out, 2)
        disc_params += [dwy]
        y = sigmoid(T.dot(d_flat, dwy))
        return y, disc_params, d_layers

    # target outputs
    target = T.tensor4()

    p_real, d_params, d_layers = discrim(target, "d", weights=d_params)
    # we need to make sure the p_gen params are the same as the p_real params
    p_gen, d_params2, d_layers = discrim(gX, "d", weights=d_params)

    ## GAN costs
    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

    ## MSE encoding cost is computed on an (averaged) downscaling of the image
    target_pool = max_pool_2d(target, (4, 4), mode="average_exc_pad", ignore_border=True)
    target_flat = T.flatten(target_pool, 2)
    gX_pool = max_pool_2d(gX, (4, 4), mode="average_exc_pad", ignore_border=True)
    gX_flat = T.flatten(gX_pool, 2)
    enc_cost = mse(gX_flat, target_flat).mean()

    ## the generator cost is a linear combination of the discrim cost and the MSE encoding cost
    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d + enc_cost / 10  ## if enc_cost is weighted too highly it will take a long time to train

    ## N.B. e_cost and e_updates will only try to minimise the MSE loss on the autoencoder (for debugging)
    e_cost = enc_cost

    cost = [g_cost_d, d_cost_real, enc_cost]

    elrt = sharedX(0.002)
    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    e_updater = updates.Adam(lr=elrt, b1=b1, regularizer=updates.Regularizer(l2=l2))

    d_updates = d_updater(d_params, d_cost)
    g_updates = g_updater(e_params + g_params, g_cost)
    e_updates = e_updater(e_params, e_cost)

    print('COMPILING')
    t = time()
    _train_g = theano.function([X, target], cost, updates=g_updates)
    _train_d = theano.function([X, target], cost, updates=d_updates)
    _train_e = theano.function([X, target], cost, updates=e_updates)
    _get_cost = theano.function([X, target], cost)
    print('%.2f seconds to compile theano functions' % (time() - t))

    img_dir = "gen_images/"
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    ae_encode = theano.function([X, target], [gX, target])
    return ae_encode
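# --- usage sketch (assumption: `x_batch` is a float32 NCHW batch of face
# crops preprocessed the same way as the training data): run the loaded
# encoder/generator end to end.
def reconstruct_faces(x_batch):
    ae_encode = load_model()
    recon, target_out = ae_encode(x_batch, x_batch)  # returns [gX, target]
    return recon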
x_dropout = dropout(x_repeated, p=drop_p)
x_corrupt = T.clip(x_dropout, 1e-6, 1 - 1e-6)

z = conv_encoder(x_corrupt, *enc_params)
reconstructed_x, logpxz = conv_decoder(x_repeated, z, *dec_params)

z_vgd_grad = 0. - _vgd_gradient(z, num_z, logpxz)

# L operator: backpropagate the (negated) SVGD direction through the encoder
dHdPhi = T.Lop(
    f=z.flatten() / T.cast(num_z * nbatch, 'float32'),
    wrt=enc_params,
    eval_points=z_vgd_grad.flatten())

en_updater = updates.GAdam(lr=sharedX(en_lrt), regularizer=updates.Regularizer(l2=l2))
en_updates = en_updater(enc_params, dHdPhi)

decost = 0 - logpxz.sum() / T.cast(num_z * nbatch, 'float32')
de_updater = updates.Adam(lr=sharedX(de_lrt), regularizer=updates.Regularizer(l2=l2))
de_updates = de_updater(dec_params, decost)

gupdates = en_updates + de_updates

X_train, X_valid, X_test = mnist()
ntrain, nvalid, ntest = len(X_train), len(X_valid), len(X_test)
print(X_train.shape, X_valid.shape, X_test.shape)
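# --- reference sketch (not from the original file): the T.Lop call above is a
# vector-Jacobian product; this is a self-contained miniature of the same
# pattern with made-up shapes.
import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.random.randn(3, 4).astype('float32'), name='W')
x = T.fvector('x')
v = T.fvector('v')                      # an externally supplied gradient at y
y = T.dot(x, W)                         # y depends on W
gW = T.Lop(f=y, wrt=W, eval_points=v)   # equals grad of (v . y) w.r.t. W
vjp = theano.function([x, v], gW)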