def features(X, nbatch=128):
    Xfs = []
    for xmb in iter_data(X, size=nbatch):
        fmbs = _features(floatX(xmb))
        for fmb in fmbs:
            Xfs.append(fmb)
    return np.concatenate(Xfs, axis=0)
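# The minibatch iterator iter_data used throughout these scripts is imported
# from the repo's utilities. A minimal sketch of its assumed behaviour (yield
# successive slices of `size` items from one or more equally long arrays);
# the name iter_data_sketch marks it as hypothetical, not the repo's code:
def iter_data_sketch(*data, **kwargs):
    size = kwargs.get('size', 128)
    for start in range(0, len(data[0]), size):
        mbs = [d[start:start + size] for d in data]
        yield mbs[0] if len(mbs) == 1 else tuple(mbs)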
def iter_data_discriminator(self, xreals, instances):
    # prepare the pool of fake samples, one slice per surviving generator
    xfake_list = instances[0].img[0:self.miniBatchForD, :, :, :]
    for i in range(1, len(instances)):
        xfake = instances[i].img[0:self.miniBatchForD, :, :, :]
        xfake_list = np.append(xfake_list, xfake, axis=0)
    # iteration
    for xreal, xfake in iter_data(xreals, shuffle(xfake_list),
                                  size=self.batchSize):
        yield xreal, xfake
def infer_bnorm_stats(X, nbatch=128):
    U = [np.zeros(128, dtype=theano.config.floatX),
         np.zeros(256, dtype=theano.config.floatX)]
    S = [np.zeros(128, dtype=theano.config.floatX),
         np.zeros(256, dtype=theano.config.floatX)]
    n = 0
    for xmb in iter_data(X, size=nbatch):
        stats = _bnorm_stats(floatX(xmb))
        umb = stats[:2]
        smb = stats[2:]
        for i, u in enumerate(umb):
            U[i] += u
        for i, s in enumerate(smb):
            S[i] += s
        n += 1
    U = [u / n for u in U]
    S = [s / n for s in S]
    return U, S
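# A minimal sketch (not part of the original file) of how the averaged
# statistics returned by infer_bnorm_stats could be applied at test time,
# assuming U holds per-feature means and S per-feature variances:
def apply_bnorm_inference(x, u, s, gain=1., bias=0., eps=1e-8):
    # x: (nbatch, nfeat) activations of one layer
    return gain * (x - u) / np.sqrt(s + eps) + bias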
    'g_cost',
    'd_cost',
]
print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_examples = 0
t = time()
sample_z0mb = rand_gen(size=(200, nz0))  # noise samples for top generator module
for epoch in range(1, niter + niter_decay + 1):
    trX = shuffle(trX)
    for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain / nbatch):
        imb = transform(imb)
        z0mb = rand_gen(size=(len(imb), nz0))
        if n_updates % (k + 1) == 0:
            cost = _train_g(imb, z0mb)
        else:
            cost = _train_d(imb, z0mb)
        n_updates += 1
        n_examples += len(imb)
    samples = np.asarray(_gen(sample_z0mb))
    grayscale_grid_vis(inverse_transform(samples), (10, 20),
                       "{}/{}.png".format(sample_dir, n_epochs))
    n_epochs += 1
    if n_epochs > niter:
        lrt.set_value(floatX(lrt.get_value() - lr / niter_decay))
    if n_epochs in [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200]:
def main():
    # Parameters
    train_data = './datasets/facades/train/'
    display_data = './datasets/facades/val/'
    start = 0
    stop = 400
    save_samples = False
    shuffle_ = True
    use_h5py = 0
    batchSize = 4
    loadSize = 286
    fineSize = 256
    flip = True
    ngf = 64
    ndf = 64
    input_nc = 3
    output_nc = 3
    num_epoch = 1001
    training_method = 'adam'
    lr_G = 0.0002
    lr_D = 0.0002
    beta1 = 0.5
    task = 'facades'
    name = 'pan'
    which_direction = 'BtoA'
    preprocess = 'regular'
    begin_save = 700
    save_freq = 100
    show_freq = 20
    continue_train = 0
    use_PercepGAN = 1
    use_Pix = 'No'
    which_netG = 'unet_nodrop'
    which_netD = 'basic'
    lam_pix = 25.
    lam_p1 = 5.
    lam_p2 = 1.5
    lam_p3 = 1.5
    lam_p4 = 1.
    lam_gan_d = 1.
    lam_gan_g = 1.
    m = 3.0
    test_deterministic = True
    kD = 1
    kG = 1
    save_model_D = False

    # Load the dataset
    print("Loading data...")
    if which_direction == 'AtoB':
        tra_input, tra_output, _ = pix2pix(
            data_path=train_data,
            img_shape=[input_nc, loadSize, loadSize],
            save=save_samples, start=start, stop=stop)
        dis_input, dis_output, _ = pix2pix(
            data_path=display_data,
            img_shape=[input_nc, fineSize, fineSize],
            save=False, start=0, stop=4)
        dis_input = processing_img(dis_input, center=True, scale=True,
                                   convert=False)
    elif which_direction == 'BtoA':
        tra_output, tra_input, _ = pix2pix(
            data_path=train_data,
            img_shape=[input_nc, loadSize, loadSize],
            save=save_samples, start=start, stop=stop)
        dis_output, dis_input, _ = pix2pix(
            data_path=display_data,
            img_shape=[input_nc, fineSize, fineSize],
            save=False, start=0, stop=4)
        dis_input = processing_img(dis_input, center=True, scale=True,
                                   convert=False)

    ids = range(0, stop - start)
    ntrain = len(ids)

    # Prepare Theano variables for inputs and targets
    input_x = T.tensor4('input_x')
    input_y = T.tensor4('input_y')

    # Create neural network model
    print("Building model and compiling functions...")
    if which_netG == 'unet':
        generator = models.build_generator_unet(input_x, ngf=ngf)
    elif which_netG == 'unet_nodrop':
        generator = models.build_generator_unet_nodrop(input_x, ngf=ngf)
    elif which_netG == 'unet_1.0':
        generator = models.build_generator_unet_1(input_x, ngf=ngf)
    elif which_netG == 'unet_facades':
        generator = models.build_generator_facades(input_x, ngf=ngf)
    else:
        print('waiting to fill')

    if use_PercepGAN == 1:
        if which_netD == 'basic':
            discriminator = models.build_discriminator(ndf=ndf)
        else:
            print('waiting to fill')

    # Create expression for passing data through the generator
    gen_imgs = lasagne.layers.get_output(generator)

    if use_PercepGAN == 1:
        # Create expression for passing real data through the discriminator
        dis1_f, dis2_f, dis3_f, dis4_f, disout_f = lasagne.layers.get_output(
            discriminator, input_y)
        # Create expression for passing fake data through the discriminator
        dis1_ff, dis2_ff, dis3_ff, dis4_ff, disout_ff = lasagne.layers.get_output(
            discriminator, gen_imgs)
        p1 = lam_p1 * T.mean(T.abs_(dis1_f - dis1_ff))
        p2 = lam_p2 * T.mean(T.abs_(dis2_f - dis2_ff))
        p3 = lam_p3 * T.mean(T.abs_(dis3_f - dis3_ff))
        p4 = lam_p4 * T.mean(T.abs_(dis4_f - dis4_ff))
        l2_norm = p1 + p2 + p3 + p4
        percepgan_dis_loss = lam_gan_d * (
            lasagne.objectives.binary_crossentropy(disout_f, 0.9) +
            lasagne.objectives.binary_crossentropy(disout_ff, 0)).mean() + \
            T.maximum((T.constant(m) - l2_norm), T.constant(0.))
        percepgan_gen_loss = -lam_gan_g * (
            lasagne.objectives.binary_crossentropy(disout_ff, 0)).mean() + l2_norm
    else:
        l2_norm = T.constant(0)
        percepgan_dis_loss = T.constant(0)
        percepgan_gen_loss = T.constant(0)

    if use_Pix == 'L1':
        pixel_loss = lam_pix * T.mean(abs(gen_imgs - input_y))
    elif use_Pix == 'L2':
        pixel_loss = lam_pix * T.mean(T.sqr(gen_imgs - input_y))
    else:
        pixel_loss = T.constant(0)

    # Create loss expressions
    generator_loss = percepgan_gen_loss + pixel_loss
    discriminator_loss = percepgan_dis_loss

    # Create update expressions for training
    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    if training_method == 'adam':
        g_updates = lasagne.updates.adam(generator_loss, generator_params,
                                         learning_rate=lr_G, beta1=beta1)
    elif training_method == 'nm':
        g_updates = lasagne.updates.nesterov_momentum(
            generator_loss, generator_params, learning_rate=lr_G,
            momentum=beta1)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_g = theano.function(
        [input_x, input_y],
        [p1, p2, p3, p4, l2_norm, generator_loss, pixel_loss],
        updates=g_updates)

    if use_PercepGAN == 1:
        discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                             trainable=True)
        if training_method == 'adam':
            d_updates = lasagne.updates.adam(
                discriminator_loss, discriminator_params,
                learning_rate=lr_D, beta1=beta1)
        elif training_method == 'nm':
            d_updates = lasagne.updates.nesterov_momentum(
                discriminator_loss, discriminator_params,
                learning_rate=lr_D, momentum=beta1)
        train_d = theano.function([input_x, input_y],
                                  [l2_norm, discriminator_loss],
                                  updates=d_updates)
        dis_fn = theano.function([input_x, input_y],
                                 [(disout_f > .5).mean(),
                                  (disout_ff < .5).mean()])

    # Compile another function generating some data
    gen_fn = theano.function([input_x], lasagne.layers.get_output(
        generator, deterministic=test_deterministic))

    # Finally, launch the training loop.
print("Starting training...") desc = task + '_' + name print desc f_log = open('logs/%s.ndjson' % desc, 'wb') log_fields = [ 'NE', 'sec', 'px', '1', '2', '3', '4', 'pd', 'cd', 'pg', 'cg', 'fr', 'tr', ] if not os.path.isdir('generated_imgs/' + desc): os.mkdir(os.path.join('generated_imgs/', desc)) if not os.path.isdir('models/' + desc): os.mkdir(os.path.join('models/', desc)) t = time() # We iterate over epochs: for epoch in range(num_epoch): if shuffle_ is True: ids = shuffle_data(ids) n_updates_g = 0 n_updates_d = 0 percep_d = 0 percep_g = 0 cost_g = 0 cost_d = 0 pixel = 0 train_batches = 0 k = 0 p1 = 0 p2 = 0 p3 = 0 p4 = 0 for index_ in iter_data(ids, size=batchSize): index = sorted(index_) xmb = tra_input[index, :, :, :] ymb = tra_output[index, :, :, :] if preprocess == 'regular': xmb, ymb = pix2pixBatch(xmb, ymb, fineSize, input_nc, flip=flip) elif task == 'inpainting': print('waiting to fill') elif task == 'cartoon': print('waiting to fill') if n_updates_g == 0: imsave('other/%s_input' % desc, convert_img_back(xmb[0, :, :, :]), format='png') imsave('other/%s_GT' % desc, convert_img_back(ymb[0, :, :, :]), format='png') xmb = processing_img(xmb, center=True, scale=True, convert=False) ymb = processing_img(ymb, center=True, scale=True, convert=False) if use_PercepGAN == 1: if k < kD: percep, cost = train_d(xmb, ymb) percep_d += percep cost_d += cost n_updates_d += 1 k += 1 elif k < kD + kG: pp1, pp2, pp3, pp4, percep, cost, pix = train_g(xmb, ymb) p1 += pp1 p2 += pp2 p3 += pp3 p4 += pp4 percep_g += percep cost_g += cost pixel += pix n_updates_g += 1 k += 1 elif k == kD + kG: percep, cost = train_d(xmb, ymb) percep_d += percep cost_d += cost n_updates_d += 1 pp1, pp2, pp3, pp4, percep, cost, pix = train_g(xmb, ymb) p1 += pp1 p2 += pp2 p3 += pp3 p4 += pp4 percep_g += percep cost_g += cost pixel += pix n_updates_g += 1 if k == kD + kG: k = 0 else: pp1, pp2, pp3, pp4, percep, cost, pix = train_g(xmb, ymb) p1 += pp1 p2 += pp2 p3 += pp3 p4 += pp4 percep_g += percep cost_g += cost pixel += pix n_updates_g += 1 if epoch % show_freq == 0: p1 = p1 / n_updates_g p2 = p2 / n_updates_g p3 = p3 / n_updates_g p4 = p4 / n_updates_g percep_g = percep_g / n_updates_g percep_d = percep_d / (n_updates_d + 0.0001) cost_g = cost_g / n_updates_g cost_d = cost_d / (n_updates_d + 0.0001) pixel = pixel / n_updates_g true_rate = -1 fake_rate = -1 if use_PercepGAN == 1: true_rate, fake_rate = dis_fn(xmb, ymb) log = [ epoch, round(time() - t, 2), round(pixel, 2), round(p1, 2), round(p2, 2), round(p3, 2), round(p4, 2), round(percep_d, 2), round(cost_d, 2), round(percep_g, 2), round(cost_g, 2), round(float(fake_rate), 2), round(float(true_rate), 2) ] print '%.0f %.2f %.2f %.2f %.2f %.2f% .2f %.2f %.2f %.2f% .2f %.2f' % ( epoch, p1, p2, p3, p4, percep_d, cost_d, pixel, percep_g, cost_g, fake_rate, true_rate) t = time() f_log.write(json.dumps(dict(zip(log_fields, log))) + '\n') f_log.flush() gen_imgs = gen_fn(dis_input) blank_image = Image.new("RGB", (fineSize * 4 + 5, fineSize * 2 + 3)) pc = 0 for i in range(2): for ii in range(4): if i == 0: img = dis_input[ii, :, :, :] img = ImgRescale(img, center=True, scale=True, convert_back=True) blank_image.paste(Image.fromarray(img), (ii * fineSize + ii + 1, 1)) elif i == 1: img = gen_imgs[ii, :, :, :] img = ImgRescale(img, center=True, scale=True, convert_back=True) blank_image.paste( Image.fromarray(img), (ii * fineSize + ii + 1, 2 + fineSize)) blank_image.save('generated_imgs/%s/%s_%d.png' % (desc, desc, epoch)) #pv = PatchViewer(grid_shape=(2, 4), # patch_shape=(256,256), 
is_color=True) #for i in range(2): # for ii in range(4): # if i == 0: # img = dis_input[ii,:,:,:] # elif i == 1: # img = gen_imgs[ii,:,:,:] # img = convert_img_back(img) # pv.add_patch(img, rescale=False, activation=0) #pv.save('generated_imgs/%s/%s_%d.png'%(desc,desc,epoch)) if (epoch) % save_freq == 0 and epoch > begin_save - 1: # Optionally, you could now dump the network weights to a file like this: np.savez('models/%s/gen_%d.npz' % (desc, epoch), *lasagne.layers.get_all_param_values(generator)) if use_PercepGAN == 1 and save_model_D is True: np.savez('models/%s/dis_%d.npz' % (desc, epoch), *lasagne.layers.get_all_param_values(discriminator))
def main():
    # Parameters
    task = 'toy'
    name = '25G'
    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD', 'minimax', 'ls']
    nloss = 3
    DATASET = '25gaussians'
    batchSize = 64
    ncandi = 1
    kD = 1               # # of discrim updates for each gen update
    kG = 1               # # of gen updates for each discrim update
    ntf = 256
    b1 = 0.5             # momentum term of adam
    nz = 2               # # of dim for Z
    niter = 4            # # of iter at starting learning rate
    lr = 0.0001          # initial learning rate for adam G
    lrd = 0.0001         # initial learning rate for adam D
    N_up = 100000
    save_freq = 10000
    show_freq = 10000
    test_deterministic = True
    beta = 1.
    GP_norm = False      # if use gradient penalty on discriminator
    LAMBDA = 2.          # hyperparameter of GP

    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM,
                                                          GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradient penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score (diversity fitness)
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss,
                              updates=updates_d)
    # Compile other functions scoring generated data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs],
                               [real_out.mean(), fake_out.mean(),
                                (real_out > 0.5).mean(),
                                (fake_out > 0.5).mean(), Fd_score])

    # Finally, launch the training loop.
print("Starting training...") desc = task + '_' + name print desc if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson'%desc, 'wb') if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/'+desc): os.mkdir(os.path.join('models/',desc)) gen_new_params = [] # We iterate over epochs: for n_updates in range(N_up): xmb = toy_dataset(DATASET=DATASET, size=batchSize*kD) xmb = xmb[0:batchSize*kD] # initial G cluster if n_updates == 0: for can_i in range(0,ncandi): train_g, gen_fn, generator = create_G( loss_type=loss_type[can_i%nloss], discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) gen_new_params.append(lasagne.layers.get_all_param_values(generator)) if can_i == 0: g_imgs_old=gen_imgs fmb = gen_imgs[0:batchSize/ncandi*kD,:] else: g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0) fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:],axis=0) #print gen_new_params # MODEL G noise = T.matrix('noise') generator = models_uncond.build_generator_toy(noise,nd=DIM) Tgimgs = lasagne.layers.get_output(generator) Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs) g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean() g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean() g_loss_ls = T.mean(T.sqr((Tfake_out - 1))) g_params = lasagne.layers.get_all_params(generator, trainable=True) up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1) up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1) up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1) train_g = theano.function([noise],g_loss_logD,updates=up_g_logD) train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax) train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls) gen_fn = theano.function([noise], lasagne.layers.get_output( generator,deterministic=True)) else: gen_old_params = gen_new_params for can_i in range(0,ncandi): for type_i in range(0,nloss): lasagne.layers.set_all_param_values(generator, gen_old_params[can_i]) if loss_type[type_i] == 'trickLogD': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) elif loss_type[type_i] == 'minimax': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_minimax(zmb) elif loss_type[type_i] == 'ls': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_ls(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf],gen_imgs) #frr = frr[0] frr = frr_score - fd_score if can_i*nloss + type_i < ncandi: idx = can_i*nloss + type_i gen_new_params[idx]=lasagne.layers.get_all_param_values(generator) fake_rate[idx]=frr g_imgs_old[idx*ntf:(idx+1)*ntf,:]=gen_imgs fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:] = \ gen_imgs[0:batchSize/ncandi*kD,:] else: fr_com = fake_rate - frr if min(fr_com) < 0: ids_replace = np.where(fr_com==min(fr_com)) idr = ids_replace[0][0] fake_rate[idr]=frr gen_new_params[idr] = lasagne.layers.get_all_param_values(generator) g_imgs_old[idr*ntf:(idr+1)*ntf,:]=gen_imgs fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:] = \ 
        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate, fr)
                real_rate = np.append(real_rate, tr)
                fake_rate_p = np.append(fake_rate_p, frp)
                real_rate_p = np.append(real_rate_p, trp)
                FDL = np.append(FDL, fdscore)
        print fake_rate, fake_rate_p, FDL
        print (n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write(str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                    str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                    str(real_rate_p.mean()) + '\n')
        f_log.flush()

        # train D
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        if n_updates % show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            g_imgs = gen_fn(s_zmb)
            xmb = toy_dataset(DATASET=DATASET, size=512)
            generate_image(xmb, g_imgs, n_updates / save_freq, desc)
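# toy_dataset is imported from the repo's data utilities. A minimal sketch of
# the assumed behaviour for the two synthetic problems used here (a 5x5 grid
# of Gaussians, or 8 Gaussians on a circle); the constants follow the common
# WGAN-GP toy setup and are an assumption, not the repo's exact values:
def toy_dataset_sketch(DATASET='25gaussians', size=512):
    if DATASET == '25gaussians':
        centers = [(x, y) for x in range(-2, 3) for y in range(-2, 3)]
        std = 0.05
    else:  # '8gaussians'
        thetas = np.linspace(0., 2. * np.pi, 8, endpoint=False)
        centers = [(np.cos(t), np.sin(t)) for t in thetas]
        std = 0.02
    idx = np_rng.randint(len(centers), size=size)
    data = 2. * np.array([centers[j] for j in idx], dtype='float32')
    data += np_rng.normal(0., std, size=data.shape)
    return floatX(data / 2.828)  # rough stdev normalisation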
def main(problem, popsize, moegan, freq,
         loss_type=['trickLogD', 'minimax', 'ls'],
         postfix=None,
         nPassD=1,           # backpropagation passes for the discriminator
         inBatchSize=64):
    # Parameters
    task = 'toy'
    name = '{}_{}_{}MMDu2'.format(
        problem,
        "MOEGAN" if moegan else "EGAN",
        postfix + "_" if postfix is not None else "")  # '8G_MOEGAN_PFq_NFd_t2'
    DIM = 512
    begin_save = 0
    nloss = len(loss_type)
    batchSize = inBatchSize
    if problem == "8G":
        DATASET = '8gaussians'
    elif problem == "25G":
        DATASET = '25gaussians'
    else:
        exit(-1)
    ncandi = popsize
    kD = nPassD          # # of discrim updates for each gen update
    kG = 1               # # of gen updates for each discrim update
    ntf = 256
    b1 = 0.5             # momentum term of adam
    nz = 2               # # of dim for Z
    niter = 4            # # of iter at starting learning rate
    lr = 0.0001          # initial learning rate for adam G
    lrd = 0.0001         # initial learning rate for adam D
    N_up = 100000
    save_freq = freq
    show_freq = freq
    test_deterministic = True
    beta = 1.
    GP_norm = False      # if use gradient penalty on discriminator
    LAMBDA = 2.          # hyperparameter of GP
    NSGA2 = moegan

    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM,
                                                          GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradient penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss, discriminator_params,
                                     learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score (diversity fitness)
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss,
                              updates=updates_d)
    # Compile other functions scoring generated data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs],
                               [real_out.mean(), fake_out.mean(),
                                (real_out > 0.5).mean(),
                                (fake_out > 0.5).mean(), Fd_score])

    # main MODEL G
    noise = T.matrix('noise')
    generator_trainer = create_G(noise=noise, discriminator=discriminator,
                                 lr=lr, b1=b1, DIM=DIM)

    # Finally, launch the training loop.
print("Starting training...") desc = task + '_' + name print(desc) if not os.path.isdir('front'): os.mkdir(os.path.join('front')) if not os.path.isdir('front/' + desc): os.mkdir(os.path.join('front/', desc)) if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson' % desc, 'wb') if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/' + desc): os.mkdir(os.path.join('models/', desc)) instances = [] class Instance: def __init__(self, fq, fd, params, img_values): self.fq = fq self.fd = fd self.params = params self.img = img_values def f(self): return self.fq - self.fd # We iterate over epochs: for n_updates in range(N_up): xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD) xmb = xmb[0:batchSize * kD] # initial G cluster if n_updates == 0: for can_i in range(0, ncandi): init_generator_trainer = create_G(noise=noise, discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = init_generator_trainer.train(loss_type[can_i % nloss], zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = init_generator_trainer.gen(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) instances.append( Instance( frr_score, fd_score, lasagne.layers.get_all_param_values( init_generator_trainer.generator), gen_imgs)) else: instances_old = instances instances = [] for can_i in range(0, ncandi): for type_i in range(0, nloss): generator_trainer.set(instances_old[can_i].params) #train zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) generator_trainer.train(loss_type[type_i], zmb) #score sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = generator_trainer.gen(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) #save instances.append( Instance(frr_score, fd_score, generator_trainer.get(), gen_imgs)) if ncandi <= (len(instances) + len(instances_old)): if NSGA2 == True: #add parents in the pool for inst in instances_old: generator_trainer.set(inst.params) sample_zmb = floatX( np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = generator_trainer.gen(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) instances.append( Instance(frr_score, fd_score, generator_trainer.get(), gen_imgs)) #cromos = { idx:[float(inst.fq),-0.5*float(inst.fd)] for idx,inst in enumerate(instances) } # S1 cromos = { idx: [-float(inst.fq), 0.5 * float(inst.fd)] for idx, inst in enumerate(instances) } # S2 cromos_idxs = [idx for idx, _ in enumerate(instances)] finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs) instances = [instances[p] for p in finalpop] with open('front/%s.tsv' % desc, 'wb') as ffront: for inst in instances: ffront.write( (str(inst.fq) + "\t" + str(inst.fd)).encode()) ffront.write("\n".encode()) elif nloss > 1: #sort new instances.sort( key=lambda inst: -inst.f()) #wrong def in the paper #print([inst.f() for inst in instances]) #cut best ones instances = instances[len(instances) - ncandi:] #print([inst.f() for inst in instances]) sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf) sample_xmb = sample_xmb[0:ncandi * ntf] for i in range(0, ncandi): xfake = instances[i].img[0:ntf, :] xreal = sample_xmb[i * ntf:(i + 1) * ntf, :] tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake) fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr) real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr) fake_rate_p = np.array([frp]) if i == 0 else np.append( fake_rate_p, frp) real_rate_p = np.array([trp]) if 
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)
        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        # for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
        #     cost = train_d(xreal, xfake)
        imgs_fakes = instances[0].img[0:int(batchSize / ncandi * kD), :]
        for i in range(1, len(instances)):
            img = instances[i].img[0:int(batchSize / ncandi * kD), :]
            imgs_fakes = np.append(imgs_fakes, img, axis=0)
        for xreal, xfake in iter_data(xmb, shuffle(imgs_fakes),
                                      size=batchSize):
            cost = train_d(xreal, xfake)

        if (n_updates % show_freq == 0 and n_updates != 0) or n_updates == 1:
            id_update = int(n_updates / save_freq)
            # metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            # compute mmd for all points
            mmd2_all = []
            for i in range(0, ncandi):
                generator_trainer.set(instances[i].params)
                g_imgs = generator_trainer.gen(s_zmb)
                mmd2_all.append(abs(compute_metric_mmd2(g_imgs, xmb)))
            mmd2_all = np.array(mmd2_all)
            # print Pareto front
            if NSGA2 == True:
                front_path = os.path.join('front/', desc)
                with open('%s/%d_%s_mmd2u.tsv' % (front_path, id_update, desc),
                          'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write((str(instances[idx].fq) + "\t" +
                                      str(instances[idx].fd) + "\t" +
                                      str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            # mmd2 output
            print(n_updates, "mmd2u:", np.min(mmd2_all),
                  "id:", np.argmin(mmd2_all))
            # save best
            params = instances[np.argmin(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_min = generator_trainer.gen(s_zmb)
            generate_image(xmb, g_imgs_min, id_update, desc,
                           postfix="_mmu2d_best")
            np.savez('models/%s/gen_%d.npz' % (desc, id_update),
                     *generator_trainer.get())
            np.savez('models/%s/dis_%d.npz' % (desc, id_update),
                     *lasagne.layers.get_all_param_values(discriminator))
            # worst (debug)
            params = instances[np.argmax(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_max = generator_trainer.gen(s_zmb)
            generate_image(xmb, g_imgs_max, id_update, desc,
                           postfix="_mmu2d_worst")
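# compute_metric_mmd2 is imported from the repo's metric module. A minimal
# numpy sketch of an unbiased MMD^2 estimate with an RBF kernel (the
# bandwidth sigma is an assumed free parameter) that could stand in for it:
def compute_metric_mmd2_sketch(x, y, sigma=1.):
    def sqdist(a, b):  # pairwise squared Euclidean distances
        return (np.sum(a ** 2, 1)[:, None] + np.sum(b ** 2, 1)[None, :]
                - 2. * np.dot(a, b.T))
    kxx = np.exp(-sqdist(x, x) / (2. * sigma ** 2))
    kyy = np.exp(-sqdist(y, y) / (2. * sigma ** 2))
    kxy = np.exp(-sqdist(x, y) / (2. * sigma ** 2))
    m, n = len(x), len(y)
    np.fill_diagonal(kxx, 0.)  # unbiased estimate: drop self-similarity terms
    np.fill_diagonal(kyy, 0.)
    return (kxx.sum() / (m * (m - 1)) + kyy.sum() / (n * (n - 1))
            - 2. * kxy.mean())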
for epoch in range(1, (niter + niter_decay + 1)):
    # load a file containing a subset of the large full training set
    Xtr = shuffle(Xtr)
    Xva = shuffle(Xva)
    Xtr_epoch = Xtr[:(nbatch * batches_per_epoch), :]
    # mess with the KLd cost
    lam_kld.set_value(floatX([kld_weight]))
    # initialize cost arrays
    g_epoch_costs = [0. for i in range(5)]
    v_epoch_costs = [0. for i in range(5)]
    epoch_layer_klds = [0. for i in range(len(vae_layer_names))]
    vae_nlls = []
    vae_klds = []
    g_batch_count = 0
    v_batch_count = 0
    for imb in tqdm(iter_data(Xtr_epoch, size=nbatch),
                    total=batches_per_epoch):
        # set adversary to be slow relative to generator...
        adv_lr = 0.05 * lrt.get_value(borrow=False)
        adv_lrt.set_value(floatX(adv_lr))
        # transform training batch to model input format
        imb_input = make_model_input(imb)
        # compute loss and apply updates for this batch
        g_result = g_train_func(*imb_input)
        g_epoch_costs = [(v1 + v2) for v1, v2
                         in zip(g_result[:5], g_epoch_costs)]
        vae_nlls.append(1. * g_result[3])
        vae_klds.append(1. * g_result[4])
        batch_obs_costs = g_result[5]
        batch_layer_klds = g_result[6]
        epoch_layer_klds = [(v1 + v2) for v1, v2
                            in zip(batch_layer_klds, epoch_layer_klds)]
        g_batch_count += 1
    # run a smallish number of validation batches per epoch
# Training loop (main loop)
at_first = True
for epoch in range(1, niter + niter_decay + 1):
    # shuffle
    trX0, trX, trY = shuffle(trX0, trX, trY)

    # Generate images at this time.
    genout, out_lYS, out_G3_1, out_G3_2, out_G10, out_G11, out_G12 = _gen(
        sample_zmb, sample_ymb)
    samples = genout
    grayscale_grid_vis(inverse_transform(samples), (10, 10),
                       'samples/%s/%d.png' % (desc, n_epochs))

    for imb0, imb, ymb in tqdm(iter_data(trX0, trX, trY,
                                         size=MINIBATCH_SIZE),
                               total=ntrain / MINIBATCH_SIZE):
        # X: real data
        if not IS_BINARY:
            # transform imb to (?, 1, 28, 28)
            imb = transform(imb)
        ymb = floatX(np.uint8(OneHot(ymb, NUM_Y)))
        # imb: [0.0, 255]
        imb = expandRows(imb, MINIBATCH_SIZE)
        if at_first is True:
            print 'imb:', imb.shape, np.min(imb), np.max(imb)
        # Y: label
        ymb = expandRows(ymb, MINIBATCH_SIZE)
        if at_first is True:
out_file = open(log_name, 'wb')
print("EXPERIMENT: {}".format(desc.upper()))

n_check = 0
n_updates = 0
t = time()
for epoch in range(1, 200):
    Xva = shuffle(Xva)
    # initialize cost arrays
    g_epoch_costs = [0. for gco in g_cost_outputs]
    g_batch_count = 0.
    if (epoch < 25):
        lrt.set_value(floatX(0.00001))
    elif (epoch < 50):
        lrt.set_value(floatX(0.00003))
    for imb in tqdm(iter_data(Xva, size=100), total=(ntrain / 100)):
        # transform training batch to "image format"
        imb_img = train_transform(imb)
        # train vae on training batch
        g_result = i_train_func(floatX(imb_img))
        g_epoch_costs = [(v1 + v2) for v1, v2
                         in zip(g_result, g_epoch_costs)]
        g_batch_count += 1
    if (epoch == 75) or (epoch == 150):
        lr = lrt.get_value(borrow=False)
        lr = lr / 2.0
        lrt.set_value(floatX(lr))
    # report quantitative diagnostics
    g_epoch_costs = [(c / g_batch_count) for c in g_epoch_costs]
    str1 = "Epoch {}: ({})".format(epoch, desc.upper())
    g_bc_strs = ["{0:s}: {1:.2f},".format(c_name, g_epoch_costs[c_idx])
                 for (c_idx, c_name) in zip(g_bc_idx, g_bc_names)]
def main():
    # Parameters
    task = 'toy'
    name = '8G_MOEGAN_MMDu2'  # '8G_MOEGAN_PFq_NFd_t2'
    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD', 'minimax', 'ls']
    nloss = 3            # 2
    DATASET = '8gaussians'
    batchSize = 64
    ncandi = 8
    kD = 1               # # of discrim updates for each gen update
    kG = 1               # # of gen updates for each discrim update
    ntf = 256
    b1 = 0.5             # momentum term of adam
    nz = 2               # # of dim for Z
    niter = 4            # # of iter at starting learning rate
    lr = 0.0001          # initial learning rate for adam G
    lrd = 0.0001         # initial learning rate for adam D
    N_up = 100000
    save_freq = 10000 / 10
    show_freq = 10000 / 10
    test_deterministic = True
    beta = 1.
    GP_norm = False      # if use gradient penalty on discriminator
    LAMBDA = 2.          # hyperparameter of GP
    NSGA2 = True

    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM,
                                                          GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradient penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss, discriminator_params,
                                     learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score (diversity fitness)
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss,
                              updates=updates_d)
    # Compile other functions scoring generated data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs],
                               [real_out.mean(), fake_out.mean(),
                                (real_out > 0.5).mean(),
                                (fake_out > 0.5).mean(), Fd_score])

    # Finally, launch the training loop.
print("Starting training...") desc = task + '_' + name print(desc) if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson' % desc, 'wb') if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/' + desc): os.mkdir(os.path.join('models/', desc)) gen_new_params = [] # We iterate over epochs: for n_updates in range(N_up): xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD) xmb = xmb[0:batchSize * kD] # initial G cluster if n_updates == 0: for can_i in range(0, ncandi): train_g, gen_fn, generator = create_G( loss_type=loss_type[can_i % nloss], discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) for _ in range(0, kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) gen_new_params.append( lasagne.layers.get_all_param_values(generator)) if can_i == 0: g_imgs_old = gen_imgs fmb = gen_imgs[0:int(batchSize / ncandi * kD), :] else: g_imgs_old = np.append(g_imgs_old, gen_imgs, axis=0) newfmb = gen_imgs[0:int(batchSize / ncandi * kD), :] fmb = np.append(fmb, newfmb, axis=0) # print gen_new_params # MODEL G noise = T.matrix('noise') generator = models_uncond.build_generator_toy(noise, nd=DIM) Tgimgs = lasagne.layers.get_output(generator) Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs) g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean() g_loss_minimax = - \ lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean() g_loss_ls = T.mean(T.sqr((Tfake_out - 1))) g_params = lasagne.layers.get_all_params(generator, trainable=True) up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1) up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1) up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1) train_g = theano.function([noise], g_loss_logD, updates=up_g_logD) train_g_minimax = theano.function([noise], g_loss_minimax, updates=up_g_minimax) train_g_ls = theano.function([noise], g_loss_ls, updates=up_g_ls) gen_fn = theano.function([noise], lasagne.layers.get_output( generator, deterministic=True)) else: class Instance: def __init__(self, fq, fd, params, img_values, image_copy): self.fq = fq self.fd = fd self.params = params self.vimg = img_values self.cimg = image_copy def f(self): return self.fq - self.fd instances = [] fq_list = np.zeros(ncandi) fd_list = np.zeros(ncandi) gen_old_params = gen_new_params for can_i in range(0, ncandi): for type_i in range(0, nloss): lasagne.layers.set_all_param_values( generator, gen_old_params[can_i]) if loss_type[type_i] == 'trickLogD': for _ in range(0, kG): zmb = floatX( np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) elif loss_type[type_i] == 'minimax': for _ in range(0, kG): zmb = floatX( np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_minimax(zmb) elif loss_type[type_i] == 'ls': for _ in range(0, kG): zmb = floatX( np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_ls(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) instances.append( Instance( frr_score, fd_score, lasagne.layers.get_all_param_values(generator), gen_imgs, gen_imgs[0:int(batchSize / ncandi * kD), :])) if ncandi < len(instances): if NSGA2 == True: cromos = { idx: [float(inst.fq), -float(inst.fd)] for idx, inst in enumerate(instances) } cromos_idxs = 
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)
                    for idx, p in enumerate(finalpop):
                        inst = instances[p]
                        gen_new_params[idx] = inst.params
                        fq_list[idx] = inst.fq
                        fd_list[idx] = inst.fd
                        fake_rate[idx] = inst.f()
                        g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                        fmb[int(idx * batchSize / ncandi * kD):
                            int(math.ceil((idx + 1) * batchSize / ncandi * kD)), :] = inst.cimg
                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for idx, p in enumerate(finalpop):
                            inst = instances[p]
                            ffront.write((str(inst.fq) + "\t" +
                                          str(inst.fd)).encode())
                            ffront.write("\n".encode())
                else:
                    for idx, inst in enumerate(instances):
                        if idx < ncandi:
                            gen_new_params[idx] = inst.params
                            fake_rate[idx] = inst.f()
                            fq_list[idx] = inst.fq
                            fd_list[idx] = inst.fd
                            g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                            fmb[int(idx * batchSize / ncandi * kD):
                                int(math.ceil((idx + 1) * batchSize / ncandi * kD)), :] = inst.cimg
                        else:
                            fr_com = fake_rate - inst.f()
                            if min(fr_com) < 0:
                                idr = np.where(fr_com == min(fr_com))[0][0]
                                gen_new_params[idr] = inst.params
                                fake_rate[idr] = inst.f()
                                g_imgs_old[idr * ntf:(idr + 1) * ntf, :] = inst.vimg
                                fmb[int(idr * batchSize / ncandi * kD):
                                    int(math.ceil((idr + 1) * batchSize / ncandi * kD)), :] = inst.cimg

        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate, fr)
                real_rate = np.append(real_rate, tr)
                fake_rate_p = np.append(fake_rate_p, frp)
                real_rate_p = np.append(real_rate_p, trp)
                FDL = np.append(FDL, fdscore)
        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        if n_updates % show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            params_max = gen_new_params[np.argmax(fake_rate)]
            lasagne.layers.set_all_param_values(generator, params_max)
            g_imgs_max = gen_fn(s_zmb)
        if n_updates % show_freq == 0 and n_updates != 0:
            # metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            mmd2_all = []
            for i in range(0, ncandi):
                lasagne.layers.set_all_param_values(generator,
                                                    gen_new_params[i])
                g_imgs_min = gen_fn(s_zmb)
                mmd2_all.append(compute_metric_mmd2(g_imgs_min, xmb))
            mmd2_all = np.array(mmd2_all)
            if NSGA2:
                with open('front/%s_mmd2u.tsv' % desc, 'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write((str(fq_list[idx]) + "\t" +
                                      str(fd_list[idx]) + "\t" +
                                      str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            # save best
            params = gen_new_params[np.argmin(mmd2_all)]
            lasagne.layers.set_all_param_values(generator, params)
            g_imgs_min = gen_fn(s_zmb)
            generate_image(xmb, g_imgs_min, n_updates / save_freq, desc,
                           postfix="_mmu2d")
            np.savez('models/%s/gen_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(generator))
            np.savez('models/%s/dis_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(discriminator))
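# nsga_2_pass is imported from the repo's NSGA-II module and returns the
# indices of the ncandi survivors given per-candidate objective vectors
# (lower is better). A minimal self-contained sketch of the standard
# algorithm (non-dominated sorting plus crowding-distance truncation);
# an assumption for illustration, not the repo's implementation:
def nsga_2_pass_sketch(n_survive, cromos, idxs):
    objs = {i: np.asarray(cromos[i]) for i in idxs}

    def dominates(a, b):
        return np.all(objs[a] <= objs[b]) and np.any(objs[a] < objs[b])

    remaining, fronts = set(idxs), []
    while remaining:  # peel off successive non-dominated fronts
        front = [i for i in remaining
                 if not any(dominates(j, i) for j in remaining if j != i)]
        fronts.append(front)
        remaining -= set(front)
    survivors = []
    for front in fronts:
        if len(survivors) + len(front) <= n_survive:
            survivors.extend(front)
        else:
            # crowding distance: keep the most isolated points of this front
            dist = dict((i, 0.) for i in front)
            for d in range(len(objs[front[0]])):
                order = sorted(front, key=lambda i: objs[i][d])
                dist[order[0]] = dist[order[-1]] = np.inf
                span = float(objs[order[-1]][d] - objs[order[0]][d]) or 1.
                for a, b, c in zip(order, order[1:], order[2:]):
                    dist[b] += (objs[c][d] - objs[a][d]) / span
            survivors.extend(sorted(front, key=lambda i: -dist[i])
                             [:n_survive - len(survivors)])
            break
    return survivors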
if phase == 'TEST':
    # generate_rotated_multipie_setting1()
    test(10000)

if phase == 'TRAIN':
    log = open('logs/log.txt', 'w')
    log.close()
    for epoch in range(1, niter + niter_decay + 1):
        print 'epoch', epoch
        trY_A, trX_B, trY_B, trX_A = one_epoch_traning_data()
        trY_A, trX_B, trY_B, trX_A = shuffle(trY_A, trX_B, trY_B, trX_A)
        mean_vars_array = [[] for _ in range(20)]
        for ymb_A, xmb_B, ymb_B, xmb_A in tqdm(
                iter_data(trY_A, trX_B, trY_B, trX_A, size=nbatch),
                total=len(trY_A) / nbatch):
            ymb_A = transform(ymb_A)
            xmb_B = transform(xmb_B)
            ymb_B = transform(ymb_B)
            xmb_A = transform(xmb_A)
            if n_updates % (k + 1) == 0:
                output_g = _train_g(ymb_A, xmb_B, ymb_B, xmb_A)
            else:
                output_d = _train_d(ymb_A, xmb_B, ymb_B, xmb_A)
            n_updates += 1
            n_examples += len(xmb_A)
            for i in range(len(mean_vars_array)):
                mean_vars_array[i].append(output_g[i])
def main():
    # Parameters
    data_path = '../datasets/'
    task = 'face'
    name = '128'
    start = 0
    stop = 202560
    input_nc = 3
    loss_type = ['trickLogD', 'minimax', 'ls']
    nloss = 3
    shuffle_ = True
    batchSize = 32
    fineSize = 128
    flip = True
    ncandi = 1           # # of surviving children
    kD = 3               # # of discrim updates for each gen update
    kG = 1               # # of gen updates for each discrim update
    ntf = batchSize * kD
    b1 = 0.5             # momentum term of adam
    nz = 100             # # of dim for Z
    ngf = 64             # # of gen filters in first conv layer
    ndf = 64             # # of discrim filters in first conv layer
    niter = 25           # # of iter at starting learning rate
    lr = 0.0002          # initial learning rate for adam G
    lrd = 0.0002         # initial learning rate for adam D
    beta = 0.001         # hyperparameter balancing the fitness score
    GP_norm = False      # if use gradient penalty on discriminator
    LAMBDA = 2.          # hyperparameter of GP
    save_freq = 5000
    show_freq = 500
    begin_save = 0
    test_deterministic = True

    # Load the dataset
    print("Loading data...")
    f = h5py.File(data_path + 'img_align_celeba_128.hdf5', 'r')
    trX = f['data']
    ids = range(start, stop)

    ################## MODEL D #######################
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.tensor4('real_imgs')
    fake_imgs = T.tensor4('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_128(ndf=ndf)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradient penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1, 1, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1, 2, 3)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = b1

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Diversity fitness
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss,
                              updates=updates_d)
    # Compile another function scoring real and generated data
    disft_fn = theano.function([real_imgs, fake_imgs],
                               [(real_out).mean(), (fake_out).mean(),
                                Fd_score])

    # Finally, launch the training loop.
print("Starting training...") desc = task + '_' + name print desc if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson'%desc, 'wb') if not os.path.isdir('samples'): os.mkdir(os.path.join('samples/')) if not os.path.isdir('samples/'+desc): os.mkdir(os.path.join('samples/',desc)) if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/'+desc): os.mkdir(os.path.join('models/',desc)) gen_new_params = [] n_updates = 0 # We iterate over epochs: for epoch in range(niter): t = time() if shuffle_ is True: ids = shuffle(ids) for index_ in iter_data(ids, size=batchSize*kD): index = sorted(index_) xmb = trX[index,:,:,:] xmb = Batch(xmb,fineSize,input_nc,flip=flip) xmb = processing_img(xmb, center=True, scale=True, convert=False) rand_idx = random.randint(start,stop-ntf-1) rand_ids = ids[rand_idx:rand_idx+ntf] rand_ids = sorted(rand_ids) sample_xmb = trX[rand_ids,:,:,:] sample_xmb = Batch(sample_xmb,fineSize,input_nc,flip=flip) sample_xmb = processing_img(sample_xmb, center=True, scale=True, convert=False) # initial G cluster if epoch + n_updates == 0: for can_i in range(0,ncandi): train_g, gen_fn, generator = create_G( loss_type=loss_type[can_i%nloss], discriminator=discriminator, lr=lr, b1=b1, ngf=ngf) for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) gen_new_params.append(lasagne.layers.get_all_param_values(generator)) if can_i == 0: g_imgs_old=gen_imgs fmb = gen_imgs[0:batchSize/ncandi*kD,:,:,:] else: g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0) fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:,:,:],axis=0) #print gen_new_params # MODEL G noise = T.matrix('noise') generator = models_uncond.build_generator_128(noise,ngf=ngf) Tgimgs = lasagne.layers.get_output(generator) Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs) g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean() g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean() g_loss_ls = T.mean(T.sqr((Tfake_out - 1))) g_params = lasagne.layers.get_all_params(generator, trainable=True) up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1) up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1) up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1) train_g = theano.function([noise],g_loss_logD,updates=up_g_logD) train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax) train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls) gen_fn = theano.function([noise], lasagne.layers.get_output( generator,deterministic=True)) else: gen_old_params = gen_new_params for can_i in range(0,ncandi): for type_i in range(0,nloss): lasagne.layers.set_all_param_values(generator, gen_old_params[can_i]) if loss_type[type_i] == 'trickLogD': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) elif loss_type[type_i] == 'minimax': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_minimax(zmb) elif loss_type[type_i] == 'ls': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_ls(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) _, fr_score, fd_score = disft_fn(sample_xmb,gen_imgs) fit = fr_score - 
                        if can_i * nloss + type_i < ncandi:
                            idx = can_i * nloss + type_i
                            gen_new_params[idx] = \
                                lasagne.layers.get_all_param_values(generator)
                            fitness[idx] = fit
                            fake_rate[idx] = fr_score
                            g_imgs_old[idx * ntf:(idx + 1) * ntf, :, :, :] = \
                                gen_imgs
                            fmb[idx * batchSize / ncandi * kD:
                                (idx + 1) * batchSize / ncandi * kD, :, :, :] = \
                                gen_imgs[0:batchSize / ncandi * kD, :, :, :]
                        else:
                            fit_com = fitness - fit
                            if min(fit_com) < 0:
                                ids_replace = np.where(fit_com == min(fit_com))
                                idr = ids_replace[0][0]
                                fitness[idr] = fit
                                fake_rate[idr] = fr_score
                                gen_new_params[idr] = \
                                    lasagne.layers.get_all_param_values(generator)
                                g_imgs_old[idr * ntf:(idr + 1) * ntf, :, :, :] = \
                                    gen_imgs
                                fmb[idr * batchSize / ncandi * kD:
                                    (idr + 1) * batchSize / ncandi * kD, :, :, :] = \
                                    gen_imgs[0:batchSize / ncandi * kD, :, :, :]
                print fake_rate, fitness
                f_log.write(str(fake_rate) + ' ' + str(fd_score) + ' ' +
                            str(fitness) + '\n')

            # train D
            for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
                cost = train_d(xreal, xfake)

            for i in range(0, ncandi):
                xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :, :, :]
                xreal = sample_xmb[0:ntf, :, :, :]
                tr, fr, fd = disft_fn(xreal, xfake)
                if i == 0:
                    fake_rate = np.array([fr])
                    fitness = np.array([0.])
                    real_rate = np.array([tr])
                    FDL = np.array([fd])
                else:
                    fake_rate = np.append(fake_rate, fr)
                    fitness = np.append(fitness, [0.])
                    real_rate = np.append(real_rate, tr)
                    FDL = np.append(FDL, fd)
            print fake_rate, FDL
            print (n_updates, epoch, real_rate.mean())
            n_updates += 1
            f_log.write(str(fake_rate) + ' ' + str(FDL) + '\n' +
                        str(epoch) + ' ' + str(n_updates) + ' ' +
                        str(real_rate.mean()) + '\n')
            f_log.flush()

            if n_updates % show_freq == 0:
                blank_image = Image.new("RGB",
                                        (fineSize * 8 + 9, fineSize * 8 + 9))
                for i in range(8):
                    for ii in range(8):
                        img = g_imgs_old[i * 8 + ii, :, :, :]
                        img = ImgRescale(img, center=True, scale=True,
                                         convert_back=True)
                        blank_image.paste(Image.fromarray(img),
                                          (ii * fineSize + ii + 1,
                                           i * fineSize + i + 1))
                blank_image.save('samples/%s/%s_%d.png' %
                                 (desc, desc, n_updates / save_freq))

            if n_updates % save_freq == 0 and epoch > begin_save - 1:
                # Optionally, dump the network weights to files like this:
                np.savez('models/%s/gen_%d.npz' % (desc, n_updates / save_freq),
                         *lasagne.layers.get_all_param_values(generator))
                np.savez('models/%s/dis_%d.npz' % (desc, n_updates / save_freq),
                         *lasagne.layers.get_all_param_values(discriminator))
def test(nep):
    print 'Testing...'
    if phase == 'TEST':
        load_weights(
            'models/multipie_gan/setting2-cross-94.5/60_gen_params.jl',
            'models/multipie_gan/setting2-cross-94.5/60_discrim_params.jl',
            'models/multipie_gan/setting2-cross-94.5/60_test_params.jl')

    test_nbatch = 2000
    batch_feature = []
    for tmb in tqdm(iter_data(teY, size=test_nbatch),
                    total=len(teY) / test_nbatch):
        batch_feature.append(_eigen_encoder(transform(tmb)))
    probe_feature = np.concatenate(batch_feature, axis=0)
    probe_feature_stat = probe_feature.reshape(len(probe_feature), -1)
    probe_feature_var = np.var(probe_feature_stat, axis=1)
    # print probe_feature_var
    # print probe_feature_var.shape
    gallery_feature = probe_feature[range(7 * n_pos + n_pos / 2, len(teY),
                                          n_lum * n_pos)]

    rates = np.full(n_pos, 0).astype(np.float32)
    for probe_idx in tqdm(range(len(teY))):
        max_distance = -100000.0
        max_idx = 0
        for gallery_idx, feature in enumerate(gallery_feature):
            # cosine similarity between probe and gallery features
            cos_up = np.inner(probe_feature[probe_idx].reshape(-1, ),
                              feature.reshape(-1, ))
            cos_down = np.sqrt((probe_feature[probe_idx]**2).sum()) * \
                np.sqrt((feature**2).sum())
            distance = cos_up / cos_down
            if distance > max_distance:
                max_distance = distance
                max_idx = gallery_idx
        if probe_idx in range(max_idx * n_lum * n_pos,
                              (max_idx + 1) * n_lum * n_pos):
            rates[probe_idx % n_pos] += 1
    rates /= (len(teY) / n_pos)
    print 'rate:', rates, rates.mean()

    print 'Visualisation'
    sample_visual = sample_for_visual()
    sample_poses = sample_visual[1:]
    sample_to_rotate = sample_visual[0]
    pos_codes = [(_pose_lum_encoder(transform(sample_pos))).mean(0)
                 for sample_pos in sample_poses]
    print len(pos_codes)
    eigen_codes = _eigen_encoder(transform(sample_to_rotate))
    print len(eigen_codes)
    rotated_faces = [[_face_rotator(eigen_code.reshape(1, -1, 1, 1),
                                    pos_code.reshape(1, -1, 1, 1))
                      for pos_code in pos_codes]
                     for eigen_code in eigen_codes]
    rotated_faces = np.concatenate(
        [transform(sample_to_rotate).reshape(5, 1, 1, 1, 64, 64),
         rotated_faces], axis=1)
    rotated_faces = np.array(rotated_faces).reshape(5 * (1 + n_pos), -1)
    # rotated_faces = np.vstack([rotated_faces,
    #                            transform(sample_to_rotate).reshape(5, -1)])
    grayscale_grid_vis(inverse_transform(rotated_faces), (5, (1 + n_pos)),
                       'samples/test_%d.png' % (nep))
    print rotated_faces.shape
    return rates.mean()
def main():
    # Parameters
    test_data = './datasets/facades/test/'
    JPEG_img = False
    start = 0
    stop = 106
    save_samples = True
    use_h5py = 0
    batchSize = 4
    loadSize = 256
    fineSize = 256
    flip = False
    ngf = 64
    ndf = 64
    input_nc = 3
    output_nc = 3
    epoch = 1000
    task = 'facades'
    name = 'nogan'
    which_direction = 'BtoA'
    preprocess = 'regular'
    which_netG = 'unet_nodrop'
    test_deterministic = True
    patchSize = 64
    overlap = 4

    desc = task + '_' + name
    print desc

    # Load the dataset
    print("Loading data...")
    if use_h5py == 0 and preprocess == 'regular':
        if which_direction == 'AtoB':
            test_input, test_output, file_name = pix2pix(
                data_path=test_data,
                img_shape=[input_nc, loadSize, loadSize],
                save=save_samples, start=start, stop=stop)
        elif which_direction == 'BtoA':
            test_output, test_input, file_name = pix2pix(
                data_path=test_data,
                img_shape=[input_nc, loadSize, loadSize],
                save=save_samples, start=start, stop=stop)
        ids = range(0, stop - start)
    elif use_h5py == 1 and preprocess == 'regular':
        print('waiting to fill')
        ids = range(start, stop)
    elif use_h5py == 0 and task == 'inpainting':
        test_input, file_name = Inpainting(
            data_path=test_data,
            img_shape=[input_nc, loadSize, loadSize],
            save=save_samples, start=start, stop=stop)
        test_output = test_input
        ids = range(0, stop - start)
    elif use_h5py == 1 and task == 'inpainting':
        print('waiting to fill')
        ids = range(start, stop)
    elif use_h5py == 0 and task == 'cartoon':
        print('waiting to fill')
        ids = range(0, stop - start)
    elif use_h5py == 1 and task == 'cartoon':
        print('waiting to fill')
        ids = range(start, stop)
    ntrain = len(ids)

    # Prepare Theano variables for inputs and targets
    input_x = T.tensor4('input_x')
    input_y = T.tensor4('input_y')

    # Create neural network model
    print("Building model and compiling functions...")
    if which_netG == 'unet':
        generator = models.build_generator_unet(input_x, ngf=ngf)
    elif which_netG == 'unet_nodrop':
        generator = models.build_generator_unet_nodrop(input_x, ngf=ngf)
    elif which_netG == 'unet_1.0':
        generator = models.build_generator_unet_1(input_x, ngf=ngf)
    elif which_netG == 'unet_deraining':
        generator = models.build_generator_deraining(input_x, ngf=ngf)
    elif which_netG == 'Ginpainting':
        generator = models.build_generator_inpainting(input_x, ngf=ngf)
    else:
        print('waiting to fill')

    with np.load('models/%s/gen_%d.npz' % (desc, epoch)) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(generator, param_values)

    gen_fn = theano.function([input_x], lasagne.layers.get_output(
        generator, deterministic=test_deterministic))

    test_folder = desc + '_' + str(epoch)
    real = True
    if not os.path.isdir('test_imgs/' + test_folder):
        os.mkdir(os.path.join('test_imgs/', test_folder))
    test_folder_path = str('test_imgs/' + test_folder + '/')
    test_folder_path_patch = str(test_folder_path + 'output/')
    test_folder_path_image = str(test_folder_path + 'input/')
    test_folder_path_real = str(test_folder_path + 'GroundTruth/')
    if not os.path.isdir(test_folder_path_patch):
        os.mkdir(os.path.join(test_folder_path_patch))
    if not os.path.isdir(test_folder_path_image):
        os.mkdir(os.path.join(test_folder_path_image))
    if not os.path.isdir(test_folder_path_real):
        os.mkdir(os.path.join(test_folder_path_real))

    i = 1
    for index in iter_data(ids, size=batchSize):
        xmb = test_input[index, :, :, :]
        ymb = test_output[index, :, :, :]
        if preprocess == 'regular':
            xmb, ymb = pix2pixBatch(xmb, ymb, fineSize, input_nc, flip=flip)
        elif task == 'inpainting':
            dmb, _ = pix2pixBatch(xmb, ymb, fineSize, input_nc, flip=flip)
            xmb, ymb = inpainting_data(
                dmb, image_shape=[input_nc, fineSize, fineSize],
                patch_shape=[input_nc, patchSize, patchSize], overlap=overlap)
        elif task == 'cartoon':
            print('waiting to fill')
        xmb = processing_img(xmb, convert=False)
        ymb = processing_img(ymb, convert=False)
        images = gen_fn(xmb)
        for ii in xrange(images.shape[0]):
            idd = index[ii]
            ff = file_name[idd]
            if JPEG_img is True:
                fff = ff[:-5] + '.png'
            elif JPEG_img is False:
                fff = ff[:-4] + '.png'
            img = images[ii]
            img_real = ymb[ii, :, :, :]
            img_whole = xmb[ii, :, :, :]
            if preprocess == 'regular':
                img_whole = convert_img_back(img_whole)
                img = convert_img_back(img)
                img_real = convert_img_back(img_real)
            elif preprocess == 'inpainting':
                img0 = np.zeros([input_nc, fineSize, fineSize],
                                dtype='float32')
                img_real0 = np.zeros([input_nc, fineSize, fineSize],
                                     dtype='float32')
                img_whole0 = np.zeros([input_nc, fineSize, fineSize],
                                      dtype='float32')
                img0[:, :, :] = img_whole[:, :, :]
                img0[:,
                     (fineSize - patchSize) / 2 + overlap:
                     (fineSize + patchSize) / 2 - overlap,
                     (fineSize - patchSize) / 2 + overlap:
                     (fineSize + patchSize) / 2 - overlap] = \
                    img[:, overlap:patchSize - overlap,
                        overlap:patchSize - overlap]
                img_real0[:, :, :] = img_whole[:, :, :]
                img_real0[:,
                          (fineSize - patchSize) / 2 + overlap:
                          (fineSize + patchSize) / 2 - overlap,
                          (fineSize - patchSize) / 2 + overlap:
                          (fineSize + patchSize) / 2 - overlap] = \
                    img_real[:, overlap:patchSize - overlap,
                             overlap:patchSize - overlap]
                img_whole0[:, :, :] = img_whole[:, :, :]
                img_whole0[:,
                           (fineSize - patchSize) / 2 + overlap:
                           (fineSize + patchSize) / 2 - overlap,
                           (fineSize - patchSize) / 2 + overlap:
                           (fineSize + patchSize) / 2 - overlap] = 1.0
                img_whole = convert_img_back(img_whole0)
                img = convert_img_back(img0)
                img_real = convert_img_back(img_real0)
            result_img = Image.fromarray(
                ((img + 1) / 2 * 255).astype(np.uint8))
            result_img.save(test_folder_path_patch + fff)
            result = Image.fromarray(
                ((img_whole + 1) / 2 * 255).astype(np.uint8))
            result.save(test_folder_path_image + fff)
            result_real = Image.fromarray(
                ((img_real + 1) / 2 * 255).astype(np.uint8))
            result_real.save(test_folder_path_real + fff)
            i += 1
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_gen = theano.function([Z], gen(Z, *gen_params))
_logp_rbm = theano.function([X], logp_rbm(X))
_svgd_gradient = theano.function([X], svgd_gradient(X))
print '%.2f seconds to compile theano functions' % (time() - t)

nbatch = 100
n_iter = 20
n_updates = 0
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

for iter in tqdm(range(1, n_iter + 1)):
    trX = shuffle(trX)
    for imb in iter_data(trX, size=nbatch):
        imb = floatX(imb)
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))

        # generate samples
        samples = floatX(_gen(zmb).reshape(-1, nx))
        grad, svgd_grad = _svgd_gradient(samples)

        _train_g(zmb, floatX(svgd_grad.reshape(-1, nc, npx, npx)))  # generator
        _train_d(imb, floatX(samples))  # discriminator
        n_updates += 1

    if iter % 50 == 0:
        joblib.dump([p.get_value() for p in gen_params],
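# For reference, `svgd_gradient` (defined elsewhere) computes a Stein
# variational gradient. A minimal NumPy sketch of the standard SVGD update
# direction with an RBF kernel and the median-distance bandwidth heuristic --
# an illustration only, not necessarily the exact kernel used above:
import numpy as np
from scipy.spatial.distance import pdist, squareform

def svgd_direction(x, grad_logp):
    """x: (n, d) particles; grad_logp: (n, d) score of the target at each
    particle (here that role is played by the RBM's log-density gradient).
    Returns the SVGD direction phi(x)."""
    sq_dists = squareform(pdist(x)) ** 2
    h = np.median(sq_dists) / np.log(x.shape[0] + 1.0)  # median heuristic
    Kxy = np.exp(-sq_dists / h)
    # sum over j of grad_{x_j} k(x_j, x_i), in closed form for the RBF kernel
    dxKxy = (x * Kxy.sum(axis=1, keepdims=True) - Kxy.dot(x)) * (2.0 / h)
    return (Kxy.dot(grad_logp) + dxKxy) / x.shape[0]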
"10k_va_nnd", "100k_va_nnd", "g_cost", "d_cost", ] print desc.upper() n_updates = 0 n_check = 0 n_epochs = 0 n_updates = 0 n_examples = 0 t = time() for epoch in range(1, niter + niter_decay + 1): trX, trY = shuffle(trX, trY) for imb, ymb in tqdm(iter_data(trX, trY, size=nbatch), total=ntrain / nbatch): imb = transform(imb) ymb = floatX(OneHot(ymb, ny)) zmb = floatX(np_rng.uniform(-1.0, 1.0, size=(len(imb), nz))) if n_updates % (k + 1) == 0: cost = _train_g(imb, zmb, ymb) else: cost = _train_d(imb, zmb, ymb) n_updates += 1 n_examples += len(imb) if (epoch - 1) % 5 == 0: g_cost = float(cost[0]) d_cost = float(cost[1]) gX, gY = gen_samples(100000) gX = gX.reshape(len(gX), -1) va_nnc_acc_1k = nnc_score(gX[:1000], gY[:1000], vaX, vaY, metric="euclidean")
# make a file for recording test progress
log_name = "{}/EVAL.txt".format(result_dir)
out_file = open(log_name, 'wb')
print("EXPERIMENT: {}".format(desc.upper()))

Xva_blocks = [Xva]  # np.split(Xva, 2, axis=0)
for epoch in range(5):
    epoch_vae_cost = 0.0
    epoch_iwae_cost = 0.0
    for block_num, Xva_block in enumerate(Xva_blocks):
        Xva_block = shuffle(Xva_block)
        obs_count = Xva_block.shape[0]
        g_epoch_costs = [0. for c in g_basic_costs]
        g_batch_count = 0.
        for imb in tqdm(iter_data(Xva_block, size=nbatch), total=obs_count / nbatch):
            # transform validation batch to "image format"
            imb_img = floatX(train_transform(imb))
            # evaluate costs
            g_result = g_eval_func(imb_img)
            # evaluate costs more thoroughly
            iwae_bounds = iwae_multi_eval(imb_img, 25 * 25,
                                          cost_func=iwae_cost_func,
                                          iwae_num=iwae_samples)
            g_result[4] = np.mean(iwae_bounds)  # swap in the tighter bound
            # accumulate costs
            g_epoch_costs = [(v1 + v2) for v1, v2 in zip(g_result, g_epoch_costs)]
            g_batch_count += 1

    ##################################
    # QUANTITATIVE DIAGNOSTICS STUFF #
    ##################################
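# `iwae_multi_eval` (defined elsewhere) evaluates the K-sample IWAE bound,
# log (1/K) sum_k w_k with importance weights w_k = p(x, z_k) / q(z_k | x).
# A minimal log-space sketch, assuming an array of per-sample log-weights:
import numpy as np
from scipy.special import logsumexp

def iwae_bound(log_weights):
    """log_weights: (K, n) log-importance-weights. Returns the per-example
    K-sample IWAE bound, shape (n,); larger K gives a tighter bound."""
    K = log_weights.shape[0]
    return logsumexp(log_weights, axis=0) - np.log(K)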
_vgd_gradient = theano.function([X0, X1, Y], vgd_gradient(X0, X1, Y))
_reconstruction_cost = theano.function([X], T.mean(mse_data))
print '%.2f seconds to compile theano functions' % (time() - t)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))
sample_ymb = floatX(OneHot(
    np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(), ny))

n_updates = 0
t = time()
for epoch in range(niter):
    print 'cifar 10, vgd, %s, iter %d' % (desc, epoch)
    trX, trY = shuffle(trX, trY)
    for imb, ymb in tqdm(iter_data(trX, trY, size=nbatch), total=ntrain / nbatch):
        imb = transform(imb.reshape(imb.shape[0], nc, npx, npx))
        ymb = floatX(OneHot(ymb, ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz)))

        # generate samples
        samples = _gen(zmb, ymb)
        vgd_grad = _vgd_gradient(samples, samples, ymb)

        if n_updates % (k + 1) == 0:
            _train_g(zmb, ymb, floatX(vgd_grad))
        else:
            _train_d(imb, samples, ymb)
        n_updates += 1
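# The nested comprehension that builds `sample_ymb` just repeats each of the
# 10 class labels 20 times, so the 200 fixed samples form a 10x20 class grid.
# A terser NumPy equivalent:
import numpy as np
labels = np.repeat(np.arange(10), 20)  # [0]*20 + [1]*20 + ... + [9]*20
# sample_ymb = floatX(OneHot(labels, ny))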
n_updates = 0
n_check = 0
n_epochs = 0
n_examples = 0
t = time()

# niter = 1
# niter_decay = 1
for epoch in range(1, niter_decay + 1):
    # trX, trY = shuffle(trX, trY)
    sIndex = np.arange(ntrain)
    np.random.shuffle(sIndex)
    for x_batch in iter_data(trX, shuffle_index=sIndex, size=batchsize, ndata=ntrain):
        # reshape to (batch, 1, 64, 64, 64) voxel volumes
        x_batch = floatX(np.reshape(x_batch, (x_batch.shape[0], 1, 64, 64, 64)))
        cost = _train_(x_batch)
        n_updates += 1
        n_examples += x_batch.shape[0]
        if n_updates % 50 == 0:
            print 'epoch ' + str(epoch), 'time', str(time() - t)
            print 'cost %.4f' % (float(cost))
    n_epochs += 1
    lrt.set_value(floatX(lrt.get_value() - lrate / niter_decay))
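# `iter_data` is used here with a pre-shuffled index array instead of
# shuffling the data itself. A minimal sketch of such a variant (hypothetical;
# only the keyword arguments match the call above):
import numpy as np

def iter_data_indexed(data, shuffle_index=None, size=32, ndata=None):
    """Yield minibatches of `data` through `shuffle_index`, so the underlying
    array is never reordered in place."""
    n = len(data) if ndata is None else ndata
    idx = np.arange(n) if shuffle_index is None else shuffle_index
    for start in range(0, n, size):
        yield data[idx[start:start + size]]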
def run(hp, folder):
    trX, trY, nb_classes = load_data()

    # defaults (overridden by the hyperparameter dict `hp` below)
    k = 1                # # of discrim updates for each gen update
    l2 = 2.5e-5          # l2 weight decay
    b1 = 0.5             # momentum term of adam
    nc = 1               # # of channels in image
    ny = nb_classes      # # of classes
    nbatch = 128         # # of examples in batch
    npx = 28             # # of pixels width/height of images
    nz = 100             # # of dim for Z
    ngfc = 512           # # of gen units for fully connected layers
    ndfc = 512           # # of discrim units for fully connected layers
    ngf = 64             # # of gen filters in first conv layer
    ndf = 64             # # of discrim filters in first conv layer
    nx = npx * npx * nc  # # of dimensions in X
    niter = 200          # # of iter at starting learning rate
    niter_decay = 100    # # of iter to linearly decay learning rate to zero
    lr = 0.0002          # initial learning rate for adam
    scale = 0.02

    # overrides from `hp`
    k = hp['k']
    l2 = hp['l2']
    nbatch = hp['nbatch']
    nz = hp['nz']
    ngfc = hp['ngfc']
    ndfc = hp['ndfc']
    ngf = hp['ngf']
    ndf = hp['ndf']
    nx = npx * npx * nc
    niter = hp['niter']
    niter_decay = hp['niter_decay']
    lr = hp['lr']
    scale = hp['scale']
    budget_hours = hp.get('budget_hours', 2)
    budget_secs = budget_hours * 3600

    ntrain = len(trX)

    def transform(X):
        return (floatX(X)).reshape(-1, nc, npx, npx)

    def inverse_transform(X):
        X = X.reshape(-1, npx, npx)
        return X

    model_dir = folder
    samples_dir = os.path.join(model_dir, 'samples')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    relu = activations.Rectify()
    sigmoid = activations.Sigmoid()
    lrelu = activations.LeakyRectify()
    bce = T.nnet.binary_crossentropy

    gifn = inits.Normal(scale=scale)
    difn = inits.Normal(scale=scale)

    gw = gifn((nz, ngfc), 'gw')
    gw2 = gifn((ngfc, ngf * 2 * 7 * 7), 'gw2')
    gw3 = gifn((ngf * 2, ngf, 5, 5), 'gw3')
    gwx = gifn((ngf, nc, 5, 5), 'gwx')

    dw = difn((ndf, nc, 5, 5), 'dw')
    dw2 = difn((ndf * 2, ndf, 5, 5), 'dw2')
    dw3 = difn((ndf * 2 * 7 * 7, ndfc), 'dw3')
    dwy = difn((ndfc, 1), 'dwy')

    gen_params = [gw, gw2, gw3, gwx]
    discrim_params = [dw, dw2, dw3, dwy]

    def gen(Z, w, w2, w3, wx, use_batchnorm=True):
        if use_batchnorm:
            batchnorm_ = batchnorm
        else:
            batchnorm_ = lambda x: x
        h = relu(batchnorm_(T.dot(Z, w)))
        h2 = relu(batchnorm_(T.dot(h, w2)))
        h2 = h2.reshape((h2.shape[0], ngf * 2, 7, 7))
        h3 = relu(batchnorm_(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
        x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2)))
        return x

    def discrim(X, w, w2, w3, wy):
        h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
        h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))))
        h2 = T.flatten(h2, 2)
        h3 = lrelu(batchnorm(T.dot(h2, w3)))
        y = sigmoid(T.dot(h3, wy))
        return y

    X = T.tensor4()
    Z = T.matrix()

    gX = gen(Z, *gen_params)
    p_real = discrim(X, *discrim_params)
    p_gen = discrim(gX, *discrim_params)

    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d

    cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    d_updates = d_updater(discrim_params, d_cost)
    g_updates = g_updater(gen_params, g_cost)

    print 'COMPILING'
    t = time()
    _train_g = theano.function([X, Z], cost, updates=g_updates)
    _train_d = theano.function([X, Z], cost, updates=d_updates)
    _gen = theano.function([Z], gX)
    print '%.2f seconds to compile theano functions' % (time() - t)

    tr_idxs = np.arange(len(trX))
    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

    def gen_samples(n, nbatch=128):
        samples = []
        labels = []
        n_gen = 0
        for i in range(n / nbatch):
            zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
            xmb = _gen(zmb)
            samples.append(xmb)
            n_gen += len(xmb)
        n_left = n - n_gen
        zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        return np.concatenate(samples, axis=0)

    s = floatX(np_rng.uniform(-1., 1., size=(10000, nz)))

    n_updates = 0
    n_check = 0
    n_epochs = 0
    n_examples = 0
    begin = datetime.now()
    for epoch in range(1, niter + niter_decay + 1):
        t = time()
        print("Epoch {}".format(epoch))
        trX = shuffle(trX)
        for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain / nbatch):
            imb = transform(imb)
            zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz)))
            if n_updates % (k + 1) == 0:
                cost = _train_g(imb, zmb)
            else:
                cost = _train_d(imb, zmb)
            n_updates += 1
            n_examples += len(imb)
        samples = np.asarray(_gen(sample_zmb))
        grayscale_grid_vis(inverse_transform(samples), (10, 20),
                           '{}/{:05d}.png'.format(samples_dir, n_epochs))
        n_epochs += 1
        if n_epochs > niter:
            lrt.set_value(floatX(lrt.get_value() - lr / niter_decay))
        if n_epochs % 50 == 0 or epoch == niter + niter_decay or epoch == 1:
            imgs = []
            for i in range(0, s.shape[0], nbatch):
                imgs.append(_gen(s[i:i + nbatch]))
            img = np.concatenate(imgs, axis=0)
            samples_filename = '{}/{:05d}_gen.npz'.format(model_dir, n_epochs)
            joblib.dump(img, samples_filename, compress=9)
            shutil.copy(samples_filename, '{}/gen.npz'.format(model_dir))
            joblib.dump([p.get_value() for p in gen_params],
                        '{}/d_gen_params.jl'.format(model_dir), compress=9)
            joblib.dump([p.get_value() for p in discrim_params],
                        '{}/discrim_params.jl'.format(model_dir), compress=9)
        print('Elapsed : {}sec'.format(time() - t))
        if (datetime.now() - begin).total_seconds() >= budget_secs:
            print("Budget exhausted; stopping.")
            break
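# The schedule above holds the learning rate at `lr` for the first `niter`
# epochs, then subtracts lr / niter_decay once per epoch so it reaches zero
# after `niter_decay` more. The effective rate per (1-indexed) epoch:
def effective_lr(epoch, lr=0.0002, niter=200, niter_decay=100):
    if epoch <= niter:
        return lr
    return lr * max(0.0, 1.0 - float(epoch - niter) / niter_decay)

# effective_lr(200) == 0.0002; effective_lr(250) == 0.0001; effective_lr(300) == 0.0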
print desc.upper()

n_updates = 0
n_check = 0
n_epochs = 0
n_examples = 0
t = time()
sample_z0mb = rand_gen(size=(200, nz0))  # noise samples for top generator module

for epoch in range(1, niter + niter_decay + 1):
    Xtr = shuffle(Xtr)
    g_cost = 0
    d_cost = 0
    gc_iter = 0
    dc_iter = 0
    for imb in tqdm(iter_data(Xtr, size=nbatch), total=ntrain / nbatch):
        imb = train_transform(imb)
        z0mb = rand_gen(size=(len(imb), nz0))
        if n_updates % (k + 1) == 0:
            g_cost += _train_g(imb, z0mb)[0]
            gc_iter += 1
        else:
            d_cost += _train_d(imb, z0mb)[1]
            dc_iter += 1
        n_updates += 1
        n_examples += len(imb)
    # average the per-update costs over the epoch (guard against a zero count)
    print("g_cost: {0:.4f}, d_cost: {1:.4f}".format(
        g_cost / max(gc_iter, 1), d_cost / max(dc_iter, 1)))
    samples = np.asarray(_gen(sample_z0mb))
    color_grid_vis(draw_transform(samples), (10, 20),
                   "{}/{}.png".format(sample_dir, n_epochs))
    n_epochs += 1
    if n_epochs > niter:
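# Several of these loops share the `n_updates % (k + 1)` trick: out of every
# k + 1 consecutive minibatches, one trains the generator and k train the
# discriminator. For example:
k_demo = 2  # discriminator updates per generator update
schedule = ['G' if n % (k_demo + 1) == 0 else 'D' for n in range(6)]
# -> ['G', 'D', 'D', 'G', 'D', 'D']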
# load a file containing a subset of the large full training set
df_num = (epoch - 1) % len(data_files)
Xtr, Xva, Xmu = load_data_file(data_files[df_num])
epoch_batch_count = Xtr.shape[0] // nbatch

# anneal the pixel-level cost weight over the first few epochs
if epoch <= pix_weights.shape[0]:
    lam_pix.set_value(floatX([pix_weights[epoch - 1]]))

# initialize cost arrays
g_epoch_costs = [0. for i in range(7)]
v_epoch_costs = [0. for i in range(7)]
epoch_layer_klds = [0. for i in range(len(vae_layer_names))]
vae_nlls = []
vae_klds = []
g_batch_count = 0
v_batch_count = 0
for imb in tqdm(iter_data(Xtr, size=nbatch), total=epoch_batch_count):
    # run the adversary at twice the generator's current learning rate
    adv_lr = 2.0 * lrt.get_value(borrow=False)
    adv_lrt.set_value(floatX(adv_lr))
    # transform training batch to model input format
    imb_input = make_model_input(imb, Xmu)
    # compute loss and apply updates for this batch
    g_result = g_train_func(*imb_input)
    g_epoch_costs = [(v1 + v2) for v1, v2 in zip(g_result[:7], g_epoch_costs)]
    vae_nlls.append(1. * g_result[1])
    vae_klds.append(1. * g_result[2])
    batch_obs_costs = g_result[7]
    batch_layer_klds = g_result[8]
    epoch_layer_klds = [(v1 + v2) for v1, v2 in zip(batch_layer_klds, epoch_layer_klds)]
    g_batch_count += 1
# run a smallish number of validation batches per epoch
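# `pix_weights` is built elsewhere; it acts as a per-epoch warm-up schedule
# for the pixel-cost weight `lam_pix`, which the guard above stops updating
# once `epoch` exceeds its length. A purely illustrative construction would
# ramp the weight linearly over the first ten epochs:
import numpy as np
pix_weights = np.linspace(0.1, 1.0, 10).astype('float32')  # hypothetical values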