def __init__(self, num, definition, limits=default_limits, internal_rng=False, name=None):
    """Uniform-noise data provider: `num` rows of `dim` values in `limits`.

    Parameters
    ----------
    num : int
        Number of rows (batch size) per sample.
    definition : sequence
        Must contain exactly one entry: the per-row dimensionality `dim`.
    limits : (low, high) pair
        Range of the uniform distribution; `high` must exceed `low`.
    internal_rng : bool
        If True, draw samples on-graph via `t_rng.uniform`; otherwise expose
        a plain `T.matrix()` placeholder to be fed by the caller.
    name : str or None
        Accepted for interface compatibility; not used here.

    Raises
    ------
    ValueError
        If `definition` does not hold exactly one entry, or that entry is
        not convertible to int.
    """
    assert len(limits) == 2
    assert limits[1] > limits[0]
    self.limits = tuple(float(l) for l in limits)
    self.span = limits[1] - limits[0]
    if len(definition) != 1:
        raise ValueError('definition should have 1 parameter (dim), not %d'
                         % len(definition))
    try:
        dim = int(definition[0])
    except ValueError:
        # BUG FIX: the original raised with `dim`, which is unbound when
        # int() fails (NameError masked the real error). Report the raw
        # offending value instead.
        raise ValueError('non-integer dim: %s' % definition[0])
    self.recon_dim = self.sample_dim = dim
    self.num = num
    self.rangekw = dict(low=self.limits[0], high=self.limits[1])
    if internal_rng:
        # Noise is generated inside the computation graph.
        self.placeholders = [t_rng.uniform(size=(num, dim), **self.rangekw)]
    else:
        # Noise is fed externally through a Theano matrix placeholder.
        self.placeholders = [T.matrix()]
    self.flat_data = [Output(self.placeholders[0], shape=(self.num, dim))]
def main():
    """Train a single-candidate E-GAN on the 2-D '25gaussians' toy dataset.

    Evolutionary GAN loop: each iteration mutates the generator population
    with up to three loss variants (trickLogD / minimax / least-squares),
    keeps the best `ncandi` children by fitness (quality score minus the
    diversity score Fd), then trains the discriminator on the survivors'
    samples.

    NOTE(review): this function uses Python 2 `print` statements and relies
    on integer division in expressions such as `batchSize/ncandi*kD` —
    confirm the file targets Python 2 before running under Python 3.
    """
    # Parameters
    task = 'toy'
    name = '25G'
    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD','minimax','ls']  # generator mutation operators
    nloss = 3
    DATASET = '25gaussians'
    batchSize = 64
    ncandi = 1          # number of surviving generator candidates
    kD = 1              # # of discrim updates for each gen update
    kG = 1              # # of discrim updates for each gen update
    ntf = 256           # number of samples used when scoring a candidate
    b1 = 0.5            # momentum term of adam
    nz = 2              # # of dim for Z
    niter = 4           # # of iter at starting learning rate
    lr = 0.0001         # initial learning rate for adam G
    lrd = 0.0001        # initial learning rate for adam D
    N_up = 100000       # total number of evolutionary iterations
    save_freq = 10000
    show_freq = 10000
    test_deterministic = True
    beta = 1.           # weight of the log-gradient-norm diversity score
    GP_norm = False     # if use gradients penalty on discriminator
    LAMBDA = 2.         # hyperparameter of GP

    # Load the dataset

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions: standard non-saturating discriminator BCE
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1) +
                          lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradients penalty norm (WGAN-GP style penalty on interpolated points)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize,1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(),
                                wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes-1.)**2)
        D_loss = discriminator_loss + LAMBDA*gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score: log of the squared gradient norm of the discriminator loss,
    # used as the diversity part of the candidate fitness.
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d)

    # Compile another function generating some data
    dis_fn = theano.function([real_imgs,fake_imgs], [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs,fake_imgs],
                               [real_out.mean(), fake_out.mean(),
                                (real_out>0.5).mean(), (fake_out>0.5).mean(), Fd_score])

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print desc
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson'%desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/',desc))

    gen_new_params = []

    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize*kD)
        xmb = xmb[0:batchSize*kD]
        # initial G cluster: build the generator graph and the first
        # population on the very first iteration only.
        if n_updates == 0:
            for can_i in range(0,ncandi):
                train_g, gen_fn, generator = create_G(
                    loss_type=loss_type[can_i%nloss],
                    discriminator=discriminator, lr=lr, b1=b1, DIM=DIM)
                for _ in range(0,kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)
                gen_new_params.append(lasagne.layers.get_all_param_values(generator))
                if can_i == 0:
                    g_imgs_old = gen_imgs
                    # assumes Python 2 integer division in the slice bound
                    fmb = gen_imgs[0:batchSize/ncandi*kD,:]
                else:
                    g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0)
                    fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:],axis=0)
            #print gen_new_params
            # MODEL G: shared generator graph reused for every candidate;
            # candidates only differ by the parameter values swapped in.
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise,nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

            # The three mutation losses of E-GAN.
            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
            g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

            g_params = lasagne.layers.get_all_params(generator, trainable=True)

            up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)

            train_g = theano.function([noise],g_loss_logD,updates=up_g_logD)
            train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax)
            train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls)
            gen_fn = theano.function([noise], lasagne.layers.get_output(
                generator,deterministic=True))
        else:
            # Evolution step: mutate every parent with every loss, score the
            # children, keep/replace to maintain `ncandi` candidates.
            # `fake_rate` was filled by the evaluation loop below on the
            # previous iteration.
            gen_old_params = gen_new_params
            for can_i in range(0,ncandi):
                for type_i in range(0,nloss):
                    lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax':
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls':
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf],gen_imgs)
                    #frr = frr[0]
                    # fitness = quality (mean D output on fakes) - diversity
                    frr = frr_score - fd_score
                    if can_i*nloss + type_i < ncandi:
                        # Fill the population slots first.
                        idx = can_i*nloss + type_i
                        gen_new_params[idx]=lasagne.layers.get_all_param_values(generator)
                        fake_rate[idx]=frr
                        g_imgs_old[idx*ntf:(idx+1)*ntf,:]=gen_imgs
                        fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:] = \
                            gen_imgs[0:batchSize/ncandi*kD,:]
                    else:
                        # Replace the worst current candidate if this child
                        # beats it.
                        fr_com = fake_rate - frr
                        if min(fr_com) < 0:
                            ids_replace = np.where(fr_com==min(fr_com))
                            idr = ids_replace[0][0]
                            fake_rate[idr]=frr
                            gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)
                            g_imgs_old[idr*ntf:(idr+1)*ntf,:]=gen_imgs
                            fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:] = \
                                gen_imgs[0:batchSize/ncandi*kD,:]

        # Evaluate the surviving candidates against fresh real samples; this
        # (re)builds the fake_rate / real_rate statistics read next iteration.
        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi*ntf)
        sample_xmb = sample_xmb[0:ncandi*ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i*ntf:(i+1)*ntf,:]
            xreal = sample_xmb[i*ntf:(i+1)*ntf,:]
            tr, fr, trp, frp, fdscore = disft_fn(xreal,xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate,fr)
                real_rate = np.append(real_rate,tr)
                fake_rate_p = np.append(fake_rate_p,frp)
                real_rate_p = np.append(real_rate_p,trp)
                FDL = np.append(FDL,fdscore)
        print fake_rate, fake_rate_p, FDL
        print (n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write(str(fake_rate)+' '+str(fake_rate_p)+'\n'+
                    str(n_updates) + ' ' + str(real_rate.mean())+ ' ' +str(real_rate_p.mean())+'\n')
        f_log.flush()

        # train D on shuffled candidate fakes vs real minibatches
        for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        # Periodically render current samples against the real distribution.
        if n_updates%show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            g_imgs = gen_fn(s_zmb)
            xmb = toy_dataset(DATASET=DATASET, size=512)
            generate_image(xmb,g_imgs,n_updates/save_freq,desc)
def main(
        problem,
        popsize,
        moegan,
        freq,
        loss_type=['trickLogD', 'minimax', 'ls'],
        postfix=None,
        nPassD=1,  #backpropagation pass for discriminator
        inBatchSize=64):
    """Train E-GAN or MO-EGAN on a 2-D Gaussian-mixture toy problem.

    Parameters
    ----------
    problem : str
        "8G" or "25G"; selects the 8- or 25-Gaussians dataset. Anything
        else terminates the process via exit(-1).
    popsize : int
        Number of generator candidates kept each generation (`ncandi`).
    moegan : bool
        If True use NSGA-II multi-objective selection (MO-EGAN); otherwise
        single-objective E-GAN selection.
    freq : int
        Used for both `save_freq` and `show_freq`.
    loss_type : list of str
        Generator mutation operators to apply.
    postfix : str or None
        Optional tag inserted into the run name.
    nPassD : int
        Discriminator passes per generator update (`kD`).
    inBatchSize : int
        Minibatch size.

    NOTE(review): mutable default argument `loss_type=[...]` is shared
    across calls; harmless here since it is never mutated, but worth
    confirming.
    """
    # Parameters
    task = 'toy'
    name = '{}_{}_{}MMDu2'.format(
        problem, "MOEGAN" if moegan else "EGAN",
        postfix + "_" if postfix is not None else "")  #'8G_MOEGAN_PFq_NFd_t2'
    DIM = 512
    begin_save = 0
    nloss = len(loss_type)
    batchSize = inBatchSize
    if problem == "8G":
        DATASET = '8gaussians'
    elif problem == "25G":
        DATASET = '25gaussians'
    else:
        exit(-1)
    ncandi = popsize
    kD = nPassD  # # of discrim updates for each gen update
    kG = 1  # # of generator updates per mutation
    ntf = 256  # number of samples used when scoring a candidate
    b1 = 0.5  # momentum term of adam
    nz = 2  # # of dim for Z
    niter = 4  # # of iter at starting learning rate
    lr = 0.0001  # initial learning rate for adam G
    lrd = 0.0001  # initial learning rate for adam D
    N_up = 100000
    save_freq = freq
    show_freq = freq
    test_deterministic = True
    beta = 1.  # weight of the log-gradient-norm diversity score
    GP_norm = False  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter of GP
    NSGA2 = moegan
    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradients penalty norm (WGAN-GP style penalty on interpolated points)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss,
                                     discriminator_params,
                                     learning_rate=lrtd,
                                     beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd score: log of the squared gradient norm of the discriminator loss,
    # used as the diversity objective.
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                              discriminator_loss,
                              updates=updates_d)

    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(), (fake_out > 0.5).mean(),
        Fd_score
    ])

    #main MODEL G: one shared trainer whose parameters are swapped per
    # candidate via set()/get().
    noise = T.matrix('noise')
    generator_trainer = create_G(noise=noise,
                                 discriminator=discriminator,
                                 lr=lr,
                                 b1=b1,
                                 DIM=DIM)

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)
    if not os.path.isdir('front'):
        os.mkdir(os.path.join('front'))
    if not os.path.isdir('front/' + desc):
        os.mkdir(os.path.join('front/', desc))
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))

    instances = []

    class Instance:
        # One generator candidate: fitness-quality fq, fitness-diversity fd,
        # its parameter snapshot, and cached generated samples.
        def __init__(self, fq, fd, params, img_values):
            self.fq = fq
            self.fd = fd
            self.params = params
            self.img = img_values

        def f(self):
            # Scalar E-GAN fitness used for single-objective selection.
            return self.fq - self.fd

    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster: seed the population on the first iteration.
        if n_updates == 0:
            for can_i in range(0, ncandi):
                init_generator_trainer = create_G(noise=noise,
                                                  discriminator=discriminator,
                                                  lr=lr,
                                                  b1=b1,
                                                  DIM=DIM)
                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                cost = init_generator_trainer.train(loss_type[can_i % nloss], zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = init_generator_trainer.gen(sample_zmb)
                frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                instances.append(
                    Instance(
                        frr_score, fd_score,
                        lasagne.layers.get_all_param_values(
                            init_generator_trainer.generator), gen_imgs))
        else:
            # Evolution step: every parent is mutated by every loss variant.
            instances_old = instances
            instances = []
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    generator_trainer.set(instances_old[can_i].params)
                    #train
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    generator_trainer.train(loss_type[type_i], zmb)
                    #score
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = generator_trainer.gen(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    #save
                    instances.append(
                        Instance(frr_score, fd_score, generator_trainer.get(),
                                 gen_imgs))
            # Selection: shrink children (+ optionally parents) back to ncandi.
            if ncandi <= (len(instances) + len(instances_old)):
                if NSGA2 == True:
                    #add parents in the pool
                    for inst in instances_old:
                        generator_trainer.set(inst.params)
                        sample_zmb = floatX(
                            np_rng.uniform(-1., 1., size=(ntf, nz)))
                        gen_imgs = generator_trainer.gen(sample_zmb)
                        frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                        instances.append(
                            Instance(frr_score, fd_score,
                                     generator_trainer.get(), gen_imgs))
                    #cromos = { idx:[float(inst.fq),-0.5*float(inst.fd)] for idx,inst in enumerate(instances) }  # S1
                    # NSGA-II objectives (both minimized): -quality, +diversity
                    cromos = {
                        idx: [-float(inst.fq), 0.5 * float(inst.fd)]
                        for idx, inst in enumerate(instances)
                    }  # S2
                    cromos_idxs = [idx for idx, _ in enumerate(instances)]
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)
                    instances = [instances[p] for p in finalpop]
                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for inst in instances:
                            ffront.write(
                                (str(inst.fq) + "\t" + str(inst.fd)).encode())
                            ffront.write("\n".encode())
                elif nloss > 1:
                    #sort new
                    instances.sort(
                        key=lambda inst: -inst.f())  #wrong def in the paper
                    #print([inst.f() for inst in instances])
                    #cut best ones
                    instances = instances[len(instances) - ncandi:]
                    #print([inst.f() for inst in instances])

        # Evaluate the surviving candidates against fresh real samples.
        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = instances[i].img[0:ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr)
            real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr)
            fake_rate_p = np.array([frp]) if i == 0 else np.append(
                fake_rate_p, frp)
            real_rate_p = np.array([trp]) if i == 0 else np.append(
                real_rate_p, trp)
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)
        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        #for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
        #    cost = train_d(xreal, xfake)
        imgs_fakes = instances[0].img[0:int(batchSize / ncandi * kD), :]
        for i in range(1, len(instances)):
            img = instances[i].img[0:int(batchSize / ncandi * kD), :]
            imgs_fakes = np.append(imgs_fakes, img, axis=0)
        for xreal, xfake in iter_data(xmb, shuffle(imgs_fakes),
                                      size=batchSize):
            cost = train_d(xreal, xfake)

        # Periodic evaluation: MMD^2 metric, Pareto front dump, checkpoints.
        if (n_updates % show_freq == 0 and n_updates != 0) or n_updates == 1:
            id_update = int(n_updates / save_freq)
            #metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            #compue mmd for all points
            mmd2_all = []
            for i in range(0, ncandi):
                generator_trainer.set(instances[i].params)
                g_imgs = generator_trainer.gen(s_zmb)
                mmd2_all.append(abs(compute_metric_mmd2(g_imgs, xmb)))
            mmd2_all = np.array(mmd2_all)
            #print pareto front
            if NSGA2 == True:
                front_path = os.path.join('front/', desc)
                with open('%s/%d_%s_mmd2u.tsv' % (front_path, id_update, desc),
                          'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write((str(instances[idx].fq) + "\t" +
                                      str(instances[idx].fd) + "\t" +
                                      str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            #mmd2 output
            print(n_updates, "mmd2u:", np.min(mmd2_all), "id:",
                  np.argmin(mmd2_all))
            #save best
            params = instances[np.argmin(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_min = generator_trainer.gen(s_zmb)
            generate_image(xmb,
                           g_imgs_min,
                           id_update,
                           desc,
                           postfix="_mmu2d_best")
            # NOTE(review): the checkpoint filenames look swapped —
            # gen_*.npz is written with discriminator params and dis_*.npz
            # with generator params; confirm before relying on these files.
            np.savez('models/%s/gen_%d.npz' % (desc, id_update),
                     *lasagne.layers.get_all_param_values(discriminator))
            np.savez('models/%s/dis_%d.npz' % (desc, id_update),
                     *generator_trainer.get())
            #worst_debug
            params = instances[np.argmax(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_max = generator_trainer.gen(s_zmb)
            generate_image(xmb,
                           g_imgs_max,
                           id_update,
                           desc,
                           postfix="_mmu2d_worst")
def main():
    """Train a single-candidate E-GAN on 128x128 CelebA face images.

    Same evolutionary scheme as the toy variant: three generator mutation
    losses (trickLogD / minimax / least-squares), selection by fitness
    (quality - diversity), then discriminator updates on the survivors.
    Reads `img_align_celeba_128.hdf5`, writes sample grids to samples/ and
    parameter checkpoints to models/.

    NOTE(review): Python 2 `print` statements and integer-division slice
    bounds (`batchSize/ncandi*kD`) — confirm Python 2 before porting.
    """
    # Parameters
    data_path = '../datasets/'
    task = 'face'
    name = '128'
    start = 0
    stop = 202560
    input_nc = 3
    loss_type = ['trickLogD','minimax','ls']
    nloss = 3
    shuffle_ = True
    batchSize = 32
    fineSize = 128
    flip = True
    ncandi = 1         # # of survived childern
    kD = 3             # # of discrim updates for each gen update
    kG = 1             # # of discrim updates for each gen update
    ntf = batchSize*kD
    b1 = 0.5           # momentum term of adam
    nz = 100           # # of dim for Z
    ngf = 64           # # of gen filters in first conv layer
    ndf = 64           # # of discrim filters in first conv layer
    niter = 25         # # of iter at starting learning rate
    lr = 0.0002        # initial learning rate for adam G
    lrd = 0.0002       # initial learning rate for adam D
    beta = 0.001       # the hyperparameter that balance fitness score
    GP_norm = False    # if use gradients penalty on discriminator
    LAMBDA = 2.        # hyperparameter of GP
    save_freq = 5000
    show_freq = 500
    begin_save = 0
    test_deterministic = True

    # Load the dataset (HDF5 dataset is indexed lazily, not loaded in RAM)
    print("Loading data...")
    f = h5py.File(data_path+'img_align_celeba_128.hdf5','r')
    trX = f['data']
    ids = range(start, stop)

    ################## MODEL D #######################
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.tensor4('real_imgs')
    fake_imgs = T.tensor4('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_128(ndf=ndf)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1) +
                          lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradients penalty norm (WGAN-GP style penalty on interpolated images)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize,1,1,1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(),
                                wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1,2,3)))
        gradient_penalty = T.mean((slopes-1.)**2)
        D_loss = discriminator_loss + LAMBDA*gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = b1

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
        D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Diversity fitness: log of the squared gradient norm of the
    # discriminator loss w.r.t. its parameters.
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d)

    # Compile another function generating some data
    disft_fn = theano.function([real_imgs,fake_imgs],
                               [(real_out).mean(), (fake_out).mean(), Fd_score])

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print desc
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson'%desc, 'wb')
    if not os.path.isdir('samples'):
        os.mkdir(os.path.join('samples/'))
    if not os.path.isdir('samples/'+desc):
        os.mkdir(os.path.join('samples/',desc))
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/',desc))

    gen_new_params = []
    n_updates = 0

    # We iterate over epochs:
    for epoch in range(niter):
        t = time()
        if shuffle_ is True:
            ids = shuffle(ids)
        for index_ in iter_data(ids, size=batchSize*kD):
            # HDF5 fancy indexing requires sorted indices.
            index = sorted(index_)
            xmb = trX[index,:,:,:]
            xmb = Batch(xmb,fineSize,input_nc,flip=flip)
            xmb = processing_img(xmb, center=True, scale=True, convert=False)
            # Independent random real batch used for candidate scoring.
            rand_idx = random.randint(start,stop-ntf-1)
            rand_ids = ids[rand_idx:rand_idx+ntf]
            rand_ids = sorted(rand_ids)
            sample_xmb = trX[rand_ids,:,:,:]
            sample_xmb = Batch(sample_xmb,fineSize,input_nc,flip=flip)
            sample_xmb = processing_img(sample_xmb, center=True, scale=True, convert=False)
            # initial G cluster: build generator graph and first population
            # only on the very first minibatch of the first epoch.
            if epoch + n_updates == 0:
                for can_i in range(0,ncandi):
                    train_g, gen_fn, generator = create_G(
                        loss_type=loss_type[can_i%nloss],
                        discriminator=discriminator, lr=lr, b1=b1, ngf=ngf)
                    for _ in range(0,kG):
                        zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                        cost = train_g(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    gen_new_params.append(lasagne.layers.get_all_param_values(generator))
                    if can_i == 0:
                        g_imgs_old = gen_imgs
                        fmb = gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                    else:
                        g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0)
                        fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:,:,:],axis=0)
                #print gen_new_params
                # MODEL G: shared graph; candidates differ only by the
                # parameter values swapped in via set_all_param_values.
                noise = T.matrix('noise')
                generator = models_uncond.build_generator_128(noise,ngf=ngf)
                Tgimgs = lasagne.layers.get_output(generator)
                Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

                # The three mutation losses of E-GAN.
                g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
                g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
                g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

                g_params = lasagne.layers.get_all_params(generator, trainable=True)

                up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
                up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
                up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)

                train_g = theano.function([noise],g_loss_logD,updates=up_g_logD)
                train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax)
                train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls)
                gen_fn = theano.function([noise], lasagne.layers.get_output(
                    generator,deterministic=True))
            else:
                # Evolution step: mutate each parent with every loss; the
                # `fitness`/`fake_rate` arrays were filled by the evaluation
                # loop below on the previous minibatch.
                gen_old_params = gen_new_params
                for can_i in range(0,ncandi):
                    for type_i in range(0,nloss):
                        lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                        if loss_type[type_i] == 'trickLogD':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g(zmb)
                        elif loss_type[type_i] == 'minimax':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_minimax(zmb)
                        elif loss_type[type_i] == 'ls':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_ls(zmb)

                        sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                        gen_imgs = gen_fn(sample_zmb)
                        _, fr_score, fd_score = disft_fn(sample_xmb,gen_imgs)
                        # fitness = quality - diversity
                        fit = fr_score - fd_score
                        if can_i*nloss + type_i < ncandi:
                            # Fill population slots first.
                            idx = can_i*nloss + type_i
                            gen_new_params[idx]=lasagne.layers.get_all_param_values(generator)
                            fitness[idx]=fit
                            fake_rate[idx]=fr_score
                            g_imgs_old[idx*ntf:(idx+1)*ntf,:,:,:]=gen_imgs
                            fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:,:,:] = \
                                gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                        else:
                            # Replace the worst candidate if this child wins.
                            fit_com = fitness - fit
                            if min(fit_com) < 0:
                                ids_replace = np.where(fit_com==min(fit_com))
                                idr = ids_replace[0][0]
                                fitness[idr]=fit
                                fake_rate[idr]=fr_score
                                gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)
                                g_imgs_old[idr*ntf:(idr+1)*ntf,:,:,:]=gen_imgs
                                fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:,:,:] = \
                                    gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                print fake_rate, fitness
                f_log.write(str(fake_rate) + ' '+str(fd_score) +' ' + str(fitness)+ '\n')

            # train D on shuffled candidate fakes vs real minibatches
            for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
                cost = train_d(xreal, xfake)

            # Re-score survivors; resets `fitness` to zeros for next round.
            for i in range(0, ncandi):
                xfake = g_imgs_old[i*ntf:(i+1)*ntf,:,:,:]
                xreal = sample_xmb[0:ntf,:,:,:]
                tr, fr, fd = disft_fn(xreal,xfake)
                if i == 0:
                    fake_rate = np.array([fr])
                    fitness = np.array([0.])
                    real_rate = np.array([tr])
                    FDL = np.array([fd])
                else:
                    fake_rate = np.append(fake_rate,fr)
                    fitness = np.append(fitness,[0.])
                    real_rate = np.append(real_rate,tr)
                    FDL = np.append(FDL,fd)
            print fake_rate, FDL
            print (n_updates, epoch,real_rate.mean())
            n_updates += 1
            f_log.write(str(fake_rate)+' '+str(FDL)+ '\n'+
                        str(epoch)+' '+str(n_updates)+' '+str(real_rate.mean())+'\n')
            f_log.flush()

            # Render an 8x8 grid of current samples.
            if n_updates%show_freq == 0:
                blank_image = Image.new("RGB",(fineSize*8+9,fineSize*8+9))
                for i in range(8):
                    for ii in range(8):
                        img = g_imgs_old[i*8+ii,:,:,:]
                        img = ImgRescale(img, center=True, scale=True, convert_back=True)
                        blank_image.paste(Image.fromarray(img),(ii*fineSize+ii+1,i*fineSize+i+1))
                blank_image.save('samples/%s/%s_%d.png'%(desc,desc,n_updates/save_freq))

            if n_updates%save_freq == 0 and epoch > begin_save - 1:
                # Optionally, you could now dump the network weights to a file like this:
                np.savez('models/%s/gen_%d.npz'%(desc,n_updates/save_freq),
                         *lasagne.layers.get_all_param_values(generator))
                np.savez('models/%s/dis_%d.npz'%(desc,n_updates/save_freq),
                         *lasagne.layers.get_all_param_values(discriminator))
def main(problem, popsize, algorithm, save_freq,
         loss_type=('trickLogD', 'minimax', 'ls'),  # tuple: avoids the mutable-default-argument pitfall
         postfix=None,
         nPassD=1,  # backpropagation passes for the discriminator
         batchSize=64,
         metric="default",
         output_dir="runs",
         gradients_penalty=False):
    """Train a population of GAN generators with evolutionary selection.

    Supported algorithms: "egan" (single-objective selection), "moegan"
    (NSGA-II multi-objective selection) and "smoegan" (NSGA-II selection
    plus a deep-Q-learning driven variation operator).

    Parameters
    ----------
    problem : str
        Key into the module-level ``problem_table`` (task factory + params).
    popsize : int
        Number of generators kept in the population.
    algorithm : str
        One of "egan", "moegan", "smoegan"; anything else aborts the process.
    save_freq : int
        Checkpoint/metric interval, in discriminator updates.
    loss_type : sequence of str
        Generator losses used as mutation operators.
    postfix : str or None
        Optional suffix for the run's output-directory name.
    nPassD, batchSize, metric, output_dir, gradients_penalty
        Forwarded to the task / training setup; ``gradients_penalty``
        enables the WGAN-GP style penalty on the discriminator.

    Side effects: creates output directories, writes logs, model
    checkpoints, Pareto-front TSV files and sample images.
    """
    if problem not in problem_table:  # membership test directly on the dict
        exit(-1)
    # task: problem_table maps name -> (task factory, task argument dict)
    task_args = problem_table[problem][1]
    task = problem_table[problem][0](nPassD, popsize, batchSize, metric)
    net_otype = task.net_output_type()
    # description of this run (used to name the output directories)
    description_name = '{}_{}_{}_{}'.format(
        str(task),
        algorithm,
        popsize,
        postfix if postfix is not None else "",
    )
    # shared hyper-parameters
    nloss = len(loss_type)
    lr = task_args['lr']        # initial learning rate for adam G
    lrd = task_args['lrd']      # initial learning rate for adam D
    b1 = task_args['b1']        # momentum term of adam
    beta = task_args['beta']    # scaling of the log-diversity (Fd) score
    samples = task_args['metric_samples']  # metric samples
    DIM = task_args['dim']      # network width
    GP_norm = gradients_penalty  # if use gradients penalty on discriminator
    LAMBDA = 2.                  # hyperparameter of GP
    # algorithm params
    if algorithm == "egan":
        VARIATION = "all"
        MULTI_OBJECTIVE_SELECTION = False
    elif algorithm == "moegan":
        VARIATION = "all"
        MULTI_OBJECTIVE_SELECTION = True
    elif algorithm == "smoegan":
        VARIATION = "deepqlearning"
        MULTI_OBJECTIVE_SELECTION = True
    else:
        exit(-2)

    def create_generator_trainer(noise=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
        # Wrap a freshly built generator network in a GeneratorTrainer.
        return GeneratorTrainer(noise, task.create_geneator(noise, DIM), discriminator, lr, b1)

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = net_otype('real_imgs')
    fake_imgs = net_otype('fake_imgs')
    # Create neural network model
    discriminator = task.create_discriminator(DIM, GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions (standard non-saturating GAN discriminator loss)
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    # Gradients penalty norm (WGAN-GP style, on interpolates between real/fake)
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss, discriminator_params,
                                     learning_rate=lrtd, beta1=b1_d)
    #lrt = theano.shared(lasagne.utils.floatX(lr))
    # Fd score: log of the squared norm of the discriminator-loss gradient,
    # used as the diversity objective of the population.
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # max is ~7.5 for toy dataset and ~0.025 for real ones (it will be updated
    # after 1 iteration, which is likely the worst one)
    Fd_auto_normalization = AutoNormalization(float(0.1))
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d)
    # Compile another function scoring generated data: quality + diversity
    dis_fn = theano.function([real_imgs, fake_imgs], [fake_out.mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(), (fake_out > 0.5).mean(),
        Fd_score
    ])
    # main MODEL G
    noise = T.matrix('noise')
    generator_trainer = create_generator_trainer(noise, discriminator, lr, b1, DIM)
    # Finally, launch the training loop.
    print("Starting training...")
    print(description_name)
    # build output dirs
    path_front, path_logs, path_models, path_models_last, path_images = build_output_dirs(
        output_dir, description_name)
    # population state
    instances = []
    instances_old = []

    # variation operator: produce one offspring generator
    def generate_offspring(xreal, loss_id, pop_id, inst=None):
        if inst is None:
            # no parent: start from a freshly initialized generator
            newparams = create_generator_trainer(noise=noise,
                                                 discriminator=discriminator,
                                                 lr=lr, b1=b1, DIM=DIM).get()
            inst = Instance(-float("inf"), float("inf"), newparams, -1, pop_id, None)
        # init gen from the parent's parameters
        generator_trainer.set(inst.params)
        # train with the mutation loss selected by loss_id
        generator_trainer.train(loss_type[loss_id], task.noise_batch())
        # score the offspring (quality fq, diversity fd)
        xfake = generator_trainer.gen(task.noise_batch())
        frr_score, fd_score = dis_fn(xreal, xfake)
        # new instance
        new_instance = Instance(frr_score, fd_score, generator_trainer.get(),
                                loss_id, pop_id, xfake)
        # save into the offspring pool
        instances.append(new_instance)
        return new_instance

    # init variation strategy
    variation = get_varation(VARIATION)(popsize, nloss, generate_offspring)

    # re-evaluate a population against the (newly updated) discriminator
    def reval_population(in_instances):
        out_instances = []
        # generates new batches of images for each generator, then scores
        # those sets with the current D
        for inst in in_instances:
            generator_trainer.set(inst.params)
            xfake = generator_trainer.gen(task.noise_batch())
            frr_score, fd_score = dis_fn(xreal_eval, xfake)
            out_instances.append(
                Instance(frr_score,
                         fd_score,
                         generator_trainer.get(),
                         inst.loss_id,
                         inst.pop_id,
                         xfake,
                         im_parent=True))
        return out_instances

    # log stuff
    LOG_HEADER, LOG_TEMPLATE = build_log_template(popsize, nloss)
    log = Logger(os.path.join(path_logs, 'logs.tsv'), header=LOG_HEADER.encode())
    timer = Timer()
    losses_counter = [0] * nloss
    # We iterate over discriminator updates:
    for n_updates in task.get_range():
        # get batch
        xmb = task.batch()
        # get eval batch (exactly batchSize samples)
        if xmb.shape[0] == batchSize:
            xreal_eval = xmb
        else:
            xreal_eval = shuffle(xmb)[:batchSize]
        # initial G cluster: parents re-scored against the current D when
        # multi-objective selection is active
        if MULTI_OBJECTIVE_SELECTION:
            instances_old = reval_population(instances)
        else:
            instances_old = instances
        # reset offspring pool
        instances = []
        variation.update(instances_old, task.is_last())
        for pop_id in range(0, popsize):
            # first iteration (n_updates == 0) has no parents yet
            variation.gen(xreal_eval,
                          instances_old[pop_id] if n_updates else None,
                          pop_id)
        if popsize <= (len(instances) + len(instances_old)):
            if MULTI_OBJECTIVE_SELECTION:
                # add parents in the pool (mu + lambda selection)
                instances = [*instances_old, *instances]
                # from the original code, we have to maximize D(G(X));
                # since NSGA-II performs a minimization, we minimize -D(G(X)),
                # and we also want to maximize the diversity score, so we
                # minimize the normalized -diversity score as well
                cromos = {
                    idx: [-float(inst.fq), -float(Fd_auto_normalization(inst.fd))]
                    for idx, inst in enumerate(instances)
                }  # S2
                cromos_idxs = [idx for idx, _ in enumerate(instances)]
                finalpop = nsga_2_pass(popsize, cromos, cromos_idxs)
                instances = [instances[p] for p in finalpop]
                with open(os.path.join(path_front, 'last.tsv'), 'wb') as ffront:
                    for inst in instances:
                        ffront.write((str(inst.fq) + "\t" + str(inst.fd)).encode())
                        ffront.write("\n".encode())
            elif nloss > 1:
                # single-objective: sort ascending by fitness f() ...
                instances.sort(key=lambda inst: inst.f())  # (from the original code on github) maximize
                # ... and keep the best popsize ones
                instances = instances[len(instances) - popsize:]
        # per-candidate statistics with the current D
        for i in range(0, popsize):
            xreal, xfake = task.statistic_datas(instances[i].img)
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr)
            real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr)
            fake_rate_p = np.array([frp]) if i == 0 else np.append(fake_rate_p, frp)
            real_rate_p = np.array([trp]) if i == 0 else np.append(real_rate_p, trp)
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)
            losses_counter[instances[i].loss_id] += 1
        # train D
        for xreal, xfake in task.iter_data_discriminator(xmb, instances):
            train_d(xreal, xfake)
        # show info
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        # write logs
        log.writeln(
            LOG_TEMPLATE.format(n_updates, str(timer), fake_rate.mean(),
                                real_rate.mean(), *fake_rate, *real_rate,
                                *FDL, *losses_counter).encode())
        # variation logs
        variation.logs(path_logs, n_updates, last_iteration=task.is_last())
        # periodic checkpoint / metric evaluation
        if (n_updates % save_freq == 0 and n_updates != 0) or n_updates == 1 or task.is_last():
            # checkpoint id
            if task.is_last():
                id_name_update = math.ceil(float(n_updates) / save_freq)
            else:
                id_name_update = math.floor(float(n_updates) / save_freq)
            # if is egan, eval only the best one
            if MULTI_OBJECTIVE_SELECTION:
                instances_to_eval = instances
            else:
                instances_to_eval = [instances[-1]]
            # metric
            metric_results = task.compute_metrics(
                instances_to_eval,
                lambda inst, nz: generator_trainer.set(inst.params).gen(nz),
                samples)
            # metric output (lower is better)
            print(n_updates, "metric:", np.min(metric_results),
                  "id:", np.argmin(metric_results))
            # best / worst candidates under the metric
            best = np.argmin(metric_results)
            worst = np.argmax(metric_results)
            np.savez(os.path.join(path_models, 'dis_%s.npz') % (id_name_update),
                     *lasagne.layers.get_all_param_values(discriminator))
            np.savez(os.path.join(path_models, 'gen_%s.npz') % (id_name_update),
                     *instances_to_eval[best].params)
            # save best samples
            generator_trainer.set(instances_to_eval[best].params)
            xfake_best = generator_trainer.gen(task.noise_batch(samples))
            # worst samples (debug)
            generator_trainer.set(instances_to_eval[worst].params)
            xfake_worst = generator_trainer.gen(task.noise_batch(samples))
            # save images
            task.save_image(xmb, xfake_best, path_images, "best_%s" % (id_name_update))
            task.save_image(xmb, xfake_worst, path_images, "worst_%s" % (id_name_update))
            # print pareto front
            with open(os.path.join(path_front, '%s.tsv') % (id_name_update), 'wb') as ffront:
                for idx in range(len(instances_to_eval)):
                    ffront.write((str(instances_to_eval[idx].fq) + "\t" +
                                  str(instances_to_eval[idx].fd) + "\t" +
                                  str(metric_results[idx])).encode())
                    ffront.write("\n".encode())
            # save all last models:
            if task.is_last():
                for key, inst in enumerate(instances_to_eval):
                    np.savez(os.path.join(path_models_last, 'gen_%s.npz') % (key),
                             *inst.params)
def main():
    """Train a population of GAN generators on the 8-Gaussians toy task.

    E-GAN style training: each of ``ncandi`` candidate generators is mutated
    with each of the three generator losses, offspring are selected either by
    NSGA-II (quality fq vs. diversity fd, when ``NSGA2`` is True) or by the
    scalar fitness ``f() = fq - fd``, then the shared discriminator is
    updated on real data vs. the surviving candidates' samples.

    Side effects: writes logs under ``logs/``, model checkpoints under
    ``models/<desc>/``, Pareto fronts under ``front/`` and sample images.
    """
    # Parameters
    task = 'toy'
    name = '8G_MOEGAN_MMDu2'  #'8G_MOEGAN_PFq_NFd_t2'
    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD', 'minimax', 'ls']  #['trickLogD', 'minimax', 'ls']
    nloss = 3  #2
    DATASET = '8gaussians'
    batchSize = 64
    ncandi = 8       # population size
    kD = 1           # # of discrim updates for each gen update
    kG = 1           # # of gen updates per mutation
    ntf = 256
    b1 = 0.5         # momentum term of adam
    nz = 2           # # of dim for Z
    niter = 4        # # of iter at starting learning rate
    lr = 0.0001      # initial learning rate for adam G
    lrd = 0.0001     # initial learning rate for adam D
    N_up = 100000
    save_freq = 10000 / 10
    show_freq = 10000 / 10
    test_deterministic = True
    beta = 1.
    GP_norm = False  # if use gradients penalty on discriminator
    LAMBDA = 2.      # hyperparameter of GP
    NSGA2 = True
    # Load the dataset
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    # Gradients penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(), wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss, discriminator_params,
                                     learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))
    # Fd score: log of the squared norm of the discriminator-loss gradient
    # (diversity objective)
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs], discriminator_loss,
                              updates=updates_d)
    # Compile another function scoring generated data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(), (fake_out > 0.5).mean(),
        Fd_score
    ])
    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))
    gen_new_params = []
    # We iterate over discriminator updates:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster
        if n_updates == 0:
            for can_i in range(0, ncandi):
                # each initial candidate is trained with one of the losses,
                # round-robin
                train_g, gen_fn, generator = create_G(
                    loss_type=loss_type[can_i % nloss],
                    discriminator=discriminator, lr=lr, b1=b1, DIM=DIM)
                for _ in range(0, kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)
                gen_new_params.append(
                    lasagne.layers.get_all_param_values(generator))
                if can_i == 0:
                    g_imgs_old = gen_imgs
                    fmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                else:
                    g_imgs_old = np.append(g_imgs_old, gen_imgs, axis=0)
                    newfmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                    fmb = np.append(fmb, newfmb, axis=0)
            # MODEL G: one shared generator network; candidates are swapped
            # in/out via (set|get)_all_param_values
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise, nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)
            # three mutation losses: heuristic (trickLogD), minimax, least-squares
            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
            g_loss_minimax = - \
                lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))
            g_params = lasagne.layers.get_all_params(generator, trainable=True)
            up_g_logD = lasagne.updates.adam(g_loss_logD, g_params,
                                             learning_rate=lrt, beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params,
                                                learning_rate=lrt, beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls, g_params,
                                           learning_rate=lrt, beta1=b1)
            train_g = theano.function([noise], g_loss_logD, updates=up_g_logD)
            train_g_minimax = theano.function([noise], g_loss_minimax,
                                              updates=up_g_minimax)
            train_g_ls = theano.function([noise], g_loss_ls, updates=up_g_ls)
            gen_fn = theano.function([noise], lasagne.layers.get_output(
                generator, deterministic=True))
        else:
            class Instance:
                # One candidate offspring: quality fq, diversity fd, its
                # parameters and sample images (full set + discriminator slice).
                def __init__(self, fq, fd, params, img_values, image_copy):
                    self.fq = fq
                    self.fd = fd
                    self.params = params
                    self.vimg = img_values
                    self.cimg = image_copy

                def f(self):
                    # scalar fitness: quality minus diversity penalty
                    return self.fq - self.fd

            instances = []
            fq_list = np.zeros(ncandi)
            fd_list = np.zeros(ncandi)
            gen_old_params = gen_new_params
            # mutate every parent with every loss -> ncandi * nloss offspring
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    lasagne.layers.set_all_param_values(
                        generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    instances.append(
                        Instance(
                            frr_score, fd_score,
                            lasagne.layers.get_all_param_values(generator),
                            gen_imgs,
                            gen_imgs[0:int(batchSize / ncandi * kD), :]))
            # selection
            if ncandi < len(instances):
                if NSGA2 == True:
                    # maximize fq, maximize fd -> NSGA-II minimizes, hence the
                    # signs (note: fq kept positive here, as in the original)
                    cromos = {
                        idx: [float(inst.fq), -float(inst.fd)]
                        for idx, inst in enumerate(instances)
                    }
                    cromos_idxs = [idx for idx, _ in enumerate(instances)]
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)
                    for idx, p in enumerate(finalpop):
                        inst = instances[p]
                        gen_new_params[idx] = inst.params
                        fq_list[idx] = inst.fq
                        fd_list[idx] = inst.fd
                        fake_rate[idx] = inst.f()
                        g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                        fmb[int(idx * batchSize / ncandi * kD):math.ceil((idx + 1) * batchSize / ncandi * kD), :] = inst.cimg
                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for idx, p in enumerate(finalpop):
                            inst = instances[p]
                            ffront.write(
                                (str(inst.fq) + "\t" + str(inst.fd)).encode())
                            ffront.write("\n".encode())
                else:
                    # greedy replacement by scalar fitness
                    for idx, inst in enumerate(instances):
                        if idx < ncandi:
                            gen_new_params[idx] = inst.params
                            fake_rate[idx] = inst.f()
                            fq_list[idx] = inst.fq
                            fd_list[idx] = inst.fd
                            g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                            fmb[int(idx * batchSize / ncandi * kD):math.ceil((idx + 1) * batchSize / ncandi * kD), :] = inst.cimg
                        else:
                            # replace the currently worst candidate if this
                            # offspring beats it
                            fr_com = fake_rate - inst.f()
                            if min(fr_com) < 0:
                                idr = np.where(fr_com == min(fr_com))[0][0]
                                gen_new_params[idr] = inst.params
                                fake_rate[idr] = inst.f()
                                g_imgs_old[idr * ntf:(idr + 1) * ntf, :] = inst.vimg
                                fmb[int(idr * batchSize / ncandi * kD):math.ceil((idr + 1) * batchSize / ncandi * kD), :] = inst.cimg
        # per-candidate statistics against a fresh real sample
        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate, fr)
                real_rate = np.append(real_rate, tr)
                fake_rate_p = np.append(fake_rate_p, frp)
                real_rate_p = np.append(real_rate_p, trp)
                FDL = np.append(FDL, fdscore)
        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()
        # train D
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)
        if n_updates % show_freq == 0:
            # sample from the current best candidate (by fitness)
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            params_max = gen_new_params[np.argmax(fake_rate)]
            lasagne.layers.set_all_param_values(generator, params_max)
            g_imgs_max = gen_fn(s_zmb)
        if n_updates % show_freq == 0 and n_updates != 0:
            # metric: MMD^2 between each candidate's samples and real data
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            mmd2_all = []
            for i in range(0, ncandi):
                lasagne.layers.set_all_param_values(generator,
                                                    gen_new_params[i])
                g_imgs_min = gen_fn(s_zmb)
                mmd2_all.append(compute_metric_mmd2(g_imgs_min, xmb))
            mmd2_all = np.array(mmd2_all)
            if NSGA2:
                with open('front/%s_mmd2u.tsv' % desc, 'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write(
                            (str(fq_list[idx]) + "\t" + str(fd_list[idx]) +
                             "\t" + str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            # save best (lowest MMD^2) candidate's samples and checkpoints
            params = gen_new_params[np.argmin(mmd2_all)]
            lasagne.layers.set_all_param_values(generator, params)
            g_imgs_min = gen_fn(s_zmb)
            generate_image(xmb, g_imgs_min, n_updates / save_freq, desc,
                           postfix="_mmu2d")
            # FIX: the gen_/dis_ payloads were swapped — gen_*.npz used to
            # receive discriminator params and vice versa (the 25G variant
            # pairs them correctly).
            np.savez('models/%s/gen_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(generator))
            np.savez('models/%s/dis_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(discriminator))