def init_z(self, frame_id=-1, image_id=-1):
    nz = self.nz
    n_sigma = 0.5
    self.iter_total = 0
    # set prev_z
    if self.z_seq is not None and image_id >= 0:
        image_id = image_id % self.z_seq.shape[0]
        frame_id = frame_id % self.z_seq.shape[1]
        print('set z as image %d, frame %d' % (image_id, frame_id))
        self.prev_z = self.z_seq[image_id, frame_id]
    if self.prev_z is None:
        # random initialization
        self.z_init = np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz))
        self.opt_solver.set_smoothness(0.0)
        self.z_const = self.z_init
        self.prev_zs = self.z_init
    else:
        # add small noise to initial latent vector, so that we can get different results
        z0_r = np.tile(self.prev_z, [self.batch_size, 1])
        z0_n = np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz)) * n_sigma
        self.z_init = np.clip(z0_r + z0_n, -0.99, 0.99)
        self.opt_solver.set_smoothness(5.0)
        self.z_const = np.tile(self.prev_z, [self.batch_size, 1])
        self.prev_zs = z0_r
    self.opt_solver.initialize(self.z_init)
    self.just_fixed = True
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        print 'i:', i
        # ymb.shape = (nbatch, ny)
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), NUM_LABEL))
        print 'gen_samples: ymb:', ymb.shape
        print ymb
        # zmb.shape = (nbatch, DIM_Z)
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, DIM_Z)))
        print 'gen_samples: zmb:', zmb.shape
        print zmb
        # xmb
        xmb = _gen(zmb, ymb)
        print 'gen_samples: xmb:', xmb.shape
        print xmb
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), NUM_LABEL))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, DIM_Z)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
def init_z(self, frame_id=0, image_id=0):
    # print('init z!!!!!')
    nz = 100
    n_sigma = 0.5
    self.iter_total = 0
    # set prev_z
    if self.z_seq is not None:
        image_id = image_id % self.z_seq.shape[0]
        frame_id = frame_id % self.z_seq.shape[1]
        print('set z as image %d, frame %d' % (image_id, frame_id))
        self.prev_z = self.z_seq[image_id, frame_id]
    if self.prev_z is None:
        # print('random initialization')
        self.z0_f = floatX(np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz)))
        self.zero_z_const()
        self.z_i = self.z0_f.copy()  # floatX(np_rng.uniform(-1.0, 1.0, size=(batch_size, nz)))
        self.z1 = self.z0_f.copy()
    else:
        z0_r = np.tile(self.prev_z, [self.batch_size, 1])
        z0_n = floatX(np_rng.uniform(-1.0, 1.0, size=(self.batch_size, nz)) * n_sigma)
        self.z0_f = floatX(np.clip(z0_r + z0_n, -0.99, 0.99))
        self.z_i = np.tile(self.prev_z, [self.batch_size, 1])
        self.z1 = z0_r.copy()
    z = self.invert_model[2]
    z.set_value(floatX(np.arctanh(self.z0_f)))
    self.just_fixed = True
def gen_samples(n, nbatch=128):
    samples = []
    n_gen = 0
    for i in range(n / nbatch):
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        n_gen += len(xmb)
    n_left = n - n_gen
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb)
    samples.append(xmb)
    return np.concatenate(samples, axis=0)
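# A minimal, self-contained sketch of the batching pattern above (assumptions: `_gen`
# is normally a compiled Theano generator; here it is replaced by a hypothetical numpy
# stand-in `_gen_stub` so the snippet runs on its own). It shows why the trailing
# `n_left` batch is needed when n is not a multiple of nbatch.
import numpy as np

np_rng = np.random.RandomState(42)   # stand-in for the module-level RNG
nz = 100

def _gen_stub(zmb):
    # stand-in generator: returns dummy "images" of shape (len(zmb), 3, 32, 32)
    return np.zeros((len(zmb), 3, 32, 32), dtype=np.float32)

def gen_samples_sketch(n, nbatch=128):
    samples = []
    n_gen = 0
    for _ in range(n // nbatch):
        zmb = np_rng.uniform(-1., 1., size=(nbatch, nz)).astype(np.float32)
        samples.append(_gen_stub(zmb))
        n_gen += nbatch
    n_left = n - n_gen   # remainder when n is not divisible by nbatch
    if n_left > 0:
        zmb = np_rng.uniform(-1., 1., size=(n_left, nz)).astype(np.float32)
        samples.append(_gen_stub(zmb))
    return np.concatenate(samples, axis=0)

assert gen_samples_sketch(300, nbatch=128).shape[0] == 300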
def gen_samples(self, z0=None, n=32, batch_size=32, nz=100, use_transform=True):
    assert n % batch_size == 0
    samples = []
    if z0 is None:
        z0 = np_rng.uniform(-1., 1., size=(n, nz))
    else:
        n = len(z0)
        batch_size = max(n, 64)
    n_batches = int(np.ceil(n / float(batch_size)))
    for i in range(n_batches):
        zmb = floatX(z0[batch_size * i:min(len(z0), batch_size * (i + 1)), :])
        xmb = self._gen(zmb)
        samples.append(xmb)
    samples = np.concatenate(samples, axis=0)
    if use_transform:
        samples = self.inverse_transform(samples, npx=self.npx)
        samples = (samples * 255).astype(np.uint8)
    return samples
def invert_bfgs(gen_model, invert_model, ftr_model, im, z_predict=None, npx=64):
    _f, z = invert_model
    nz = gen_model.nz
    if z_predict is None:
        z_predict = np_rng.uniform(-1., 1., size=(1, nz))
    else:
        z_predict = floatX(z_predict)
    z_predict = np.arctanh(z_predict)
    im_t = gen_model.transform(im)
    ftr = ftr_model(im_t)
    prob = optimize.minimize(f_bfgs, z_predict, args=(_f, im_t, ftr),
                             tol=1e-6, jac=True, method='L-BFGS-B',
                             options={'maxiter': 200})
    print('n_iters = %3d, f = %.3f' % (prob.nit, prob.fun))
    z_opt = prob.x
    z_opt_n = floatX(z_opt[np.newaxis, :])
    [f_opt, g, gx] = _f(z_opt_n, im_t, ftr)
    gx = gen_model.inverse_transform(gx, npx=npx)
    z_opt = np.tanh(z_opt)
    return gx, z_opt, f_opt
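# Pure-numpy sketch of the tanh/arctanh reparameterization used by invert_bfgs:
# L-BFGS-B works on the unconstrained variable u = arctanh(z), and mapping back with
# z = tanh(u) keeps the recovered latent strictly inside (-1, 1), matching the
# uniform(-1, 1) prior used for sampling. The values below are illustrative stand-ins.
import numpy as np

np_rng = np.random.RandomState(0)
z_predict = np_rng.uniform(-0.9, 0.9, size=(1, 100))
u = np.arctanh(z_predict)                                  # unconstrained optimization variable
u_after_opt = u + np_rng.normal(scale=2.0, size=u.shape)   # stand-in for the optimizer's updates
z_opt = np.tanh(u_after_opt)                               # mapped back to the valid latent range
assert np.all(np.abs(z_opt) < 1.0)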
def test_model(model_config_dict, model_test_name):
    import glob
    model_list = glob.glob(samples_dir + '/*.pkl')
    # load parameters
    model_param_dicts = unpickle(model_list[0])
    # load generator
    generator_models = load_generator_model(min_num_gen_filters=model_config_dict['min_num_gen_filters'],
                                            model_params_dict=model_param_dicts)
    generator_function = generator_models[0]

    print 'COMPILING SAMPLING FUNCTION'
    t = time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC ' % (time() - t)

    print 'START SAMPLING'
    for s in xrange(model_config_dict['num_sampling']):
        print '{} sampling'.format(s)
        hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                            high=model_config_dict['hidden_distribution'],
                                            size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
        sample_data = sampling_function(hidden_data)[0]
        sample_data = inverse_transform(np.asarray(sample_data)).transpose([0, 2, 3, 1])
        save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TRAIN){}.png'.format(s + 1)
        color_grid_vis(sample_data, (16, 16), save_as)
def gen_classes(name, steps, classes, interpolate=False, start=None):
    bymb = get_buffer_y(steps, num_buffer_classes, 3)  # dont know why but samples better when
    bzmb = get_buffer_z(steps, num_buffer_classes, 3)  # included is a buffer of common classes
    offset = bymb.shape[0]
    numtargets = len(classes)
    targets = np.asarray([[classes[i] for _ in range(steps)] for i in range(numtargets)])
    ymb = floatX(OneHot(targets.flatten(), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(numtargets * steps, nz)))
    ymb = np.vstack((bymb, ymb))
    zmb = np.vstack((bzmb, zmb))
    if interpolate:
        if numtargets > 1:
            for i in range(numtargets):
                y1 = classes[i]
                y2 = classes[(i + 1) % numtargets]
                for j in range(steps):
                    y = offset + steps * i + j
                    ymb[y] = np.zeros(ny)
                    if y1 == y2:
                        ymb[y][y1] = 1.0
                    else:
                        ymb[y][y1] = 1.0 - j / (steps - 1.0)
                        ymb[y][y2] = j / (steps - 1.0)
        zmb = setup_z(zmb, offset, classes, numtargets, steps, start)
    indexes = range(offset, ymb.shape[0])
    samples = gen_image(name, ymb, zmb, steps, indexes)
    gen_image_set(name, ymb, zmb, indexes)
    return ymb[offset:], zmb[offset:], samples
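# Self-contained sketch of the label-interpolation rule used in gen_classes when
# interpolate=True: over `steps` frames the one-hot mass moves linearly from class
# y1 to class y2. The class ids and sizes here are arbitrary illustrations.
import numpy as np

ny, steps, y1, y2 = 10, 5, 3, 7
ymb = np.zeros((steps, ny), dtype=np.float32)
for j in range(steps):
    ymb[j, y1] = 1.0 - j / (steps - 1.0)
    ymb[j, y2] = j / (steps - 1.0)
print(ymb[:, [y1, y2]])   # each row sums to 1, sliding from y1 to y2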
def def_invert(self, model, batch_size=1, beta=0.5, lr=0.1, b1=0.9, nz=100, use_bin=True):
    beta_r = sharedX(beta)
    x_c = T.tensor4()
    m_c = T.tensor4()
    x_e = T.tensor4()
    m_e = T.tensor4()
    z0 = T.matrix()
    z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
    gx = model.model_G(z)
    mm_c = T.tile(m_c, (1, gx.shape[1], 1, 1))
    color_all = T.mean(T.sqr(gx - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
    gx_edge = HOGNet.get_hog(gx, use_bin)
    x_edge = HOGNet.get_hog(x_e, use_bin)
    mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
    sum_e = T.sum(T.abs_(mm_e))
    sum_x_edge = T.sum(T.abs_(x_edge))
    edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
    rec_all = color_all + edge_all * sharedX(0.2)
    z_const = sharedX(10.0)
    init_all = T.mean(T.sqr(z0 - z)) * z_const
    if beta > 0:
        print('using D')
        p_gen = model.model_D(gx)
        real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T  # costs.bce(p_gen, T.ones(p_gen.shape))
        cost_all = rec_all + beta_r * real_all[0] + init_all
    else:
        print('without D')
        cost_all = rec_all + init_all
        real_all = T.zeros(cost_all.shape)
    cost = T.sum(cost_all)
    d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1))  # , regularizer=updates.Regularizer(l2=l2)
    output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge]

    print 'COMPILING...'
    t = time()
    z_updates = d_updater([z], cost)
    _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates)
    print '%.2f seconds to compile _invert function' % (time() - t)
    return [_invert, z_updates, z, beta_r, z_const]
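# Numpy sketch of the masked colour term in the inversion objective above: the squared
# error between the generated image and the colour constraint is averaged only where
# the user mask m_c is on, and the denominator (mask mean plus a small epsilon) keeps
# the loss comparable across masks of different sizes. Shapes and values are made up.
import numpy as np

np_rng = np.random.RandomState(0)
gx = np_rng.uniform(0., 1., size=(1, 3, 8, 8))       # generated image (stand-in)
x_c = np_rng.uniform(0., 1., size=(1, 3, 8, 8))      # colour constraint (stand-in)
m_c = np.zeros((1, 1, 8, 8)); m_c[..., :4, :4] = 1.  # user-drawn mask

mm_c = np.tile(m_c, (1, gx.shape[1], 1, 1))
color_all = np.mean(np.square(gx - x_c) * mm_c, axis=(1, 2, 3)) / (
    np.mean(m_c, axis=(1, 2, 3)) + 1e-5)
print(color_all)   # one reconstruction loss per image in the batch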
def rand_gen(size, noise_type='normal'):
    if noise_type == 'normal':
        r_vals = floatX(np_rng.normal(size=size))
    elif noise_type == 'uniform':
        r_vals = floatX(np_rng.uniform(size=size, low=-1.0, high=1.0))
    else:
        assert False, "unrecognized noise type!"
    return r_vals
def rand_fill(x, m, scale=1.):
    '''
    Fill masked parts of x, indicated by m, using uniform noise.
    -- assume data is in [-1, 1] (i.e. comes from train_transform())
    '''
    m = 1. * (m > 1e-3)
    nz = (scale * (np_rng.uniform(size=x.shape) - 0.5))
    x_nz = (m * nz) + ((1. - m) * x)
    return x_nz
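# Self-contained numpy demo of rand_fill: pixels where the mask is on are replaced by
# uniform noise in roughly [-scale/2, scale/2]; everything else is passed through.
# The shapes and the 0.5 fill value are arbitrary, for illustration only.
import numpy as np

np_rng = np.random.RandomState(0)
x = np.full((1, 1, 4, 4), 0.5)             # pretend data in [-1, 1]
m = np.zeros_like(x); m[..., :2, :] = 1.   # mask the top half

mb = 1. * (m > 1e-3)
noise = 1.0 * (np_rng.uniform(size=x.shape) - 0.5)
x_nz = (mb * noise) + ((1. - mb) * x)

assert np.allclose(x_nz[..., 2:, :], 0.5)  # unmasked half is unchanged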
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, ny, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, ny, n_left), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
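# Minimal sketch of the label handling in the conditional samplers: integer class ids
# are one-hot encoded before being fed to the generator and recovered again with
# argmax. `OneHot` below is a small stand-in for the library helper of the same name.
import numpy as np

np_rng = np.random.RandomState(0)
ny = 10

def OneHot(ids, n):
    out = np.zeros((len(ids), n), dtype=np.float32)
    out[np.arange(len(ids)), ids] = 1.0
    return out

ids = np_rng.randint(0, ny, 5)
ymb = OneHot(ids, ny)
assert np.array_equal(np.argmax(ymb, axis=1), ids)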
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb, tmp_yb, yb2, d, h3, h5 = _gen(zmb, ymb)
        print 'tmp_yb:', tmp_yb.shape
        print 'yb2:', yb2.shape
        print 'd:', d.shape
        print 'h3:', h3.shape
        print 'h5:', h5.shape
        sys.exit()
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
    ymb = floatX(OneHot(np_rng.randint(0, 10, n_left), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb, ymb)
    samples.append(xmb)
    labels.append(np.argmax(ymb, axis=1))
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True):
    d_weight_r = sharedX(d_weight)
    x_c = T.tensor4()
    m_c = T.tensor4()
    x_e = T.tensor4()
    m_e = T.tensor4()
    z0 = T.matrix()
    z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
    gx = model.model_G(z)
    # input: im_c: 255: no edge; 0: edge; transform => 1: no edge, 0: edge
    if nc == 1:  # gx, range [0, 1] => edge, 1
        gx3 = 1.0 - gx  # T.tile(gx, (1, 3, 1, 1))
    else:
        gx3 = gx
    mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1))
    color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
    gx_edge = self.hog.get_hog(gx3)
    x_edge = self.hog.get_hog(x_e)
    mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
    sum_e = T.sum(T.abs_(mm_e))
    sum_x_edge = T.sum(T.abs_(x_edge))
    edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
    rec_all = color_all + edge_all * sharedX(0.2)
    z_const = sharedX(5.0)
    init_all = T.mean(T.sqr(z0 - z)) * z_const
    if d_weight > 0:
        print('using D')
        p_gen = model.model_D(gx)
        real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T
        cost_all = rec_all + d_weight_r * real_all[0] + init_all
    else:
        print('without D')
        cost_all = rec_all + init_all
        real_all = T.zeros(cost_all.shape)
    cost = T.sum(cost_all)
    d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1))
    output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge]

    print('COMPILING...')
    t = time()
    z_updates = d_updater([z], cost)
    _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates)
    print('%.2f seconds to compile _invert function' % (time() - t))
    return [_invert, z_updates, z, d_weight_r, z_const]
def gen_samples(self, z0=None, n=32, batch_size=32, use_transform=True):
    assert n % batch_size == 0
    samples = []
    if z0 is None:
        z0 = np_rng.uniform(-1., 1., size=(n, self.nz))
    else:
        n = len(z0)
        batch_size = max(n, 64)
    n_batches = int(np.ceil(n / float(batch_size)))
    for i in range(n_batches):
        zmb = floatX(z0[batch_size * i:min(n, batch_size * (i + 1)), :])
        xmb = self._gen(zmb)
        samples.append(xmb)
    samples = np.concatenate(samples, axis=0)
    if use_transform:
        samples = self.inverse_transform(samples, npx=self.npx, nc=self.nc)
        samples = (samples * 255).astype(np.uint8)
    return samples
def gen_classes_arithmetic(name, steps, classes, weights):
    bymb = get_buffer_y(steps, num_buffer_classes, 3)  # dont know why but samples better when
    bzmb = get_buffer_z(steps, num_buffer_classes, 3)  # included is a buffer of common classes
    offset = bymb.shape[0]
    numtargets = len(classes) + 1
    targets = np.asarray([[classes[i % (numtargets - 1)] for _ in range(steps)] for i in range(numtargets)])
    ymb = floatX(OneHot(targets.flatten(), ny))
    zmb = floatX(np_rng.uniform(-1., 1., size=(numtargets * steps, nz)))
    ymb = np.vstack((bymb, ymb))
    zmb = np.vstack((bzmb, zmb))
    for i in range(numtargets):
        for j in range(steps):
            y_idx = offset + steps * i + j
            ymb[y_idx] = np.zeros(ny)
            if i == numtargets - 1:
                for k, c in enumerate(classes):
                    ymb[y_idx][c] = weights[k]
            else:
                ymb[y_idx][classes[i]] = 1.0
            frac = j / (steps - 1.0)
            if frac > 0.5:
                frac = 2.0 * (1.0 - frac)
            else:
                frac = 2.0 * frac
            if (i == numtargets - 1):
                z1 = zf[classes[0]][0]
                z2 = zf[classes[0]][1]
            else:
                z1 = zf[classes[i]][0]
                z2 = zf[classes[i]][1]
            for k in range(nz):
                z = (1.0 - frac) * z1[k] + frac * z2[k]
                # z = min(z, z2 - z)
                zmb[y_idx][k] = z
    indexes = range(offset, ymb.shape[0])
    samples = gen_image(name, ymb, zmb, steps, indexes)
    gen_image_set(name, ymb, zmb, indexes)
    return ymb[offset:], zmb[offset:], samples
def main(genpath, datasetname, outpath, target=False):
    # params
    DIM = 512
    SAMPLES = 3000  # 3000
    nz = 2
    if target:
        # load samples from db
        xmb = toy_dataset(DATASET=datasetname, size=SAMPLES)
        generate_image(xmb, path=outpath)
    else:
        # load
        gen_fn, generator = create_G(DIM=DIM)
        # for all in the path:
        params_map = dict(np.load(genpath))
        params = list()
        for key, vals in sorted(params_map.items(), key=lambda x: int(x[0].split("_")[1])):
            params.append(np.float32(vals))
        # set params
        lasagne.layers.set_all_param_values(generator, params)
        # generate sample
        s_zmb = floatX(np_rng.uniform(-1., 1., size=(SAMPLES, nz)))
        g_imgs = gen_fn(s_zmb)
        generate_image(g_imgs, path=outpath)
def main(path, datasetname):
    # params
    DIM = 512
    SAMPLES = 25000
    nz = 2
    # load
    gen_fn, generator = create_G(DIM=DIM)
    # load samples from db
    xmb = toy_dataset(DATASET=datasetname, size=SAMPLES)
    # for all in the path:
    for root, dirs, files in os.walk(path):
        mmd_list = []
        files.sort(key=lambda x: int(x.split("_")[1].split(".")[0]))
        for filename in files:
            try:
                genpath = os.path.join(root, filename)
                params_map = dict(np.load(genpath))
                params = list()
                for key, vals in sorted(params_map.items(), key=lambda x: int(x[0].split("_")[1])):
                    params.append(np.float32(vals))
                # set params
                lasagne.layers.set_all_param_values(generator, params)
                # generate sample
                s_zmb = floatX(np_rng.uniform(-1., 1., size=(SAMPLES, nz)))
                g_imgs = gen_fn(s_zmb)
                mmd = abs(compute_metric_mmd2(g_imgs, xmb))
                print("MMD: ", mmd, genpath)
                mmd_list.append((mmd, genpath))
            except:
                pass
        mmd_list.sort(key=lambda v: v[0])
        i = 0
        for val, name in mmd_list[:10]:
            i += 1
            print("Best MMD[" + str(i) + "]", val, math.sqrt(val), name)
def train_model(data_stream, energy_optimizer, generator_optimizer, generator_bn_optimizer, model_config_dict, model_test_name):
    [generator_function, generator_params, generator_bn_params] = set_generator_model(model_config_dict['hidden_size'],
                                                                                      model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['expert_size'],
                                                                          model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t = time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC ' % (time() - t)
    print 'COMPILING GENERATOR UPDATER'
    t = time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_bn_params=generator_bn_params,
                                                      generator_optimizer=generator_optimizer,
                                                      generator_bn_optimizer=generator_bn_optimizer)
    print '%.2f SEC ' % (time() - t)
    print 'COMPILING SAMPLING FUNCTION'
    t = time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC ' % (time() - t)

    # set fixed hidden data for sampling
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            noise_data = floatX(np_rng.normal(scale=0.01 * (0.99 ** int(batch_count / 100)),
                                              size=(num_data, num_channels, input_shape, input_shape)))
            updater_inputs = [input_data, hidden_data, noise_data, batch_count]
            updater_outputs = generator_updater(*updater_inputs)
            noise_data = floatX(np_rng.normal(scale=0.01 * (0.99 ** int(batch_count / 100)),
                                              size=(num_data, num_channels, input_shape, input_shape)))
            updater_inputs = [input_data, hidden_data, noise_data, batch_count]
            updater_outputs = energy_updater(*updater_inputs)
            # get output values
            input_energy = updater_outputs[0].mean()
            sample_energy = updater_outputs[1].mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            # batch count up
            batch_count += 1
            if batch_count % 1 == 0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' input energy : ', input_energy_list[-1]
                print '----------------------------------------------------------------'
                print ' sample energy : ', sample_energy_list[-1]
                print '================================================================'
            if batch_count % 1000 == 0:
                # sample data
                [sample_data_t, sample_data_f] = sampling_function(fixed_hidden_data)
                sample_data_t = np.asarray(sample_data_t)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TRAIN){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_t).transpose([0, 2, 3, 1]), (16, 16), save_as)
                sample_data_f = np.asarray(sample_data_f)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TEST){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_f).transpose([0, 2, 3, 1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name + '_input_energy', arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name + '_sample_energy', arr=np.asarray(sample_energy_list))
                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=generator_params + generator_bn_params + energy_params, save_to=save_as)
def main( problem, popsize, moegan, freq, loss_type=['trickLogD', 'minimax', 'ls'], postfix=None, nPassD=1, #backpropagation pass for discriminator inBatchSize=64): # Parameters task = 'toy' name = '{}_{}_{}MMDu2'.format( problem, "MOEGAN" if moegan else "EGAN", postfix + "_" if postfix is not None else "") #'8G_MOEGAN_PFq_NFd_t2' DIM = 512 begin_save = 0 nloss = len(loss_type) batchSize = inBatchSize if problem == "8G": DATASET = '8gaussians' elif problem == "25G": DATASET = '25gaussians' else: exit(-1) ncandi = popsize kD = nPassD # # of discrim updates for each gen update kG = 1 # # of discrim updates for each gen update ntf = 256 b1 = 0.5 # momentum term of adam nz = 2 # # of dim for Z niter = 4 # # of iter at starting learning rate lr = 0.0001 # initial learning rate for adam G lrd = 0.0001 # initial learning rate for adam D N_up = 100000 save_freq = freq show_freq = freq test_deterministic = True beta = 1. GP_norm = False # if use gradients penalty on discriminator LAMBDA = 2. # hyperparameter sudof GP NSGA2 = moegan # Load the dataset # MODEL D print("Building model and compiling functions...") # Prepare Theano variables for inputs and targets real_imgs = T.matrix('real_imgs') fake_imgs = T.matrix('fake_imgs') # Create neural network model discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm) # Create expression for passing real data through the discriminator real_out = lasagne.layers.get_output(discriminator, real_imgs) # Create expression for passing fake data through the discriminator fake_out = lasagne.layers.get_output(discriminator, fake_imgs) # Create loss expressions discriminator_loss = ( lasagne.objectives.binary_crossentropy(real_out, 1) + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean() # Gradients penalty norm if GP_norm is True: alpha = t_rng.uniform((batchSize, 1), low=0., high=1.) differences = fake_imgs - real_imgs interpolates = real_imgs + (alpha * differences) gradients = theano.grad(lasagne.layers.get_output( discriminator, interpolates).sum(), wrt=interpolates) slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1))) gradient_penalty = T.mean((slopes - 1.)**2) D_loss = discriminator_loss + LAMBDA * gradient_penalty b1_d = 0. else: D_loss = discriminator_loss b1_d = 0. # Create update expressions for training discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True) lrtd = theano.shared(lasagne.utils.floatX(lrd)) updates_d = lasagne.updates.adam(D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d) lrt = theano.shared(lasagne.utils.floatX(lr)) # Fd Socre Fd = theano.gradient.grad(discriminator_loss, discriminator_params) Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd)) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d) # Compile another function generating some data dis_fn = theano.function([real_imgs, fake_imgs], [(fake_out).mean(), Fd_score]) disft_fn = theano.function([real_imgs, fake_imgs], [ real_out.mean(), fake_out.mean(), (real_out > 0.5).mean(), (fake_out > 0.5).mean(), Fd_score ]) #main MODEL G noise = T.matrix('noise') generator_trainer = create_G(noise=noise, discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) # Finally, launch the training loop. 
print("Starting training...") desc = task + '_' + name print(desc) if not os.path.isdir('front'): os.mkdir(os.path.join('front')) if not os.path.isdir('front/' + desc): os.mkdir(os.path.join('front/', desc)) if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson' % desc, 'wb') if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/' + desc): os.mkdir(os.path.join('models/', desc)) instances = [] class Instance: def __init__(self, fq, fd, params, img_values): self.fq = fq self.fd = fd self.params = params self.img = img_values def f(self): return self.fq - self.fd # We iterate over epochs: for n_updates in range(N_up): xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD) xmb = xmb[0:batchSize * kD] # initial G cluster if n_updates == 0: for can_i in range(0, ncandi): init_generator_trainer = create_G(noise=noise, discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = init_generator_trainer.train(loss_type[can_i % nloss], zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = init_generator_trainer.gen(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) instances.append( Instance( frr_score, fd_score, lasagne.layers.get_all_param_values( init_generator_trainer.generator), gen_imgs)) else: instances_old = instances instances = [] for can_i in range(0, ncandi): for type_i in range(0, nloss): generator_trainer.set(instances_old[can_i].params) #train zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) generator_trainer.train(loss_type[type_i], zmb) #score sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = generator_trainer.gen(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) #save instances.append( Instance(frr_score, fd_score, generator_trainer.get(), gen_imgs)) if ncandi <= (len(instances) + len(instances_old)): if NSGA2 == True: #add parents in the pool for inst in instances_old: generator_trainer.set(inst.params) sample_zmb = floatX( np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = generator_trainer.gen(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) instances.append( Instance(frr_score, fd_score, generator_trainer.get(), gen_imgs)) #cromos = { idx:[float(inst.fq),-0.5*float(inst.fd)] for idx,inst in enumerate(instances) } # S1 cromos = { idx: [-float(inst.fq), 0.5 * float(inst.fd)] for idx, inst in enumerate(instances) } # S2 cromos_idxs = [idx for idx, _ in enumerate(instances)] finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs) instances = [instances[p] for p in finalpop] with open('front/%s.tsv' % desc, 'wb') as ffront: for inst in instances: ffront.write( (str(inst.fq) + "\t" + str(inst.fd)).encode()) ffront.write("\n".encode()) elif nloss > 1: #sort new instances.sort( key=lambda inst: -inst.f()) #wrong def in the paper #print([inst.f() for inst in instances]) #cut best ones instances = instances[len(instances) - ncandi:] #print([inst.f() for inst in instances]) sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf) sample_xmb = sample_xmb[0:ncandi * ntf] for i in range(0, ncandi): xfake = instances[i].img[0:ntf, :] xreal = sample_xmb[i * ntf:(i + 1) * ntf, :] tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake) fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr) real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr) fake_rate_p = np.array([frp]) if i == 0 else np.append( fake_rate_p, frp) real_rate_p = np.array([trp]) if 
i == 0 else np.append( real_rate_p, trp) FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore) print(fake_rate, fake_rate_p, FDL) print(n_updates, real_rate.mean(), real_rate_p.mean()) f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' + str(n_updates) + ' ' + str(real_rate.mean()) + ' ' + str(real_rate_p.mean()) + '\n').encode()) f_log.flush() # train D #for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize): # cost = train_d(xreal, xfake) imgs_fakes = instances[0].img[0:int(batchSize / ncandi * kD), :] for i in range(1, len(instances)): img = instances[i].img[0:int(batchSize / ncandi * kD), :] imgs_fakes = np.append(imgs_fakes, img, axis=0) for xreal, xfake in iter_data(xmb, shuffle(imgs_fakes), size=batchSize): cost = train_d(xreal, xfake) if (n_updates % show_freq == 0 and n_updates != 0) or n_updates == 1: id_update = int(n_updates / save_freq) #metric s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz))) xmb = toy_dataset(DATASET=DATASET, size=512) #compue mmd for all points mmd2_all = [] for i in range(0, ncandi): generator_trainer.set(instances[i].params) g_imgs = generator_trainer.gen(s_zmb) mmd2_all.append(abs(compute_metric_mmd2(g_imgs, xmb))) mmd2_all = np.array(mmd2_all) #print pareto front if NSGA2 == True: front_path = os.path.join('front/', desc) with open('%s/%d_%s_mmd2u.tsv' % (front_path, id_update, desc), 'wb') as ffront: for idx in range(0, ncandi): ffront.write((str(instances[idx].fq) + "\t" + str(instances[idx].fd) + "\t" + str(mmd2_all[idx])).encode()) ffront.write("\n".encode()) #mmd2 output print(n_updates, "mmd2u:", np.min(mmd2_all), "id:", np.argmin(mmd2_all)) #save best params = instances[np.argmin(mmd2_all)].params generator_trainer.set(params) g_imgs_min = generator_trainer.gen(s_zmb) generate_image(xmb, g_imgs_min, id_update, desc, postfix="_mmu2d_best") np.savez('models/%s/gen_%d.npz' % (desc, id_update), *lasagne.layers.get_all_param_values(discriminator)) np.savez('models/%s/dis_%d.npz' % (desc, id_update), *generator_trainer.get()) #worst_debug params = instances[np.argmax(mmd2_all)].params generator_trainer.set(params) g_imgs_max = generator_trainer.gen(s_zmb) generate_image(xmb, g_imgs_max, id_update, desc, postfix="_mmu2d_worst")
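# Hedged sketch of what an '8gaussians' toy dataset usually looks like (the actual
# toy_dataset helper used above may differ): 2-D points drawn from eight small
# Gaussians placed evenly on a circle, which is the standard testbed for the MMD
# comparison in the training loop. All constants are illustrative.
import numpy as np

def toy_8gaussians_sketch(size, std=0.05, radius=2.0, seed=0):
    rng = np.random.RandomState(seed)
    angles = 2 * np.pi * np.arange(8) / 8.0
    centers = radius * np.stack([np.cos(angles), np.sin(angles)], axis=1)
    idx = rng.randint(0, 8, size)
    return (centers[idx] + rng.normal(scale=std, size=(size, 2))).astype(np.float32)

xmb = toy_8gaussians_sketch(512)
print(xmb.shape)   # (512, 2)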
def noise_batch(self, samples=None):
    if samples is None:
        return floatX(np_rng.uniform(-1., 1., size=(self.batchSize, self.noiseSize)))
    return floatX(np_rng.uniform(-1., 1., size=(samples, self.noiseSize)))
def main(): # Parameters task = 'toy' name = '25G' DIM=512 begin_save = 0 loss_type = ['trickLogD','minimax','ls'] nloss = 3 DATASET = '25gaussians' batchSize = 64 ncandi = 1 kD = 1 # # of discrim updates for each gen update kG = 1 # # of discrim updates for each gen update ntf = 256 b1 = 0.5 # momentum term of adam nz = 2 # # of dim for Z niter = 4 # # of iter at starting learning rate lr = 0.0001 # initial learning rate for adam G lrd = 0.0001 # initial learning rate for adam D N_up = 100000 save_freq = 10000 show_freq = 10000 test_deterministic = True beta = 1. GP_norm = False # if use gradients penalty on discriminator LAMBDA = 2. # hyperparameter of GP # Load the dataset # MODEL D print("Building model and compiling functions...") # Prepare Theano variables for inputs and targets real_imgs = T.matrix('real_imgs') fake_imgs = T.matrix('fake_imgs') # Create neural network model discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm) # Create expression for passing real data through the discriminator real_out = lasagne.layers.get_output(discriminator, real_imgs) # Create expression for passing fake data through the discriminator fake_out = lasagne.layers.get_output(discriminator, fake_imgs) # Create loss expressions discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1) + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean() # Gradients penalty norm if GP_norm is True: alpha = t_rng.uniform((batchSize,1), low=0.,high=1.) differences = fake_imgs - real_imgs interpolates = real_imgs + (alpha*differences) gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates) slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1))) gradient_penalty = T.mean((slopes-1.)**2) D_loss = discriminator_loss + LAMBDA*gradient_penalty b1_d = 0. else: D_loss = discriminator_loss b1_d = 0. # Create update expressions for training discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True) lrtd = theano.shared(lasagne.utils.floatX(lrd)) updates_d = lasagne.updates.adam( D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d) lrt = theano.shared(lasagne.utils.floatX(lr)) # Fd Socre Fd = theano.gradient.grad(discriminator_loss, discriminator_params) Fd_score = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd)) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d) # Compile another function generating some data dis_fn = theano.function([real_imgs,fake_imgs],[(fake_out).mean(),Fd_score]) disft_fn = theano.function([real_imgs,fake_imgs], [real_out.mean(), fake_out.mean(), (real_out>0.5).mean(), (fake_out>0.5).mean(), Fd_score]) # Finally, launch the training loop. 
print("Starting training...") desc = task + '_' + name print desc if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson'%desc, 'wb') if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/'+desc): os.mkdir(os.path.join('models/',desc)) gen_new_params = [] # We iterate over epochs: for n_updates in range(N_up): xmb = toy_dataset(DATASET=DATASET, size=batchSize*kD) xmb = xmb[0:batchSize*kD] # initial G cluster if n_updates == 0: for can_i in range(0,ncandi): train_g, gen_fn, generator = create_G( loss_type=loss_type[can_i%nloss], discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) gen_new_params.append(lasagne.layers.get_all_param_values(generator)) if can_i == 0: g_imgs_old=gen_imgs fmb = gen_imgs[0:batchSize/ncandi*kD,:] else: g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0) fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:],axis=0) #print gen_new_params # MODEL G noise = T.matrix('noise') generator = models_uncond.build_generator_toy(noise,nd=DIM) Tgimgs = lasagne.layers.get_output(generator) Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs) g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean() g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean() g_loss_ls = T.mean(T.sqr((Tfake_out - 1))) g_params = lasagne.layers.get_all_params(generator, trainable=True) up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1) up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1) up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1) train_g = theano.function([noise],g_loss_logD,updates=up_g_logD) train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax) train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls) gen_fn = theano.function([noise], lasagne.layers.get_output( generator,deterministic=True)) else: gen_old_params = gen_new_params for can_i in range(0,ncandi): for type_i in range(0,nloss): lasagne.layers.set_all_param_values(generator, gen_old_params[can_i]) if loss_type[type_i] == 'trickLogD': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) elif loss_type[type_i] == 'minimax': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_minimax(zmb) elif loss_type[type_i] == 'ls': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_ls(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf],gen_imgs) #frr = frr[0] frr = frr_score - fd_score if can_i*nloss + type_i < ncandi: idx = can_i*nloss + type_i gen_new_params[idx]=lasagne.layers.get_all_param_values(generator) fake_rate[idx]=frr g_imgs_old[idx*ntf:(idx+1)*ntf,:]=gen_imgs fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:] = \ gen_imgs[0:batchSize/ncandi*kD,:] else: fr_com = fake_rate - frr if min(fr_com) < 0: ids_replace = np.where(fr_com==min(fr_com)) idr = ids_replace[0][0] fake_rate[idr]=frr gen_new_params[idr] = lasagne.layers.get_all_param_values(generator) g_imgs_old[idr*ntf:(idr+1)*ntf,:]=gen_imgs fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:] = \ 
gen_imgs[0:batchSize/ncandi*kD,:] sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi*ntf) sample_xmb = sample_xmb[0:ncandi*ntf] for i in range(0, ncandi): xfake = g_imgs_old[i*ntf:(i+1)*ntf,:] xreal = sample_xmb[i*ntf:(i+1)*ntf,:] tr, fr, trp, frp, fdscore = disft_fn(xreal,xfake) if i == 0: fake_rate = np.array([fr]) real_rate = np.array([tr]) fake_rate_p = np.array([frp]) real_rate_p = np.array([trp]) FDL = np.array([fdscore]) else: fake_rate = np.append(fake_rate,fr) real_rate = np.append(real_rate,tr) fake_rate_p = np.append(fake_rate_p,frp) real_rate_p = np.append(real_rate_p,trp) FDL = np.append(FDL,fdscore) print fake_rate, fake_rate_p, FDL print (n_updates, real_rate.mean(), real_rate_p.mean()) f_log.write(str(fake_rate)+' '+str(fake_rate_p)+'\n'+ str(n_updates) + ' ' + str(real_rate.mean())+ ' ' +str(real_rate_p.mean())+'\n') f_log.flush() # train D for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize): cost = train_d(xreal, xfake) if n_updates%show_freq == 0: s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz))) g_imgs = gen_fn(s_zmb) xmb = toy_dataset(DATASET=DATASET, size=512) generate_image(xmb,g_imgs,n_updates/save_freq,desc)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print "COMPILING"
t = time()
_train_g = theano.function([X, Z, Y], cost, updates=g_updates)
_train_d = theano.function([X, Z, Y], cost, updates=d_updates)
_gen = theano.function([Z, Y], gX)
print "%.2f seconds to compile theano functions" % (time() - t)

tr_idxs = np.arange(len(trX))
trX_vis = np.asarray([[trX[i] for i in py_rng.sample(tr_idxs[trY == y], 20)] for y in range(10)]).reshape(200, -1)
trX_vis = inverse_transform(transform(trX_vis))
grayscale_grid_vis(trX_vis, (10, 20), "samples/%s_etl_test.png" % desc)

sample_zmb = floatX(np_rng.uniform(-1.0, 1.0, size=(200, nz)))
sample_ymb = floatX(OneHot(np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(), ny))

def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n / nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, 10, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1.0, 1.0, size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n - n_gen
def gen_z(n):
    if args.znorm:
        return floatX(normalize(np_rng.uniform(-1., 1., size=(n, nz))))
    else:
        return floatX(np_rng.uniform(-1., 1., size=(n, nz)))
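# Sketch of the znorm branch, assuming `normalize` rescales each latent vector to unit
# L2 norm (one common choice for a spherical latent space); the real helper used by
# this script may behave differently.
import numpy as np

np_rng = np.random.RandomState(0)
nz = 100
z = np_rng.uniform(-1., 1., size=(8, nz))
z_unit = z / np.linalg.norm(z, axis=1, keepdims=True)
print(np.linalg.norm(z_unit, axis=1))   # all ~1.0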
g_updates = g_updater(gen_params, g_cost)

print 'COMPILING'
t = time()
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_gen = theano.function([Z], gen(Z, *gen_params))
_logp_rbm = theano.function([X], logp_rbm(X))
_svgd_gradient = theano.function([X], svgd_gradient(X))
print '%.2f seconds to compile theano functions' % (time() - t)

nbatch = 100
n_iter = 20
n_updates = 0
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

for iter in tqdm(range(1, n_iter + 1)):
    trX = shuffle(trX)
    for imb in iter_data(trX, size=nbatch):
        imb = floatX(imb)
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        # generate samples
        samples = floatX(_gen(zmb).reshape(-1, nx))
        grad, svgd_grad = _svgd_gradient(samples)
        _train_g(zmb, floatX(svgd_grad.reshape(-1, nc, npx, npx)))  # generator
        _train_d(imb, floatX(samples))  # discriminator
def discrim(X, w, w2, g2, b2, w3, g3, b3, w4, g4, b4, w5, g5, b5, w6, g6, b6, wy):
    h = lrelu(dnn_conv(X, w, subsample=(1, 1), border_mode=(1, 1)))
    h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(1, 1)), g=g2, b=b2))
    h3 = lrelu(batchnorm(dnn_conv(h2, w3, subsample=(1, 1), border_mode=(1, 1)), g=g3, b=b3))
    h4 = lrelu(batchnorm(dnn_conv(h3, w4, subsample=(2, 2), border_mode=(1, 1)), g=g4, b=b4))
    h5 = lrelu(batchnorm(dnn_conv(h4, w5, subsample=(1, 1), border_mode=(1, 1)), g=g5, b=b5))
    h6 = lrelu(batchnorm(dnn_conv(h5, w6, subsample=(2, 2), border_mode=(1, 1)), g=g6, b=b6))
    h6 = T.flatten(h6, 2)
    y = sigmoid(T.dot(h6, wy))
    return y

def inverse_transform(X):
    X = (X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1) + 1.) / 2.
    return X

Z = T.matrix()
X = T.tensor4()
gX = gen(Z, *gen_params)
dX = discrim(X, *discrim_params)
_gen = theano.function([Z], gX)
_discrim = theano.function([X], dX)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(400, 256)))
samples = _gen(sample_zmb)
scores = _discrim(samples)
sort = np.argsort(scores.flatten())[::-1]
samples = samples[sort]
color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png')
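# Self-contained sketch of the ranking step at the end of the snippet above: generated
# samples are ordered by discriminator score, best first, before being laid out on the
# grid. Scores and samples here are random stand-ins.
import numpy as np

np_rng = np.random.RandomState(0)
samples = np_rng.uniform(-1., 1., size=(400, 3, 64, 64)).astype(np.float32)
scores = np_rng.uniform(0., 1., size=(400, 1)).astype(np.float32)

order = np.argsort(scores.flatten())[::-1]   # indices sorted by descending score
samples_sorted = samples[order]
assert scores.flatten()[order][0] == scores.max()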
    'n_examples', 'n_seconds', '1k_va_nnd', '10k_va_nnd', '100k_va_nnd', 'g_cost', 'd_cost',
]

tr_data, te_data, tr_stream, val_stream, te_stream = faces(ntrain=ntrain)  # Only tr_data/tr_stream are used.
tr_handle = tr_data.open()
vaX, = tr_data.get_data(tr_handle, slice(0, 10000))
vaX = transform(vaX)

vis_idxs = py_rng.sample(np.arange(len(vaX)), nvis)
vaX_vis = inverse_transform(vaX[vis_idxs])
color_grid_vis(vaX_vis, (14, 14), 'samples/%s_etl_test.png' % desc)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz)))
vaX = vaX.reshape(len(vaX), -1)

# DEFINE NETWORKS.
relu = activations.Rectify()
sigmoid = activations.Sigmoid()
lrelu = activations.LeakyRectify()
tanh = activations.Tanh()
bce = T.nnet.binary_crossentropy

gifn = inits.Normal(scale=0.02)
difn = inits.Normal(scale=0.02)
gain_ifn = inits.Normal(loc=1., scale=0.02)
bias_ifn = inits.Constant(c=0.)

gw = gifn((nz, ngf*8*4*4), 'gw')
gg = gain_ifn((ngf*8*4*4), 'gg')
gb = bias_ifn((ngf*8*4*4), 'gb')
print('COMPILING...')
t = time()
_estimate_bn = theano.function([Z], bn_data)
print('%.2f seconds to compile theano functions' % (time() - t))

# batchnorm statistics
nb_sum = []
nb_mean = []
nb_mean_ext = []

# first pass
print('first pass: computing mean')
for n in tqdm(range(num_batches)):
    zmb = floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))
    bn_data = _estimate_bn(zmb)
    if n == 0:
        for d in bn_data:
            nb_sum.append(d)
    else:
        for id, d in enumerate(bn_data):
            nb_sum[id] = nb_sum[id] + d

# compute empirical mean
for id, d_sum in enumerate(nb_sum):
    if d_sum.ndim == 4:
        m = np.mean(d_sum, axis=(0, 2, 3)) / num_batches
        nb_mean.append(m)
        nb_mean_ext.append(np.reshape(m, [1, len(m), 1, 1]))
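# Numpy sketch of the first pass above: per-layer activations are summed over many
# noise batches, then the per-channel mean over (batch, height, width) is divided by
# the number of batches, giving the empirical batchnorm mean used at test time. The
# activations here are random stand-ins for the _estimate_bn outputs.
import numpy as np

np_rng = np.random.RandomState(0)
num_batches, batch_size, c, h, w = 10, 16, 8, 4, 4

acc = np.zeros((batch_size, c, h, w))
for _ in range(num_batches):
    acts = np_rng.normal(size=(batch_size, c, h, w))   # stand-in activations
    acc += acts

mean_per_channel = np.mean(acc, axis=(0, 2, 3)) / num_batches
print(mean_per_channel.shape)   # (c,)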
def continue_train_model(last_batch_idx, data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name): model_list = glob.glob(samples_dir +'/*.pkl') # load parameters model_param_dicts = unpickle(model_list[0]) generator_models = load_generator_model(min_num_gen_filters=model_config_dict['min_num_gen_filters'], model_params_dict=model_param_dicts) generator_function = generator_models[0] generator_params = generator_models[1] energy_models = load_energy_model(num_experts=model_config_dict['expert_size'], model_params_dict=model_param_dicts) feature_function = energy_models[0] # norm_function = energy_models[1] expert_function = energy_models[1] # prior_function = energy_models[3] energy_params = energy_models[2] # compile functions print 'COMPILING MODEL UPDATER' t=time() generator_updater, generator_optimizer_params = set_generator_update_function(energy_feature_function=feature_function, # energy_norm_function=norm_function, energy_expert_function=expert_function, # energy_prior_function=prior_function, generator_function=generator_function, generator_params=generator_params, generator_optimizer=generator_optimizer, init_param_dict=model_param_dicts) energy_updater, energy_optimizer_params = set_energy_update_function(energy_feature_function=feature_function, # energy_norm_function=norm_function, energy_expert_function=expert_function, # energy_prior_function=prior_function, generator_function=generator_function, energy_params=energy_params, energy_optimizer=energy_optimizer, init_param_dict=model_param_dicts) print '%.2f SEC '%(time()-t) print 'COMPILING SAMPLING FUNCTION' t=time() sampling_function = set_sampling_function(generator_function=generator_function) print '%.2f SEC '%(time()-t) # set fixed hidden data for sampling fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(model_config_dict['num_display'], model_config_dict['hidden_size']))) print 'START TRAINING' # for each epoch input_energy_list = [] sample_energy_list = [] batch_count = 0 for e in xrange(model_config_dict['epochs']): # train phase batch_iters = data_stream.get_epoch_iterator() # for each batch for b, batch_data in enumerate(batch_iters): # batch count up batch_count += 1 if batch_count<last_batch_idx: continue # set update function inputs input_data = transform(batch_data[0]) num_data = input_data.shape[0] hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(num_data, model_config_dict['hidden_size']))) noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape)) update_input = [hidden_data, noise_data] update_output = generator_updater(*update_input) entropy_weights = update_output[1].mean() entropy_cost = update_output[2].mean() noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape)) update_input = [input_data, hidden_data, noise_data] update_output = energy_updater(*update_input) input_energy = update_output[0].mean() sample_energy = update_output[1].mean() input_energy_list.append(input_energy) sample_energy_list.append(sample_energy) if batch_count%10==0: print '================================================================' print 'BATCH ITER #{}'.format(batch_count), model_test_name print '================================================================' print ' TRAIN RESULTS' print '================================================================' print ' input energy : ', 
input_energy_list[-1] print '----------------------------------------------------------------' print ' sample energy : ', sample_energy_list[-1] print '----------------------------------------------------------------' print ' entropy weight : ', entropy_weights print '----------------------------------------------------------------' print ' entropy cost : ', entropy_cost print '================================================================' if batch_count%100==0: # sample data sample_data = sampling_function(fixed_hidden_data)[0] sample_data = np.asarray(sample_data) save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count) color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as) np.save(file=samples_dir + '/' + model_test_name +'_input_energy', arr=np.asarray(input_energy_list)) np.save(file=samples_dir + '/' + model_test_name +'_sample_energy', arr=np.asarray(sample_energy_list)) save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl' save_model(tensor_params_list=generator_params[0] + generator_params[1] + energy_params + generator_optimizer_params + energy_optimizer_params, save_to=save_as)
def main(): # Parameters data_path = '../datasets/' task = 'face' name = '128' start = 0 stop = 202560 input_nc = 3 loss_type = ['trickLogD','minimax','ls'] nloss = 3 shuffle_ = True batchSize = 32 fineSize = 128 flip = True ncandi = 1 # # of survived childern kD = 3 # # of discrim updates for each gen update kG = 1 # # of discrim updates for each gen update ntf = batchSize*kD b1 = 0.5 # momentum term of adam nz = 100 # # of dim for Z ngf = 64 # # of gen filters in first conv layer ndf = 64 # # of discrim filters in first conv layer niter = 25 # # of iter at starting learning rate lr = 0.0002 # initial learning rate for adam G lrd = 0.0002 # initial learning rate for adam D beta = 0.001 # the hyperparameter that balance fitness score GP_norm = False # if use gradients penalty on discriminator LAMBDA = 2. # hyperparameter of GP save_freq = 5000 show_freq = 500 begin_save = 0 test_deterministic = True # Load the dataset print("Loading data...") f = h5py.File(data_path+'img_align_celeba_128.hdf5','r') trX = f['data'] ids = range(start, stop) ################## MODEL D ####################### print("Building model and compiling functions...") # Prepare Theano variables for inputs and targets real_imgs = T.tensor4('real_imgs') fake_imgs = T.tensor4('fake_imgs') # Create neural network model discriminator = models_uncond.build_discriminator_128(ndf=ndf) # Create expression for passing real data through the discriminator real_out = lasagne.layers.get_output(discriminator, real_imgs) # Create expression for passing fake data through the discriminator fake_out = lasagne.layers.get_output(discriminator, fake_imgs) # Create loss expressions discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1) + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean() # Gradients penalty norm if GP_norm is True: alpha = t_rng.uniform((batchSize,1,1,1), low=0.,high=1.) differences = fake_imgs - real_imgs interpolates = real_imgs + (alpha*differences) gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates) slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1,2,3))) gradient_penalty = T.mean((slopes-1.)**2) D_loss = discriminator_loss + LAMBDA*gradient_penalty b1_d = 0. else: D_loss = discriminator_loss b1_d = b1 # Create update expressions for training discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True) lrtd = theano.shared(lasagne.utils.floatX(lrd)) updates_d = lasagne.updates.adam( D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d) lrt = theano.shared(lasagne.utils.floatX(lr)) # Diversity fitnees Fd = theano.gradient.grad(discriminator_loss, discriminator_params) Fd_score = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd)) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d) # Compile another function generating some data disft_fn = theano.function([real_imgs,fake_imgs], [(real_out).mean(), (fake_out).mean(), Fd_score]) # Finally, launch the training loop. 
print("Starting training...") desc = task + '_' + name print desc if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson'%desc, 'wb') if not os.path.isdir('samples'): os.mkdir(os.path.join('samples/')) if not os.path.isdir('samples/'+desc): os.mkdir(os.path.join('samples/',desc)) if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/'+desc): os.mkdir(os.path.join('models/',desc)) gen_new_params = [] n_updates = 0 # We iterate over epochs: for epoch in range(niter): t = time() if shuffle_ is True: ids = shuffle(ids) for index_ in iter_data(ids, size=batchSize*kD): index = sorted(index_) xmb = trX[index,:,:,:] xmb = Batch(xmb,fineSize,input_nc,flip=flip) xmb = processing_img(xmb, center=True, scale=True, convert=False) rand_idx = random.randint(start,stop-ntf-1) rand_ids = ids[rand_idx:rand_idx+ntf] rand_ids = sorted(rand_ids) sample_xmb = trX[rand_ids,:,:,:] sample_xmb = Batch(sample_xmb,fineSize,input_nc,flip=flip) sample_xmb = processing_img(sample_xmb, center=True, scale=True, convert=False) # initial G cluster if epoch + n_updates == 0: for can_i in range(0,ncandi): train_g, gen_fn, generator = create_G( loss_type=loss_type[can_i%nloss], discriminator=discriminator, lr=lr, b1=b1, ngf=ngf) for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) gen_new_params.append(lasagne.layers.get_all_param_values(generator)) if can_i == 0: g_imgs_old=gen_imgs fmb = gen_imgs[0:batchSize/ncandi*kD,:,:,:] else: g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0) fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:,:,:],axis=0) #print gen_new_params # MODEL G noise = T.matrix('noise') generator = models_uncond.build_generator_128(noise,ngf=ngf) Tgimgs = lasagne.layers.get_output(generator) Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs) g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean() g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean() g_loss_ls = T.mean(T.sqr((Tfake_out - 1))) g_params = lasagne.layers.get_all_params(generator, trainable=True) up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1) up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1) up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1) train_g = theano.function([noise],g_loss_logD,updates=up_g_logD) train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax) train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls) gen_fn = theano.function([noise], lasagne.layers.get_output( generator,deterministic=True)) else: gen_old_params = gen_new_params for can_i in range(0,ncandi): for type_i in range(0,nloss): lasagne.layers.set_all_param_values(generator, gen_old_params[can_i]) if loss_type[type_i] == 'trickLogD': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) elif loss_type[type_i] == 'minimax': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_minimax(zmb) elif loss_type[type_i] == 'ls': for _ in range(0,kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_ls(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) _, fr_score, fd_score = disft_fn(sample_xmb,gen_imgs) fit = fr_score - 
fd_score if can_i*nloss + type_i < ncandi: idx = can_i*nloss + type_i gen_new_params[idx]=lasagne.layers.get_all_param_values(generator) fitness[idx]=fit fake_rate[idx]=fr_score g_imgs_old[idx*ntf:(idx+1)*ntf,:,:,:]=gen_imgs fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:,:,:] = \ gen_imgs[0:batchSize/ncandi*kD,:,:,:] else: fit_com = fitness - fit if min(fit_com) < 0: ids_replace = np.where(fit_com==min(fit_com)) idr = ids_replace[0][0] fitness[idr]=fit fake_rate[idr]=fr_score gen_new_params[idr] = lasagne.layers.get_all_param_values(generator) g_imgs_old[idr*ntf:(idr+1)*ntf,:,:,:]=gen_imgs fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:,:,:] = \ gen_imgs[0:batchSize/ncandi*kD,:,:,:] print fake_rate, fitness f_log.write(str(fake_rate) + ' '+str(fd_score) +' ' + str(fitness)+ '\n') # train D for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize): cost = train_d(xreal, xfake) for i in range(0, ncandi): xfake = g_imgs_old[i*ntf:(i+1)*ntf,:,:,:] xreal = sample_xmb[0:ntf,:,:,:] tr, fr, fd = disft_fn(xreal,xfake) if i == 0: fake_rate = np.array([fr]) fitness = np.array([0.]) real_rate = np.array([tr]) FDL = np.array([fd]) else: fake_rate = np.append(fake_rate,fr) fitness = np.append(fitness,[0.]) real_rate = np.append(real_rate,tr) FDL = np.append(FDL,fd) print fake_rate, FDL print (n_updates, epoch,real_rate.mean()) n_updates += 1 f_log.write(str(fake_rate)+' '+str(FDL)+ '\n'+ str(epoch)+' '+str(n_updates)+' '+str(real_rate.mean())+'\n') f_log.flush() if n_updates%show_freq == 0: blank_image = Image.new("RGB",(fineSize*8+9,fineSize*8+9)) for i in range(8): for ii in range(8): img = g_imgs_old[i*8+ii,:,:,:] img = ImgRescale(img, center=True, scale=True, convert_back=True) blank_image.paste(Image.fromarray(img),(ii*fineSize+ii+1,i*fineSize+i+1)) blank_image.save('samples/%s/%s_%d.png'%(desc,desc,n_updates/save_freq)) if n_updates%save_freq == 0 and epoch > begin_save - 1: # Optionally, you could now dump the network weights to a file like this: np.savez('models/%s/gen_%d.npz'%(desc,n_updates/save_freq), *lasagne.layers.get_all_param_values(generator)) np.savez('models/%s/dis_%d.npz'%(desc,n_updates/save_freq), *lasagne.layers.get_all_param_values(discriminator))
def train_model(data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name): [generator_function, generator_params, generator_entropy_params] = set_generator_model(model_config_dict['hidden_size'], model_config_dict['min_num_gen_filters']) [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'], model_config_dict['min_num_eng_filters']) # compile functions print 'COMPILING ENERGY UPDATER' t=time() energy_updater = set_energy_update_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function, energy_params=energy_params, energy_optimizer=energy_optimizer) print '%.2f SEC '%(time()-t) print 'COMPILING GENERATOR UPDATER' t=time() generator_updater = set_generator_update_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function, generator_params=generator_params, generator_entropy_params=generator_entropy_params, generator_optimizer=generator_optimizer) print '%.2f SEC '%(time()-t) print 'COMPILING EVALUATION FUNCTION' t=time() evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function) print '%.2f SEC '%(time()-t) print 'COMPILING SAMPLING FUNCTION' t=time() sampling_function = set_sampling_function(generator_function=generator_function) print '%.2f SEC '%(time()-t) # set fixed hidden data for sampling fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(model_config_dict['num_display'], model_config_dict['hidden_size']))) print 'START TRAINING' # for each epoch input_energy_list = [] sample_energy_list = [] batch_count = 0 for e in xrange(model_config_dict['epochs']): # train phase batch_iters = data_stream.get_epoch_iterator() # for each batch for b, batch_data in enumerate(batch_iters): # set update function inputs input_data = transform(batch_data[0]) num_data = input_data.shape[0] hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(num_data, model_config_dict['hidden_size']))) noise_data = np_rng.normal(size=input_data.shape) noise_data = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e)) # update generator generator_update_inputs = [input_data, hidden_data, noise_data, e] [input_energy_val, sample_energy_val, entropy_cost] = generator_updater(*generator_update_inputs) # update energy function energy_update_inputs = [input_data, hidden_data, e] [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs) # get output values input_energy = input_energy_val.mean() sample_energy = sample_energy_val.mean() input_energy_list.append(input_energy) sample_energy_list.append(sample_energy) # batch count up batch_count += 1 if batch_count%100==0: print '================================================================' print 'BATCH ITER #{}'.format(batch_count), model_test_name print '================================================================' print ' TRAIN RESULTS' print '================================================================' print ' input energy : ', input_energy print '----------------------------------------------------------------' print ' sample energy : ', sample_energy print '----------------------------------------------------------------' print ' entropy cost 
: ', entropy_cost print '================================================================' if batch_count%1000==0: # sample data save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count) sample_data = sampling_function(fixed_hidden_data)[0] sample_data = np.asarray(sample_data) color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as) np.save(file=samples_dir + '/' + model_test_name +'_input_energy', arr=np.asarray(input_energy_list)) np.save(file=samples_dir + '/' + model_test_name +'_sample_energy', arr=np.asarray(sample_energy_list))
def train_model(data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name): generator_models = set_generator_model( num_hiddens=model_config_dict["hidden_size"], min_num_gen_filters=model_config_dict["min_num_gen_filters"] ) generator_function = generator_models[0] generator_params = generator_models[1] energy_models = set_energy_model( num_experts=model_config_dict["expert_size"], min_num_eng_filters=model_config_dict["min_num_eng_filters"] ) feature_function = energy_models[0] # norm_function = energy_models[1] expert_function = energy_models[1] # prior_function = energy_models[3] energy_params = energy_models[2] # compile functions print "COMPILING MODEL UPDATER" t = time() generator_updater = set_generator_update_function( energy_feature_function=feature_function, # energy_norm_function=norm_function, energy_expert_function=expert_function, # energy_prior_function=prior_function, generator_function=generator_function, generator_params=generator_params, generator_optimizer=generator_optimizer, ) energy_updater = set_energy_update_function( energy_feature_function=feature_function, # energy_norm_function=norm_function, energy_expert_function=expert_function, # energy_prior_function=prior_function, generator_function=generator_function, energy_params=energy_params, energy_optimizer=energy_optimizer, ) print "%.2f SEC " % (time() - t) print "COMPILING SAMPLING FUNCTION" t = time() sampling_function = set_sampling_function(generator_function=generator_function) print "%.2f SEC " % (time() - t) # set fixed hidden data for sampling fixed_hidden_data = floatX( np_rng.uniform( low=-model_config_dict["hidden_distribution"], high=model_config_dict["hidden_distribution"], size=(model_config_dict["num_display"], model_config_dict["hidden_size"]), ) ) print "START TRAINING" # for each epoch input_energy_list = [] sample_energy_list = [] batch_count = 0 for e in xrange(model_config_dict["epochs"]): # train phase batch_iters = data_stream.get_epoch_iterator() # for each batch for b, batch_data in enumerate(batch_iters): # set update function inputs input_data = transform(batch_data[0]) num_data = input_data.shape[0] hidden_data = floatX( np_rng.uniform( low=-model_config_dict["hidden_distribution"], high=model_config_dict["hidden_distribution"], size=(num_data, model_config_dict["hidden_size"]), ) ) noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape)) update_input = [hidden_data, noise_data] update_output = generator_updater(*update_input) entropy_weights = update_output[1].mean() entropy_cost = update_output[2].mean() noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape)) update_input = [input_data, hidden_data, noise_data] update_output = energy_updater(*update_input) input_energy = update_output[0].mean() sample_energy = update_output[1].mean() input_energy_list.append(input_energy) sample_energy_list.append(sample_energy) # batch count up batch_count += 1 if batch_count % 10 == 0: print "================================================================" print "BATCH ITER #{}".format(batch_count), model_test_name print "================================================================" print " TRAIN RESULTS" print "================================================================" print " input energy : ", input_energy_list[-1] print "----------------------------------------------------------------" print " sample energy : ", sample_energy_list[-1] print "----------------------------------------------------------------" print " entropy 
weight : ", entropy_weights print "----------------------------------------------------------------" print " entropy cost : ", entropy_cost print "================================================================" if batch_count % 100 == 0: # sample data sample_data = sampling_function(fixed_hidden_data)[0] sample_data = np.asarray(sample_data) save_as = samples_dir + "/" + model_test_name + "_SAMPLES(TRAIN){}.png".format(batch_count) color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as) np.save(file=samples_dir + "/" + model_test_name + "_input_energy", arr=np.asarray(input_energy_list)) np.save(file=samples_dir + "/" + model_test_name + "_sample_energy", arr=np.asarray(sample_energy_list)) save_as = samples_dir + "/" + model_test_name + "_MODEL.pkl" save_model( tensor_params_list=generator_params[0] + generator_params[1] + energy_params, save_to=save_as )
def __call__(self, shape, name=None):
    return sharedX(np_rng.uniform(low=-self.scale, high=self.scale, size=shape), name=name)
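# --- Usage sketch (assumption: the __call__ above belongs to a Uniform-style weight
# initializer with a `scale` attribute; the class name is not shown in this snippet) ---
# Plain-numpy stand-in for what a call like init((nz, ngfc), 'gw') produces, minus the
# theano shared wrapper:
import numpy as np

rng = np.random.RandomState(42)                      # stand-in for np_rng

def uniform_init(shape, scale=0.05):
    return rng.uniform(low=-scale, high=scale, size=shape).astype('float32')

W = uniform_init((100, 512))                         # e.g. a (nz, ngfc) weight matrix
print(W.shape, W.min(), W.max())                     # values lie inside [-0.05, 0.05]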
def mnistGANcond(): """ This example loads the 32x32 imagenet model used in the paper, generates 400 random samples, and sorts them according to the discriminator's probability of being real and renders them to the file samples.png """ nc = 1 npx = 28 ngf = 64 # # of gen filters in first conv layer ndf = 128 ny = 10 # # of classes nz = 100 # # of dim for Z k = 1 # # of discrim updates for each gen update l2 = 2.5e-5 # l2 weight decay b1 = 0.5 # momentum term of adam nc = 1 # # of channels in image ny = 10 # # of classes nbatch = 128 # # of examples in batch npx = 28 # # of pixels width/height of images nz = 100 # # of dim for Z ngfc = 1024 # # of gen units for fully connected layers ndfc = 1024 # # of discrim units for fully connected layers ngf = 64 # # of gen filters in first conv layer ndf = 64 # # of discrim filters in first conv layer nx = npx * npx * nc # # of dimensions in X niter = 100 # # of iter at starting learning rate niter_decay = 100 # # of iter to linearly decay learning rate to zero lr = 0.0002 relu = activations.Rectify() sigmoid = activations.Sigmoid() lrelu = activations.LeakyRectify() tanh = activations.Tanh() model_path = 'dcgan_code-master/mnist/models/cond_dcgan/' gen_params = [ sharedX(p) for p in joblib.load(model_path + '200_gen_params.jl') ] discrim_params = [ sharedX(p) for p in joblib.load(model_path + '200_discrim_params.jl') ] def gen(Z, Y, w, w2, w3, wx): yb = Y.dimshuffle(0, 1, 'x', 'x') Z = T.concatenate([Z, Y], axis=1) h = relu(batchnorm(T.dot(Z, w))) h = T.concatenate([h, Y], axis=1) h2 = relu(batchnorm(T.dot(h, w2))) h2 = h2.reshape((h2.shape[0], ngf * 2, 7, 7)) h2 = conv_cond_concat(h2, yb) h3 = relu( batchnorm(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2)))) h3 = conv_cond_concat(h3, yb) x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2))) return x def discrim(X, Y, w, w2, w3, wy): yb = Y.dimshuffle(0, 1, 'x', 'x') X = conv_cond_concat(X, yb) h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2))) h = conv_cond_concat(h, yb) h2 = lrelu( batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2)))) h2 = T.flatten(h2, 2) h2 = T.concatenate([h2, Y], axis=1) h3 = lrelu(batchnorm(T.dot(h2, w3))) h3 = T.concatenate([h3, Y], axis=1) y = sigmoid(T.dot(h3, wy)) return y def inverse_transform(X): X = (X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1) + 1.) / 2. return X Z = T.matrix() X = T.tensor4() Y = T.matrix() gX = gen(Z, Y, *gen_params) dX = discrim(X, Y, *discrim_params) _gen = theano.function([Z, Y], gX) _discrim = theano.function([X, Y], dX) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz))) sample_ymb = floatX( OneHot( np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(), ny)) samples = _gen(sample_zmb, sample_ymb) scores = _discrim(samples, sample_ymb) print(scores[1:10]) sort = np.argsort(scores.flatten())[::-1] samples = samples[sort] print(np.shape(inverse_transform(samples))) print(min(scores)) print(max(scores)) color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png') return inverse_transform(samples), sample_ymb
def get_buffer_z(steps, num_buffer_samples=480, num_buffer_steps=3):
    num_buffer_rows = int(math.ceil(float(num_buffer_samples) / steps))
    zmb = floatX(np_rng.uniform(-1., 1., size=(num_buffer_rows * steps, nz)))
    return zmb
bn_data = gbn + dbn
print('COMPILING...')
t = time()
_estimate_bn = theano.function([Z], bn_data)
print('%.2f seconds to compile theano functions' % (time() - t))

# batchnorm statistics
nb_sum = []
nb_mean = []
nb_mean_ext = []

# first pass
print('first pass: computing mean')
for n in tqdm(range(num_batches)):
    zmb = floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))
    bn_data = _estimate_bn(zmb)
    if n == 0:
        for d in bn_data:
            nb_sum.append(d)
    else:
        for id, d in enumerate(bn_data):
            nb_sum[id] = nb_sum[id] + d

# compute empirical mean
for id, d_sum in enumerate(nb_sum):
    if d_sum.ndim == 4:
        m = np.mean(d_sum, axis=(0, 2, 3)) / num_batches
        nb_mean.append(m)
        nb_mean_ext.append(np.reshape(m, [1, len(m), 1, 1]))
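# --- Hypothetical second pass (not shown in the snippet above) ---
# Once the per-channel means are known, estimating batchnorm statistics usually continues
# with a second pass over fresh z batches to accumulate the variance. Minimal numpy sketch
# with a fake stand-in for _estimate_bn:
import numpy as np

num_batches, batch_size, nz = 4, 8, 100
rng = np.random.RandomState(0)

def fake_estimate_bn(zmb):
    # stand-in returning one 4-d batchnorm input of shape (batch, channels, h, w)
    return [rng.randn(zmb.shape[0], 64, 16, 16)]

nb_mean = [np.zeros(64)]                 # assume the first-pass means are already computed
nb_var = [np.zeros(64)]
for n in range(num_batches):
    zmb = rng.uniform(-1., 1., size=(batch_size, nz))
    for i, d in enumerate(fake_estimate_bn(zmb)):
        diff = d - nb_mean[i].reshape(1, -1, 1, 1)
        nb_var[i] = nb_var[i] + np.mean(diff ** 2, axis=(0, 2, 3))
nb_var = [v / num_batches for v in nb_var]
print(nb_var[0].shape)                   # (64,) per-channel variance estimates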
def train_model(model_name, data_stream, num_hiddens, num_epochs, generator_optimizer): # set models print 'LOADING VGG' t=time() feature_extractor = load_vgg_feature_extractor() print '%.2f SEC '%(time()-t) sample_generator , generator_parameters = set_generator_model(num_hiddens) print 'COMPILING UPDATER AND SAMPLER' t=time() updater_function = set_updater_function(feature_extractor, sample_generator, generator_parameters, generator_optimizer) sampling_function = set_sampling_function(sample_generator) print '%.2f SEC '%(time()-t) # set fixed hidden data for sampling fixed_hidden_data = floatX(np_rng.uniform(low=-1.0, high=1.0, size=(16*16, num_hiddens))) print 'START TRAINING' # for each epoch moment_cost_list = [] batch_count = 0 for e in xrange(num_epochs): # train phase batch_iters = data_stream.get_epoch_iterator() # for each batch for b, batch_data in enumerate(batch_iters): # set update function inputs input_data = transform(batch_data[0]) hidden_data = floatX(np_rng.uniform(low=-1.0, high=1.0, size=(input_data.shape[0], num_hiddens))) updater_inputs = [input_data, hidden_data] updater_outputs = updater_function(*updater_inputs) moment_cost_list.append(updater_outputs[0]) # batch count up batch_count += 1 if batch_count%10==0: print '================================================================' print 'BATCH ITER #{}'.format(batch_count), model_name print '================================================================' print ' TRAIN RESULTS' print '================================================================' print ' moment matching cost : ', moment_cost_list[-1] print '================================================================' if batch_count%100==0: # sample data save_as = samples_dir + '/' + model_name + '_SAMPLES{}.png'.format(batch_count) sample_data = sampling_function(fixed_hidden_data)[0] sample_data = np.asarray(sample_data) color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as) np.save(file=samples_dir + '/' + model_name +'_MOMENT_COST', arr=np.asarray(moment_cost_list))
b1=b1, regularizer=updates.Regularizer(l2=l2)) d_updates = d_updater(discrim_params, d_cost) g_updates = g_updater(gen_params, g_cost) print 'COMPILING' t = time() _gen = theano.function([Z], gX) _train_d = theano.function([X, X0], d_cost, updates=d_updates) _train_g = theano.function([Z, deltaX], g_cost, updates=g_updates) _vgd_gradient = theano.function([X0, X1], vgd_gradient(X0, X1)) _reconstruction_cost = theano.function([X], T.mean(mse_data)) print '%.2f seconds to compile theano functions' % (time() - t) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz))) n_updates = 0 t = time() for epoch in range(1, niter + 1): for filename in npzfiles: batch_data = shuffle( np.load(filename)['images'].astype(theano.config.floatX)) for idx in tqdm(xrange(0, batch_data.shape[0] // nbatch)): imb = transform(batch_data[idx * nbatch:(idx + 1) * nbatch]) zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz))) # generate samples samples = _gen(zmb)
#
# main
#
if __name__ == '__main__':
    NUM_CLASSES = 10  # # of classes
    nz = 100          # # of dim for Z

    Z = T.matrix('random')
    Y = T.matrix('label')

    #####
    mnist = BinaryMnist()
    generator = mnist.makeGeneratorLayers(NUM_MINIBATCH, Z, nz, Y, NUM_CLASSES)
    out = ll.get_output(generator)

    print 'compiling...'
    out_func = theano.function([Z, Y], out, mode='DebugMode')
    print 'compiling...DONE'

    # test
    Zval = floatX(np_rng.uniform(-1.0, 1.0, size=(NUM_MINIBATCH, nz)))
    Yval = floatX(OneHot(np_rng.randint(0, 10, NUM_MINIBATCH), NUM_CLASSES))
    ret = out_func(Zval, Yval)
    print 'ret', ret.shape
def sample_z(a_batch_size, a_z_size): return floatX(np_rng.uniform(Z_MIN, Z_MAX, size=(a_batch_size, a_z_size)))
data = tr_data.get_data(tr_handle, slice(0, tr_data.num_examples)) labels = data[labels_idx] vc_idx = np.where(labels == vc_num)[0] vc_idx = vc_idx[:196] if 'orig' in desc: zmb_idx = tr_stream.dataset.provides_sources.index('feat_orig') else: zmb_idx = tr_stream.dataset.provides_sources.index('feat_l2') sample_zmb = data[zmb_idx][vc_idx,:] patches = data[patches_idx][vc_idx,:] patches = transform(patches, 64) color_grid_vis(inverse_transform(patches, nc=3, npx=64), (14, 14), './patches.png') else: sample_zmb = floatX(np_rng.uniform(-1., 1., size=(196, 100))) print 'COMPILING...' _gen = theano.function([Z], gX) recon = theano.function([gX,X], cost) print 'Done!' samples = np.asarray(_gen(sample_zmb)) if 'patches' in locals(): recon_cost = recon(samples, patches) costs[ii,0] = recon_cost print "Reconstruction Error: %3f" % (float(recon_cost)) save_file = dcgan_root + 'samples/%s/vc_%s.png'%(desc, str(vc_num)) color_grid_vis(inverse_transform(samples, nc=3, npx=64), (14, 14), save_file)
f_log = open( os.path.join( log_dir, '%s.ndjson' % desc ), 'wb' ) log_fields = [ 'num_epoch', 'num_update', 'num_example', 't_spent', 'c_cost', 'd_cost',] # DO THE JOB. print desc.upper( ) num_update = 0 num_epoch = 0 num_update = 0 num_example = 0 Zb_vis = floatX( np_rng.uniform( -1., 1., size = ( nvis ** 2, nz, 1, 1 ) ) ) t = time( ) for epoch in range( niter ): # Load pre-trained param if exists. num_epoch += 1 mpath_c = os.path.join( model_dir, 'C%03d.npy' % num_epoch ) mpath_d = os.path.join( model_dir, 'D%03d.npy' % num_epoch ) if os.path.exists( mpath_c ) and os.path.exists( mpath_d ): print( 'Epoch %02d: Load.' % num_epoch ) data_c = np.load( mpath_c ) for pi in range( len( converter_params ) ): converter_params[ pi ].set_value( data_c[ pi ] ) data_d = np.load( mpath_d ) for pi in range( len( discrim_params ) ): discrim_params[ pi ].set_value( data_d[ pi ] ) continue
def main(): # Parameters task = 'toy' name = '8G_MOEGAN_MMDu2' #'8G_MOEGAN_PFq_NFd_t2' DIM = 512 begin_save = 0 loss_type = ['trickLogD', 'minimax', 'ls'] #['trickLogD', 'minimax', 'ls'] nloss = 3 #2 DATASET = '8gaussians' batchSize = 64 ncandi = 8 kD = 1 # # of discrim updates for each gen update kG = 1 # # of discrim updates for each gen update ntf = 256 b1 = 0.5 # momentum term of adam nz = 2 # # of dim for Z niter = 4 # # of iter at starting learning rate lr = 0.0001 # initial learning rate for adam G lrd = 0.0001 # initial learning rate for adam D N_up = 100000 save_freq = 10000 / 10 show_freq = 10000 / 10 test_deterministic = True beta = 1. GP_norm = False # if use gradients penalty on discriminator LAMBDA = 2. # hyperparameter of GP NSGA2 = True # Load the dataset # MODEL D print("Building model and compiling functions...") # Prepare Theano variables for inputs and targets real_imgs = T.matrix('real_imgs') fake_imgs = T.matrix('fake_imgs') # Create neural network model discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm) # Create expression for passing real data through the discriminator real_out = lasagne.layers.get_output(discriminator, real_imgs) # Create expression for passing fake data through the discriminator fake_out = lasagne.layers.get_output(discriminator, fake_imgs) # Create loss expressions discriminator_loss = ( lasagne.objectives.binary_crossentropy(real_out, 1) + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean() # Gradients penalty norm if GP_norm is True: alpha = t_rng.uniform((batchSize, 1), low=0., high=1.) differences = fake_imgs - real_imgs interpolates = real_imgs + (alpha * differences) gradients = theano.grad(lasagne.layers.get_output( discriminator, interpolates).sum(), wrt=interpolates) slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1))) gradient_penalty = T.mean((slopes - 1.)**2) D_loss = discriminator_loss + LAMBDA * gradient_penalty b1_d = 0. else: D_loss = discriminator_loss b1_d = 0. # Create update expressions for training discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True) lrtd = theano.shared(lasagne.utils.floatX(lrd)) updates_d = lasagne.updates.adam(D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d) lrt = theano.shared(lasagne.utils.floatX(lr)) # Fd Socre Fd = theano.gradient.grad(discriminator_loss, discriminator_params) Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd)) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_d = theano.function([real_imgs, fake_imgs], discriminator_loss, updates=updates_d) # Compile another function generating some data dis_fn = theano.function([real_imgs, fake_imgs], [(fake_out).mean(), Fd_score]) disft_fn = theano.function([real_imgs, fake_imgs], [ real_out.mean(), fake_out.mean(), (real_out > 0.5).mean(), (fake_out > 0.5).mean(), Fd_score ]) # Finally, launch the training loop. 
print("Starting training...") desc = task + '_' + name print(desc) if not os.path.isdir('logs'): os.mkdir(os.path.join('logs')) f_log = open('logs/%s.ndjson' % desc, 'wb') if not os.path.isdir('models'): os.mkdir(os.path.join('models/')) if not os.path.isdir('models/' + desc): os.mkdir(os.path.join('models/', desc)) gen_new_params = [] # We iterate over epochs: for n_updates in range(N_up): xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD) xmb = xmb[0:batchSize * kD] # initial G cluster if n_updates == 0: for can_i in range(0, ncandi): train_g, gen_fn, generator = create_G( loss_type=loss_type[can_i % nloss], discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) for _ in range(0, kG): zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) gen_new_params.append( lasagne.layers.get_all_param_values(generator)) if can_i == 0: g_imgs_old = gen_imgs fmb = gen_imgs[0:int(batchSize / ncandi * kD), :] else: g_imgs_old = np.append(g_imgs_old, gen_imgs, axis=0) newfmb = gen_imgs[0:int(batchSize / ncandi * kD), :] fmb = np.append(fmb, newfmb, axis=0) # print gen_new_params # MODEL G noise = T.matrix('noise') generator = models_uncond.build_generator_toy(noise, nd=DIM) Tgimgs = lasagne.layers.get_output(generator) Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs) g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean() g_loss_minimax = - \ lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean() g_loss_ls = T.mean(T.sqr((Tfake_out - 1))) g_params = lasagne.layers.get_all_params(generator, trainable=True) up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1) up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1) up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1) train_g = theano.function([noise], g_loss_logD, updates=up_g_logD) train_g_minimax = theano.function([noise], g_loss_minimax, updates=up_g_minimax) train_g_ls = theano.function([noise], g_loss_ls, updates=up_g_ls) gen_fn = theano.function([noise], lasagne.layers.get_output( generator, deterministic=True)) else: class Instance: def __init__(self, fq, fd, params, img_values, image_copy): self.fq = fq self.fd = fd self.params = params self.vimg = img_values self.cimg = image_copy def f(self): return self.fq - self.fd instances = [] fq_list = np.zeros(ncandi) fd_list = np.zeros(ncandi) gen_old_params = gen_new_params for can_i in range(0, ncandi): for type_i in range(0, nloss): lasagne.layers.set_all_param_values( generator, gen_old_params[can_i]) if loss_type[type_i] == 'trickLogD': for _ in range(0, kG): zmb = floatX( np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g(zmb) elif loss_type[type_i] == 'minimax': for _ in range(0, kG): zmb = floatX( np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_minimax(zmb) elif loss_type[type_i] == 'ls': for _ in range(0, kG): zmb = floatX( np_rng.uniform(-1., 1., size=(batchSize, nz))) cost = train_g_ls(zmb) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz))) gen_imgs = gen_fn(sample_zmb) frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs) instances.append( Instance( frr_score, fd_score, lasagne.layers.get_all_param_values(generator), gen_imgs, gen_imgs[0:int(batchSize / ncandi * kD), :])) if ncandi < len(instances): if NSGA2 == True: cromos = { idx: [float(inst.fq), -float(inst.fd)] for idx, inst in enumerate(instances) } cromos_idxs = 
[idx for idx, _ in enumerate(instances)] finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs) for idx, p in enumerate(finalpop): inst = instances[p] gen_new_params[idx] = inst.params fq_list[idx] = inst.fq fd_list[idx] = inst.fd fake_rate[idx] = inst.f() g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg fmb[int(idx * batchSize / ncandi * kD):math.ceil((idx + 1) * batchSize / ncandi * kD), :] = inst.cimg with open('front/%s.tsv' % desc, 'wb') as ffront: for idx, p in enumerate(finalpop): inst = instances[p] ffront.write( (str(inst.fq) + "\t" + str(inst.fd)).encode()) ffront.write("\n".encode()) else: for idx, inst in enumerate(instances): if idx < ncandi: gen_new_params[idx] = inst.params fake_rate[idx] = inst.f() fq_list[idx] = inst.fq fd_list[idx] = inst.fd g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg fmb[int(idx * batchSize / ncandi * kD):math.ceil((idx + 1) * batchSize / ncandi * kD), :] = inst.cimg else: fr_com = fake_rate - inst.f() if min(fr_com) < 0: idr = np.where(fr_com == min(fr_com))[0][0] gen_new_params[idr] = inst.params fake_rate[idr] = inst.f() g_imgs_old[idr * ntf:(idr + 1) * ntf, :] = inst.vimg fmb[int(idr * batchSize / ncandi * kD):math.ceil((idr + 1) * batchSize / ncandi * kD), :] = inst.cimg sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf) sample_xmb = sample_xmb[0:ncandi * ntf] for i in range(0, ncandi): xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :] xreal = sample_xmb[i * ntf:(i + 1) * ntf, :] tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake) if i == 0: fake_rate = np.array([fr]) real_rate = np.array([tr]) fake_rate_p = np.array([frp]) real_rate_p = np.array([trp]) FDL = np.array([fdscore]) else: fake_rate = np.append(fake_rate, fr) real_rate = np.append(real_rate, tr) fake_rate_p = np.append(fake_rate_p, frp) real_rate_p = np.append(real_rate_p, trp) FDL = np.append(FDL, fdscore) print(fake_rate, fake_rate_p, FDL) print(n_updates, real_rate.mean(), real_rate_p.mean()) f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' + str(n_updates) + ' ' + str(real_rate.mean()) + ' ' + str(real_rate_p.mean()) + '\n').encode()) f_log.flush() # train D for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize): cost = train_d(xreal, xfake) if n_updates % show_freq == 0: s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz))) params_max = gen_new_params[np.argmax(fake_rate)] lasagne.layers.set_all_param_values(generator, params_max) g_imgs_max = gen_fn(s_zmb) if n_updates % show_freq == 0 and n_updates != 0: #metric s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz))) xmb = toy_dataset(DATASET=DATASET, size=512) mmd2_all = [] for i in range(0, ncandi): lasagne.layers.set_all_param_values(generator, gen_new_params[i]) g_imgs_min = gen_fn(s_zmb) mmd2_all.append(compute_metric_mmd2(g_imgs_min, xmb)) mmd2_all = np.array(mmd2_all) if NSGA2: with open('front/%s_mmd2u.tsv' % desc, 'wb') as ffront: for idx in range(0, ncandi): ffront.write( (str(fq_list[idx]) + "\t" + str(fd_list[idx]) + "\t" + str(mmd2_all[idx])).encode()) ffront.write("\n".encode()) #save best params = gen_new_params[np.argmin(mmd2_all)] lasagne.layers.set_all_param_values(generator, params) g_imgs_min = gen_fn(s_zmb) generate_image(xmb, g_imgs_min, n_updates / save_freq, desc, postfix="_mmu2d") np.savez('models/%s/gen_%d.npz' % (desc, n_updates / save_freq), *lasagne.layers.get_all_param_values(discriminator)) np.savez('models/%s/dis_%d.npz' % (desc, n_updates / save_freq), *lasagne.layers.get_all_param_values(generator))
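# --- Illustrative sketch only: nsga_2_pass is an external helper whose exact convention
# (minimize vs. maximize) is not shown here. The core of any NSGA-II style pass is a Pareto
# dominance test over the two objectives stored per candidate; a minimal version, written
# for minimization (flip the comparisons if the helper maximizes), could look like this: ---
import numpy as np

def dominates(a, b):
    # a dominates b if it is no worse in every objective and strictly better in at least one
    a, b = np.asarray(a), np.asarray(b)
    return np.all(a <= b) and np.any(a < b)

def naive_front_select(n_keep, objectives):
    # objectives: dict idx -> [obj1, obj2]; non-dominated candidates are kept first
    idxs = list(objectives)
    front = [i for i in idxs
             if not any(dominates(objectives[j], objectives[i]) for j in idxs if j != i)]
    rest = [i for i in idxs if i not in front]
    return (front + rest)[:n_keep]

print(naive_front_select(2, {0: [0.1, 0.5], 1: [0.2, 0.2], 2: [0.3, 0.6]}))   # -> [0, 1]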
desc = 'dcgan' model_dir = 'models/%s'%desc samples_dir = 'samples/%s'%desc if not os.path.exists('logs/'): os.makedirs('logs/') if not os.path.exists(model_dir): os.makedirs(model_dir) if not os.path.exists(samples_dir): os.makedirs(samples_dir) X_sample = data.get_unlab_batch(0,monitor_size) X_sample = data.center_crop(X_sample,img_size) color_grid_vis(X_sample.transpose(0, 2, 3, 1), (14, 14), 'samples/%s_etl_test.png'%desc) Z_sample = floatX(np_rng.uniform(-1., 1., size=(monitor_size, model.gen_dim))) print desc.upper() print "starting training" with open('errors.log', 'w') as f: f.write('# iter data_seen epoch dis_loss g_loss') f.write(' c_loss c_val_err c_test_err\n') with open('best.log', 'w') as f: f.write('# iter data_seen epoch c_val_err c_test_err\n') n_iter = n_epochs*(data.unlab_size/batch_size+1) best_err = 1e6
def run(self): parser = argparse.ArgumentParser() parser.add_argument("--gendim", type = int, default = 100) #parser.add_argument("--dataset", type = str, default = 'stl10') parser.add_argument("--batch_size", type = int, default = 128) parser.add_argument("--n_epochs", type = int, default = 100) parser.add_argument("--k_iter", type = int, default = 1) parser.add_argument("--monitor_size", type = int, default = 196) parser.add_argument("--init_scale", type = float, default = 0.02) parser.add_argument("--folds", type = int, default = 5) parser.add_argument("--valid_fold", type = int, default = 0) parser.add_argument("--iter_save", type = int, default = 100) parser.add_argument('--classify', action='store_true') parser.add_argument("--img_size", type = int, default = 64) args = parser.parse_args() print args gen_dim = args.gendim n_epochs = args.n_epochs batch_size = args.batch_size #dataset = args.dataset k_iter = args.k_iter monitor_size = args.monitor_size init_scale = args.init_scale folds = args.folds valid_fold = args.valid_fold iter_save = args.iter_save classify = args.classify img_size = args.img_size if classify: from src.gan_class import GAN_trainer else: from src.gan import GAN_trainer model = self.model_module.GAN_model(img_shape=(img_size,img_size),gen_dim=gen_dim,init_scale=init_scale) trainer = GAN_trainer(model) data = dataset.stl10() desc = 'dcgan' model_dir = 'models/%s'%desc samples_dir = 'samples/%s'%desc if not os.path.exists('logs/'): os.makedirs('logs/') if not os.path.exists(model_dir): os.makedirs(model_dir) if not os.path.exists(samples_dir): os.makedirs(samples_dir) X_sample = data.get_unlab_batch(0,monitor_size) X_sample = data.center_crop(X_sample,img_size) color_grid_vis(X_sample.transpose(0, 2, 3, 1), (14, 14), 'samples/%s_etl_test.png'%desc) Z_sample = floatX(np_rng.uniform(-1., 1., size=(monitor_size, model.gen_dim))) print desc.upper() print "starting training" with open('errors.log', 'w') as f: f.write('# iter data_seen epoch dis_loss g_loss') if classify: f.write(' c_loss c_val_err c_test_err\n') else: f.write('\n') if classify: with open('best.log', 'w') as f: f.write('# iter data_seen epoch c_val_err c_test_err\n') n_iter = n_epochs*(data.unlab_size/batch_size+1) best_err = 1e6 last_it = 0 t = time() for it in xrange(n_iter): epoch = it*batch_size/data.unlab_size X_batch = data.get_unlab_batch(it,batch_size) X_batch = data.scale_data(data.center_crop(X_batch,img_size)) Z_batch = floatX(np_rng.uniform(-1., 1., size=(len(X_batch), model.gen_dim))) gen_loss = trainer.train_generator_on_batch(Z_batch) dis_loss = trainer.train_discriminator_on_batch(X_batch, Z_batch) if classify: X_batch, y_batch = data.get_train_batch(it,batch_size) X_batch = data.scale_data(data.center_crop(X_batch,img_size)) cls_loss = trainer.train_classifier_on_batch(X_batch, y_batch) if (it % iter_save == 0) or (it % 10 == 0 and it < iter_save): if classify: cls_test_err = 0.0 for it2 in xrange(data.test_size/batch_size): X_batch, y_batch = data.get_test_batch(it2,batch_size) X_batch = data.scale_data(data.center_crop(X_batch,img_size)) cls_test_err += trainer._cls_error(X_batch, y_batch) cls_test_err /= data.test_size/batch_size cls_valid_err = 0.0 for it2 in xrange(data.valid_size/batch_size): X_batch, y_batch = data.get_valid_batch(it2,batch_size) X_batch = data.scale_data(data.center_crop(X_batch,img_size)) cls_valid_err += trainer._cls_error(X_batch, y_batch) cls_valid_err /= data.valid_size/batch_size samples = np.asarray(trainer._gen(Z_sample)) 
color_grid_vis(data.inv_scale_data(samples).transpose(0, 2, 3, 1), (14, 14), 'samples/%s/%d.png'%(desc, it)) with open('errors.log', 'a') as f: f.write( " ".join(map(str, (it,it*batch_size,epoch) ))+" ") f.write( " ".join(map(str, (dis_loss,gen_loss) ))+" ") if classify: f.write( " ".join(map(str, (cls_loss,cls_valid_err,cls_test_err) ))+"\n") else: f.write("\n") if classify and cls_valid_err<best_err: best_err = cls_valid_err with open('best.log', 'a') as f: f.write( " ".join(map(str, (it,it*batch_size,epoch) ))+" ") f.write( " ".join(map(str, (cls_valid_err,cls_test_err) ))+"\n") model.dump('models/%s/best_gen_params.jl'%(desc)) t2 = time()-t t += t2 print "iter:%d/%d; epoch:%d; %f sec. per iteration"%(it,n_iter,epoch,t2/(1+it-last_it)) last_it = it+1 if epoch in [1, 2, 3, 4, 5, 10, 15, 20, 25, 50, 75, 100, 200, n_epochs]: if (it*batch_size)%data.unlab_size<batch_size: model_dir = 'models/%s/%d'%(desc, it) if not os.path.exists(model_dir): os.makedirs(model_dir) model.dump('%s/params.jl'%(model_dir)) model_dir = 'models/%s/last'%(desc) if not os.path.exists(model_dir): os.makedirs(model_dir) model.dump('%s/params.jl' % (model_dir))
####################
# COMPILE FUNCTION #
####################
print 'COMPILING'
t = time()
_train_g = theano.function([X, N, Z, Temp], cost, updates=g_updates)
_train_d = theano.function([X, N, Z, Temp], cost, updates=d_updates)
_gen = theano.function([Z], gX)
print '%.2f seconds to compile theano functions'%(time()-t)

#####################################
# SAMPLE RANDOM DATA FOR GENERATION #
#####################################
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz)))

###################
# GENERATE SAMPLE #
###################
def gen_samples(n, nbatch=128):
    samples = []
    n_gen = 0
    for i in range(n/nbatch):
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        n_gen += len(xmb)
    n_left = n-n_gen
    zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
    xmb = _gen(zmb)
    samples.append(xmb)
    return np.concatenate(samples, axis=0)
tr_idxs = np.arange(len(trX))
trX_vis = np.asarray([[trX[i] for i in py_rng.sample(tr_idxs[trY==y], cols)] for y in range(ny)]).reshape(ny * cols, -1)
trX_vis = inverse_transform(transform(trX_vis))
grayscale_grid_vis(trX_vis, (ny, cols), 'samples/test.png')

############
# set up targets normally
steps = 6
numtargets = 9  # This is how many letters you will count
start = 1
targets = np.asarray([[i+start for _ in range(steps)] for i in range(numtargets)])
sample_ymb = floatX(OneHot(targets.flatten(), ny))

# set up random z
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(numtargets * steps, nz)))

def gen_samples(n, nbatch=128):
    samples = []
    labels = []
    n_gen = 0
    for i in range(n/nbatch):
        ymb = floatX(OneHot(np_rng.randint(0, ny, nbatch), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
        xmb = _gen(zmb, ymb)
        samples.append(xmb)
        labels.append(np.argmax(ymb, axis=1))
        n_gen += len(xmb)
    n_left = n-n_gen
def transform(X):
    return (floatX(X) / 255.).reshape(-1, nc, npx, npx)

Z = T.matrix()
X = T.tensor4()
Y = T.matrix()
gX = gen(Z, Y, *gen_params)
dX = discrim(X, Y, *discrim_params)
_gen = theano.function([Z, Y], gX)
_discrim = theano.function([X, Y], dX)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(10 * ny, nz)))
sample_ymb = floatX(
    OneHot(
        np.asarray([[i for _ in range(10)] for i in range(ny)]).flatten(), ny))
samples = _gen(sample_zmb, sample_ymb)
scores = _discrim(samples, sample_ymb)
color_grid_vis(inverse_transform(samples), (ny, 10), 'samples.png')

for i in range(ny):
    Z = T.matrix()
    X = T.tensor4()
    Y = T.matrix()
    gX = gen(Z, Y, *gen_params)
    dX = discrim(X, Y, *discrim_params)
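# --- Label-grid sketch (illustrative; OneHot is assumed to behave like a row-wise one-hot
# encoder). sample_ymb above lays out ny blocks of 10 identical labels, so row i of the saved
# grid contains 10 samples conditioned on class i: ---
import numpy as np

ny = 10
labels = np.asarray([[i for _ in range(10)] for i in range(ny)]).flatten()
one_hot = np.eye(ny, dtype='float32')[labels]    # shape (ny * 10, ny), stand-in for OneHot
print(one_hot.shape)                             # (100, 10); labels[:12] is ten 0s then two 1s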
gen_params32 = [param.get_value().astype('float32') for param in gen_params]
#%%
#%%
Z = T.matrix()
Zz = Z.astype('float32')
X = T.tensor4()
Xx = X.astype('float32')
gX = gen(Zz, *gen_params32)
#%%
dX = discrim(Xx, *discrim_params)
_gen = theano.function([Zz], gX)
_discrim = theano.function([Xx], dX)
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(400, 256)))
samples = _gen(sample_zmb)
scores = _discrim(samples)
sort = np.argsort(scores.flatten())[::-1]
samples = samples[sort]
color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png')
#%%
#%%
def calculate_b_u_b_s(X, g=None, b=None, u=None, s=None, a=1., e=1e-8):
    if X.ndim == 4:
        if u is not None and s is not None:
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            b_s = s.dimshuffle('x', 0, 'x', 'x')
        else:
g_updater = updates.Adam(lr=lrt, b1=args.b1, regularizer=updates.Regularizer(l2=args.weight_decay))
d_updates = d_updater(disc_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print('COMPILING')
t = time()
_train_g = theano.function([x, z], cost, updates=g_updates)
_train_d = theano.function([x, z], cost, updates=d_updates)
_gen = theano.function([z], gx)
print('%.2f seconds to compile theano functions' % (time() - t))

# test z samples
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(n_vis, nz)))

f_log = open('%s/training_log.ndjson' % log_dir, 'wb')
log_fields = [
    'n_epochs',
    'n_updates',
    'n_examples',
    'n_seconds',
    'g_cost',
    'd_cost',
]

# initialization
n_updates = 0
n_epochs = 0
n_examples = 0
if not args.output_image:
    args.output_image = '%s_%s_samples.png' % (args.model_name, args.model_type)

for arg in vars(args):
    print('[%s] =' % arg, getattr(args, arg))

# initialize model and constrained optimization problem
model_class = locate('model_def.%s' % args.model_type)
model = model_class.Model(model_name=args.model_name, model_file=args.model_file)

# generate samples
# def gen_samples(self, z0=None, n=32, batch_size=32, use_transform=True):
samples = []
n = 32
batch_size = 32
z0 = np_rng.uniform(-1., 1., size=(n, model.nz))
n_batches = int(np.ceil(n / float(batch_size)))
for i in range(n_batches):
    zmb = floatX(z0[batch_size * i:min(n, batch_size * (i + 1)), :])
    xmb = model._gen(zmb)
    samples.append(xmb)
samples = np.concatenate(samples, axis=0)
samples = model.inverse_transform(samples, npx=model.npx, nc=model.nc)
samples = (samples * 255).astype(np.uint8)
# samples = model.gen_samples(z0=None, n=196, batch_size=49, use_transform=True)

# generate grid visualization
im_vis = utils.grid_vis(samples, 14, 14)

# write to the disk
im_vis = cv2.cvtColor(im_vis, cv2.COLOR_BGR2RGB)
cv2.imwrite(args.output_image, im_vis)
print('samples_shape', samples.shape)
def run(hp, folder): trX, trY, nb_classes = load_data() k = 1 # # of discrim updates for each gen update l2 = 2.5e-5 # l2 weight decay b1 = 0.5 # momentum term of adam nc = 1 # # of channels in image ny = nb_classes # # of classes nbatch = 128 # # of examples in batch npx = 28 # # of pixels width/height of images nz = 100 # # of dim for Z ngfc = 512 # # of gen units for fully connected layers ndfc = 512 # # of discrim units for fully connected layers ngf = 64 # # of gen filters in first conv layer ndf = 64 # # of discrim filters in first conv layer nx = npx*npx*nc # # of dimensions in X niter = 200 # # of iter at starting learning rate niter_decay = 100 # # of iter to linearly decay learning rate to zero lr = 0.0002 # initial learning rate for adam scale = 0.02 k = hp['k'] l2 = hp['l2'] #b1 = hp['b1'] nc = 1 ny = nb_classes nbatch = hp['nbatch'] npx = 28 nz = hp['nz'] ngfc = hp['ngfc'] # # of gen units for fully connected layers ndfc = hp['ndfc'] # # of discrim units for fully connected layers ngf = hp['ngf'] # # of gen filters in first conv layer ndf = hp['ndf'] # # of discrim filters in first conv layer nx = npx*npx*nc # # of dimensions in X niter = hp['niter'] # # of iter at starting learning rate niter_decay = hp['niter_decay'] # # of iter to linearly decay learning rate to zero lr = hp['lr'] # initial learning rate for adam scale = hp['scale'] #k = 1 # # of discrim updates for each gen update #l2 = 2.5e-5 # l2 weight decay b1 = 0.5 # momentum term of adam #nc = 1 # # of channels in image #ny = nb_classes # # of classes budget_hours = hp.get('budget_hours', 2) budget_secs = budget_hours * 3600 ntrain = len(trX) def transform(X): return (floatX(X)).reshape(-1, nc, npx, npx) def inverse_transform(X): X = X.reshape(-1, npx, npx) return X model_dir = folder samples_dir = os.path.join(model_dir, 'samples') if not os.path.exists(model_dir): os.makedirs(model_dir) if not os.path.exists(samples_dir): os.makedirs(samples_dir) relu = activations.Rectify() sigmoid = activations.Sigmoid() lrelu = activations.LeakyRectify() bce = T.nnet.binary_crossentropy gifn = inits.Normal(scale=scale) difn = inits.Normal(scale=scale) gw = gifn((nz, ngfc), 'gw') gw2 = gifn((ngfc, ngf*2*7*7), 'gw2') gw3 = gifn((ngf*2, ngf, 5, 5), 'gw3') gwx = gifn((ngf, nc, 5, 5), 'gwx') dw = difn((ndf, nc, 5, 5), 'dw') dw2 = difn((ndf*2, ndf, 5, 5), 'dw2') dw3 = difn((ndf*2*7*7, ndfc), 'dw3') dwy = difn((ndfc, 1), 'dwy') gen_params = [gw, gw2, gw3, gwx] discrim_params = [dw, dw2, dw3, dwy] def gen(Z, w, w2, w3, wx, use_batchnorm=True): if use_batchnorm: batchnorm_ = batchnorm else: batchnorm_ = lambda x:x h = relu(batchnorm_(T.dot(Z, w))) h2 = relu(batchnorm_(T.dot(h, w2))) h2 = h2.reshape((h2.shape[0], ngf*2, 7, 7)) h3 = relu(batchnorm_(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2)))) x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2))) return x def discrim(X, w, w2, w3, wy): h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2))) h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2)))) h2 = T.flatten(h2, 2) h3 = lrelu(batchnorm(T.dot(h2, w3))) y = sigmoid(T.dot(h3, wy)) return y X = T.tensor4() Z = T.matrix() gX = gen(Z, *gen_params) p_real = discrim(X, *discrim_params) p_gen = discrim(gX, *discrim_params) d_cost_real = bce(p_real, T.ones(p_real.shape)).mean() d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean() g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean() d_cost = d_cost_real + d_cost_gen g_cost = g_cost_d cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen] lrt = 
sharedX(lr) d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) d_updates = d_updater(discrim_params, d_cost) g_updates = g_updater(gen_params, g_cost) #updates = d_updates + g_updates print 'COMPILING' t = time() _train_g = theano.function([X, Z], cost, updates=g_updates) _train_d = theano.function([X, Z], cost, updates=d_updates) _gen = theano.function([Z], gX) print '%.2f seconds to compile theano functions'%(time()-t) tr_idxs = np.arange(len(trX)) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz))) def gen_samples(n, nbatch=128): samples = [] labels = [] n_gen = 0 for i in range(n/nbatch): zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz))) xmb = _gen(zmb) samples.append(xmb) n_gen += len(xmb) n_left = n-n_gen zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz))) xmb = _gen(zmb) samples.append(xmb) return np.concatenate(samples, axis=0) s = floatX(np_rng.uniform(-1., 1., size=(10000, nz))) n_updates = 0 n_check = 0 n_epochs = 0 n_updates = 0 n_examples = 0 t = time() begin = datetime.now() for epoch in range(1, niter+niter_decay+1): t = time() print("Epoch {}".format(epoch)) trX = shuffle(trX) for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch): imb = transform(imb) zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz))) if n_updates % (k+1) == 0: cost = _train_g(imb, zmb) else: cost = _train_d(imb, zmb) n_updates += 1 n_examples += len(imb) samples = np.asarray(_gen(sample_zmb)) grayscale_grid_vis(inverse_transform(samples), (10, 20), '{}/{:05d}.png'.format(samples_dir, n_epochs)) n_epochs += 1 if n_epochs > niter: lrt.set_value(floatX(lrt.get_value() - lr/niter_decay)) if n_epochs % 50 == 0 or epoch == niter + niter_decay or epoch == 1: imgs = [] for i in range(0, s.shape[0], nbatch): imgs.append(_gen(s[i:i+nbatch])) img = np.concatenate(imgs, axis=0) samples_filename = '{}/{:05d}_gen.npz'.format(model_dir, n_epochs) joblib.dump(img, samples_filename, compress=9) shutil.copy(samples_filename, '{}/gen.npz'.format(model_dir)) joblib.dump([p.get_value() for p in gen_params], '{}/d_gen_params.jl'.format(model_dir, n_epochs), compress=9) joblib.dump([p.get_value() for p in discrim_params], '{}/discrim_params.jl'.format(model_dir, n_epochs), compress=9) print('Elapsed : {}sec'.format(time() - t)) if (datetime.now() - begin).total_seconds() >= budget_secs: print("Budget finished.quit.") break
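# --- Scheduling note (sketch; k is the "# of discrim updates for each gen update" from the
# hyper-parameters above) ---
# The `n_updates % (k+1)` test above interleaves the two training functions: k=1 alternates
# G,D,G,D,...; k=2 yields G,D,D,G,D,D,... A quick rehearsal of the rule:
k = 2
schedule = ['G' if i % (k + 1) == 0 else 'D' for i in range(9)]
print schedule   # ['G', 'D', 'D', 'G', 'D', 'D', 'G', 'D', 'D']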
def train_model(train_stream, valid_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name): [generator_function, generator_params] = set_generator_model(model_config_dict['hidden_size'], model_config_dict['min_num_gen_filters']) [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'], model_config_dict['min_num_eng_filters']) # compile functions print 'COMPILING ENERGY UPDATER' t=time() energy_updater = set_energy_update_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function, energy_params=energy_params, energy_optimizer=energy_optimizer) print '%.2f SEC '%(time()-t) print 'COMPILING GENERATOR UPDATER' t=time() generator_updater = set_generator_update_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function, generator_params=generator_params, generator_optimizer=generator_optimizer) print '%.2f SEC '%(time()-t) print 'COMPILING EVALUATION FUNCTION' t=time() evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function) print '%.2f SEC '%(time()-t) print 'COMPILING SAMPLING FUNCTION' t=time() sampling_function = set_sampling_function(generator_function=generator_function) print '%.2f SEC '%(time()-t) # set fixed hidden data for sampling fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(model_config_dict['num_display'], model_config_dict['hidden_size']))) print 'START TRAINING' # for each epoch for e in xrange(model_config_dict['epochs']): # train phase epoch_train_input_energy = 0. epoch_train_sample_energy = 0. epoch_train_count = 0. train_batch_iters = train_stream.get_epoch_iterator() # for each batch for b, train_batch_data in enumerate(train_batch_iters): # set update function inputs input_data = transform(train_batch_data[0]) num_data = input_data.shape[0] hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(num_data, model_config_dict['hidden_size']))) noise_data = np_rng.normal(size=input_data.shape) noise_data = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e)) # update generator generator_update_inputs = [input_data, hidden_data, noise_data, e] [input_energy_val, sample_energy_val, ] = generator_updater(*generator_update_inputs) # update energy function energy_update_inputs = [input_data, hidden_data, e] [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs) # get output values epoch_train_input_energy += input_energy_val.mean() epoch_train_sample_energy += sample_energy_val.mean() epoch_train_count += 1. epoch_train_input_energy /= epoch_train_count epoch_train_sample_energy /= epoch_train_count # validation phase epoch_valid_input_energy = 0. epoch_valid_sample_energy = 0. epoch_valid_count = 0. 
valid_batch_iters = valid_stream.get_epoch_iterator() for b, valid_batch_data in enumerate(valid_batch_iters): # set function inputs input_data = transform(valid_batch_data[0]) num_data = input_data.shape[0] hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(num_data, model_config_dict['hidden_size']))) # evaluate model evaluation_input = [input_data, hidden_data] outputs = evaluation_function(*evaluation_input) epoch_valid_input_energy += outputs[0].mean() epoch_valid_sample_energy += outputs[1].mean() epoch_valid_count += 1. epoch_valid_input_energy /= epoch_valid_count epoch_valid_sample_energy /= epoch_valid_count print '================================================================' print 'EPOCH #{}'.format(e), model_test_name print '================================================================' print ' TRAIN RESULTS' print '================================================================' print ' input energy : ', epoch_train_input_energy print '----------------------------------------------------------------' print ' sample energy : ', epoch_train_sample_energy print '================================================================' print ' VALID RESULTS' print '================================================================' print ' input energy : ', epoch_valid_input_energy print '----------------------------------------------------------------' print ' sample energy : ', epoch_valid_sample_energy print '================================================================' # # plot curve data # save_as = model_test_name + '_ENERGY_CURVE.png' # plot_learning_curve(cost_values=[train_input_energy, # train_sample_energy, # valid_input_energy, # valid_sample_energy], # cost_names=['Input Energy (train)', # 'Sample Energy (train)', # 'Input Energy (valid)', # 'Sample Energy (valid)'], # save_as=save_as) # sample data save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(e+1) sample_data = sampling_function(fixed_hidden_data)[0] sample_data = np.asarray(sample_data) color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
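# --- Schedule sketch (illustrative, with made-up config values) ---
# The instance noise added to the inputs above shrinks geometrically with the epoch index:
# sigma_e = init_noise * noise_decay ** e. For example:
init_noise, noise_decay = 0.1, 0.98
for e in [0, 10, 50, 100]:
    print 'epoch %3d  noise sigma %.4f' % (e, init_noise * noise_decay ** e)
# epoch   0: 0.1000, epoch  10: 0.0817, epoch  50: 0.0364, epoch 100: 0.0133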