def __init__(self, BATCH_SIZE, layer_num):
    """Build the conditional WGAN-GP graph for the chosen architecture depth.

    Args:
        BATCH_SIZE: number of samples per training batch.
        layer_num: architecture selector; must be 3, 4 or 5, picking one of
            the (Generator, Discriminator) pairs defined on this class.

    Raises:
        ValueError: if layer_num is not one of the supported depths.
    """
    gen_dict = {3: self.Generator1, 4: self.Generator2, 5: self.Generator3}
    dis_dict = {3: self.Discriminator1, 4: self.Discriminator2, 5: self.Discriminator3}
    # Fail fast with a clear message instead of the cryptic
    # "'NoneType' object is not callable" that dict.get(...) would trigger
    # for an unsupported layer_num.
    if layer_num not in gen_dict:
        raise ValueError('layer_num must be one of %s, got %r'
                         % (sorted(gen_dict), layer_num))
    generator = gen_dict[layer_num]
    discriminator = dis_dict[layer_num]

    self.batch = BATCH_SIZE
    self.real_data = tf.placeholder(tf.float32, shape=[None, input_dim])
    self.fake_data = tf.placeholder(tf.float32, shape=[None, input_dim])
    # Condition: a single integer class id per sample (3 classes).
    self.ori_label = tf.placeholder(tf.int32, shape=[None, ])
    # One-hot encode the label, then cast to float so it can be mixed with
    # float features inside the networks.
    self.label = tf.one_hot(self.ori_label, 3, on_value=1, off_value=None)
    self.label = tf.cast(self.label, tf.float32)

    self.gen_data = generator(self.fake_data, self.label)
    self.disc_real = discriminator(self.real_data, self.label)
    self.disc_fake = discriminator(self.gen_data, self.label)

    # WGAN loss: critic maximizes real - fake; generator maximizes fake.
    self.disc_cost = tf.reduce_mean(self.disc_fake) - tf.reduce_mean(self.disc_real)
    self.gen_cost = -tf.reduce_mean(self.disc_fake)

    disc_params = lib.params_with_name('Discriminator')
    gen_params = lib.params_with_name('Generator')

    # Gradient penalty on random interpolates between real and generated data.
    self.alpha = tf.random_uniform(shape=[self.batch, 1],
                                   minval=0., maxval=1., dtype=tf.float32)
    self.differences = self.gen_data - self.real_data
    self.interpolates = self.real_data + (self.alpha * self.differences)
    # NOTE(review): label - label is identically zero, so label_interpolates
    # always equals self.label (real and fake share one label placeholder).
    # Kept as-is — confirm this is intentional.
    self.label_differences = self.label - self.label
    self.label_interpolates = self.label + (self.alpha * self.label_differences)
    # Only the gradient w.r.t. the data interpolates ([0]) is penalized;
    # the label-gradient component is discarded.
    self.gradients = tf.gradients(
        discriminator(self.interpolates, self.label_interpolates),
        [self.interpolates, self.label_interpolates])[0]
    self.slopes = tf.sqrt(
        tf.reduce_sum(tf.square(self.gradients), reduction_indices=[1]))
    self.gradient_penalty = tf.reduce_mean((self.slopes - 1.) ** 2)
    self.disc_cost += LAMBDA * self.gradient_penalty

    self.gen_train_op = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(
            self.gen_cost, var_list=gen_params)
    self.disc_train_op = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(
            self.disc_cost, var_list=disc_params)
def discriminate(self, inputs):
    """Run the shared conv trunk and return (critic score, inverted code).

    Three stride-2 conv layers are shared; two heads branch off the
    flattened features: a 1-unit 'Discriminator.Output' critic head, and a
    small 'Inverter.*' MLP head mapping features back to a NOISE_DIM code.

    Side effect: lazily caches self.dis_params / self.inv_params the first
    time the graph is built.
    """
    # assumes inputs flatten to 3x32x32 images — TODO confirm with callers
    output = tf.reshape(inputs, [-1, 3, 32, 32])
    output = lib.ops.conv2d.Conv2D('Discriminator.Input', 3, DIM, 5,
                                   output, stride=2)
    output = LeakyReLU(output)
    output = lib.ops.conv2d.Conv2D('Discriminator.2', DIM, 2 * DIM, 5,
                                   output, stride=2)
    # output = lib.ops.batchnorm.Batchnorm('Discriminator.BN2', [0,2,3], output)
    output = LeakyReLU(output)
    output = lib.ops.conv2d.Conv2D('Discriminator.3', 2 * DIM, 4 * DIM, 5,
                                   output, stride=2)
    # output = lib.ops.batchnorm.Batchnorm('Discriminator.BN3', [0,2,3], output)
    output = LeakyReLU(output)
    # Flatten the feature map for the two linear heads.
    output = tf.reshape(output, [-1, 4 * 4 * 4 * DIM])
    # Critic head: one raw score per sample.
    discriminator_output = lib.ops.linear.Linear('Discriminator.Output',
                                                 4 * 4 * 4 * DIM, 1, output)
    discriminator_output = tf.reshape(discriminator_output, [-1])
    if self.dis_params is None:
        self.dis_params = lib.params_with_name('Discriminator')
    # Inverter head: MLP from shared features down to the noise dimension.
    inverter_output = lib.ops.linear.Linear('Inverter.4', 4 * 4 * 4 * DIM,
                                            4 * 4 * DIM, output)
    inverter_output = LeakyReLU(inverter_output)
    inverter_output = lib.ops.linear.Linear('Inverter.5', 4 * 4 * DIM,
                                            4 * NOISE_DIM, inverter_output)
    inverter_output = LeakyReLU(inverter_output)
    inverter_output = lib.ops.linear.Linear('Inverter.Output', 4 * NOISE_DIM,
                                            NOISE_DIM, inverter_output)
    inverter_output = tf.reshape(inverter_output, [-1, NOISE_DIM])
    if self.inv_params is None:
        self.inv_params = lib.params_with_name('Inverter')
    return discriminator_output, inverter_output
def train_network():
    """Train a GDPP-GAN: label-smoothed GAN loss plus a diversity penalty.

    Builds the graph, then alternates generator/discriminator updates for
    ITERS iterations, checkpointing every 1000 steps and once at the end.
    """
    real_data_int = tf.placeholder(tf.int32, shape=[BATCH_SIZE, OUTPUT_DIM])
    # Scale uint8 pixel values [0, 255] into [-1, 1].
    real_data = 2 * ((tf.cast(real_data_int, tf.float32) / 255.) - .5)
    fake_data = Generator(BATCH_SIZE)
    disc_real, h_real = Discriminator(real_data)
    disc_fake, h_fake = Discriminator(fake_data)
    # GDPP diversity loss on the discriminator's hidden features.
    # (Fixed local-variable typo: was "diverstiy_cost".)
    diversity_cost = compute_diversity_loss(h_fake, h_real)

    # Standard GAN loss with label smoothing (0.9 real / 0.1 fake).
    gen_cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=disc_fake, labels=tf.ones_like(disc_fake) * 0.9))
    disc_cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=disc_fake, labels=tf.ones_like(disc_fake) * 0.1))
    disc_cost += tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=disc_real, labels=tf.ones_like(disc_real) * 0.9))
    disc_cost /= 2.

    # GDPP penalty averaged into the generator objective.
    gen_cost += diversity_cost
    gen_cost /= 2.

    gen_train_op = tf.train.AdamOptimizer(
        learning_rate=2e-4,
        beta1=0.5).minimize(gen_cost,
                            var_list=lib.params_with_name('Generator'))
    disc_train_op = tf.train.AdamOptimizer(
        learning_rate=2e-4,
        beta1=0.5).minimize(disc_cost,
                            var_list=lib.params_with_name('Discriminator.'))

    # Train loop
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True
    with tf.Session(config=run_config) as session:
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        gen = inf_train_gen()
        for iteration in tqdm(xrange(ITERS)):
            # next(gen) works on both Python 2 and 3 (gen.next() is
            # Python-2-only) and matches how other loops in this file draw.
            _data = next(gen)
            # Warm-up: skip the generator update on the very first step.
            if iteration > 0:
                session.run(gen_train_op, feed_dict={real_data_int: _data})
            session.run(disc_train_op, feed_dict={real_data_int: _data})
            if iteration > 0 and iteration % 1000 == 0:
                saver.save(session, MAIN_DIR + 'models/gdpp_gan.ckpt',
                           global_step=iteration)
        saver.save(session, MAIN_DIR + 'models/gdpp_gan_final.ckpt')
def train(self, gen, n_iters=15000):
    """Train the DPP-GAN with a label-smoothed GAN loss + GDPP diversity term.

    Args:
        gen: iterator yielding (images, labels) batches; labels are ignored.
        n_iters: number of alternating D/G update steps.
    """
    batch_size = self.batch_size
    # Learned affine reparameterization of the latent: z' = zin + Z * zsig.
    zin = tf.get_variable("g_z", [self.batch_size, self.dim_z],
                          initializer=tf.random_uniform_initializer(-1, 1))
    zsig = tf.get_variable("g_sig", [self.batch_size, self.dim_z],
                           initializer=tf.constant_initializer(0.2))
    inp = tf.add(zin, tf.multiply(self.Z, zsig))
    G_sample = self.generator(inp)
    images = tf.reshape(self.X, [self.batch_size, 28, 28, 3])
    D_fake, h_fake = self.discriminator(G_sample)
    D_real, h_real = self.discriminator(images, reuse=True)
    # Label-smoothed losses (0.9 real / 0.1 fake).
    # FIX: the fake/generator terms now use ones_like(D_fake); the original
    # used ones_like(D_real), which only worked because both tensors happen
    # to share a shape.
    D_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=D_real, labels=tf.ones_like(D_real) * 0.9))
    D_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=D_fake, labels=tf.ones_like(D_fake) * 0.1))
    D_loss = D_loss_fake + D_loss_real
    G_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=D_fake, labels=tf.ones_like(D_fake) * 0.9))
    # GDPP diversity term averaged into the generator objective.
    diversity_loss = self.compute_diversity_loss(h_fake, h_real)
    G_loss = 0.5 * (G_loss + diversity_loss)
    disc_params = lib.params_with_name('Discriminator')
    gen_params = lib.params_with_name('Generator')
    D_solver = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5, beta2=0.9
    ).minimize(D_loss, var_list=disc_params)
    G_solver = tf.train.AdamOptimizer(
        learning_rate=1e-4, beta1=0.5, beta2=0.9
    ).minimize(G_loss, var_list=gen_params)
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True
    with tf.Session(config=run_config) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        for it in tqdm(xrange(n_iters)):
            _data, _ = next(gen)
            _ = sess.run(D_solver,
                         feed_dict={self.X: _data,
                                    self.Z: self.sample_z(batch_size, self.dim_z)})
            _ = sess.run(G_solver,
                         feed_dict={self.X: _data,
                                    self.Z: self.sample_z(batch_size, self.dim_z)})
            # NOTE(review): this saves when it % 2000 == 2 (it = 2, 2002, ...);
            # "== 0" looks intended — confirm before changing the schedule.
            if np.mod(it, 2000) == 2:
                saver.save(sess, self.model_dir + 'dppgan_mnist.ckpt',
                           global_step=it)
        saver.save(sess, self.model_dir + 'dppgan_mnist_final.ckpt')
def define(self): self.real_data = tf.placeholder(tf.float32, shape=[None, 2]) #2-dimensional data self.fake_data = self.Generator(self.batch_size, self.real_data) self.disc_real = self.Discriminator(self.real_data) self.disc_fake = self.Discriminator(self.fake_data) # WGAN loss print(' ---> Defining Disc and Gen Loss') self.disc_cost = tf.reduce_mean(self.disc_fake) - tf.reduce_mean( self.disc_real) self.gen_cost = -tf.reduce_mean(self.disc_fake) # WGAN gradient penalty if self.mode == 'wgan-gp': alpha = tf.random_uniform(shape=[self.batch_size, 1], minval=0., maxval=1.) interpolates = alpha * self.real_data + ( (1 - alpha) * self.fake_data) #Eh?? disc_interpolates = self.Discriminator(interpolates) gradients = tf.gradients(disc_interpolates, [interpolates])[0] slopes = tf.sqrt( tf.reduce_sum(tf.square(gradients), reduction_indices=[1])) gradient_penalty = tf.reduce_mean((slopes - 1)**2) self.disc_cost += self.lambdaa * gradient_penalty print( ' ---> Aggregating all variables for Disc and Gen in two distinct variables' ) disc_params = lib.params_with_name('Discriminator') gen_params = lib.params_with_name('Generator') if self.mode == 'wgan-gp': print(' ---> Defining Optimizers for training') self.disc_train_op = tf.train.AdamOptimizer( learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(self.disc_cost, var_list=disc_params) if len(gen_params) > 0: self.gen_train_op = tf.train.AdamOptimizer( learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(self.gen_cost, var_list=gen_params) else: self.gen_train_op = tf.no_op() else: pass
def discriminate(self, x): output = tf.reshape(x, self.x_dim) # 28 x 28 output = tf.keras.layers.Conv3D(filters=self.latent_dim, kernel_size=[1, 4, 4], strides=[1, 2, 2], padding='SAME', name='Discriminator.Input')(output) output = tf.nn.leaky_relu(output) # 14 x 14 output = tf.keras.layers.Conv3D(filters=self.latent_dim * 2, kernel_size=[1, 4, 4], strides=[1, 2, 2], padding='SAME', name='Discriminator.1')(output) output = tf.nn.leaky_relu(output) # 7 x 7 output = tf.keras.layers.Conv3D(filters=self.latent_dim * 4, kernel_size=[1, 4, 4], strides=[1, 2, 2], padding='SAME', name='Discriminator.2')(output) output = tf.nn.leaky_relu(output) # 4 x 4 output = tf.reshape(output, [-1, self.latent_dim * 36]) output = tflib.ops.linear.Linear('Discriminator.Output', self.latent_dim * 36, 1, output) output = tf.reshape(output, [-1]) output = tf.nn.sigmoid(output) # 7 x 7 if self.dis_params is None: self.dis_params = tflib.params_with_name('Discriminator') return output
def generate(self, z):
    """Map latent codes z of shape (batch, z_dim) to flat images of x_dim."""
    assert z.shape[1] == self.z_dim
    # Project the latent code up to a stack of 4x4 feature maps.
    h = tflib.ops.linear.Linear('Generator.Input', self.z_dim,
                                self.latent_dim * 64, z)
    h = tf.nn.relu(h)
    h = tf.reshape(h, [-1, self.latent_dim * 4, 4, 4])  # 4 x 4
    # Upsample 4 -> 8, then crop to 7 so the next doublings hit 14 and 28.
    h = tflib.ops.deconv2d.Deconv2D('Generator.2', self.latent_dim * 4,
                                    self.latent_dim * 2, 5, h)
    h = tf.nn.relu(h)  # 8 x 8
    h = h[:, :, :7, :7]  # 7 x 7
    h = tflib.ops.deconv2d.Deconv2D('Generator.3', self.latent_dim * 2,
                                    self.latent_dim, 5, h)
    h = tf.nn.relu(h)  # 14 x 14
    h = tflib.ops.deconv2d.Deconv2D('Generator.Output', self.latent_dim,
                                    1, 5, h)
    # Sigmoid keeps pixel values in [0, 1].
    h = tf.nn.sigmoid(h)  # 28 x 28
    # Record generator variables the first time the graph is built.
    if self.gen_params is None:
        self.gen_params = tflib.params_with_name('Generator')
    return tf.reshape(h, [-1, self.x_dim])
def discriminate(self, x):
    """Critic for 28x28 single-channel images; returns raw (unsquashed) scores."""
    feat = tf.reshape(x, [-1, 1, 28, 28])  # 28 x 28
    feat = tflib.ops.conv2d.Conv2D('Discriminator.Input', 1,
                                   self.latent_dim, 5, feat, stride=2)
    feat = tf.nn.leaky_relu(feat)  # 14 x 14
    feat = tflib.ops.conv2d.Conv2D('Discriminator.2', self.latent_dim,
                                   self.latent_dim * 2, 5, feat, stride=2)
    feat = tf.nn.leaky_relu(feat)  # 7 x 7
    feat = tflib.ops.conv2d.Conv2D('Discriminator.3', self.latent_dim * 2,
                                   self.latent_dim * 4, 5, feat, stride=2)
    feat = tf.nn.leaky_relu(feat)  # 4 x 4
    # Flatten the 4x4 x (4*latent_dim) feature map for the linear head.
    feat = tf.reshape(feat, [-1, self.latent_dim * 64])
    score = tflib.ops.linear.Linear('Discriminator.Output',
                                    self.latent_dim * 64, 1, feat)
    score = tf.reshape(score, [-1])
    # Record discriminator variables the first time the graph is built.
    if self.dis_params is None:
        self.dis_params = tflib.params_with_name('Discriminator')
    return score
def _getOptimizer(self, wgan_gp, gen_cost, disc_cost, G_var, D_var):
    """Build (gen_train_op, disc_train_op, clip_disc_weights) for wgan_gp.MODE.

    clip_disc_weights is None except in 'wgan' mode, where it groups the
    critic weight-clipping ops.

    Raises:
        ValueError: for an unrecognized wgan_gp.MODE.
    """
    clip_disc_weights = None
    if wgan_gp.MODE == 'wgan':
        gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
            gen_cost, var_list=G_var, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
            disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
        # Weight clipping enforces the critic's Lipschitz constraint.
        clip_ops = []
        for var in lib.params_with_name('Discriminator'):
            clip_bounds = [-.01, .01]
            clip_ops.append(
                tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
        clip_disc_weights = tf.group(*clip_ops)
    elif wgan_gp.MODE == 'wgan-gp':
        gen_train_op = tf.train.AdamOptimizer(
            learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(
                gen_cost, var_list=G_var, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.AdamOptimizer(
            learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(
                disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
    elif wgan_gp.MODE == 'dcgan':
        gen_train_op = tf.train.AdamOptimizer(
            learning_rate=2e-4, beta1=0.5).minimize(
                gen_cost, var_list=G_var, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.AdamOptimizer(
            learning_rate=2e-4, beta1=0.5).minimize(
                disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
    elif wgan_gp.MODE == 'lsgan':
        gen_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(
            gen_cost, var_list=G_var, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(
            disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
    else:
        # ValueError subclasses Exception, so existing handlers still match;
        # the message replaces the original bare `raise Exception()`.
        raise ValueError('Unsupported MODE: %r' % (wgan_gp.MODE,))
    return gen_train_op, disc_train_op, clip_disc_weights
def test_function(params):
    """Restore a saved generator and return a batch of generated images.

    Args:
        params: (DCG, gen) tuple — model container and checkpoint dir name.

    Returns:
        numpy array of shape (noise.shape[0], 64, 64, 3) on success;
        implicitly None when the checkpoint cannot be restored.

    NOTE(review): relies on a module-level `noise` array — confirm it is
    defined before this is called.
    """
    DCG, gen = params
    Generator = DCG.DCGANG_1
    BATCH_SIZE = FLAGS.batch_size
    # print("Hi there")
    with tf.Graph().as_default() as graph:
        noise_tf = tf.convert_to_tensor(noise, dtype=tf.float32)
        fake_data = Generator(noise.shape[0], noise=noise_tf)
        print("Fake_data shape: ", fake_data.shape)
        # print("disc_fake shape: ", disc_fake.shape)
        # Restore only the generator's variables from its checkpoint dir.
        gen_vars = lib.params_with_name('Generator')
        gen_saver = tf.train.Saver(gen_vars)
        ckpt_gen = tf.train.get_checkpoint_state("./saved_models/" + gen + "/")
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            if ckpt_gen and ckpt_gen.model_checkpoint_path:
                print("Restoring generator...", gen)
                gen_saver.restore(sess, ckpt_gen.model_checkpoint_path)
                fake_images = sess.run([fake_data])[0]
                # return only 16
                fake_images = fake_images.reshape([noise.shape[0], 64, 64, 3])
                print("fake_images shape: ", np.shape(fake_images))
                return fake_images
            else:
                print("Failed to load Generator")
def wali(disc_fake, disc_real, gen_params, disc_params, lr=5e-5,
         rec_loss=None, cls_real_loss=None, cls_fake_loss=None,
         corr_loss=None, adam=False):
    """WALI losses with optional classification / correlation terms.

    Returns (gen_loss, disc_loss, clip_disc_weights, gen_train_op,
    disc_train_op, clip_ops).

    NOTE(review): `rec_loss` and `adam` are accepted but never used here.
    """
    fake_mean = tf.reduce_mean(disc_fake)
    real_mean = tf.reduce_mean(disc_real)
    gen_loss = -fake_mean - real_mean
    disc_loss = fake_mean - real_mean
    # Optional auxiliary classification terms go into both objectives.
    if (cls_real_loss is not None) and (cls_fake_loss is not None):
        aux = cls_real_loss + cls_fake_loss
        gen_loss += aux
        disc_loss += aux
    # Optional correlation term, subtracted at half weight from both.
    if corr_loss is not None:
        gen_loss -= 0.5 * corr_loss
        disc_loss -= 0.5 * corr_loss
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=lr).minimize(
        gen_loss, var_list=gen_params)
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=lr).minimize(
        disc_loss, var_list=disc_params)
    # WGAN weight clipping keeps the critic (approximately) 1-Lipschitz.
    lo, hi = -.01, .01
    clip_ops = [tf.assign(v, tf.clip_by_value(v, lo, hi))
                for v in lib.params_with_name('Discriminator')]
    clip_disc_weights = tf.group(*clip_ops)
    return gen_loss, disc_loss, clip_disc_weights, gen_train_op, disc_train_op, clip_ops
def generate(self, n_samples, noise=None):
    """Generate n_samples flat images; draws Gaussian noise when none given."""
    if noise is None:
        noise = tf.random_normal([n_samples, NOISE_DIM])
    # Project noise up to a 4x4 feature stack, then deconv to full size.
    net = lib.ops.linear.Linear('Generator.Input', NOISE_DIM,
                                4 * 4 * 4 * DIM, noise)
    net = lib.ops.batchnorm.Batchnorm('Generator.BN1', [0], net)
    net = tf.nn.relu(net)
    net = tf.reshape(net, [-1, 4 * DIM, 4, 4])
    net = lib.ops.deconv2d.Deconv2D('Generator.2', 4 * DIM, 2 * DIM, 5, net)
    net = lib.ops.batchnorm.Batchnorm('Generator.BN2', [0, 2, 3], net)
    net = tf.nn.relu(net)
    net = lib.ops.deconv2d.Deconv2D('Generator.3', 2 * DIM, DIM, 5, net)
    net = lib.ops.batchnorm.Batchnorm('Generator.BN3', [0, 2, 3], net)
    net = tf.nn.relu(net)
    net = lib.ops.deconv2d.Deconv2D('Generator.Output', DIM, 3, 5, net)
    # tanh keeps pixel values in [-1, 1].
    net = tf.tanh(net)
    # Record generator variables the first time the graph is built.
    if self.gen_params is None:
        self.gen_params = lib.params_with_name('Generator')
    return tf.reshape(net, [-1, OUTPUT_DIM])
def invert(self, x):
    """Encoder mapping 28x28 single-channel images back to z_dim latent codes."""
    feat = tf.reshape(x, [-1, 1, 28, 28])  # 28 x 28
    feat = tflib.ops.conv2d.Conv2D('Inverter.Input', 1,
                                   self.latent_dim, 5, feat, stride=2)
    feat = tf.nn.leaky_relu(feat)  # 14 x 14
    feat = tflib.ops.conv2d.Conv2D('Inverter.2', self.latent_dim,
                                   self.latent_dim * 2, 5, feat, stride=2)
    feat = tf.nn.leaky_relu(feat)  # 7 x 7
    feat = tflib.ops.conv2d.Conv2D('Inverter.3', self.latent_dim * 2,
                                   self.latent_dim * 4, 5, feat, stride=2)
    feat = tf.nn.leaky_relu(feat)  # 4 x 4
    feat = tf.reshape(feat, [-1, self.latent_dim * 64])
    # Two-layer MLP head down to the latent dimensionality.
    code = tflib.ops.linear.Linear('Inverter.4', self.latent_dim * 64,
                                   self.latent_dim * 8, feat)
    code = tf.nn.leaky_relu(code)
    code = tflib.ops.linear.Linear('Inverter.Output', self.latent_dim * 8,
                                   self.z_dim, code)
    code = tf.reshape(code, [-1, self.z_dim])
    # Record inverter variables the first time the graph is built.
    if self.inv_params is None:
        self.inv_params = tflib.params_with_name('Inverter')
    return code
def _build_model(self, opt):
    """Build the multi-device generator graph: placeholders, per-device
    generator outputs, the generator cost, and a saver for its variables.

    Args:
        opt: options object — appears unused here, TODO confirm why accepted.
    """
    # label data
    self.all_label_data_conv = tf.placeholder(
        tf.float32,
        shape=[self.batchSize, self.nc, self.imageSize, self.imageSize])
    self.split_label_data_conv = tf.split(self.all_label_data_conv,
                                          len(self.devices))
    # input data: no_random stacked random draws per real sample
    self.all_input_data_conv = tf.placeholder(
        tf.float32,
        shape=[
            self.batchSize * self.no_random, self.nc, self.imageSize,
            self.imageSize
        ])
    self.split_input_data_conv = tf.split(self.all_input_data_conv,
                                          len(self.devices))
    # mask data (split per device but not consumed inside the loop below —
    # TODO confirm whether that is intentional)
    self.all_mask_data_conv = tf.placeholder(
        tf.float32,
        shape=[self.batchSize, self.nc, self.imageSize, self.imageSize])
    self.split_mask_data_conv = tf.split(self.all_mask_data_conv,
                                         len(self.devices))
    # cost
    # NOTE(review): these lists are initialized but self.all_fake_data is
    # then *overwritten* (not appended) inside the device loop, so only the
    # last device's output survives, and all_fake_data_rep is never filled.
    # Confirm whether an append was intended.
    self.all_fake_data = []
    self.all_fake_data_rep = []
    for device_index, (device, label_data_conv, input_data_conv,
                       mask_data_conv) in enumerate(
                           zip(self.devices, self.split_label_data_conv,
                               self.split_input_data_conv,
                               self.split_mask_data_conv)):
        with tf.device(device):
            self.label_data = label_data_conv
            self.input_data = input_data_conv
            # Generator name selects residual vs direct image prediction.
            if self.residual:
                gen_name = 'Generator_residual'
            else:
                gen_name = 'Generator_img'
            self.all_fake_data = self.attention(
                self.batchSize * self.no_random,
                noise=self.input_data,
                nc=self.nc,
                isize=self.imageSize,
                name=gen_name,
                is_avg=self.avg)
            self.gen_cost = self.calculate_cost()
    self.g_vars = lib.params_with_name(gen_name)
    # Keep enough checkpoints to cover the whole training run.
    max_to_keep = int(
        math.ceil(float(self.epochs) / max(float(self.nsave), 1)))
    self.gen_saver = tf.train.Saver(var_list=self.g_vars,
                                    max_to_keep=max_to_keep)
def test_function(params):
    """Restore a generator/discriminator pair and score generated batches.

    Args:
        params: (DCG, gen, disc) — model container and checkpoint dir names.

    Returns:
        numpy array of shape (no_samples, BATCH_SIZE) of discriminator
        scores on generated data; implicitly None when the discriminator
        checkpoint cannot be restored.

    NOTE(review): relies on a module-level `no_samples` — confirm defined.
    """
    DCG, gen, disc = params
    Generator = DCG.DCGANG_1
    Discriminator = DCG.DCGAND_1
    BATCH_SIZE = FLAGS.batch_size
    with tf.Graph().as_default() as graph:
        fake_data = Generator(BATCH_SIZE)
        disc_fake, pre_fake = Discriminator(fake_data)
        gen_vars = lib.params_with_name('Generator')
        gen_saver = tf.train.Saver(gen_vars)
        disc_vars = lib.params_with_name("Discriminator")
        disc_saver = tf.train.Saver(disc_vars)
        ckpt_gen = tf.train.get_checkpoint_state(
            "./saved_models/" + gen + "/")
        ckpt_disc = tf.train.get_checkpoint_state(
            "./saved_models/" + disc + "/")
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            if ckpt_gen and ckpt_gen.model_checkpoint_path:
                print("Restoring generator...", gen)
                gen_saver.restore(sess, ckpt_gen.model_checkpoint_path)
            else:
                print("Failed to load Generator")
            if ckpt_disc and ckpt_disc.model_checkpoint_path:
                print("Restoring discriminator...", disc)
                disc_saver.restore(sess, ckpt_disc.model_checkpoint_path)
                pred_arr = np.empty([no_samples, BATCH_SIZE])
                # BUG FIX: the original looped range(no_samples + 1) and
                # wrote pred_arr[i - 1], running one extra batch and writing
                # the last row twice; iterate exactly no_samples times.
                # (Dead summary statistics that were computed but never used
                # have been dropped.)
                for i in tqdm(range(no_samples)):
                    predictions = sess.run([disc_fake])
                    pred_arr[i, :] = predictions[0]
                return pred_arr
            else:
                print("Failed to load Discriminator")
def build_v2(self):
    """
    Version 2:
    - D(x) has 1 output
    - D(x) takes in the one_hot class vector as an input to compute that 1 output
    - G(z) takes in the one_hot class vector as before in Version 1

    Builds the conditional WGAN-GP losses (generator_loss, disc_loss) and
    collects the two parameter lists. (Python 2 module — note the print
    statement syntax.)
    """
    self.z = tf.placeholder(tf.float32,
                            shape=[self.batch_size, self.data.latent_output_size])
    # with tf.variable_scope("generator") as scope:
    self.gen = MNIST_Generator()
    self.disc = MNIST_Discriminator()
    self.generator_output = self.gen.generator(self.z, self.data.latent_output_size)
    # with tf.variable_scope("discriminator") as scope:
    disc_output_x = self.disc.discriminator(self.x, self.xlabels, self.batch_size,
                                            self.data.labels_size, 50)
    # scope.reuse_variables()
    disc_output_gz = self.disc.discriminator(self.generator_output, self.zlabels,
                                             self.batch_size, self.data.labels_size, 50)
    # NOTE(review): `differences` is computed but never used below — the
    # interpolates are formed directly by epsilon-mixing instead.
    differences = self.generator_output - self.x
    # WGAN-GP interpolates: epsilon * x + (1 - epsilon) * G(z).
    interpolates = tf.multiply(self.epsilon, self.x) + \
        tf.multiply(1-self.epsilon, self.generator_output)
    print "INTERPOLATES Shape: ", interpolates.shape
    # Interpolates are scored with the REAL labels (xlabels).
    disc_interpolates = self.disc.discriminator(interpolates, self.xlabels,
                                                self.batch_size,
                                                self.data.labels_size, 50)
    self.generator_loss = -tf.reduce_mean(disc_output_gz)
    self.disc_loss = tf.reduce_mean(disc_output_gz) - tf.reduce_mean(disc_output_x)
    # Gradient penalty: push per-sample gradient norms toward 1.
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes-1)**2)
    self.disc_loss += self.lambdah*gradient_penalty
    self.gen_params = lib.params_with_name('Generator')
    self.disc_params = lib.params_with_name('Discriminator')
def generate(self, z):
    """Conv3DTranspose generator: latent z -> volume reshaped to self.x_dim.

    Upsamples a 1x6x6 seed through three transposed convolutions, cropping
    the depth/spatial axes after each stage — the strides overshoot the
    target sizes, so slicing trims the excess. Debug prints trace shapes.
    """
    assert z.shape[1] == self.z_dim
    output = tflib.ops.linear.Linear('Generator.Input', self.z_dim,
                                     self.latent_dim * 4 * 1 * 6 * 6, z)
    output = tf.nn.relu(output)
    print(output.shape)
    # Seed volume: depth 1, 6x6 spatial, 4*latent_dim channels.
    output = tf.reshape(output, [-1, 1, 6, 6, self.latent_dim * 4])
    print(output.shape)
    output = tf.keras.layers.Conv3DTranspose(filters=self.latent_dim * 2,
                                             kernel_size=8,
                                             strides=(4, 4, 4),
                                             padding='SAME',
                                             name='Generator.2')(output)
    output = tf.nn.relu(output)  # 4, 24, 24, 2*latent_dim
    print(output.shape)
    # Crop depth 4 -> 3 before the next upsampling stage.
    output = output[:, :3, :, :, :]
    print(output.shape)
    output = tf.keras.layers.Conv3DTranspose(filters=self.latent_dim,
                                             kernel_size=4,
                                             strides=(2, 2, 2),
                                             padding='SAME',
                                             name='Generator.3')(output)
    output = tf.nn.relu(output)  # 6, 48, 48, latent_dim
    print(output.shape)
    # Crop depth 6 -> 5.
    output = output[:, :5, :, :, :]
    print(output.shape)
    output = tf.keras.layers.Conv3DTranspose(
        filters=4, kernel_size=8, strides=(4, 4, 4), padding='SAME',
        name='Generator.Output')(output)
    print(output.shape)
    # Final crop to 19 x 144 x 144 (4 channels).
    output = output[:, :19, :144, :144, :]
    print(output.shape)
    # Sigmoid keeps outputs in [0, 1].
    output = tf.nn.sigmoid(output)
    # Record generator variables the first time the graph is built.
    if self.gen_params is None:
        self.gen_params = tflib.params_with_name('Generator')
    print(self.x_dim)
    return tf.reshape(output, self.x_dim)
def build_model(self):
    """Build the two-stage pose-transfer GAN graph (G1 coarse, G2 = G1 + DiffMap).

    The generator produces a coarse image G1 and a residual DiffMap; G2
    refines G1. A WGAN_GP-configured (MODE='dcgan') discriminator scores
    the (target, source, G1, G2) quadruplet in one batch, and pose-masked
    L1 terms anchor both stages to the target image.
    """
    G1, DiffMap, self.G_var1, self.G_var2 = GeneratorCNN_Pose_UAEAfterResidual_UAEnoFCAfter2Noise(
        self.x, self.pose_target, self.channel, self.z_num, self.repeat_num,
        self.conv_hidden_num, self.data_format, activation_fn=tf.nn.relu,
        noise_dim=0, reuse=False)
    # Stage 2 output is the coarse image plus the predicted residual.
    G2 = G1 + DiffMap
    self.G1 = denorm_img(G1, self.data_format)
    self.G2 = denorm_img(G2, self.data_format)
    self.G = self.G2
    self.DiffMap = denorm_img(DiffMap, self.data_format)
    self.wgan_gp = WGAN_GP(DATA_DIR='', MODE='dcgan', DIM=64,
                           BATCH_SIZE=self.batch_size, ITERS=200000,
                           LAMBDA=10, G_OUTPUT_DIM=128*64*3)
    Dis = self._getDiscriminator(self.wgan_gp, arch=self.D_arch)
    # Score all four image roles in a single batch, then split.
    triplet = tf.concat([self.x_target, self.x, G1, G2], 0)
    # # WGAN-GP code uses NCHW
    self.D_z = Dis(tf.transpose(triplet, [0, 3, 1, 2]), input_dim=3)
    self.D_var = lib.params_with_name('Discriminator.')
    D_z_pos_x_target, D_z_neg_x, D_z_neg_g1, D_z_neg_g2 = tf.split(self.D_z, 4)
    # Stage-1 loss: plain L1 plus pose-masked L1 against the target image.
    self.PoseMaskLoss1 = tf.reduce_mean(tf.abs(G1 - self.x_target) * self.mask_target)
    self.g_loss1 = tf.reduce_mean(tf.abs(G1-self.x_target)) + self.PoseMaskLoss1
    self.g_loss2, self.d_loss, self.g2_g1_loss = self._gan_loss(
        self.wgan_gp, Dis, D_z_pos_x_target, D_z_neg_x, D_z_neg_g1,
        D_z_neg_g2, arch=self.D_arch)
    self.PoseMaskLoss2 = tf.reduce_mean(tf.abs(G2 - self.x_target) * (self.mask_target))
    self.L1Loss2 = tf.reduce_mean(tf.abs(G2 - self.x_target)) + self.PoseMaskLoss2
    # Heavily weight the stage-2 reconstruction term.
    self.g_loss2 += self.L1Loss2 * 10
    self.g_optim1, self.g_optim2, self.d_optim, self.clip_disc_weights = self._getOptimizer(
        self.wgan_gp, self.g_loss1, self.g_loss2, self.d_loss,
        self.G_var1, self.G_var2, self.D_var)
    self.summary_op = tf.summary.merge([
        tf.summary.image("G1", self.G1),
        tf.summary.image("G2", self.G2),
        tf.summary.image("DiffMap", self.DiffMap),
        tf.summary.scalar("loss/PoseMaskLoss1", self.PoseMaskLoss1),
        tf.summary.scalar("loss/PoseMaskLoss2", self.PoseMaskLoss2),
        tf.summary.scalar("loss/L1Loss2", self.L1Loss2),
        tf.summary.scalar("loss/g_loss1", self.g_loss1),
        tf.summary.scalar("loss/g_loss2", self.g_loss2),
        tf.summary.scalar("loss/d_loss", self.d_loss),
        tf.summary.scalar("loss/g2_g1_loss", self.g2_g1_loss),
        tf.summary.scalar("misc/d_lr", self.d_lr),
        tf.summary.scalar("misc/g_lr", self.g_lr),
    ])
def build_model(self):
    """Build the two-stage pose-transfer GAN graph (G1 coarse, G2 = G1 + DiffMap).

    Near-duplicate of the other build_model in this file (differs only in
    minor formatting) — TODO consider consolidating. G1 is the coarse image,
    DiffMap the predicted residual, G2 the refined result; a WGAN_GP
    (MODE='dcgan') discriminator scores the (target, source, G1, G2)
    quadruplet, with pose-masked L1 terms anchoring both stages.
    """
    G1, DiffMap, self.G_var1, self.G_var2 = GeneratorCNN_Pose_UAEAfterResidual_UAEnoFCAfter2Noise(
        self.x, self.pose_target, self.channel, self.z_num, self.repeat_num,
        self.conv_hidden_num, self.data_format, activation_fn=tf.nn.relu,
        noise_dim=0, reuse=False)
    # Stage 2 output is the coarse image plus the predicted residual.
    G2 = G1 + DiffMap
    self.G1 = denorm_img(G1, self.data_format)
    self.G2 = denorm_img(G2, self.data_format)
    self.G = self.G2
    self.DiffMap = denorm_img(DiffMap, self.data_format)
    self.wgan_gp = WGAN_GP(DATA_DIR='', MODE='dcgan', DIM=64,
                           BATCH_SIZE=self.batch_size, ITERS=200000,
                           LAMBDA=10, G_OUTPUT_DIM=128*64*3)
    Dis = self._getDiscriminator(self.wgan_gp, arch=self.D_arch)
    # Score all four image roles in a single batch, then split.
    triplet = tf.concat([self.x_target, self.x, G1, G2], 0)
    ## WGAN-GP code uses NCHW
    self.D_z = Dis(tf.transpose( triplet, [0,3,1,2] ), input_dim=3)
    self.D_var = lib.params_with_name('Discriminator.')
    D_z_pos_x_target, D_z_neg_x, D_z_neg_g1, D_z_neg_g2 = tf.split(self.D_z, 4)
    # Stage-1 loss: plain L1 plus pose-masked L1 against the target image.
    self.PoseMaskLoss1 = tf.reduce_mean(tf.abs(G1 - self.x_target) * (self.mask_target))
    self.g_loss1 = tf.reduce_mean(tf.abs(G1-self.x_target)) + self.PoseMaskLoss1
    self.g_loss2, self.d_loss, self.g2_g1_loss = self._gan_loss(
        self.wgan_gp, Dis, D_z_pos_x_target, D_z_neg_x, D_z_neg_g1,
        D_z_neg_g2, arch=self.D_arch)
    self.PoseMaskLoss2 = tf.reduce_mean(tf.abs(G2 - self.x_target) * (self.mask_target))
    self.L1Loss2 = tf.reduce_mean(tf.abs(G2 - self.x_target)) + self.PoseMaskLoss2
    # Heavily weight the stage-2 reconstruction term.
    self.g_loss2 += self.L1Loss2 * 10
    self.g_optim1, self.g_optim2, self.d_optim, self.clip_disc_weights = self._getOptimizer(
        self.wgan_gp, self.g_loss1, self.g_loss2, self.d_loss,
        self.G_var1, self.G_var2, self.D_var)
    self.summary_op = tf.summary.merge([
        tf.summary.image("G1", self.G1),
        tf.summary.image("G2", self.G2),
        tf.summary.image("DiffMap", self.DiffMap),
        tf.summary.scalar("loss/PoseMaskLoss1", self.PoseMaskLoss1),
        tf.summary.scalar("loss/PoseMaskLoss2", self.PoseMaskLoss2),
        tf.summary.scalar("loss/L1Loss2", self.L1Loss2),
        tf.summary.scalar("loss/g_loss1", self.g_loss1),
        tf.summary.scalar("loss/g_loss2", self.g_loss2),
        tf.summary.scalar("loss/d_loss", self.d_loss),
        tf.summary.scalar("loss/g2_g1_loss", self.g2_g1_loss),
        tf.summary.scalar("misc/d_lr", self.d_lr),
        tf.summary.scalar("misc/g_lr", self.g_lr),
    ])
def wali(disc_fake, disc_real, gen_params, disc_params, lr=5e-5):
    """Wasserstein ALI losses, RMSProp train ops, and critic clip ops.

    Returns (gen_cost, disc_cost, clip_disc_weights, gen_train_op,
    disc_train_op, clip_ops).
    """
    fake_mean = tf.reduce_mean(disc_fake)
    real_mean = tf.reduce_mean(disc_real)
    gen_cost = -fake_mean - real_mean
    disc_cost = fake_mean - real_mean
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=lr).minimize(
        gen_cost, var_list=gen_params)
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=lr).minimize(
        disc_cost, var_list=disc_params)
    # Weight clipping keeps the critic (approximately) 1-Lipschitz.
    lo, hi = -.01, .01
    clip_ops = [tf.assign(v, tf.clip_by_value(v, lo, hi))
                for v in lib.params_with_name('Discriminator')]
    clip_disc_weights = tf.group(*clip_ops)
    return gen_cost, disc_cost, clip_disc_weights, gen_train_op, disc_train_op, clip_ops
def _loss(self):
    """Builds the loss part of the graph: G/D costs, mode-specific weight
    clipping or gradient penalty, optimizer ops, and loss summaries."""
    self.generator_cost = generator_loss(self.mode, self.disc_fake)
    self.discriminator_cost = discriminator_loss(self.mode, self.disc_real,
                                                 self.disc_fake)
    self.clip_disc_weights = None
    if self.mode == 'wgan':
        # Original WGAN: clip critic weights into [-0.01, 0.01].
        clip_ops = []
        for var in tflib.params_with_name('Discriminator'):
            clip_bounds = [-.01, .01]
            clip_ops.append(
                tf.assign(var,
                          tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
        self.clip_disc_weights = tf.group(*clip_ops)
    elif self.mode == 'wgan-gp':
        # Gradient penalty on random interpolates between real and fake.
        alpha = tf.random_uniform(shape=[self.batch_size, 1, 1, 1],
                                  minval=0.,
                                  maxval=1)
        differences = self.fake_data - self.real_data
        interpolates = self.real_data + (alpha * differences)
        gradients = tf.gradients(self.discriminator_fn(interpolates),
                                 [interpolates])[0]
        # BUG FIX: the per-sample gradient norm must sum over ALL non-batch
        # axes. The data here is 4-D (alpha has shape [batch, 1, 1, 1]), so
        # summing over axis 1 alone under-measured the gradient norm.
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2, 3]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
        self.discriminator_cost += self.gradient_penalty_lambda * gradient_penalty
    # define optimizer op
    self.gen_train_op = tf.train.AdamOptimizer(
        learning_rate=self.generator_lr,
        beta1=0.5).minimize(self.generator_cost, var_list=self.g_vars)
    self.disc_train_op = tf.train.AdamOptimizer(
        learning_rate=self.discriminator_lr,
        beta1=0.5).minimize(self.discriminator_cost, var_list=self.d_vars)
    # summary writer
    g_loss_summary_op = tf.summary.scalar('g_loss', self.generator_cost)
    d_loss_summary_op = tf.summary.scalar('d_loss', self.discriminator_cost)
    self.merged_summary_op = tf.summary.merge_all()
def _getOptimizer(self, wgan_gp, gen_cost1, gen_cost2, disc_cost, G_var1, G_var2, D_var):
    """Build (gen_train_op1, gen_train_op2, disc_train_op, clip_disc_weights)
    for the two-stage generator, according to wgan_gp.MODE.

    clip_disc_weights is None except in 'wgan' mode.

    Raises:
        ValueError: for an unrecognized wgan_gp.MODE.
    """
    clip_disc_weights = None
    if wgan_gp.MODE == 'wgan':
        gen_train_op1 = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
            gen_cost1, var_list=G_var1, colocate_gradients_with_ops=True)
        gen_train_op2 = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
            gen_cost2, var_list=G_var2, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
            disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
        # Critic weight clipping (original WGAN Lipschitz constraint).
        clip_ops = []
        for var in lib.params_with_name('Discriminator'):
            clip_bounds = [-.01, .01]
            clip_ops.append(
                tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
        clip_disc_weights = tf.group(*clip_ops)
    elif wgan_gp.MODE == 'wgan-gp':
        gen_train_op1 = tf.train.AdamOptimizer(
            learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(
                gen_cost1, var_list=G_var1, colocate_gradients_with_ops=True)
        gen_train_op2 = tf.train.AdamOptimizer(
            learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(
                gen_cost2, var_list=G_var2, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.AdamOptimizer(
            learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(
                disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
    elif wgan_gp.MODE == 'dcgan':
        gen_train_op1 = tf.train.AdamOptimizer(
            learning_rate=2e-5, beta1=0.5).minimize(
                gen_cost1, var_list=G_var1, colocate_gradients_with_ops=True)
        gen_train_op2 = tf.train.AdamOptimizer(
            learning_rate=2e-5, beta1=0.5).minimize(
                gen_cost2, var_list=G_var2, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.AdamOptimizer(
            learning_rate=2e-5, beta1=0.5).minimize(
                disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
    elif wgan_gp.MODE == 'lsgan':
        gen_train_op1 = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(
            gen_cost1, var_list=G_var1, colocate_gradients_with_ops=True)
        gen_train_op2 = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(
            gen_cost2, var_list=G_var2, colocate_gradients_with_ops=True)
        disc_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(
            disc_cost, var_list=D_var, colocate_gradients_with_ops=True)
    else:
        # ValueError subclasses Exception, so existing handlers still match;
        # the message replaces the original bare `raise Exception()`.
        raise ValueError('Unsupported MODE: %r' % (wgan_gp.MODE,))
    return gen_train_op1, gen_train_op2, disc_train_op, clip_disc_weights
def setup_train_ops(gen_cost, disc_cost, global_step):
    """Build generator/discriminator train ops for the configured GAN MODE.

    Returns (gen_train_op, disc_train_op, gen_learning_rate).  Relies on the
    module globals MODE, FLAGS, lib and tf (TF1 graph mode).
    """
    gen_learning_rate = 1e-4
    if MODE == 'wgan':
        gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True)
        disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)
        # Weight clipping for vanilla WGAN.
        # NOTE(review): clip_disc_weights is built here but never returned,
        # so the caller cannot run it -- confirm whether this is intentional.
        clip_ops = []
        for var in lib.params_with_name('Discriminator'):
            clip_bounds = [-.01, .01]
            clip_ops.append(tf.assign(var, tf.clip_by_value(
                var, clip_bounds[0], clip_bounds[1])))
        clip_disc_weights = tf.group(*clip_ops)
    elif MODE == 'wgan-gp':
        # Generator LR optionally decays exponentially; the critic LR is fixed.
        if FLAGS.decay_gen_lrate:
            gen_learning_rate = tf.train.exponential_decay(
                learning_rate=1e-4,
                global_step=global_step,
                decay_steps=1000,
                decay_rate=0.95)
        else:
            gen_learning_rate = 1e-4
        gen_train_op = tf.train.AdamOptimizer(gen_learning_rate, beta1=0.5, beta2=0.9).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True, global_step=global_step)
        disc_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)
    elif MODE == 'dcgan':
        gen_train_op = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True, global_step=global_step)
        disc_train_op = tf.train.AdamOptimizer(learning_rate=2e-4, beta1=0.5).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)
    elif MODE == 'lsgan':
        gen_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True, global_step=global_step)
        disc_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)
    else:
        raise Exception("Choose correct GAN version")
    return gen_train_op, disc_train_op, gen_learning_rate
def test_function(params):
    """Restore a saved discriminator and score batches of real data with it.

    params: (DCG, disc) where DCG provides the DCGAND_1 discriminator builder
    and `disc` names a checkpoint directory under ./saved_models/.  Uses the
    module globals FLAGS, no_samples, helpers, input_pipeline, lib, np, tqdm.
    Returns an array of shape [no_samples, BATCH_SIZE] of discriminator
    outputs, or None when the checkpoint cannot be loaded / on interrupt.
    """
    DCG, disc = params
    Discriminator = DCG.DCGAND_1
    BATCH_SIZE = FLAGS.batch_size
    with tf.Graph().as_default() as graph:
        train_data_list = helpers.get_dataset_files()
        real_data = input_pipeline(train_data_list, batch_size=BATCH_SIZE)
        # Normalize uint8 pixels to [-1, 1].
        real_data = 2 * ((tf.cast(real_data, tf.float32) / 255.) - .5)
        disc_real, _ = Discriminator(real_data)
        disc_vars = lib.params_with_name("Discriminator")
        disc_saver = tf.train.Saver(disc_vars)
        ckpt_disc = tf.train.get_checkpoint_state("./saved_models/" + disc + "/")
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            print('Queue runners started.')
            real_im = sess.run([real_data])[0][0][0][0:5]
            print("Real Image range sample: ", real_im)
            try:
                if ckpt_disc and ckpt_disc.model_checkpoint_path:
                    print("Restoring discriminator...", disc)
                    disc_saver.restore(sess, ckpt_disc.model_checkpoint_path)
                    pred_arr = np.empty([no_samples, BATCH_SIZE])
                    # BUG FIX: the original looped range(no_samples + 1) and
                    # indexed pred_arr[i - 1], so iteration 0 wrote the LAST
                    # row which the final iteration then overwrote, and one
                    # extra sess.run was performed.  Loop exactly no_samples
                    # times, filling rows 0..no_samples-1 once each.
                    for i in tqdm(range(no_samples)):
                        predictions = sess.run([disc_real])
                        pred_arr[i, :] = predictions[0]
                    return pred_arr
                else:
                    print("Failed to load Discriminator")
            except KeyboardInterrupt:
                print("Manual interrupt occurred.")
            finally:
                # Always shut the input queue runners down cleanly.
                coord.request_stop()
                coord.join(threads)
                print('Finished inference.')
def invert(self, x): output = tf.reshape(x, self.x_dim) # 28 x 28 output = tf.keras.layers.Conv3D(filters=self.latent_dim, kernel_size=8, strides=(4, 4, 4), padding='SAME', name='Inverter.Input')(output) output = tf.nn.leaky_relu(output) # 14 x 14 output = tf.keras.layers.Conv3D(filters=self.latent_dim * 2, kernel_size=8, strides=(4, 4, 4), padding='SAME', name='Inverter.1')(output) output = tf.nn.leaky_relu(output) # 7 x 7 output = tf.keras.layers.Conv3D(filters=self.latent_dim * 4, kernel_size=8, strides=(4, 4, 4), padding='SAME', name='Inverter.2')(output) output = tf.nn.leaky_relu(output) # 4 x 4 output = tf.reshape(output, [-1, self.latent_dim * 36]) output = tflib.ops.linear.Linear('Inverter.4', self.latent_dim * 36, self.latent_dim * 4, output) output = tf.nn.leaky_relu(output) output = tflib.ops.linear.Linear('Inverter.Output', self.latent_dim * 4, self.z_dim, output) output = tf.reshape(output, [-1, self.z_dim]) if self.inv_params is None: self.inv_params = tflib.params_with_name('Inverter') return output
# Fragment (Python 2 / TF1): tail of a linear-bottleneck "Discriminator"
# autoencoder, then the training graph for reconstructing one-hot vectors.
    DIM = 256
    BOTTLENECK = 32
    output = inputs
    # output = ReLULayer('Discriminator.1', OUTPUT_DIM, DIM, output)
    output = lib.ops.linear.Linear('Discriminator.2', OUTPUT_DIM, BOTTLENECK, output)
    # output = ReLULayer('Discriminator.3', BOTTLENECK, DIM, output)
    output = lib.ops.linear.Linear('Discriminator.Out', BOTTLENECK, OUTPUT_DIM, output)
    return output

real_data = tf.placeholder(tf.float32, shape=[BATCH_SIZE, OUTPUT_DIM])
# NOTE(review): Autoencoder is instantiated twice below; with lib.ops.linear
# the second call presumably reuses the same named params but duplicates the
# forward graph, and `outs` is never used -- confirm this is intentional.
outs = Autoencoder(real_data)
disc_cost = tf.nn.softmax_cross_entropy_with_logits(logits=Autoencoder(real_data), labels=real_data)
disc_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(disc_cost, var_list=lib.params_with_name('Discriminator'))

# Dataset iterator
def inf_train_gen():
    # Endless batches of random one-hot vectors of width OUTPUT_DIM.
    while True:
        mb = []
        for i in xrange(BATCH_SIZE):
            example = np.zeros(OUTPUT_DIM, dtype='float32')
            example[np.random.randint(OUTPUT_DIM)] = 1.
            mb.append(example)
        yield np.array(mb, dtype='float32')

# Train loop!
with tf.Session() as session:
    session.run(tf.initialize_all_variables())
# Fragment (Python 2 / TF1): tail of a DCGAN discriminator, then WGAN /
# WGAN-GP cost construction (cut at the start of the wgan-gp branch).
    return tf.reshape(output, [-1])

Generator = DCGANGenerator
Discriminator = DCGANDiscriminator

real_data = tf.placeholder(tf.float32, shape=[BATCH_SIZE, OUTPUT_DIM])
fake_data = Generator(BATCH_SIZE)
disc_real = Discriminator(real_data)
disc_fake = Discriminator(fake_data)

if MODE == 'wgan':
    # Critic maximizes real - fake score; generator maximizes fake score.
    gen_cost = -tf.reduce_mean(disc_fake)
    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(gen_cost, var_list=lib.params_with_name('Generator'))
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'))
    # Lipschitz constraint via hard weight clipping.
    clip_ops = []
    for var in lib.params_with_name('Discriminator'):
        print "Clipping {}".format(var.name)
        clip_bounds = [-.01, .01]
        clip_ops.append(tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
    clip_disc_weights = tf.group(*clip_ops)
    # Placeholders so logging code has these tensors in every mode.
    wasserstein = tf.constant(0.)
    lipschitz_penalty = tf.constant(0.)
elif MODE == 'wgan-gp':
    gen_cost = -tf.reduce_mean(disc_fake)
    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
# Fragment (TF1): tail of per-device cost construction (lsgan branch), device
# averaging, then mode-specific train ops (cut after the wgan-gp branch).
        elif MODE == 'lsgan':
            # Least-squares GAN objectives.
            gen_cost = tf.reduce_mean((disc_fake - 1)**2)
            disc_cost = (tf.reduce_mean((disc_real - 1)**2) + tf.reduce_mean((disc_fake - 0)**2))/2.
        else:
            raise Exception()
        gen_costs.append(gen_cost)
        disc_costs.append(disc_cost)

# Average the per-device costs.
gen_cost = tf.add_n(gen_costs) / len(DEVICES)
disc_cost = tf.add_n(disc_costs) / len(DEVICES)

if MODE == 'wgan':
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True)
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)
    # Weight clipping for vanilla WGAN.
    clip_ops = []
    for var in lib.params_with_name('Discriminator'):
        clip_bounds = [-.01, .01]
        clip_ops.append(tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
    clip_disc_weights = tf.group(*clip_ops)
elif MODE == 'wgan-gp':
    # Note beta1=0. here (other chunks of this repo use 0.5) -- as written.
    gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0., beta2=0.9).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True)
    disc_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0., beta2=0.9).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)
# Fragment (TF1): cut mid-expression -- the first line is the tail of an
# ACGAN fake-label accuracy computation.  Then device-averaged costs, Adam
# train ops, and fixed noise/labels for sample generation.
            tf.argmax(disc_fake_acgan, dimension=1)), fake_labels), tf.float32)))
        gen_costs.append(gen_cost)
        disc_costs.append(disc_cost)

gen_cost = tf.add_n(gen_costs) / len(DEVICES)
disc_cost = tf.add_n(disc_costs) / len(DEVICES)
if ACGAN:
    disc_acgan_real_acc = tf.add_n(disc_acgan_real_accs) / len(DEVICES)
    disc_acgan_fake_acc = tf.add_n(disc_acgan_fake_accs) / len(DEVICES)

gen_train_op = tf.train.AdamOptimizer(
    learning_rate=G_LR, beta1=BETA1_G,
    beta2=0.9).minimize(gen_cost,
                        var_list=lib.params_with_name('Generator'),
                        colocate_gradients_with_ops=True)
disc_train_op = tf.train.AdamOptimizer(
    learning_rate=D_LR, beta1=BETA1_D,
    beta2=0.9).minimize(disc_cost,
                        var_list=lib.params_with_name('Discriminator.'),
                        colocate_gradients_with_ops=True)

# For generating samples
# Fixed noise plus labels 0..9 repeated so samples are comparable run-to-run.
fixed_noise = tf.constant(
    np.random.normal(size=(100, 128)).astype('float32'))
fixed_labels = tf.constant(
    np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9] * 10, dtype='int32'))
fixed_noise_samples = Generator(100, labels=fixed_labels, noise=fixed_noise)
# Fragment (Python 2 / TF1): assemble the critic cost (consistency term CT_
# plus gradient penalty), optional ACGAN term, LR decay factor, then the
# per-device generator costs (cut inside the device loop).
        disc_costs.append(CT_)
        disc_costs.append(gradient_penalty)

disc_wgan = tf.add_n(disc_costs) / len(DEVICES_A)
if CONDITIONAL and ACGAN:
    disc_acgan = tf.add_n(disc_acgan_costs) / len(DEVICES_A)
    disc_acgan_acc = tf.add_n(disc_acgan_accs) / len(DEVICES_A)
    disc_acgan_fake_acc = tf.add_n(disc_acgan_fake_accs) / len(DEVICES_A)
    disc_cost = disc_wgan + (ACGAN_SCALE*disc_acgan)
else:
    # Zero constants keep the logging code uniform across modes.
    disc_acgan = tf.constant(0.)
    disc_acgan_acc = tf.constant(0.)
    disc_acgan_fake_acc = tf.constant(0.)
    disc_cost = disc_wgan

disc_params = lib.params_with_name('Discriminator.')

# Linear LR decay from 1 down to 0 over ITERS iterations.
if DECAY:
    decay = tf.maximum(0., 1.-(tf.cast(_iteration, tf.float32)/ITERS))
else:
    decay = 1.

gen_costs = []
gen_acgan_costs = []
for device in DEVICES:
    with tf.device(device):
        # NOTE(review): Python 2 integer division here; under Python 3 this
        # would be a float and break the shape argument below -- confirm.
        n_samples = GEN_BS_MULTIPLE * BATCH_SIZE / len(DEVICES)
        fake_labels = tf.cast(tf.random_uniform([n_samples])*NUM_LABELS, tf.int32)
        if CONDITIONAL and ACGAN:
            disc_fake, disc_fake_2, disc_fake_acgan = Discriminator(Generator(n_samples,fake_labels), fake_labels,0.8,0.5,0.5) #same dropout
            gen_costs.append(-tf.reduce_mean(disc_fake))
if MODE in ['local_ep', 'local_epce-z']: disc_fake, disc_real = [], [] # for i in xrange(LEN-1): # disc_fake.append(DynamicDiscrminator(p_z_l[:,i,:], p_z_l[:,i+1,:])) # disc_real.append(DynamicDiscrminator(q_z_l[:,i,:], q_z_l[:,i+1,:])) disc_fake.append(ZGDiscrminator(p_z_g)) disc_real.append(ZGDiscrminator(q_z_g)) # disc_fake.append(Discriminator(fake_x, p_z_g, p_z_l, p_y)) # disc_real.append(Discriminator(real_x, q_z_g, q_z_l, real_y)) elif MODE in ['ali', 'alice-z']: disc_real = Discriminator(real_x, q_z_g, q_z_l, real_y) disc_fake = Discriminator(fake_x, p_z_g, p_z_l, p_y) gen_params = lib.params_with_name('Generator') ext_params = lib.params_with_name('Extractor') disc_params = lib.params_with_name('Discriminator') local_classifier_loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( labels=l_Classifier(q_z_l), logits=real_y, )) global_classifier_loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( labels=g_Classifier(q_z_g), logits=real_y, )) print l_Classifier(q_z_l), real_y
# Fragment (Python 2 / TF1): lsgan costs at the end of the per-device loop,
# device averaging, then mode-specific train ops (cut inside the wgan-gp
# branch, which computes explicit gradients over all params).
        gen_cost = tf.reduce_mean((disc_fake - 1)**2)
        disc_cost = (tf.reduce_mean((disc_real - 1)**2) + tf.reduce_mean((disc_fake - 0)**2))/2.
        mean_grad_norm = tf.constant(0.) # to make the logging code work
    else:
        raise Exception()
    gen_costs.append(gen_cost)
    disc_costs.append(disc_cost)

gen_cost = tf.add_n(gen_costs) / len(DEVICES)
disc_cost = tf.add_n(disc_costs) / len(DEVICES)

if MODE == 'wgan':
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True)
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)
    # Weight clipping for vanilla WGAN.
    clip_ops = []
    for var in lib.params_with_name('Discriminator'):
        print "Clipping {}".format(var.name)
        clip_bounds = [-.01, .01]
        clip_ops.append(tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
    clip_disc_weights = tf.group(*clip_ops)
elif MODE == 'wgan-gp':
    gen_params = lib.params_with_name('Generator')
    disc_params = lib.params_with_name('Discriminator')
    all_params = gen_params + disc_params
    all_grads = tf.gradients(disc_cost, all_params, gate_gradients=True)
# Fragment (Python 2 / TF1): critic cost with gradient penalty, optional
# ACGAN term, LR decay factor, then per-device generator costs (cut inside
# the device loop).  Near-duplicate of another chunk in this file but with
# 10 hard-coded class labels and a 2-output Discriminator.
        gradient_penalty = 10*tf.reduce_mean((slopes-1.)**2)
        disc_costs.append(gradient_penalty)

disc_wgan = tf.add_n(disc_costs) / len(DEVICES_A)
if CONDITIONAL and ACGAN:
    disc_acgan = tf.add_n(disc_acgan_costs) / len(DEVICES_A)
    disc_acgan_acc = tf.add_n(disc_acgan_accs) / len(DEVICES_A)
    disc_acgan_fake_acc = tf.add_n(disc_acgan_fake_accs) / len(DEVICES_A)
    disc_cost = disc_wgan + (ACGAN_SCALE*disc_acgan)
else:
    disc_acgan = tf.constant(0.)
    disc_acgan_acc = tf.constant(0.)
    disc_acgan_fake_acc = tf.constant(0.)
    disc_cost = disc_wgan

disc_params = lib.params_with_name('Discriminator.')

if DECAY:
    decay = tf.maximum(0., 1.-(tf.cast(_iteration, tf.float32)/ITERS))
else:
    decay = 1.

gen_costs = []
gen_acgan_costs = []
for device in DEVICES:
    with tf.device(device):
        # NOTE(review): Python 2 integer division; would yield a float under
        # Python 3 and break the shape argument -- confirm.
        n_samples = GEN_BS_MULTIPLE * BATCH_SIZE / len(DEVICES)
        fake_labels = tf.cast(tf.random_uniform([n_samples])*10, tf.int32)
        if CONDITIONAL and ACGAN:
            disc_fake, disc_fake_acgan = Discriminator(Generator(n_samples,fake_labels), fake_labels)
            gen_costs.append(-tf.reduce_mean(disc_fake))
# Fragment (TF1): tail of a DCGAN-style discriminator (batchnorm only in
# 'wgan' mode), then WGAN cost/train-op setup (cut inside the clipping loop).
    if MODE == 'wgan':
        output = lib.ops.batchnorm.Batchnorm('Discriminator.BN3', [0,2,3], output)
    output = LeakyReLU(output)
    output = tf.reshape(output, [-1, 4*4*4*DIM])
    output = lib.ops.linear.Linear('Discriminator.Output', 4*4*4*DIM, 1, output)
    return tf.reshape(output, [-1])

real_data = tf.placeholder(tf.float32, shape=[BATCH_SIZE, OUTPUT_DIM])
fake_data = Generator(BATCH_SIZE)
disc_real = Discriminator(real_data)
disc_fake = Discriminator(fake_data)

gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')

if MODE == 'wgan':
    gen_cost = -tf.reduce_mean(disc_fake)
    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_train_op = tf.train.RMSPropOptimizer(
        learning_rate=5e-5
    ).minimize(gen_cost, var_list=gen_params)
    disc_train_op = tf.train.RMSPropOptimizer(
        learning_rate=5e-5
    ).minimize(disc_cost, var_list=disc_params)
    clip_ops = []
    for var in lib.params_with_name('Discriminator'):
# Fragment (TF1): lsgan branch of per-device costs, device averaging, then
# train ops (cut mid-statement inside the wgan-gp branch).
        elif MODE == 'lsgan':
            gen_cost = tf.reduce_mean((disc_fake - 1)**2)
            disc_cost = (tf.reduce_mean((disc_real - 1)**2) + tf.reduce_mean((disc_fake - 0)**2))/2.
        else:
            raise Exception()
        gen_costs.append(gen_cost)
        disc_costs.append(disc_cost)

gen_cost = tf.add_n(gen_costs) / len(DEVICES)
disc_cost = tf.add_n(disc_costs) / len(DEVICES)

if MODE == 'wgan':
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
        gen_cost,
        var_list=lib.params_with_name('Generator'),
        colocate_gradients_with_ops=True)
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
        disc_cost,
        var_list=lib.params_with_name('Discriminator.'),
        colocate_gradients_with_ops=True)
    # Weight clipping for vanilla WGAN.
    clip_ops = []
    for var in lib.params_with_name('Discriminator'):
        clip_bounds = [-.01, .01]
        clip_ops.append(
            tf.assign(var,
                      tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
    clip_disc_weights = tf.group(*clip_ops)
elif MODE == 'wgan-gp':
    # Generator LR is configurable (WGAN_GP_GLR) in this variant.
    gen_train_op = tf.train.AdamOptimizer(
        learning_rate=WGAN_GP_GLR, beta1=0.5, beta2=0.9).minimize(
            gen_cost,
            var_list=lib.params_with_name('Generator'),
            colocate_gradients_with_ops=True)
    disc_train_op = tf.train.AdamOptimizer(
# Fragment (TF1): WGAN-GP critic penalty and train ops inside a MODE branch.
# The leading ")" closes an alpha = tf.random_uniform(...) call cut off above.
    )
    differences = fake_data__ - real_data
    # Random points on the line between real and fake samples.
    interpolates = real_data + (alpha*differences)
    gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    # print "WARNING NO LIPSCHITZ PENALTY"
    gradient_penalty = 10.*tf.reduce_mean((slopes-1.)**2)
    disc_costs.append(gradient_penalty)

    disc_cost = tf.add_n(disc_costs) / len(DEVICES_A)

    # Linear LR decay over the full run.
    if DECAY:
        decay = tf.maximum(0., 1.-(tf.cast(iteration, tf.float32)/ITERS))
    else:
        decay = 1.
    disc_train_op = tf.train.AdamOptimizer(learning_rate=LR*decay, beta1=MOMENTUM_D, beta2=0.9).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)

    gen_costs = []
    for device in DEVICES:
        with tf.device(device):
            gen_costs.append(-tf.reduce_mean(Discriminator(Generator(GEN_BS_MULTIPLE*BATCH_SIZE/len(DEVICES)))))
    gen_cost = tf.add_n(gen_costs) / len(DEVICES)
    gen_train_op = tf.train.AdamOptimizer(learning_rate=LR*decay, beta1=MOMENTUM_G, beta2=0.9).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True)
else:
    raise Exception()

# split_real_data_conv = lib.split(all_real_data_conv, len(DEVICES), axis=0)
# gen_costs, disc_costs = [],[]
# Fragment (Python 2 / TF1): add a teacher-forcing cross-entropy term to the
# character-level generator cost, then build Adam train ops and WGAN-style
# weight-clipping assigns (cut at the trailing "else:").
gen_cost += tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        tf.reshape(Generator(BATCH_SIZE, real_inputs[:, :-1])[1], [-1, len(charmap)]),
        tf.reshape(real_inputs_discrete, [-1])
    )
)

if SETTINGS['wgan']:
    # gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-4).minimize(gen_cost, var_list=lib.params_with_name('Generator'))
    # disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-4).minimize(disc_cost, var_list=lib.params_with_name('Discriminator'))
    # disc_train_op_nopenalty = tf.train.RMSPropOptimizer(learning_rate=5e-4).minimize(wgan_disc_cost, var_list=lib.params_with_name('Discriminator'))
    if DECAY:
        decay = tf.maximum(0., 1.-(tf.cast(_iteration, tf.float32)/ITERS))
    else:
        decay = 1.
    # NOTE(review): Adam with weight clipping under a 'wgan' setting is
    # unusual (vanilla WGAN pairs clipping with RMSProp) -- as written.
    gen_train_op = tf.train.AdamOptimizer(learning_rate=LR*decay, beta1=MOMENTUM_G, beta2=0.9).minimize(gen_cost, var_list=lib.params_with_name('Generator'))
    disc_train_op = tf.train.AdamOptimizer(learning_rate=LR*decay, beta1=MOMENTUM_D, beta2=0.9).minimize(disc_cost, var_list=lib.params_with_name('Discriminator'))
    assigns = []
    for var in lib.params_with_name('Discriminator'):
        # Clip weights (not biases / batchnorm offsets) into [-0.01, 0.01].
        if ('.b' not in var.name) and ('Bias' not in var.name) and ('.BN' not in var.name):
            print "Clipping {}".format(var.name)
            clip_bounds = [-.01, .01]
            assigns.append(tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
        # Batchnorm scales are clipped into [0, 1] instead.
        if '.BN.scale' in var.name:
            print "Clipping {}".format(var.name)
            clip_bounds = [0, 1]
            assigns.append(tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
    clip_disc_weights = tf.group(*assigns)
else:
# Fragment (TF1): WGAN-GP Lipschitz penalties for three discriminators that
# share one `interpolates` tensor, followed by the four Adam train ops and a
# sample-dumping helper.
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
lipschitz_penalty = tf.reduce_mean((slopes-1.)**2)
disc_cost += 10*lipschitz_penalty

gradients_2 = tf.gradients(Discriminator2(interpolates), [interpolates])[0]
slopes_2 = tf.sqrt(tf.reduce_sum(tf.square(gradients_2), reduction_indices=[1]))
lipschitz_penalty_2 = tf.reduce_mean((slopes_2-1.)**2)
disc_2_cost += 10*lipschitz_penalty_2

gradients_3 = tf.gradients(Discriminator3(interpolates), [interpolates])[0]
slopes_3 = tf.sqrt(tf.reduce_sum(tf.square(gradients_3), reduction_indices=[1]))
lipschitz_penalty_3 = tf.reduce_mean((slopes_3-1.)**2)
disc_3_cost += 10*lipschitz_penalty_3

gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(gen_cost, var_list=lib.params_with_name('Generator.'))
disc_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'))
disc_2_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(disc_2_cost, var_list=lib.params_with_name('Discriminator2.'))
# BUG FIX: this op minimized disc_2_cost over Discriminator3's variables,
# which disc_2_cost does not depend on (gradients are None), so the third
# discriminator was never trained.  Minimize disc_3_cost instead.
disc_3_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(disc_3_cost, var_list=lib.params_with_name('Discriminator3.'))

# Weight clipping is disabled in gradient-penalty mode: clip_ops stays empty
# and clip_disc_weights is a no-op group.
clip_ops = []
# for var in lib.params_with_name('Discriminator'):
#     print "Clipping {}".format(var.name)
#     clip_bounds = [-.01, .01]
#     clip_ops.append(tf.assign(var, tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
clip_disc_weights = tf.group(*clip_ops)

def generate_image(iteration):
    # Dump the first 100 generator samples to samples_<iteration>.jpg.
    samples = session.run(fake_data)
    lib.save_images.save_images(samples[:100], 'samples_{}.jpg'.format(iteration))
# Fragment (TF1): cut mid-expression -- the first lines are the tail of an
# lsgan critic cost.  Then device averaging and the wgan branch's train ops
# with weight clipping.
            (disc_real - 1)**2) + tf.reduce_mean(
                (disc_fake - 0)**2)) / 2.
        else:
            raise Exception()
        gen_costs.append(gen_cost)
        disc_costs.append(disc_cost)

gen_cost = tf.add_n(gen_costs) / len(DEVICES)
disc_cost = tf.add_n(disc_costs) / len(DEVICES)

if MODE == 'wgan':
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
        gen_cost,
        var_list=lib.params_with_name('Generator'),
        colocate_gradients_with_ops=True)
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
        disc_cost,
        var_list=lib.params_with_name('Discriminator.'),
        colocate_gradients_with_ops=True)
    # Lipschitz constraint via hard weight clipping.
    clip_ops = []
    for var in lib.params_with_name('Discriminator'):
        clip_bounds = [-.01, .01]
        clip_ops.append(
            tf.assign(
                var,
                tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])))
    clip_disc_weights = tf.group(*clip_ops)
# Fragment (TF1): tail of a discriminator, then WGAN setup over integer pixel
# input (cut mid tf.assign call in the clipping loop).
    output = lib.ops.batchnorm.Batchnorm('Discriminator.BN3', [0,2,3], output)
    output = LeakyReLU(output)
    output = tf.reshape(output, [-1, 4*4*4*DIM])
    output = lib.ops.linear.Linear('Discriminator.Output', 4*4*4*DIM, 1, output)
    return tf.reshape(output, [-1])

real_data_int = tf.placeholder(tf.int32, shape=[BATCH_SIZE, OUTPUT_DIM])
# Scale uint8-valued ints to [-1, 1].
real_data = 2*((tf.cast(real_data_int, tf.float32)/255.)-.5)
fake_data = Generator(BATCH_SIZE)
disc_real = Discriminator(real_data)
disc_fake = Discriminator(fake_data)

gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')

if MODE == 'wgan':
    gen_cost = -tf.reduce_mean(disc_fake)
    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(gen_cost, var_list=gen_params)
    disc_train_op = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(disc_cost, var_list=disc_params)
    clip_ops = []
    for var in disc_params:
        clip_bounds = [-.01, .01]
        clip_ops.append(
            tf.assign(
                var,
# Fragment (TF1): joint-training (JTR) cost assembly and Adam train ops for a
# "New.Generator" against a shared discriminator (cut mid-statement in the
# trailing else branch).
        JTR_new_acc = tf.add_n(JTR_new_accs) / n_divide
        # When REDUCE_BATCH is set the costs are already batch-reduced;
        # otherwise average over the n_divide splits.
        JTR_disc_cost = JTR_disc_cost if REDUCE_BATCH else JTR_disc_cost / n_divide
        JTR_gen_cost = JTR_gen_cost if REDUCE_BATCH else JTR_gen_cost / n_divide
    else:
        JTR_disc_cost = tf.constant(0.)
        JTR_gen_cost = tf.constant(0.)
    disc_cost_all = disc_cost + JTR_disc_cost
    gen_cost_all = gen_cost + RA_cost + JTR_gen_cost
    gen_train_op = tf.train.AdamOptimizer(
        learning_rate=G_LR, beta1=BETA1_G, beta2=0.9).minimize(
            gen_cost_all,
            var_list=lib.params_with_name('New.Generator'),
            colocate_gradients_with_ops=True)
    disc_train_op = tf.train.AdamOptimizer(
        learning_rate=D_LR, beta1=BETA1_D, beta2=0.9).minimize(
            disc_cost_all,
            var_list=lib.params_with_name('Discriminator.'),
            colocate_gradients_with_ops=True)
else:
    # No JTR term: train on the plain GAN costs.
    gen_train_op = tf.train.AdamOptimizer(
        learning_rate=G_LR, beta1=BETA1_G, beta2=0.9).minimize(
            gen_cost,
            var_list=lib.params_with_name('New.Generator'),
            colocate_gradients_with_ops=True)
    disc_train_op = tf.train.AdamOptimizer(
        learning_rate=D_LR, beta1=BETA1_D, beta2=0.9).minimize(
# Fragment (Python 2 / TF1): gradient penalty plus L2 regularization on the
# critic.  The actual train ops are commented out in this chunk -- presumably
# defined elsewhere or deliberately disabled; confirm.
    fake_data = lib.concat([fake_data_splits[i], fake_data_splits[len(DEVICES_A)+i]], axis=0)
    alpha = tf.random_uniform(
        shape=[BATCH_SIZE/len(DEVICES_A),1],
        minval=0.,
        maxval=1.
    )
    differences = fake_data - real_data
    interpolates = real_data + (alpha*differences)
    gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    gradient_penalty = 10*tf.reduce_mean((slopes-1.)**2)
    disc_costs.append(gradient_penalty)

disc_cost = tf.add_n(disc_costs) / len(DEVICES_A)
disc_params = lib.params_with_name('Discriminator.')
# L2 penalty on all critic params except biases (names ending in '.b').
disc_cost += RHO * tf.add_n([tf.nn.l2_loss(x) for x in disc_params if not x.name.endswith('.b')])

if DECAY:
    decay = tf.maximum(0., 1.-(tf.cast(_iteration, tf.float32)/ITERS))
else:
    decay = 1.
# disc_train_op = tf.train.AdamOptimizer(learning_rate=LR*decay, beta1=MOMENTUM_D, beta2=0.9).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.'), colocate_gradients_with_ops=True)

gen_costs = []
for device in DEVICES:
    with tf.device(device):
        gen_costs.append(-tf.reduce_mean(Discriminator(Generator(GEN_BS_MULTIPLE*BATCH_SIZE/len(DEVICES)))))
gen_cost = tf.add_n(gen_costs) / len(DEVICES)
# gen_train_op = tf.train.AdamOptimizer(learning_rate=LR*decay, beta1=MOMENTUM_G, beta2=0.9).minimize(gen_cost, var_list=lib.params_with_name('Generator'), colocate_gradients_with_ops=True)
def main(): random.seed(SEED) np.random.seed(SEED) if (pos == 1): stringGenerator = TextGenerator( '../corpus_uncond_pos/index2word.pickle', '../corpus_uncond_pos/word2index.pickle', '../corpus_uncond_pos/input_file.txt', '../corpus_uncond_pos/target_file.txt', '../corpus_uncond_pos/vocab_creation_file.txt') else: stringGenerator = TextGenerator( '../corpus_uncond_neg/index2word.pickle', '../corpus_uncond_neg/word2index.pickle', '../corpus_uncond_neg/input_file.txt', '../corpus_uncond_neg/target_file.txt', '../corpus_uncond_neg/vocab_creation_file.txt') generated_num_inp = stringGenerator.sentencesCount_inp generated_num_test = stringGenerator.sentencesCount_test with open(starting_word_file, "w+") as op: for i in range(len(good_ids)): tokensSequence = [good_ids[i]] tokensSequence += [0] * (SEQ_LENGTH - 1) strSentence = " ".join([str(index) for index in tokensSequence]) + "\n" op.write(strSentence) assert START_TOKEN == 0 gen_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH) start_data_loader = Gen_Data_loader(BATCH_SIZE, SEQ_LENGTH) likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE, SEQ_LENGTH) vocab_size = len(stringGenerator.index2Word) dis_data_loader = Dis_dataloader(SEQ_LENGTH) #Embedding matrix from google vec: GLOVE_DIR = '../corpus_uncond_neg/glove.6B/' embeddings_index = {} f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) for line in f: values = line.split() word = values[0] coefs = np.asarray(values[1:], dtype='float32') embeddings_index[word] = coefs f.close() print('Found %s word vectors.' % len(embeddings_index)) EmbeddingMatrix = np.zeros((vocab_size, EMB_DIM)) #embedding_matrix = np.zeros((vocab_size, EMBEDDING_DIM)) for i, word in index2word.items(): embedding_vector = embeddings_index.get(word) if embedding_vector is not None: # words not found in embedding index will be all-zeros. 
EmbeddingMatrix[i] = embedding_vector else: EmbeddingMatrix[i] = np.random.uniform(-1, 1, EMB_DIM) if (pos == 1): np.savez('embedding_pos.npz', EmbeddingMatrix) else: np.savez('embedding_neg.npz', EmbeddingMatrix) ############################################################################### best_score = 1000 generator = get_trainable_model(vocab_size) real_inputs_discrete = tf.placeholder(tf.int32, shape=[BATCH_SIZE, SEQ_LEN]) real_inputs = tf.one_hot(real_inputs_discrete, vocab_size) print(real_inputs) disc_real = Discriminator(real_inputs) disc_fake = Discriminator(generator.g_predictions_wgan) disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real) gen_cost = -tf.reduce_mean(disc_fake) # WGAN lipschitz-penalty alpha = tf.random_uniform(shape=[BATCH_SIZE, 1, 1], minval=0., maxval=1.) differences = generator.g_predictions_wgan - real_inputs interpolates = real_inputs + (alpha * differences) gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0] slopes = tf.sqrt( tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2])) gradient_penalty = tf.reduce_mean((slopes - 1.)**2) disc_cost += LAMBDA * gradient_penalty gen_params = generator.g_params disc_params = lib.params_with_name('Discriminator') gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize( gen_cost, var_list=gen_params) disc_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize( disc_cost, var_list=disc_params) config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 config.gpu_options.allow_growth = True sess = tf.Session(config=config) sess.run(tf.initialize_all_variables()) saver = tf.train.Saver() #generate_samples(sess, target_lstm, 64, 10000, positive_file) stringGenerator.saveSamplesToFile_inp(SEQ_LENGTH, generated_num_inp, positive_file) stringGenerator.saveSamplesToFile_inp_text(SEQ_LENGTH, generated_num_inp, inp_ref_file) 
stringGenerator.saveSamplesToFile_test_text(SEQ_LENGTH, generated_num_test, test_ref_file) stringGenerator.saveSamplesToFile_test(SEQ_LENGTH, generated_num_test, test_file) gen_data_loader.create_batches(positive_file) start_data_loader.create_batches(starting_word_file) if (pos == 1): log = open('log_pos_was/experiment-log.txt', 'w') else: log = open('log_neg_was/experiment-log.txt', 'w') # pre-train generator print 'Start pre-training...' log.write('pre-training...\n') EPOCHS = 0 load = 0 if (load == 1): epoch = 5 if (pos == 1): saver.restore( sess, "/target_generate_pos_was/pretrain" + str(epoch) + ".ckpt") else: saver.restore( sess, "/target_generate_pos_was/pretrain" + str(epoch) + ".ckpt") EPOCHS = EPOCHS + epoch for epoch in xrange(PRE_EPOCH_NUM): print 'pre-train epoch:', epoch if (pos == 1): eval_file2 = 'target_generate_pos_was/eval_file' + '_pretrain_gen_' + str( EPOCHS + epoch) + '.txt' else: eval_file2 = 'target_generate_neg_was/eval_file' + '_pretrain_gen_' + str( EPOCHS + epoch) + '.txt' loss = pre_train_epoch(sess, generator, gen_data_loader) if epoch % 5 == 0: generate_samples2(sess, generator, BATCH_SIZE, len(good_ids), eval_file2, start_data_loader) generate_samples(sess, generator, BATCH_SIZE, len(good_ids), eval_file, start_data_loader) likelihood_data_loader.create_batches(positive_file) train_loss = target_loss(sess, generator, likelihood_data_loader) likelihood_data_loader.create_batches(test_file) test_loss = target_loss(sess, generator, likelihood_data_loader) print 'pre-train epoch ', epoch, 'test_loss ', test_loss, 'train_loss', train_loss buffer = str(epoch) + ' test_loss : ' + str( test_loss) + ' train_loss : ' + str(train_loss) + '\n' log.write(buffer) if (pos == 1): saver.save(sess, 'target_generate_pos_was/pretrain', global_step=EPOCHS + epoch) else: saver.save(sess, 'target_generate_neg_was/pretrain', global_step=EPOCHS + epoch) if (pos == 1): eval_file2 = 'target_generate_pos_was/eval_file' + '_pretrain_gen_' + str( EPOCHS + 
epoch) + '.txt' else: eval_file2 = 'target_generate_neg_was/eval_file' + '_pretrain_gen_' + str( EPOCHS + epoch) + '.txt' generate_samples2(sess, generator, BATCH_SIZE, len(good_ids), eval_file2, start_data_loader) likelihood_data_loader.create_batches(positive_file) train_loss = target_loss(sess, generator, likelihood_data_loader) likelihood_data_loader.create_batches(test_file) test_loss = target_loss(sess, generator, likelihood_data_loader) print 'pre-train epoch ', epoch, 'test_loss ', test_loss, 'train_loss', train_loss buffer = str(epoch) + ' test_loss : ' + str( test_loss) + ' train_loss : ' + str(train_loss) + '\n' log.write(buffer) def batch_iter(data, batch_size, num_epochs): """ Generates a batch iterator for a dataset. """ data = np.array(data) data_size = len(data) num_batches_per_epoch = int(len(data) / batch_size) + 1 for epoch in range(num_epochs): # Shuffle the data at each epoch shuffle_indices = np.random.permutation(np.arange(data_size)) shuffled_data = data[shuffle_indices] for batch_num in range(num_batches_per_epoch): start_index = batch_num * batch_size end_index = min((batch_num + 1) * batch_size, data_size) yield np.array(shuffled_data[start_index:end_index], dtype='int32') def load_train_data(file): """ Returns input vectors, labels, vocabulary, and inverse vocabulary. """ examples = [] with open(file) as fin: for line in fin: line = line.strip() line = line.split() parse_line = [int(x) for x in line] examples.append(parse_line) return np.array(examples) EPOCHS = EPOCHS + PRE_EPOCH_NUM print 'Start training discriminator...' 
for epoch in range(dis_alter_epoch): print('disctrainingepoch: ' + str(epoch)) # train discriminator pos_data = load_train_data(positive_file) pos_batches = batch_iter(pos_data, BATCH_SIZE, 1) for i in range(int(len(pos_data) / BATCH_SIZE) + 1): A = pos_batches.next() if (np.shape(A)[0] == BATCH_SIZE): _disc_cost, _ = sess.run([disc_cost, disc_train_op], feed_dict={real_inputs_discrete: A}) else: break if (epoch % 30 == 0): if (pos == 1): saver.save(sess, 'target_generate_pos_was/disc', global_step=EPOCHS + epoch) else: saver.save(sess, 'target_generate_neg_was/disc', global_step=EPOCHS + epoch) EPOCHS = EPOCHS + dis_alter_epoch for iteration in xrange(ITERS): start_time = time.time() print 'training wgan...' # train discriminator pos_data = load_train_data(positive_file) pos_batches = batch_iter(pos_data, BATCH_SIZE, 1) # Train generator for ii in range(int(len(pos_data) / BATCH_SIZE) + 1): A = pos_batches.next() if (np.shape(A)[0] == BATCH_SIZE): if iteration > 0: _gen_cost, _ = sess.run( [gen_cost, gen_train_op], feed_dict={real_inputs_discrete: A}) # Train critic for pp in xrange(CRITIC_ITERS): _disc_cost, _ = sess.run( [disc_cost, disc_train_op], feed_dict={real_inputs_discrete: A}) else: break if ii % 10 == 0: if (pos == 1): eval_file2 = 'target_generate_pos_was/eval_file_reinforce_' + str( EPOCHS + iteration) + '_' + str(ii) + '.txt' else: eval_file2 = 'target_generate_neg_was/eval_file_reinforce_' + str( EPOCHS + iteration) + '_' + str(ii) + '.txt' generate_samples2(sess, generator, BATCH_SIZE, len(good_ids), eval_file2, start_data_loader) generate_samples(sess, generator, BATCH_SIZE, len(good_ids), eval_file, start_data_loader) hyp = [] likelihood_data_loader.create_batches(positive_file) train_loss = target_loss(sess, generator, likelihood_data_loader) likelihood_data_loader.create_batches(test_file) test_loss = target_loss(sess, generator, likelihood_data_loader) print 'reinf-train epoch ', iteration, 'test_loss ', test_loss, 'train_loss', train_loss, 
'disc_cost', _disc_cost buffer = str(iteration) + ' test_loss : ' + str( test_loss) + ' train_loss : ' + str( train_loss) + ' _disc_cost ' + str(_disc_cost) + '\n' log.write(buffer) log.close()
disc_real_ones, disc_real_zeros, disc_reals = Discriminator(real_images) disc_fake_ones, disc_fake_zeros, disc_fakes = Discriminator(fake_images) # Gen objective: push D(fake) to one # gen_cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(disc_fake, tf.ones_like(disc_fake))) gcs = disc_fake_ones # ggs = [tf.global_norm(tf.gradients(gc, lib.params_with_name('Generator'))) for gc in gcs] gen_cost = tf.reduce_mean(tf.concat(0, disc_fake_ones)) # Discrim objective: push D(fake) to zero, and push D(real) to one # disc_cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(disc_fake, tf.zeros_like(disc_fake))) # disc_cost += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(disc_real, tf.ones_like(disc_real))) dcs = [(x+y)/2. for x,y in zip(disc_fake_zeros,disc_real_ones)] disc_cost = tf.reduce_mean(tf.concat(0, disc_fake_zeros + disc_real_ones)) gen_train_op = tf.train.AdamOptimizer(learning_rate=5e-4, beta1=0.5).minimize(gen_cost, var_list=lib.params_with_name('Generator')) disc_train_op = tf.train.AdamOptimizer(learning_rate=5e-4, beta1=0.5).minimize(disc_cost, var_list=lib.params_with_name('Discriminator')) train_data, dev_data, test_data = lib.mnist.load(BATCH_SIZE, BATCH_SIZE) def inf_train_gen(): while True: for data in train_data(): yield data with tf.Session() as session: session.run(tf.initialize_all_variables()) gen = inf_train_gen() gen = lib.audio_dataset.feed_epoch(DATA_PATH, N_FILES, BATCH_SIZE, SEQ_LEN, 0, 0, 0)
return tf.reshape(output, [-1]) # config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 1/10 # config.gpu_options.allow_growth = True real_x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, OUTPUT_DIM]) fake_x = Generator(BATCH_SIZE) disc_real = Discriminator(real_x) disc_fake = Discriminator(fake_x) # y_ = tf.placeholder(tf.float32, shape=[None, 10]) gen_params = lib.params_with_name('Generator') disc_params = lib.params_with_name('Discriminator') gen_cost = -tf.reduce_mean(disc_fake) disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real) alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.) differences = fake_x - real_x interpolates = real_x + (alpha * differences) gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0] slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1])) gradient_penalty = tf.reduce_mean((slopes - 1.)**2) disc_cost += LAMBDA * gradient_penalty
gradients_2 = tf.gradients(Discriminator2(interpolates), [interpolates])[0] slopes_2 = tf.sqrt(tf.reduce_sum(tf.square(gradients_2), reduction_indices=[1])) lipschitz_penalty_2 = tf.reduce_mean((slopes_2-1.)**2) wgan_disc_2_cost = disc_2_cost disc_2_cost += 10*lipschitz_penalty_2 lipschitz_penalty_2 = tf.reduce_mean(slopes_2) gradients_3 = tf.gradients(Discriminator3(interpolates), [interpolates])[0] slopes_3 = tf.sqrt(tf.reduce_sum(tf.square(gradients_3), reduction_indices=[1])) lipschitz_penalty_3 = tf.reduce_mean((slopes_3-1.)**2) wgan_disc_3_cost = disc_3_cost disc_3_cost += 10*lipschitz_penalty_3 lipschitz_penalty_3 = tf.reduce_mean(slopes_3) if len(lib.params_with_name('Generator')): gen_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(gen_cost, var_list=lib.params_with_name('Generator.')) # gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-3, beta1=0.5).minimize(gen_cost, var_list=lib.params_with_name('Generator')) else: gen_train_op = None disc_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(disc_cost, var_list=lib.params_with_name('Discriminator.')) disc_2_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(disc_2_cost, var_list=lib.params_with_name('Discriminator2.')) disc_3_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(disc_3_cost, var_list=lib.params_with_name('Discriminator3.')) # disc_train_op = tf.train.AdamOptimizer(learning_rate=5e-4, beta1=0.5).minimize(disc_cost, var_list=lib.params_with_name('Discriminator')) frame_i = [0] def generate_image(frame, true_dist): samples = session.run(fake_data) lib.save_images.save_images(samples[:100], 'samples_{}.jpg'.format(frame))