def generator(self, t_z, t_text_embedding):
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s/2), int(s/4), int(s/8), int(s/16)

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))
    tf.summary.tensor_summary("Reduced voice embedding", reduced_text_embedding)
    #z_concat = tf.concat(1, [t_z, reduced_text_embedding])
    z_concat = t_text_embedding
    z_ = ops.linear(z_concat, self.options['gf_dim']*8*s16*s16, 'g_h0_lin')
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim']*4], name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))

    h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim']*2], name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))

    h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim']*1], name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))

    # Classify class
    h3_new = ops.lrelu(self.g_bn4(ops.conv2d(h3, self.options['df_dim'], 1, 1, 1, 1, name="g_conv")))
    h3_new = tf.reshape(h3_new, [self.options['batch_size'], -1])
    class_logit = ops.linear(h3_new, self.options['num_class'], 'g_h3_embedding')

    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

    return (tf.tanh(h4)/2. + 0.5), class_logit
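# The functions in this section all call into a small `ops` helper module.
# The sketch below is only an assumption about those helpers, inferred from
# how they are called (lrelu, linear, conv2d, deconv2d); the real module may
# differ in defaults and initialization.
import tensorflow as tf

def lrelu(x, leak=0.2, name='lrelu'):
    # Leaky ReLU: max(x, leak * x).
    return tf.maximum(x, leak * x, name=name)

def linear(x, output_size, scope):
    # Fully connected layer: x @ W + b.
    with tf.variable_scope(scope):
        w = tf.get_variable('w', [x.get_shape().as_list()[-1], output_size],
                            initializer=tf.random_normal_initializer(stddev=0.02))
        b = tf.get_variable('b', [output_size], initializer=tf.zeros_initializer())
        return tf.matmul(x, w) + b

def conv2d(x, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, name='conv2d'):
    # Strided convolution; the default stride of 2 halves the spatial size,
    # which matches the #32/#16/#8/#4 annotations in the discriminators.
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, x.get_shape().as_list()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=0.02))
        return tf.nn.conv2d(x, w, strides=[1, d_h, d_w, 1], padding='SAME')

def deconv2d(x, output_shape, k_h=5, k_w=5, d_h=2, d_w=2, name='deconv2d'):
    # Transposed convolution producing output_shape = [batch, h, w, channels].
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], x.get_shape().as_list()[-1]],
                            initializer=tf.truncated_normal_initializer(stddev=0.02))
        return tf.nn.conv2d_transpose(x, w, output_shape=output_shape,
                                      strides=[1, d_h, d_w, 1])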
def discriminator(self, image, t_text_embedding):
    update_collection = tf.GraphKeys.UPDATE_OPS
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        h0 = ops.lrelu(
            ops.conv2d_sn(image, self.options['df_dim'],
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h0_conv'))  #32
        h1 = ops.lrelu(self.d_bn1(
            ops.conv2d_sn(h0, self.options['df_dim'] * 2,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h1_conv')))  #16
        h2 = ops.lrelu(self.d_bn2(
            ops.conv2d_sn(h1, self.options['df_dim'] * 4,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h2_conv')))  #8
        h3 = ops.lrelu(self.d_bn3(
            ops.conv2d_sn(h2, self.options['df_dim'] * 8,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h3_conv')))  #4
        h3_new = ops.lrelu(self.d_bn4(
            ops.conv2d_sn(h3, self.options['df_dim'] * 8, 1, 1, 1, 1,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h3_conv_new')))  #4

        h3_new = tf.reshape(h3_new, [self.options['batch_size'], -1])
        image_embedding = ops.linear(h3_new, self.options['t_dim'], 'd_h3_embedding')

        # Embedding matrix of condition
        reduced_text_embeddings = ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding')

        # Scalar output function
        h4 = ops.linear(image_embedding, 1, 'd_scalar_output')

        discriminator_output_logit = tf.reduce_sum(
            tf.multiply(reduced_text_embeddings, image_embedding), 1, keepdims=True) + h4

        return discriminator_output_logit, discriminator_output_logit
def downsampling(self, image, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()

    # Downsample to 32*32*256
    h0 = ops.lrelu(ops.conv2d(image, 256, name='down_h0_conv'))  #32
    # Downsample to 16*16*512
    h1 = ops.lrelu(self.down_bn1(ops.conv2d(h0, 512, name='down_h1_conv')))  #16
    return h1
def discriminator(self, image, t_text_embedding, reuse=False):
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  #32
        h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim'] * 2, name='d_h1_conv')))  #16
        h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim'] * 4, name='d_h2_conv')))  #8
        h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim'] * 8, name='d_h3_conv')))  #4
        h3_new = ops.lrelu(self.d_bn4(
            ops.conv2d(h3, self.options['df_dim'] * 8, 1, 1, 1, 1, name='d_h3_conv_new')))  #4

        h3_new = tf.reshape(h3_new, [self.options['batch_size'], -1])
        image_embedding = ops.linear(h3_new, self.options['t_dim'], 'd_h3_embedding')

        # Embedding matrix of condition
        reduced_text_embeddings = ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding')

        # Scalar output function
        h4 = ops.linear(image_embedding, 1, 'd_scalar_output')

        discriminator_output_logit = tf.reduce_sum(
            tf.multiply(reduced_text_embeddings, image_embedding), 1, keepdims=True) + h4

        return tf.nn.sigmoid(discriminator_output_logit), discriminator_output_logit
def discriminator(self, image, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()

    h0 = ops.lrelu(ops.conv2d(image, 64, name='d_h0_conv'))  #32
    h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, 128, name='d_h1_conv')))  #16
    h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, 256, name='d_h2_conv')))  #8
    h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, 512, name='d_h3_conv')))  #4
    h4 = ops.linear(tf.reshape(h3, [self.options['batch_size'], -1]), 1, 'd_h3_lin')
    return tf.nn.sigmoid(h4), h4
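# Hedged, self-contained sketch (not part of the original code): the
# (tf.nn.sigmoid(h4), h4) pairs returned by the discriminators above are
# typically fed into the standard sigmoid-cross-entropy GAN losses.
# Placeholder logits stand in for the real network outputs.
import tensorflow as tf

real_logit = tf.placeholder(tf.float32, [None, 1])
fake_logit = tf.placeholder(tf.float32, [None, 1])

d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    labels=tf.ones_like(real_logit), logits=real_logit))
d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    labels=tf.zeros_like(fake_logit), logits=fake_logit))
d_loss = d_loss_real + d_loss_fake

# Non-saturating generator loss: push fake logits toward the "real" label.
g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    labels=tf.ones_like(fake_logit), logits=fake_logit))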
def sampler(self, t_z, t_text_embedding):
    tf.get_variable_scope().reuse_variables()

    s = self.options['image_size']
    s2, s4, s8, s16 = int(s/2), int(s/4), int(s/8), int(s/16)

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))
    z_concat = tf.concat([t_z, reduced_text_embedding], 1)
    z_ = ops.linear(z_concat, self.options['gf_dim']*8*s16*s16, 'g_h0_lin')
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0, train=False))

    h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim']*4], name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1, train=False))

    h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim']*2], name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2, train=False))

    h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim']*1], name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3, train=False))

    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

    return (tf.tanh(h4)/2. + 0.5)
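# Hedged, self-contained illustration (toy layer, not the original model) of
# the reuse pattern behind the sampler() above: it rebuilds the generator graph
# with reuse_variables() so the same weights are shared, while batch norm runs
# with train=False.
import tensorflow as tf

def toy_generator_head(x):
    return tf.layers.dense(x, 8, name='g_h0_lin')

z = tf.placeholder(tf.float32, [None, 4])
with tf.variable_scope('generator'):
    train_out = toy_generator_head(z)        # creates 'generator/g_h0_lin' weights
with tf.variable_scope('generator', reuse=True):
    sample_out = toy_generator_head(z)       # reuses the same weights, creates none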
def generator(self, t_z, t_text_embedding):
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))
    z_concat = tf.concat([t_z, reduced_text_embedding], 1)
    z_ = ops.linear(z_concat, self.options['gf_dim'] * 8 * s16 * s16, 'g_h0_lin')
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4], name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))

    h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2], name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))

    h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1], name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))

    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

    return (tf.tanh(h4) / 2. + 0.5)
def classifier(self, image, n_classes, t_training, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()

    h0 = ops.lrelu(ops.conv2d(image, self.options['ef_dim'] * 8, name='e_h0_conv'))  # 64
    h1 = ops.lrelu(slim.batch_norm(ops.conv2d(h0, self.options['ef_dim'] * 8, name='e_h1_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn1'))  # 32
    h2 = ops.lrelu(slim.batch_norm(ops.conv2d(h1, self.options['ef_dim'] * 6, name='e_h2_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn2'))  # 16
    h3 = ops.lrelu(slim.batch_norm(ops.conv2d(h2, self.options['ef_dim'] * 4, name='e_h3_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn3'))  # 8
    h4 = ops.lrelu(slim.batch_norm(ops.conv2d(h3, self.options['ef_dim'] * 2, name='e_h4_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn4'))  # 8

    h4_shape = h4.get_shape().as_list()
    h4_flat = tf.contrib.layers.flatten(h4)
    '''
    h4_flat = tf.squeeze(tf.nn.avg_pool(h4,
                                        ksize=[1, h4_shape[1], h4_shape[2], 1],
                                        strides=[1, h4_shape[1], h4_shape[2], 1],
                                        padding='SAME',
                                        name='global_avg_pool'))
    '''
    fc1 = tf.nn.relu(ops.linear(h4_flat, 1024, 'fl_01'))
    fc2 = ops.linear(fc1, n_classes, 'fl_02')
    return fc2
def discriminator(self, image, t_text_embedding, reuse=False):
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        # 64
        print('check')
        print(image.shape)
        h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'] * 4, d_w=16, name='d_h0_conv'))  # 32
        print(h0.shape)
        # h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim']*2, name='d_h1_conv')))  #16
        # print(h1.shape)
        # h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim']*4, name='d_h2_conv')))  #8
        # print(h2.shape)
        # h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim']*8, name='d_h3_conv')))  #4
        # print(h3.shape)
        h3 = h0

        # ADD TEXT EMBEDDING TO THE NETWORK
        # 256
        print('check')
        print(t_text_embedding.shape)
        reduced_text_embeddings = ops.lrelu(
            ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
        print(reduced_text_embeddings.shape)
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
        print(reduced_text_embeddings.shape)
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
        print(reduced_text_embeddings.shape)
        tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')
        print(tiled_embeddings.shape)

        print(h3)
        print(h3.shape)
        print(tiled_embeddings)
        print(tiled_embeddings.shape)
        h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
        # error:
        # ValueError: Dimensions must be equal, but are 176 and 1 for 'd_h3_conv_new/Conv2D'
        # (op: 'Conv2D') with input shapes: [64,4,4,176], [1,1,1,88].
        tmp = ops.conv2d(h3_concat, self.options['df_dim'] * 4, 1, 1, 1, 1, name='d_h3_conv_new')
        h3_new = ops.lrelu(self.d_bn4(tmp))  # 4

        h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')
        return tf.nn.sigmoid(h4), h4
def generator(t_z, t_text_embedding):
    with tf.variable_scope('generator', reuse=False):
        reduce_embedding_size = 256
        reduced_text_embedding = ops.lrelu(
            ops.linear(t_text_embedding, reduce_embedding_size, 'g_embedding'))
        z_concat = tf.concat([t_z, reduced_text_embedding], axis=1)
        z_concat = ops.linear(z_concat, 64 * 8 * 3 * 13, 'g_h0_lin')

        w_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.02)
        b_init = tf.constant_initializer(0.0)

        x = tf.reshape(z_concat, [-1, 3, 13, 64 * 8])
        x = tf.contrib.layers.batch_norm(x)
        x = lrelu(x)

        x = tf.layers.conv2d_transpose(x, filters=256, kernel_size=[3, 6], strides=[1, 2],
                                       padding='valid', kernel_initializer=w_init,
                                       bias_initializer=b_init)
        x = tf.contrib.layers.batch_norm(x)
        x = lrelu(x)

        x = tf.layers.conv2d_transpose(x, filters=128, kernel_size=[4, 6], strides=[2, 2],
                                       padding='same', kernel_initializer=w_init,
                                       bias_initializer=b_init)
        x = tf.contrib.layers.batch_norm(x)
        x = lrelu(x)

        x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=[4, 6], strides=[2, 2],
                                       padding='same', kernel_initializer=w_init,
                                       bias_initializer=b_init)
        x = tf.contrib.layers.batch_norm(x)
        x = lrelu(x)

        x = tf.layers.conv2d_transpose(x, filters=3, kernel_size=[4, 6], strides=[2, 2],
                                       padding='same', kernel_initializer=w_init,
                                       bias_initializer=b_init)
        x = tf.nn.tanh(x)
        return x
def encoder(self, image, t_training, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()

    h0 = ops.lrelu(ops.conv2d(image, self.options['ef_dim'] * 8, name='e_h0_conv'))  # 64
    h1 = ops.lrelu(slim.batch_norm(ops.conv2d(h0, self.options['ef_dim'] * 8, name='e_h1_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn1'))  # 32
    h2 = ops.lrelu(slim.batch_norm(ops.conv2d(h1, self.options['ef_dim'] * 6, name='e_h2_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn2'))  # 16
    h3 = ops.lrelu(slim.batch_norm(ops.conv2d(h2, self.options['ef_dim'] * 4, name='e_h3_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn3'))  # 8
    h4 = ops.lrelu(slim.batch_norm(ops.conv2d(h3, self.options['ef_dim'] * 2, name='e_h4_conv'),
                                   reuse=reuse, is_training=t_training, scope='e_bn4'))  # 8

    h4_shape = h4.get_shape().as_list()
    #h4_flat = tf.contrib.layers.flatten(h4)
    #h5 = ops.linear(h4_flat, 1024, 'fl_e_01')
    #h6 = ops.linear(h5, n_classes, 'fl_e_02')
    return h4, h4_shape
def discriminator(self, image, t_text_embedding):
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  #32
        h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim']*2, name='d_h1_conv')))  #16
        h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim']*4, name='d_h2_conv')))  #8
        h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim']*8, name='d_h3_conv')))  #4
        h4 = ops.linear(tf.reshape(h3, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

        # ADD TEXT EMBEDDING TO THE NETWORK
        #reduced_text_embeddings = ops.lrelu(ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
        #reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
        #reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
        #tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')
        #h3_concat = tf.concat(3, [h3, tiled_embeddings], name='h3_concat')
        #h3_new = ops.lrelu(self.d_bn4(ops.conv2d(h3_concat, self.options['df_dim']*8, 1, 1, 1, 1, name='d_h3_conv_new')))  #4
        #h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

        return h4, h4
def discriminator(self, image, t_text_embedding):
    update_collection = tf.GraphKeys.UPDATE_OPS
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        h0 = ops.lrelu(
            ops.conv2d_sn(image, self.options['df_dim'],
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h0_conv'))  #32
        h1 = ops.lrelu(self.d_bn1(
            ops.conv2d_sn(h0, self.options['df_dim'] * 2,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h1_conv')))  #16
        h2 = ops.lrelu(self.d_bn2(
            ops.conv2d_sn(h1, self.options['df_dim'] * 4,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h2_conv')))  #8
        h3 = ops.lrelu(self.d_bn3(
            ops.conv2d_sn(h2, self.options['df_dim'] * 8,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h3_conv')))  #4

        # ADD TEXT EMBEDDING TO THE NETWORK
        reduced_text_embeddings = ops.lrelu(
            ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
        tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')

        h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
        h3_new = ops.lrelu(self.d_bn4(
            ops.conv2d_sn(h3_concat, self.options['df_dim'] * 8, 1, 1, 1, 1,
                          spectral_normed=True,
                          update_collection=update_collection,
                          name='d_h3_conv_new')))  #4

        h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

        return h4, h4
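# Hedged sketch (illustrative, not from the original code): spectral-normalized
# discriminators like the two conv2d_sn variants above are commonly trained
# with the hinge loss instead of sigmoid cross-entropy. Placeholder logits
# stand in for the discriminator outputs.
import tensorflow as tf

real_logit = tf.placeholder(tf.float32, [None, 1])
fake_logit = tf.placeholder(tf.float32, [None, 1])

d_loss = tf.reduce_mean(tf.nn.relu(1.0 - real_logit)) + \
         tf.reduce_mean(tf.nn.relu(1.0 + fake_logit))
g_loss = -tf.reduce_mean(fake_logit)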
def discriminator(self, image, t_text_embedding, reuse=False):
    with tf.variable_scope('discriminator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  #32
        h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim']*2, name='d_h1_conv')))  #16
        h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim']*4, name='d_h2_conv')))  #8
        h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim']*8, name='d_h3_conv')))  #4

        # ADD TEXT EMBEDDING TO THE NETWORK
        reduced_text_embeddings = ops.lrelu(
            ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
        tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')

        h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
        h3_new = ops.lrelu(self.d_bn4(
            ops.conv2d(h3_concat, self.options['df_dim']*8, 1, 1, 1, 1, name='d_h3_conv_new')))  #4

        h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

        return tf.nn.sigmoid(h4), h4
def discriminator(self, image, t_text_embedding, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    self.w_d = [w for w in tf.global_variables()]

    h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  #32
    h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim']*2, name='d_h1_conv')))  #16
    h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim']*4, name='d_h2_conv')))  #8
    h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim']*8, name='d_h3_conv')))  #4

    # ADD TEXT EMBEDDING TO THE NETWORK
    reduced_text_embeddings = ops.lrelu(ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
    reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
    reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
    tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')

    h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
    h3_new = ops.lrelu(self.d_bn4(
        ops.conv2d(h3_concat, self.options['df_dim']*8, 1, 1, 1, 1, name='d_h3_conv_new')))  #4

    h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

    #return tf.nn.sigmoid(h4), h4
    # Weight-clipping updates for the WGAN variant (gan_type == 1)
    clip_updates = [w.assign(tf.clip_by_value(w, -1e-2, 1e-2)) for w in self.w_d]
    if self.options['gan_type'] == 1:
        return h4, h4, clip_updates
    else:
        return tf.nn.sigmoid(h4), h4, clip_updates
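# Hedged sketch of how the clip_updates returned above could be applied in a
# WGAN-style training loop (gan_type == 1). A toy variable stands in for the
# discriminator weights collected in self.w_d; the point is the run order.
import tensorflow as tf

w = tf.get_variable('toy_d_weight', shape=[4, 4],
                    initializer=tf.random_normal_initializer())
clip_updates = [w.assign(tf.clip_by_value(w, -1e-2, 1e-2))]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # After each discriminator update, clip the weights to enforce the
    # Lipschitz constraint used by the original WGAN.
    sess.run(clip_updates)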
def image_encoder(self, image):
    with tf.variable_scope("image_vae", reuse=tf.AUTO_REUSE):
        h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='v_h0_conv'))  #32
        h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim'] * 2, name='v_h1_conv')))  #16
        h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim'] * 4, name='v_h2_conv')))  #8
        h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim'] * 8, name='v_h3_conv')))  #4
        h3_new = ops.lrelu(self.d_bn4(
            ops.conv2d(h3, self.options['df_dim'] * 8, 1, 1, 1, 1, name='v_h3_conv_new')))

        hidden = tf.reshape(h3_new, [self.options['batch_size'], -1])
        mean = ops.linear(hidden, self.options['t_dim'], 'v_enc_mean')
        std = 1e-6 + tf.nn.softplus(ops.linear(hidden, self.options['t_dim'], 'v_enc_std'))
        # Reparameterization trick: sample the code as mean + std * N(0, 1)
        code = mean + std * tf.random_normal(mean.get_shape().as_list(), 0, 1, dtype=tf.float32)

        return code, mean, std
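# Hedged, self-contained sketch (not from the original code): an encoder that
# returns (code, mean, std) with the reparameterization used above is usually
# regularized with a KL term toward a unit Gaussian. t_dim = 256 is assumed
# here only for the placeholder shapes.
import tensorflow as tf

mean = tf.placeholder(tf.float32, [None, 256])
std = tf.placeholder(tf.float32, [None, 256])

kl = 0.5 * tf.reduce_sum(
    tf.square(mean) + tf.square(std) - tf.log(1e-8 + tf.square(std)) - 1.0, axis=1)
kl_loss = tf.reduce_mean(kl)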
def generator(self, t_z, t_text_embedding):
    # Image size by default is 64 x 64
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

    # ops.linear() takes the text embedding and the text dimension;
    # leaky ReLU takes x and returns max(x, leak * x)
    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))

    # Concatenate the noise vector and the reduced text embedding along one dimension
    z_concat = tf.concat([t_z, reduced_text_embedding], axis=1)
    z_ = ops.linear(z_concat, self.options['gf_dim'] * 8 * s16 * s16, 'g_h0_lin')

    # First layer, ReLU activation
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    # Second layer, ReLU activation
    h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4], name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))

    # Third layer, ReLU activation
    h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2], name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))

    # Fourth layer, ReLU activation
    h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1], name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))

    # Output layer, tanh activation rescaled to [0, 1]
    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')
    return (tf.tanh(h4) / 2. + 0.5)
def generator(self, t_z, t_text_embedding, training=True, name='gan-generator'):
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        reduced_text_embedding = ops.lrelu(
            ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))
        z_concat = tf.concat([t_z, reduced_text_embedding], 1)
        z_ = ops.linear(z_concat, self.options['gf_dim'] * 8 * s16 * s16, 'g_h0_lin')
        h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
        h0 = tf.nn.relu(self.g_bn0(h0, train=training))

        h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4], name='g_h1')
        h1 = tf.nn.relu(self.g_bn1(h1, train=training))

        h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2], name='g_h2')
        h2 = tf.nn.relu(self.g_bn2(h2, train=training))

        h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1], name='g_h3')
        h3 = tf.nn.relu(self.g_bn3(h3, train=training))

        h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

        return (tf.tanh(h4) / 2. + 0.5)
def sampler(self, t_z, image_code, sound_code, pretrain_image_vae):
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

    # Blend the two condition codes according to pretrain_image_vae
    input_code = tf.multiply(pretrain_image_vae, image_code) + tf.multiply(1 - pretrain_image_vae, sound_code)
    reduced_text_embedding = ops.lrelu(ops.linear(input_code, self.options['t_dim'], 'v_g_embedding'))
    #z_concat = tf.concat(1, [t_z, reduced_text_embedding])
    z_concat = reduced_text_embedding
    z_ = ops.linear(z_concat, self.options['gf_dim'] * 8 * s16 * s16, 'g_h0_lin')
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0, train=False))

    h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4], name='v_g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1, train=False))

    h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2], name='v_g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2, train=False))

    h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1], name='v_g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3, train=False))

    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='v_g_h4')

    return (tf.tanh(h4) / 2. + 0.5)
def generator(self, t_z, t_text_embedding):
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s/2), int(s/4), int(s/8), int(s/16)

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))
    z_concat = tf.concat([t_z, reduced_text_embedding], 1)
    z_ = ops.linear(z_concat, self.options['gf_dim']*8*s16*s16, 'g_h0_lin')
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim']*4], name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))

    h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim']*2], name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))

    h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim']*1], name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))

    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

    return (tf.tanh(h4)/2. + 0.5)
def discriminator(self, image, t_text_embedding, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()

    h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  #32
    h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim']*2, name='d_h1_conv')))  #16
    h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim']*4, name='d_h2_conv')))  #8
    h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim']*8, name='d_h3_conv')))  #4

    # ADD TEXT EMBEDDING TO THE NETWORK
    reduced_text_embeddings = ops.lrelu(ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
    reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
    reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
    tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')

    h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
    h3_new = ops.lrelu(self.d_bn4(
        ops.conv2d(h3_concat, self.options['df_dim']*8, 1, 1, 1, 1, name='d_h3_conv_new')))  #4

    h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

    return tf.nn.sigmoid(h4), h4
def generator(self, word2vec, reuse=False):
    with tf.variable_scope("generator") as scope:
        if reuse:
            scope.reuse_variables()

        filter_sizes = [3, 4, 5]
        embedding_size = 400
        num_filters = 800
        sequence_length = 15
        embedded_chars_expanded = tf.expand_dims(word2vec, -1)  # ? x length x feature x 1

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            #with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            # W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            # b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            if i == 0:
                h = tf.nn.relu(self.gbn0(ops.conv2dv1(embedded_chars_expanded, num_filters,
                                                      filter_shape, name='g_h%d_conv' % i)))  # 16
            elif i == 1:
                h = tf.nn.relu(self.gbn1(ops.conv2dv1(embedded_chars_expanded, num_filters,
                                                      filter_shape, name='g_h%d_conv' % i)))  # 16
            else:
                h = tf.nn.relu(self.gbn2(ops.conv2dv1(embedded_chars_expanded, num_filters,
                                                      filter_shape, name='g_h%d_conv' % i)))  # 16
            # conv = tf.nn.conv2d(
            #     self.embedded_chars_expanded,
            #     W,
            #     strides=[1, 1, 1, 1],
            #     padding="VALID",
            #     name="conv")
            # Apply nonlinearity
            # h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

            # Max-pooling over the outputs
            pooled = tf.nn.max_pool(h,
                                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name="pool")
            pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        h_pool = tf.concat(pooled_outputs, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

        s = self.options['image_size']
        s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

        reduced_text_embedding = ops.lrelu(ops.linear(h_pool_flat, self.options['t_dim'], 'g_embedding'))
        z_ = ops.linear(reduced_text_embedding, self.options['gf_dim'] * 8 * s16 * s16, 'g_h0_lin')
        h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
        h0 = tf.nn.relu(self.g_bn0(h0))

        h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4], name='g_h1')
        h1 = tf.nn.relu(self.g_bn1(h1))

        h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2], name='g_h2')
        h2 = tf.nn.relu(self.g_bn2(h2))

        h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1], name='g_h3')
        h3 = tf.nn.relu(self.g_bn3(h3))

        h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

        return (tf.tanh(h4) / 2. + 0.5), h_pool_flat
def discriminator(self, image, t_text_embedding, n_classes, t_training, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()

    h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  # 64
    h1 = ops.lrelu(slim.batch_norm(ops.conv2d(h0, self.options['df_dim'] * 2, name='d_h1_conv'),
                                   reuse=reuse, is_training=t_training, scope='d_bn1'))  # 32
    h2 = ops.lrelu(slim.batch_norm(ops.conv2d(h1, self.options['df_dim'] * 4, name='d_h2_conv'),
                                   reuse=reuse, is_training=t_training, scope='d_bn2'))  # 16
    h3 = ops.lrelu(slim.batch_norm(ops.conv2d(h2, self.options['df_dim'] * 8, name='d_h3_conv'),
                                   reuse=reuse, is_training=t_training, scope='d_bn3'))  # 8
    h3_shape = h3.get_shape().as_list()

    # ADD TEXT EMBEDDING TO THE NETWORK
    reduced_text_embeddings = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
    reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
    reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
    tiled_embeddings = tf.tile(reduced_text_embeddings, [1, h3_shape[1], h3_shape[1], 1],
                               name='tiled_embeddings')

    h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
    h3_new = ops.lrelu(slim.batch_norm(
        ops.conv2d(h3_concat, self.options['df_dim'] * 8, 1, 1, 1, 1, name='d_h3_conv_new'),
        reuse=reuse, is_training=t_training, scope='d_bn4'))  # 4

    h3_flat = tf.reshape(h3_new, [self.options['batch_size'], -1])
    h4 = ops.linear(h3_flat, 1, 'd_h4_lin_rw')
    h4_aux = ops.linear(h3_flat, n_classes, 'd_h4_lin_ac')

    return tf.nn.sigmoid(h4), h4, tf.nn.sigmoid(h4_aux), h4_aux
def discriminator(self, image, t_text_embedding, reuse=False):
    # Open the current scope with reuse=reuse rather than duplicating the whole
    # body in separate if/else branches.
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  #32,48
        h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim'] * 2, name='d_h1_conv')))  #16,24
        h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim'] * 4, name='d_h2_conv')))  #8,12
        h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim'] * 8, name='d_h3_conv')))  #4,6

        # ADD TEXT EMBEDDING TO THE NETWORK
        reduced_text_embeddings = ops.lrelu(
            ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
        tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')
        # (An earlier, commented-out variant tiled with [1, 6, 6, 1] for 4x6 feature maps.)

        # h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
        h3_concat = tf.concat(values=[h3, tiled_embeddings], axis=3, name='h3_concat')
        h3_new = ops.lrelu(self.d_bn4(
            ops.conv2d(h3_concat, self.options['df_dim'] * 8, 1, 1, 1, 1, name='d_h3_conv_new')))  #4

        h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

    return tf.nn.sigmoid(h4), h4
def discriminator(self, image, t_text_embedding):
    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        if self.options['vgg']:
            h0 = ops.lrelu(ops.conv2d(image, 3, stride=1, name='d_h0a_conv'))
            h0 = ops.lrelu(ops.conv2d(h0, 3, stride=2, name='d_h0_conv'))
            h1 = ops.lrelu(self.d_bn1a(ops.conv2d(h0, self.options['df_dim'] * 2, stride=1, name='d_h1a_conv')))
            # Chain from h1 (not h0) so this block matches the d_h2/d_h3 blocks below
            h1 = ops.lrelu(self.d_bn1(ops.conv2d(h1, self.options['df_dim'] * 2, stride=2, name='d_h1_conv')))
            h2 = ops.lrelu(self.d_bn2a(ops.conv2d(h1, self.options['df_dim'] * 4, stride=1, name='d_h2a_conv')))
            h2 = ops.lrelu(self.d_bn2(ops.conv2d(h2, self.options['df_dim'] * 4, stride=2, name='d_h2_conv')))
            h3 = ops.lrelu(self.d_bn3a(ops.conv2d(h2, self.options['df_dim'] * 8, stride=1, name='d_h3a_conv')))
            h3 = ops.lrelu(self.d_bn3(ops.conv2d(h3, self.options['df_dim'] * 8, stride=2, name='d_h3_conv')))
        else:
            if self.options['extra_64']:
                image = ops.lrelu(ops.conv2d(image, 3, stride=1, name='d_h0a_conv'))
            h0 = ops.lrelu(ops.conv2d(image, self.options['df_dim'], name='d_h0_conv'))  # 32
            if self.options['extra_32']:
                h0 = ops.lrelu(self.d_bn1a(ops.conv2d(h0, self.options['df_dim'], stride=1, name='d_h1a_conv')))
            h1 = ops.lrelu(self.d_bn1(ops.conv2d(h0, self.options['df_dim'] * 2, name='d_h1_conv')))  # 16
            h2 = ops.lrelu(self.d_bn2(ops.conv2d(h1, self.options['df_dim'] * 4, name='d_h2_conv')))  # 8
            h3 = ops.lrelu(self.d_bn3(ops.conv2d(h2, self.options['df_dim'] * 8, name='d_h3_conv')))  # 4

        # ADD TEXT EMBEDDING TO THE NETWORK
        reduced_text_embeddings = ops.lrelu(
            ops.linear(t_text_embedding, self.options['t_dim'], 'd_embedding'))
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 1)
        reduced_text_embeddings = tf.expand_dims(reduced_text_embeddings, 2)
        tiled_embeddings = tf.tile(reduced_text_embeddings, [1, 4, 4, 1], name='tiled_embeddings')

        h3_concat = tf.concat([h3, tiled_embeddings], 3, name='h3_concat')
        h3_new = ops.lrelu(self.d_bn4(
            ops.conv2d(h3_concat, self.options['df_dim'] * 8, 1, 1, 1, 1, name='d_h3_conv_new')))  # 4

        h4 = ops.linear(tf.reshape(h3_new, [self.options['batch_size'], -1]), 1, 'd_h3_lin')

        return tf.nn.sigmoid(h4), h4
def generator(self, t_z, t_text_embedding):
    s = self.options['image_size']  # 64 x 64
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))  # t_dim is 256
    z_concat = tf.concat([t_z, reduced_text_embedding], 1)  # t_z is [batch_size, z_dim], z_dim = 100
    z_ = ops.linear(z_concat, self.options['gf_dim'] * 8 * s16 * s16, 'g_h0_lin')
    # [-1, 4, 4, gf_dim * 8]; gf_dim is the number of filters in the first layer
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    if self.options['vgg']:
        h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4],
                          stride=2, name='g_h1')  # 8
        h1 = tf.nn.relu(self.g_bn1(h1))
        h1 = ops.deconv2d(h1, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4],
                          stride=1, name='g_h1b')
        h1 = tf.nn.relu(self.g_bn1b(h1))

        h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2],
                          stride=2, name='g_h2')  # 16
        h2 = tf.nn.relu(self.g_bn2(h2))
        h2 = ops.deconv2d(h2, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2],
                          stride=1, name='g_h2b')
        h2 = tf.nn.relu(self.g_bn2b(h2))

        h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1],
                          stride=2, name='g_h3')  # 32
        h3 = tf.nn.relu(self.g_bn3(h3))
        h3 = ops.deconv2d(h3, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1],
                          stride=1, name='g_h3b')
        h3 = tf.nn.relu(self.g_bn3b(h3))

        h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], stride=2, name='g_h4')  # 64
        h4 = ops.deconv2d(h4, [self.options['batch_size'], s, s, 3], stride=1, name='g_h4b')

        return (tf.tanh(h4) / 2. + 0.5)
    else:
        h1 = ops.deconv2d(h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4], name='g_h1')
        h1 = tf.nn.relu(self.g_bn1(h1))

        h2 = ops.deconv2d(h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2], name='g_h2')
        h2 = tf.nn.relu(self.g_bn2(h2))

        h3 = ops.deconv2d(h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1], name='g_h3')
        h3 = tf.nn.relu(self.g_bn3(h3))

        if self.options['extra_32']:
            h3 = ops.deconv2d(h3, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1],
                              stride=1, name='g_h3b')
            h3 = tf.nn.relu(self.g_bn3b(h3))

        h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')
        if self.options['extra_64']:
            h4 = ops.deconv2d(h4, [self.options['batch_size'], s, s, 3], stride=1, name='g_h4b')

        return (tf.tanh(h4) / 2. + 0.5)