def generator(self, t_z, t_text_embedding):
    """Build the generator graph from a noise tensor and a text embedding.

    The text embedding goes through the ``'g_embedding'`` linear layer and a
    leaky ReLU, is joined with ``t_z`` along axis 1, projected by
    ``'g_h0_lin'`` and reshaped to ``[-1, s/16, s/16, gf_dim * 8]``. Four
    ``ops.deconv2d`` calls follow, with target sizes ``s/8``, ``s/4``,
    ``s/2`` and ``s`` (``s`` is ``self.options['image_size']``).

    Args:
        t_z: Tensor concatenated with the reduced text embedding along
            axis 1 (presumably the noise input -- confirm with the caller).
        t_text_embedding: Tensor fed to the ``'g_embedding'`` linear layer.

    Returns:
        ``tf.tanh(h4) / 2. + 0.5``; this rescales the tanh output from
        [-1, 1] to [0, 1].
    """
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))

    # tf.concat takes (values, axis) from TensorFlow 1.0 onwards. The former
    # call ``tf.concat(1, [...])`` used the pre-1.0 (concat_dim, values)
    # order, which the 1.x API rejects.
    z_concat = tf.concat([t_z, reduced_text_embedding], axis=1)

    z_ = ops.linear(z_concat, self.options['gf_dim'] * 8 * s16 * s16,
                    'g_h0_lin')
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    h1 = ops.deconv2d(
        h0, [self.options['batch_size'], s8, s8, self.options['gf_dim'] * 4],
        name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))

    h2 = ops.deconv2d(
        h1, [self.options['batch_size'], s4, s4, self.options['gf_dim'] * 2],
        name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))

    h3 = ops.deconv2d(
        h2, [self.options['batch_size'], s2, s2, self.options['gf_dim'] * 1],
        name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))

    # Final stage has 3 output channels and no batch norm / ReLU.
    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

    return (tf.tanh(h4) / 2. + 0.5)
def sampler(self, t_z, t_text_embedding):
    """Build the sampling graph on top of already-created generator variables.

    The current variable scope is switched to reuse mode first, so the
    layer names used here resolve to existing variables. Every
    ``self.g_bn*`` call is made with ``train=False``.

    Args:
        t_z: Tensor concatenated with the reduced text embedding on axis 1.
        t_text_embedding: Tensor fed to the ``'g_embedding'`` linear layer.

    Returns:
        ``tf.tanh(...) / 2. + 0.5`` of the last deconvolution, i.e. the tanh
        output rescaled from [-1, 1] to [0, 1].
    """
    tf.get_variable_scope().reuse_variables()

    opts = self.options
    size = opts['image_size']
    s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))
    batch_size = opts['batch_size']
    gf_dim = opts['gf_dim']

    text_features = ops.linear(t_text_embedding, opts['t_dim'], 'g_embedding')
    reduced_text_embedding = ops.lrelu(text_features)
    z_concat = tf.concat([t_z, reduced_text_embedding], 1)

    projected = ops.linear(z_concat, gf_dim * 8 * s16 * s16, 'g_h0_lin')
    net = tf.reshape(projected, [-1, s16, s16, gf_dim * 8])
    net = tf.nn.relu(self.g_bn0(net, train=False))

    # (target side, channels, layer name, batch-norm callable) per stage.
    stages = (
        (s8, gf_dim * 4, 'g_h1', self.g_bn1),
        (s4, gf_dim * 2, 'g_h2', self.g_bn2),
        (s2, gf_dim, 'g_h3', self.g_bn3),
    )
    for side, channels, layer_name, batch_norm in stages:
        net = ops.deconv2d(net, [batch_size, side, side, channels],
                           name=layer_name)
        net = tf.nn.relu(batch_norm(net, train=False))

    # Last stage: 3 channels, no batch norm / ReLU.
    out = ops.deconv2d(net, [batch_size, size, size, 3], name='g_h4')
    return tf.tanh(out) / 2. + 0.5
def generator(self, t_z):
    """Build a fixed-size generator graph from ``t_z`` alone.

    All spatial sizes are hard-coded: a linear projection reshaped to
    ``[-1, 4, 4, 512]`` is followed by ``ops.deconv2d`` calls with target
    sizes 8, 16, 32 and 64. The last one has
    ``self.options['image_c_dim']`` channels.

    Args:
        t_z: Tensor fed to the ``'g_h0_lin'`` linear layer.

    Returns:
        ``tf.tanh`` of the last deconvolution (range [-1, 1], no rescaling).
    """
    # Looked up but never used: the layer sizes below are fixed literals.
    image_size = self.options['image_size']

    batch_size = self.options['batch_size']
    base = 64  # channel multiplier used by every layer below

    projected = ops.linear(t_z, base * 8 * 4 * 4, 'g_h0_lin')
    net = tf.reshape(projected, [-1, 4, 4, base * 8])
    net = tf.nn.relu(self.g_bn0(net))

    # (target side, channels, layer name, batch-norm callable) per stage.
    stages = (
        (8, base * 4, 'g_h1', self.g_bn1),
        (16, base * 2, 'g_h2', self.g_bn2),
        (32, base * 1, 'g_h3', self.g_bn3),
    )
    for side, channels, layer_name, batch_norm in stages:
        net = ops.deconv2d(net, [batch_size, side, side, channels],
                           name=layer_name)
        net = tf.nn.relu(batch_norm(net))

    out = ops.deconv2d(
        net, [batch_size, 64, 64, self.options['image_c_dim']], name='g_h4')
    return tf.tanh(out)
def decoder(self, en_image, t_training):
    """Build the decoder graph that expands ``en_image`` to an image.

    Four ``ops.deconv2d`` + ``slim.batch_norm`` + ReLU stages with target
    sizes ``s/16``, ``s/8``, ``s/4`` and ``s/2`` are followed by a final
    3-channel deconvolution at size ``s``
    (``s`` is ``self.options['image_size']``).

    Args:
        en_image: Input tensor of the first deconvolution.
        t_training: Forwarded as ``is_training`` to every
            ``slim.batch_norm`` call.

    Returns:
        ``tf.tanh(...) / 2. + 0.5`` of the last deconvolution, i.e. the tanh
        output rescaled from [-1, 1] to [0, 1].
    """
    size = self.options['image_size']
    s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))
    batch_size = self.options['batch_size']
    df_dim = self.options['df_dim']

    # (target side, channel multiplier, deconv name, batch-norm scope).
    layers = (
        (s16, 4, 'd_h1', "d_bn1"),
        (s8, 6, 'd_h2', "d_bn2"),
        (s4, 6, 'd_h3', "d_bn3"),
        (s2, 8, 'd_h4', "d_bn4"),
    )

    net = en_image
    for side, multiplier, deconv_name, bn_scope in layers:
        net = ops.deconv2d(
            net, [batch_size, side, side, df_dim * multiplier],
            name=deconv_name)
        net = tf.nn.relu(
            slim.batch_norm(net, is_training=t_training, scope=bn_scope))

    # Last stage: 3 channels, no batch norm / ReLU.
    out = ops.deconv2d(net, [batch_size, size, size, 3], name='d_h5')
    return tf.tanh(out) / 2. + 0.5
def generator(self, t_z, t_text_embedding):
    """Build a generator graph that also emits class logits.

    ``t_z`` is not used. The reduced embedding (``'g_embedding'`` + leaky
    ReLU) is only written to a tensor summary; the ``'g_h0_lin'`` projection
    is fed the raw ``t_text_embedding``.

    Args:
        t_z: Unused.
        t_text_embedding: Tensor fed to both ``'g_embedding'`` and
            ``'g_h0_lin'``.

    Returns:
        A 2-tuple ``(image, class_logit)``: ``image`` is
        ``tf.tanh(h4) / 2. + 0.5`` and ``class_logit`` is the output of the
        ``'g_h3_embedding'`` linear layer applied to the flattened
        ``'g_conv'`` features of ``h3``.
    """
    opts = self.options
    size = opts['image_size']
    s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))
    batch_size = opts['batch_size']
    gf_dim = opts['gf_dim']

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, opts['t_dim'], 'g_embedding'))
    tf.summary.tensor_summary("Reduced voice embedding",
                              reduced_text_embedding)

    # NOTE(review): the concat with t_z is disabled and the *unreduced*
    # embedding drives the projection -- confirm this is intended.
    z_concat = t_text_embedding
    projected = ops.linear(z_concat, gf_dim * 8 * s16 * s16, 'g_h0_lin')

    h0 = tf.reshape(projected, [-1, s16, s16, gf_dim * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    h1 = ops.deconv2d(h0, [batch_size, s8, s8, gf_dim * 4], name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))

    h2 = ops.deconv2d(h1, [batch_size, s4, s4, gf_dim * 2], name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))

    h3 = ops.deconv2d(h2, [batch_size, s2, s2, gf_dim], name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))

    # Classification head branching off h3.
    class_features = ops.conv2d(h3, opts['df_dim'], 1, 1, 1, 1, name="g_conv")
    class_features = ops.lrelu(self.g_bn4(class_features))
    class_features = tf.reshape(class_features, [batch_size, -1])
    class_logit = ops.linear(class_features, opts['num_class'],
                             'g_h3_embedding')

    # Image head: 3 channels, no batch norm / ReLU.
    h4 = ops.deconv2d(h3, [batch_size, size, size, 3], name='g_h4')
    image = tf.tanh(h4) / 2. + 0.5
    return image, class_logit
def generator(self, t_z, t_text_embedding):
    """Build the generator graph from a noise tensor and a text embedding.

    Args:
        t_z: Tensor concatenated with the reduced text embedding on axis 1.
        t_text_embedding: Tensor fed to the ``'g_embedding'`` linear layer,
            whose output width is ``self.options['t_dim']``.

    Returns:
        ``tf.tanh(...) / 2. + 0.5`` of the last deconvolution, i.e. the tanh
        output rescaled from [-1, 1] to [0, 1].
    """
    opts = self.options
    size = opts['image_size']
    s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))
    batch_size = opts['batch_size']
    gf_dim = opts['gf_dim']

    # Text branch: linear projection followed by a leaky ReLU.
    text_features = ops.linear(t_text_embedding, opts['t_dim'], 'g_embedding')
    reduced_text_embedding = ops.lrelu(text_features)

    # Join both inputs along axis 1 and project to the first feature map.
    z_concat = tf.concat([t_z, reduced_text_embedding], axis=1)
    projected = ops.linear(z_concat, gf_dim * 8 * s16 * s16, 'g_h0_lin')

    # Stage 0: reshape + batch norm + ReLU.
    net = tf.reshape(projected, [-1, s16, s16, gf_dim * 8])
    net = tf.nn.relu(self.g_bn0(net))

    # Stage 1.
    net = ops.deconv2d(net, [batch_size, s8, s8, gf_dim * 4], name='g_h1')
    net = tf.nn.relu(self.g_bn1(net))

    # Stage 2.
    net = ops.deconv2d(net, [batch_size, s4, s4, gf_dim * 2], name='g_h2')
    net = tf.nn.relu(self.g_bn2(net))

    # Stage 3.
    net = ops.deconv2d(net, [batch_size, s2, s2, gf_dim], name='g_h3')
    net = tf.nn.relu(self.g_bn3(net))

    # Output stage: 3 channels, tanh rescaled to [0, 1].
    out = ops.deconv2d(net, [batch_size, size, size, 3], name='g_h4')
    return tf.tanh(out) / 2. + 0.5
def generator_2(self, input_tensor, reuse=False):
    """Build the second-stage generator graph with hard-coded sizes.

    Three ``ops.deconv2d`` + ``self.g2_bn*`` + ReLU stages with target
    shapes 32x32x256, 64x64x128 and 128x128x64 are followed by a final
    256x256x3 deconvolution.

    Args:
        input_tensor: Input tensor of the first deconvolution.
        reuse: When truthy, the current variable scope is switched to reuse
            mode before any layer is created.

    Returns:
        ``tf.tanh`` of the last deconvolution (range [-1, 1], no rescaling).
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    batch_size = self.options['batch_size']

    # (target side, channels, layer name, batch-norm callable) per stage.
    stages = (
        (32, 256, 'g2_h0', self.g2_bn0),
        (64, 128, 'g2_h1', self.g2_bn1),
        (128, 64, 'g2_h2', self.g2_bn2),
    )

    net = input_tensor
    for side, channels, layer_name, batch_norm in stages:
        net = ops.deconv2d(net, [batch_size, side, side, channels],
                           name=layer_name)
        net = tf.nn.relu(batch_norm(net))

    out = ops.deconv2d(net, [batch_size, 256, 256, 3], name='g2_h3')
    return tf.tanh(out)
def generator(self, t_z, t_text_embedding, training=True,
              name='gan-generator'):
    """Build the generator graph inside an auto-reusing variable scope.

    Args:
        t_z: Tensor concatenated with the reduced text embedding on axis 1.
        t_text_embedding: Tensor fed to the ``'g_embedding'`` linear layer.
        training: Forwarded as ``train`` to every ``self.g_bn*`` call.
        name: Name of the ``tf.variable_scope`` (opened with
            ``reuse=tf.AUTO_REUSE``) that wraps all layers.

    Returns:
        ``tf.tanh(...) / 2. + 0.5`` of the last deconvolution, i.e. the tanh
        output rescaled from [-1, 1] to [0, 1].
    """
    opts = self.options
    size = opts['image_size']
    s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        batch_size = opts['batch_size']
        gf_dim = opts['gf_dim']

        text_features = ops.linear(t_text_embedding, opts['t_dim'],
                                   'g_embedding')
        reduced_text_embedding = ops.lrelu(text_features)
        z_concat = tf.concat([t_z, reduced_text_embedding], 1)

        projected = ops.linear(z_concat, gf_dim * 8 * s16 * s16, 'g_h0_lin')
        net = tf.reshape(projected, [-1, s16, s16, gf_dim * 8])
        net = tf.nn.relu(self.g_bn0(net, train=training))

        net = ops.deconv2d(net, [batch_size, s8, s8, gf_dim * 4],
                           name='g_h1')
        net = tf.nn.relu(self.g_bn1(net, train=training))

        net = ops.deconv2d(net, [batch_size, s4, s4, gf_dim * 2],
                           name='g_h2')
        net = tf.nn.relu(self.g_bn2(net, train=training))

        net = ops.deconv2d(net, [batch_size, s2, s2, gf_dim], name='g_h3')
        net = tf.nn.relu(self.g_bn3(net, train=training))

        # Output stage: 3 channels, no batch norm / ReLU.
        out = ops.deconv2d(net, [batch_size, size, size, 3], name='g_h4')
        return tf.tanh(out) / 2. + 0.5
def sampler(self, t_z, image_code, sound_code, pretrain_image_vae):
    """Build the sampling graph from a blend of image and sound codes.

    The input code is
    ``pretrain_image_vae * image_code + (1 - pretrain_image_vae) * sound_code``.
    ``t_z`` is not used. Every ``self.g_bn*`` call is made with
    ``train=False``.

    Args:
        t_z: Unused.
        image_code: Tensor weighted by ``pretrain_image_vae``.
        sound_code: Tensor weighted by ``1 - pretrain_image_vae``.
        pretrain_image_vae: Blend weight (presumably 0 or 1 to select one of
            the two codes -- confirm with the caller).

    Returns:
        ``tf.tanh(...) / 2. + 0.5`` of the last deconvolution, i.e. the tanh
        output rescaled from [-1, 1] to [0, 1].
    """
    opts = self.options
    size = opts['image_size']
    s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))
    batch_size = opts['batch_size']
    gf_dim = opts['gf_dim']

    image_part = tf.multiply(pretrain_image_vae, image_code)
    sound_part = tf.multiply(1 - pretrain_image_vae, sound_code)
    input_code = image_part + sound_part

    reduced_text_embedding = ops.lrelu(
        ops.linear(input_code, opts['t_dim'], 'v_g_embedding'))

    # The concat with t_z is disabled; only the reduced code is projected.
    z_concat = reduced_text_embedding
    # NOTE(review): this layer is named 'g_h0_lin' while the others carry a
    # 'v_' prefix -- confirm the naming is intended.
    projected = ops.linear(z_concat, gf_dim * 8 * s16 * s16, 'g_h0_lin')

    net = tf.reshape(projected, [-1, s16, s16, gf_dim * 8])
    net = tf.nn.relu(self.g_bn0(net, train=False))

    net = ops.deconv2d(net, [batch_size, s8, s8, gf_dim * 4], name='v_g_h1')
    net = tf.nn.relu(self.g_bn1(net, train=False))

    net = ops.deconv2d(net, [batch_size, s4, s4, gf_dim * 2], name='v_g_h2')
    net = tf.nn.relu(self.g_bn2(net, train=False))

    net = ops.deconv2d(net, [batch_size, s2, s2, gf_dim], name='v_g_h3')
    net = tf.nn.relu(self.g_bn3(net, train=False))

    # Output stage: 3 channels, no batch norm / ReLU.
    out = ops.deconv2d(net, [batch_size, size, size, 3], name='v_g_h4')
    return tf.tanh(out) / 2. + 0.5
def generator(self, t_z, t_text_embedding):
    """Build the generator graph from a noise tensor and a text embedding.

    The text embedding is reduced by the ``'g_embedding'`` linear layer and
    a leaky ReLU, concatenated with ``t_z`` on axis 1, projected by
    ``'g_h0_lin'`` and reshaped to ``[-1, s/16, s/16, gf_dim * 8]``. Four
    ``ops.deconv2d`` calls with target sizes ``s/8``, ``s/4``, ``s/2`` and
    ``s`` follow (``s`` is ``self.options['image_size']``).

    Args:
        t_z: Tensor concatenated with the reduced text embedding on axis 1
            (presumably the noise input -- confirm with the caller).
        t_text_embedding: Tensor fed to the ``'g_embedding'`` linear layer.

    Returns:
        ``tf.tanh(h4)/2. + 0.5``; this rescales the tanh output from
        [-1, 1] to [0, 1].
    """
    s = self.options['image_size']
    s2, s4, s8, s16 = int(s/2), int(s/4), int(s/8), int(s/16)

    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, self.options['t_dim'], 'g_embedding'))

    # Values first, axis second: this is the tf.concat signature from
    # TensorFlow 1.0 onwards. The previous ``tf.concat(1, [...])`` followed
    # the pre-1.0 (concat_dim, values) order and fails on the 1.x API.
    z_concat = tf.concat([t_z, reduced_text_embedding], axis=1)

    z_ = ops.linear(z_concat, self.options['gf_dim']*8*s16*s16, 'g_h0_lin')
    h0 = tf.reshape(z_, [-1, s16, s16, self.options['gf_dim'] * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    h1 = ops.deconv2d(
        h0, [self.options['batch_size'], s8, s8, self.options['gf_dim']*4],
        name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))

    h2 = ops.deconv2d(
        h1, [self.options['batch_size'], s4, s4, self.options['gf_dim']*2],
        name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))

    h3 = ops.deconv2d(
        h2, [self.options['batch_size'], s2, s2, self.options['gf_dim']*1],
        name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))

    # Final stage has 3 output channels and no batch norm / ReLU.
    h4 = ops.deconv2d(h3, [self.options['batch_size'], s, s, 3], name='g_h4')

    return (tf.tanh(h4)/2. + 0.5)
def generator(self, t_z, t_text_embedding):
    """Build the generator graph, optionally with extra stride-1 layers.

    The stem (text reduction, concat with ``t_z``, ``'g_h0_lin'`` projection
    and reshape) is shared. After that:

    * ``self.options['vgg']`` truthy: each stage is a stride-2 deconvolution
      followed by a stride-1 deconvolution at the same target shape
      (``'g_h1'``/``'g_h1b'`` ... ``'g_h4'``/``'g_h4b'``).
    * otherwise: the plain ``'g_h1'`` ... ``'g_h4'`` stack, with an optional
      stride-1 ``'g_h3b'`` layer when ``self.options['extra_32']`` is truthy
      and an optional stride-1 ``'g_h4b'`` layer when
      ``self.options['extra_64']`` is truthy.

    Args:
        t_z: Tensor concatenated with the reduced text embedding on axis 1.
        t_text_embedding: Tensor fed to the ``'g_embedding'`` linear layer.

    Returns:
        ``tf.tanh(...) / 2. + 0.5`` of the last deconvolution, i.e. the tanh
        output rescaled from [-1, 1] to [0, 1].
    """
    opts = self.options
    size = opts['image_size']
    s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))
    batch_size = opts['batch_size']
    gf_dim = opts['gf_dim']

    def deconv(inputs, side, channels, name, **kwargs):
        # Thin wrapper that fills in the [batch, side, side, channels] shape.
        return ops.deconv2d(inputs, [batch_size, side, side, channels],
                            name=name, **kwargs)

    # Shared stem.
    reduced_text_embedding = ops.lrelu(
        ops.linear(t_text_embedding, opts['t_dim'], 'g_embedding'))
    z_concat = tf.concat([t_z, reduced_text_embedding], 1)
    projected = ops.linear(z_concat, gf_dim * 8 * s16 * s16, 'g_h0_lin')
    h0 = tf.reshape(projected, [-1, s16, s16, gf_dim * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))

    if opts['vgg']:
        # Each stage: stride-2 deconv, then a stride-1 deconv at equal shape.
        h1 = tf.nn.relu(self.g_bn1(
            deconv(h0, s8, gf_dim * 4, 'g_h1', stride=2)))
        h1 = tf.nn.relu(self.g_bn1b(
            deconv(h1, s8, gf_dim * 4, 'g_h1b', stride=1)))

        h2 = tf.nn.relu(self.g_bn2(
            deconv(h1, s4, gf_dim * 2, 'g_h2', stride=2)))
        h2 = tf.nn.relu(self.g_bn2b(
            deconv(h2, s4, gf_dim * 2, 'g_h2b', stride=1)))

        h3 = tf.nn.relu(self.g_bn3(
            deconv(h2, s2, gf_dim, 'g_h3', stride=2)))
        h3 = tf.nn.relu(self.g_bn3b(
            deconv(h3, s2, gf_dim, 'g_h3b', stride=1)))

        h4 = deconv(h3, size, 3, 'g_h4', stride=2)
        h4 = deconv(h4, size, 3, 'g_h4b', stride=1)
        return tf.tanh(h4) / 2. + 0.5

    # Plain stack; stride is left at ops.deconv2d's default here.
    h1 = tf.nn.relu(self.g_bn1(deconv(h0, s8, gf_dim * 4, 'g_h1')))
    h2 = tf.nn.relu(self.g_bn2(deconv(h1, s4, gf_dim * 2, 'g_h2')))
    h3 = tf.nn.relu(self.g_bn3(deconv(h2, s2, gf_dim, 'g_h3')))

    if opts['extra_32']:
        h3 = tf.nn.relu(self.g_bn3b(
            deconv(h3, s2, gf_dim, 'g_h3b', stride=1)))

    h4 = deconv(h3, size, 3, 'g_h4')
    if opts['extra_64']:
        h4 = deconv(h4, size, 3, 'g_h4b', stride=1)

    return tf.tanh(h4) / 2. + 0.5
def generator(self, word2vec, reuse=False):
    """Build a text-CNN encoder followed by a deconvolutional generator.

    Inside the ``"generator"`` variable scope, ``word2vec`` gets a trailing
    axis and is run through three convolution + batch norm + ReLU +
    max-pool branches (filter heights 3, 4 and 5; 800 filters each). The
    pooled outputs are concatenated on axis 3 and flattened to
    ``[-1, 2400]``. That vector is reduced by ``'g_embedding'``, projected
    by ``'g_h0_lin'`` and expanded through four ``ops.deconv2d`` stages.

    Args:
        word2vec: Input tensor; expanded with ``tf.expand_dims(..., -1)``
            (presumably batch x 15 x 400 word vectors -- confirm).
        reuse: When truthy, the scope is switched to reuse mode.

    Returns:
        A 2-tuple ``(image, h_pool_flat)``: ``image`` is
        ``tf.tanh(h4) / 2. + 0.5`` and ``h_pool_flat`` is the flattened
        pooled text feature vector.
    """
    with tf.variable_scope("generator") as scope:
        if reuse:
            scope.reuse_variables()

        filter_sizes = [3, 4, 5]
        embedding_size = 400
        num_filters = 800
        sequence_length = 15

        embedded_chars_expanded = tf.expand_dims(word2vec, -1)

        # One conv + batch norm + ReLU + max-pool branch per filter size;
        # each branch has its own batch-norm callable.
        branch_norms = (self.gbn0, self.gbn1, self.gbn2)
        pooled_outputs = []
        for index, (filter_size, batch_norm) in enumerate(
                zip(filter_sizes, branch_norms)):
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            conv = ops.conv2dv1(embedded_chars_expanded, num_filters,
                                filter_shape, name='g_h%d_conv' % index)
            activated = tf.nn.relu(batch_norm(conv))
            # Pool over the sequence_length - filter_size + 1 positions.
            pooled_outputs.append(tf.nn.max_pool(
                activated,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool"))

        # Merge all branches into one flat feature vector.
        num_filters_total = num_filters * len(filter_sizes)
        h_pool = tf.concat(pooled_outputs, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

        opts = self.options
        size = opts['image_size']
        s2, s4, s8, s16 = (int(size / div) for div in (2, 4, 8, 16))
        batch_size = opts['batch_size']
        gf_dim = opts['gf_dim']

        reduced_text_embedding = ops.lrelu(
            ops.linear(h_pool_flat, opts['t_dim'], 'g_embedding'))
        projected = ops.linear(reduced_text_embedding,
                               gf_dim * 8 * s16 * s16, 'g_h0_lin')

        net = tf.reshape(projected, [-1, s16, s16, gf_dim * 8])
        net = tf.nn.relu(self.g_bn0(net))

        net = ops.deconv2d(net, [batch_size, s8, s8, gf_dim * 4],
                           name='g_h1')
        net = tf.nn.relu(self.g_bn1(net))

        net = ops.deconv2d(net, [batch_size, s4, s4, gf_dim * 2],
                           name='g_h2')
        net = tf.nn.relu(self.g_bn2(net))

        net = ops.deconv2d(net, [batch_size, s2, s2, gf_dim], name='g_h3')
        net = tf.nn.relu(self.g_bn3(net))

        # Output stage: 3 channels, no batch norm / ReLU.
        out = ops.deconv2d(net, [batch_size, size, size, 3], name='g_h4')
        return (tf.tanh(out) / 2. + 0.5), h_pool_flat