def build_models(self, image):
    with tf.variable_scope('img_discriminator'):
        nf = 64
        self.img = image  # input image, size [batch, 64, 64, 3]

        # layer 1: conv + leaky ReLU (no batch norm on the first layer)
        self.l1 = op.conv2d(self.img, nf, name='l1')
        self.l1 = op.lrelu(self.l1)
        # self.l1_bn = op.batch_norm(name='l1_bn0')
        # self.l1 = op.lrelu(self.l1_bn(self.l1, train=self.train))

        # layer 2
        self.l2 = op.conv2d(self.l1, nf * 2, name='l2')
        self.l2_bn = op.batch_norm(name='l2_bn0')
        self.l2 = op.lrelu(self.l2_bn(self.l2, train=self.train))

        # layer 3
        self.l3 = op.conv2d(self.l2, nf * 4, name='l3')
        self.l3_bn = op.batch_norm(name='l3_bn0')
        self.l3 = op.lrelu(self.l3_bn(self.l3, train=self.train))

        # layer 4
        self.l4 = op.conv2d(self.l3, nf * 8, name='l4')
        self.l4_bn = op.batch_norm(name='l4_bn0')
        self.l4 = op.lrelu(self.l4_bn(self.l4, train=self.train))

        # final layer: flatten and project to a single logit
        # (differs from the original paper, which ends with a convolution)
        self.score = op.linear(tf.reshape(self.l4, [self.batch_size, -1]), 1, 'final')
        return
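# The `op` helper module used above (op.conv2d, op.lrelu, op.batch_norm, op.linear)
# is not shown in this section. Below is a minimal sketch of what such helpers
# commonly look like in DCGAN-style code; the signatures (5x5 kernels, stride 2,
# a callable batch_norm object) are assumptions, not the original implementation.
import tensorflow as tf

def conv2d(x, output_dim, k_h=5, k_w=5, s_h=2, s_w=2, stddev=0.02, name='conv2d'):
    # stride-2 convolution that halves the spatial resolution
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, x.get_shape().as_list()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        b = tf.get_variable('b', [output_dim], initializer=tf.constant_initializer(0.0))
        return tf.nn.conv2d(x, w, strides=[1, s_h, s_w, 1], padding='SAME') + b

def lrelu(x, leak=0.2):
    # leaky ReLU with slope `leak` for negative inputs
    return tf.maximum(x, leak * x)

def linear(x, output_size, name='linear', stddev=0.02):
    # fully connected layer producing [batch, output_size]
    with tf.variable_scope(name):
        w = tf.get_variable('w', [x.get_shape().as_list()[-1], output_size],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        b = tf.get_variable('b', [output_size], initializer=tf.constant_initializer(0.0))
        return tf.matmul(x, w) + b

class batch_norm(object):
    # callable batch-norm wrapper, matching the `bn(x, train=...)` usage above
    def __init__(self, epsilon=1e-5, momentum=0.9, name='batch_norm'):
        self.epsilon, self.momentum, self.name = epsilon, momentum, name

    def __call__(self, x, train=True):
        return tf.contrib.layers.batch_norm(x, decay=self.momentum, epsilon=self.epsilon,
                                            scale=True, is_training=train, scope=self.name)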
def discriminator(self, image, is_training, reuse=False):
    with tf.variable_scope("discriminator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        # [batch, 256, 256, 1] -> [batch, 128, 128, 64]
        h0 = lrelu(conv2d(image, self.discriminator_dim, scope="d_h0_conv"))
        # [batch, 128, 128, 64] -> [batch, 64, 64, 64*2]
        h1 = lrelu(batch_norm(conv2d(h0, self.discriminator_dim * 2, scope="d_h1_conv"),
                              is_training, scope="d_bn_1"))
        # [batch, 64, 64, 64*2] -> [batch, 32, 32, 64*4]
        h2 = lrelu(batch_norm(conv2d(h1, self.discriminator_dim * 4, scope="d_h2_conv"),
                              is_training, scope="d_bn_2"))
        # [batch, 32, 32, 64*4] -> [batch, 31, 31, 64*8] (stride 1 in the last conv)
        h3 = lrelu(batch_norm(conv2d(h2, self.discriminator_dim * 8, sh=1, sw=1, scope="d_h3_conv"),
                              is_training, scope="d_bn_3"))
        # real-or-fake binary logit
        fc1 = fc(tf.reshape(h3, [self.batch_size, -1]), 1, scope="d_fc1")
        return tf.sigmoid(fc1), fc1
def discriminator(self, image, is_training, reuse=False):
    with tf.variable_scope("discriminator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        h0 = lrelu(conv2d(image, self.discriminator_dim, scope="d_h0_conv"))
        h1 = lrelu(batch_norm(conv2d(h0, self.discriminator_dim * 2, scope="d_h1_conv"),
                              is_training, scope="d_bn_1"))
        h2 = lrelu(batch_norm(conv2d(h1, self.discriminator_dim * 4, scope="d_h2_conv"),
                              is_training, scope="d_bn_2"))
        h3 = lrelu(batch_norm(conv2d(h2, self.discriminator_dim * 8, scope="d_h3_conv"),
                              is_training, scope="d_bn_3"))
        # h4 = lrelu(batch_norm(conv2d(h3, self.discriminator_dim * 8, scope="d_h4_conv"),
        #                       is_training, scope="d_bn_4"))
        # h5 = lrelu(batch_norm(conv2d(h4, self.discriminator_dim * 8, sh=1, sw=1, scope="d_h5_conv"),
        #                       is_training, scope="d_bn_5"))
        # real-or-fake binary logit via two fully connected layers
        fc1 = fc(tf.reshape(h3, [self.batch_size, -1]), 8, scope="d_fc1")
        fc2 = fc(fc1, 1, scope="d_fc2")
        return tf.nn.sigmoid(fc2), fc2
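# Hedged usage sketch of the reuse pattern in the two discriminators above: the
# network is built once for real images and a second time with reuse=True for
# generated images so both passes share the same variables. `model`,
# `real_images`, and `fake_images` are illustrative names, not from the original code.
real_prob, real_logits = model.discriminator(real_images, is_training=True)
fake_prob, fake_logits = model.discriminator(fake_images, is_training=True, reuse=True)

d_loss_real = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=real_logits, labels=tf.ones_like(real_logits)))
d_loss_fake = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logits, labels=tf.zeros_like(fake_logits)))
d_loss = d_loss_real + d_loss_fake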
def build_models(self, image, sentence_vec):
    with tf.variable_scope('img_discriminator'):
        self.img = image  # input image, size [batch, 64, 64, 3]
        # project the sentence embedding down to a 128-d conditioning vector
        self.sentence_vec = op.lrelu(
            op.linear(sentence_vec, 128, 'conditional_vec'))  # size [batch, 128]

        # output size of each of the four conv layers
        l1_h, l1_w, l1_c = 32, 32, 64
        l2_h, l2_w, l2_c = 16, 16, 128
        l3_h, l3_w, l3_c = 8, 8, 256
        l4_h, l4_w, l4_c = 4, 4, 512

        # layer 1: conv + leaky ReLU (no batch norm on the first layer)
        self.l1 = op.conv2d(self.img, l1_c, name='l1')
        self.l1 = op.lrelu(self.l1)
        # self.l1_bn = op.batch_norm(name='l1_bn0')
        # self.l1 = op.lrelu(self.l1_bn(self.l1, train=self.train))

        # layer 2
        self.l2 = op.conv2d(self.l1, l2_c, name='l2')
        self.l2_bn = op.batch_norm(name='l2_bn0')
        self.l2 = op.lrelu(self.l2_bn(self.l2, train=self.train))

        # layer 3
        self.l3 = op.conv2d(self.l2, l3_c, name='l3')
        self.l3_bn = op.batch_norm(name='l3_bn0')
        self.l3 = op.lrelu(self.l3_bn(self.l3, train=self.train))

        # layer 4
        self.l4 = op.conv2d(self.l3, l4_c, name='l4')
        self.l4_bn = op.batch_norm(name='l4_bn0')
        self.l4 = op.lrelu(self.l4_bn(self.l4, train=self.train))

        # self.l4 is now [batch, 4, 4, 512]; append the text conditioning
        self.sentence_vec = tf.expand_dims(self.sentence_vec, 1)
        self.sentence_vec = tf.expand_dims(self.sentence_vec, 2)
        # [batch, 1, 1, 128] -> [batch, 4, 4, 128]
        self.sentence_vec = tf.tile(self.sentence_vec, [1, 4, 4, 1])
        # concatenate along the channel axis (tf.concat takes the axis last in TF >= 1.0)
        self.l4 = tf.concat([self.l4, self.sentence_vec], 3)

        # layer 5: presumably a 1x1, stride-1 convolution over the concatenated features
        self.l5 = op.conv2d(self.l4, l4_c, 1, 1, 1, 1, name='l5')
        self.l5_bn = op.batch_norm(name='l5_bn0')
        self.l5 = op.lrelu(self.l5_bn(self.l5, train=self.train))

        # final layer: flatten and project to a single logit
        # (differs from the original paper, which ends with a convolution)
        self.score = op.linear(tf.reshape(self.l5, [self.batch_size, -1]), 1, 'final')
        return
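# Minimal standalone sketch of the text-conditioning step above: a [batch, 128]
# sentence embedding is reshaped to [batch, 1, 1, 128], tiled spatially to
# [batch, 4, 4, 128], and concatenated with the [batch, 4, 4, 512] feature map
# along the channel axis, giving [batch, 4, 4, 640]. The placeholder tensors are
# illustrative stand-ins for self.l4 and the projected sentence vector.
import tensorflow as tf

batch_size = 16
features = tf.zeros([batch_size, 4, 4, 512])   # stands in for self.l4
sentence_vec = tf.zeros([batch_size, 128])     # stands in for the projected text vector

sentence_map = tf.expand_dims(tf.expand_dims(sentence_vec, 1), 2)  # [batch, 1, 1, 128]
sentence_map = tf.tile(sentence_map, [1, 4, 4, 1])                 # [batch, 4, 4, 128]
conditioned = tf.concat([features, sentence_map], 3)               # [batch, 4, 4, 640]
print(conditioned.get_shape().as_list())  # [16, 4, 4, 640]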
def encoder(self, images, is_training, reuse=False):
    with tf.variable_scope("generator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        encode_layers = dict()

        def encode_layer(x, output_filters, layer):
            act = lrelu(x)
            conv = conv2d(act, output_filters=output_filters, scope="g_e%d_conv" % layer)
            enc = batch_norm(conv, is_training, scope="g_e%d_bn" % layer)
            encode_layers["e%d" % layer] = enc
            return enc

        e1 = conv2d(images, self.generator_dim, scope="g_e1_conv")
        encode_layers["e1"] = e1
        e2 = encode_layer(e1, self.generator_dim * 2, 2)
        e3 = encode_layer(e2, self.generator_dim * 4, 3)
        e4 = encode_layer(e3, self.generator_dim * 8, 4)
        e5 = encode_layer(e4, self.generator_dim * 8, 5)
        e6 = encode_layer(e5, self.generator_dim * 8, 6)
        e7 = encode_layer(e6, self.generator_dim * 8, 7)
        e8 = encode_layer(e7, self.generator_dim * 8, 8)

        return e8, encode_layers
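# Hedged usage sketch: the dict of intermediate activations returned by the
# encoder is typically consumed by a U-Net style decoder that concatenates
# encode_layers["e7"], ..., encode_layers["e1"] with the matching decoder layers.
# `model` and `source_images` are illustrative names, not part of the original code.
encoded, encode_layers = model.encoder(source_images, is_training=True)
print(sorted(encode_layers.keys()))  # ['e1', 'e2', 'e3', 'e4', 'e5', 'e6', 'e7', 'e8']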
def encode_layer(x, output_filters, layer):
    act = lrelu(x)
    conv = conv2d(act, output_filters=output_filters, scope="d_e%d_conv" % layer)
    enc = batch_norm(conv, is_training, scope="d_e%d_bn" % layer)
    return enc
def discriminator(self, images, is_training, reuse=False):
    with tf.variable_scope("discriminator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        def encode_layer(x, output_filters, layer):
            act = lrelu(x)
            conv = conv2d(act, output_filters=output_filters, scope="d_e%d_conv" % layer)
            enc = batch_norm(conv, is_training, scope="d_e%d_bn" % layer)
            return enc

        # Encoder layers
        e1 = conv2d(images, self.generator_dim, scope="d_e1_conv")
        e2 = encode_layer(e1, self.generator_dim * 2, 2)
        e3 = encode_layer(e2, self.generator_dim * 4, 3)
        e4 = encode_layer(e3, self.generator_dim * 8, 4)
        e5 = encode_layer(e4, self.generator_dim * 8, 5)
        e6 = encode_layer(e5, self.generator_dim * 8, 6)
        e7 = encode_layer(e6, self.generator_dim * 8, 7)
        e8 = encode_layer(e7, self.generator_dim * 8, 8)

        # Decoder layers
        s = self.output_width
        s2, s4, s8, s16, s32, s64, s128 = (int(s / 2), int(s / 4), int(s / 8), int(s / 16),
                                           int(s / 32), int(s / 64), int(s / 128))

        def decode_layer(x, output_width, output_filters, layer, dropout=False):
            dec = deconv2d(tf.nn.relu(x),
                           [self.batch_size, output_width, output_width, output_filters],
                           scope="d_d%d_deconv" % layer)
            if layer != 8:
                # no batch norm on the last decoder layer
                dec = batch_norm(dec, is_training, scope="d_d%d_bn" % layer)
            if dropout:
                dec = tf.nn.dropout(dec, 0.5)
            return dec

        d1 = decode_layer(e8, s128, self.generator_dim * 8, layer=1, dropout=True)
        d2 = decode_layer(d1, s64, self.generator_dim * 8, layer=2, dropout=True)
        d3 = decode_layer(d2, s32, self.generator_dim * 8, layer=3, dropout=True)
        d4 = decode_layer(d3, s16, self.generator_dim * 8, layer=4)
        d5 = decode_layer(d4, s8, self.generator_dim * 4, layer=5)
        d6 = decode_layer(d5, s4, self.generator_dim * 2, layer=6)
        d7 = decode_layer(d6, s2, self.generator_dim, layer=7)
        d8 = decode_layer(d7, s, self.output_filters, layer=8)

        output = tf.nn.tanh(d8)  # scale to (-1, 1)
        return output
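# The `deconv2d` helper used in decode_layer above is not shown here. A minimal
# sketch, assuming a 5x5, stride-2 transposed convolution that upsamples to the
# requested output_shape; the real implementation may differ.
import tensorflow as tf

def deconv2d(x, output_shape, k_h=5, k_w=5, s_h=2, s_w=2, stddev=0.02, scope='deconv2d'):
    with tf.variable_scope(scope):
        # note: the filter shape is [k_h, k_w, output_channels, input_channels]
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], x.get_shape().as_list()[-1]],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        b = tf.get_variable('b', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        deconv = tf.nn.conv2d_transpose(x, w, output_shape=output_shape,
                                        strides=[1, s_h, s_w, 1])
        return tf.nn.bias_add(deconv, b)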
def build_model(self, train_input_frames, train_target_frames,
                test_input_frames, test_target_frames):
    self.scale_channels = self.info['MODEL_PARAMETER_G']['SCALE_CHANNELS']
    self.scale_kernel_sizes = self.info['MODEL_PARAMETER_G']['SCALE_KERNEL_SIZES']
    self.num_scale_nets = len(self.scale_channels)

    with tf.variable_scope(self.gen_name):
        with tf.variable_scope(self.gen_name + '_data'):
            train_height = train_input_frames.get_shape().as_list()[1]
            train_width = train_target_frames.get_shape().as_list()[2]
            test_height = test_input_frames.get_shape().as_list()[1]
            test_width = test_target_frames.get_shape().as_list()[2]

        train_scale_preds = []
        train_scale_targets = []
        test_scale_preds = []
        test_scale_targets = []

        for scale_num in range(self.num_scale_nets):
            with tf.variable_scope(self.gen_name + '_scale' + str(scale_num)):
                with tf.variable_scope(self.gen_name + '_scale' + str(scale_num) + 'convolution'):
                    # each scale net works at 1 / 2^(num_scale_nets - 1 - scale_num)
                    # of the full resolution
                    scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num)
                    scale_train_height = int(train_height * scale_factor)
                    scale_train_width = int(train_width * scale_factor)
                    scale_test_height = int(test_height * scale_factor)
                    scale_test_width = int(test_width * scale_factor)

                    scale_train_input = tf.image.resize_images(
                        train_input_frames, [scale_train_height, scale_train_width])
                    scale_test_input = tf.image.resize_images(
                        test_input_frames, [scale_test_height, scale_test_width])
                    scale_train_target = tf.image.resize_images(
                        train_target_frames, [scale_train_height, scale_train_width])
                    scale_test_target = tf.image.resize_images(
                        test_target_frames, [scale_test_height, scale_test_width])

                    if scale_num > 0:
                        # feed the upsampled prediction of the previous (coarser) scale
                        # as extra input channels to the current scale
                        last_scale_train_pred = train_scale_preds[-1]
                        last_scale_test_pred = test_scale_preds[-1]
                        last_gen_train_frames = tf.image.resize_images(
                            last_scale_train_pred, [scale_train_height, scale_train_width])
                        last_gen_test_frames = tf.image.resize_images(
                            last_scale_test_pred, [scale_test_height, scale_test_width])
                        scale_train_input = tf.concat([scale_train_input, last_gen_train_frames], 3)
                        scale_test_input = tf.concat([scale_test_input, last_gen_test_frames], 3)
                    else:
                        last_scale_train_pred = None
                        last_scale_test_pred = None

                    for i in range(len(self.scale_kernel_sizes[scale_num])):
                        # tanh on the last convolution of each scale, leaky ReLU otherwise
                        is_last = (i == len(self.scale_kernel_sizes[scale_num]) - 1)
                        scale_train_input, scale_test_input = ops.conv2d(
                            input=scale_train_input,
                            test_input=scale_test_input,
                            filter_size=[
                                self.scale_kernel_sizes[scale_num][i],
                                self.scale_kernel_sizes[scale_num][i],
                                self.scale_channels[scale_num][i],
                                self.scale_channels[scale_num][i + 1]
                            ],
                            b_size=[self.scale_channels[scale_num][i + 1]],
                            strides=[1, 1, 1, 1],
                            padding='SAME',
                            dtype=tf.float32,
                            activate='tanh' if is_last else 'leaky_relu'
                        )

                    scale_train_pred = scale_train_input
                    scale_test_pred = scale_test_input

                    train_scale_preds.append(scale_train_pred)
                    test_scale_preds.append(scale_test_pred)
                    train_scale_targets.append(scale_train_target)
                    test_scale_targets.append(scale_test_target)

    return train_scale_preds, train_scale_targets, test_scale_preds, test_scale_targets
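# Minimal sketch of the scale-pyramid arithmetic used above, assuming four scale
# nets and a full resolution of 32x32: scale_num 0..3 map to factors
# 1/8, 1/4, 1/2, 1, i.e. resolutions 4, 8, 16, 32, with each scale's prediction
# upsampled and fed into the next, finer scale.
num_scale_nets = 4
height = 32
for scale_num in range(num_scale_nets):
    scale_factor = 1. / 2 ** ((num_scale_nets - 1) - scale_num)
    print(scale_num, scale_factor, int(height * scale_factor))
# 0 0.125 4
# 1 0.25  8
# 2 0.5   16
# 3 1.0   32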