def generator(inputs):
    """
    Build the generator graph: random vector in, two-channel spectrogram out.

    :param inputs: A vector with random entries (either from a uniform or a
        gaussian distribution).
    :return: A generated audio sample whose last axis stacks log-magnitude
        and phase, each squashed into [-1, 1] by tanh.
    """
    kernel = FLAGS.kernel_size
    # Third and fourth entries of each hidden deconv filter shape
    # (presumably output/input channels -- confirm against `deconv`).
    hidden_filters = [(1024, 1024), (512, 1024), (256, 512), (128, 256)]

    with tf.variable_scope("g_"):
        # 28672 == 4 * 7 * 1024, i.e. exactly the element count of the reshape.
        seed = fc(inputs, 28672, 'fc1')
        net = tf.nn.relu(tf.reshape(seed, [-1, 4, 7, 1024]))

        # deconv1/bn2 ... deconv4/bn5: the batch-norm index is one ahead of
        # the deconv index, matching the variable names already in use.
        for idx, (dim_a, dim_b) in enumerate(hidden_filters, start=1):
            net = deconv(net, [kernel, kernel, dim_a, dim_b], 'deconv%d' % idx)
            net = batch_norm(True, net, 'bn%d' % (idx + 1))
            net = tf.nn.relu(net)

        # Final layer produces the two output channels, then is cropped to
        # at most 98 x 201 along the two middle axes.
        net = deconv(net, [kernel, kernel, 2, 128], 'deconv5')
        net = net[:, :98, :201, :]

        # Map logmag and phase to [-1, 1].
        logmag, phase = tf.unstack(net, axis=3)
        phase = tf.nn.tanh(phase)
        logmag = tf.nn.tanh(logmag)
        return tf.stack([logmag, phase], axis=3)
def g_net(img, scope, gf_dim=64, is_training=True, reuse=False):
    """Build a residual encoder/decoder generator network.

    The graph is: reflect-padded 7x7 conv, two stride-2 convs, nine residual
    blocks, two stride-2 deconvs, and a reflect-padded 7x7 conv followed by
    tanh.

    Args:
        img: Input tensor; it is padded on axes 1 and 2, so it is assumed to
            be rank 4 (TODO confirm layout against callers).
        scope: Prefix of the variable scope; variables live under
            ``scope + '_g'``.
        gf_dim: Base number of filters; deeper layers use multiples of it.
        is_training: Forwarded to every ``bn`` call.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        The 3-channel prediction tensor after ``tf.nn.tanh``.
    """
    # Bind the training flag to a *local* wrapper. The previous version
    # rebound the module-level `bn` via `global`, so every call wrapped the
    # shared function again and leaked this call's `is_training` into every
    # other user of `bn`.
    norm = functools.partial(bn, is_training=is_training)

    def res_block(x, dim, scope='res'):
        # Two reflect-padded 3x3 VALID convs with batch norm; the first is
        # followed by relu, and the input is added back at the end.
        y = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
        y = conv(y, dim, kernel_size=3, stride=1, padding='VALID',
                 scope=scope + '_conv1')
        y = relu(norm(y, scope=scope + '_bn1'))
        y = tf.pad(y, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
        y = conv(y, dim, kernel_size=3, stride=1, padding='VALID',
                 scope=scope + '_conv2')
        y = norm(y, scope=scope + '_bn2')
        return y + x

    with tf.variable_scope(scope + '_g', reuse=reuse):
        # Encoder.
        c0 = tf.pad(img, [[0, 0], [3, 3], [3, 3], [0, 0]], "REFLECT")
        c1 = relu(norm(conv(c0, gf_dim, 7, 1, padding='VALID',
                            scope='c1_conv'), scope='c1_bn'))
        c2 = relu(norm(conv(c1, gf_dim * 2, 3, 2, scope='c2_conv'),
                       scope='c2_bn'))
        c3 = relu(norm(conv(c2, gf_dim * 4, 3, 2, scope='c3_conv'),
                       scope='c3_bn'))

        # Nine residual blocks, scoped 'r1' .. 'r9'.
        res = c3
        for idx in range(1, 10):
            res = res_block(res, gf_dim * 4, scope='r%d' % idx)

        # Decoder.
        d1 = relu(norm(deconv(res, gf_dim * 2, 3, 2, scope='d1_dconv'),
                       scope='d1_bn'))
        d2 = relu(norm(deconv(d1, gf_dim, 3, 2, scope='d2_dconv'),
                       scope='d2_bn'))
        d2 = tf.pad(d2, [[0, 0], [3, 3], [3, 3], [0, 0]], "REFLECT")
        pred = conv(d2, 3, 7, 1, padding='VALID', scope='pred_conv')
        pred = tf.nn.tanh(pred)

        return pred
def generatorII(self, x, t_aug, is_train, scope = 'generatorII'):
    """Build the second-stage generator graph.

    The input is downsampled by two stride-2 convolutions, concatenated with
    a spatially tiled copy of ``t_aug``, processed by two more convolutions,
    and upsampled by three deconvolutions.

    Args:
        x: Input feature map / image tensor fed to the first convolution.
        t_aug: Conditioning tensor; two axes are inserted at positions 1 and
            2 before tiling, so it is assumed to be (batch, features)
            -- TODO confirm.
        is_train: Forwarded to every ``ops.conv`` / ``ops.deconv`` call.
        scope: Name of the enclosing variable scope.

    Returns:
        The tanh output rescaled by ``* 0.5 + 0.5`` with
        ``self.data.im_mean`` subtracted.
    """
    with tf.variable_scope(scope):
        depth = CONFIG['g_conv_depth']
        residual = True
        # Arguments shared by every batch-normalised relu layer below.
        bn_relu = dict(normalizer=tfly.batch_norm, is_train=is_train,
                       activation=tf.nn.relu)

        # Channel counts use floor division: under Python 3, `depth / 2`
        # (and even `depth / 1`) yields a float, which is not a valid number
        # of filters. For integer `depth` this matches Python 2's `/`.
        with tf.variable_scope('DownSampling'):
            maps = ops.conv(x, depth // 2, 'map1', k=3, s=2,
                            residual=False, **bn_relu)
            maps = ops.conv(maps, depth // 2, 'map1-0', k=3, s=1,
                            residual=residual, **bn_relu)
            maps = ops.conv(maps, depth, 'map2', k=3, s=2,
                            residual=False, **bn_relu)
            maps = ops.conv(maps, depth, 'map2-0', k=3, s=1,
                            residual=residual, **bn_relu)

        with tf.variable_scope('MiddleLayer'):
            # Only axis 1 is read and reused for both tiled axes, so the
            # feature map is assumed to be square -- TODO confirm.
            size = maps.get_shape().as_list()[1]
            t_aug = tf.expand_dims(t_aug, 1)
            t_aug = tf.expand_dims(t_aug, 2)
            tiled = tf.tile(t_aug, [1, size, size, 1])
            maps = tf.concat([maps, tiled], axis=-1)
            maps = ops.conv(maps, depth, 'mapt2', k=3, s=1,
                            residual=False, **bn_relu)
            maps = ops.conv(maps, depth, 'mapt2_0', k=3, s=1,
                            residual=residual, **bn_relu)

        with tf.variable_scope('UpSampling'):
            maps = ops.deconv(maps, depth // 2, 'map1', k=5, s=3, **bn_relu)
            maps = ops.deconv(maps, depth // 4, 'map2', k=3, s=2, **bn_relu)
            g_out = ops.deconv(maps, 3, 'map2_0', k=3, s=1,
                               normalizer=tfly.batch_norm, is_train=is_train,
                               activation=tf.tanh)
            # tanh output is shifted from [-1, 1] to [0, 1], then the
            # dataset mean is subtracted.
            g_out = g_out * 0.5 + 0.5 - self.data.im_mean
    return g_out
def decoder(self, ip):
    """Decode a latent embedding into a single-channel 28x28 image batch.

    Args:
        ip: Latent input passed to the linear layer, which is told its width
            is ``self.latent_emb_size``.

    Returns:
        Channel 0 of the sigmoid output, i.e. the result with the trailing
        channel axis removed.
    """
    batch = self.batch_size
    with tf.variable_scope('decoder'):
        # Project to 7*7*32 values and view them as a 7x7 map with 32 channels.
        projected = linear(ip, ip_size=self.latent_emb_size,
                           out_size=7*7*32, scope='gen_linear')
        seed_maps = tf.reshape(projected, [batch, 7, 7, 32])
        seed_maps = tf.nn.relu(seed_maps)

        # Two deconvolutions with requested output shapes 14x14x16, 28x28x1.
        up14 = deconv(seed_maps, [batch, 14, 14, 16], scope = 'gen_h1')
        up14 = lrelu(up14)
        up28 = deconv(up14, [batch, 28, 28, 1], scope='gen_h2')
        up28 = tf.nn.sigmoid(up28)

        return up28[:, :, :, 0]
def generatorI(self, z, t_g, is_train, scope = 'generatorI'):
    """Build the first-stage generator graph.

    ``z`` and ``t_g`` are concatenated along axis 1, projected by a linear
    layer, reshaped into a ``size x size x depth`` map and passed through
    four stride-2 deconvolutions.

    Args:
        z: Noise tensor; concatenated with ``t_g`` on axis 1.
        t_g: Conditioning tensor; concatenated with ``z`` on axis 1.
        is_train: Forwarded to every ``ops.deconv`` call.
        scope: Name of the enclosing variable scope.

    Returns:
        The tanh output rescaled by ``* 0.5 + 0.5`` with
        ``self.data.im_mean`` subtracted.
    """
    with tf.variable_scope(scope):
        # Floor division keeps `size` and the channel counts integral.
        # Under Python 3, `/` produces floats, which are not valid in a
        # reshape shape or as a filter count. For integer config values this
        # matches Python 2's `/`.
        size = CONFIG['image_size_I'] // 16
        depth = CONFIG['g_conv_depth']

        zt = tf.concat([z, t_g], axis=1)
        flat = ops.linear(zt, depth * size * size, 'ebd2map',
                          activation=tf.nn.relu)
        maps = tf.reshape(flat, [-1, size, size, depth])

        bn_relu = dict(normalizer=tfly.batch_norm, is_train=is_train,
                       activation=tf.nn.relu)
        maps = ops.deconv(maps, depth // 2, 'map1', k=3, s=2, **bn_relu)
        maps = ops.deconv(maps, depth // 4, 'map2', k=3, s=2, **bn_relu)
        maps = ops.deconv(maps, depth // 8, 'map3', k=3, s=2, **bn_relu)
        g_out = ops.deconv(maps, 3, 'map4', k=3, s=2,
                           normalizer=tfly.batch_norm, is_train=is_train,
                           activation=tf.tanh)
        # tanh output is shifted from [-1, 1] to [0, 1], then the dataset
        # mean is subtracted.
        g_out = g_out * 0.5 + 0.5 - self.data.im_mean
    return g_out
def decoder_block(inputs, kernel_size, stride, channel_out, name, mode, add):
    """Apply deconv -> batchnorm -> relu inside the variable scope `name`.

    `kernel_size`, `stride` and `add` are forwarded to `deconv`, and `mode`
    is forwarded to `batchnorm`. Returns the rectified tensor.
    """
    with tf.variable_scope(name):
        upsampled = deconv(inputs, channel_out,
                           kernel_size=kernel_size, stride=stride, add=add)
        return relu(batchnorm(upsampled, mode))
def Decoder(input, contents_rec, generator_outputs_channels, mode):
    """Build the eight-stage decoder with skip connections.

    Each stage concatenates the previous output with one entry of
    `contents_rec` (taken from the end backwards) along axis 3. Stages 1-7
    are `decoder_block`s; stage 8 is a bare `deconv` producing
    `generator_outputs_channels` channels.

    Returns the list of all intermediate tensors, starting with `input` and
    ending with the final output.
    """
    # (kernel_size, stride, add, out_channels) for decoder_1 .. decoder_7.
    hidden_specs = [
        (3, 2, 0, a.ndf * 8),
        (3, 2, -1, a.ndf * 8),
        (3, 2, -1, a.ndf * 8),
        (3, 2, 0, a.ndf * 8),
        (3, 2, 0, a.ndf * 4),
        (3, 2, 0, a.ndf * 2),
        (3, 2, 0, a.ndf),
    ]

    layers = [input]
    for depth, (ksize, step, extra, n_out) in enumerate(hidden_specs, start=1):
        # Stage `depth` is paired with the depth-th entry from the end.
        merged = tf.concat([layers[-1], contents_rec[-depth]], 3)
        layers.append(
            decoder_block(merged, ksize, step, n_out,
                          "decoder_" + str(depth), mode, extra))

    with tf.variable_scope("decoder_8"):
        # Original author's note: 80*80 80*80
        final_in = tf.concat([layers[-1], contents_rec[-8]], 3)
        layers.append(
            deconv(final_in, generator_outputs_channels,
                   kernel_size=5, stride=1, add=0))

    return layers
def decoder(h_i, skips, z=None, z_on=False):
    """Decode `h_i` back to a waveform through a stack of deconvolutions.

    The layer depths are the encoder depths (all but the last) reversed,
    followed by a final depth of 1. Every layer except the last applies
    prelu or leaky-relu and then concatenates the matching skip tensor on
    axis 2; the last layer applies tanh instead.

    Args:
        h_i: Decoder input (z+c is called h_i here).
        skips: Skip tensors, consumed from the end backwards.
        z: Optional tensor appended to the result when `z_on` is true.
        z_on: Whether to append `z` to the returned list.

    Returns:
        ``[wave]``, or ``[wave, z]`` when `z_on` is true.

    Raises:
        ValueError: If `deconv_type` is neither 'deconv' nor 'nn_deconv'.
    """
    g_dec_depths = g_enc_depths[:-1][::-1] + [1]
    last_idx = len(g_dec_depths) - 1

    for layer_idx, layer_depth in enumerate(g_dec_depths):
        h_i_dim = h_i.get_shape().as_list()
        # Axis 1 is doubled for the requested output shape (original note:
        # "2 because of skip connections").
        out_shape = [h_i_dim[0], h_i_dim[1] * 2, layer_depth]

        if deconv_type == 'deconv':
            h_i = deconv(h_i, out_shape, kwidth=kwidth, dilation=2,
                         init=tf.truncated_normal_initializer(stddev=0.02),
                         bias_init=tf.constant_initializer(0.))
        elif deconv_type == 'nn_deconv':
            h_i = nn_deconv(h_i, kwidth=kwidth, dilation=2,
                            init=tf.truncated_normal_initializer(stddev=0.02),
                            bias_init=0.0)
        else:
            # Previously an unknown type fell through and failed later with
            # an unbound local variable; fail with a clear message instead.
            raise ValueError(
                'Unknown deconv type {}'.format(deconv_type))

        if layer_idx < last_idx:
            h_i = prelu(h_i) if do_prelu else leakyrelu(h_i)
            # Fuse skip connection.
            skip_ = skips[-(layer_idx + 1)]
            h_i = tf.concat([h_i, skip_], 2)
        else:
            h_i = tf.tanh(h_i)

    wave = h_i
    # Original author's note: not sure about the following.
    ret_feats = [wave]
    if z_on:
        ret_feats.append(z)
    return ret_feats
def attention(inputs, scope="attention", is_training=True, reuse=False,
              shared_scope="shared_attention", shared_reuse=False):
    """
    Define Attention Network

    inputs: input images
    scope: name of attention scope
    is_training: is training process
    reuse: reuse variable of scope
    shared_scope: name of shared attention scope (not used by this function)
    shared_reuse: reuse variable of shared_scope (not used by this function)

    Returns the output of the final 1x1 deconv with sigmoid activation and
    a single output channel.
    """
    with tf.variable_scope(scope, reuse=reuse):
        net = inputs

        # Encoder: one 7x7 conv, then stride-2 convs that double the channel
        # count each time.
        channel = params.encoder.channel
        net = ops.conv(
            net,
            scope="conv1",
            dim=channel,
            kernel_size=[7, 7],
            stride=1,
            activation_fn=ops.leaky_relu,
            is_training=is_training,
            weights_initializer=params.encoder.weights_initializer)
        for i in range(1, params.encoder.n_enconder):
            channel *= 2
            net = ops.conv(
                net,
                scope="conv_{}".format(i + 1),
                dim=channel,
                kernel_size=[3, 3],
                stride=2,
                activation_fn=ops.leaky_relu,
                is_training=is_training,
                weights_initializer=params.encoder.weights_initializer)
        for i in range(params.encoder.n_resblock):
            net = ops.resblock(
                net,
                scope="resblock_{}".format(i + 1),
                dim=channel,
                kernel_size=[3, 3],
                stride=1,
                norm_fn=params.encoder.norm_fn,
                is_training=is_training,
                weights_initializer=params.encoder.weights_initializer,
                dropout_ratio=params.encoder.dropout_ratio)

        # Decoder: residual blocks, then stride-2 deconvs that halve the
        # channel count each time.
        channel = params.decoder.channel
        for i in range(params.decoder.n_resblock):
            # NOTE(review): these decoder blocks read norm_fn,
            # weights_initializer and dropout_ratio from params.encoder --
            # confirm this is intended rather than params.decoder.
            net = ops.resblock(
                net,
                scope="deresblock_{}".format(params.decoder.n_resblock - i),
                dim=channel,
                kernel_size=[3, 3],
                stride=1,
                norm_fn=params.encoder.norm_fn,
                is_training=is_training,
                weights_initializer=params.encoder.weights_initializer,
                dropout_ratio=params.encoder.dropout_ratio)
        for i in range(1, params.decoder.n_decoder):
            # Floor division keeps the channel count an int under Python 3,
            # where `/` would produce a float `dim`.
            channel = channel // 2
            net = ops.deconv(
                net,
                scope="deconv_{}".format(params.decoder.n_decoder - i + 1),
                dim=channel,
                kernel_size=[3, 3],
                stride=2,
                activation_fn=ops.leaky_relu,
                is_training=is_training,
                weights_initializer=params.decoder.weights_initializer)
        net = ops.deconv(
            net,
            scope="deconv_1",
            dim=1,
            kernel_size=[1, 1],
            stride=1,
            activation_fn=ops.sigmoid,
            is_training=is_training,
            weights_initializer=params.decoder.weights_initializer)
        return net