def generator(z, is_training=True, reuse=False):
    # Network architecture is exactly the same as in InfoGAN
    # (https://arxiv.org/abs/1606.03657).
    with tf.variable_scope("generator", reuse=reuse):
        net = tf.nn.relu(
            bn(linear(z, 1024, scope='g_fc1'),
               is_training=is_training, scope='linear1'))
        net = tf.nn.relu(
            bn(linear(net, 128 * 7 * 7, scope='g_fc2'),
               is_training=is_training, scope='linear2'))
        net = tf.reshape(net, [-1, 7, 7, 128])
        net = deconv2d(net, output_size=14, output_channel=64, kernel=(4, 4),
                       stride=(2, 2), activation='relu', use_bn=True,
                       is_training=is_training, name='d_conv1')
        out = deconv2d(net, output_size=28, output_channel=1, kernel=(4, 4),
                       stride=(2, 2), activation='sigmoid', name='gen_images')
        return out
def generator(self, z):
    s_h, s_w = self.image_h, self.image_w
    s_h2, s_w2 = utils.compute_size(s_h, 2), utils.compute_size(s_w, 2)
    s_h4, s_w4 = utils.compute_size(s_h2, 2), utils.compute_size(s_w2, 2)
    s_h8, s_w8 = utils.compute_size(s_h4, 2), utils.compute_size(s_w4, 2)
    s_h16, s_w16 = utils.compute_size(s_h8, 2), utils.compute_size(s_w8, 2)
    fmap_dim = self.fmap_dim_g
    batch_size = self.batch_size
    with tf.variable_scope("generator") as scope:
        # Project z, reshape to the smallest feature map, then upsample
        # through four transposed convolutions back to the image size.
        z_ = utils.fc(z, s_h16 * s_w16 * 8 * fmap_dim, name='g_l0_fc')
        gl0 = utils.lrelu(
            self.g_bn_l0(
                tf.reshape(z_, [batch_size, s_h16, s_w16, fmap_dim * 8])))
        gl1 = utils.lrelu(
            self.g_bn_l1(
                utils.deconv2d(gl0, [batch_size, s_h8, s_w8, fmap_dim * 4],
                               name='g_l1_deconv')))
        gl2 = utils.lrelu(
            self.g_bn_l2(
                utils.deconv2d(gl1, [batch_size, s_h4, s_w4, fmap_dim * 2],
                               name='g_l2_deconv')))
        gl3 = utils.lrelu(
            self.g_bn_l3(
                utils.deconv2d(gl2, [batch_size, s_h2, s_w2, fmap_dim * 1],
                               name='g_l3_deconv')))
        gl4 = utils.deconv2d(gl3, [batch_size, s_h, s_w, 3],
                             name='g_l4_deconv')
        return tf.nn.tanh(gl4)
def generator(self, noise, caption):
    s = self.image_size
    s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)
    # Compress the caption embedding and concatenate it with the noise.
    reduced_caption = utils.lrelu(
        utils.linear(caption, self.reduced_text_dim, 'g_embedding'))
    noise_concat = tf.concat([noise, reduced_caption], 1)
    new_noise = utils.linear(noise_concat,
                             self.channel_dim * 8 * s16 * s16, 'g_h0_lin')
    h0 = tf.reshape(new_noise, [-1, s16, s16, self.channel_dim * 8])
    h0 = tf.nn.relu(self.g_bn0(h0))
    h1 = utils.deconv2d(h0, [self.batch_size, s8, s8, self.channel_dim * 4],
                        name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1))
    h2 = utils.deconv2d(h1, [self.batch_size, s4, s4, self.channel_dim * 2],
                        name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2))
    h3 = utils.deconv2d(h2, [self.batch_size, s2, s2, self.channel_dim],
                        name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3))
    h4 = utils.deconv2d(h3, [self.batch_size, s, s, 3], name='g_h4')
    # Map the tanh output from [-1, 1] to [0, 1].
    return (tf.tanh(h4) / 2. + 0.5)
def __init__(self, n_channels, n_channels_sm, n_branches, ksize, fmap_size,
             use_batchnorm=False):
    super(DecSwitchedDeconv, self).__init__()
    self.n_branches = n_branches
    # One two-layer deconv branch per switch position, optionally with
    # batch norm after each deconv.
    self.deconvs = nn.ModuleList([
        nn.Sequential(
            U.deconv2d(n_channels, n_channels_sm, ksize, 1,
                       out_h_or_w=fmap_size),
            nn.BatchNorm2d(n_channels_sm),
            nn.ReLU(),
            U.deconv2d(n_channels_sm, n_channels, ksize, 1,
                       out_h_or_w=fmap_size),
            nn.BatchNorm2d(n_channels))
        if use_batchnorm else
        nn.Sequential(
            U.deconv2d(n_channels, n_channels_sm, ksize, 1,
                       out_h_or_w=fmap_size),
            nn.ReLU(),
            U.deconv2d(n_channels_sm, n_channels, ksize, 1,
                       out_h_or_w=fmap_size))
        for _ in range(n_branches)
    ])
def generator(self, cond):
    with tf.variable_scope("gen"):
        feature = conf.conv_channel_base
        # Encoder: eight stride-2 convolutions.
        e1 = conv2d(cond, feature, name="e1")
        e2 = batch_norm(conv2d(lrelu(e1), feature * 2, name="e2"), "e2")
        e3 = batch_norm(conv2d(lrelu(e2), feature * 4, name="e3"), "e3")
        e4 = batch_norm(conv2d(lrelu(e3), feature * 8, name="e4"), "e4")
        e5 = batch_norm(conv2d(lrelu(e4), feature * 8, name="e5"), "e5")
        e6 = batch_norm(conv2d(lrelu(e5), feature * 8, name="e6"), "e6")
        e7 = batch_norm(conv2d(lrelu(e6), feature * 8, name="e7"), "e7")
        e8 = batch_norm(conv2d(lrelu(e7), feature * 8, name="e8"), "e8")
        # Precompute decoder output sizes: num[8] is the full image size,
        # num[1] the smallest decoder output.
        size = conf.img_size
        num = [0] * 9
        for i in range(1, 9):
            num[9 - i] = size
            size = (size + 1) // 2
        # Decoder with U-Net skip connections; dropout on the first three.
        d1 = deconv2d(tf.nn.relu(e8), [1, num[1], num[1], feature * 8],
                      name="d1")
        d1 = tf.concat([tf.nn.dropout(batch_norm(d1, "d1"), 0.5), e7], 3)
        d2 = deconv2d(tf.nn.relu(d1), [1, num[2], num[2], feature * 8],
                      name="d2")
        d2 = tf.concat([tf.nn.dropout(batch_norm(d2, "d2"), 0.5), e6], 3)
        d3 = deconv2d(tf.nn.relu(d2), [1, num[3], num[3], feature * 8],
                      name="d3")
        d3 = tf.concat([tf.nn.dropout(batch_norm(d3, "d3"), 0.5), e5], 3)
        d4 = deconv2d(tf.nn.relu(d3), [1, num[4], num[4], feature * 8],
                      name="d4")
        d4 = tf.concat([batch_norm(d4, "d4"), e4], 3)
        d5 = deconv2d(tf.nn.relu(d4), [1, num[5], num[5], feature * 4],
                      name="d5")
        d5 = tf.concat([batch_norm(d5, "d5"), e3], 3)
        d6 = deconv2d(tf.nn.relu(d5), [1, num[6], num[6], feature * 2],
                      name="d6")
        d6 = tf.concat([batch_norm(d6, "d6"), e2], 3)
        d7 = deconv2d(tf.nn.relu(d6), [1, num[7], num[7], feature], name="d7")
        d7 = tf.concat([batch_norm(d7, "d7"), e1], 3)
        d8 = deconv2d(tf.nn.relu(d7), [1, num[8], num[8], conf.img_channel],
                      name="d8")
        return tf.nn.tanh(d8)
def generator(self, cond):
    with tf.variable_scope("gen"):
        feature = conf.conv_channel_base
        # Encoder: eight stride-2 convolutions (256x256 input down to 1x1).
        e1 = conv2d(cond, feature, name="e1")
        e2 = batch_norm(conv2d(lrelu(e1), feature * 2, name="e2"), "e2")
        e3 = batch_norm(conv2d(lrelu(e2), feature * 4, name="e3"), "e3")
        e4 = batch_norm(conv2d(lrelu(e3), feature * 8, name="e4"), "e4")
        e5 = batch_norm(conv2d(lrelu(e4), feature * 8, name="e5"), "e5")
        e6 = batch_norm(conv2d(lrelu(e5), feature * 8, name="e6"), "e6")
        e7 = batch_norm(conv2d(lrelu(e6), feature * 8, name="e7"), "e7")
        e8 = batch_norm(conv2d(lrelu(e7), feature * 8, name="e8"), "e8")
        # Decoder with U-Net skip connections; dropout on the first three.
        d1 = deconv2d(tf.nn.relu(e8), [1, 2, 2, feature * 8], name="d1")
        d1 = tf.concat([tf.nn.dropout(batch_norm(d1, "d1"), 0.5), e7], 3)
        d2 = deconv2d(tf.nn.relu(d1), [1, 4, 4, feature * 8], name="d2")
        d2 = tf.concat([tf.nn.dropout(batch_norm(d2, "d2"), 0.5), e6], 3)
        d3 = deconv2d(tf.nn.relu(d2), [1, 8, 8, feature * 8], name="d3")
        d3 = tf.concat([tf.nn.dropout(batch_norm(d3, "d3"), 0.5), e5], 3)
        d4 = deconv2d(tf.nn.relu(d3), [1, 16, 16, feature * 8], name="d4")
        d4 = tf.concat([batch_norm(d4, "d4"), e4], 3)
        d5 = deconv2d(tf.nn.relu(d4), [1, 32, 32, feature * 4], name="d5")
        d5 = tf.concat([batch_norm(d5, "d5"), e3], 3)
        d6 = deconv2d(tf.nn.relu(d5), [1, 64, 64, feature * 2], name="d6")
        d6 = tf.concat([batch_norm(d6, "d6"), e2], 3)
        d7 = deconv2d(tf.nn.relu(d6), [1, 128, 128, feature], name="d7")
        d7 = tf.concat([batch_norm(d7, "d7"), e1], 3)
        d8 = deconv2d(tf.nn.relu(d7), [1, 256, 256, conf.img_channel],
                      name="d8")
        return tf.nn.tanh(d8)
def __init__(self, factors):
    super(Decoder, self).__init__()
    self.factors = factors
    # One learnable embedding table per generative factor.
    self.factor_embeds = nn.ParameterDict({
        'color': nn.Parameter(torch.randn(self.factors['color'], N_FACTOR_DIMS)),
        'shape': nn.Parameter(torch.randn(self.factors['shape'], N_FACTOR_DIMS)),
        'size': nn.Parameter(torch.randn(self.factors['size'], N_FACTOR_DIMS)),
        'camera': nn.Parameter(torch.randn(self.factors['camera'], N_FACTOR_DIMS)),
        'background': nn.Parameter(torch.randn(self.factors['background'], N_FACTOR_DIMS)),
        'horizontal': nn.Parameter(torch.randn(self.factors['horizontal'], N_FACTOR_DIMS)),
        'vertical': nn.Parameter(torch.randn(self.factors['vertical'], N_FACTOR_DIMS))
    })
    n_dims = N_EMBED_DIMS + N_FACTOR_DIMS
    self.input_color = nn.Linear(n_dims, 512)
    self.input_shape = nn.Linear(n_dims, 512)
    self.input_size = nn.Linear(n_dims, 512)
    self.path_col_shp_siz = nn.Sequential(nn.ReLU(), nn.Linear(512, 1024))
    self.input_horizontal = nn.Linear(n_dims, 512)
    self.input_vertical = nn.Linear(n_dims, 512)
    self.path_hor_ver = nn.Sequential(nn.ReLU(), nn.Linear(512, 1024))
    self.input_camera = nn.Sequential(nn.Linear(n_dims, 512), nn.ReLU(),
                                      nn.Linear(512, 1024))
    self.input_background = nn.Sequential(nn.Linear(n_dims, 512), nn.ReLU(),
                                          nn.Linear(512, 1024))
    self.path_shallow = nn.Sequential(
        nn.ReLU(), nn.Linear(1024, 1024),
        U.Lambda(lambda x: x.reshape(-1, 16, 8, 8)),  # 16 x 8 x 8
        nn.ReLU(),
        U.deconv2d(16, 64, 1, 1, True, 8))            # 64 x 8 x 8
    self.path_deep = nn.Sequential(
        nn.ReLU(), nn.Linear(1024, 1024),
        U.Lambda(lambda x: x.reshape(-1, 64, 4, 4)),  # 64 x 4 x 4
        nn.ReLU(),
        U.deconv2d(64, 64, 4, 1, True, 4),            # 64 x 4 x 4
        nn.ReLU(),
        U.deconv2d(64, 64, 4, 2, True, 8))            # 64 x 8 x 8
    self.path_base = nn.Sequential(
        nn.ReLU(),
        U.deconv2d(64, 16, 4, 2, True, 16),           # 16 x 16 x 16
        nn.ReLU(),
        U.deconv2d(16, 3, 6, 4, True, 64))            # 3 x 64 x 64
    self.path_shallow2 = nn.Sequential(
        nn.ReLU(), nn.Linear(1024, 1024),
        U.Lambda(lambda x: x.reshape(-1, 16, 8, 8)),  # 16 x 8 x 8
        nn.ReLU(),
        U.deconv2d(16, 64, 2, 2, True, 16))           # 64 x 16 x 16
    self.path_deep2 = nn.Sequential(
        nn.ReLU(), nn.Linear(1024, 1024),
        U.Lambda(lambda x: x.reshape(-1, 64, 4, 4)),  # 64 x 4 x 4
        nn.ReLU(),
        U.deconv2d(64, 64, 4, 2, True, 8),            # 64 x 8 x 8
        nn.ReLU(),
        U.deconv2d(64, 64, 4, 2, True, 16))           # 64 x 16 x 16
    self.path_base2 = nn.Sequential(
        nn.ReLU(),
        U.deconv2d(64, 16, 2, 2, True, 32),           # 16 x 32 x 32
        nn.ReLU(),
        U.deconv2d(16, 3, 2, 2, True, 64))            # 3 x 64 x 64
def set_up(self):
    with tf.variable_scope('conv1'):
        network = conv2d(self.input, [7, 7], 32, scope='conv1_1')
        network = conv2d(network, [3, 3], 32, scope='conv1_2')
        network = max_pool(network, 'pool1')  # downsample
    with tf.variable_scope('conv2'):
        network = conv2d(network, [3, 3], 64, scope='conv2_1')
        network = conv2d(network, [3, 3], 64, scope='conv2_2')
        network = max_pool(network, 'pool2')  # downsample
    with tf.variable_scope('conv3'):
        network = conv2d(network, [3, 3], 128, scope='conv3_1')
        network = conv2d(network, [3, 3], 128, scope='conv3_2')
    with tf.variable_scope('deconv1'):
        network = deconv2d(network, [3, 3], 64, scope='deconv1_1')  # upsample
        network = deconv2d(network, [3, 3], 64, stride=1, scope='deconv1_2')
    with tf.variable_scope('deconv2'):
        network = deconv2d(network, [3, 3], 32, scope='deconv2_1')  # upsample
        network = deconv2d(network, [3, 3], 32, stride=1, scope='deconv2_2')
    with tf.variable_scope('out_class'):
        logits = conv2d(network, [3, 3], 2, bn=False, relu=False,
                        scope='logits')
        self.pred_prob = tf.nn.softmax(logits, name='predictions')[:, :, :, 1]
        self.pred = tf.argmax(logits, 3)
    self.loss = iou_loss(self.pred_prob, self.label)
    self.train_score = iou_loss(tf.cast(self.pred, tf.float32), self.label)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate, epsilon=1e-4).minimize(self.loss)
def generator(self, cond):
    with tf.variable_scope("gen"):
        # Encoder: stride-2 convolutions, each followed by identity
        # (residual) blocks.
        e1 = batch_norm(conv2d(cond, 64, f=4, name="e1"), 'e1')    # 128x128x64
        e10 = tf.nn.elu(e1)
        e1a = Identity_block_for_G(e10, [16, 16, 64], stage='Gstge1a')
        e1b = Identity_block_for_G(e1a, [16, 16, 64], stage='Gstge1b')
        e1c = Identity_block_for_G(e1b, [16, 16, 64], stage='Gstge1c')
        e2 = batch_norm(conv2d(e1c, 128, f=4, name="e2"), "e2")    # 64x64x128
        e20 = tf.nn.elu(e2)
        e2a = Identity_block_for_G(e20, [32, 32, 128], stage='Gstge2a')
        e2b = Identity_block_for_G(e2a, [32, 32, 128], stage='Gstge2b')
        e2c = Identity_block_for_G(e2b, [32, 32, 128], stage='Gstge2c')
        e3 = batch_norm(conv2d(e2c, 256, f=4, name="e3"), "e3")    # 32x32x256
        e30 = tf.nn.elu(e3)
        e3a = Identity_block_for_G(e30, [64, 64, 256], stage='Gstge3a')
        e3b = Identity_block_for_G(e3a, [64, 64, 256], stage='Gstge3b')
        e3c = Identity_block_for_G(e3b, [64, 64, 256], stage='Gstge3c')
        e4 = batch_norm(conv2d(e3c, 512, f=4, name="e4"), "e4")    # 16x16x512
        e40 = tf.nn.elu(e4)
        e4a = Identity_block_for_G(e40, [128, 128, 512], stage='Gstge4a')
        e4b = Identity_block_for_G(e4a, [128, 128, 512], stage='Gstge4b')
        e4c = Identity_block_for_G(e4b, [128, 128, 512], stage='Gstge4c')
        e5 = batch_norm(conv2d(e4c, 512, f=4, name="e5"), "e5")    # 8x8x512
        e50 = tf.nn.elu(e5)
        e5a = Identity_block_for_G(e50, [128, 128, 512], stage='Gstge5a')
        e5b = Identity_block_for_G(e5a, [128, 128, 512], stage='Gstge5b')
        e6 = batch_norm(conv2d(e5b, 512, f=4, name="e6"), "e6")    # 4x4x512
        e60 = tf.nn.elu(e6)
        # Decoder: transposed convolutions with additive skip connections.
        d1 = batch_norm(deconv2d(e60, [1, 8, 8, 512], name="d1"), 'd1')
        d10 = tf.nn.elu(tf.add(d1, e5))
        d1a = Identity_block_for_G(d10, [128, 128, 512], stage='Gstge6a')
        d1b = Identity_block_for_G(d1a, [128, 128, 512], stage='Gstge6b')
        d2 = batch_norm(deconv2d(d1b, [1, 16, 16, 512], name="d2"), 'd2')
        d20 = tf.nn.elu(tf.add(e4, d2))
        d2a = Identity_block_for_G(d20, [128, 128, 512], stage='Gstge7a')
        d2b = Identity_block_for_G(d2a, [128, 128, 512], stage='Gstge7b')
        d2c = Identity_block_for_G(d2b, [128, 128, 512], stage='Gstge7c')
        d3 = batch_norm(deconv2d(d2c, [1, 32, 32, 256], name="d3"), 'd3')
        d30 = tf.nn.elu(tf.add(e3, d3))
        d3a = Identity_block_for_G(d30, [64, 64, 256], stage='Gstge8a')
        d3b = Identity_block_for_G(d3a, [64, 64, 256], stage='Gstge8b')
        d3c = Identity_block_for_G(d3b, [64, 64, 256], stage='Gstge8c')
        d4 = batch_norm(deconv2d(d3c, [1, 64, 64, 128], name="d4"), 'd4')
        d40 = tf.nn.elu(tf.add(e2, d4))
        d4a = Identity_block_for_G(d40, [32, 32, 128], stage='Gstge9a')
        d4b = Identity_block_for_G(d4a, [32, 32, 128], stage='Gstge9b')
        d4c = Identity_block_for_G(d4b, [32, 32, 128], stage='Gstge9c')
        d5 = batch_norm(deconv2d(d4c, [1, 128, 128, 64], name="d5"), 'd5')
        d50 = tf.nn.elu(tf.add(e1, d5))
        d5a = Identity_block_for_G(d50, [16, 16, 64], stage='Gstge10a')
        d5b = Identity_block_for_G(d5a, [16, 16, 64], stage='Gstge10b')
        d5c = Identity_block_for_G(d5b, [16, 16, 64], stage='Gstge10c')
        d6 = deconv2d(d5c, [1, 256, 256, 1], name="d6")
        return tf.tanh(d6)
def __call__(self, z, y=None, is_training=True, reuse=False):
    with tf.variable_scope(self.name, reuse=reuse):
        batch_size = z.get_shape().as_list()[0]
        if y is not None:
            z = tf.concat([z, y], 1)  # [bz, zdim + 10]
        net = tf.nn.relu(
            bn(dense(z, 1024, name='g_fc1'), is_training, name='g_bn1'))
        net = tf.nn.relu(
            bn(dense(net, 128 * 7 * 7, name='g_fc2'), is_training,
               name='g_bn2'))
        net = tf.reshape(net, [batch_size, 7, 7, 128])
        # [bz, 14, 14, 64]
        net = tf.nn.relu(
            bn(deconv2d(net, 64, 4, 4, 2, 2, padding='SAME', name='g_dc3'),
               is_training, name='g_bn3'))
        # [bz, 28, 28, 1]
        out = tf.nn.sigmoid(
            deconv2d(net, 1, 4, 4, 2, 2, padding='SAME', name='g_dc4'))
        return out
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False,
        on_cloud=0):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    Uses tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network, e.g. for MNIST: [None, 784].
    n_filters : list, optional
        If convolutional=True, the number of output filters for each
        layer; otherwise, the number of neurons for each fully connected
        layer.
    filter_sizes : list, optional
        Only applied when convolutional=True. The ksize (height and width)
        of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True. Size of the fully connected
        layer placed before (and mirrored after) the variational
        embedding. Set to 0 to skip this hidden layer.
    n_code : int, optional
        Only applied when variational=True. Number of latent Gaussians to
        sample for the innermost encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu.
    dropout : bool, optional
        Whether to apply dropout. If so, feed a value for 'keep_prob' as
        returned in the dictionary: 1.0 means no dropout, 0.0 drops every
        connection. Sensible values are 0.5-0.8.
    denoising : bool, optional
        Whether to apply denoising. If so, feed a value for 'corrupt_prob'
        as returned in the dictionary: 0.0 means no corruption, 1.0
        corrupts every feature. Sensible values are 0.5-0.8.
    convolutional : bool, optional
        Whether to build a convolutional network instead of a fully
        connected one. This changes the meaning of n_filters.
    variational : bool, optional
        Whether to create a variational embedding layer: a fully connected
        layer after the encoding (if n_hidden > 0), a multivariate
        Gaussian sampling layer of size n_code, and a mirrored fully
        connected layer before decoding.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input placeholder.
            'z': Innermost encoding tensor (latent features).
            'y': Reconstruction of the decoder.
            'keep_prob': Amount to keep when using dropout.
            'corrupt_prob': Amount to corrupt when using denoising.
            'train': Set to True when training; applies to batch
                normalization.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_prob = tf.placeholder(tf.float32, [1])

    # copy so reversing later does not mutate the caller's (or default) list
    n_filters = list(n_filters)

    # apply noise if denoising
    x_ = (utils.corrupt(x) * corrupt_prob +
          x * (1 - corrupt_prob)) if denoising else x

    # 2d -> 4d if convolutional
    x_tensor = utils.to_tensor(x_) if convolutional else x_
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input.get_shape().as_list()
            flattened = utils.flatten(current_input)
            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened
            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))

            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z

            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input = activation(batch_norm(h, phase_train,
                                                  'fc_t2/bn'))
            if dropout:
                current_input = tf.nn.dropout(current_input, keep_prob)
            if convolutional:
                current_input = tf.reshape(
                    current_input,
                    tf.stack([tf.shape(current_input)[0],
                              dims[1], dims[2], dims[3]]))
        else:
            z = current_input

    # Mirror the encoder's shapes, filters, and weights for decoding.
    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()
    n_filters += [input_shape[-1]]

    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, W = utils.deconv2d(x=current_input,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train,
                                      'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input = h

    y = current_input
    x_flat = utils.flatten(x)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)

    if variational:
        # variational lower bound, kl-divergence
        loss_z = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma -
            tf.square(z_mu) - tf.exp(2.0 * z_log_sigma), 1)
        # add l2 loss
        cost = tf.reduce_mean(loss_x + loss_z)
    else:
        # just optimize l2 loss
        cost = tf.reduce_mean(loss_x)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'corrupt_prob': corrupt_prob,
        'train': phase_train
    }
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        encoderNum=0,
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh):
    '''Convolutional variational autoencoder that learns a residual which,
    added to its input, predicts the target image.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network, e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Number of output filters to create for each layer.
    filter_sizes : list, optional
        The ksize (height and width) of each convolutional layer.
    encoderNum : int, optional
        Suffix appended to placeholder and scope names so several copies
        of this graph can coexist.
    n_hidden : int, optional
        Size of the fully connected layer placed before (and mirrored
        after) the variational embedding.
    n_code : int, optional
        Number of latent Gaussians to sample for the innermost encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu.

    Notes
    -----
    Dropout is always applied; feed a value for 'keep_prob' as returned in
    the dictionary: 1.0 means no dropout, 0.0 drops every connection.
    Sensible values are 0.5-0.8.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input placeholder.
            'z': Innermost encoding tensor (latent features).
            'y': Target image placeholder.
            'keep_prob': Amount to keep when using dropout.
            'train': Set to True when training; applies to batch
                normalization.
        }
    '''
    # network input placeholders
    x = tf.placeholder(tf.float32, input_shape, 'x' + str(encoderNum))
    y = tf.placeholder(tf.float32, input_shape, 'y' + str(encoderNum))
    phase_train = tf.placeholder(tf.bool,
                                 name='phase_train' + str(encoderNum))
    keep_prob = tf.placeholder(tf.float32, name='keep_prob' + str(encoderNum))

    x_tensor = x
    current_input = x_tensor

    # lists to hold the weights and shapes of each layer of the encoder
    Ws = []
    shapes = []

    # copy so reversing later does not mutate the caller's (or default) list
    n_filters = list(n_filters)

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope(str(layer_i) + str(encoderNum)):
            shapes.append(current_input.get_shape().as_list())
            # produce weights and values through convolution
            h, W = utils.conv2d(x=current_input,
                                n_output=n_output,
                                k_h=filter_sizes[layer_i],
                                k_w=filter_sizes[layer_i],
                                name='conv2d' + str(layer_i) + str(encoderNum))
            # pass the normalised batch through the activation function
            h = activation(
                batch_norm.Batch_norm(h, phase_train,
                                      'bn' + str(layer_i) + str(encoderNum)))
            # dropout
            h = tf.nn.dropout(h, keep_prob)
            # add the weights to the weights list
            Ws.append(W)
            # input for the next layer is the output of this layer
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    # variational section
    with tf.variable_scope('variational' + str(encoderNum)):
        dims = current_input.get_shape().as_list()
        if len(dims) == 4:
            flattened = tf.reshape(current_input,
                                   shape=[-1, dims[1] * dims[2] * dims[3]])
        elif len(dims) == 2 or len(dims) == 1:
            flattened = current_input
        # linear fully connected layer at the centre of the encoder
        h = utils.linear(flattened, n_hidden, name='W_fc')[0]
        h = activation(batch_norm.Batch_norm(h, phase_train, 'fc/bn'))
        h = tf.nn.dropout(h, keep_prob)
        z_mu = utils.linear(h, n_code, name='mu')[0]
        z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]
        # sample from noise distribution
        epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))
        # sample from posterior
        z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))
        h = utils.linear(z, n_hidden, name='fc_t')[0]
        h = activation(batch_norm.Batch_norm(h, phase_train, 'fc_t/bn'))
        h = tf.nn.dropout(h, keep_prob)
        size = dims[1] * dims[2] * dims[3]
        h = utils.linear(h, size, name='fc_t2')[0]
        current_input = activation(
            batch_norm.Batch_norm(h, phase_train, 'fc_t2/bn'))
        current_input = tf.reshape(
            current_input,
            tf.stack([tf.shape(current_input)[0], dims[1], dims[2], dims[3]]))

    # reverse the shapes, filters, and weights to undo the encoding
    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()
    n_filters += [input_shape[-1]]

    # Decoding
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i) +
                               str(encoderNum)):
            shape = shapes[layer_i + 1]
            # deconvolve
            h, W = utils.deconv2d(x=current_input,
                                  n_output_h=shape[1],
                                  n_output_w=shape[2],
                                  n_output_ch=shape[3],
                                  n_input_ch=shapes[layer_i][3],
                                  k_h=filter_sizes[layer_i],
                                  k_w=filter_sizes[layer_i])
            h = activation(
                batch_norm.Batch_norm(h, phase_train,
                                      'dec/bn' + str(layer_i)))
            # dropout
            h = tf.nn.dropout(h, keep_prob)
            current_input = h

    # the output from the final decoding layer is the output of the graph
    x_output = current_input

    # flatten the target image
    y_flat = utils.flatten(y)
    # make the model learn an output which, when added to the original
    # input, produces the next frame
    # flatten the input image
    x_original_flat = utils.flatten(x)
    # flatten the graph output
    dims1 = x_output.get_shape().as_list()
    if len(dims1) == 4:
        x_output_flat = tf.reshape(x_output,
                                   shape=[-1, dims1[1] * dims1[2] * dims1[3]])
    elif len(dims1) == 2 or len(dims1) == 1:
        x_output_flat = x_output
    # the ultimate output is the graph output added to the original input
    x_output_final = x_original_flat + x_output_flat

    # l2 loss: difference between the final output and the target image
    loss_x = tf.reduce_sum(tf.squared_difference(y_flat, x_output_final), 1)
    # penalise latent vectors (kl-divergence)
    loss_z = -0.5 * tf.reduce_sum(
        1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
        tf.exp(2.0 * z_log_sigma), 1)
    # total cost is the sum of the image loss and the latent loss
    cost = tf.reduce_mean(loss_x + loss_z)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'x_output_final': x_output_final,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'train': phase_train
    }
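# Usage sketch for the residual VAE above (hypothetical): feed the current
# frame as 'x' and the next frame as 'y'; the network learns a residual
# that is added back to 'x' to produce 'x_output_final'. A 4-D input shape
# is assumed here, since the encoder convolves the input directly.
import numpy as np
import tensorflow as tf

model = VAE(input_shape=[None, 28, 28, 1], encoderNum=0)
train_op = tf.train.AdamOptimizer(1e-3).minimize(model['cost'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    frame_t = np.random.rand(32, 28, 28, 1).astype(np.float32)   # current frame
    frame_t1 = np.random.rand(32, 28, 28, 1).astype(np.float32)  # next frame
    _, pred = sess.run([train_op, model['x_output_final']], feed_dict={
        model['x']: frame_t,
        model['y']: frame_t1,
        model['train']: True,
        model['keep_prob']: 0.8})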
def __init__(self):
    super(Decoder, self).__init__()
    self.factors = {
        'color': 4,
        'shape': 4,
        'size': 2,
        'camera': 3,
        'background': 3,
        'horizontal': 40,
        'vertical': 40
    }
    # Per-factor Gaussian embedding tables: means and log-variances.
    self.mu = nn.ParameterDict({
        'color': nn.Parameter(torch.randn(self.factors['color'], 128)),
        'shape': nn.Parameter(torch.randn(self.factors['shape'], 128)),
        'size': nn.Parameter(torch.randn(self.factors['size'], 128)),
        'camera': nn.Parameter(torch.randn(self.factors['camera'], 128)),
        'background': nn.Parameter(torch.randn(self.factors['background'], 128)),
        'horizontal': nn.Parameter(torch.randn(self.factors['horizontal'], 128)),
        'vertical': nn.Parameter(torch.randn(self.factors['vertical'], 128))
    })
    self.logvar = nn.ParameterDict({
        'color': nn.Parameter(torch.zeros(self.factors['color'], 128)),
        'shape': nn.Parameter(torch.zeros(self.factors['shape'], 128)),
        'size': nn.Parameter(torch.zeros(self.factors['size'], 128)),
        'camera': nn.Parameter(torch.zeros(self.factors['camera'], 128)),
        'background': nn.Parameter(torch.zeros(self.factors['background'], 128)),
        'horizontal': nn.Parameter(torch.zeros(self.factors['horizontal'], 128)),
        'vertical': nn.Parameter(torch.zeros(self.factors['vertical'], 128))
    })
    self.input_color = nn.Linear(160, 256)
    self.input_shape = nn.Linear(160, 256)
    self.input_size = nn.Linear(160, 256)
    self.path_col_shp_siz = nn.Sequential(nn.ReLU(), nn.Linear(256, 1024))
    self.input_horizontal = nn.Linear(160, 256)
    self.input_vertical = nn.Linear(160, 256)
    self.path_hor_ver = nn.Sequential(nn.ReLU(), nn.Linear(256, 1024))
    self.input_camera = nn.Linear(160, 1024)
    self.input_background = nn.Linear(160, 1024)
    self.path_shallow = nn.Sequential(
        nn.ReLU(), nn.Linear(1024, 1024),
        U.Lambda(lambda x: x.reshape(-1, 16, 8, 8)),  # 16 x 8 x 8
        nn.ReLU(),
        U.deconv2d(16, 64, 1, 1, True, 8))            # 64 x 8 x 8
    self.path_deep = nn.Sequential(
        nn.ReLU(), nn.Linear(1024, 1024),
        U.Lambda(lambda x: x.reshape(-1, 64, 4, 4)),  # 64 x 4 x 4
        nn.ReLU(),
        U.deconv2d(64, 64, 4, 1, True, 4),            # 64 x 4 x 4
        nn.ReLU(),
        U.deconv2d(64, 64, 4, 2, True, 8))            # 64 x 8 x 8
    self.path_base = nn.Sequential(
        nn.ReLU(),
        U.deconv2d(64, 16, 4, 2, True, 16),           # 16 x 16 x 16
        nn.ReLU(),
        U.deconv2d(16, 3, 6, 4, True, 64))            # 3 x 64 x 64
def create_conv_network(x,
                        channels_x,
                        channels_y,
                        layers=3,
                        feature_base=64,
                        filter_size=5,
                        pool_size=2,
                        keep_prob=0.8,
                        create_summary=True):
    """
    :param x: input tensor, shape should be [None, n, m, channels_x]
    :param channels_x: number of channels in the input image. For MRI,
        the input has 4 channels.
    :param channels_y: number of channels in the output image. For MRI,
        the output has 2 channels.
    :param layers: number of layers in the u-net architecture.
    :param feature_base: neurons in the first layer of the cnn. Each
        following layer has twice the neurons of the previous one.
    :param filter_size: size of the convolution filter.
    :param pool_size: size of the pooling layer.
    :param create_summary: creates a Tensorboard summary if True.
    """
    logging.info(
        "Layers: {layers}, features: {features}, "
        "filter size: {fill_size}x{fill_size}, "
        "pool size: {pool_size}x{pool_size}, "
        "input channels: {in_channels}, "
        "output channels: {out_channels}".format(
            layers=layers,
            features=feature_base,
            fill_size=filter_size,
            pool_size=pool_size,
            in_channels=channels_x,
            out_channels=channels_y))

    # placeholder for input image
    with tf.name_scope("input_image"):
        n = tf.shape(x)[1]
        m = tf.shape(x)[2]
        x_image = tf.reshape(x, tf.stack([-1, n, m, channels_x]))
        input_node = x_image

    weights = []
    biases = []
    convs = []
    pools = OrderedDict()
    deconv = OrderedDict()
    dw_h_convs = OrderedDict()
    up_h_convs = OrderedDict()

    # down layers
    for layer in range(layers):
        with tf.name_scope("down_conv_layer{}".format(layer)):
            features = (2**layer) * feature_base
            std_dev = np.sqrt(2. / (filter_size * filter_size * features))
            if layer == 0:
                w1 = utils.weight_variable(
                    [filter_size, filter_size, channels_x, features],
                    std_dev, "w1")
            else:
                w1 = utils.weight_variable(
                    [filter_size, filter_size, features // 2, features],
                    std_dev, "w1")
            w2 = utils.weight_variable(
                [filter_size, filter_size, features, features],
                std_dev, "w2")
            b1 = utils.bias_variable([features], "b1")
            b2 = utils.bias_variable([features], "b2")

            conv_1 = utils.conv2d(input_node, w1, b1, keep_prob)
            conv_2 = utils.conv2d(tf.nn.relu(conv_1), w2, b2, keep_prob)
            dw_h_convs[layer] = tf.nn.relu(conv_2)

            weights.append((w1, w2))
            biases.append((b1, b2))
            convs.append((conv_1, conv_2))

            # do max pooling if not the last layer
            if layer < layers - 1:
                pools[layer] = utils.max_pool(dw_h_convs[layer], pool_size)
                input_node = pools[layer]

    input_node = dw_h_convs[layers - 1]

    # up layers
    for layer in range(layers - 2, -1, -1):
        with tf.name_scope("up_conv_layer{}".format(layer)):
            features = (2**(layer + 1)) * feature_base
            std_dev = np.sqrt(2. / (filter_size * filter_size * features))

            wd = utils.weight_variable_devonc(
                [pool_size, pool_size, features // 2, features],
                std_dev, "wd")
            bd = utils.bias_variable([features // 2], "bd")
            h_deconv = tf.nn.relu(
                utils.deconv2d(input_node, wd, pool_size) + bd)
            h_deconv_concat = tf.concat([dw_h_convs[layer], h_deconv], 3)
            deconv[layer] = h_deconv_concat

            w1 = utils.weight_variable(
                [filter_size, filter_size, features, features // 2],
                std_dev, "w1")
            w2 = utils.weight_variable(
                [filter_size, filter_size, features // 2, features // 2],
                std_dev, "w2")
            b1 = utils.bias_variable([features // 2], "b1")
            b2 = utils.bias_variable([features // 2], "b2")

            conv_1 = utils.conv2d(h_deconv_concat, w1, b1, keep_prob)
            conv_2 = utils.conv2d(tf.nn.relu(conv_1), w2, b2, keep_prob)
            input_node = tf.nn.relu(conv_2)
            up_h_convs[layer] = input_node

            weights.append((w1, w2))
            biases.append((b1, b2))
            convs.append((conv_1, conv_2))

    # output image: 1x1 convolution plus a residual connection to the input
    with tf.name_scope("output_image"):
        weight = utils.weight_variable([1, 1, feature_base, channels_y],
                                       std_dev, "out_weight")
        bias = utils.bias_variable([channels_y], "out_bias")
        output_image = tf.add(
            utils.conv2d(input_node, weight, bias, tf.constant(1.0)),
            x_image)
        up_h_convs["out"] = output_image

    # create summaries
    if create_summary:
        with tf.name_scope("summaries"):
            for i, (c1, c2) in enumerate(convs):
                tf.summary.image("summary_conv_{:02}_01".format(i),
                                 utils.get_image_summary(c1))
                tf.summary.image("summary_conv_{:02}_02".format(i),
                                 utils.get_image_summary(c2))
            for k in pools.keys():
                tf.summary.image("summary_pool_{:02}".format(k),
                                 utils.get_image_summary(pools[k]))
            for k in deconv.keys():
                tf.summary.image("summary_deconv_concat_{:02}".format(k),
                                 utils.get_image_summary(deconv[k]))
            for k in dw_h_convs.keys():
                tf.summary.histogram(
                    "dw_convolution_{:02}/activations".format(k),
                    dw_h_convs[k])
            for k in up_h_convs.keys():
                tf.summary.histogram(
                    "up_convolution_{}/activations".format(k),
                    up_h_convs[k])

    variables = []
    for w1, w2 in weights:
        variables.append(w1)
        variables.append(w2)
    for b1, b2 in biases:
        variables.append(b1)
        variables.append(b2)

    return output_image, variables
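# Call sketch for create_conv_network (hypothetical shapes). Note that the
# residual add of x_image at the output requires channels_x == channels_y,
# so this sketch uses 2 channels for both.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 256, 256, 2], name='x')
output_image, variables = create_conv_network(
    x, channels_x=2, channels_y=2, layers=3,
    feature_base=64, filter_size=5, pool_size=2)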
def _build_fcn(self, input_op, reuse=False, is_training=True):
    row, col = self.input_shape[0], self.input_shape[1]
    row_p1, col_p1 = int(row / 2), int(col / 2)
    row_p2, col_p2 = int(row_p1 / 2), int(col_p1 / 2)
    with tf.variable_scope('FCNN', reuse=reuse):
        # Encoder
        conv1_1 = conv2d_relu(input_op, n_out=64, name='conv1_1',
                              is_training=is_training)
        conv1_2 = conv2d_relu(conv1_1, n_out=64, name='conv1_2',
                              is_training=is_training)
        pool_1 = pooling(conv1_2, name='pool_1')
        conv2_1 = conv2d_relu(pool_1, n_out=128, name='conv2_1',
                              is_training=is_training)
        conv2_2 = conv2d_relu(conv2_1, n_out=128, name='conv2_2',
                              is_training=is_training)
        pool_2 = pooling(conv2_2, name='pool_2')
        conv3_1 = dilated_block(pool_2, n_out=256, is_training=is_training,
                                name='conv3_1')
        conv3_2 = dilated_block(conv3_1, n_out=256, is_training=is_training,
                                name='conv3_2')
        conv3_3 = dilated_block(conv3_2, n_out=256, is_training=is_training,
                                name='conv3_3')
        pool_3 = pooling(conv3_3, name='pool_3')
        conv4_1 = dilated_block(pool_3, n_out=512, is_training=is_training,
                                name='conv4_1')
        conv4_2 = dilated_block(conv4_1, n_out=512, is_training=is_training,
                                name='conv4_2')
        conv4_3 = dilated_block(conv4_2, n_out=512, is_training=is_training,
                                name='conv4_3')
        # Decoder with skip connections
        deconv_1 = deconv2d(
            conv4_3, output_shape=[self.batch_size, row_p2, col_p2, 256],
            name='deconv_1')
        concat_1 = tf.concat([conv3_3, deconv_1], axis=3, name='concat_1')
        conv5_1 = dilated_block(concat_1, n_out=256, is_training=is_training,
                                name='conv5_1')
        conv5_2 = dilated_block(conv5_1, n_out=256, is_training=is_training,
                                name='conv5_2')
        conv5_3 = dilated_block(conv5_2, n_out=256, is_training=is_training,
                                name='conv5_3')
        deconv_2 = deconv2d(
            conv5_3, output_shape=[self.batch_size, row_p1, col_p1, 128],
            name='deconv_2')
        concat_2 = tf.concat([conv2_2, deconv_2], axis=3, name='concat_2')
        conv6_1 = conv2d_relu(concat_2, n_out=151, name='conv6_1',
                              is_training=is_training)
        conv6_2 = conv2d_relu(conv6_1, n_out=151, name='conv6_2',
                              is_training=is_training)
        deconv_3 = deconv2d(conv6_2,
                            output_shape=[self.batch_size, row, col, 64],
                            name='deconv_3')
        concat_3 = tf.concat([conv1_2, deconv_3], axis=3, name='concat_3')
        conv7_1 = conv2d_relu(concat_3, n_out=151, name='conv7_1',
                              is_training=is_training)
        conv7_2 = conv2d(conv7_1, n_out=151, name='conv7_2')
        return tf.nn.softmax(conv7_2, axis=3), conv7_2