def Content_Encoder(self, x, reuse, scope):
    """Encode an image into a content feature map.

    Three conv + instance-norm stages (the last two downsample by 2)
    followed by four residual blocks.
    """
    with slim.arg_scope(training_scope(weight_decay=self.weight_decay)):
        with tf.variable_scope(scope, reuse=reuse):
            # Downsampling stack: (scope, channels, kernel, stride) per stage.
            for conv_scope, depth, ksize, step in (('conv_0', 64, 7, 1),
                                                   ('conv_1', 128, 4, 2),
                                                   ('conv_2', 256, 4, 2)):
                x = slim.conv2d(x, depth, [ksize, ksize], stride=step,
                                activation_fn=None, scope=conv_scope)
                x = slim.instance_norm(x, activation_fn=tf.nn.relu)
            # Residual blocks refine the 256-channel features.
            for rb_scope in ('rb_1', 'rb_2', 'rb_3', 'rb_4'):
                x = self.residual_block(x, scope=rb_scope)
            return x
def residual_block(self, x, scope='resblock'):
    """Residual block: two 3x3 convs with instance norm, plus identity skip.

    Only the first norm carries a ReLU; the second is linear so the
    addition with the skip connection happens pre-activation.
    """
    with tf.variable_scope(scope):
        residual = slim.conv2d(x, 256, [3, 3], activation_fn=None, scope='res1')
        residual = slim.instance_norm(residual, activation_fn=tf.nn.relu)
        residual = slim.conv2d(residual, 256, [3, 3], activation_fn=None,
                               scope='res2')
        residual = slim.instance_norm(residual)
        return x + residual
def image_cppn(size, num_output_channels=1, num_hidden_channels=24,
               num_layers=8, activation_fn=composite_activation,
               normalize=False):
    """Build a CPPN mapping per-pixel (x, y) coordinates to an image tensor.

    Returns a [1, size, size, num_output_channels] tensor in [0, 1]
    (sigmoid output layer).
    """
    r = 3.0**0.5  # chosen so std(coord_range) == 1.0
    coords = tf.linspace(-r, r, size)
    grid_y, grid_x = tf.meshgrid(coords, coords, indexing='ij')
    net = tf.expand_dims(tf.stack([grid_x, grid_y], -1), 0)  # add batch dim
    with slim.arg_scope([slim.conv2d], kernel_size=1, activation_fn=None):
        for _ in range(num_layers):
            fan_in = int(net.shape[-1])
            # Untruncated analogue of tf.variance_scaling_initializer.
            net = slim.conv2d(
                net, num_hidden_channels,
                weights_initializer=tf.random_normal_initializer(
                    0.0, np.sqrt(1.0 / fan_in)),
            )
            if normalize:
                net = slim.instance_norm(net)
            net = activation_fn(net)
        # Zero-initialised output layer.
        rgb = slim.conv2d(net, num_output_channels,
                          activation_fn=tf.nn.sigmoid,
                          weights_initializer=tf.zeros_initializer())
    return rgb
def instance_norm_conv2d(self, inputs, num_outputs, kernel_size, stride,
                         scope, mode='REFLECT', activation=tf.nn.relu):
    """Explicitly padded conv + instance norm.

    Pads the borders (reflect by default) before a VALID convolution,
    then applies instance normalization with the given activation.
    """
    with tf.variable_scope(scope):
        pad = kernel_size // 2
        padded = tf.pad(inputs,
                        [[0, 0], [pad, pad], [pad, pad], [0, 0]],
                        mode=mode)
        conv = slim.conv2d(padded, num_outputs, kernel_size, stride, 'VALID',
                           activation_fn=None)
        return slim.instance_norm(conv, activation_fn=activation)
def conv_transpose_in(x, out_channel, kernel, stride=1, trainable=True):
    """Transposed convolution followed by instance normalization."""
    upsampled = slim.conv2d_transpose(x, out_channel, kernel, stride,
                                      trainable=trainable)
    return slim.instance_norm(upsampled, trainable=trainable)
def DME_model(features):
    """Density-map estimator: instance-normed input -> VGG-16 -> back end.

    Only the intermediate conv4_3 activations of VGG-16 are used as the
    front-end feature map.
    """
    features = slim.instance_norm(features, activation_fn=tf.nn.relu)
    _, end_points = nets.vgg.vgg_16(features)
    front_end = end_points['vgg_16/conv4/conv4_3']
    return DME_back_end(front_end)
def conv_in(x, out_channel, kernel, stride=1, trainable=True):
    """Convolution (truncated-normal init, L2 regularised) + instance norm."""
    conv = slim.conv2d(
        x, out_channel, kernel, stride,
        trainable=trainable,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
        weights_regularizer=slim.l2_regularizer(0.0005))
    return slim.instance_norm(conv, trainable=trainable)
def transfer_net(inputs, name="transfer", reuse=True):
    """Style-transfer network: pad, encode, 5 residual blocks, decode, crop.

    Returns (output image in [0, 255], list of variables in this scope).
    NOTE(review): the default reuse=True would fail on the very first graph
    build (no variables exist yet) — confirm callers always pass reuse
    explicitly on first use.
    """
    # Reflect-pad 10px per side; cropped off again at the end so the
    # convolutions never see a zero border.
    inputs = tf.pad(inputs, [[0, 0], [10, 10], [10, 10], [0, 0]],
                    mode='REFLECT')
    with tf.variable_scope(name, reuse=reuse) as vs:
        h = slim.conv2d(inputs, 32, [9, 9], stride=1, scope='conv1')
        h = tf.nn.relu(slim.instance_norm(h))
        h = slim.conv2d(h, 64, [3, 3], stride=2, scope='conv2')
        h = tf.nn.relu(slim.instance_norm(h))
        h = slim.conv2d(h, 128, [3, 3], stride=2, scope='conv3')
        h = tf.nn.relu(slim.instance_norm(h))
        for res_scope in ("residual1", "residual2", "residual3",
                          "residual4", "residual5"):
            h = residual(h, 128, res_scope)
        h = deconv2d(h, 64, [3, 3], 1, scale=2, scope="conv4")
        h = tf.nn.relu(slim.instance_norm(h))
        h = deconv2d(h, 32, [3, 3], 1, scale=2, scope="conv5")
        h = tf.nn.relu(slim.instance_norm(h))
        h = deconv2d(h, 3, [9, 9], 1, scale=1, scope="conv6")
        # tanh output rescaled from [-1, 1] to [0, 255].
        h = tf.nn.tanh(slim.instance_norm(h))
        h = (h + 1.0) / 2 * 255.0
        variables = tf.contrib.framework.get_variables(vs)
        # Crop the 10px padding back off (-1 keeps the full batch/channel dims).
        height = h.get_shape()[1].value
        width = h.get_shape()[2].value
        h = tf.slice(h, [0, 10, 10, 0],
                     tf.stack([-1, height - 20, width - 20, -1]))
        return h, variables
def cppn(
    width,
    batch=1,
    num_output_channels=3,
    num_hidden_channels=24,
    num_layers=8,
    activation_func=_composite_activation,
    normalize=False,
):
    """Compositional Pattern Producing Network.

    Args:
        width: width (== height) of the generated image.
        batch: batch dimension of the output; all batch entries share the
            same weights.
        num_output_channels: channels of the final image.
        num_hidden_channels: width of each hidden 1x1-conv layer.
        num_layers: number of hidden layers.
        activation_func: activation applied after each hidden layer.
        normalize: if True, instance-normalize before each activation.

    Returns:
        A [batch, width, width, num_output_channels] tensor in [0, 1]
        (sigmoid output layer).
    """
    r = 3.0**0.5  # chosen so std(coord_range) == 1.0
    coords = tf.linspace(-r, r, width)
    grid_y, grid_x = tf.meshgrid(coords, coords, indexing="ij")
    # Replicate the (x, y) coordinate plane along the batch dimension.
    net = tf.stack([tf.stack([grid_x, grid_y], -1)] * batch, 0)
    with slim.arg_scope(
        [slim.conv2d],
        kernel_size=[1, 1],
        activation_fn=None,
        weights_initializer=tf.initializers.variance_scaling(),
        biases_initializer=tf.initializers.random_normal(0.0, 0.1),
    ):
        for _ in range(num_layers):
            hidden = slim.conv2d(net, num_hidden_channels)
            if normalize:
                hidden = slim.instance_norm(hidden)
            net = activation_func(hidden)
        rgb = slim.conv2d(
            net,
            num_output_channels,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.zeros_initializer(),
        )
    return rgb
def cppn(size=None, num_output_channels=3, num_hidden_channels=24,
         num_layers=8, activation_fn=composite_activation, normalize=False):
    """Return a hue-tinted CPPN image tensor driven by a size placeholder.

    Adapted from the CPPN Colab notebook of Mordvintsev et al.,
    "Differentiable Image Parameterizations", Distill, 2018.
    NOTE(review): the `size` parameter is unused — the resolution is fed
    at run time through the module-level `_size` placeholder instead.
    """
    global _size
    _size = tf.placeholder(
        shape=None,
        dtype=tf.int32,
    )
    r = 3.0**0.5
    coords = tf.linspace(-r, r, _size)
    grid_y, grid_x = tf.meshgrid(coords, coords, indexing='ij')
    net = tf.expand_dims(tf.stack([grid_x, grid_y], -1), 0)
    with slim.arg_scope([slim.conv2d], kernel_size=1, activation_fn=None):
        # Constant tint colour derived from the configured hue.
        colors = tf.constant(
            value=np.array([[[list(to_rgb(FLAGS.hue_hex))]]]),
            dtype=tf.float32,
        )
        for _ in range(num_layers):
            fan_in = int(net.shape[-1])
            # Untruncated analogue of tf.variance_scaling_initializer.
            net = slim.conv2d(
                net, num_hidden_channels,
                weights_initializer=tf.random_normal_initializer(
                    0.0, np.sqrt(1.0 / fan_in)),
            )
            if normalize:
                net = slim.instance_norm(net)
            net = activation_fn(net)
        rgb = slim.conv2d(net, num_output_channels,
                          activation_fn=tf.nn.sigmoid,
                          weights_initializer=tf.zeros_initializer())
    rgb = tf.clip_by_value(rgb * colors, 0, 1)
    return rgb
def deconv_layer(inputs, output_size, input_dim, output_dim, filter_size,
                 stride, padding, normal_type, is_training, name):
    """Transposed convolution + bias, followed by an optional normalization.

    Args:
        inputs: input feature map.
        output_size: output shape passed to tf.nn.conv2d_transpose.
        input_dim: number of input channels.
        output_dim: number of output channels.
        filter_size: (height, width) of the kernel.
        stride: strides for the transposed convolution.
        padding: padding mode for the transposed convolution.
        normal_type: one of "weight normalization", "layer normalization",
            "instance normalization", "batch normalization"; falsy for none.
        is_training: forwarded only to batch norm.
        name: variable scope for this layer.

    Returns:
        The (optionally normalized) layer output.
    """
    with tf.variable_scope(name):
        # For a transposed conv the kernel layout is
        # [height, width, output_channel, input_channel].
        fils = conv_weight_init(
            [filter_size[0], filter_size[1], output_dim, input_dim],
            name="weights",
            stride=stride,
            mode="Xavier")
        biases = bias_init([output_dim], name="biases")
        # The weight-normalization implementation follows the WGAN-GP
        # reference code.
        if normal_type == "weight normalization":
            norm_values = tf.sqrt(
                tf.reduce_sum(tf.square(fils), axis=[0, 1, 3]))
            with tf.variable_scope("weight_norm"):
                # NOTE(review): `norms` is computed identically to
                # `norm_values`, so the rescaling below is a no-op here;
                # confirm whether `norm_values` was meant to be a trainable
                # per-filter scale as in the WGAN-GP original.
                norms = tf.sqrt(
                    tf.reduce_sum(tf.square(fils),
                                  reduction_indices=[0, 1, 3]))
                fils = fils * tf.expand_dims(norm_values / norms, 1)
        deconv = tf.nn.conv2d_transpose(inputs,
                                        fils,
                                        output_size,
                                        strides=stride,
                                        padding=padding)
        result = tf.nn.bias_add(deconv, biases)
        # NOTE(review): reuse=True below requires the norm variables to
        # already exist; a first graph build taking one of these branches
        # would raise — verify the intended reuse behaviour.
        if normal_type == "layer normalization":
            result = slim.layer_norm(result, reuse=True, scope="layer_norm")
        elif normal_type == "instance normalization":
            result = slim.instance_norm(result,
                                        reuse=True,
                                        scope="instance_norm")
        elif normal_type == "batch normalization":
            result = slim.batch_norm(result,
                                     center=True,
                                     scale=True,
                                     is_training=is_training,
                                     scope="batch_norm")
        elif not normal_type:
            # No normalization requested: return the raw biased output.
            return result
        return result
def conv_layer(inputs, input_dim, output_dim, filter_size, strides, padding,
               normal_type, is_training, name):
    """Convolution + bias, followed by an optional normalization.

    Args:
        inputs: input feature map.
        input_dim: number of input channels.
        output_dim: number of output channels.
        filter_size: (height, width) of the kernel.
        strides: strides for tf.nn.conv2d.
        padding: padding mode for the convolution.
        normal_type: one of "weight normalization", "layer normalization",
            "instance normalization", "batch normalization"; falsy for none.
        is_training: forwarded only to batch norm.
        name: variable scope for this layer.

    Returns:
        The (optionally normalized) layer output.
    """
    with tf.variable_scope(name):
        fils = conv_weight_init(
            [filter_size[0], filter_size[1], input_dim, output_dim],
            name="weights",
            stride=strides,
            mode="Xavier")
        biases = bias_init([output_dim], name="biases")
        # Weight normalization acts on the weight parameters themselves,
        # so it is applied before the convolution.
        # The implementation follows the WGAN-GP reference code.
        if normal_type == "weight normalization":
            norm_values = tf.sqrt(
                tf.reduce_sum(tf.square(fils), axis=[0, 1, 2]))
            with tf.variable_scope("weight_norm"):
                # NOTE(review): `norms` equals `norm_values` here, so this
                # rescaling is a no-op; confirm against the WGAN-GP original.
                norms = tf.sqrt(
                    tf.reduce_sum(tf.square(fils),
                                  reduction_indices=[0, 1, 2]))
                fils = fils * (norm_values / norms)
        conv = tf.nn.conv2d(inputs, fils, strides, padding=padding)
        result = tf.nn.bias_add(conv, biases)
        # The WGAN-GP paper advises against batch normalization in the
        # critic; layer / weight / instance normalization may be used
        # instead, with layer normalization recommended.
        # NOTE(review): reuse=True below requires the norm variables to
        # already exist; a first graph build taking one of these branches
        # would raise — verify the intended reuse behaviour.
        if normal_type == "layer normalization":
            result = slim.layer_norm(result, reuse=True, scope="layer_norm")
        elif normal_type == "instance normalization":
            result = slim.instance_norm(result,
                                        reuse=True,
                                        scope="instance_norm")
        elif normal_type == "batch normalization":
            result = slim.batch_norm(result,
                                     center=True,
                                     scale=True,
                                     is_training=is_training,
                                     scope="batch_norm")
        elif not normal_type:
            # No normalization requested: return the raw biased output.
            return result
        return result
def _normalization(self, tensor_in, training=True):
    """Apply the normalization selected by cfg.MODEL.NORMALIZATION.

    Args:
        tensor_in: tensor to normalize.
        training: for batch norm this toggles training/inference statistics;
            for instance/layer norm it is forwarded as `trainable`
            (NOTE(review): that controls variable trainability, not a
            train/eval mode — confirm this is intended).

    Returns:
        The normalized tensor with this model's activation applied.

    Raises:
        ValueError: if cfg.MODEL.NORMALIZATION names an unknown method.
    """
    if cfg.MODEL.NORMALIZATION == "batch_norm":
        # batch_norm requires the UPDATE_OPS collection to be run during
        # training so the moving statistics are updated.
        tensor_out = slim.batch_norm(tensor_in,
                                     scale=True,
                                     is_training=training,
                                     activation_fn=self._activation())
    elif cfg.MODEL.NORMALIZATION == "instance_norm":
        tensor_out = slim.instance_norm(tensor_in,
                                        trainable=training,
                                        activation_fn=self._activation())
    elif cfg.MODEL.NORMALIZATION == "layer_norm":
        tensor_out = slim.layer_norm(tensor_in,
                                     trainable=training,
                                     activation_fn=self._activation())
    else:
        # Fixed typo in the error message ("Unsuppoerted" -> "Unsupported").
        raise ValueError("Unsupported normalization function: %s" %
                         cfg.MODEL.NORMALIZATION)
    return tensor_out
def E(self, images, is_training=False, reuse=False, bn=False, source=True):
    """Encoder: two conv/pool stages + fully-connected head.

    Returns (hidden representation, 0, class scores, normalized images);
    the class scores are softmaxed only when `source` is False.
    """
    with tf.variable_scope('encoder', reuse=reuse):
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.relu), \
                slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu,
                               padding='SAME'):
            # The instance-normalized input is also returned to the caller.
            images = slim.instance_norm(images)
            h = slim.conv2d(images, 64, 5, scope='conv1')
            if bn:
                h = slim.batch_norm(h, is_training=is_training)
            h = slim.max_pool2d(h, 2, stride=2, scope='pool1')
            h = slim.conv2d(h, 128, 5, scope='conv2')
            if bn:
                h = slim.batch_norm(h, is_training=is_training)
            h = slim.max_pool2d(h, 2, stride=2, scope='pool2')
            h = tf.contrib.layers.flatten(h)
            h = slim.fully_connected(h, 1024, activation_fn=tf.nn.relu,
                                     scope='fc3')
            h = slim.dropout(h, 0.5, is_training=is_training)
            hidden = slim.fully_connected(h, self.hidden_repr_size,
                                          activation_fn=tf.nn.relu,
                                          scope='fc4')
            # 10-way linear classification head.
            cat = slim.fully_connected(hidden, 10, activation_fn=tf.identity)
            if not source:
                cat = tf.nn.softmax(cat)
            return hidden, 0, cat, images
def scale_aggregation_network(features):
    """Encoder/decoder density-map network with instance-normalized input."""
    with slim.arg_scope(inception_arg_scope()):
        # Scale pixel values to [0, 1] and instance-normalize the input.
        features = tf.divide(features, 255)
        features = slim.instance_norm(features, epsilon=1e-6)
        # Encoder: head + three (max-pool -> encoder unit) stages.
        encoded = encoder_head(features, 16)
        for unit_idx in (1, 2, 3):
            encoded = slim.max_pool2d(encoded, [2, 2], 2, "SAME",
                                      scope="max_pooling_2x2")
            encoded = encoder_unit(encoded, 32, unit_idx)
        # Decoder: alternating conv / transposed-conv upsampling stages.
        decoded = slim.conv2d(encoded, 64, [9, 9], 1, "SAME")
        decoded = slim.conv2d_transpose(decoded, 64, [2, 2], stride=2)
        decoded = slim.conv2d(decoded, 32, [7, 7], 1, "SAME")
        decoded = slim.conv2d_transpose(decoded, 32, [2, 2], stride=2)
        decoded = slim.conv2d(decoded, 16, [5, 5], 1, "SAME")
        decoded = slim.conv2d_transpose(decoded, 16, [2, 2], stride=2)
        decoded = slim.conv2d(decoded, 16, [3, 3], 1, "SAME")
        # Final 1x1 projection to a single-channel density map with the
        # arg-scope normalizer explicitly disabled.
        density_map = slim.conv2d(decoded, 1, [1, 1], 1, "SAME",
                                  normalizer_fn=None,
                                  normalizer_params=None)
        # NHWC
        return density_map
def DME_model(features):
    """VGG-16 front end on instance-normalized input, then the DME back end.

    The conv4_3 activations of VGG-16 serve as the front-end feature map;
    the back end is built with 512 input channels.
    """
    normed = slim.instance_norm(features, activation_fn=tf.nn.relu)
    _, end_points = nets.vgg.vgg_16(normed)
    front_end = end_points['vgg_16/conv4/conv4_3']
    return DME_back_end(front_end, 512)