def encoder_v2(self, x, is_training=True, reuse=False):
    with tf.variable_scope("encoder_v2", reuse=reuse):
        if self.verbose:
            print(x.shape)

        # Layer 1
        net = layers.conv2d(x, self.conv_dim >> 1, name='en_conv1')
        net = tf.nn.leaky_relu(net)
        if self.verbose:
            print(net.shape)

        # Layer 2
        net = layers.conv2d(net, self.conv_dim, name='en_conv2')
        net = layers.batch_norm(net, is_training=is_training, scope='en_bn2')
        net = tf.nn.leaky_relu(net)
        if self.verbose:
            print(net.shape)

        # Layer 3
        net = layers.flatten(net)
        if self.verbose:
            print(net.shape)

        # Layer 4
        net = layers.linear(net, self.linear_dim, scope='en_fc3')
        net = layers.batch_norm(net, is_training=is_training, scope='en_bn3')
        net = tf.nn.leaky_relu(net)
        if self.verbose:
            print(net.shape)

        # Layer 5
        out_logit = layers.linear(net, self.latent_dim, scope='en_fc4')
        out = tf.nn.sigmoid(out_logit, name="main_out")
        if self.verbose:
            print(out.shape)

        return out
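# Hedged usage sketch (not from the original source): assumes a model object
# exposing conv_dim, linear_dim, latent_dim, and verbose, with `layers` being
# the project's TF1-style ops module. `model` and the shapes are hypothetical.
#
#   x_in = tf.placeholder(tf.float32, [batch_size, image_size, image_size, channels])
#   z = model.encoder_v2(x_in, is_training=True)                    # builds variables
#   z_eval = model.encoder_v2(x_in, is_training=False, reuse=True)  # shares weights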
def _depthwise_conv_block(x, weights, strides=1, block_id=0):
    ''' depthwise convolution and pointwise convolution '''
    # depthwise convolution
    bn_beta = 'conv_dw_' + str(block_id) + '_bn/beta:0'
    bn_gamma = 'conv_dw_' + str(block_id) + '_bn/gamma:0'
    bn_mean = 'conv_dw_' + str(block_id) + '_bn/moving_mean:0'
    bn_var = 'conv_dw_' + str(block_id) + '_bn/moving_variance:0'
    conv_name = 'conv_dw_' + str(block_id) + '/depthwise_kernel:0'
    bias_name = 'conv_dw_' + str(block_id) + '/depthwise_bias:0'
    w, b, s = get_weights(weights, conv_name, bias_name, quant=False)
    x = depthwise_conv2d(x, w, b, strides=strides, padding='SAME')
    mean, std, beta, gamma = get_bn_param(weights, bn_mean, bn_var,
                                          bn_beta, bn_gamma)
    x = batch_norm(x, mean, std, beta, gamma)
    x = tf.nn.relu6(x)

    # pointwise convolution
    bn_beta = 'conv_pw_' + str(block_id) + '_bn/beta:0'
    bn_gamma = 'conv_pw_' + str(block_id) + '_bn/gamma:0'
    bn_mean = 'conv_pw_' + str(block_id) + '_bn/moving_mean:0'
    bn_var = 'conv_pw_' + str(block_id) + '_bn/moving_variance:0'
    conv_name = 'conv_pw_' + str(block_id) + '/kernel:0'
    bias_name = 'conv_pw_' + str(block_id) + '/bias:0'
    w, b, s = get_weights(weights, conv_name, bias_name)
    x = conv_2d(x, w, b, s, strides=1, padding='SAME')
    mean, std, beta, gamma = get_bn_param(weights, bn_mean, bn_var,
                                          bn_beta, bn_gamma)
    x = batch_norm(x, mean, std, beta, gamma)
    return tf.nn.relu6(x)
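# Hedged shape sketch (not from the original source): with strides=2 the
# depthwise stage halves the spatial resolution, and the pointwise 1x1 stage
# takes its output channel count from the 'conv_pw_<id>/kernel:0' weights.
# The shapes below assume a standard alpha=1.0 MobileNet weight dict.
#
#   x = tf.placeholder(tf.float32, [None, 112, 112, 64])
#   y = _depthwise_conv_block(x, weights, strides=2, block_id=2)  # -> [None, 56, 56, 128]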
def decoder_v2(self, x, is_training=True, reuse=False):
    with tf.variable_scope("decoder", reuse=reuse):
        if self.verbose:
            print(x.shape)

        # Layer 1
        net = layers.linear(x, self.linear_dim, scope='de_fc1')
        net = layers.batch_norm(net, is_training=is_training, scope='de_bn1')
        net = tf.nn.relu(net)
        if self.verbose:
            print(net.shape)

        # Layer 2
        shape = self.conv_dim * ((self.image_size >> 2)**2)
        net = layers.linear(net, shape, scope='de_fc2')
        net = layers.batch_norm(net, is_training=is_training, scope='de_bn2')
        net = tf.nn.relu(net)
        if self.verbose:
            print(net.shape)

        # Layer 3
        shape = [
            self.batch_size, self.image_size >> 2,
            self.image_size >> 2, self.conv_dim
        ]
        net = tf.reshape(net, shape)
        if self.verbose:
            print(net.shape)

        # Layer 4
        shape = [
            self.batch_size, self.image_size >> 1,
            self.image_size >> 1, self.conv_dim >> 1
        ]
        net = layers.deconv2d(net, shape, name='de_dc3')
        net = layers.batch_norm(net, is_training=is_training, scope='de_bn3')
        net = tf.nn.relu(net)
        if self.verbose:
            print(net.shape)

        # Layer 5
        shape = [
            self.batch_size, self.image_size,
            self.image_size, self.channels
        ]
        net = layers.deconv2d(net, shape, name='de_dc4')
        out = tf.nn.sigmoid(net, name="main_out")
        if self.verbose:
            print(out.shape)

        return out
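# Hedged wiring sketch (not from the original source): pairs encoder_v2 with
# decoder_v2 as an autoencoder; the MSE objective below is an illustrative
# choice, not necessarily the original training loss.
#
#   z = model.encoder_v2(x_in, is_training=True)
#   x_rec = model.decoder_v2(z, is_training=True)
#   loss = tf.reduce_mean(tf.square(x_rec - x_in))
#   train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)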
def __call__(self, x):
    fc1 = dense(x, 600, activation_fn=leaky_relu)
    fc2 = dense(fc1, 100, activation_fn=tf.identity)
    z = batch_norm(fc2)
    # perturb the normalized latent code with Gaussian noise before decoding
    z = z + 0.05 * tf.random_normal(tf.shape(fc2), mean=0.0, stddev=1.0)
    fc1_ = dense(z, 600, activation_fn=relu_bn)
    x_ = dense(fc1_, 784, activation_fn=tf.sigmoid)
    return x_
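# Hedged usage sketch: assumes __call__ belongs to an autoencoder class
# (`DenoisingAE` below is a hypothetical name), built in TF1 graph mode.
#
#   ae = DenoisingAE()
#   x = tf.placeholder(tf.float32, [None, 784])   # flattened 28x28 images
#   x_rec = ae(x)                                 # 784-d sigmoid reconstruction
#   loss = tf.reduce_mean(tf.square(x_rec - x))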
def conv2dbn(l, name, **kwargs):
    """ Batch normalized DNN Conv2D Layer """
    l = nn.layers.dnn.Conv2DDNNLayer(l, name=name, **kwargs)
    l = batch_norm(l, name='%sbn' % name)
    return l
def residual_block3_localbn(layer, name, num_layers, num_filters,
                            bottleneck=False, bottleneck_factor=4,
                            filter_size=(3, 3), stride=1, pad='same',
                            W=nn.init.GlorotUniform(),
                            nonlinearity=nn.nonlinearities.rectify):
    conv = layer
    # Insert shortcut when changing filter size or feature map size
    if (num_filters != layer.output_shape[1]) or (stride != 1):
        # Projection shortcut, aka option B
        layer = nn.layers.dnn.Conv2DDNNLayer(
            layer, name='%s_shortcut' % name,
            num_filters=num_filters, filter_size=1, stride=stride,
            pad=0, nonlinearity=None, b=None
        )

    if bottleneck and num_layers < 3:
        raise ValueError('At least 3 layers are required for the bottleneck configuration')

    for i in range(num_layers):
        if bottleneck:
            # Force the first and last layers to use 1x1 convolutions
            if i == 0 or (i == (num_layers - 1)):
                actual_filter_size = (1, 1)
            else:
                actual_filter_size = filter_size

            # Only widen to the target number of filters for the last layer
            if i == (num_layers - 1):
                actual_num_filters = num_filters
            else:
                actual_num_filters = num_filters // bottleneck_factor
        else:
            actual_num_filters = num_filters
            actual_filter_size = filter_size

        # TODO the last layer should probably not be bn-ed..
        conv = conv2dbn(
            conv, name='%s_%s' % (name, i),
            num_filters=actual_num_filters,
            filter_size=actual_filter_size,
            pad=pad, W=W,
            # Remove nonlinearity for the last conv layer
            nonlinearity=nonlinearity if (i < num_layers - 1) else None,
            # Only apply stride for the first conv layer
            stride=stride if i == 0 else 1
        )

    l = nn.layers.merge.ElemwiseSumLayer([conv, layer], name='%s_elemsum' % name)
    l = batch_norm(l)
    l = nn.layers.NonlinearityLayer(l, nonlinearity=nonlinearity,
                                    name='%s_elemsum_nl' % name)
    return l
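# Hedged usage sketch (assumes `nn` is the lasagne module, as in the code
# above; the input shape and block widths are hypothetical):
#
#   l = nn.layers.InputLayer((None, 3, 32, 32))
#   l = residual_block3_localbn(l, 'res1', num_layers=2, num_filters=16)
#   # stride=2 triggers the projection shortcut and halves the feature maps
#   l = residual_block3_localbn(l, 'res2', num_layers=2, num_filters=32, stride=2)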
def block(name, input, filters, ksz, stride, padding):
    """
    Convolution -> batch norm -> ReLU block.

    :param name: a string, the variable scope name
    :param input: a 4-D input tensor
    :param filters: an integer, the number of filters
    :param ksz: an integer, the kernel size
    :param stride: an integer
    :param padding: a string, must be uppercase ('SAME' or 'VALID')
    :return: the activated output tensor
    """
    with tf.variable_scope(name):
        conv = conv2d('conv', input, filters, ksz, stride, padding)
        bn = batch_norm('bn', conv)
        return relu('relu', bn)
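# Hedged usage sketch (hypothetical tensors; assumes the conv2d, batch_norm,
# and relu helpers used above are in scope):
#
#   x = tf.placeholder(tf.float32, [None, 32, 32, 3])
#   h = block('block1', x, filters=64, ksz=3, stride=1, padding='SAME')
#   h = block('block2', h, filters=128, ksz=3, stride=2, padding='SAME')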
def MobileNet(img_input, weights, alpha):
    x = tf.reshape(img_input, shape=[-1, 224, 224, 3])

    # init convolution
    w, b, s = get_weights(weights, 'conv1/kernel:0', 'conv1/bias:0')
    x = conv_2d(x, w, b, s, strides=2, padding='SAME')
    mean, std, beta, gamma = get_bn_param(weights,
                                          'conv1_bn/moving_mean:0',
                                          'conv1_bn/moving_variance:0',
                                          'conv1_bn/beta:0',
                                          'conv1_bn/gamma:0')
    x = batch_norm(x, mean, std, beta, gamma)
    x = tf.nn.relu6(x)

    # 13 depthwise-separable blocks; strided blocks halve the spatial resolution
    x = _depthwise_conv_block(x, weights, block_id=1)
    x = _depthwise_conv_block(x, weights, strides=2, block_id=2)
    x = _depthwise_conv_block(x, weights, block_id=3)
    x = _depthwise_conv_block(x, weights, strides=2, block_id=4)
    x = _depthwise_conv_block(x, weights, block_id=5)
    x = _depthwise_conv_block(x, weights, strides=2, block_id=6)
    x = _depthwise_conv_block(x, weights, block_id=7)
    x = _depthwise_conv_block(x, weights, block_id=8)
    x = _depthwise_conv_block(x, weights, block_id=9)
    x = _depthwise_conv_block(x, weights, block_id=10)
    x = _depthwise_conv_block(x, weights, block_id=11)
    x = _depthwise_conv_block(x, weights, strides=2, block_id=12)
    x = _depthwise_conv_block(x, weights, block_id=13)

    # global average pooling and 1x1 classifier
    x = avgpool_2d(x, k=7)
    x = tf.reshape(x, shape=[-1, 1, 1, int(1024 * alpha)])
    w, b, s = get_weights(weights, 'conv_preds/kernel:0', 'conv_preds/bias:0')
    x = conv_2d(x, w, b, s, strides=1, padding='SAME')
    x = tf.reshape(x, shape=[-1, 1000])
    return x
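# Hedged usage sketch (assumption: `weights` is a dict mapping Keras MobileNet
# variable names such as 'conv1/kernel:0' to numpy arrays taken from a
# pretrained model, and alpha is the width multiplier, e.g. 1.0):
#
#   img = tf.placeholder(tf.float32, [None, 224, 224, 3])
#   logits = MobileNet(img, weights, alpha=1.0)   # shape [-1, 1000]
#   probs = tf.nn.softmax(logits)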
predicted_img = tf.where(img_new < 0, x=zeros_img, y=ones_img)
correct_prediction = tf.cast(tf.equal(predicted_img, img), tf.float32) * mask_modi
accuracy_img = tf.cond(
    tf.equal((tf.reduce_sum(mask_modi) * img_size * img_size), 0),
    lambda: tf.zeros([]),
    lambda: tf.reduce_sum(correct_prediction) /
            (tf.reduce_sum(mask_modi) * img_size * img_size))

# adding batch normalization
# For FloorPlanQA we observe that DSMN* performs better without this
if reg == 1:
    img_new = tf.reshape(
        img_new,
        shape=[batch_size * img_size * img_size * max_num_sen, -1])
    img_new = layers.batch_norm(img_new, is_training=is_training)
    img_new = tf.reshape(
        img_new,
        shape=[batch_size, img_size, img_size, max_num_sen])

    # the special softmax layer
    img_new_sig = tf.sigmoid(img_new)
    mask_modi = tf.expand_dims(tf.expand_dims(mask, 1), 1)
    img_new_sig_rel = img_new_sig * mask_modi
else:
    with tf.variable_scope('loss_image'):
        img_created_reshape = tf.reshape(img_new, [-1, 12])
        img_reshape = tf.reshape(img, [-1, 12])
        loss_mask_temp = loss_mask * tf.expand_dims(
            tf.expand_dims(tf.expand_dims(img_loss_mask, 1), 1), 1)
        loss_mask_reshape = tf.reshape(loss_mask_temp, [-1, 12])
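# Minimal self-contained sketch (not from the original source) of the masked
# accuracy computed above: predictions are thresholded at 0 with tf.where and
# only positions where the mask is nonzero count toward the accuracy.
import tensorflow as tf

logits_demo = tf.constant([[-1.2, 0.3], [2.0, -0.5]])
labels_demo = tf.constant([[0.0, 1.0], [1.0, 1.0]])
mask_demo = tf.constant([[1.0, 1.0], [1.0, 0.0]])   # last position is ignored
pred_demo = tf.where(logits_demo < 0,
                     tf.zeros_like(logits_demo), tf.ones_like(logits_demo))
correct_demo = tf.cast(tf.equal(pred_demo, labels_demo), tf.float32) * mask_demo
# guard against an all-zero mask, mirroring the tf.cond above
acc_demo = tf.reduce_sum(correct_demo) / tf.maximum(tf.reduce_sum(mask_demo), 1.0)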