import tensorflow as tf

# NOTE: `nn` refers to the repository's layer-wrapper module (providing the
# conv, atrous_conv and max_pool helpers used below); adjust the import path
# to match the project layout.
import network.common.layers as nn


def _discriminator_builder(x,
                           name,
                           num_classes=40,
                           filters=[64, 128, 256, 512, 512],
                           num_blocks=[2, 2, 3, 3, 3],
                           strides=[2, 2, 2, 2, 1],
                           is_training=True,
                           use_global_status=True,
                           reuse=False):
  """Helper function to construct the discriminator."""
  if len(filters) != len(num_blocks) or len(filters) != len(strides):
    raise ValueError('length of lists are not consistent')

  with tf.variable_scope(name, reuse=reuse) as scope:
    # Convolutional blocks.
    for ib in range(len(filters)):
      for iu in range(num_blocks[ib]):
        name_format = 'layer{:d}/conv{:d}_{:d}'
        block_name = name_format.format(ib + 1, ib + 1, iu + 1)

        c_o = filters[ib]  # number of output channels
        # Stride of this block (falls back to 1 if unset).
        s = strides[ib] if strides[ib] else 1
        pad = 'VALID' if s > 1 else 'SAME'
        x = nn.conv(x,
                    name=block_name,
                    filters=c_o,
                    kernel_size=4,
                    strides=s,
                    padding=pad,
                    biased=False,
                    bn=True,
                    relu=True,
                    decay=0.99,
                    is_training=is_training,
                    use_global_status=use_global_status)

    # Final classification layer.
    x = nn.conv(x,
                name='block5/fc1_out',
                filters=num_classes,
                kernel_size=4,
                strides=1,
                padding='SAME',
                biased=True,
                bn=False,
                relu=False,
                is_training=is_training)
    print(x)

    return x

def _fcn_builder(x,
                 name,
                 cnn_fn,
                 num_classes,
                 is_training,
                 use_global_status,
                 reuse=False):
  """Helper function to build an FCN8s model for semantic segmentation.

  The FCN8s model is composed of one base network (ResNet101) and
  one classifier.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this network.
    cnn_fn: A function which builds the base network (ResNet101).
    num_classes: Number of predicted classes for classification tasks.
    is_training: If the tensorflow variables defined in this network
      would be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.
    reuse: enable/disable reuse for reusing tensorflow variables. It is
      useful for sharing weight parameters across two identical networks.

  Returns:
    A tensor of size [batch_size, height_in/8, width_in/8, num_classes].
  """
  h, w = x.get_shape().as_list()[1:3]  # NxHxWxC
  assert (h % 48 == 0 and w % 48 == 0 and h == w)

  # Build the base network.
  x = cnn_fn(x, name, is_training, use_global_status, reuse)

  with tf.variable_scope(name, reuse=reuse) as scope:
    x = nn.conv(x,
                'block5/fc1_voc12',
                num_classes,
                1,
                1,
                padding='SAME',
                biased=True,
                bn=False,
                relu=False,
                is_training=is_training)

    return x
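
# --- Illustrative usage sketches (not part of the original model code) ---
# The helpers below are assumptions added for demonstration only.
# `resnet_v1` (defined further down) returns both the final feature map and
# the intermediate block outputs, so a small wrapper adapts it to the
# single-tensor `cnn_fn` interface expected by the builders.
def _example_resnet_v1_101(x, name, is_training, use_global_status,
                           reuse=False):
  feats, _ = resnet_v1(x,
                       name=name,
                       is_training=is_training,
                       use_global_status=use_global_status,
                       reuse=reuse)
  return feats


def _example_fcn_usage():
  # Input height/width must be equal and a multiple of 48 (see the assert in
  # `_fcn_builder`); 21 classes is just an illustrative choice.
  images = tf.placeholder(tf.float32, [1, 480, 480, 3])
  logits = _fcn_builder(images,
                        'resnet_v1_101',
                        _example_resnet_v1_101,
                        num_classes=21,
                        is_training=False,
                        use_global_status=True,
                        reuse=False)
  return logits  # [1, 60, 60, 21] per the docstring (input resolution / 8).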

def _pspnet_builder(x,
                    name,
                    cnn_fn,
                    num_classes,
                    is_training,
                    use_global_status,
                    reuse=False):
  """Helper function to build a PSPNet model for semantic segmentation.

  The PSPNet model is composed of one base network (ResNet101) and one
  pyramid spatial pooling (PSP) module, followed by concatenation and two
  more convolutional layers for segmentation prediction.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this network.
    cnn_fn: A function which builds the base network (ResNet101).
    num_classes: Number of predicted classes for classification tasks.
    is_training: If the tensorflow variables defined in this network
      would be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.
    reuse: enable/disable reuse for reusing tensorflow variables. It is
      useful for sharing weight parameters across two identical networks.

  Returns:
    A tensor of size [batch_size, height_in/8, width_in/8, num_classes].
  """
  # Ensure that the size of input data is valid (should be a multiple of 6x8=48).
  h, w = x.get_shape().as_list()[1:3]  # NxHxWxC
  assert (h % 48 == 0 and w % 48 == 0 and h == w)

  # Build the base network.
  x = cnn_fn(x, name, is_training, use_global_status, reuse)

  with tf.variable_scope(name, reuse=reuse) as scope:
    # Build the PSP module.
    pool_k = int(h / 8)  # The base network has stride 8 by default.

    # Pooling branch with 1x1 output.
    pool1 = tf.nn.avg_pool(x,
                           name='block5/pool1',
                           ksize=[1, pool_k, pool_k, 1],
                           strides=[1, pool_k, pool_k, 1],
                           padding='VALID')
    pool1 = nn.conv(pool1,
                    'block5/pool1/conv1',
                    512,
                    1,
                    1,
                    padding='SAME',
                    biased=False,
                    bn=True,
                    relu=True,
                    is_training=is_training,
                    decay=0.99,
                    use_global_status=use_global_status)
    pool1 = tf.image.resize_bilinear(pool1, [pool_k, pool_k])

    # Pooling branch with 2x2 output.
    pool2 = tf.nn.avg_pool(x,
                           name='block5/pool2',
                           ksize=[1, pool_k // 2, pool_k // 2, 1],
                           strides=[1, pool_k // 2, pool_k // 2, 1],
                           padding='VALID')
    pool2 = nn.conv(pool2,
                    'block5/pool2/conv1',
                    512,
                    1,
                    1,
                    padding='SAME',
                    biased=False,
                    bn=True,
                    relu=True,
                    is_training=is_training,
                    decay=0.99,
                    use_global_status=use_global_status)
    pool2 = tf.image.resize_bilinear(pool2, [pool_k, pool_k])

    # Pooling branch with 3x3 output.
    pool3 = tf.nn.avg_pool(x,
                           name='block5/pool3',
                           ksize=[1, pool_k // 3, pool_k // 3, 1],
                           strides=[1, pool_k // 3, pool_k // 3, 1],
                           padding='VALID')
    pool3 = nn.conv(pool3,
                    'block5/pool3/conv1',
                    512,
                    1,
                    1,
                    padding='SAME',
                    biased=False,
                    bn=True,
                    relu=True,
                    is_training=is_training,
                    decay=0.99,
                    use_global_status=use_global_status)
    pool3 = tf.image.resize_bilinear(pool3, [pool_k, pool_k])

    # Pooling branch with 6x6 output.
    pool6 = tf.nn.avg_pool(x,
                           name='block5/pool6',
                           ksize=[1, pool_k // 6, pool_k // 6, 1],
                           strides=[1, pool_k // 6, pool_k // 6, 1],
                           padding='VALID')
    pool6 = nn.conv(pool6,
                    'block5/pool6/conv1',
                    512,
                    1,
                    1,
                    padding='SAME',
                    biased=False,
                    bn=True,
                    relu=True,
                    is_training=is_training,
                    decay=0.99,
                    use_global_status=use_global_status)
    pool6 = tf.image.resize_bilinear(pool6, [pool_k, pool_k])

    # Fuse the pooled feature maps with the input, and generate the
    # segmentation prediction.
    x = tf.concat([pool1, pool2, pool3, pool6, x],
                  name='block5/concat',
                  axis=3)
    x = nn.conv(x,
                'block5/conv2',
                512,
                3,
                1,
                padding='SAME',
                biased=False,
                bn=True,
                relu=True,
                is_training=is_training,
                decay=0.99,
                use_global_status=use_global_status)
    x = nn.conv(x,
                'block5/fc1_voc12',
                num_classes,
                1,
                1,
                padding='SAME',
                biased=True,
                bn=False,
                relu=False,
                is_training=is_training)

    return x
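
# Illustrative usage sketch (not part of the original model code): for a
# 480x480 input the stride-8 feature map is expected to be 60x60, so the PSP
# branches pool with kernels 60, 30, 20 and 10, producing 1x1, 2x2, 3x3 and
# 6x6 context maps that are resized back to 60x60 and fused. This reuses the
# assumed `_example_resnet_v1_101` wrapper defined after `_fcn_builder`.
def _example_pspnet_usage():
  images = tf.placeholder(tf.float32, [1, 480, 480, 3])  # multiple of 48
  logits = _pspnet_builder(images,
                           'resnet_v1_101',
                           _example_resnet_v1_101,
                           num_classes=21,
                           is_training=False,
                           use_global_status=True,
                           reuse=False)
  return logits  # [1, 60, 60, 21] per the docstring.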

def _unet_builder(x,
                  name,
                  filters=[64, 128, 256, 512, 1024],
                  num_blocks=[2, 3, 3, 3, 3],
                  strides=[2, 2, 2, 2, 2],
                  dilations=[None, None, None, None, None],
                  num_classes=40,
                  is_training=True,
                  use_global_status=False,
                  reuse=False):
  """Helper function to construct a UNet."""
  if len(filters) != len(num_blocks) or len(filters) != len(strides):
    raise ValueError('length of lists are not consistent')

  with tf.variable_scope(name, reuse=reuse) as scope:
    # Encoder.
    shortcuts = []
    for ib in range(len(filters)):
      for iu in range(num_blocks[ib]):
        name_format = 'layer{:d}/unit_{:d}/encoder/'
        block_name = name_format.format(ib + 1, iu + 1)

        c_o = filters[ib]  # number of output channels
        # Apply the stride at the beginning of the block.
        s = strides[ib] if iu == 0 else 1
        d = dilations[ib]
        if d is not None and d > 1 and s == 1:
          x = nn.atrous_conv(x,
                             name=block_name + '/conv',
                             filters=c_o,
                             kernel_size=3,
                             dilation=d,
                             padding='SAME',
                             biased=False,
                             bn=True,
                             relu=True,
                             decay=0.99,
                             is_training=is_training,
                             use_global_status=use_global_status)
        else:
          padding = 'VALID' if s > 1 else 'SAME'
          ksize = s * 2 if s > 1 else 3
          x = nn.conv(x,
                      name=block_name + '/conv',
                      filters=c_o,
                      kernel_size=ksize,
                      strides=s,
                      padding=padding,
                      biased=False,
                      bn=True,
                      relu=True,
                      decay=0.99,
                      is_training=is_training,
                      use_global_status=use_global_status)
      print(x)
      shortcuts.append(x)

    # Decoder.
    for ib in range(len(shortcuts) - 1, 0, -1):
      #for iu in range(num_blocks[ib-1]):
      for iu in range(3):
        n, h, w, c_o = shortcuts[ib - 1].get_shape().as_list()

        name_format = 'layer{:d}/unit_{:d}/decoder/'
        block_name = name_format.format(2 * len(filters) - ib, iu + 1)
        x = nn.conv(x,
                    name=block_name + 'conv',
                    filters=c_o,
                    kernel_size=3,
                    strides=1,
                    padding='SAME',
                    biased=False,
                    bn=True,
                    relu=True,
                    decay=0.99,
                    is_training=is_training,
                    use_global_status=use_global_status)
        if iu == 0:
          x = tf.image.resize_bilinear(x, [h, w])
          x = tf.concat([x, shortcuts[ib - 1]], axis=-1)
      print(x)

    # Output heads: segmentation, depth and surface-normal estimation.
    block_name = 'block5'
    seg = nn.conv(x,
                  block_name + '/fc1_seg',
                  num_classes,
                  3,
                  1,
                  padding='SAME',
                  biased=True,
                  bn=False,
                  relu=False,
                  is_training=is_training)

    dph = nn.conv(x,
                  block_name + '/fc1_depth',
                  1,
                  3,
                  1,
                  padding='SAME',
                  biased=True,
                  bn=False,
                  relu=True,
                  is_training=is_training)

    nrm = nn.conv(x,
                  block_name + '/fc1_normal',
                  3,
                  3,
                  1,
                  padding='SAME',
                  biased=True,
                  bn=False,
                  relu=False,
                  is_training=is_training)
    nrm = tf.nn.l2_normalize(nrm, dim=-1)

    return [seg, dph, nrm]
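
# Alias the multi-task UNet defined right above, because a second
# `_unet_builder` with a different signature appears later in this file.
_multitask_unet_builder = _unet_builder


# Illustrative usage sketch (not part of the original model code): the
# multi-task UNet returns three heads, and its decoder upsamples back to the
# resolution of the first encoder block (roughly half the input resolution).
def _example_multitask_unet_usage():
  images = tf.placeholder(tf.float32, [1, 256, 256, 3])
  seg, dph, nrm = _multitask_unet_builder(images,
                                          name='unet',
                                          num_classes=40,
                                          is_training=False,
                                          use_global_status=True,
                                          reuse=False)
  # seg: per-pixel class logits with `num_classes` channels.
  # dph: single-channel, ReLU-activated depth prediction.
  # nrm: three-channel, L2-normalized surface-normal prediction.
  return seg, dph, nrm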

def bottleneck(x,
               name,
               filters,
               strides=None,
               dilation=None,
               is_training=True,
               use_global_status=True):
  """Builds the bottleneck module in ResNet.

  This function stacks 3 convolutional layers and fuses the output with
  the residual connection.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this layer.
    filters: A number indicating the number of output channels.
    strides: A number indicating the stride of the sliding window for
      height and width.
    dilation: A number indicating the dilation factor for height and width.
    is_training: If the tensorflow variables defined in this layer
      would be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels_out].
  """
  if strides is None and dilation is None:
    raise ValueError('None of strides or dilation is specified, '
                     + 'set one of them to 1 or bigger number.')
  elif (strides is not None and strides > 1
        and dilation is not None and dilation > 1):
    raise ValueError('strides and dilation are both specified, '
                     + 'set one of them to 1 or None.')
  # Treat an unspecified stride as 1 so that the comparisons below only
  # deal with integers.
  if strides is None:
    strides = 1

  with tf.variable_scope(name) as scope:
    c_i = x.get_shape().as_list()[-1]

    if c_i != filters * 4:
      # Use a convolutional layer as residual connection when the
      # number of input channels is different from output channels.
      shortcut = nn.conv(x,
                         name='shortcut',
                         filters=filters * 4,
                         kernel_size=1,
                         strides=strides,
                         padding='VALID',
                         biased=False,
                         bn=True,
                         relu=False,
                         is_training=is_training,
                         use_global_status=use_global_status)
    elif strides > 1:
      # Use max-pooling as residual connection when the number of
      # input channels is the same as output channels, but the stride
      # is larger than 1.
      shortcut = nn.max_pool(x,
                             name='shortcut',
                             kernel_size=1,
                             strides=strides,
                             padding='VALID')
    else:
      # Otherwise, keep the original input as residual connection.
      shortcut = x

    # Build the 1st convolutional layer.
    x = nn.conv(x,
                name='conv1',
                filters=filters,
                kernel_size=1,
                strides=1,
                padding='SAME',
                biased=False,
                bn=True,
                relu=True,
                is_training=is_training,
                use_global_status=use_global_status)

    if dilation is not None and dilation > 1:
      # If dilation > 1, apply atrous conv to the 2nd convolutional layer.
      x = nn.atrous_conv(x,
                         name='conv2',
                         filters=filters,
                         kernel_size=3,
                         dilation=dilation,
                         padding='SAME',
                         biased=False,
                         bn=True,
                         relu=True,
                         is_training=is_training,
                         use_global_status=use_global_status)
    else:
      padding = 'VALID' if strides > 1 else 'SAME'
      x = nn.conv(x,
                  name='conv2',
                  filters=filters,
                  kernel_size=3,
                  strides=strides,
                  padding=padding,
                  biased=False,
                  bn=True,
                  relu=True,
                  is_training=is_training,
                  use_global_status=use_global_status)

    # Build the 3rd convolutional layer (increase the channels).
    x = nn.conv(x,
                name='conv3',
                filters=filters * 4,
                kernel_size=1,
                strides=1,
                padding='SAME',
                biased=False,
                bn=True,
                relu=False,
                is_training=is_training,
                use_global_status=use_global_status)

    # Fuse the convolutional outputs with the residual connection.
    x = tf.add_n([x, shortcut], name='add')
    x = tf.nn.relu(x, name='relu')

    return x
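
# Illustrative usage sketch (not part of the original model code): a dilated
# bottleneck unit that keeps the spatial resolution. With 256 input channels
# and filters=64 the output also has 64*4 = 256 channels, so the identity
# shortcut is used and the unit computes relu(conv3(conv2(conv1(x))) + x).
def _example_bottleneck_usage():
  feat = tf.placeholder(tf.float32, [1, 60, 60, 256])
  out = bottleneck(feat,
                   name='block3/unit_1/bottleneck_v1',
                   filters=64,
                   strides=1,
                   dilation=2,
                   is_training=False,
                   use_global_status=True)
  return out  # [1, 60, 60, 256]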

def resnet_v1(x,
              name,
              filters=[64, 128, 256, 512],
              num_blocks=[3, 4, 23, 3],
              strides=[2, 1, 1, 1],
              dilations=[None, None, 2, 2],
              is_training=True,
              use_global_status=True,
              reuse=False):
  """Helper function to build ResNet.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this network.
    filters: A list of numbers indicating the number of output channels
      (the actual output channels are 4 times these numbers).
    num_blocks: A list of numbers indicating the number of bottleneck
      units in each block.
    strides: A list of numbers indicating the stride of the sliding window
      for height and width.
    dilations: A list of numbers (or None) indicating the dilation factor
      for height and width in each block.
    is_training: If the tensorflow variables defined in this layer
      would be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.
    reuse: enable/disable reuse for reusing tensorflow variables. It is
      useful for sharing weight parameters across two identical networks.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels_out],
    and a list of intermediate feature maps (the conv1 output and the
    input to the last unit of each block).
  """
  if len(filters) != len(num_blocks) or len(filters) != len(strides):
    raise ValueError('length of lists are not consistent')

  with tf.variable_scope(name, reuse=reuse) as scope:
    # Build conv1.
    x = nn.conv(x,
                name='conv1',
                filters=64,
                kernel_size=7,
                strides=2,
                padding='VALID',
                biased=False,
                bn=True,
                relu=True,
                is_training=is_training,
                use_global_status=use_global_status)
    bn = []
    bn.append(x)

    # Build pool1.
    x = nn.max_pool(x,
                    name='pool1',
                    kernel_size=3,
                    strides=2,
                    padding='VALID')

    # Build residual bottleneck blocks.
    for ib in range(len(filters)):
      for iu in range(num_blocks[ib]):
        name_format = 'block{:d}/unit_{:d}/bottleneck_v1'
        block_name = name_format.format(ib + 1, iu + 1)

        c_o = filters[ib]  # number of output channels
        # Apply the stride only at the last unit of each block.
        s = strides[ib] if iu == num_blocks[ib] - 1 else 1
        d = dilations[ib]
        if iu == num_blocks[ib] - 1:
          bn.append(x)
        x = bottleneck(x,
                       name=block_name,
                       filters=c_o,
                       strides=s,
                       dilation=d,
                       is_training=is_training,
                       use_global_status=use_global_status)

    return x, bn
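
# Illustrative usage sketch (not part of the original model code): with the
# default arguments (num_blocks=[3, 4, 23, 3], strides=[2, 1, 1, 1],
# dilations=[None, None, 2, 2]) this builds a dilated ResNet-101 whose final
# feature map has stride 8 and 512*4 = 2048 channels.
def _example_resnet_v1_usage():
  images = tf.placeholder(tf.float32, [2, 480, 480, 3])
  feats, block_outs = resnet_v1(images,
                                name='resnet_v1_101',
                                is_training=False,
                                use_global_status=True,
                                reuse=False)
  # feats: stride-8 feature map used by the FCN/PSPNet heads above.
  # block_outs: the conv1 output plus the input to the last unit of each
  #             block, handy for skip connections or auxiliary losses.
  return feats, block_outs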

def _unet_builder(x,
                  mask,
                  name,
                  filters=[64, 128, 256, 512, 1024],
                  num_blocks=[2, 3, 3, 3, 3],
                  strides=[2, 2, 2, 2, 2],
                  is_training=True,
                  use_global_status=False,
                  reuse=False):
  """Helper function to construct a UNet."""
  if len(filters) != len(num_blocks) or len(filters) != len(strides):
    raise ValueError('length of lists are not consistent')

  with tf.variable_scope('Analyzer', reuse=reuse) as scope:
    with tf.name_scope(name):
      input_x = x
      # Encoder.
      shortcuts = []
      not_ignore_masks = []
      for ib in range(len(filters)):
        for iu in range(num_blocks[ib]):
          name_format = 'layer{:d}/unit_{:d}/encoder/'
          block_name = name_format.format(ib + 1, iu + 1)

          c_o = filters[ib]  # number of output channels
          # Apply the stride at the beginning of the block.
          s = strides[ib] if iu == 0 else 1
          padding = 'VALID' if s > 1 else 'SAME'
          if ib == 0 and iu == 0:
            # The first unit convolves each input stream separately (half of
            # the block's channels each) and concatenates the results. Batch
            # norm is disabled; the convs use biases instead.
            x = []
            for ix, in_x in enumerate(input_x):
              x.append(
                  nn.conv(in_x,
                          name=block_name + 'conv{:d}'.format(ix),
                          filters=int(c_o / 2),
                          kernel_size=3,
                          strides=s,
                          padding=padding,
                          biased=True,
                          bn=False,
                          relu=False,
                          decay=0.99,
                          is_training=is_training,
                          use_global_status=use_global_status))
            x = tf.concat(x, axis=-1, name=block_name + 'concat')
          else:
            x = nn.conv(x,
                        name=block_name + 'conv',
                        filters=c_o,
                        kernel_size=3,
                        strides=s,
                        padding=padding,
                        biased=True,
                        bn=False,
                        relu=False,
                        decay=0.99,
                        is_training=is_training,
                        use_global_status=use_global_status)

          if iu == 0:
            mask = nn.max_pool(mask,
                               block_name + 'mask_pool',
                               3,
                               s,
                               padding=padding)
            not_ignore_masks.append(1 - mask)

          f = tf.multiply(x,
                          not_ignore_masks[-1],
                          name=block_name + 'masked_conv')
          tf.add_to_collection('Analyzer/features', f)

          x = tf.nn.relu(x)
        print(x)
        shortcuts.append(x)

      # Decoder.
      for ib in range(len(shortcuts) - 1, 0, -1):
        for iu in range(num_blocks[ib - 1]):
          n, h, w, c_o = shortcuts[ib - 1].get_shape().as_list()

          name_format = 'layer{:d}/unit_{:d}/decoder/'
          block_name = name_format.format(2 * len(filters) - ib, iu + 1)
          x = nn.conv(x,
                      name=block_name + 'conv',
                      filters=c_o,
                      kernel_size=3,
                      strides=1,
                      padding='SAME',
                      biased=True,
                      bn=False,
                      relu=False,
                      decay=0.99,
                      is_training=is_training,
                      use_global_status=use_global_status)

          # The mask has to match the current feature resolution: the first
          # decoder unit still runs at the coarser resolution, while later
          # units run at the (upsampled) shortcut resolution.
          m = not_ignore_masks[ib] if iu == 0 else not_ignore_masks[ib - 1]
          f = tf.multiply(x, m, name=block_name + 'masked_conv')
          tf.add_to_collection('Analyzer/features', f)

          x = tf.nn.relu(x)
          if iu == 0:
            x = tf.image.resize_bilinear(x, [h, w])
            x = tf.concat([x, shortcuts[ib - 1]], axis=-1)
        print(x)

      # Reconstruct as many channels as the concatenated inputs have, then
      # resize back to the input resolution.
      c_i = 0
      for in_x in input_x:
        c_i += in_x.get_shape().as_list()[-1]
      x = nn.conv(x,
                  name='block5/fc',
                  filters=c_i,
                  kernel_size=1,
                  strides=1,
                  padding='SAME',
                  biased=True,
                  bn=False,
                  relu=False,
                  is_training=is_training)
      x = tf.image.resize_bilinear(x, tf.shape(input_x[0])[1:3])
      tf.add_to_collection('Analyzer/outputs', x)

      return x
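
# Illustrative usage sketch (not part of the original model code): the
# "Analyzer" UNet takes a list of input streams plus a binary ignore mask
# (1 = ignored pixel), collects mask-weighted intermediate features under
# the 'Analyzer/features' collection, and reconstructs a tensor with as many
# channels as the concatenated inputs. Shapes below are illustrative.
def _example_analyzer_usage():
  rgb = tf.placeholder(tf.float32, [1, 256, 256, 3])
  prediction = tf.placeholder(tf.float32, [1, 256, 256, 40])
  ignore_mask = tf.placeholder(tf.float32, [1, 256, 256, 1])
  recon = _unet_builder([rgb, prediction],
                        ignore_mask,
                        name='analyzer',
                        is_training=False,
                        reuse=False)
  # recon: 3 + 40 = 43 channels, resized back to the input resolution.
  features = tf.get_collection('Analyzer/features')
  return recon, features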