def call(self, inputs):
    """Run a U-Net-style decoder on top of the pretrained frontend encoder.

    Skip connections are taken from the frontend's pool2..pool5 end points
    and fused into a chain of transposed-convolution decoder stages, ending
    in a 1x1 convolution that produces per-pixel class logits.
    """
    filters = self.n_filters
    drop = self.dropout_p

    # Encoder: reuse the pretrained frontend feature maps.
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, self.frontend, is_training=self.is_training)
    skip2 = end_points["pool2"]  # 32
    skip3 = end_points["pool3"]  # 16
    skip4 = end_points["pool4"]  # 8
    skip5 = end_points["pool5"]  # 4

    bottleneck = slim.max_pool2d(skip5, [2, 2])  # 2

    # Decoder: each stage upsamples and concatenates the matching skip map.
    center = self.conv_transpose_block(bottleneck, filters * 8 * 2, filters * 8,
                                       dropout_p=drop)  # 4
    dec5 = self.conv_transpose_block(tf.concat([center, skip5], axis=3),
                                     filters * 8 * 2, filters * 8, dropout_p=drop)  # 8
    dec4 = self.conv_transpose_block(tf.concat([dec5, skip4], axis=3),
                                     filters * 8 * 2, filters * 8, dropout_p=drop)  # 16
    dec3 = self.conv_transpose_block(tf.concat([dec4, skip3], axis=3),
                                     filters * 4 * 2, filters * 2, dropout_p=drop)  # 32
    dec2 = self.conv_transpose_block(tf.concat([dec3, skip2], axis=3),
                                     filters * 2 * 2, filters * 2 * 2, dropout_p=drop)  # 64
    dec1 = self.conv_transpose_block(dec2, filters * 2 * 2, filters,
                                     dropout_p=drop)  # 128

    # Per-pixel class logits.
    return slim.conv2d(dec1, self.num_classes, [1, 1])
def build_deeplabv3_plus(inputs, num_classes, preset_model='DeepLabV3+', frontend="ResNet101",
                         weight_decay=1e-5, is_training=True,
                         pretrained_dir="src/Segmentation/models"):
    """Build the DeepLabV3+ model.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    label_size = tf.shape(inputs)[1:3]

    # Encoder branch: ASPP over the deep features, projected to 256 channels,
    # then upsampled to a quarter of the input resolution.
    aspp = AtrousSpatialPyramidPoolingModule(end_points['pool4'])
    aspp = slim.conv2d(aspp, 256, [1, 1], scope="conv_1x1_output", activation_fn=None)
    decoder_features = Upsampling(aspp, label_size / 4)

    # Low-level branch: shallow features reduced to 48 channels.
    low_level = slim.conv2d(end_points['pool2'], 48, [1, 1],
                            activation_fn=tf.nn.relu, normalizer_fn=None)

    # Decoder: fuse the two branches and refine with two 3x3 convolutions.
    fused = tf.concat((low_level, decoder_features), axis=3)
    fused = slim.conv2d(fused, 256, [3, 3], activation_fn=tf.nn.relu, normalizer_fn=None)
    fused = slim.conv2d(fused, 256, [3, 3], activation_fn=tf.nn.relu, normalizer_fn=None)

    # Final upsampling to input resolution and 1x1 classification layer.
    fused = Upsampling(fused, label_size)
    net = slim.conv2d(fused, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_deeplabv3(inputs, num_classes, preset_model='DeepLabV3', frontend="Res101",
                    weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """Build the DeepLabV3 model.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    label_size = tf.shape(inputs)[1:3]

    # ASPP over the deep frontend features, bilinearly upsampled back to the
    # input resolution, followed by a 1x1 classification layer.
    features = AtrousSpatialPyramidPoolingModule(end_points['pool4'])
    features = Upsampling(features, label_size)
    net = slim.conv2d(features, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_bisenet(inputs, num_classes, preset_model='BiSeNet', frontend="xception",
                  weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """Builds the BiSeNet model.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=256, kernel_size=[3, 3], strides=2)

    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    # Global average pool to get the highest receptive field; project to 128
    # channels and resize back so it can be fused into the deepest stage.
    size = tf.shape(end_points['pool5'])[1:3]
    global_channels = tf.reduce_mean(end_points['pool5'], [1, 2], keep_dims=True)
    global_channels = slim.conv2d(global_channels, 128, 1, [1, 1], activation_fn=None)
    global_channels = tf.nn.relu(slim.batch_norm(global_channels, fused=True))
    global_channels = tf.image.resize_bilinear(global_channels, size=size)

    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128)
    # BUG FIX: the global context used to be added into a separate variable
    # (net_5_scaled) that was never read again, so the global-pooling branch
    # was dead. Fuse it into net_5 (matching the class-based BiSeNet builder
    # elsewhere in this file).
    net_5 = tf.add(global_channels, net_5)
    net_5 = Upsampling(net_5, scale=2)
    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])

    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128)
    net_4 = tf.add(net_4, net_5)
    net_4 = Upsampling(net_4, scale=2)
    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
    context_net = net_4

    ### Combining the paths
    net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=256)
    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

    ### Final upscaling and finish
    net = Upsampling(net, scale=2)
    net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu,
                      biases_initializer=None, normalizer_fn=slim.batch_norm)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    net = Upsampling(net, 4)
    return net, init_fn
def build_pspnet(inputs, label_size, num_classes, preset_model='PSPNet', frontend="ResNet101",
                 pooling_type="MAX", weight_decay=1e-5, upscaling_method="conv",
                 is_training=True, pretrained_dir="models"):
    """Builds the PSPNet model.

    Arguments:
        inputs: The input tensor.
        label_size: Size of the final label tensor. We need to know this for
            proper upscaling.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.
        pooling_type: Max or Average pooling.
        upscaling_method: "conv" for learned upscaling, "bilinear" for a
            single bilinear resize.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    # The frontend feature map used here (pool3) is 1/8 of the label size.
    feature_map_shape = [int(x / 8.0) for x in label_size]
    # BUG FIX: removed leftover debug print of feature_map_shape.

    psp = PyramidPoolingModule(end_points['pool3'],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    net = slim.batch_norm(net, fused=True)
    net = tf.nn.relu(net)

    if upscaling_method.lower() == "conv":
        # Learned 8x upscaling via three stride-2 transposed-conv stages.
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif upscaling_method.lower() == "bilinear":
        net = Upsampling(net, label_size)

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_dense_aspp(inputs, num_classes, preset_model='DenseASPP', frontend="ResNet101",
                     weight_decay=1e-5, is_training=True, pretrained_dir="./models"):
    """Build the DenseASPP model: densely-connected dilated convolution
    blocks with increasing rates (3, 6, 12, 18, 24) over frontend features.

    Returns a (net, init_fn) pair: the logits tensor and the frontend
    weight-initialization function.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    init_features = end_points['pool3']

    ### First block, rate = 3 (operates on the raw frontend features).
    d_3_features = DilatedConvBlock(init_features, n_filters=256, kernel_size=[1, 1])
    d_3 = DilatedConvBlock(d_3_features, n_filters=64, rate=3, kernel_size=[3, 3])

    # Each subsequent block sees the concatenation of the frontend features
    # and every previous block's output (dense connectivity).
    collected = [init_features, d_3]
    for rate in (6, 12, 18, 24):
        stage = DilatedConvBlock(tf.concat(collected, axis=-1),
                                 n_filters=256, kernel_size=[1, 1])
        stage = DilatedConvBlock(stage, n_filters=64, rate=rate, kernel_size=[3, 3])
        collected.append(stage)

    full_block = tf.concat(collected, axis=-1)
    net = slim.conv2d(full_block, num_classes, [1, 1], activation_fn=None, scope='logits')
    net = Upsampling(net, scale=8)
    return net, init_fn
def build_gcn(inputs, num_classes, preset_model='GCN', frontend="ResNet101",
              weight_decay=1e-5, is_training=True, upscaling_method="bilinear",
              pretrained_dir="models"):
    """Build the GCN (Global Convolutional Network) model.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    # Deepest to shallowest frontend stages.
    stages = [end_points['pool5'], end_points['pool4'],
              end_points['pool3'], end_points['pool2']]

    # Each stage: global conv + boundary refinement, fused with the upsampled
    # result of the previous (deeper) stage, then 2x upscaled.
    carry = None
    for feature in stages:
        branch = GlobalConvBlock(feature, n_filters=21, size=3)
        branch = BoundaryRefinementBlock(branch, n_filters=21, kernel_size=[3, 3])
        if carry is not None:
            branch = tf.add(branch, carry)
            branch = BoundaryRefinementBlock(branch, n_filters=21, kernel_size=[3, 3])
        carry = ConvUpscaleBlock(branch, n_filters=21, kernel_size=[3, 3], scale=2)

    # Final refinement and upscaling to input resolution.
    net = BoundaryRefinementBlock(carry, n_filters=21, kernel_size=[3, 3])
    net = ConvUpscaleBlock(net, n_filters=21, kernel_size=[3, 3], scale=2)
    net = BoundaryRefinementBlock(net, n_filters=21, kernel_size=[3, 3])
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_custom(inputs, num_classes, frontend="ResNet101", weight_decay=1e-5,
                 is_training=True, n_filters=32, pretrained_dir="models", dropout_p=0.2):
    """U-Net-style custom decoder over a pretrained frontend encoder.

    NOTE(review): a second `build_custom` with a different signature is
    defined later in this file and shadows this one at import time.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        n_filters: Base channel multiplier for the decoder stages.
        dropout_p: Dropout probability passed to the decoder blocks.

    Returns:
        The logits tensor.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        # BUG FIX: pretrained_dir was accepted but never forwarded, so the
        # argument was silently ignored.
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    conv2 = end_points["pool2"]  # 32
    conv3 = end_points["pool3"]  # 16
    conv4 = end_points["pool4"]  # 8
    conv5 = end_points["pool5"]  # 4

    pool = slim.max_pool2d(conv5, [2, 2])  # 2

    # Decoder: each stage upsamples and concatenates the matching skip map.
    center = conv_transpose_block(pool, n_filters * 8 * 2, n_filters * 8,
                                  dropout_p=dropout_p)  # 4
    dec5 = conv_transpose_block(tf.concat([center, conv5], axis=3),
                                n_filters * 8 * 2, n_filters * 8, dropout_p=dropout_p)  # 8
    dec4 = conv_transpose_block(tf.concat([dec5, conv4], axis=3),
                                n_filters * 8 * 2, n_filters * 8, dropout_p=dropout_p)  # 16
    dec3 = conv_transpose_block(tf.concat([dec4, conv3], axis=3),
                                n_filters * 4 * 2, n_filters * 2, dropout_p=dropout_p)  # 32
    dec2 = conv_transpose_block(tf.concat([dec3, conv2], axis=3),
                                n_filters * 2 * 2, n_filters * 2 * 2, dropout_p=dropout_p)  # 64
    dec1 = conv_transpose_block(dec2, n_filters * 2 * 2, n_filters,
                                dropout_p=dropout_p)  # 128

    net = slim.conv2d(dec1, num_classes, [1, 1])
    return net
def build_refinenet(inputs, num_classes, preset_model='RefineNet', frontend="ResNet101",
                    weight_decay=1e-5, upscaling_method="bilinear",
                    pretrained_dir="models", is_training=True):
    """Build the RefineNet model.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    # Frontend stages from deepest (1/32) to shallowest (1/4), each passed
    # through a 1x1 bottleneck before refinement.
    high = [end_points['pool5'], end_points['pool4'],
            end_points['pool3'], end_points['pool2']]
    low = [None, None, None, None]

    high[0] = slim.conv2d(high[0], 512, 1)
    high[1] = slim.conv2d(high[1], 256, 1)
    high[2] = slim.conv2d(high[2], 256, 1)
    high[3] = slim.conv2d(high[3], 256, 1)

    # Cascaded RefineNet blocks: each fuses the current frontend stage with
    # the refined output of the previous (deeper) block.
    low[0] = RefineBlock(high_inputs=high[0], low_inputs=None)  # ResNet 1/32 only
    low[1] = RefineBlock(high[1], low[0])  # ResNet 1/16 + previous 1/16
    low[2] = RefineBlock(high[2], low[1])  # ResNet 1/8 + previous 1/8
    low[3] = RefineBlock(high[3], low[2])  # ResNet 1/4 + previous 1/4

    net = low[3]
    net = ResidualConvUnit(net)
    net = ResidualConvUnit(net)

    if upscaling_method.lower() == "conv":
        # Learned 4x upscaling.
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif upscaling_method.lower() == "bilinear":
        net = Upsampling(net, scale=4)

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_bisenet(inputs, num_classes, preset_model='BiSeNet', frontend="ResNet101",
                  weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """Builds the BiSeNet model (ResNet101 variant).

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=256, kernel_size=[3, 3], strides=2)

    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)
    # BUG FIX: removed leftover debug `print(end_points)`.

    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=512)
    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=2048)

    # Re-weight the deepest features by their global average response.
    global_channels = tf.reduce_mean(net_5, [1, 2], keep_dims=True)
    net_5_scaled = tf.multiply(global_channels, net_5)

    ### Combining the paths
    net_4 = Upsampling(net_4, scale=2)
    net_5_scaled = Upsampling(net_5_scaled, scale=4)
    context_net = tf.concat([net_4, net_5_scaled], axis=-1)

    net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=num_classes)

    ### Final upscaling and finish
    net = Upsampling(net, scale=8)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_mssa(inputs, num_classes, preset_model='mssa', frontend="ResNet101",
               weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """Build the MSSA model: a BiSeNet-style two-path network with a spatial
    stream and an attention-refined context stream.

    Returns a (net, init_fn) pair: the logits tensor and the frontend
    weight-initialization function.
    """
    # Spatial stream: three stride-2 conv blocks (1/8 resolution).
    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=256, kernel_size=[3, 3], strides=2)

    # Context stream: pretrained frontend with attention refinement.
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    refined_4 = AttentionRefinementModule(end_points['pool4'], n_filters=512)
    refined_5 = AttentionRefinementModule(end_points['pool5'], n_filters=2048)

    # Re-weight the deepest features by their global average response.
    pooled = tf.reduce_mean(refined_5, [1, 2], keep_dims=True)
    scaled_5 = tf.multiply(pooled, refined_5)

    # Bring both context stages to 1/8 resolution and fuse.
    refined_4 = Upsampling(refined_4, scale=2)
    scaled_5 = Upsampling(scaled_5, scale=4)
    context_net = tf.concat([refined_4, scaled_5], axis=-1)

    net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=num_classes)
    net = Upsampling(net, scale=8)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_custom(inputs, num_classes, frontend="ResNet101", weight_decay=1e-5,
                 is_training=True, pretrained_dir="models"):
    """Custom multi-scale decoder: upsamples all four frontend stages to
    input resolution, concatenates them, and refines with conv blocks.

    NOTE(review): this definition shadows the earlier `build_custom` in this
    file; consider renaming one of them.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.

    Returns:
        The logits tensor.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        # BUG FIX: pretrained_dir was accepted but never forwarded, so the
        # argument was silently ignored.
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    # Bring every frontend stage back to input resolution.
    up_1 = conv_transpose_block(end_points["pool2"], strides=4, n_filters=64)
    up_2 = conv_transpose_block(end_points["pool3"], strides=8, n_filters=64)
    up_3 = conv_transpose_block(end_points["pool4"], strides=16, n_filters=64)
    up_4 = conv_transpose_block(end_points["pool5"], strides=32, n_filters=64)

    features = tf.concat([up_1, up_2, up_3, up_4], axis=-1)
    features = conv_block(inputs=features, n_filters=256, filter_size=[1, 1])
    features = conv_block(inputs=features, n_filters=64, filter_size=[3, 3])
    features = conv_block(inputs=features, n_filters=64, filter_size=[3, 3])
    features = conv_block(inputs=features, n_filters=64, filter_size=[3, 3])

    net = slim.conv2d(features, num_classes, [1, 1], scope='logits')
    return net
def build_deeplabv3_plus_concrete(inputs, num_classes, preset_model='DeepLabV3+',
                                  frontend="ResNet101", weight_decay=1e-5, is_training=True,
                                  pretrained_dir="models", pretrained_file="xception_65.ckpt",
                                  one_parameter=False):
    """Build the DeepLabV3+ model with Concrete dropout.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use.

    Returns:
        A (net, init_fn, frontend_scope) triple: the logits tensor, the
        frontend weight-initialization function, and the frontend scope.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training,
        pretrained_file=pretrained_file, one_parameter=one_parameter)

    label_size = tf.shape(inputs)[1:3]

    # ASPP over the deep features, projected and upsampled to 1/4 resolution.
    aspp = AtrousSpatialPyramidPoolingModule(end_points['pool4'])
    aspp = slim.conv2d(aspp, 256, [1, 1], scope="conv_1x1_output", activation_fn=None)
    decoder_features = Upsampling(aspp, label_size / 4)

    # NOTE: unlike plain DeepLabV3+, the low-level encoder branch (pool2 +
    # 48-channel projection + concat) is intentionally disabled here; the
    # decoder operates on the ASPP features alone.
    refined = decoder_features
    refined = slim.conv2d(refined, 256, [3, 3], activation_fn=tf.nn.relu, normalizer_fn=None)
    refined = slim.conv2d(refined, 256, [3, 3], activation_fn=tf.nn.relu, normalizer_fn=None)

    refined = Upsampling(refined, label_size)
    net = slim.conv2d(refined, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn, frontend_scope
def build_bisenet(self, reuse=False):
    """Builds the BiSeNet model.

    Arguments:
        reuse: Reuse variables or not.

    Side effects:
        Sets self.net (main logits), self.init_fn, and — in train /
        validation / test mode — the auxiliary supervision heads
        self.sup1 and self.sup2.
    """
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    batch_norm_params = self.model_config['batch_norm_params']
    init_method = self.model_config['conv_config']['init_method']

    if init_method == 'kaiming_normal':
        initializer = slim.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False)
    else:
        initializer = slim.xavier_initializer()

    with tf.variable_scope('spatial_net', reuse=reuse):
        with slim.arg_scope([slim.conv2d], biases_initializer=None,
                            weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm], is_training=self.is_training(),
                                **batch_norm_params):
                spatial_net = ConvBlock(self.images, n_filters=64, kernel_size=[7, 7], strides=2)
                spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2)
                spatial_net = ConvBlock(spatial_net, n_filters=64, kernel_size=[3, 3], strides=2)
                spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[1, 1])

    frontend_config = self.model_config['frontend_config']

    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        self.images, frontend_config, self.is_training(), reuse)

    ### Combining the paths
    with tf.variable_scope('combine_path', reuse=reuse):
        with slim.arg_scope([slim.conv2d], biases_initializer=None,
                            weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm], is_training=self.is_training(),
                                **batch_norm_params):
                # Tail part: global average pool for maximum receptive field,
                # projected and resized back onto the deepest stage.
                # BUG FIX: removed leftover debug print and the exit() call
                # that terminated the process before the graph was built.
                size = tf.shape(end_points['pool5'])[1:3]
                global_context = tf.reduce_mean(end_points['pool5'], [1, 2], keep_dims=True)
                global_context = slim.conv2d(global_context, 128, 1, [1, 1], activation_fn=None)
                global_context = tf.nn.relu(slim.batch_norm(global_context, fused=True))
                global_context = tf.image.resize_bilinear(global_context, size=size)

                net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128)
                net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128)

                net_5 = tf.add(net_5, global_context)
                net_5 = Upsampling(net_5, scale=2)
                net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])

                net_4 = tf.add(net_4, net_5)
                net_4 = Upsampling(net_4, scale=2)
                net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
                context_net = net_4

                net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=256)
                net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
                net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

                # Upsampling + dilation or only Upsampling
                net = Upsampling(net, scale=2)
                net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu,
                                  biases_initializer=None, normalizer_fn=slim.batch_norm)
                net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None,
                                  scope='logits')
                self.net = Upsampling(net, 4)

                # net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                # self.net = Upsampling(net, scale=8)

                if self.mode in ['train', 'validation', 'test']:
                    # Auxiliary supervision heads on the two context stages.
                    sup1 = slim.conv2d(net_5, self.num_classes, [1, 1],
                                       activation_fn=None, scope='supl1')
                    sup2 = slim.conv2d(net_4, self.num_classes, [1, 1],
                                       activation_fn=None, scope='supl2')
                    self.sup1 = Upsampling(sup1, scale=16)
                    self.sup2 = Upsampling(sup2, scale=8)

    self.init_fn = init_fn
def build_ddsc(inputs, num_classes, preset_model='DDSC', frontend="ResNet101",
               weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """Build the Dense Decoder Shortcut Connections model.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.
        preset_model: Which model you want to use. Select which ResNet model
            to use for feature extraction.

    Returns:
        A (net, init_fn) pair: the logits tensor and the frontend
        weight-initialization function.
    """
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    ### Adapting features for all stages
    d4 = EncoderAdaptionBlock(end_points['pool5'], n_filters=1024)
    d3 = EncoderAdaptionBlock(end_points['pool4'], n_filters=512)
    d2 = EncoderAdaptionBlock(end_points['pool3'], n_filters=256)
    d1 = EncoderAdaptionBlock(end_points['pool2'], n_filters=128)

    d4 = SemanticFeatureGenerationBlock(d4, D_features=1024,
                                        D_prime_features=1024 / 4, O_features=1024)

    ### Fusing features from stages 3 and 4
    # NOTE: each dN is reassigned in place; the order of these statements is
    # significant because later fusions consume the updated tensors.
    d4 = ConvBlock(d4, n_filters=512, kernel_size=[3, 3])
    d4 = Upsampling(d4, scale=2)
    d3 = ConvBlock(d3, n_filters=512, kernel_size=[3, 3])
    d3 = tf.add_n([d4, d3])
    d3 = SemanticFeatureGenerationBlock(d3, D_features=512,
                                        D_prime_features=512 / 4, O_features=512)

    ### Fusing features from stages 2, 3 and 4
    d4 = ConvBlock(d4, n_filters=256, kernel_size=[3, 3])
    d4 = Upsampling(d4, scale=4)
    d3 = ConvBlock(d3, n_filters=256, kernel_size=[3, 3])
    d3 = Upsampling(d3, scale=2)
    d2 = ConvBlock(d2, n_filters=256, kernel_size=[3, 3])
    d2 = tf.add_n([d4, d3, d2])
    d2 = SemanticFeatureGenerationBlock(d2, D_features=256,
                                        D_prime_features=256 / 4, O_features=256)

    ### Fusing features from stages 1, 2, 3 and 4
    d4 = ConvBlock(d4, n_filters=128, kernel_size=[3, 3])
    d4 = Upsampling(d4, scale=8)
    d3 = ConvBlock(d3, n_filters=128, kernel_size=[3, 3])
    d3 = Upsampling(d3, scale=4)
    d2 = ConvBlock(d2, n_filters=128, kernel_size=[3, 3])
    d2 = Upsampling(d2, scale=2)
    d1 = ConvBlock(d1, n_filters=128, kernel_size=[3, 3])
    d1 = tf.add_n([d4, d3, d2, d1])
    d1 = SemanticFeatureGenerationBlock(d1, D_features=128,
                                        D_prime_features=128 / 4, O_features=num_classes)

    ### Final upscaling and finish
    net = Upsampling(d1, scale=4)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_tbnet(inputs, num_classes, frontend="ResNet101", is_training=True,
                pretrained_dir="models"):
    """Build the TBNet model: spatial, context and boundary streams fused
    into a final segmentation head plus an auxiliary edge output.

    Arguments:
        inputs: The input tensor.
        num_classes: Number of classes.

    Returns:
        A (net, init_fn, output_edge) triple: the logits tensor, the frontend
        weight-initialization function, and the sigmoid edge map.
    """

    def _context_attention(feature, gamma):
        """Context-aware self-attention: re-weight *feature* by pairwise
        position similarity, gated by the learnable scalar *gamma*."""
        filters = feature.shape.as_list()[3]
        # BUG FIX: `/` is true division in Python 3 and produced a float
        # channel count / float reshape dimension; use floor division.
        b_ = slim.conv2d(feature, filters // 8, kernel_size=[1, 1], stride=[1, 1],
                         activation_fn=None, normalizer_fn=None)
        c_ = slim.conv2d(feature, filters // 8, kernel_size=[1, 1], stride=[1, 1],
                         activation_fn=None, normalizer_fn=None)
        d_ = slim.conv2d(feature, filters, kernel_size=[1, 1], stride=[1, 1],
                         activation_fn=None, normalizer_fn=None)
        vec_b = tf.reshape(b_, [1, -1, tf.shape(feature)[3] // 8])
        vec_cT = tf.transpose(tf.reshape(c_, [1, -1, tf.shape(feature)[3] // 8]), (0, 2, 1))
        sigmoid_bcT = tf.nn.sigmoid(tf.matmul(vec_b, vec_cT))
        vec_d = tf.reshape(d_, [1, -1, tf.shape(feature)[3]])
        bcTd = tf.matmul(sigmoid_bcT, vec_d)
        bcTd = tf.reshape(bcTd, [1, tf.shape(feature)[1], tf.shape(feature)[2],
                                 tf.shape(feature)[3]])
        return gamma * bcTd + feature

    # The spatial stream
    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=256, kernel_size=[3, 3], strides=2)

    # The context stream
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    # Learnable attention gates, initialized to zero (attention off at start).
    gamma1 = tf.get_variable(name='gamma1', shape=[1], initializer=tf.zeros_initializer())
    gamma2 = tf.get_variable(name='gamma2', shape=[1], initializer=tf.zeros_initializer())

    # The context-aware attention on pool4.
    feature1 = end_points['pool4']
    feature1 = slim.conv2d(feature1, 512, kernel_size=[1, 1])
    feature1 = slim.batch_norm(feature1, fused=True)
    net_4 = _context_attention(feature1, gamma1)

    # The context-aware attention on pool5.
    feature2 = end_points['pool5']
    feature2 = slim.conv2d(feature2, 512, kernel_size=[1, 1])
    feature2 = slim.batch_norm(feature2, fused=True)
    net_5 = _context_attention(feature2, gamma2)

    global_channels = tf.reduce_mean(net_5, [1, 2], keep_dims=True)
    net_5_scaled = tf.multiply(global_channels, net_5)

    # The boundary stream: a residual block over the attended pool4 features.
    conv1 = slim.conv2d(net_4, 512, kernel_size=[1, 1])
    res = slim.conv2d(conv1, 512, kernel_size=[3, 3], stride=[1, 1],
                      activation_fn=None, normalizer_fn=None)
    res = tf.nn.relu(slim.batch_norm(res, fused=True))
    res = slim.conv2d(res, 512, kernel_size=[3, 3], stride=[1, 1],
                      activation_fn=None, normalizer_fn=None)
    res = slim.batch_norm(res, fused=True)
    res = conv1 + res
    res = tf.nn.relu(res)

    net_5_scaled = Upsampling(net_5_scaled, scale=2)
    conv2 = slim.conv2d(net_5_scaled, 512, kernel_size=[1, 1])

    # The global-gated convolution
    ggc = tf.concat([res, conv2], axis=-1)
    ggc = slim.batch_norm(ggc, fused=True)
    ggc = slim.conv2d(ggc, 512, kernel_size=[1, 1])
    ggc = tf.nn.relu(ggc)
    ggc = slim.conv2d(ggc, 512, kernel_size=[1, 1])
    ggc = slim.batch_norm(ggc, fused=True)
    ggc = tf.nn.sigmoid(ggc)
    gated = res * (1 + ggc)

    gated = Upsampling(gated, scale=2)
    output = slim.conv2d(gated, 512, kernel_size=[1, 1])

    # Auxiliary edge head: two transposed convolutions up to input size.
    output_edge = slim.conv2d_transpose(gated, 128, kernel_size=[3, 3], stride=[4, 4],
                                        activation_fn=None)
    output_edge = tf.nn.relu(slim.batch_norm(output_edge))
    output_edge = slim.conv2d_transpose(output_edge, 1, kernel_size=[3, 3], stride=[2, 2],
                                        activation_fn=None)
    output_edge = tf.nn.relu(slim.batch_norm(output_edge))
    output_edge = tf.nn.sigmoid(output_edge)
    output_edge = tf.reshape(output_edge,
                             [tf.shape(output_edge)[1], tf.shape(output_edge)[2]])

    # The feature fusion
    net_5_scaled = Upsampling(net_5_scaled, 2)
    output_s_c = tf.concat([spatial_net, net_5_scaled], axis=-1)
    output_s_c = slim.batch_norm(output_s_c)
    output_s_c = slim.conv2d(output_s_c, 256, kernel_size=[1, 1])

    net = FeatureFusionModule(input_1=output_s_c, input_2=output, n_filters=num_classes)
    net = Upsampling(net, scale=8)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn, output_edge
def build_bisenet3(inputs, num_classes, preset_model='DepthwiseAAFF', frontend="xception", weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """
    Builds the 'DepthwiseAAFF' BiSeNet variant.

    Spatial path: one strided ConvBlock followed by two strided
    depthwise-separable convolutions (hand-declared filter variables), then a
    1x1 ConvBlock. Context path: a pretrained frontend whose pool3/pool4/pool5
    end points feed two structurally-identical AttentionAndFeatureFussion
    branches; each branch is refined by an AttentionRefinementModule. The two
    branches plus the spatial path are merged by a three-input
    FeatureFusionModule, then upsampled to the output logits.

    Arguments:
      inputs: Input image tensor (NHWC assumed — TODO confirm against callers).
      num_classes: Number of segmentation classes.
      preset_model: Preset name; not used in graph construction.
      frontend: Backbone name for the context path (e.g. "xception").
      weight_decay: Not used inside this function.
      is_training: Passed to the frontend builder.
      pretrained_dir: Directory holding pretrained frontend weights.

    Returns:
      net: Output logits tensor.
      init_fn: Callable that initializes the pretrained frontend weights.
    """
    initializer = slim.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False)
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    # depth-wise convolution: pointwise (1x1) and depthwise (3x3) filter
    # variables for the two separable convolutions below.
    point_filter1 = tf.get_variable(name="point_filter1",
                                    shape=(1, 1, 64, 128),
                                    initializer=initializer)
    point_filter2 = tf.get_variable(name="point_filter2",
                                    shape=(1, 1, 128, 256),
                                    initializer=initializer)
    filter1 = tf.get_variable(name="filter1",
                              shape=(3, 3, 64, 1),
                              initializer=initializer)
    filter2 = tf.get_variable(name="filter2",
                              shape=(3, 3, 128, 1),
                              initializer=initializer)

    # spatial path: three stride-2 stages (overall 8x downsampling)
    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter1,
                                         pointwise_filter=point_filter1,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter2,
                                         pointwise_filter=point_filter2,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = ConvBlock(spatial_net, n_filters=32, kernel_size=[1, 1])

    # Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)
    # NOTE(review): `size`, `logits` and `frontend_scope` are never used below.
    size = tf.shape(end_points['pool5'])[1:3]

    # First AAFF branch: fuse pool3/pool4, then pool5, upsample and re-concat
    # with pool3 before attention refinement.
    net_1 = AttentionAndFeatureFussion(end_points['pool3'], end_points['pool4'], 64)
    net_2 = AttentionAndFeatureFussion(net_1, end_points['pool5'], 128)
    net_2 = Upsampling(net_2, scale=2)
    net_1_2 = tf.concat([net_1, net_2], axis=-1)
    net_1_2 = Upsampling(net_1_2, scale=2)
    net_1_2_3 = tf.concat([net_1_2, end_points['pool3']], axis=-1)
    net_1_2_3 = ConvBlock(net_1_2_3, n_filters=128, kernel_size=[1, 1], strides=1)
    context_path_left = AttentionRefinementModule(net_1_2_3, n_filters=128)

    # Second AAFF branch: structurally identical to the first. Each slim/module
    # call creates fresh variables (no scope reuse is requested), so this is an
    # independently-parameterized parallel branch — presumably intentional;
    # verify against the model design.
    net_3 = AttentionAndFeatureFussion(end_points['pool3'], end_points['pool4'], 64)
    net_4 = AttentionAndFeatureFussion(net_3, end_points['pool5'], 128)
    net_4 = Upsampling(net_4, scale=2)
    net_3_4 = tf.concat([net_3, net_4], axis=-1)
    net_3_4 = Upsampling(net_3_4, scale=2)
    net_3_4_5 = tf.concat([net_3_4, end_points['pool3']], axis=-1)
    net_3_4_5 = ConvBlock(net_3_4_5, n_filters=128, kernel_size=[1, 1], strides=1)
    context_path_right = AttentionRefinementModule(net_3_4_5, n_filters=128)

    ### Combining the paths
    net = FeatureFusionModule(input_1=context_path_left,
                              input_2=context_path_right,
                              input_3=spatial_net,
                              n_filters=256)
    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

    ### Final upscaling and finish
    # Upsampling + dilation or only Upsampling
    net = Upsampling(net, scale=2)
    net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu,
                      biases_initializer=None, normalizer_fn=slim.batch_norm)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    net = Upsampling(net, 4)
    return net, init_fn
def build_bisenet2(inputs, num_classes, preset_model='DepthwiseBiseNet', frontend="xception", weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """
    Builds the 'DepthwiseBiseNet' model: a BiSeNet whose spatial path uses
    depthwise-separable convolutions.

    Spatial path: one strided ConvBlock plus two strided separable
    convolutions (8x downsampling). Context path: a pretrained frontend whose
    pool4/pool5 end points are refined with AttentionRefinementModules and a
    global-average-pooled context tail, then fused with the spatial path by a
    FeatureFusionModule.

    Arguments:
      inputs: Input image tensor (NHWC assumed — TODO confirm against callers).
      num_classes: Number of segmentation classes.
      preset_model: Preset name; not used in graph construction.
      frontend: Backbone name for the context path (e.g. "xception").
      weight_decay: Not used inside this function.
      is_training: Passed to the frontend builder.
      pretrained_dir: Directory holding pretrained frontend weights.

    Returns:
      net: Output logits tensor.
      init_fn: Callable that initializes the pretrained frontend weights.
    """
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    # depth-wise convolution: filter variables for the separable convolutions.
    point_filter1 = tf.get_variable(
        name="point_filter1",
        shape=(1, 1, 64, 128),
        initializer=tf.contrib.layers.xavier_initializer())
    point_filter2 = tf.get_variable(
        name="point_filter2",
        shape=(1, 1, 128, 256),
        initializer=tf.contrib.layers.xavier_initializer())
    filter1 = tf.get_variable(
        name="filter1",
        shape=(3, 3, 64, 1),
        initializer=tf.contrib.layers.xavier_initializer())
    filter2 = tf.get_variable(
        name="filter2",
        shape=(3, 3, 128, 1),
        initializer=tf.contrib.layers.xavier_initializer())

    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter1,
                                         pointwise_filter=point_filter1,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter2,
                                         pointwise_filter=point_filter2,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = ConvBlock(spatial_net, n_filters=32, kernel_size=[1, 1])

    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    size = tf.shape(end_points['pool5'])[1:3]
    # Global context tail: average-pool pool5 to 1x1, project to 128 channels,
    # and resize back to pool5's spatial resolution.
    global_channels = tf.reduce_mean(end_points['pool5'], [1, 2], keep_dims=True)
    global_channels = slim.conv2d(global_channels, 128, 1, [1, 1], activation_fn=None)
    global_channels = tf.nn.relu(slim.batch_norm(global_channels, fused=True))
    global_channels = tf.image.resize_bilinear(global_channels, size=size)

    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128)
    # BUG FIX: the sum with the global context was previously stored in
    # `net_5_scaled`, which was never read again — the global context tail was
    # computed and then silently discarded. Feed the sum into the decoder, as
    # the BiSeNet context-path tail intends.
    net_5 = tf.add(global_channels, net_5)
    net_5 = Upsampling(net_5, scale=2)
    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])

    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128)
    net_4 = tf.add(net_4, net_5)
    net_4 = Upsampling(net_4, scale=2)
    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
    context_net = net_4

    ### Combining the paths
    net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=256)
    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

    ### Final upscaling and finish
    net = Upsampling(net, scale=2)
    net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu,
                      biases_initializer=None, normalizer_fn=slim.batch_norm)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    net = Upsampling(net, 4)
    return net, init_fn
def build_bisenet_custom(self, reuse=False):
    """
    Builds a custom BiSeNet model with a MobileNetV3-style spatial path.

    The spatial path is a stack of expansion 1x1 convolutions and
    DepthSepConv blocks ("bneck" stages). The context path reuses a frontend
    backbone; its 16x- and 32x-downsampled end points (names taken from
    self.model_config['net_node']) are refined with custom attention modules,
    combined with a global-average-pooled context, and fused with the spatial
    path by FeatureFusionModule_Custom.

    This method does not return the graph; it stores results on the instance:
      self.net       — main logits, upsampled 4x from the fused features;
      self.sup1/sup2 — auxiliary supervision logits (only when self.mode is
                       'train', 'validation' or 'test');
      self.init_fn   — frontend pretrained-weight initializer.

    Arguments:
      reuse: Reuse variables or not.
    """
    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage
    batch_norm_params = self.model_config['batch_norm_params']
    init_method = self.model_config['conv_config']['init_method']
    # End-point names for the 16x- and 32x-downsampled backbone features.
    down_16x_end_points = self.model_config['net_node']['16xdown:50']
    down_32x_end_points = self.model_config['net_node']['32xdown:25']
    if init_method == 'kaiming_normal':
        initializer = slim.variance_scaling_initializer(factor=2.0,
                                                        mode='FAN_IN',
                                                        uniform=False)
    else:
        initializer = slim.xavier_initializer()
    with tf.variable_scope('spatial_net', reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            biases_initializer=None,
                            weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm],
                                is_training=self.is_training(),
                                **batch_norm_params):
                # inference/spatial_net/Conv/Conv2D run 1 average cost 250.552994 ms, 25.405 %, FlopsRate: 9.064 %
                # conv2d: stem, stride 2, hard-swish activation
                spatial_net = slim.conv2d(self.images, 16, [3, 3],
                                          stride=[2, 2],
                                          activation_fn=None)
                spatial_net = hard_swish(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck1: 1x1 expansion then stride-2 depthwise-separable conv
                exp_size = _make_divisible(16)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 16, kernel=[3, 3], stride=2)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck2: expansion 72, output 24, stride 2
                exp_size = _make_divisible(72)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 24, kernel=[3, 3], stride=2)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck3: expansion 88, output 24, stride 1
                exp_size = _make_divisible(88)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 24, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck4: expansion 96, output 40, stride 1
                exp_size = _make_divisible(96)
                spatial_net = slim.conv2d(spatial_net, exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                spatial_net = slim.batch_norm(spatial_net, fused=True)
                spatial_net = DepthSepConv(spatial_net, 40, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck5: no expansion conv; output 80, stride 1
                spatial_net = DepthSepConv(spatial_net, 80, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
                # bneck6: no expansion conv; output 128, stride 1
                spatial_net = DepthSepConv(spatial_net, 128, kernel=[3, 3], stride=1)
                spatial_net = tf.nn.relu(
                    slim.batch_norm(spatial_net, fused=True))
    frontend_config = self.model_config['frontend_config']
    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        self.images, frontend_config, self.is_training(), reuse)
    ### Combining the paths
    with tf.variable_scope('combine_path', reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            biases_initializer=None,
                            weights_initializer=initializer):
            with slim.arg_scope([slim.batch_norm],
                                is_training=self.is_training(),
                                **batch_norm_params):
                # tail part: global average-pooled context from the 32x features
                global_context = tf.reduce_mean(
                    end_points[down_32x_end_points], [1, 2], keep_dims=True)
                global_context = slim.conv2d(global_context, 128, 1, [1, 1],
                                             activation_fn=None)
                global_context = tf.nn.relu(
                    slim.batch_norm(global_context, fused=True))
                ARM_out1 = AttentionRefinementModule_Custom(
                    end_points[down_32x_end_points], n_filters=128)
                ARM_out2 = AttentionRefinementModule_Custom(
                    end_points[down_16x_end_points], n_filters=128)
                ARM_out1 = tf.add(ARM_out1, global_context)
                ARM_out1 = Upsampling(ARM_out1, scale=2)
                # inference/combine_path/Conv_6/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                # Refine the upsampled 32x branch with an expansion + DepthSepConv.
                exp_size = _make_divisible(256)
                ARM_out1 = slim.conv2d(ARM_out1, exp_size, [1, 1],
                                       stride=[1, 1],
                                       activation_fn=None)
                ARM_out1 = slim.batch_norm(ARM_out1, fused=True)
                ARM_out1 = DepthSepConv(ARM_out1, 128, kernel=[3, 3], stride=1)
                ARM_out1 = tf.nn.relu(slim.batch_norm(ARM_out1, fused=True))
                ARM_out2 = tf.add(ARM_out2, ARM_out1)
                ARM_out2 = Upsampling(ARM_out2, scale=2)
                # inference/combine_path/Conv_13/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                exp_size = _make_divisible(256)
                ARM_out2 = slim.conv2d(ARM_out2, exp_size, [1, 1],
                                       stride=[1, 1],
                                       activation_fn=None)
                ARM_out2 = slim.batch_norm(ARM_out2, fused=True)
                ARM_out2 = DepthSepConv(ARM_out2, 128, kernel=[3, 3], stride=1)
                ARM_out2 = tf.nn.relu(slim.batch_norm(ARM_out2, fused=True))
                context_net = ARM_out2
                FFM_out = FeatureFusionModule_Custom(input_1=spatial_net,
                                                     input_2=context_net,
                                                     n_filters=256)
                # Extra ConvBlocks feed only the auxiliary heads sup1/sup2 below.
                ARM_out1 = ConvBlock(ARM_out1, n_filters=128, kernel_size=[3, 3])
                ARM_out2 = ConvBlock(ARM_out2, n_filters=128, kernel_size=[3, 3])
                exp_size = _make_divisible(128)
                FFM_out = slim.conv2d(FFM_out, exp_size, [1, 1],
                                      stride=[1, 1],
                                      activation_fn=None)
                FFM_out = slim.batch_norm(FFM_out, fused=True)
                FFM_out = DepthSepConv(FFM_out, 64, kernel=[3, 3], stride=1)
                FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                # Upsampling + dilation or only Upsampling
                FFM_out = Upsampling(FFM_out, scale=2)
                # inference/combine_path/Conv_12/Conv2D run 1 average cost 32.151001 ms, 3.260 %, FlopsRate: 8.879 %
                # NOTE(review): unlike the stages above, no batch_norm between
                # this conv and the DepthSepConv — confirm this is intentional.
                exp_size = _make_divisible(128)
                FFM_out = slim.conv2d(FFM_out, exp_size, [1, 1],
                                      stride=[1, 1],
                                      activation_fn=None)
                FFM_out = DepthSepConv(FFM_out, 64, kernel=[3, 3], stride=1, rate=2)
                FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                FFM_out = slim.conv2d(FFM_out, self.num_classes, [1, 1],
                                      activation_fn=None,
                                      scope='logits')
                self.net = Upsampling(FFM_out, 4)
                if self.mode in ['train', 'validation', 'test']:
                    # Auxiliary supervision heads on the two ARM branches.
                    sup1 = slim.conv2d(ARM_out1, self.num_classes, [1, 1],
                                       activation_fn=None,
                                       scope='supl1')
                    sup2 = slim.conv2d(ARM_out2, self.num_classes, [1, 1],
                                       activation_fn=None,
                                       scope='supl2')
                    self.sup1 = Upsampling(sup1, scale=16)
                    self.sup2 = Upsampling(sup2, scale=8)
    self.init_fn = init_fn