Example #1
    def call(self, inputs):

        # conv = conv_block(inputs, n_filters=3, dropout_p=dropout_p)
        # conv1 = slim.max_pool2d(conv, [2,2])

        n_filters = self.n_filters
        dropout_p = self.dropout_p

        logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
            inputs, self.frontend, is_training=self.is_training)

        conv2 = end_points["pool2"] # 32
        conv3 = end_points["pool3"] # 16
        conv4 = end_points["pool4"] # 8
        conv5 = end_points["pool5"] # 4

        pool = slim.max_pool2d(conv5, [2,2]) # 2
        center = self.conv_transpose_block(pool, n_filters*8*2, n_filters*8, dropout_p=dropout_p) # 4

        dec5 = self.conv_transpose_block(tf.concat([center, conv5], axis=3), n_filters*8*2, n_filters*8, dropout_p=dropout_p) # 8
        dec4 = self.conv_transpose_block(tf.concat([dec5, conv4], axis=3), n_filters*8*2, n_filters*8, dropout_p=dropout_p) # 16
        dec3 = self.conv_transpose_block(tf.concat([dec4, conv3], axis=3), n_filters*4*2, n_filters*2, dropout_p=dropout_p) # 32
        dec2 = self.conv_transpose_block(tf.concat([dec3, conv2], axis=3), n_filters*2*2, n_filters*2*2, dropout_p=dropout_p) # 64
        dec1 = self.conv_transpose_block(dec2, n_filters*2*2, n_filters, dropout_p=dropout_p) # 128
        net = slim.conv2d(dec1, self.num_classes, [1, 1])
        # if dropout_p != 0.0:
        #     net = slim.dropout(net, keep_prob=(1.0-dropout_p))
        return net
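The decoder above calls `self.conv_transpose_block` (a standalone version appears in Example #8), which is not defined in these snippets. A minimal sketch of what such a block might look like, assuming a 3x3 conv followed by a stride-2 transposed conv with optional dropout; the original helper may differ:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def conv_transpose_block(inputs, n_filters_in, n_filters_out, dropout_p=0.0):
    # Hypothetical decoder block (sketch, not the repository's definition):
    # 3x3 conv at n_filters_in, then a transposed conv that doubles H and W.
    net = slim.conv2d(inputs, n_filters_in, [3, 3], activation_fn=tf.nn.relu)
    net = slim.conv2d_transpose(net, n_filters_out, [3, 3], stride=[2, 2],
                                activation_fn=tf.nn.relu)
    if dropout_p != 0.0:
        net = slim.dropout(net, keep_prob=(1.0 - dropout_p))
    return net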
Example #2
def build_deeplabv3_plus(inputs,
                         num_classes,
                         preset_model='DeepLabV3+',
                         frontend="ResNet101",
                         weight_decay=1e-5,
                         is_training=True,
                         pretrained_dir="src/Segmentation/models"):
    """
    Builds the DeepLabV3 model.

    Arguments:
      inputs: The input tensor=
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction
      num_classes: Number of classes

    Returns:
      DeepLabV3 model
    """

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    label_size = tf.shape(inputs)[1:3]

    encoder_features = end_points['pool2']

    net = AtrousSpatialPyramidPoolingModule(end_points['pool4'])
    net = slim.conv2d(net,
                      256, [1, 1],
                      scope="conv_1x1_output",
                      activation_fn=None)
    decoder_features = Upsampling(net, label_size / 4)

    encoder_features = slim.conv2d(encoder_features,
                                   48, [1, 1],
                                   activation_fn=tf.nn.relu,
                                   normalizer_fn=None)

    net = tf.concat((encoder_features, decoder_features), axis=3)

    net = slim.conv2d(net,
                      256, [3, 3],
                      activation_fn=tf.nn.relu,
                      normalizer_fn=None)
    net = slim.conv2d(net,
                      256, [3, 3],
                      activation_fn=tf.nn.relu,
                      normalizer_fn=None)

    net = Upsampling(net, label_size)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn
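A minimal usage sketch for these builders (hypothetical input shape and dummy batch; assumes a TF1.x session and pretrained frontend weights under `pretrained_dir`):

import numpy as np
import tensorflow as tf

inputs = tf.placeholder(tf.float32, shape=[None, 512, 512, 3])
net, init_fn = build_deeplabv3_plus(inputs, num_classes=21)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # restore the pretrained frontend weights
    batch = np.zeros((1, 512, 512, 3), dtype=np.float32)  # dummy data for illustration
    logits = sess.run(net, feed_dict={inputs: batch})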
Example #3
def build_deeplabv3(inputs,
                    num_classes,
                    preset_model='DeepLabV3',
                    frontend="Res101",
                    weight_decay=1e-5,
                    is_training=True,
                    pretrained_dir="models"):
    """Builds the DeepLabV3 model. 

        Arguments:
            inputs: The input tensor= 
            preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
            num_classes: Number of classes

        Returns:
            DeepLabV3 model
    """

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)
    label_size = tf.shape(inputs)[1:3]
    net = AtrousSpatialPyramidPoolingModule(end_points['pool4'])
    net = Upsampling(net, label_size)
    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')
    return net, init_fn
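`Upsampling` is not defined in these snippets. The DeepLab examples pass a target (height, width) such as `label_size`, while later examples pass a `scale` factor (calls like `Upsampling(net, 4)` presumably mean a scale), so the repository likely has per-file variants. A sketch covering both call styles, assuming bilinear resizing; the cast matters because expressions like `label_size / 4` yield a float tensor:

import tensorflow as tf

def Upsampling(inputs, size=None, scale=None):
    # Hypothetical helper: bilinear resize to `size`, or by an integer `scale`.
    if scale is not None:
        size = tf.shape(inputs)[1:3] * scale
    return tf.image.resize_bilinear(inputs, size=tf.to_int32(size))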
Example #4
def build_bisenet(inputs, num_classes, preset_model='BiSeNet', frontend="xception", weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """
    Builds the BiSeNet model. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes

    Returns:
      BiSeNet model
    """

    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage 
    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=256, kernel_size=[3, 3], strides=2)


    ### Context path
    logits, end_points, frontend_scope, init_fn  = frontend_builder.build_frontend(inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)
    # global pool in order to get highest receptive field
    size = tf.shape(end_points['pool5'])[1:3]
    global_channels = tf.reduce_mean(end_points['pool5'], [1, 2], keep_dims=True)
    global_channels = slim.conv2d(global_channels, 128, 1, [1, 1], activation_fn=None)
    global_channels = tf.nn.relu(slim.batch_norm(global_channels, fused=True))
    global_channels = tf.image.resize_bilinear(global_channels, size=size)

    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128)

    net_5 = tf.add(global_channels, net_5)
    net_5 = Upsampling(net_5, scale=2)
    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
    
    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128)
    net_4 = tf.add(net_4, net_5)
    net_4 = Upsampling(net_4, scale=2)
    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
    

    context_net = net_4
    
    ### Combining the paths
    net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=256)
    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

    ### Final upscaling and finish
    net = Upsampling(net, scale=2)
    net = slim.conv2d(net, 64, [3, 3], rate=2, activation_fn=tf.nn.relu,
                      biases_initializer=None, normalizer_fn=slim.batch_norm)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    net = Upsampling(net, 4)
    

    return net, init_fn
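`AttentionRefinementModule` comes from the BiSeNet paper: refine the feature map, then reweight its channels with attention derived from global average pooling. A minimal sketch under that assumption; the repository's version may differ in normalization details:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def AttentionRefinementModule(inputs, n_filters):
    # Sketch of BiSeNet's ARM: 3x3 conv, then channel attention from a global pool.
    net = slim.conv2d(inputs, n_filters, [3, 3], activation_fn=tf.nn.relu)
    attention = tf.reduce_mean(net, [1, 2], keep_dims=True)  # [N, 1, 1, C]
    attention = slim.conv2d(attention, n_filters, [1, 1], activation_fn=None)
    attention = tf.sigmoid(slim.batch_norm(attention, fused=True))
    return tf.multiply(net, attention)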
Example #5
def build_pspnet(inputs,
                 label_size,
                 num_classes,
                 preset_model='PSPNet',
                 frontend="ResNet101",
                 pooling_type="MAX",
                 weight_decay=1e-5,
                 upscaling_method="conv",
                 is_training=True,
                 pretrained_dir="models"):
    """
    Builds the PSPNet model. 

    Arguments:
      inputs: The input tensor
      label_size: Size of the final label tensor. We need to know this for proper upscaling 
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes
      pooling_type: Max or Average pooling

    Returns:
      PSPNet model
    """

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    feature_map_shape = [int(x / 8.0) for x in label_size]
    print(feature_map_shape)
    psp = PyramidPoolingModule(end_points['pool3'],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    net = slim.batch_norm(net, fused=True)
    net = tf.nn.relu(net)

    if upscaling_method.lower() == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif upscaling_method.lower() == "bilinear":
        net = Upsampling(net, label_size)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn
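Unlike the other builders, `build_pspnet` needs the label size as plain Python integers, since `feature_map_shape` is computed with `int(x / 8.0)` at graph-construction time. A short hypothetical call:

import tensorflow as tf

inputs = tf.placeholder(tf.float32, shape=[None, 512, 512, 3])
# label_size must be a concrete [height, width] list here, not a tensor.
net, init_fn = build_pspnet(inputs, label_size=[512, 512], num_classes=21)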
Example #6
def build_dense_aspp(inputs,
                     num_classes,
                     preset_model='DenseASPP',
                     frontend="ResNet101",
                     weight_decay=1e-5,
                     is_training=True,
                     pretrained_dir="./models"):

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    init_features = end_points['pool3']

    ### First block, rate = 3
    d_3_features = DilatedConvBlock(init_features,
                                    n_filters=256,
                                    kernel_size=[1, 1])
    d_3 = DilatedConvBlock(d_3_features,
                           n_filters=64,
                           rate=3,
                           kernel_size=[3, 3])

    ### Second block, rate = 6
    d_4 = tf.concat([init_features, d_3], axis=-1)
    d_4 = DilatedConvBlock(d_4, n_filters=256, kernel_size=[1, 1])
    d_4 = DilatedConvBlock(d_4, n_filters=64, rate=6, kernel_size=[3, 3])

    ### Third block, rate = 12
    d_5 = tf.concat([init_features, d_3, d_4], axis=-1)
    d_5 = DilatedConvBlock(d_5, n_filters=256, kernel_size=[1, 1])
    d_5 = DilatedConvBlock(d_5, n_filters=64, rate=12, kernel_size=[3, 3])

    ### Fourth block, rate = 18
    d_6 = tf.concat([init_features, d_3, d_4, d_5], axis=-1)
    d_6 = DilatedConvBlock(d_6, n_filters=256, kernel_size=[1, 1])
    d_6 = DilatedConvBlock(d_6, n_filters=64, rate=18, kernel_size=[3, 3])

    ### Fifth block, rate = 24
    d_7 = tf.concat([init_features, d_3, d_4, d_5, d_6], axis=-1)
    d_7 = DilatedConvBlock(d_7, n_filters=256, kernel_size=[1, 1])
    d_7 = DilatedConvBlock(d_7, n_filters=64, rate=24, kernel_size=[3, 3])

    full_block = tf.concat([init_features, d_3, d_4, d_5, d_6, d_7], axis=-1)

    net = slim.conv2d(full_block,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    net = Upsampling(net, scale=8)

    return net, init_fn
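`DilatedConvBlock` is used above with an optional `rate`; a plausible sketch, assuming a BN-ReLU-convolution unit in the spirit of DenseASPP (the repository's definition may differ):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def DilatedConvBlock(inputs, n_filters, rate=1, kernel_size=[3, 3]):
    # Hypothetical block: batch norm -> ReLU -> (optionally dilated) conv.
    net = tf.nn.relu(slim.batch_norm(inputs, fused=True))
    return slim.conv2d(net, n_filters, kernel_size, rate=rate, activation_fn=None)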
Example #7
def build_gcn(inputs, num_classes, preset_model='GCN', frontend="ResNet101", weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models"):
    """
    Builds the GCN model. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes

    Returns:
      GCN model
    """

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    res = [end_points['pool5'], end_points['pool4'],
           end_points['pool3'], end_points['pool2']]

    down_5 = GlobalConvBlock(res[0], n_filters=21, size=3)
    down_5 = BoundaryRefinementBlock(down_5, n_filters=21, kernel_size=[3, 3])
    down_5 = ConvUpscaleBlock(down_5, n_filters=21, kernel_size=[3, 3], scale=2)

    down_4 = GlobalConvBlock(res[1], n_filters=21, size=3)
    down_4 = BoundaryRefinementBlock(down_4, n_filters=21, kernel_size=[3, 3])
    down_4 = tf.add(down_4, down_5)
    down_4 = BoundaryRefinementBlock(down_4, n_filters=21, kernel_size=[3, 3])
    down_4 = ConvUpscaleBlock(down_4, n_filters=21, kernel_size=[3, 3], scale=2)

    down_3 = GlobalConvBlock(res[2], n_filters=21, size=3)
    down_3 = BoundaryRefinementBlock(down_3, n_filters=21, kernel_size=[3, 3])
    down_3 = tf.add(down_3, down_4)
    down_3 = BoundaryRefinementBlock(down_3, n_filters=21, kernel_size=[3, 3])
    down_3 = ConvUpscaleBlock(down_3, n_filters=21, kernel_size=[3, 3], scale=2)

    down_2 = GlobalConvBlock(res[3], n_filters=21, size=3)
    down_2 = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3])
    down_2 = tf.add(down_2, down_3)
    down_2 = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3])
    down_2 = ConvUpscaleBlock(down_2, n_filters=21, kernel_size=[3, 3], scale=2)

    net = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3])
    net = ConvUpscaleBlock(net, n_filters=21, kernel_size=[3, 3], scale=2)
    net = BoundaryRefinementBlock(net, n_filters=21, kernel_size=[3, 3])

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn
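`GlobalConvBlock` presumably implements the large-kernel decomposition from the GCN paper: a k×1-then-1×k branch summed with the mirrored 1×k-then-k×1 branch (the hard-coded 21 filters match the PASCAL VOC class count). A sketch under that assumption:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def GlobalConvBlock(inputs, n_filters=21, size=3):
    # Hypothetical GCN block: two separable large-kernel branches, summed.
    branch_a = slim.conv2d(inputs, n_filters, [size, 1], activation_fn=None)
    branch_a = slim.conv2d(branch_a, n_filters, [1, size], activation_fn=None)
    branch_b = slim.conv2d(inputs, n_filters, [1, size], activation_fn=None)
    branch_b = slim.conv2d(branch_b, n_filters, [size, 1], activation_fn=None)
    return tf.add(branch_a, branch_b)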
Example #8
def build_custom(inputs,
                 num_classes,
                 frontend="ResNet101",
                 weight_decay=1e-5,
                 is_training=True,
                 n_filters=32,
                 pretrained_dir="models",
                 dropout_p=0.2):

    # conv = conv_block(inputs, n_filters=3, dropout_p=dropout_p)
    # conv1 = slim.max_pool2d(conv, [2,2])
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs, frontend, is_training=is_training)

    conv2 = end_points["pool2"]  # 32
    conv3 = end_points["pool3"]  # 16
    conv4 = end_points["pool4"]  # 8
    conv5 = end_points["pool5"]  # 4

    pool = slim.max_pool2d(conv5, [2, 2])  # 2
    center = conv_transpose_block(pool,
                                  n_filters * 8 * 2,
                                  n_filters * 8,
                                  dropout_p=dropout_p)  # 4

    dec5 = conv_transpose_block(tf.concat([center, conv5], axis=3),
                                n_filters * 8 * 2,
                                n_filters * 8,
                                dropout_p=dropout_p)  # 8
    dec4 = conv_transpose_block(tf.concat([dec5, conv4], axis=3),
                                n_filters * 8 * 2,
                                n_filters * 8,
                                dropout_p=dropout_p)  # 16
    dec3 = conv_transpose_block(tf.concat([dec4, conv3], axis=3),
                                n_filters * 4 * 2,
                                n_filters * 2,
                                dropout_p=dropout_p)  # 32
    dec2 = conv_transpose_block(tf.concat([dec3, conv2], axis=3),
                                n_filters * 2 * 2,
                                n_filters * 2 * 2,
                                dropout_p=dropout_p)  # 64
    dec1 = conv_transpose_block(dec2,
                                n_filters * 2 * 2,
                                n_filters,
                                dropout_p=dropout_p)  # 128
    net = slim.conv2d(dec1, num_classes, [1, 1])
    # if dropout_p != 0.0:
    #     net = slim.dropout(net, keep_prob=(1.0-dropout_p))
    return net
Example #9
def build_refinenet(inputs, num_classes, preset_model='RefineNet', frontend="ResNet101", weight_decay=1e-5, upscaling_method="bilinear", pretrained_dir="models", is_training=True):
    """
    Builds the RefineNet model. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes

    Returns:
      RefineNet model
    """

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    high = [end_points['pool5'], end_points['pool4'],
            end_points['pool3'], end_points['pool2']]

    low = [None, None, None, None]

    # Get the feature maps to the proper size with bottleneck
    high[0] = slim.conv2d(high[0], 512, 1)
    high[1] = slim.conv2d(high[1], 256, 1)
    high[2] = slim.conv2d(high[2], 256, 1)
    high[3] = slim.conv2d(high[3], 256, 1)

    # RefineNet
    low[0] = RefineBlock(high_inputs=high[0], low_inputs=None)  # Only input ResNet 1/32
    low[1] = RefineBlock(high[1], low[0])  # High input = ResNet 1/16, Low input = Previous 1/16
    low[2] = RefineBlock(high[2], low[1])  # High input = ResNet 1/8, Low input = Previous 1/8
    low[3] = RefineBlock(high[3], low[2])  # High input = ResNet 1/4, Low input = Previous 1/4

    # g[3]=Upsampling(g[3],scale=4)

    net = low[3]

    net = ResidualConvUnit(net)
    net = ResidualConvUnit(net)

    if upscaling_method.lower() == "conv":
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)

    elif upscaling_method.lower() == "bilinear":
        net = Upsampling(net, scale=4)
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
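`ResidualConvUnit` follows the RefineNet paper's RCU: two ReLU-conv pairs plus an identity shortcut. A minimal sketch, assuming the input already has `n_filters` channels:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def ResidualConvUnit(inputs, n_filters=256, kernel_size=3):
    # Sketch of RefineNet's RCU: ReLU -> conv -> ReLU -> conv, plus shortcut.
    net = tf.nn.relu(inputs)
    net = slim.conv2d(net, n_filters, kernel_size, activation_fn=None)
    net = tf.nn.relu(net)
    net = slim.conv2d(net, n_filters, kernel_size, activation_fn=None)
    return tf.add(net, inputs)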
Example #10
def build_bisenet(inputs, num_classes, preset_model='BiSeNet', frontend="ResNet101", weight_decay=1e-5, is_training=True, pretrained_dir="models"):
    """
    Builds the BiSeNet model. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes

    Returns:
      BiSeNet model
    """

    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage 
    spatial_net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=128, kernel_size=[3, 3], strides=2)
    spatial_net = ConvBlock(spatial_net, n_filters=256, kernel_size=[3, 3], strides=2)


    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(inputs, frontend, pretrained_dir=pretrained_dir, is_training=is_training)

    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=512)

    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=2048)

    global_channels = tf.reduce_mean(net_5, [1, 2], keep_dims=True)
    net_5_scaled = tf.multiply(global_channels, net_5)

    ### Combining the paths
    net_4 = Upsampling(net_4, scale=2)
    net_5_scaled = Upsampling(net_5_scaled, scale=4)

    context_net = tf.concat([net_4, net_5_scaled], axis=-1)

    net = FeatureFusionModule(input_1=spatial_net, input_2=context_net, n_filters=num_classes)


    ### Final upscaling and finish
    net = Upsampling(net, scale=8)
    
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn
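`FeatureFusionModule` is BiSeNet's FFM: concatenate both paths, project with a conv block, then reweight the result with a squeeze-and-excite style branch (Example #17 uses a three-input variant). A two-input sketch under that assumption:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def FeatureFusionModule(input_1, input_2, n_filters):
    # Sketch of BiSeNet's FFM: concat -> conv -> channel attention -> residual add.
    inputs = tf.concat([input_1, input_2], axis=-1)
    inputs = slim.conv2d(inputs, n_filters, [3, 3], activation_fn=tf.nn.relu)
    attention = tf.reduce_mean(inputs, [1, 2], keep_dims=True)  # global pool
    attention = slim.conv2d(attention, n_filters, [1, 1], activation_fn=tf.nn.relu)
    attention = slim.conv2d(attention, n_filters, [1, 1], activation_fn=tf.sigmoid)
    return tf.add(inputs, tf.multiply(inputs, attention))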
Example #11
def build_mssa(inputs,
               num_classes,
               preset_model='mssa',
               frontend="ResNet101",
               weight_decay=1e-5,
               is_training=True,
               pretrained_dir="models"):
    spatial_net = ConvBlock(inputs,
                            n_filters=64,
                            kernel_size=[3, 3],
                            strides=2)
    spatial_net = ConvBlock(spatial_net,
                            n_filters=128,
                            kernel_size=[3, 3],
                            strides=2)
    spatial_net = ConvBlock(spatial_net,
                            n_filters=256,
                            kernel_size=[3, 3],
                            strides=2)
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=512)
    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=2048)

    global_channels = tf.reduce_mean(net_5, [1, 2], keep_dims=True)
    net_5_scaled = tf.multiply(global_channels, net_5)
    net_4 = Upsampling(net_4, scale=2)
    net_5_scaled = Upsampling(net_5_scaled, scale=4)
    context_net = tf.concat([net_4, net_5_scaled], axis=-1)

    net = FeatureFusionModule(input_1=spatial_net,
                              input_2=context_net,
                              n_filters=num_classes)
    net = Upsampling(net, scale=8)
    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')
    return net, init_fn
Example #12
def build_custom(inputs, num_classes, frontend="ResNet101", weight_decay=1e-5, is_training=True, pretrained_dir="models"):

	logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(inputs, frontend, is_training=is_training)

	up_1 = conv_transpose_block(end_points["pool2"], strides=4, n_filters=64)
	up_2 = conv_transpose_block(end_points["pool3"], strides=8, n_filters=64)
	up_3 = conv_transpose_block(end_points["pool4"], strides=16, n_filters=64)
	up_4 = conv_transpose_block(end_points["pool5"], strides=32, n_filters=64)

	features = tf.concat([up_1, up_2, up_3, up_4], axis=-1)

	features = conv_block(inputs=features, n_filters=256, filter_size=[1, 1])

	features = conv_block(inputs=features, n_filters=64, filter_size=[3, 3])
	features = conv_block(inputs=features, n_filters=64, filter_size=[3, 3])
	features = conv_block(inputs=features, n_filters=64, filter_size=[3, 3])


	net = slim.conv2d(features, num_classes, [1, 1], scope='logits')
	return net
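Both `ConvBlock` and the lower-case `conv_block` used across these examples are assumed to be simple conv-BN-ReLU units; a minimal sketch of the former:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def ConvBlock(inputs, n_filters, kernel_size=[3, 3], strides=1):
    # Hypothetical conv -> batch norm -> ReLU block.
    net = slim.conv2d(inputs, n_filters, kernel_size, stride=strides,
                      activation_fn=None, normalizer_fn=None)
    return tf.nn.relu(slim.batch_norm(net, fused=True))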
Example #13
def build_deeplabv3_plus_concrete(inputs,
                                  num_classes,
                                  preset_model='DeepLabV3+',
                                  frontend="ResNet101",
                                  weight_decay=1e-5,
                                  is_training=True,
                                  pretrained_dir="models",
                                  pretrained_file="xception_65.ckpt",
                                  one_parameter=False):
    """
    Builds the DeepLabV3 model with Concrete dropout. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. 
      num_classes: Number of classes

    Returns:
      DeepLabV3 model with Concrete dropout
    """

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training,
        pretrained_file=pretrained_file,
        one_parameter=one_parameter)

    label_size = tf.shape(inputs)[1:3]

    net = AtrousSpatialPyramidPoolingModule(end_points['pool4'])
    net = slim.conv2d(net,
                      256, [1, 1],
                      scope="conv_1x1_output",
                      activation_fn=None)
    decoder_features = Upsampling(net, label_size / 4)

    #    encoder_features = end_points['pool2']
    #    encoder_features = slim.conv2d(encoder_features, 48, [1, 1], activation_fn=tf.nn.relu, normalizer_fn=None)
    ##    dropout(encoder_features)
    #
    #    net = tf.concat((encoder_features, decoder_features), axis=3)

    net = decoder_features

    net = slim.conv2d(net,
                      256, [3, 3],
                      activation_fn=tf.nn.relu,
                      normalizer_fn=None)
    net = slim.conv2d(net,
                      256, [3, 3],
                      activation_fn=tf.nn.relu,
                      normalizer_fn=None)

    net = Upsampling(net, label_size)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn, frontend_scope
Example #14
    def build_bisenet(self, reuse=False):
        """
        Builds the BiSeNet model.

        Arguments:
          reuse: Reuse variable or not

        Returns:
          BiSeNet model
        """

        ### The spatial path
        ### The number of feature maps for each convolution is not specified in the paper
        ### It was chosen here to be equal to the number of feature maps of a classification
        ### model at each corresponding stage
        batch_norm_params = self.model_config['batch_norm_params']
        init_method = self.model_config['conv_config']['init_method']

        if init_method == 'kaiming_normal':
            initializer = slim.variance_scaling_initializer(factor=2.0,
                                                            mode='FAN_IN',
                                                            uniform=False)
        else:
            initializer = slim.xavier_initializer()

        with tf.variable_scope('spatial_net', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    spatial_net = ConvBlock(self.images,
                                            n_filters=64,
                                            kernel_size=[7, 7],
                                            strides=2)
                    spatial_net = ConvBlock(spatial_net,
                                            n_filters=64,
                                            kernel_size=[3, 3],
                                            strides=2)
                    spatial_net = ConvBlock(spatial_net,
                                            n_filters=64,
                                            kernel_size=[3, 3],
                                            strides=2)
                    spatial_net = ConvBlock(spatial_net,
                                            n_filters=128,
                                            kernel_size=[1, 1])

        frontend_config = self.model_config['frontend_config']
        ### Context path
        logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
            self.images, frontend_config, self.is_training(), reuse)

        ### Combining the paths
        with tf.variable_scope('combine_path', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # tail part
                    size = tf.shape(end_points['pool5'])[1:3]
                    print('111111111111111', end_points['pool5'])
                    exit()
                    global_context = tf.reduce_mean(end_points['pool5'],
                                                    [1, 2],
                                                    keep_dims=True)
                    global_context = slim.conv2d(global_context,
                                                 128,
                                                 1, [1, 1],
                                                 activation_fn=None)
                    global_context = tf.nn.relu(
                        slim.batch_norm(global_context, fused=True))
                    global_context = tf.image.resize_bilinear(global_context,
                                                              size=size)

                    net_5 = AttentionRefinementModule(end_points['pool5'],
                                                      n_filters=128)
                    net_4 = AttentionRefinementModule(end_points['pool4'],
                                                      n_filters=128)

                    net_5 = tf.add(net_5, global_context)
                    net_5 = Upsampling(net_5, scale=2)
                    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                    net_4 = tf.add(net_4, net_5)
                    net_4 = Upsampling(net_4, scale=2)
                    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])

                    context_net = net_4

                    net = FeatureFusionModule(input_1=spatial_net,
                                              input_2=context_net,
                                              n_filters=256)
                    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])
                    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])
                    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

                    # Upsampling + dilation or only Upsampling
                    net = Upsampling(net, scale=2)
                    net = slim.conv2d(net,
                                      64, [3, 3],
                                      rate=2,
                                      activation_fn=tf.nn.relu,
                                      biases_initializer=None,
                                      normalizer_fn=slim.batch_norm)

                    net = slim.conv2d(net,
                                      self.num_classes, [1, 1],
                                      activation_fn=None,
                                      scope='logits')
                    self.net = Upsampling(net, 4)

                    # net = slim.conv2d(net, self.num_classes, [1, 1], activation_fn=None, scope='logits')
                    # self.net = Upsampling(net, scale=8)

                    if self.mode in ['train', 'validation', 'test']:
                        sup1 = slim.conv2d(net_5,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl1')
                        sup2 = slim.conv2d(net_4,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl2')
                        self.sup1 = Upsampling(sup1, scale=16)
                        self.sup2 = Upsampling(sup2, scale=8)
                        self.init_fn = init_fn
Example #15
def build_ddsc(inputs,
               num_classes,
               preset_model='DDSC',
               frontend="ResNet101",
               weight_decay=1e-5,
               is_training=True,
               pretrained_dir="models"):
    """
    Builds the Dense Decoder Shortcut Connections model. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes

    Returns:
      Dense Decoder Shortcut Connections model
    """

    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    ### Adapting features for all stages
    decoder_4 = EncoderAdaptionBlock(end_points['pool5'], n_filters=1024)
    decoder_3 = EncoderAdaptionBlock(end_points['pool4'], n_filters=512)
    decoder_2 = EncoderAdaptionBlock(end_points['pool3'], n_filters=256)
    decoder_1 = EncoderAdaptionBlock(end_points['pool2'], n_filters=128)

    decoder_4 = SemanticFeatureGenerationBlock(decoder_4,
                                               D_features=1024,
                                               D_prime_features=1024 // 4,
                                               O_features=1024)

    ### Fusing features from 3 and 4
    decoder_4 = ConvBlock(decoder_4, n_filters=512, kernel_size=[3, 3])
    decoder_4 = Upsampling(decoder_4, scale=2)

    decoder_3 = ConvBlock(decoder_3, n_filters=512, kernel_size=[3, 3])

    decoder_3 = tf.add_n([decoder_4, decoder_3])

    decoder_3 = SemanticFeatureGenerationBlock(decoder_3,
                                               D_features=512,
                                               D_prime_features=512 // 4,
                                               O_features=512)

    ### Fusing features from 2, 3, 4
    decoder_4 = ConvBlock(decoder_4, n_filters=256, kernel_size=[3, 3])
    decoder_4 = Upsampling(decoder_4, scale=4)

    decoder_3 = ConvBlock(decoder_3, n_filters=256, kernel_size=[3, 3])
    decoder_3 = Upsampling(decoder_3, scale=2)

    decoder_2 = ConvBlock(decoder_2, n_filters=256, kernel_size=[3, 3])

    decoder_2 = tf.add_n([decoder_4, decoder_3, decoder_2])

    decoder_2 = SemanticFeatureGenerationBlock(decoder_2,
                                               D_features=256,
                                               D_prime_features=256 // 4,
                                               O_features=256)

    ### Fusing features from 1, 2, 3, 4
    decoder_4 = ConvBlock(decoder_4, n_filters=128, kernel_size=[3, 3])
    decoder_4 = Upsampling(decoder_4, scale=8)

    decoder_3 = ConvBlock(decoder_3, n_filters=128, kernel_size=[3, 3])
    decoder_3 = Upsampling(decoder_3, scale=4)

    decoder_2 = ConvBlock(decoder_2, n_filters=128, kernel_size=[3, 3])
    decoder_2 = Upsampling(decoder_2, scale=2)

    decoder_1 = ConvBlock(decoder_1, n_filters=128, kernel_size=[3, 3])

    decoder_1 = tf.add_n([decoder_4, decoder_3, decoder_2, decoder_1])

    decoder_1 = SemanticFeatureGenerationBlock(decoder_1,
                                               D_features=128,
                                               D_prime_features=128 // 4,
                                               O_features=num_classes)

    ### Final upscaling and finish
    net = Upsampling(decoder_1, scale=4)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn
Example #16
def build_tbnet(inputs,
                num_classes,
                frontend="ResNet101",
                is_training=True,
                pretrained_dir="models"):

    # The spatial stream
    spatial_net = ConvBlock(inputs,
                            n_filters=64,
                            kernel_size=[3, 3],
                            strides=2)
    spatial_net = ConvBlock(spatial_net,
                            n_filters=128,
                            kernel_size=[3, 3],
                            strides=2)
    spatial_net = ConvBlock(spatial_net,
                            n_filters=256,
                            kernel_size=[3, 3],
                            strides=2)

    # The context stream
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    gamma1 = tf.get_variable(name='gamma1',
                             shape=[1],
                             initializer=tf.zeros_initializer())
    gamma2 = tf.get_variable(name='gamma2',
                             shape=[1],
                             initializer=tf.zeros_initializer())

    feature1 = end_points['pool4']
    feature1 = slim.conv2d(feature1, 512, kernel_size=[1, 1])
    feature1 = slim.batch_norm(feature1, fused=True)

    # The context-aware attention
    [_, h, w, filters] = feature1.shape.as_list()
    b_ = slim.conv2d(feature1,
                     filters // 8,
                     kernel_size=[1, 1],
                     stride=[1, 1],
                     activation_fn=None,
                     normalizer_fn=None)
    c_ = slim.conv2d(feature1,
                     filters // 8,
                     kernel_size=[1, 1],
                     stride=[1, 1],
                     activation_fn=None,
                     normalizer_fn=None)
    d_ = slim.conv2d(feature1,
                     filters,
                     kernel_size=[1, 1],
                     stride=[1, 1],
                     activation_fn=None,
                     normalizer_fn=None)
    vec_b = tf.reshape(b_, [1, -1, tf.shape(feature1)[3] // 8])
    vec_cT = tf.transpose(tf.reshape(c_,
                                     [1, -1, tf.shape(feature1)[3] // 8]),
                          (0, 2, 1))
    bcT = tf.matmul(vec_b, vec_cT)
    sigmoid_bcT = tf.nn.sigmoid(bcT)
    vec_d = tf.reshape(d_, [1, -1, tf.shape(feature1)[3]])
    bcTd = tf.matmul(sigmoid_bcT, vec_d)
    bcTd = tf.reshape(bcTd, [
        1,
        tf.shape(feature1)[1],
        tf.shape(feature1)[2],
        tf.shape(feature1)[3]
    ])
    net_4 = gamma1 * bcTd + feature1

    feature2 = end_points['pool5']
    feature2 = slim.conv2d(feature2, 512, kernel_size=[1, 1])
    feature2 = slim.batch_norm(feature2, fused=True)

    [_, h, w, filters] = feature2.shape.as_list()
    b_ = slim.conv2d(feature2,
                     filters // 8,
                     kernel_size=[1, 1],
                     stride=[1, 1],
                     activation_fn=None,
                     normalizer_fn=None)
    c_ = slim.conv2d(feature2,
                     filters // 8,
                     kernel_size=[1, 1],
                     stride=[1, 1],
                     activation_fn=None,
                     normalizer_fn=None)
    d_ = slim.conv2d(feature2,
                     filters,
                     kernel_size=[1, 1],
                     stride=[1, 1],
                     activation_fn=None,
                     normalizer_fn=None)
    vec_b = tf.reshape(b_, [1, -1, tf.shape(feature2)[3] // 8])
    vec_cT = tf.transpose(tf.reshape(c_,
                                     [1, -1, tf.shape(feature2)[3] // 8]),
                          (0, 2, 1))
    bcT = tf.matmul(vec_b, vec_cT)
    sigmoid_bcT = tf.nn.sigmoid(bcT)
    vec_d = tf.reshape(d_, [1, -1, tf.shape(feature2)[3]])
    bcTd = tf.matmul(sigmoid_bcT, vec_d)
    bcTd = tf.reshape(bcTd, [
        1,
        tf.shape(feature2)[1],
        tf.shape(feature2)[2],
        tf.shape(feature2)[3]
    ])
    net_5 = gamma2 * bcTd + feature2

    global_channels = tf.reduce_mean(net_5, [1, 2], keep_dims=True)
    net_5_scaled = tf.multiply(global_channels, net_5)

    # The boundary stream
    conv1 = slim.conv2d(net_4, 512, kernel_size=[1, 1])

    res = slim.conv2d(conv1,
                      512,
                      kernel_size=[3, 3],
                      stride=[1, 1],
                      activation_fn=None,
                      normalizer_fn=None)
    res = tf.nn.relu(slim.batch_norm(res, fused=True))
    res = slim.conv2d(res,
                      512,
                      kernel_size=[3, 3],
                      stride=[1, 1],
                      activation_fn=None,
                      normalizer_fn=None)
    res = slim.batch_norm(res, fused=True)
    res = conv1 + res
    res = tf.nn.relu(res)

    net_5_scaled = Upsampling(net_5_scaled, scale=2)
    conv2 = slim.conv2d(net_5_scaled, 512, kernel_size=[1, 1])

    # The global-gated convolution
    ggc = tf.concat([res, conv2], axis=-1)
    ggc = slim.batch_norm(ggc, fused=True)
    ggc = slim.conv2d(ggc, 512, kernel_size=[1, 1])
    ggc = tf.nn.relu(ggc)
    ggc = slim.conv2d(ggc, 512, kernel_size=[1, 1])
    ggc = slim.batch_norm(ggc, fused=True)
    ggc = tf.nn.sigmoid(ggc)
    gated = res * (1 + ggc)
    gated = Upsampling(gated, scale=2)

    output = slim.conv2d(gated, 512, kernel_size=[1, 1])

    output_edge = slim.conv2d_transpose(gated,
                                        128,
                                        kernel_size=[3, 3],
                                        stride=[4, 4],
                                        activation_fn=None)
    output_edge = tf.nn.relu(slim.batch_norm(output_edge))
    output_edge = slim.conv2d_transpose(output_edge,
                                        1,
                                        kernel_size=[3, 3],
                                        stride=[2, 2],
                                        activation_fn=None)
    output_edge = tf.nn.relu(slim.batch_norm(output_edge))
    output_edge = tf.nn.sigmoid(output_edge)
    output_edge = tf.reshape(
        output_edge, [tf.shape(output_edge)[1],
                      tf.shape(output_edge)[2]])

    # The feature fusion
    net_5_scaled = Upsampling(net_5_scaled, 2)
    output_s_c = tf.concat([spatial_net, net_5_scaled], axis=-1)
    output_s_c = slim.batch_norm(output_s_c)
    output_s_c = slim.conv2d(output_s_c, 256, kernel_size=[1, 1])

    net = FeatureFusionModule(input_1=output_s_c,
                              input_2=output,
                              n_filters=num_classes)
    net = Upsampling(net, scale=8)
    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn, output_edge
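The two attention blocks inside `build_tbnet` repeat the same computation; factoring it out makes the tensor shapes easier to follow. A sketch of the shared pattern (a hypothetical refactoring, not part of the original):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def context_aware_attention(feature, gamma):
    # Sketch of the attention used twice above: out = gamma * sigmoid(B C^T) D + feature.
    # feature: [1, h, w, c]; B and C project to c // 8 channels, D keeps c.
    c = feature.shape.as_list()[3]
    b_ = slim.conv2d(feature, c // 8, [1, 1], activation_fn=None, normalizer_fn=None)
    c_ = slim.conv2d(feature, c // 8, [1, 1], activation_fn=None, normalizer_fn=None)
    d_ = slim.conv2d(feature, c, [1, 1], activation_fn=None, normalizer_fn=None)
    vec_b = tf.reshape(b_, [1, -1, c // 8])                        # [1, h*w, c/8]
    vec_cT = tf.transpose(tf.reshape(c_, [1, -1, c // 8]), (0, 2, 1))
    attn = tf.nn.sigmoid(tf.matmul(vec_b, vec_cT))                 # [1, h*w, h*w]
    out = tf.matmul(attn, tf.reshape(d_, [1, -1, c]))              # [1, h*w, c]
    out = tf.reshape(out, tf.shape(feature))
    return gamma * out + feature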
Example #17
def build_bisenet3(inputs,
                   num_classes,
                   preset_model='DepthwiseAAFF',
                   frontend="xception",
                   weight_decay=1e-5,
                   is_training=True,
                   pretrained_dir="models"):

    initializer = slim.variance_scaling_initializer(factor=2.0,
                                                    mode='FAN_IN',
                                                    uniform=False)

    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage

    # depth-wise convolution
    point_filter1 = tf.get_variable(name="point_filter1",
                                    shape=(1, 1, 64, 128),
                                    initializer=initializer)
    point_filter2 = tf.get_variable(name="point_filter2",
                                    shape=(1, 1, 128, 256),
                                    initializer=initializer)
    filter1 = tf.get_variable(name="filter1",
                              shape=(3, 3, 64, 1),
                              initializer=initializer)
    filter2 = tf.get_variable(name="filter2",
                              shape=(3, 3, 128, 1),
                              initializer=initializer)
    # spatial path
    spatial_net = ConvBlock(inputs,
                            n_filters=64,
                            kernel_size=[3, 3],
                            strides=2)
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter1,
                                         pointwise_filter=point_filter1,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter2,
                                         pointwise_filter=point_filter2,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = ConvBlock(spatial_net, n_filters=32, kernel_size=[1, 1])

    # Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    size = tf.shape(end_points['pool5'])[1:3]

    net_1 = AttentionAndFeatureFussion(end_points['pool3'],
                                       end_points['pool4'], 64)
    net_2 = AttentionAndFeatureFussion(net_1, end_points['pool5'], 128)
    net_2 = Upsampling(net_2, scale=2)
    net_1_2 = tf.concat([net_1, net_2], axis=-1)
    net_1_2 = Upsampling(net_1_2, scale=2)
    net_1_2_3 = tf.concat([net_1_2, end_points['pool3']], axis=-1)
    net_1_2_3 = ConvBlock(net_1_2_3,
                          n_filters=128,
                          kernel_size=[1, 1],
                          strides=1)
    context_path_left = AttentionRefinementModule(net_1_2_3, n_filters=128)

    net_3 = AttentionAndFeatureFussion(end_points['pool3'],
                                       end_points['pool4'], 64)
    net_4 = AttentionAndFeatureFussion(net_3, end_points['pool5'], 128)
    net_4 = Upsampling(net_4, scale=2)
    net_3_4 = tf.concat([net_3, net_4], axis=-1)
    net_3_4 = Upsampling(net_3_4, scale=2)
    net_3_4_5 = tf.concat([net_3_4, end_points['pool3']], axis=-1)
    net_3_4_5 = ConvBlock(net_3_4_5,
                          n_filters=128,
                          kernel_size=[1, 1],
                          strides=1)
    context_path_right = AttentionRefinementModule(net_3_4_5, n_filters=128)

    ### Combining the paths
    net = FeatureFusionModule(input_1=context_path_left,
                              input_2=context_path_right,
                              input_3=spatial_net,
                              n_filters=256)
    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

    ### Final upscaling and finish # Upsampling + dilation or only Upsampling
    net = Upsampling(net, scale=2)
    net = slim.conv2d(net,
                      64, [3, 3],
                      rate=2,
                      activation_fn=tf.nn.relu,
                      biases_initializer=None,
                      normalizer_fn=slim.batch_norm)
    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')
    net = Upsampling(net, 4)

    return net, init_fn
Example #18
def build_bisenet2(inputs,
                   num_classes,
                   preset_model='DepthwiseBiseNet',
                   frontend="xception",
                   weight_decay=1e-5,
                   is_training=True,
                   pretrained_dir="models"):

    ### The spatial path
    ### The number of feature maps for each convolution is not specified in the paper
    ### It was chosen here to be equal to the number of feature maps of a classification
    ### model at each corresponding stage

    # depth-wise convolution
    point_filter1 = tf.get_variable(
        name="point_filter1",
        shape=(1, 1, 64, 128),
        initializer=tf.contrib.layers.xavier_initializer())
    point_filter2 = tf.get_variable(
        name="point_filter2",
        shape=(1, 1, 128, 256),
        initializer=tf.contrib.layers.xavier_initializer())
    filter1 = tf.get_variable(
        name="filter1",
        shape=(3, 3, 64, 1),
        initializer=tf.contrib.layers.xavier_initializer())
    filter2 = tf.get_variable(
        name="filter2",
        shape=(3, 3, 128, 1),
        initializer=tf.contrib.layers.xavier_initializer())

    spatial_net = ConvBlock(inputs,
                            n_filters=64,
                            kernel_size=[3, 3],
                            strides=2)
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter1,
                                         pointwise_filter=point_filter1,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = tf.nn.separable_conv2d(input=spatial_net,
                                         depthwise_filter=filter2,
                                         pointwise_filter=point_filter2,
                                         strides=[1, 2, 2, 1],
                                         rate=[1, 1],
                                         padding='SAME')
    spatial_net = ConvBlock(spatial_net, n_filters=32, kernel_size=[1, 1])

    ### Context path
    logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
        inputs,
        frontend,
        pretrained_dir=pretrained_dir,
        is_training=is_training)

    size = tf.shape(end_points['pool5'])[1:3]
    global_channels = tf.reduce_mean(end_points['pool5'], [1, 2],
                                     keep_dims=True)
    global_channels = slim.conv2d(global_channels,
                                  128,
                                  1, [1, 1],
                                  activation_fn=None)
    global_channels = tf.nn.relu(slim.batch_norm(global_channels, fused=True))
    global_channels = tf.image.resize_bilinear(global_channels, size=size)

    net_5 = AttentionRefinementModule(end_points['pool5'], n_filters=128)

    net_5 = tf.add(global_channels, net_5)
    net_5 = Upsampling(net_5, scale=2)
    net_5 = ConvBlock(net_5, n_filters=128, kernel_size=[3, 3])

    net_4 = AttentionRefinementModule(end_points['pool4'], n_filters=128)
    net_4 = tf.add(net_4, net_5)
    net_4 = Upsampling(net_4, scale=2)
    net_4 = ConvBlock(net_4, n_filters=128, kernel_size=[3, 3])

    context_net = net_4

    ### Combining the paths
    net = FeatureFusionModule(input_1=spatial_net,
                              input_2=context_net,
                              n_filters=256)
    net = ConvBlock(net, n_filters=64, kernel_size=[3, 3])

    ### Final upscaling and finish
    net = Upsampling(net, scale=2)
    net = slim.conv2d(net,
                      64, [3, 3],
                      rate=2,
                      activation_fn=tf.nn.relu,
                      biases_initializer=None,
                      normalizer_fn=slim.batch_norm)
    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')
    net = Upsampling(net, 4)

    return net, init_fn
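For reference, `tf.nn.separable_conv2d` expects a depthwise filter of shape [height, width, in_channels, channel_multiplier] and a pointwise filter of shape [1, 1, in_channels * multiplier, out_channels], which is why `filter1` is (3, 3, 64, 1) and `point_filter1` is (1, 1, 64, 128) above. A standalone shape check with hypothetical values:

import tensorflow as tf

x = tf.ones([1, 64, 64, 64])                                   # NHWC, 64 channels
depthwise = tf.get_variable("dw_demo", shape=(3, 3, 64, 1))    # one 3x3 filter per channel
pointwise = tf.get_variable("pw_demo", shape=(1, 1, 64, 128))  # 1x1 mix up to 128 channels
y = tf.nn.separable_conv2d(x, depthwise, pointwise,
                           strides=[1, 2, 2, 1], rate=[1, 1], padding='SAME')
print(y.shape)  # (1, 32, 32, 128): stride 2 halves height and width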
Example #19
    def build_bisenet_custom(self, reuse=False):
        """
        Builds the BiSeNet model.

        Arguments:
          reuse: Reuse variable or not

        Returns:
          BiSeNet model
        """
        ### The spatial path
        ### The number of feature maps for each convolution is not specified in the paper
        ### It was chosen here to be equal to the number of feature maps of a classification
        ### model at each corresponding stage
        batch_norm_params = self.model_config['batch_norm_params']
        init_method = self.model_config['conv_config']['init_method']
        down_16x_end_points = self.model_config['net_node']['16xdown:50']
        down_32x_end_points = self.model_config['net_node']['32xdown:25']
        if init_method == 'kaiming_normal':
            initializer = slim.variance_scaling_initializer(factor=2.0,
                                                            mode='FAN_IN',
                                                            uniform=False)
        else:
            initializer = slim.xavier_initializer()

        with tf.variable_scope('spatial_net', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # inference/spatial_net/Conv/Conv2D run 1 average cost 250.552994 ms, 25.405 %, FlopsRate: 9.064 %
                    # conv2d
                    spatial_net = slim.conv2d(self.images,
                                              16, [3, 3],
                                              stride=[2, 2],
                                              activation_fn=None)
                    spatial_net = hard_swish(
                        slim.batch_norm(spatial_net, fused=True))

                    # bneck1
                    exp_size = _make_divisible(16)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               16,
                                               kernel=[3, 3],
                                               stride=2)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))

                    # bneck2
                    exp_size = _make_divisible(72)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               24,
                                               kernel=[3, 3],
                                               stride=2)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck3
                    exp_size = _make_divisible(88)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               24,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck4
                    exp_size = _make_divisible(96)
                    spatial_net = slim.conv2d(spatial_net,
                                              exp_size, [1, 1],
                                              stride=[1, 1],
                                              activation_fn=None)
                    spatial_net = slim.batch_norm(spatial_net, fused=True)
                    spatial_net = DepthSepConv(spatial_net,
                                               40,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck5
                    spatial_net = DepthSepConv(spatial_net,
                                               80,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))
                    # bneck6
                    spatial_net = DepthSepConv(spatial_net,
                                               128,
                                               kernel=[3, 3],
                                               stride=1)
                    spatial_net = tf.nn.relu(
                        slim.batch_norm(spatial_net, fused=True))

        frontend_config = self.model_config['frontend_config']
        ### Context path
        logits, end_points, frontend_scope, init_fn = frontend_builder.build_frontend(
            self.images, frontend_config, self.is_training(), reuse)

        ### Combining the paths
        with tf.variable_scope('combine_path', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                biases_initializer=None,
                                weights_initializer=initializer):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self.is_training(),
                                    **batch_norm_params):
                    # tail part
                    global_context = tf.reduce_mean(
                        end_points[down_32x_end_points], [1, 2],
                        keep_dims=True)
                    global_context = slim.conv2d(global_context,
                                                 128,
                                                 1, [1, 1],
                                                 activation_fn=None)
                    global_context = tf.nn.relu(
                        slim.batch_norm(global_context, fused=True))
                    ARM_out1 = AttentionRefinementModule_Custom(
                        end_points[down_32x_end_points], n_filters=128)
                    ARM_out2 = AttentionRefinementModule_Custom(
                        end_points[down_16x_end_points], n_filters=128)

                    ARM_out1 = tf.add(ARM_out1, global_context)
                    ARM_out1 = Upsampling(ARM_out1, scale=2)
                    # inference/combine_path/Conv_6/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(256)
                    ARM_out1 = slim.conv2d(ARM_out1,
                                           exp_size, [1, 1],
                                           stride=[1, 1],
                                           activation_fn=None)
                    ARM_out1 = slim.batch_norm(ARM_out1, fused=True)
                    ARM_out1 = DepthSepConv(ARM_out1,
                                            128,
                                            kernel=[3, 3],
                                            stride=1)
                    ARM_out1 = tf.nn.relu(slim.batch_norm(ARM_out1,
                                                          fused=True))
                    ARM_out2 = tf.add(ARM_out2, ARM_out1)
                    ARM_out2 = Upsampling(ARM_out2, scale=2)
                    # inference/combine_path/Conv_13/Conv2D run 1 average cost 23.034000 ms, 2.336 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(256)
                    ARM_out2 = slim.conv2d(ARM_out2,
                                           exp_size, [1, 1],
                                           stride=[1, 1],
                                           activation_fn=None)
                    ARM_out2 = slim.batch_norm(ARM_out2, fused=True)
                    ARM_out2 = DepthSepConv(ARM_out2,
                                            128,
                                            kernel=[3, 3],
                                            stride=1)
                    ARM_out2 = tf.nn.relu(slim.batch_norm(ARM_out2,
                                                          fused=True))
                    context_net = ARM_out2

                    FFM_out = FeatureFusionModule_Custom(input_1=spatial_net,
                                                         input_2=context_net,
                                                         n_filters=256)

                    ARM_out1 = ConvBlock(ARM_out1,
                                         n_filters=128,
                                         kernel_size=[3, 3])
                    ARM_out2 = ConvBlock(ARM_out2,
                                         n_filters=128,
                                         kernel_size=[3, 3])
                    exp_size = _make_divisible(128)
                    FFM_out = slim.conv2d(FFM_out,
                                          exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                    FFM_out = slim.batch_norm(FFM_out, fused=True)
                    FFM_out = DepthSepConv(FFM_out,
                                           64,
                                           kernel=[3, 3],
                                           stride=1)
                    FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                    # Upsampling + dilation or only Upsampling
                    FFM_out = Upsampling(FFM_out, scale=2)
                    # inference/combine_path/Conv_12/Conv2D run 1 average cost 32.151001 ms, 3.260 %, FlopsRate: 8.879 %
                    exp_size = _make_divisible(128)
                    FFM_out = slim.conv2d(FFM_out,
                                          exp_size, [1, 1],
                                          stride=[1, 1],
                                          activation_fn=None)
                    FFM_out = DepthSepConv(FFM_out,
                                           64,
                                           kernel=[3, 3],
                                           stride=1,
                                           rate=2)
                    FFM_out = tf.nn.relu(slim.batch_norm(FFM_out, fused=True))
                    FFM_out = slim.conv2d(FFM_out,
                                          self.num_classes, [1, 1],
                                          activation_fn=None,
                                          scope='logits')
                    self.net = Upsampling(FFM_out, 4)

                    if self.mode in ['train', 'validation', 'test']:
                        sup1 = slim.conv2d(ARM_out1,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl1')
                        sup2 = slim.conv2d(ARM_out2,
                                           self.num_classes, [1, 1],
                                           activation_fn=None,
                                           scope='supl2')
                        self.sup1 = Upsampling(sup1, scale=16)
                        self.sup2 = Upsampling(sup2, scale=8)
                        self.init_fn = init_fn
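`_make_divisible` is not shown; it is presumably the channel-rounding helper popularized by the MobileNet reference code. A sketch of the usual definition (the repository's version may differ):

def _make_divisible(v, divisor=8, min_value=None):
    # Round v to the nearest multiple of divisor, staying within ~10% of v.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor  # never round down by more than 10%
    return new_v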