def build_deeplabv3(inputs, num_classes=2, preset_model='DeepLabV3', frontend="ResNet101", load_pre_train=False,
                    pre_trained_dir=None, is_training=True):
    """
    Assembles the DeepLabV3 graph on top of a frontend network.

    The frontend's 'stage4' end point is fed to AtrousSpatialPyramidPoolingModule,
    the result is passed to Upsampling together with the dynamic size of `inputs`
    (axes 1 and 2), and a 1x1 convolution maps it to `num_classes` channels.

    Arguments:
      inputs: The input tensor; axes 1 and 2 of its shape give the target output size
      num_classes: Number of output channels of the final 1x1 convolution
      preset_model: Not read inside this function
      frontend: Frontend identifier, forwarded to frontend_builder.build_frontend
      load_pre_train: Forwarded to build_frontend (whether to load pre-trained weights)
      pre_trained_dir: Forwarded to build_frontend (directory of the pre-trained model)
      is_training: Forwarded to build_frontend (training flag for batch normalization)

    Returns:
      A tuple (net, init_fn, stable_params): the logits tensor, plus the init
      function and stable parameters exactly as returned by build_frontend
    """
    # Only the end points, the init function and the stable parameters are needed here.
    _, end_points, _, init_fn, stable_params = frontend_builder.build_frontend(
        inputs, frontend,
        pre_trained_dir=pre_trained_dir,
        is_training=is_training,
        load_pre_train=load_pre_train,
        requested_stages=[4])

    # Dynamic spatial size of the input; used as the upsampling target.
    output_size = tf.shape(inputs)[1:3]

    pooled = AtrousSpatialPyramidPoolingModule(end_points['stage4'])

    with tf.variable_scope('Upsampling'):
        upsampled = Upsampling(pooled, output_size)

    class_scores = slim.conv2d(upsampled, num_classes, [1, 1], activation_fn=None, scope='logits')

    return class_scores, init_fn, stable_params
def build_pspnet(inputs, num_classes, label_size, upscaling_method="conv", pooling_type="MAX", preset_model=None,
                 frontend="ResNet101", load_pre_train=False, pre_trained_dir=None, is_training=True):
    """
    Builds the PSPNet model.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes
      label_size: Size of the final label tensor. We need to know this for proper upscaling.
        It is iterated in Python and every entry is divided by 8 to obtain the
        feature map shape handed to the pyramid pooling module.
      upscaling_method: How the features are brought back to label resolution.
        "conv" applies three ConvUpscaleBlock/ConvBlock pairs (scale 2 each),
        "bilinear" calls Upsampling with `label_size`. Case-insensitive.
      pooling_type: Max or Average pooling (forwarded to PyramidPoolingModule)
      preset_model: Not read inside this function
      frontend: Which frontend model you want to use. Select which ResNet model to use for feature extraction
      load_pre_train: Indicate whether to load pre-trained models
      pre_trained_dir: Path to directory where pre-trained model is to be loaded from.
      is_training: Training Flag for batch normalization (forwarded to the frontend builder)

    Returns:
      A tuple (net, init_fn, stable_params): the logits tensor, plus the init
      function and stable parameters exactly as returned by build_frontend

    Raises:
      ValueError: if `upscaling_method` is neither "conv" nor "bilinear"
    """
    # Fail fast, before any graph op is created: an unrecognised method would
    # otherwise skip the upscaling stage silently and leave the logits at the
    # un-upscaled feature-map resolution.
    method = upscaling_method.lower()
    if method not in ("conv", "bilinear"):
        raise ValueError(
            'Unsupported upscaling_method %r; expected "conv" or "bilinear"' % (upscaling_method,))

    logits, end_points, frontend_scope, init_fn, stable_params = \
        frontend_builder.build_frontend(inputs, frontend, pre_trained_dir=pre_trained_dir, is_training=is_training,
                                        load_pre_train=load_pre_train, requested_stages=[3])

    # The pyramid pooling module is given the label size divided by 8.
    feature_map_shape = [int(x / 8.0) for x in label_size]

    with tf.variable_scope('PyramidPoolingModule'):
        psp = PyramidPoolingModule(end_points['stage3'], feature_map_shape=feature_map_shape,
                                   pooling_type=pooling_type)

    with tf.variable_scope('Conv'):
        net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
        net = slim.batch_norm(net, fused=True)
        net = tf.nn.relu(net)

    with tf.variable_scope('Upscaling'):
        if method == "conv":
            with tf.variable_scope('ConvUpscaleBlock_1'):
                net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
                net = ConvBlock(net, 256)
            with tf.variable_scope('ConvUpscaleBlock_2'):
                net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
                net = ConvBlock(net, 128)
            with tf.variable_scope('ConvUpscaleBlock_3'):
                net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
                net = ConvBlock(net, 64)
        else:  # method == "bilinear" (validated above)
            net = Upsampling(net, label_size)

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn, stable_params
def build_refinenet(inputs, num_classes=2, preset_model='Refinenet', upscaling_method="bilinear",
                    frontend="ResNet101", load_pre_train=False, pre_trained_dir=None, is_training=True):
    """
    Assembles the RefineNet graph on top of a frontend network.

    Four frontend end points ('stage7', 'stage4', 'stage3', 'stage2') are each
    reduced by a 1x1 convolution and merged through a chain of RefineBlocks.
    Two ResidualConvUnits, an optional upscaling stage, a bilinear resize to
    the spatial size of `inputs` and a 1x1 convolution to `num_classes`
    channels follow.

    Arguments:
      inputs: The input tensor; axes 1 and 2 of its shape give the final output size
      num_classes: Number of output channels of the final 1x1 convolution
      preset_model: Not read inside this function
      upscaling_method: "conv" (two ConvUpscaleBlock/ConvBlock pairs) or "bilinear"
        (Upsampling with scale=4), case-insensitive. Any other value skips this
        stage; the later resize to the input size is applied regardless.
      frontend: Frontend identifier, forwarded to frontend_builder.build_frontend
      load_pre_train: Forwarded to build_frontend (whether to load pre-trained weights)
      pre_trained_dir: Forwarded to build_frontend (directory of the pre-trained model)
      is_training: Forwarded to build_frontend (training flag for batch normalization)

    Returns:
      A tuple (net, init_fn, stable_params): the logits tensor, plus the init
      function and stable parameters exactly as returned by build_frontend
    """
    _, end_points, _, init_fn, stable_params = frontend_builder.build_frontend(
        inputs, frontend,
        pre_trained_dir=pre_trained_dir,
        is_training=is_training,
        load_pre_train=load_pre_train,
        requested_stages=[2, 3, 4, 7])

    # Frontend features, listed from the deepest stage to the shallowest one.
    stage_keys = ('stage7', 'stage4', 'stage3', 'stage2')
    bottleneck_depths = (512, 256, 256, 256)
    stage_features = [end_points[key] for key in stage_keys]

    # Bring every feature map to its working depth with a 1x1 bottleneck convolution.
    squeezed = [slim.conv2d(feature, depth, 1)
                for feature, depth in zip(stage_features, bottleneck_depths)]

    # The first refine stage only sees the deepest features; each later stage
    # fuses the next shallower features with the previous stage's output.
    with tf.variable_scope('Refine_1'):
        refined = RefineBlock(high_inputs=squeezed[0], low_inputs=None)
    for stage_number, high_feature in enumerate(squeezed[1:], start=2):
        with tf.variable_scope('Refine_%d' % stage_number):
            refined = RefineBlock(high_feature, refined)

    net = refined
    for _ in range(2):
        net = ResidualConvUnit(net)

    with tf.variable_scope('Upscale'):
        if upscaling_method.lower() == "conv":
            for depth in (128, 64):
                net = ConvUpscaleBlock(net, depth, kernel_size=[3, 3], scale=2)
                net = ConvBlock(net, depth)
        elif upscaling_method.lower() == "bilinear":
            net = Upsampling(net, scale=4)

    # NOTE(review): the original indentation was lost, so whether the final
    # convolution sits inside this 'logits' scope is a reconstruction; it
    # determines the variable name prefix - confirm against existing checkpoints.
    with tf.variable_scope('logits'):
        target_height = tf.shape(inputs)[1]
        target_width = tf.shape(inputs)[2]
        net = tf.image.resize_bilinear(net, size=[target_height, target_width])
        net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn, stable_params
def build_ddsc(inputs, num_classes, preset_model='DDSC', frontend="ResNet101", weight_decay=1e-5, is_training=True,
               pretrained_dir=FRONT_END_MODEL_PATH):
    """
    Builds the Dense Decoder Shortcut Connections model.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes
      preset_model: Not read inside this function
      frontend: Which frontend model you want to use. Select which ResNet model to use for feature extraction
      weight_decay: Not read inside this function
      is_training: Training Flag, forwarded to the frontend builder
      pretrained_dir: Forwarded to the frontend builder as `pre_trained_dir`.
        Pre-trained loading itself is disabled here (load_pre_train=False).

    Returns:
      A tuple (net, init_fn): the logits tensor and the init function as
      returned by the frontend builder
    """
    # The other builders in this file unpack five values from build_frontend
    # while this one used to unpack four. Reading the two needed entries by
    # position works with either arity (end points at index 1, init function
    # at index 3, matching the order used by the sibling builders).
    # NOTE(review): siblings also pass requested_stages=[2, 3, 4, 7]; this call
    # relies on the default exposing stage2/3/4/7 - confirm.
    frontend_outputs = frontend_builder.build_frontend(
        inputs, frontend, pre_trained_dir=pretrained_dir, is_training=is_training, load_pre_train=False)
    end_points = frontend_outputs[1]
    init_fn = frontend_outputs[3]

    # Adapting features for all stages
    decoder_4 = EncoderAdaptionBlock(end_points['stage7'], n_filters=1024)
    decoder_3 = EncoderAdaptionBlock(end_points['stage4'], n_filters=512)
    decoder_2 = EncoderAdaptionBlock(end_points['stage3'], n_filters=256)
    decoder_1 = EncoderAdaptionBlock(end_points['stage2'], n_filters=128)

    # Floor division keeps D_prime_features an int on Python 3 as well; true
    # division would hand a float feature count to the block.
    decoder_4 = SemanticFeatureGenerationBlock(decoder_4, D_features=1024, D_prime_features=1024 // 4,
                                               O_features=1024)

    # Fusing features from 3 and 4
    decoder_4 = ConvBlock(decoder_4, n_filters=512, kernel_size=[3, 3])
    decoder_4 = Upsampling(decoder_4, scale=2)

    decoder_3 = ConvBlock(decoder_3, n_filters=512, kernel_size=[3, 3])

    decoder_3 = tf.add_n([decoder_4, decoder_3])

    decoder_3 = SemanticFeatureGenerationBlock(decoder_3, D_features=512, D_prime_features=512 // 4,
                                               O_features=512)

    # Fusing features from 2, 3, 4
    decoder_4 = ConvBlock(decoder_4, n_filters=256, kernel_size=[3, 3])
    decoder_4 = Upsampling(decoder_4, scale=4)

    decoder_3 = ConvBlock(decoder_3, n_filters=256, kernel_size=[3, 3])
    decoder_3 = Upsampling(decoder_3, scale=2)

    decoder_2 = ConvBlock(decoder_2, n_filters=256, kernel_size=[3, 3])

    decoder_2 = tf.add_n([decoder_4, decoder_3, decoder_2])

    decoder_2 = SemanticFeatureGenerationBlock(decoder_2, D_features=256, D_prime_features=256 // 4,
                                               O_features=256)

    # Fusing features from 1, 2, 3, 4
    decoder_4 = ConvBlock(decoder_4, n_filters=128, kernel_size=[3, 3])
    decoder_4 = Upsampling(decoder_4, scale=8)

    decoder_3 = ConvBlock(decoder_3, n_filters=128, kernel_size=[3, 3])
    decoder_3 = Upsampling(decoder_3, scale=4)

    decoder_2 = ConvBlock(decoder_2, n_filters=128, kernel_size=[3, 3])
    decoder_2 = Upsampling(decoder_2, scale=2)

    decoder_1 = ConvBlock(decoder_1, n_filters=128, kernel_size=[3, 3])

    decoder_1 = tf.add_n([decoder_4, decoder_3, decoder_2, decoder_1])

    decoder_1 = SemanticFeatureGenerationBlock(decoder_1, D_features=128, D_prime_features=128 // 4,
                                               O_features=num_classes)

    # Final upscaling and finish
    with tf.variable_scope('Upsampling'):
        net = Upsampling(decoder_1, scale=4)

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn
def build_bisenet(inputs, num_classes=2, preset_model='BiSeNet', frontend="ResNet101", load_pre_train=False,
                  pre_trained_dir=None, is_training=True):
    """
    Assembles the BiSeNet graph: a shallow spatial path plus a frontend-based context path.

    The spatial path applies three stride-2 ConvBlocks directly to `inputs`.
    The context path runs AttentionRefinementModule on the frontend's 'stage4'
    and 'stage7' end points, rescales the 'stage7' branch by its spatial mean,
    upsamples both branches and concatenates them. FeatureFusionModule merges
    the two paths, followed by Upsampling with scale=8 and a 1x1 convolution
    to `num_classes` channels.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes; used for the fusion module's filter count
        and for the final 1x1 convolution
      preset_model: Not read inside this function
      frontend: Frontend identifier, forwarded to frontend_builder.build_frontend
      load_pre_train: Forwarded to build_frontend (whether to load pre-trained weights)
      pre_trained_dir: Forwarded to build_frontend (directory of the pre-trained model)
      is_training: Forwarded to build_frontend (training flag for batch normalization)

    Returns:
      A tuple (net, init_fn, stable_params): the logits tensor, plus the init
      function and stable parameters exactly as returned by build_frontend
    """
    # Spatial path. The paper does not specify the number of feature maps per
    # convolution; they are set to match a classification model at the
    # corresponding stage.
    with tf.variable_scope('Spatial_path'):
        spatial_features = inputs
        for depth in (64, 128, 256):
            spatial_features = ConvBlock(spatial_features, n_filters=depth, kernel_size=[3, 3], strides=2)

    # Context path
    _, end_points, _, init_fn, stable_params = frontend_builder.build_frontend(
        inputs, frontend,
        pre_trained_dir=pre_trained_dir,
        is_training=is_training,
        load_pre_train=load_pre_train,
        requested_stages=[4, 7])

    with tf.variable_scope('Context_path'):
        refined_stage4 = AttentionRefinementModule(end_points['stage4'], n_filters=512)
        refined_stage7 = AttentionRefinementModule(end_points['stage7'], n_filters=2048)

        # Per-channel mean over axes 1 and 2, broadcast back as a channel weighting.
        channel_means = tf.reduce_mean(refined_stage7, [1, 2], keepdims=True)
        weighted_stage7 = tf.multiply(channel_means, refined_stage7)

    # Combining the paths
    with tf.variable_scope('Upsampling_1'):
        refined_stage4 = Upsampling(refined_stage4, scale=2)

    with tf.variable_scope('Upsampling_2'):
        weighted_stage7 = Upsampling(weighted_stage7, scale=4)

    context_features = tf.concat([refined_stage4, weighted_stage7], axis=-1)

    with tf.variable_scope('FeatureFusionModule'):
        fused = FeatureFusionModule(input_1=spatial_features, input_2=context_features, n_filters=num_classes)

    # Final upscaling and finish
    with tf.variable_scope('Upsampling_3'):
        fused = Upsampling(fused, scale=8)

    class_scores = slim.conv2d(fused, num_classes, [1, 1], activation_fn=None, scope='logits')

    return class_scores, init_fn, stable_params
def build_gcn(inputs, num_classes, preset_model='GCN', frontend="ResNet101", load_pre_train=False,
              pre_trained_dir=None, is_training=True):
    """
    Assembles the GCN (global convolution network) graph on top of a frontend network.

    Each of the frontend end points 'stage7', 'stage4', 'stage3' and 'stage2'
    goes through GlobalConvBlock and BoundaryRefinementBlock. Starting from
    'stage7', every branch is upscaled by ConvUpscaleBlock (scale 2) and added
    to the next branch, which is refined again. Two further refinement blocks
    around one more upscale and a 1x1 convolution to `num_classes` channels
    finish the network.

    Arguments:
      inputs: The input tensor
      num_classes: Number of output channels of the final 1x1 convolution
      preset_model: Not read inside this function
      frontend: Frontend identifier, forwarded to frontend_builder.build_frontend
      load_pre_train: Forwarded to build_frontend (whether to load pre-trained weights)
      pre_trained_dir: Forwarded to build_frontend (directory of the pre-trained model)
      is_training: Forwarded to build_frontend (training flag for batch normalization)

    Returns:
      A tuple (net, init_fn, stable_params): the logits tensor, plus the init
      function and stable parameters exactly as returned by build_frontend
    """
    _, end_points, _, init_fn, stable_params = frontend_builder.build_frontend(
        inputs, frontend,
        pre_trained_dir=pre_trained_dir,
        is_training=is_training,
        load_pre_train=load_pre_train,
        requested_stages=[2, 3, 4, 7])

    stage7, stage4, stage3, stage2 = [
        end_points[key] for key in ('stage7', 'stage4', 'stage3', 'stage2')
    ]

    # Width of every intermediate feature map; fixed and independent of num_classes.
    n_maps = 21

    # NOTE(review): the original indentation was lost. The ConvUpscaleBlock
    # calls that have no scope of their own are placed at function level here;
    # this affects variable name prefixes - confirm against existing checkpoints.

    # Branch fed by 'stage7'
    with tf.variable_scope('GCN_1'):
        branch_7 = GlobalConvBlock(stage7, n_filters=n_maps, size=3)
    with tf.variable_scope('BR_1'):
        branch_7 = BoundaryRefinementBlock(branch_7, n_filters=n_maps, kernel_size=[3, 3])
    with tf.variable_scope('Deconv_1'):
        branch_7 = ConvUpscaleBlock(branch_7, n_filters=n_maps, kernel_size=[3, 3], scale=2)

    # Branch fed by 'stage4', merged with the upscaled 'stage7' branch
    with tf.variable_scope('GCN_2'):
        branch_4 = GlobalConvBlock(stage4, n_filters=n_maps, size=3)
    with tf.variable_scope('BR2'):
        branch_4 = BoundaryRefinementBlock(branch_4, n_filters=n_maps, kernel_size=[3, 3])
    with tf.variable_scope('Add_1'):
        branch_4 = tf.add(branch_4, branch_7)
    with tf.variable_scope('BR2_1'):
        branch_4 = BoundaryRefinementBlock(branch_4, n_filters=n_maps, kernel_size=[3, 3])
    branch_4 = ConvUpscaleBlock(branch_4, n_filters=n_maps, kernel_size=[3, 3], scale=2)

    # Branch fed by 'stage3', merged with the result so far
    with tf.variable_scope('GCN_3'):
        branch_3 = GlobalConvBlock(stage3, n_filters=n_maps, size=3)
    with tf.variable_scope('BR_3'):
        branch_3 = BoundaryRefinementBlock(branch_3, n_filters=n_maps, kernel_size=[3, 3])
    with tf.variable_scope('Add_2'):
        branch_3 = tf.add(branch_3, branch_4)
    with tf.variable_scope('BR3_1'):
        branch_3 = BoundaryRefinementBlock(branch_3, n_filters=n_maps, kernel_size=[3, 3])
    branch_3 = ConvUpscaleBlock(branch_3, n_filters=n_maps, kernel_size=[3, 3], scale=2)

    # Branch fed by 'stage2', merged with the result so far
    with tf.variable_scope('GCN_4'):
        branch_2 = GlobalConvBlock(stage2, n_filters=n_maps, size=3)
    with tf.variable_scope('BR_4'):
        branch_2 = BoundaryRefinementBlock(branch_2, n_filters=n_maps, kernel_size=[3, 3])
    with tf.variable_scope('Add_3'):
        branch_2 = tf.add(branch_2, branch_3)
    with tf.variable_scope('BR4_1'):
        branch_2 = BoundaryRefinementBlock(branch_2, n_filters=n_maps, kernel_size=[3, 3])
    branch_2 = ConvUpscaleBlock(branch_2, n_filters=n_maps, kernel_size=[3, 3], scale=2)

    # Final refinement, one more upscale, refinement again, then class scores
    head = BoundaryRefinementBlock(branch_2, n_filters=n_maps, kernel_size=[3, 3])
    head = ConvUpscaleBlock(head, n_filters=n_maps, kernel_size=[3, 3], scale=2)
    head = BoundaryRefinementBlock(head, n_filters=n_maps, kernel_size=[3, 3])

    class_scores = slim.conv2d(head, num_classes, [1, 1], activation_fn=None, scope='logits')

    return class_scores, init_fn, stable_params
def build_dense_aspp(inputs, num_classes, preset_model='DenseASPP', frontend="ResNet101", load_pre_train=False,
                     pre_trained_dir=None, is_training=True):
    """
    Assembles the Dense ASPP graph on top of a frontend network.

    Five dilated blocks (rates 3, 6, 12, 18, 24) are stacked on the frontend's
    'stage3' end point. Each block is a 1x1 DilatedConvBlock with 256 filters
    followed by a 3x3 DilatedConvBlock with 64 filters at the block's rate.
    From the second block on, the block input is the channel-wise
    concatenation of the 'stage3' features and all earlier block outputs.
    The concatenation of everything is mapped to `num_classes` channels by a
    1x1 convolution and then passed to Upsampling with scale=8.

    Arguments:
      inputs: The input tensor
      num_classes: Number of output channels of the 1x1 logits convolution
      preset_model: Not read inside this function
      frontend: Frontend identifier, forwarded to frontend_builder.build_frontend
      load_pre_train: Forwarded to build_frontend (whether to load pre-trained weights)
      pre_trained_dir: Forwarded to build_frontend (directory of the pre-trained model)
      is_training: Forwarded to build_frontend (training flag for batch normalization)

    Returns:
      A tuple (net, init_fn, stable_params): the upsampled logits tensor, plus
      the init function and stable parameters exactly as returned by build_frontend
    """
    _, end_points, _, init_fn, stable_params = frontend_builder.build_frontend(
        inputs, frontend,
        pre_trained_dir=pre_trained_dir,
        is_training=is_training,
        load_pre_train=load_pre_train,
        requested_stages=[3])

    base_features = end_points['stage3']

    # Everything produced so far, in creation order: base features first,
    # then the output of each dilated block.
    collected = [base_features]
    block_input = base_features

    for block_index, rate in enumerate((3, 6, 12, 18, 24)):
        # The first block reads the base features directly; later blocks read
        # the concatenation of all features collected up to that point.
        if block_index > 0:
            with tf.variable_scope('concat_%d' % block_index):
                block_input = tf.concat(collected, axis=-1)

        with tf.variable_scope('Atrous_conv_d%d' % rate):
            bottleneck = DilatedConvBlock(block_input, n_filters=256, kernel_size=[1, 1])
            block_output = DilatedConvBlock(bottleneck, n_filters=64, rate=rate, kernel_size=[3, 3])

        collected.append(block_output)

    dense_features = tf.concat(collected, axis=-1)

    class_scores = slim.conv2d(dense_features, num_classes, [1, 1], activation_fn=None, scope='logits')

    with tf.variable_scope('Upsampling'):
        class_scores = Upsampling(class_scores, scale=8)

    return class_scores, init_fn, stable_params