def non_bt_1d(input, filters, init_scaling=1.0, dilation_rate=1, name="non_bt_1D", trainable=True):
    """
    Non-bottleneck-1D residual block.

    A 3x3 convolution is factorized into a (3, 1) and a (1, 3) convolution;
    two such pairs are stacked, with the second pair dilated by
    `dilation_rate`. The input is added back (residual connection) before a
    final ReLU, so `input` must already carry `filters` channels.

    :param input: input tensor; channel count must equal `filters` so the
        residual addition is valid
    :param filters: number of filters of every convolution in the block
    :param init_scaling: scale for the variance-scaling weight/bias initializers
    :param dilation_rate: dilation rate of the second (3,1)/(1,3) pair
    :param name: variable scope of the block
    :param trainable: whether the created variables are trainable.
        NOTE(fix): this was previously read as a free variable that is not
        defined at module scope, so calling this function raised NameError;
        it is now an explicit keyword argument with a backward-compatible
        default.
    :return: output tensor with the same shape as `input`
    """
    with tf.variable_scope(name):
        conv1 = conv_bn(input, filters=filters, kernel_size=(3, 1), padding="same",
                        trainable=trainable,
                        kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                        bias_initializer=tf.initializers.variance_scaling(init_scaling),
                        name="conv1")
        conv2 = conv_bn(conv1, filters=filters, kernel_size=(1, 3), padding="same",
                        trainable=trainable,
                        kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                        bias_initializer=tf.initializers.variance_scaling(init_scaling),
                        name="conv2")
        conv3 = conv_bn(conv2, filters=filters, kernel_size=(3, 1), padding="same",
                        dilation_rate=dilation_rate, trainable=trainable,
                        kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                        bias_initializer=tf.initializers.variance_scaling(init_scaling),
                        name="conv3")
        conv4 = conv_bn(conv3, filters=filters, kernel_size=(1, 3), padding="same",
                        dilation_rate=dilation_rate, trainable=trainable,
                        kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                        bias_initializer=tf.initializers.variance_scaling(init_scaling),
                        name="conv4")
        # Residual addition followed by post-activation ReLU.
        result = tf.nn.relu(input + conv4, name="relu")
        return result
def deeplab_v3_plus(image, *, n_classes=7, trainable=True):
    """
    DeepLabV3+ Model structure.

    Either Xception or ResNet101 Model can be used as backbone with output
    stride=16.

    Number of parameters in Model (modified ResNet101): 34 107 958
    Number of parameters in Model (original ResNet101): 57 709 494
    Number of parameters in Model (Xception): 63 004 110

    https://arxiv.org/pdf/1802.02611.pdf

    :param image: input tensor (assumes NHWC with statically known spatial
        dims — the `get_shape().as_list()` calls below need build-time sizes;
        TODO confirm against callers)
    :param n_classes: number of classes
    :param trainable: whether the variables of the model should be trainable or fixed
    :return: the output tensor of the Model (per-pixel softmax over n_classes)
    """
    with tf.variable_scope("deeplab_v3_plus"):
        # Backbone Model — returns the stride-16 feature map plus an earlier,
        # higher-resolution feature map that the decoder fuses back in.
        backbone_output, high_level_features = resnet101(image, trainable)

        # atrous spatial pyramid pooling: one 1x1 conv, three dilated 3x3
        # convs (rates 6/12/18) and a global-average "image level" branch,
        # all concatenated and projected back to 256 channels.
        with tf.variable_scope("aspp"):
            aspp_conv0 = conv_bn(backbone_output, filters=256, kernel_size=(1, 1),
                                 padding="same", trainable=trainable, name="conv0")
            aspp_conv1 = conv_bn(backbone_output, filters=256, kernel_size=(3, 3),
                                 dilation_rate=6, padding="same", trainable=trainable,
                                 name="conv1")
            aspp_conv2 = conv_bn(backbone_output, filters=256, kernel_size=(3, 3),
                                 dilation_rate=12, padding="same", trainable=trainable,
                                 name="conv2")
            aspp_conv3 = conv_bn(backbone_output, filters=256, kernel_size=(3, 3),
                                 dilation_rate=18, padding="same", trainable=trainable,
                                 name="conv3")
            # Image-level context: global average pool to 1x1, 1x1 conv,
            # then bilinear resize back to the backbone's spatial size.
            aspp_pool = tf.reduce_mean(backbone_output, axis=[1, 2], keepdims=True,
                                       name="global_avg_pool")
            image_level_conv = conv_bn(aspp_pool, filters=256, kernel_size=(1, 1),
                                       padding="same", trainable=trainable,
                                       name="image_level_conv")
            image_level_upsample = tf.image.resize_images(
                image_level_conv,
                [backbone_output.get_shape().as_list()[1],
                 backbone_output.get_shape().as_list()[2]])
            aspp_concat = tf.concat([aspp_conv0, aspp_conv1, aspp_conv2, aspp_conv3,
                                     image_level_upsample], axis=-1, name="concat")
            aspp_conv4 = conv_bn(aspp_concat, filters=256, kernel_size=(1, 1),
                                 padding="same", trainable=trainable, name="conv4")

        # decoder: upsample ASPP output 4x, concatenate with the reduced
        # backbone skip features, refine with two 3x3 convs, upsample 4x
        # again and classify per pixel.
        new_size = [aspp_conv4.get_shape().as_list()[1] * 4,
                    aspp_conv4.get_shape().as_list()[2] * 4]
        upsample0 = tf.image.resize_images(aspp_conv4, new_size, align_corners=True)
        # 1x1 conv reduces the skip connection to 48 channels before fusion.
        decoder_conv0 = tf.layers.Conv2D(filters=48, kernel_size=(1, 1), padding="same",
                                         trainable=trainable, activation="relu",
                                         name="decoder_conv0")(high_level_features)
        decoder_concat = tf.concat([upsample0, decoder_conv0], axis=-1,
                                   name="decoder_concat")
        decoder_conv1 = tf.layers.Conv2D(filters=256, kernel_size=(3, 3), padding="same",
                                         trainable=trainable, activation="relu",
                                         name="decoder_conv1")(decoder_concat)
        decoder_conv2 = tf.layers.Conv2D(filters=256, kernel_size=(3, 3), padding="same",
                                         trainable=trainable, activation="relu",
                                         name="decoder_conv2")(decoder_conv1)
        new_size = [decoder_conv2.get_shape().as_list()[1] * 4,
                    decoder_conv2.get_shape().as_list()[2] * 4]
        upsample1 = tf.image.resize_images(decoder_conv2, new_size, align_corners=True)
        # Final 1x1 conv with softmax produces the per-pixel class scores.
        classes = tf.layers.Conv2D(filters=n_classes, kernel_size=(1, 1), padding="same",
                                   trainable=trainable, activation="softmax",
                                   name="classes")(upsample1)
        return classes
def resnet_50(input_shape=(224, 224, 3), classes=20):
    """Build ResNet-50 model.

    Keyword Arguments:
        input_shape {tuple} -- Input shape of the model (default: {(224,224,3)})
        classes {int} -- Number of classes to classify images (default: {20})

    Returns:
        A Keras model instance.
    """
    inputs = Input(shape=input_shape)

    # conv1 stem: padded 7x7/2 convolution followed by 3x3/2 max pooling.
    x = ZeroPadding2D((3, 3))(inputs)
    x = conv_bn(X=x, filters=64, kernel_size=(7, 7), strides=(2, 2),
                padding='valid', prefix='conv1')
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    # Bottleneck stages conv2_x .. conv5_x: (stage name, filters, block count).
    stage_specs = [
        ('conv2', [64, 64, 256], 3),
        ('conv3', [128, 128, 512], 4),
        ('conv4', [256, 256, 1024], 6),
        ('conv5', [512, 512, 2048], 3),
    ]
    for stage_index, (stage_name, stage_filters, n_blocks) in enumerate(stage_specs):
        for block_index in range(1, n_blocks + 1):
            prefix = '{}_{}'.format(stage_name, block_index)
            if block_index == 1:
                # First block of every stage projects the shortcut; stages
                # after conv2 additionally downsample with stride 2.
                if stage_index == 0:
                    x = bottleneck_block(X=x, filters=stage_filters, prefix=prefix,
                                         with_conv_shortcut=True)
                else:
                    x = bottleneck_block(X=x, filters=stage_filters, strides=(2, 2),
                                         prefix=prefix, with_conv_shortcut=True)
            else:
                x = bottleneck_block(X=x, filters=stage_filters, prefix=prefix)

    # Classification head: global 7x7 average pool, flatten, softmax dense.
    x = AveragePooling2D(pool_size=(7, 7))(x)
    x = Flatten()(x)
    x = Dense(classes, activation='softmax')(x)

    return Model(inputs=inputs, outputs=x, name='ResNet50')
def resnet101(image, trainable=True):
    """
    ResNet101-like feature extractor for DeeplabV3+.

    The bottleneck modules have been replaced with Non-bt-1D modules similar
    to ERFNet. Additionally Batch Normalisation Layers have been added after
    each Convolution. The parameters are initialized with a variance scaling
    initializer. The scale Factor is multiplied with 0.75 throughout the
    Network similar to the findings of this paper:
    https://arxiv.org/pdf/1803.01719.pdf

    ResNet101: https://arxiv.org/pdf/1512.03385.pdf

    :param image: input tensor
    :param trainable: whether the variables of the model should be trainable or fixed
    :return: extracted features: a (deep features, early block2 features)
        tuple — the second element is taken before any downsampling block,
        for use as a decoder skip connection.
    """
    with tf.variable_scope("resnet101"):
        # NOTE(review): the parameter names `input` and `id` below shadow
        # Python builtins; kept as-is to avoid behavioral risk.
        def downsample_block(input, output_filter_size, id, init_scaling):
            # Bottleneck-style block that halves the spatial resolution:
            # strided 1x1 -> 3x3 -> 1x1 on the main path, plus a strided 1x1
            # projection on the skip path, summed and ReLU'd.
            bt_conv1 = tf.layers.Conv2D(filters=output_filter_size, kernel_size=(1, 1),
                                        padding="same", trainable=trainable,
                                        strides=(2, 2), activation="relu",
                                        kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                        bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                        name="conv{}_1".format(id))(input)
            bt_conv2 = tf.layers.Conv2D(filters=output_filter_size, kernel_size=(3, 3),
                                        padding="same", trainable=trainable,
                                        activation="relu",
                                        kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                        bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                        name="conv{}_2".format(id))(bt_conv1)
            # No activation here: the ReLU is applied after the skip addition.
            bt_conv3 = tf.layers.Conv2D(filters=output_filter_size, kernel_size=(1, 1),
                                        padding="same", trainable=trainable,
                                        kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                        bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                        name="conv{}_3".format(id))(bt_conv2)
            # Strided 1x1 projection so the skip path matches shape/channels.
            skip_conv = tf.layers.Conv2D(filters=output_filter_size, kernel_size=(1, 1),
                                         padding="same", trainable=trainable,
                                         strides=(2, 2),
                                         kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                         bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                         name="skip_conv{}".format(id))(input)
            add = tf.add(skip_conv, bt_conv3, name="add{}".format(id))
            return tf.nn.relu(add, name="block{}_act".format(id))

        def non_bt_1d(input, filters, init_scaling=1.0, dilation_rate=1, name="non_bt_1D"):
            # Non-bottleneck-1D residual block: two factorized (3,1)/(1,3)
            # conv pairs (second pair dilated), residual add, then ReLU.
            # `input` must already carry `filters` channels. `trainable` is
            # closed over from the enclosing resnet101 scope.
            with tf.variable_scope(name):
                conv1 = conv_bn(input, filters=filters, kernel_size=(3, 1), padding="same",
                                trainable=trainable,
                                kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                name="conv1")
                conv2 = conv_bn(conv1, filters=filters, kernel_size=(1, 3), padding="same",
                                trainable=trainable,
                                kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                name="conv2")
                conv3 = conv_bn(conv2, filters=filters, kernel_size=(3, 1), padding="same",
                                dilation_rate=dilation_rate, trainable=trainable,
                                kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                name="conv3")
                conv4 = conv_bn(conv3, filters=filters, kernel_size=(1, 3), padding="same",
                                dilation_rate=dilation_rate, trainable=trainable,
                                kernel_initializer=tf.initializers.variance_scaling(init_scaling),
                                bias_initializer=tf.initializers.variance_scaling(init_scaling),
                                name="conv4")
                result = tf.nn.relu(input + conv4, name="relu")
                return result

        # Stem: three 3x3 convs (first strided) then 3x3/2 max pool.
        scale_factor = 1.0
        conv1 = conv_bn(image, filters=64, kernel_size=(3, 3), strides=(2, 2),
                        padding="same", trainable=trainable,
                        kernel_initializer=tf.initializers.variance_scaling(scale_factor),
                        bias_initializer=tf.initializers.variance_scaling(scale_factor),
                        name="conv1_1")
        conv1 = conv_bn(conv1, filters=64, kernel_size=(3, 3), padding="same",
                        trainable=trainable,
                        kernel_initializer=tf.initializers.variance_scaling(scale_factor),
                        bias_initializer=tf.initializers.variance_scaling(scale_factor),
                        name="conv1_2")
        conv1 = conv_bn(conv1, filters=64, kernel_size=(3, 3), padding="same",
                        trainable=trainable,
                        kernel_initializer=tf.initializers.variance_scaling(scale_factor),
                        bias_initializer=tf.initializers.variance_scaling(scale_factor),
                        name="conv1_3")
        pool1 = tf.layers.max_pooling2d(conv1, pool_size=(3, 3), strides=(2, 2),
                                        padding="same", name="pool1")

        # Initializer scale decays by 0.75 per stage (see docstring paper).
        scale_factor *= 0.75
        block2_1 = non_bt_1d(pool1, 64, init_scaling=scale_factor, name="block2_1")
        block2_2 = non_bt_1d(block2_1, 64, init_scaling=scale_factor, name="block2_2")
        block2_3 = non_bt_1d(block2_2, 64, init_scaling=scale_factor, name="block2_3")

        scale_factor *= 0.75
        block3_1 = downsample_block(block2_3, 128, "3_1", init_scaling=scale_factor)
        block3_2 = non_bt_1d(block3_1, 128, init_scaling=scale_factor, name="block3_2")
        block3_3 = non_bt_1d(block3_2, 128, init_scaling=scale_factor, name="block3_3")
        block3_4 = non_bt_1d(block3_3, 128, init_scaling=scale_factor, name="block3_4")

        # Stage 4: one downsample followed by 22 residual blocks
        # (blocks 4_2 .. 4_23), mirroring ResNet101's deep conv4_x stage.
        scale_factor *= 0.75
        block4 = downsample_block(block3_4, 256, "4_1", init_scaling=scale_factor)
        for i in range(2, 24):
            block4 = non_bt_1d(block4, 256, init_scaling=scale_factor,
                               name="block4_{}".format(i))

        # Stage 5: widen to 512 channels with a 1x1 conv, then dilated
        # residual blocks (rates 2/4/8) to grow the receptive field without
        # further downsampling.
        scale_factor *= 0.75
        block4 = tf.layers.Conv2D(filters=512, kernel_size=(1, 1), activation="relu",
                                  padding="same", trainable=trainable,
                                  kernel_initializer=tf.initializers.variance_scaling(scale_factor),
                                  bias_initializer=tf.initializers.variance_scaling(scale_factor),
                                  name="filter_up")(block4)
        block5_1 = non_bt_1d(block4, 512, init_scaling=scale_factor, name="block5_1",
                             dilation_rate=2)
        block5_2 = non_bt_1d(block5_1, 512, init_scaling=scale_factor, name="block5_2",
                             dilation_rate=4)
        block5_3 = non_bt_1d(block5_2, 512, init_scaling=scale_factor, name="block5_3",
                             dilation_rate=8)

        # block2_3 is the early, higher-resolution feature map consumed by
        # the DeeplabV3+ decoder as a skip connection.
        return block5_3, block2_3
def __init__(self, n_class=3, in_size=(224, 448), width_mult=1., out_sec=256, aspp_sec=(12, 24, 36)):
    """
    MobileNetV2Plus: MobileNetV2 based Semantic Segmentation

    :param n_class:    (int) Number of classes
    :param in_size:    (tuple or int) Size of the input image feed to the network
    :param width_mult: (float) Network width multiplier
    :param out_sec:    (tuple) Number of the output channels of the ASPP Block
    :param aspp_sec:   (tuple) Dilation rates used in ASPP
    """
    super(MobileNetV2ASPP, self).__init__()
    self.n_class = n_class

    # Inverted residual stage configuration:
    # t = expansion ratio, c = output channels, n = block count,
    # s = first-block stride, d = dilation of the remaining blocks.
    self.interverted_residual_setting = [
        # t, c, n, s, d
        [1, 16, 1, 1, 1],     # 1/2
        [6, 24, 2, 2, 1],     # 1/4
        [6, 32, 3, 2, 1],     # 1/8
        [6, 64, 4, 1, 2],     # 1/8
        [6, 96, 3, 1, 4],     # 1/8
        [6, 160, 3, 1, 8],    # 1/8
        [6, 320, 1, 1, 16],   # 1/8
    ]

    # First layer: spatial dims must be divisible by 8 (output stride 1/8).
    assert in_size[0] % 8 == 0
    assert in_size[1] % 8 == 0
    self.input_size = in_size

    input_channel = int(32 * width_mult)
    self.mod1 = nn.Sequential(
        OrderedDict([("conv1", conv_bn(inp=3, oup=input_channel, stride=2))]))

    # Inverted residual stages, registered as mod2 .. mod8.
    for mod_id, (t, c, n, s, d) in enumerate(self.interverted_residual_setting):
        output_channel = int(c * width_mult)

        blocks = []
        for block_id in range(n):
            # Only the first block of a stride-2 stage downsamples (and is
            # undilated); every other block keeps stride 1 and applies the
            # stage's dilation rate.
            is_strided_entry = block_id == 0 and s == 2
            blocks.append(("block%d" % (block_id + 1),
                           InvertedResidual(inp=input_channel,
                                            oup=output_channel,
                                            stride=s if is_strided_entry else 1,
                                            dilate=1 if is_strided_entry else d,
                                            expand_ratio=t)))
            input_channel = output_channel

        self.add_module("mod%d" % (mod_id + 2), nn.Sequential(OrderedDict(blocks)))

    # Sum of every stage's output channels.
    # NOTE(review): currently unused in this constructor; kept for parity.
    org_last_chns = sum(setting[1] for setting in self.interverted_residual_setting)

    # ASPP head followed by a 1x1 classifier over n_class channels.
    self.aspp = nn.Sequential(
        ASPPModule(320),
        nn.Conv2d(512, n_class, kernel_size=1, stride=1, padding=0, bias=True))

    self._initialize_weights()