def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1,
               depth_activation=False, epsilon=1e-3, use_coordconv=True):
    """SepConv with BN between depthwise & pointwise. Optionally adds activation after BN.
    Implements right "same" padding for even kernel sizes.

    Args:
        x: input tensor
        filters: number of filters in the pointwise convolution
        prefix: prefix prepended to layer names
        stride: stride of the depthwise convolution
        kernel_size: kernel size for the depthwise convolution
        rate: atrous rate for the depthwise convolution
        depth_activation: flag to use activation between depthwise & pointwise convs
        epsilon: epsilon to use in the BN layers
        use_coordconv: if True, prepend coordinate channels before the pointwise convolution
    """
    if stride == 1:
        depth_padding = 'same'
    else:
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        x = ZeroPadding2D((pad_beg, pad_end))(x)
        depth_padding = 'valid'

    if not depth_activation:
        x = Activation('relu')(x)
    x = DepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride),
                        dilation_rate=(rate, rate), padding=depth_padding,
                        use_bias=False, name=prefix + '_depthwise')(x)
    x = BatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)

    if use_coordconv:
        x = CoordinateChannel2D(use_radius=True)(x)
    x = Conv2D(filters, (1, 1), padding='same',
               use_bias=False, name=prefix + '_pointwise')(x)
    x = BatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)

    return x
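# Minimal usage sketch for SepConv_BN (the feature-map shape, filter count and
# layer prefix below are illustrative, not taken from the original pipeline):
_sep_demo_in = Input(shape=(32, 32, 64))
_sep_demo_out = SepConv_BN(_sep_demo_in, 128, 'demo_sepconv', rate=2,
                           depth_activation=True, use_coordconv=True)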
def res(inputs, bn=True):
    """Residual block with a CoordConv entry: two 3x3 convs plus a skip connection."""
    n = int(inputs.shape[-1])
    x = CoordinateChannel2D()(inputs)
    # x = inputs  # bypass the coordinate channels to get a plain residual block
    c1 = Activation('relu')(Conv2D(n, 3, padding='same')(x))
    c2 = Activation('relu')(Conv2D(n, 3, padding='same')(c1))
    m = add([c2, inputs])
    if bn:
        m = BatchNormalization()(m)
    return m
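# Sketch: stacking two CoordConv residual blocks on a dummy feature map
# (the 32x32x16 shape is illustrative):
_res_demo_in = Input(shape=(32, 32, 16))
_res_demo_out = res(res(_res_demo_in, bn=True), bn=True)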
plt.imshow(np.sum(train_onehot, axis=0)[:, :, 0], cmap='gray')
plt.title('Train One-hot dataset')
plt.show()

plt.imshow(np.sum(test_onehot, axis=0)[:, :, 0], cmap='gray')
plt.title('Test One-hot dataset')
plt.show()

# flatten the one-hot targets to (num_samples, 64 * 64)
train_onehot = train_onehot.reshape((-1, 64 * 64))
test_onehot = test_onehot.reshape((-1, 64 * 64))

# model definition
ip = Input(shape=(64, 64, 2))
x = CoordinateChannel2D()(ip)
x = Conv2D(32, (1, 1), padding='same', activation='relu')(x)
x = Conv2D(32, (1, 1), padding='same', activation='relu')(x)
x = Conv2D(64, (1, 1), padding='same', activation='relu')(x)
x = Conv2D(64, (1, 1), padding='same', activation='relu')(x)
x = Conv2D(1, (1, 1), padding='same', activation='linear')(x)
x = Flatten()(x)
x = Softmax(axis=-1)(x)

model = Model(ip, x)
model.summary()

optimizer = Adam(lr=1e-2)
model.compile(optimizer, 'categorical_crossentropy', metrics=['accuracy'])
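# Quick shape check on random inputs (illustrative only; real training pairs the
# 2-channel coordinate inputs with the flattened one-hot targets defined above):
_dummy_coords = np.random.random((4, 64, 64, 2)).astype('float32')
print(model.predict(_dummy_coords).shape)  # expected: (4, 4096)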
def Deeplabv3(weights='pascal_voc', input_tensor=None, input_shape=(512, 512, 3),
              classes=21, backbone='mobilenetv2', OS=16, alpha=1., use_coordconv=True):
    """Instantiates the DeepLabV3+ architecture.

    Optionally loads weights pre-trained on PASCAL VOC. This model is available
    for TensorFlow only, and can only be used with inputs following the
    TensorFlow data format `(width, height, channels)`.

    # Arguments
        weights: one of 'pascal_voc' (pre-trained on PASCAL VOC)
            or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of the input image in HxWxC format.
            The PASCAL VOC model was trained on (512, 512, 3) images.
        classes: number of desired classes. If classes != 21,
            the last layer is initialized randomly.
        backbone: backbone to use, one of {'xception', 'mobilenetv2'}
        OS: determines the input_shape / feature_extractor_output ratio.
            One of {8, 16}. Used only for the xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as
            the width multiplier in the MobileNetV2 paper.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, the default number of filters from the paper
                is used at each layer.
            Used only for the mobilenetv2 backbone.
        use_coordconv: if True, prepend CoordinateChannel2D layers to the 1x1
            convolutions throughout the network.

    # Returns
        A Keras model instance.

    # Raises
        RuntimeError: If attempting to run this model with a backend that does
            not support separable convolutions.
        ValueError: in case of an invalid argument for `weights` or `backbone`.
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if K.backend() != 'tensorflow':
        raise RuntimeError('The Deeplabv3+ model is only available with '
                           'the TensorFlow backend.')

    if backbone not in {'xception', 'mobilenetv2'}:
        raise ValueError('The `backbone` argument should be either '
                         '`xception` or `mobilenetv2`')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    batches_input = img_input
    if use_coordconv:
        # implemented as in the ResNet-50 image-classification task
        # of the CoordConv paper (Section S5, page 15)
        batches_input = CoordinateChannel2D(use_radius=True)(batches_input)
        batches_input = Conv2D(8, (1, 1), padding='same',
                               name='8_channels_coordconv_input')(batches_input)

    if backbone == 'xception':
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3), strides=(2, 2),
                   name='entry_flow_conv1_1', use_bias=False,
                   padding='same')(batches_input)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation('relu')(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation('relu')(x)

        x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                            skip_connection_type='conv', stride=2,
                            depth_activation=False)
        x, skip1 = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                                   skip_connection_type='conv', stride=2,
                                   depth_activation=False, return_skip=True)
        x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                            skip_connection_type='conv', stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(16):
            x = _xception_block(x, [728, 728, 728], 'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum', stride=1,
                                rate=middle_block_rate, depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                            skip_connection_type='conv', stride=1,
                            rate=exit_block_rates[0], depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                            skip_connection_type='none', stride=1,
                            rate=exit_block_rates[1], depth_activation=True)

    else:
        OS = 8
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters, kernel_size=3, strides=(2, 2),
                   padding='same', use_bias=False, name='Conv')(batches_input)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
        x = Activation(relu6, name='Conv_Relu6')(x)

        x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
                                expansion=1, block_id=0, skip_connection=False,
                                use_coordconv=use_coordconv)

        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
                                expansion=6, block_id=1, skip_connection=False,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
                                expansion=6, block_id=2, skip_connection=True,
                                use_coordconv=use_coordconv)

        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
                                expansion=6, block_id=3, skip_connection=False,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=4, skip_connection=True,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                                expansion=6, block_id=5, skip_connection=True,
                                use_coordconv=use_coordconv)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,  # stride changed 2 -> 1
                                expansion=6, block_id=6, skip_connection=False,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=7, skip_connection=True,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=8, skip_connection=True,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=9, skip_connection=True,
                                use_coordconv=use_coordconv)

        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=10, skip_connection=False,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=11, skip_connection=True,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                                expansion=6, block_id=12, skip_connection=True,
                                use_coordconv=use_coordconv)

        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=2,  # stride changed 2 -> 1
                                expansion=6, block_id=13, skip_connection=False,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=14, skip_connection=True,
                                use_coordconv=use_coordconv)
        x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=15, skip_connection=True,
                                use_coordconv=use_coordconv)

        x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=4,
                                expansion=6, block_id=16, skip_connection=False,
                                use_coordconv=use_coordconv)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    # out_shape = int(np.ceil(input_shape[0] / OS))
    b4 = AveragePooling2D(pool_size=(int(np.ceil(input_shape[0] / OS)),
                                     int(np.ceil(input_shape[1] / OS))))(x)
    b4 = Conv2D(256, (1, 1), padding='same',
                use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    b4 = BilinearUpsampling((int(np.ceil(input_shape[0] / OS)),
                             int(np.ceil(input_shape[1] / OS))))(b4)

    # simple 1x1
    if use_coordconv:
        x = CoordinateChannel2D(use_radius=True)(x)
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in MobileNetV2; not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x, 256, 'aspp1',
                        rate=atrous_rates[0], depth_activation=True, epsilon=1e-5,
                        use_coordconv=use_coordconv)
        # rate = 12 (24)
        b2 = SepConv_BN(x, 256, 'aspp2',
                        rate=atrous_rates[1], depth_activation=True, epsilon=1e-5,
                        use_coordconv=use_coordconv)
        # rate = 18 (36)
        b3 = SepConv_BN(x, 256, 'aspp3',
                        rate=atrous_rates[2], depth_activation=True, epsilon=1e-5,
                        use_coordconv=use_coordconv)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    if use_coordconv:
        x = CoordinateChannel2D(use_radius=True)(x)
    x = Conv2D(256, (1, 1), padding='same',
               use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    # DeepLabV3+ decoder
    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block
        x = BilinearUpsampling(output_size=(int(np.ceil(input_shape[0] / 4)),
                                            int(np.ceil(input_shape[1] / 4))))(x)
        dec_skip1 = Conv2D(48, (1, 1), padding='same',
                           use_bias=False, name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(
            name='feature_projection0_BN', epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation('relu')(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x, 256, 'decoder_conv0',
                       depth_activation=True, epsilon=1e-5,
                       use_coordconv=use_coordconv)
        x = SepConv_BN(x, 256, 'decoder_conv1',
                       depth_activation=True, epsilon=1e-5,
                       use_coordconv=use_coordconv)

    # you can use it with an arbitrary number of classes
    if classes == 21:
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'

    if use_coordconv:
        x = CoordinateChannel2D(use_radius=True)(x)
    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    x = BilinearUpsampling(output_size=(input_shape[0], input_shape[1]))(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='deeplabv3+')

    # load weights
    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH_X,
                                    cache_subdir='models')
        else:
            weights_path = get_file('deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                                    WEIGHTS_PATH_MOBILE,
                                    cache_subdir='models')
        model.load_weights(weights_path, by_name=True)

    return model
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id,
                        skip_connection, rate=1, use_coordconv=True):
    in_channels = inputs._keras_shape[-1]
    pointwise_conv_filters = int(filters * alpha)
    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
    x = inputs
    prefix = 'expanded_conv_{}_'.format(block_id)
    if block_id:
        # Expand
        if use_coordconv:
            x = CoordinateChannel2D(use_radius=True)(x)
        x = Conv2D(expansion * in_channels, kernel_size=1, padding='same',
                   use_bias=False, activation=None,
                   name=prefix + 'expand')(x)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                               name=prefix + 'expand_BN')(x)
        x = Activation(relu6, name=prefix + 'expand_relu')(x)
    else:
        prefix = 'expanded_conv_'

    # Depthwise
    x = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
                        use_bias=False, padding='same', dilation_rate=(rate, rate),
                        name=prefix + 'depthwise')(x)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                           name=prefix + 'depthwise_BN')(x)
    x = Activation(relu6, name=prefix + 'depthwise_relu')(x)

    # Project
    if use_coordconv:
        x = CoordinateChannel2D(use_radius=True)(x)
    x = Conv2D(pointwise_filters, kernel_size=1, padding='same',
               use_bias=False, activation=None,
               name=prefix + 'project')(x)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                           name=prefix + 'project_BN')(x)

    if skip_connection:
        return Add(name=prefix + 'add')([inputs, x])

    # if in_channels == pointwise_filters and stride == 1:
    #     return Add(name='res_connect_' + str(block_id))([inputs, x])

    return x
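if __name__ == '__main__':
    # Smoke test (a minimal sketch): build a randomly initialised CoordConv-augmented
    # DeepLabV3+ and check its output shape. weights=None avoids downloading the
    # PASCAL VOC checkpoint; the 512x512 input and 21 classes mirror the defaults above.
    demo_model = Deeplabv3(weights=None, input_shape=(512, 512, 3), classes=21,
                           backbone='mobilenetv2', use_coordconv=True)
    print(demo_model.output_shape)  # expected: (None, 512, 512, 21)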