def dcn(self):
    """Build a small sequential CNN with a ConvOffset2D after each conv layer.

    Layer stack, in order:
    Conv2D(20, 5x5, 'same', relu) -> ConvOffset2D(20) ->
    Conv2D(50, 5x5, 'same', relu) -> ConvOffset2D(50) -> Flatten ->
    Dense(500, relu, name 'fc1') ->
    Dense(self.num_classes, softmax, name 'predictions').
    No pooling or dropout layers are added.

    Reads ``self.input_shape`` (passed as ``input_shape`` to the first conv
    layer) and ``self.num_classes``.

    # Returns
        The assembled ``Sequential`` model; it is not compiled here.
    """
    model = Sequential()
    layer_stack = [
        # conv1
        Conv2D(20,
               kernel_size=(5, 5),
               padding="same",
               activation='relu',
               input_shape=self.input_shape),
        ConvOffset2D(20),
        # conv2
        Conv2D(50, kernel_size=(5, 5), padding="same", activation='relu'),
        ConvOffset2D(50),
        Flatten(),
        # ip1
        Dense(500, activation='relu', name='fc1'),
        # ip2
        Dense(self.num_classes, activation='softmax', name='predictions'),
    ]
    for layer in layer_stack:
        model.add(layer)
    return model
def crnn_dcn(self):
    """Build a time-distributed CNN followed by an LSTM classifier.

    Every convolutional layer is wrapped in ``TimeDistributed``. The stack is
    two 8-filter 3x3 convs, each followed by ``ConvOffset2D(8)``, then two
    16-filter 3x3 convs with no offset layer, then a per-step ``Flatten``.
    The resulting sequence goes through ``LSTM(500, return_sequences=True,
    dropout=0.5)``, is flattened, and ends in a softmax ``Dense`` layer of
    ``self.nb_classes`` units.

    Reads ``self.input_shape`` (given both to the first wrapped conv layer
    and to the LSTM, exactly as before) and ``self.nb_classes``.

    # Returns
        The assembled ``Sequential`` model; it is not compiled here.
    """
    model = Sequential()
    layer_stack = [
        # 8-filter stage: each conv is followed by a ConvOffset2D layer.
        TimeDistributed(Conv2D(8, (3, 3), activation='relu', padding='same'),
                        input_shape=self.input_shape),
        TimeDistributed(ConvOffset2D(8)),
        TimeDistributed(Conv2D(8, (3, 3), activation='relu', padding='same')),
        TimeDistributed(ConvOffset2D(8)),
        # 16-filter stage: plain convolutions only.
        TimeDistributed(Conv2D(16, (3, 3), activation='relu', padding='same')),
        TimeDistributed(Conv2D(16, (3, 3), activation='relu', padding='same')),
        TimeDistributed(Flatten()),
        # Recurrent part and classifier head.
        LSTM(500,
             return_sequences=True,
             input_shape=self.input_shape,
             dropout=0.5),
        Flatten(),
        Dense(self.nb_classes, activation='softmax'),
    ]
    for layer in layer_stack:
        model.add(layer)
    return model
def test_tf_resampler_layer():
    """Compare ConvOffset2D output with and without ``use_resam=True``.

    A seeded random batch of shape (4, 10, 10, 3) is passed through the same
    ``ConvOffset2D(3)`` layer twice, once with the default call and once with
    ``use_resam=True``. Both output shapes and the first two channels of the
    first sample are printed for inspection. The test then asserts that
    ``tf.contrib`` exposes ``resampler`` and that both outputs agree within
    an absolute tolerance of 1e-4.
    """
    np.random.seed(42)
    batch = K.variable(np.random.random((4, 10, 10, 3)))

    layer = ConvOffset2D(3)
    ori_offset = K.eval(layer(batch))
    tf_offset = K.eval(layer(batch, use_resam=True))

    print(ori_offset.shape)
    print(tf_offset.shape)

    # Dump channels 0 and 1 of the first sample, framed by separator lines.
    for channel in (0, 1):
        print('-' * 100)
        print(ori_offset[0, :, :, channel])
        print('+' * 100)
        print(tf_offset[0, :, :, channel])
        print('-' * 100)

    assert hasattr(tf.contrib, 'resampler')
    assert np.allclose(ori_offset, tf_offset, atol=1e-4)
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Residual block whose shortcut is the input tensor itself (no conv).

    Main path: 1x1 conv -> BN -> ReLU -> ConvOffset2D -> ``kernel_size`` conv
    ('same' padding) -> BN -> ReLU -> ConvOffset2D -> 1x1 conv -> BN. The
    main-path output is added to ``input_tensor`` and passed through ReLU.

    # Arguments
        input_tensor: input tensor
        kernel_size: the kernel size of the middle conv layer at main path
        filters: three integers, the filters of the 3 conv layers at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.
    """
    depth_a, depth_b, depth_c = filters
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    # Conv and BN layers share the "<stage><block>_branch" part of their names.
    name_suffix = str(stage) + block + '_branch'
    conv_name_base = 'res' + name_suffix
    bn_name_base = 'bn' + name_suffix

    # 2a: 1x1 conv, followed by an offset layer.
    out = layers.Conv2D(depth_a, (1, 1),
                        kernel_initializer='he_normal',
                        name=conv_name_base + '2a')(input_tensor)
    out = layers.BatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2a')(out)
    out = layers.Activation('relu')(out)
    out = ConvOffset2D(depth_a)(out)

    # 2b: kernel_size conv, followed by an offset layer.
    out = layers.Conv2D(depth_b, kernel_size,
                        padding='same',
                        kernel_initializer='he_normal',
                        name=conv_name_base + '2b')(out)
    out = layers.BatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2b')(out)
    out = layers.Activation('relu')(out)
    out = ConvOffset2D(depth_b)(out)

    # 2c: 1x1 conv; no activation before the residual addition.
    out = layers.Conv2D(depth_c, (1, 1),
                        kernel_initializer='he_normal',
                        name=conv_name_base + '2c')(out)
    out = layers.BatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2c')(out)

    merged = layers.add([out, input_tensor])
    return layers.Activation('relu')(merged)
def _dimension_reduction(net,
                         branch_0_depth=224,
                         branch_1_depth=96,
                         use_deform_conv=False,
                         scope='dimension_reduction'):
    """
    Dimension reduction module of ldnet-v1.

    Three parallel branches read the same input and are concatenated on
    axis 3:
      Branch_0: 3x3 conv, stride 2, ``branch_0_depth`` filters.
      Branch_1: 1x1 conv (64) -> 3x3 conv (96) -> 3x3 conv, stride 2,
                ``branch_1_depth`` filters.
      Branch_2: 3x3 max pool, stride 2.

    :param net: the net input.
    :param branch_0_depth: the depth of branch_0.
    :param branch_1_depth: the depth of the last conv of branch_1.
    :param use_deform_conv: when true, a ConvOffset2D layer is inserted after
        each of the first two convs of Branch_1.
    :param scope: optional scope.
    :return: the size of returned net: [batch_size, height, width, channel],
        which channel = (branch_0_depth + branch_1_depth) + last_net_depth
    """
    with variable_scope.variable_scope(scope):
        with variable_scope.variable_scope('Branch_0'):
            strided_conv = layers.conv2d(net,
                                         branch_0_depth, [3, 3],
                                         stride=2,
                                         scope='Conv2d_1a_1x1')

        with variable_scope.variable_scope('Branch_1'):
            conv_stack = layers.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
            if use_deform_conv:
                # NOTE(review): both offset layers in this branch use the
                # name 'conv3_offset' - confirm the duplicate is intended.
                offset_layer = ConvOffset2D(64, name='conv3_offset')
                conv_stack = offset_layer(conv_stack)
            # NOTE(review): 96 is hard-coded here and is independent of
            # branch_1_depth - confirm.
            conv_stack = layers.conv2d(conv_stack,
                                       96, [3, 3],
                                       scope='Conv2d_0b_3x3')
            if use_deform_conv:
                offset_layer = ConvOffset2D(96, name='conv3_offset')
                conv_stack = offset_layer(conv_stack)
            conv_stack = layers.conv2d(conv_stack,
                                       branch_1_depth, [3, 3],
                                       stride=2,
                                       scope='Conv2d_1c_1x1')

        with variable_scope.variable_scope('Branch_2'):
            pooled = layers_lib.max_pool2d(net, [3, 3],
                                           stride=2,
                                           scope='MaxPool_1a_3x3')

        # Stack the three branches along axis 3.
        net = array_ops.concat([strided_conv, conv_stack, pooled], 3)
    return net
def off_conv_block(x_input, filters, strides=1):
    """Apply a 3x3 conv, a ConvOffset2D layer and LeakyReLU(0.2) in sequence.

    # Arguments
        x_input: input tensor.
        filters: number of filters of the conv layer; also passed as
            ``filters`` to the ConvOffset2D layer.
        strides: strides of the conv layer.

    # Returns
        The tensor produced by the final LeakyReLU.
    """
    conv = Conv2D(filters=filters,
                  kernel_size=3,
                  padding='same',
                  kernel_initializer='he_normal',
                  strides=strides)
    shifted = ConvOffset2D(filters=filters)(conv(x_input))
    return LeakyReLU(0.2)(shifted)
def get_deform_cnn(trainable):
    """Build the input and output tensors of a CNN with ConvOffset2D layers.

    The network takes a (28, 28, 1) input. ``conv11`` is a plain 3x3 conv.
    ``conv12``, ``conv21`` and ``conv22`` are each preceded by a ConvOffset2D
    layer named ``<conv name>_offset``. Every conv is followed by ReLU and
    batch normalization. The head is global average pooling, a 10-unit dense
    layer (``fc1``) and a softmax activation (``out``).

    # Arguments
        trainable: passed as ``trainable`` to every Conv2D layer and to the
            dense layer. It is not passed to the ConvOffset2D or
            BatchNormalization layers.

    # Returns
        Tuple ``(inputs, outputs)`` of the input tensor and softmax output.
    """

    def _offset_conv_stage(tensor, tag, in_channels, out_channels,
                           **conv_kwargs):
        # ConvOffset2D -> 3x3 conv -> ReLU -> BN, all named after `tag`.
        shifted = ConvOffset2D(in_channels, name=tag + '_offset')(tensor)
        tensor = Conv2D(out_channels, (3, 3),
                        padding='same',
                        name=tag,
                        trainable=trainable,
                        **conv_kwargs)(shifted)
        tensor = Activation('relu', name=tag + '_relu')(tensor)
        return BatchNormalization(name=tag + '_bn')(tensor)

    inputs = Input((28, 28, 1), name='input')

    # conv11: the only conv without a preceding offset layer.
    x = Conv2D(32, (3, 3),
               padding='same',
               name='conv11',
               trainable=trainable)(inputs)
    x = Activation('relu', name='conv11_relu')(x)
    x = BatchNormalization(name='conv11_bn')(x)

    x = _offset_conv_stage(x, 'conv12', 32, 64, strides=(2, 2))
    x = _offset_conv_stage(x, 'conv21', 64, 128)
    x = _offset_conv_stage(x, 'conv22', 128, 128, strides=(2, 2))

    # out
    x = GlobalAvgPool2D(name='avg_pool')(x)
    x = Dense(10, name='fc1', trainable=trainable)(x)
    outputs = Activation('softmax', name='out')(x)

    return inputs, outputs
def convolution_offset_2D(inputs, filters, step, stride=2, Normal=True):
    """Encoder stage: zero-pad, ConvOffset2D, strided conv, optional BN, LeakyReLU.

    # Arguments
        inputs: input tensor.
        filters: passed to both the ConvOffset2D layer and the conv layer.
        step: integer inserted into the layer names ('conv_%d_offset',
            'conv_%d', 'CBat_%d', 'CLRelu_%d').
        stride: used for both entries of the conv layer's ``subsample``.
        Normal: when true, batch normalization is applied after the conv.

    # Returns
        Output tensor of the LeakyReLU(alpha=0.2) layer.
    """
    # use for encoder
    padded = ZeroPadding2D(padding=(1, 1))(inputs)
    shifted = ConvOffset2D(filters, name='conv_%d_offset' % step)(padded)

    # NOTE(review): Convolution2D(filters, 4, 4, subsample=...) looks like the
    # Keras 1 call signature (4x4 kernel) - confirm against the installed
    # Keras version.
    conv_layer = Convolution2D(filters, 4, 4,
                               subsample=(stride, stride),
                               name='conv_%d' % step)
    out = conv_layer(shifted)

    if Normal:
        out = BatchNormalization(name='CBat_%d' % step)(out)
    return LeakyReLU(alpha=0.2, name='CLRelu_%d' % step)(out)
def get_cnn_model(params):
    """
    Build the DenseNet-161 based CNN classifier described by ``params``.

    The head starts from the output of the last base-model layer. Each of
    ``params.use_nlm``, ``params.use_spp`` and ``params.use_deform`` is
    checked in that order, and each enabled option rebuilds the head from
    the second-to-last base-model layer. If several are enabled, the head
    of the last enabled one is the one used downstream. When
    ``params.use_metadata`` is set, an auxiliary input named 'aux_input' is
    concatenated to the head and becomes a second model input. Two
    Dense + Dropout(0.5) layers ('fc1', 'fc2') and a softmax layer follow.
    All layers of the returned model are marked trainable.

    :param params: global parameters; the attributes read here are
        target_img_size, num_channels, use_nlm, use_spp, use_deform,
        use_metadata, cnn_lstm_layer_length, cnn_last_layer_length,
        metadata_length and num_labels
    :return model: CNN model, with or without the metadata input depending
        on params
    """
    img_shape = (params.target_img_size[0], params.target_img_size[1],
                 params.num_channels)
    input_tensor = Input(shape=img_shape)
    baseModel = densenet.DenseNetImageNet161(input_shape=img_shape,
                                             include_top=False,
                                             input_tensor=input_tensor)

    head = baseModel.layers[-1].output

    if params.use_nlm:
        # Non-local block head.
        head = baseModel.layers[-2].output
        head = non_local_block(head,
                               computation_compression=1,
                               mode='embedded')
        head = Conv2D(params.cnn_lstm_layer_length, [3, 3],
                      name='conv_nlm')(head)
        head = Flatten()(head)
        head = Dense(params.cnn_lstm_layer_length,
                     activation='relu',
                     name='fc_nlm')(head)
        head = Dropout(0.5)(head)

    if params.use_spp:
        # Spatial pyramid pooling head.
        head = baseModel.layers[-2].output
        head = SpatialPyramidPooling([1, 2, 4], name='spp')(head)
        head = Dense(params.cnn_lstm_layer_length,
                     activation='relu',
                     name='fc_spp')(head)
        head = Dropout(0.5)(head)

    if params.use_deform:
        # ConvOffset2D head.
        head = baseModel.layers[-2].output
        head = ConvOffset2D(params.cnn_lstm_layer_length,
                            name='deform')(head)
        head = Conv2D(params.cnn_lstm_layer_length, [3, 3],
                      name='conv_deform')(head)
        head = Flatten()(head)
        head = Dense(params.cnn_lstm_layer_length,
                     activation='relu',
                     name='fc_deform')(head)
        head = Dropout(0.5)(head)

    model_inputs = [baseModel.input]
    if params.use_metadata:
        auxiliary_input = Input(shape=(params.metadata_length, ),
                                name='aux_input')
        head = merge([head, auxiliary_input], 'concat')
        model_inputs.append(auxiliary_input)

    # Two fully connected layers, each followed by dropout.
    for fc_name in ('fc1', 'fc2'):
        head = Dense(params.cnn_last_layer_length,
                     activation='relu',
                     name=fc_name)(head)
        head = Dropout(0.5)(head)

    predictions = Dense(params.num_labels, activation='softmax')(head)

    model = Model(input=model_inputs, output=predictions)

    for layer in model.layers:
        layer.trainable = True

    return model
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
    """Residual block with a 1x1 conv + BN projection on the shortcut.

    Main path: 1x1 conv (with ``strides``) -> BN -> ReLU -> ConvOffset2D ->
    ``kernel_size`` conv ('same' padding) -> BN -> ReLU -> ConvOffset2D ->
    1x1 conv -> BN. The shortcut is a 1x1 conv with the same ``strides`` and
    the last filter count, followed by BN. Both paths are added and passed
    through ReLU.

    # Arguments
        input_tensor: input tensor
        kernel_size: the kernel size of the middle conv layer at main path
        filters: three integers, the filters of the 3 conv layers at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: Strides for the first conv layer in the block; the shortcut
            conv uses the same strides.

    # Returns
        Output tensor for the block.
    """
    depth_a, depth_b, depth_c = filters
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    # Conv and BN layers share the "<stage><block>_branch" part of their names.
    name_suffix = str(stage) + block + '_branch'
    conv_name_base = 'res' + name_suffix
    bn_name_base = 'bn' + name_suffix

    # 2a: strided 1x1 conv, followed by an offset layer.
    out = layers.Conv2D(depth_a, (1, 1),
                        strides=strides,
                        kernel_initializer='he_normal',
                        name=conv_name_base + '2a')(input_tensor)
    out = layers.BatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2a')(out)
    out = layers.Activation('relu')(out)
    out = ConvOffset2D(depth_a)(out)

    # 2b: kernel_size conv, followed by an offset layer.
    out = layers.Conv2D(depth_b, kernel_size,
                        padding='same',
                        kernel_initializer='he_normal',
                        name=conv_name_base + '2b')(out)
    out = layers.BatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2b')(out)
    out = layers.Activation('relu')(out)
    out = ConvOffset2D(depth_b)(out)

    # 2c: 1x1 conv; no activation before the residual addition.
    out = layers.Conv2D(depth_c, (1, 1),
                        kernel_initializer='he_normal',
                        name=conv_name_base + '2c')(out)
    out = layers.BatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2c')(out)

    # Shortcut: projected with the same strides as the first main-path conv.
    projected = layers.Conv2D(depth_c, (1, 1),
                              strides=strides,
                              kernel_initializer='he_normal',
                              name=conv_name_base + '1')(input_tensor)
    projected = layers.BatchNormalization(axis=bn_axis,
                                          name=bn_name_base + '1')(projected)

    merged = layers.add([out, projected])
    return layers.Activation('relu')(merged)
def _feature_extraction_residual(net,
                                 first_layer_depth=48,
                                 second_layer_depth=64,
                                 last_layer_depth=96,
                                 use_deform_conv=False,
                                 scope='feature_extraction_residual'):
    """
    Feature extraction module of ldnet-v1.

    Four parallel branches read the same input and are concatenated on
    axis 3:
      Branch_0: 1x1 conv (first) -> 3x3 conv (second).
      Branch_1: 1x1 conv (first) -> 5x5 conv (second) -> 5x5 conv (last).
      Branch_2: 1x1 conv (first) -> 7x7 conv (second).
      Branch_3: 5x5 average pool -> 1x1 conv (last).

    :param net: the net input.
    :param first_layer_depth: first layer depth.
    :param second_layer_depth: second layer depth.
    :param last_layer_depth: last layer depth.
    :param use_deform_conv: when true, a ConvOffset2D layer is inserted
        before every conv that follows another conv inside Branch_0,
        Branch_1 and Branch_2.
    :param scope: optional scope.
    :return: the size of returned net: [batch_size, height, width, channel],
        which channel = (second_layer_depth + last_layer_depth) * 2
    """
    # NOTE(review): every ConvOffset2D below is named 'conv3_offset',
    # including the two inside Branch_1 - confirm the reuse is intended.
    with variable_scope.variable_scope(scope):
        with variable_scope.variable_scope('Branch_0'):
            path_3x3 = layers.conv2d(net,
                                     first_layer_depth, [1, 1],
                                     scope='Conv2d_0a_1x1')
            if use_deform_conv:
                offset_layer = ConvOffset2D(first_layer_depth,
                                            name='conv3_offset')
                path_3x3 = offset_layer(path_3x3)
            path_3x3 = layers.conv2d(path_3x3,
                                     second_layer_depth, [3, 3],
                                     scope='Conv2d_0b_3x3')

        with variable_scope.variable_scope('Branch_1'):
            path_5x5 = layers.conv2d(net,
                                     first_layer_depth, [1, 1],
                                     scope='Conv2d_0a_1x1')
            if use_deform_conv:
                offset_layer = ConvOffset2D(first_layer_depth,
                                            name='conv3_offset')
                path_5x5 = offset_layer(path_5x5)
            path_5x5 = layers.conv2d(path_5x5,
                                     second_layer_depth, [5, 5],
                                     scope='Conv2d_0b_5x5')
            if use_deform_conv:
                offset_layer = ConvOffset2D(second_layer_depth,
                                            name='conv3_offset')
                path_5x5 = offset_layer(path_5x5)
            path_5x5 = layers.conv2d(path_5x5,
                                     last_layer_depth, [5, 5],
                                     scope='Conv2d_0c_5x5')

        with variable_scope.variable_scope('Branch_2'):
            path_7x7 = layers.conv2d(net,
                                     first_layer_depth, [1, 1],
                                     scope='Conv2d_0a_1x1')
            if use_deform_conv:
                offset_layer = ConvOffset2D(first_layer_depth,
                                            name='conv3_offset')
                path_7x7 = offset_layer(path_7x7)
            path_7x7 = layers.conv2d(path_7x7,
                                     second_layer_depth, [7, 7],
                                     scope='Conv2d_0b_7x7')

        with variable_scope.variable_scope('Branch_3'):
            path_pool = layers_lib.avg_pool2d(net, [5, 5],
                                              scope='AvgPool_0a_5x5')
            path_pool = layers.conv2d(path_pool,
                                      last_layer_depth, [1, 1],
                                      scope='Conv2d_0b_1x1')

        # Stack the four branches along axis 3.
        net = array_ops.concat([path_3x3, path_5x5, path_7x7, path_pool], 3)
    return net
def ldnet_v1(inputs,
             num_classes=3,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope="ldnet",
             use_deform_conv=True,
             print_current_tensor=False):
    """
    Build the ldnet-v1 graph and return its classification logits.

    ldnet architecture (shapes are for a 32*32*3 input; the stem runs with
    kernel [3, 3], stride 1 and 'SAME' padding unless noted):

                 input  depth  kernel  stride  padding
      conv0:     net = conv(input, 32, [3, 3], 1, "same")   --> 32*32*32
      conv1:     net = conv(net, 32, [3, 3], 1, "same")     --> 32*32*32
      conv2:     net = conv(net, 64, [3, 3], 1, "same")     --> 32*32*64
      maxpool0:  net = pool(net, [2, 2], 1, "same")         --> 32*32*64
      conv3:     net = conv(net, 192, [3, 3], 1, "same")    --> 32*32*192
      maxpool1:  net = pool(net, [2, 2], 1, "same")         --> 32*32*192

    ldnet blocks:
      mixed_1:    32 x 32 x 320  Feature extraction module
      mixed_2:    32 x 32 x 320  Feature extraction module
      mixed_res1: 32 x 32 x 320  Feature extraction module + shortcuts
      mixed_3:    16 x 16 x 640  Dimension reduction module
      mixed_4:    16 x 16 x 640  Feature extraction module
      mixed_res2: 16 x 16 x 640  Feature extraction module + shortcuts
      mixed_5:    8 x 8 x 1280   Dimension reduction module
      mixed_6:    8 x 8 x 1280   Feature extraction module
      mixed_res3: 8 x 8 x 1280   Feature extraction module + shortcuts
      Final pooling and prediction -> 3

    :param inputs: the size of inputs is [batch_num, width, height, channel].
    :param num_classes: num of classes predicted.
    :param dropout_keep_prob: passed as ``keep_prob`` to both dropout layers
        of the head.
    :param spatial_squeeze: whether or not to squeeze axes 1 and 2 of the
        logits.
    :param scope: optional scope.
    :param use_deform_conv: whether to insert a ConvOffset2D layer before
        conv1, conv2 and conv3 of the stem. It is not forwarded to the
        mixed_* module calls.
    :param print_current_tensor: whether or not to print the current tensor
        after each stage.
    :return: logits: [batch_size, num_classes] when ``spatial_squeeze`` is
        true; otherwise the un-squeezed output of the final 1x1 conv.
    """
    # end_points will collect relevant activations for the computation
    # of shortcuts.
    end_points = []
    with variable_scope.variable_scope(scope, "ldnet_v1", [inputs]):
        # Stem: conv and max-pool calls below inherit these defaults.
        with arg_scope([layers.conv2d, layers_lib.max_pool2d],
                       kernel_size=[3, 3],
                       stride=1,
                       padding='SAME'):
            # input: 32 * 32 * 3
            net = inputs

            end_point = "conv0"
            # if use_deform_conv:
            #     net = ConvOffset2D(3, name='conv0_offset')(net)  # net offset
            net = layers.conv2d(net, 32, scope=end_point)
            if print_current_tensor:
                print(net)
            # --> 32 * 32 * 32

            end_point = "conv1"
            if use_deform_conv:
                net = ConvOffset2D(32, name='conv1_offset')(net)  # net offset
            net = layers.conv2d(net, 32, scope=end_point)
            if print_current_tensor:
                print(net)
            # --> 32 * 32 * 32

            end_point = "conv2"
            if use_deform_conv:
                net = ConvOffset2D(32, name='conv2_offset')(net)  # net offset
            net = layers.conv2d(net, 64, scope=end_point)
            if print_current_tensor:
                print(net)
            # --> 32 * 32 * 64

            end_point = "maxpool0"
            net = layers_lib.max_pool2d(net, kernel_size=[2, 2], scope=end_point)
            if print_current_tensor:
                print(net)
            # --> 32 * 32 * 64

            end_point = 'conv3'
            if use_deform_conv:
                net = ConvOffset2D(64, name='conv3_offset')(net)  # net offset
            net = layers.conv2d(net, 192, scope=end_point)
            if print_current_tensor:
                print(net)
            # end_points.append(net)
            # --> 32 * 32 * 192

            end_point = 'maxpool1'
            net = layers_lib.max_pool2d(net, kernel_size=[2, 2], scope=end_point)
            if print_current_tensor:
                print(net)
            # net.alias = end_point
            # end_points.append(net)
            # --> 32 * 32 * 192

        # ldnet blocks
        # Only stride and padding are defaulted here; every conv/pool inside
        # the modules passes its own kernel size.
        with arg_scope(
            [layers.conv2d, layers_lib.max_pool2d, layers_lib.avg_pool2d],
                stride=1,
                padding='SAME'):
            # mixed_1: 32 x 32 x 320 Feature extraction module
            end_point = 'mixed_1'
            with variable_scope.variable_scope(end_point):
                # NOTE(review): this is the only module call whose inner scope
                # is 'feature_extraction' rather than
                # 'feature_extraction_residual' - confirm it is intentional.
                net = _feature_extraction_residual(net,
                                                   first_layer_depth=48,
                                                   second_layer_depth=64,
                                                   last_layer_depth=96,
                                                   scope='feature_extraction')
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_2: 32 x 32 x 320 Feature extraction module
            end_point = 'mixed_2'
            with variable_scope.variable_scope(end_point):
                net = _feature_extraction_residual(
                    net,
                    first_layer_depth=48,
                    second_layer_depth=64,
                    last_layer_depth=96,
                    scope='feature_extraction_residual')
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_res1: 32 x 32 x 320 Feature extraction module
            end_point = 'mixed_res1'
            with variable_scope.variable_scope(end_point):
                net = _feature_extraction_residual(
                    net,
                    first_layer_depth=48,
                    second_layer_depth=64,
                    last_layer_depth=96,
                    scope='feature_extraction_residual')
                # Linear (no activation) 1x1 projection of the module output,
                # keeping its channel count.
                net_linear = layers.conv2d(net,
                                           int(net.shape[3]), [1, 1],
                                           activation_fn=None,
                                           scope='net_linear_projection')
                # end_points has not been appended to yet for this stage, so
                # [-1] and [-2] are the outputs of the two preceding stages.
                shortcuts = _shortcuts_addition(net.shape,
                                                end_points[-1],
                                                end_points[-2],
                                                scope="shortcuts_addition")
                net = nn_ops.relu(net_linear + shortcuts)
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_3: 16 x 16 x 640 Dimension reduction module
            end_point = "mixed_3"
            with variable_scope.variable_scope(end_point):
                net = _dimension_reduction(net,
                                           branch_0_depth=224,
                                           branch_1_depth=96,
                                           scope='dimension_reduction')
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_4: 16 x 16 x 640 Feature extraction module
            end_point = "mixed_4"
            with variable_scope.variable_scope(end_point):
                net = _feature_extraction_residual(
                    net,
                    first_layer_depth=48 * 2,
                    second_layer_depth=64 * 2,
                    last_layer_depth=96 * 2,
                    scope='feature_extraction_residual')
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_res2: 16 x 16 x 640 Feature extraction module
            end_point = "mixed_res2"
            with variable_scope.variable_scope(end_point):
                net = _feature_extraction_residual(
                    net,
                    first_layer_depth=48 * 2,
                    second_layer_depth=64 * 2,
                    last_layer_depth=96 * 2,
                    scope='feature_extraction_residual')
                net_linear = layers.conv2d(net,
                                           int(net.shape[3]), [1, 1],
                                           activation_fn=None,
                                           scope='net_linear_projection')
                # Shortcuts come from the two preceding stages (mixed_4 and
                # mixed_3 at this point).
                shortcuts = _shortcuts_addition(net.shape,
                                                end_points[-1],
                                                end_points[-2],
                                                scope="shortcuts_addition")
                net = nn_ops.relu(net_linear + shortcuts)
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_5: 8 x 8 x 1280 Dimension reduction module
            end_point = "mixed_5"
            with variable_scope.variable_scope(end_point):
                net = _dimension_reduction(net,
                                           branch_0_depth=224 * 2,
                                           branch_1_depth=96 * 2,
                                           scope='dimension_reduction')
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_6: 8 x 8 x 1280 Feature extraction module
            end_point = "mixed_6"
            with variable_scope.variable_scope(end_point):
                net = _feature_extraction_residual(
                    net,
                    first_layer_depth=48 * 4,
                    second_layer_depth=64 * 4,
                    last_layer_depth=96 * 4,
                    scope='feature_extraction_residual')
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

            # mixed_res3: 8 x 8 x 1280 Feature extraction module
            end_point = "mixed_res3"
            with variable_scope.variable_scope(end_point):
                net = _feature_extraction_residual(
                    net,
                    first_layer_depth=48 * 4,
                    second_layer_depth=64 * 4,
                    last_layer_depth=96 * 4,
                    scope='feature_extraction_residual')
                net_linear = layers.conv2d(net,
                                           int(net.shape[3]), [1, 1],
                                           activation_fn=None,
                                           scope='net_linear_projection')
                # Shortcuts come from the two preceding stages (mixed_6 and
                # mixed_5 at this point).
                shortcuts = _shortcuts_addition(net.shape,
                                                end_points[-1],
                                                end_points[-2],
                                                scope="shortcuts_addition")
                net = nn_ops.relu(net_linear + shortcuts)
            end_points.append(net)
            if print_current_tensor:
                print(net, len(end_points))

        # Final pooling and prediction
        with variable_scope.variable_scope('Logits'):
            with arg_scope([layers.conv2d],
                           normalizer_fn=None,
                           normalizer_params=None):
                net = layers.conv2d(net,
                                    int(net.shape[3]), [3, 3],
                                    stride=2,
                                    scope='conv2d_1a_3x3')
                # 4 x 4 x 1280
                net = layers_lib.avg_pool2d(net, [4, 4],
                                            padding='VALID',
                                            scope='AvgPool_1b_4x4')
                # 1 x 1 x 1280

                # net = layers.conv2d(net, 640, [1, 1], scope='Conv2d_0c_1x1')
                # local1
                # NOTE: `as scope` rebinds the name of the `scope` parameter
                # to the inner variable scope from here on.
                with variable_scope.variable_scope('local1') as scope:
                    # Move everything into depth so we can perform a single matrix multiply.
                    # NOTE(review): 1280 is hard-coded and presumably must
                    # equal the channel depth of the pooled tensor - confirm
                    # if the module depths above are ever changed.
                    reshape = tf.reshape(net, [-1, 1280])
                    weights = _variable_with_weight_decay('weights',
                                                          shape=[1280, 640],
                                                          stddev=0.04,
                                                          wd=0.0001)
                    biases = _variable_on_cpu('biases', [640],
                                              tf.constant_initializer(0.1))
                    net = tf.nn.relu(tf.matmul(reshape, weights) + biases,
                                     name=scope.name)
                # 1 x 1 x 640
                net = layers_lib.dropout(net,
                                         keep_prob=dropout_keep_prob,
                                         scope='Dropout_0c')

                # net = layers.conv2d(net, 320, [1, 1], scope='Conv2d_0d_1x1')
                # local2
                with variable_scope.variable_scope('local2') as scope:
                    weights = _variable_with_weight_decay('weights',
                                                          shape=[640, 320],
                                                          stddev=0.04,
                                                          wd=0.0001)
                    biases = _variable_on_cpu('biases', [320],
                                              tf.constant_initializer(0.1))
                    net = tf.nn.relu(tf.matmul(net, weights) + biases,
                                     name=scope.name)
                # 1 x 1 x 320
                net = layers_lib.dropout(net,
                                         keep_prob=dropout_keep_prob,
                                         scope='Dropout_0d')

                # Insert two singleton axes at position 1 so the 2-D
                # activations can go through the final 1x1 conv.
                net = tf.expand_dims(net, 1)
                net = tf.expand_dims(net, 1)

                logits = layers.conv2d(net,
                                       num_classes, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_0e_1x1')
                # 1 x 1 x 3
                if spatial_squeeze:
                    logits = array_ops.squeeze(logits, [1, 2],
                                               name='SpatialSqueeze')
                # 3
    return logits