def prediction(x, i, num_priors, min_s, max_s, aspect, num_classes, img_size):
    """Build the three SSD prediction branches for one feature map.

    # Arguments
        x: Tensor every branch is applied to.
        i: Identifier turned into the layer-name prefix with ``str(i)``.
        num_priors: Channel multiplier; the localisation conv gets
            ``num_priors * 4`` filters and the confidence conv gets
            ``num_priors * num_classes`` filters.
        min_s: Forwarded to ``PriorBox`` as ``min_size``.
        max_s: Forwarded to ``PriorBox`` as ``max_size``.
        aspect: Forwarded to ``PriorBox`` as ``aspect_ratios``.
        num_classes: Per-prior channel count of the confidence conv.
        img_size: First positional argument of ``PriorBox``.

    # Returns
        Tuple ``(mbox_loc_flat, mbox_conf_flat, mbox_priorbox)``: the
        flattened localisation output, the flattened confidence output and
        the ``PriorBox`` output.
    """
    prefix = str(i)

    # Localisation branch: 3x3 'same' conv followed by a flatten.
    loc_layer = Conv2D(num_priors * 4, (3, 3), padding='same',
                       name=prefix + '_mbox_loc')
    loc_map = loc_layer(x)
    loc_flattener = Flatten(name=prefix + '_mbox_loc_flat')
    mbox_loc_flat = loc_flattener(loc_map)

    # Confidence branch: same layout, num_classes channels per prior.
    conf_layer = Conv2D(num_priors * num_classes, (3, 3), padding='same',
                        name=prefix + '_mbox_conf')
    conf_map = conf_layer(x)
    conf_flattener = Flatten(name=prefix + '_mbox_conf_flat')
    mbox_conf_flat = conf_flattener(conf_map)

    # Prior boxes are produced from the same input tensor.
    prior_layer = PriorBox(img_size,
                           min_size=min_s,
                           max_size=max_s,
                           aspect_ratios=aspect,
                           variances=[0.1, 0.1, 0.2, 0.2],
                           name=prefix + '_mbox_priorbox')
    mbox_priorbox = prior_layer(x)

    return mbox_loc_flat, mbox_conf_flat, mbox_priorbox
def SSD300(input_shape, num_classes=21, weights=None):
    """SSD300 architecture built on a frozen Keras VGG16 backbone.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.
        weights: Optional value handed to ``model.load_weights`` once the
            full model is assembled. When it is ``None`` the VGG16 backbone
            is created with ``weights='imagenet'``; for any other value the
            backbone is created with ``weights=None``.

    # Returns
        A ``Model`` mapping the input image to the ``predictions`` tensor:
        the localisation output reshaped to ``(num_boxes, 4)``, the softmax
        confidences reshaped to ``(num_boxes, num_classes)`` and the
        ``PriorBox`` outputs, concatenated on axis 2.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor

    # ImageNet weights are only requested for the backbone when no weights
    # file will be loaded into the finished model.
    vgg_weights = 'imagenet' if weights is None else None

    ### VGG layers
    vgg = VGG16(input_tensor=input_tensor, weights=vgg_weights,
                input_shape=input_shape, include_top=False)
    for layer in vgg.layers:
        layer.trainable = False

    # The VGG16 conv layers are re-applied by index, while every pooling
    # layer is re-created here with border_mode='same'.
    # (block number, vgg.layers indices of its convs, pool size, pool strides)
    vgg_blocks = [
        (1, (1, 2), (2, 2), (2, 2)),
        (2, (4, 5), (2, 2), (2, 2)),
        (3, (7, 8, 9), (2, 2), (2, 2)),
        (4, (11, 12, 13), (2, 2), (2, 2)),
        (5, (15, 16, 17), (3, 3), (1, 1)),
    ]
    x = net['input']
    for block, layer_indices, pool_size, pool_strides in vgg_blocks:
        for pos, layer_index in enumerate(layer_indices, 1):
            x = vgg.layers[layer_index](x)
            net['conv{}_{}'.format(block, pos)] = x
        pool_name = 'pool{}'.format(block)
        x = MaxPooling2D(pool_size, strides=pool_strides,
                         border_mode='same', name=pool_name)(x)
        net[pool_name] = x

    ### Beginning of SSD layers
    # FC6 (the drop6 / drop7 Dropout layers are commented out in this variant)
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5'])
    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
                               border_mode='same', name='fc7')(net['fc6'])
    # Block 6
    net['conv6_1'] = Convolution2D(256, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv6_1')(net['fc7'])
    net['conv6_2'] = Convolution2D(512, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv6_2')(net['conv6_1'])
    # Block 7: explicit zero padding followed by a 'valid' strided conv.
    net['conv7_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv7_1')(net['conv6_2'])
    padded = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='valid',
                                   name='conv7_2')(padded)
    # Block 8
    net['conv8_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    ### Prediction layers at different depths
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    # Confidence layers carry a class-count suffix in their layer name
    # whenever num_classes differs from 21.
    class_suffix = '' if num_classes == 21 else '_{}'.format(num_classes)

    # (source tensor key, num_priors, PriorBox min_size, max_size or None,
    #  aspect_ratios)
    conv_heads = [
        ('conv4_3_norm', 3, 30.0, None, [2]),
        ('fc7', 6, 60.0, 114.0, [2, 3]),
        ('conv6_2', 6, 114.0, 168.0, [2, 3]),
        ('conv7_2', 6, 168.0, 222.0, [2, 3]),
        ('conv8_2', 6, 222.0, 276.0, [2, 3]),
    ]
    for source, num_priors, min_size, max_size, aspect_ratios in conv_heads:
        features = net[source]

        loc_key = source + '_mbox_loc'
        net[loc_key] = Convolution2D(num_priors * 4, 3, 3,
                                     border_mode='same',
                                     name=loc_key)(features)
        net[loc_key + '_flat'] = Flatten(name=loc_key + '_flat')(net[loc_key])

        conf_key = source + '_mbox_conf'
        net[conf_key] = Convolution2D(num_priors * num_classes, 3, 3,
                                      border_mode='same',
                                      name=conf_key + class_suffix)(features)
        net[conf_key + '_flat'] = Flatten(
            name=conf_key + '_flat')(net[conf_key])

        # max_size is only passed when the head defines one, so PriorBox's
        # own default applies to the conv4_3 head exactly as before.
        prior_kwargs = {} if max_size is None else {'max_size': max_size}
        prior_key = source + '_mbox_priorbox'
        net[prior_key] = PriorBox(img_size, min_size,
                                  aspect_ratios=aspect_ratios,
                                  variances=[0.1, 0.1, 0.2, 0.2],
                                  name=prior_key,
                                  **prior_kwargs)(features)

    # Prediction from pool6: the globally pooled vector feeds Dense layers
    # directly and is reshaped before being given to PriorBox.
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes,
        name='pool6_mbox_conf_flat' + class_suffix)(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions
    head_sources = ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2',
                    'pool6']
    net['mbox_loc'] = merge(
        [net[source + '_mbox_loc_flat'] for source in head_sources],
        mode='concat', concat_axis=1, name='mbox_loc')
    net['mbox_conf'] = merge(
        [net[source + '_mbox_conf_flat'] for source in head_sources],
        mode='concat', concat_axis=1, name='mbox_conf')
    net['mbox_priorbox'] = merge(
        [net[source + '_mbox_priorbox'] for source in head_sources],
        mode='concat', concat_axis=1, name='mbox_priorbox')

    # `int_shape` is a backend function, not a tensor attribute, so the
    # fallback must not be guarded by hasattr(tensor, 'int_shape'); doing so
    # left num_boxes unbound whenever `_keras_shape` was missing.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat', concat_axis=2, name='predictions')

    model = Model(net['input'], net['predictions'])
    if weights:
        model.load_weights(weights)
    return model
def RGBD_SSD300(input_shape, depth_input_shape, num_classes=21):
    """Two-input (RGB + depth) variant of the SSD300 architecture.

    Both inputs go through their own VGG16-shaped conv stack. Every conv in
    those stacks is initialised from the ImageNet VGG16 weights except
    ``depth_conv1_1``. The two ``conv4_3`` outputs are concatenated on axis 3
    before the first prediction head; everything from ``fc6`` onwards reads
    the RGB stream only.

    # Arguments
        input_shape: Shape of the RGB input; entries 0 and 1 are also used
            to build the ``img_size`` given to every ``PriorBox``.
        depth_input_shape: Shape of the depth input.
        num_classes: Number of classes per prior for the confidence layers.

    # Returns
        A ``Model`` with inputs ``[rgb, depth]`` and the ``predictions``
        tensor as output: localisation output reshaped to ``(num_boxes, 4)``,
        softmax confidences reshaped to ``(num_boxes, num_classes)`` and the
        ``PriorBox`` outputs, concatenated on axis 2.
    """
    vgg16 = VGG16(weights='imagenet', include_top=False)
    weights = vgg16.get_weights()

    input_layer = Input(shape=input_shape)
    depth_input_layer = Input(shape=depth_input_shape)

    # (block number, filters, number of convs, pool size, pool strides)
    vgg_blocks = [
        (1, 64, 2, (2, 2), (2, 2)),
        (2, 128, 2, (2, 2), (2, 2)),
        (3, 256, 3, (2, 2), (2, 2)),
        (4, 512, 3, (2, 2), (2, 2)),
        (5, 512, 3, (3, 3), (1, 1)),
    ]
    rgb = input_layer
    depth = depth_input_layer
    conv4_3 = depth_conv4_3 = None
    w = 0  # index of the current conv's kernel in `weights`; bias is w + 1
    for block, filters, num_convs, pool_size, pool_strides in vgg_blocks:
        for pos in range(1, num_convs + 1):
            conv_name = 'conv{}_{}'.format(block, pos)
            is_first = (w == 0)

            # Only the very first RGB conv carries the 'rgb_' name prefix.
            rgb = Conv2D(filters, (3, 3),
                         name='rgb_' + conv_name if is_first else conv_name,
                         padding='same', activation='relu',
                         weights=[weights[w], weights[w + 1]])(rgb)

            # depth_conv1_1 keeps its default initialisation (its weights
            # argument was commented out in the original; presumably the
            # depth input's channel count differs from RGB - confirm).
            depth_kwargs = {}
            if not is_first:
                depth_kwargs['weights'] = [weights[w], weights[w + 1]]
            depth = Conv2D(filters, (3, 3), name='depth_' + conv_name,
                           padding='same', activation='relu',
                           **depth_kwargs)(depth)
            w += 2

        if block == 4:
            # Kept for the conv4_3 prediction head below.
            conv4_3, depth_conv4_3 = rgb, depth

        rgb = MaxPooling2D(name='pool{}'.format(block), pool_size=pool_size,
                           strides=pool_strides, padding='same')(rgb)
        depth = MaxPooling2D(name='depth_pool{}'.format(block),
                             pool_size=pool_size, strides=pool_strides,
                             padding='same')(depth)
    pool5, depth_pool5 = rgb, depth

    # NOTE(review): this concatenation is built but never consumed - fc6
    # below reads `pool5` - so the depth stream past conv4_3 does not reach
    # the model output. Kept unchanged; confirm whether fc6 was meant to
    # take `concat_pool5`.
    concat_pool5 = concatenate([pool5, depth_pool5], axis=1,
                               name='concat_pool5')

    # FC6
    fc6 = Conv2D(1024, (3, 3), name='fc6', dilation_rate=(6, 6),
                 padding='same', activation='relu')(pool5)
    fc6 = Dropout(0.5, name='drop6')(fc6)
    # FC7
    fc7 = Conv2D(1024, (1, 1), name='fc7', padding='same',
                 activation='relu')(fc6)
    fc7 = Dropout(0.5, name='drop7')(fc7)
    # Block 6
    conv6_1 = Conv2D(256, (1, 1), name='conv6_1', padding='same',
                     activation='relu')(fc7)
    conv6_2 = Conv2D(512, (3, 3), name='conv6_2', strides=(2, 2),
                     padding='same', activation='relu')(conv6_1)
    # Block 7: explicit zero padding followed by a 'valid' strided conv.
    conv7_1 = Conv2D(128, (1, 1), name='conv7_1', padding='same',
                     activation='relu')(conv6_2)
    conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3), name='conv7_2', padding='valid',
                     strides=(2, 2), activation='relu')(conv7_1z)
    # Block 8
    conv8_1 = Conv2D(128, (1, 1), name='conv8_1', padding='same',
                     activation='relu')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3), name='conv8_2', padding='same',
                     strides=(2, 2), activation='relu')(conv8_1)
    # Last Pool
    pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    img_size = (input_shape[1], input_shape[0])
    # Confidence layers carry a class-count suffix in their layer name
    # whenever num_classes differs from 21.
    class_suffix = '' if num_classes == 21 else '_{}'.format(num_classes)

    loc_flats = []
    conf_flats = []
    priorboxes = []

    # Prediction from conv4_3 (RGB and depth features joined on axis 3)
    num_priors = 3
    concat_conv4_3 = concatenate([conv4_3, depth_conv4_3], axis=3)
    conv4_3_norm = Normalize(20, name='conv4_3_norm')(concat_conv4_3)
    conv4_3_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                   name='conv4_3_norm_mbox_loc',
                                   padding='same')(conv4_3_norm)
    loc_flats.append(Flatten(
        name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc))
    conv4_3_norm_mbox_conf = Conv2D(
        num_priors * num_classes, (3, 3),
        name='conv4_3_norm_mbox_conf' + class_suffix,
        padding='same')(conv4_3_norm)
    conf_flats.append(Flatten(
        name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf))
    priorboxes.append(PriorBox(img_size, 30.0,
                               name='conv4_3_norm_mbox_priorbox',
                               aspect_ratios=[2],
                               variances=[0.1, 0.1, 0.2, 0.2])(conv4_3_norm))

    # Predictions from fc7, conv6_2, conv7_2 and conv8_2 share one layout.
    # (name prefix, source tensor, PriorBox min_size, PriorBox max_size)
    num_priors = 6
    conv_heads = [
        ('fc7', fc7, 60.0, 114.0),
        ('conv6_2', conv6_2, 114.0, 168.0),
        ('conv7_2', conv7_2, 168.0, 222.0),
        ('conv8_2', conv8_2, 222.0, 276.0),
    ]
    for prefix, features, min_size, max_size in conv_heads:
        conf = Conv2D(num_priors * num_classes, (3, 3), padding='same',
                      name=prefix + '_mbox_conf' + class_suffix)(features)
        conf_flats.append(Flatten(name=prefix + '_mbox_conf_flat')(conf))
        loc = Conv2D(num_priors * 4, (3, 3), name=prefix + '_mbox_loc',
                     padding='same')(features)
        loc_flats.append(Flatten(name=prefix + '_mbox_loc_flat')(loc))
        priorboxes.append(PriorBox(img_size, min_size, max_size=max_size,
                                   aspect_ratios=[2, 3],
                                   variances=[0.1, 0.1, 0.2, 0.2],
                                   name=prefix + '_mbox_priorbox')(features))

    # Prediction from pool6: the globally pooled vector feeds Dense layers
    # directly and is reshaped before being given to PriorBox.
    num_priors = 6
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    loc_flats.append(Dense(num_priors * 4,
                           name='pool6_mbox_loc_flat')(pool6))
    conf_flats.append(Dense(num_priors * num_classes,
                            name='pool6_mbox_conf_flat' + class_suffix)(pool6))
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    priorboxes.append(PriorBox(img_size, 276.0, max_size=330.0,
                               aspect_ratios=[2, 3],
                               variances=[0.1, 0.1, 0.2, 0.2],
                               name='pool6_mbox_priorbox')(pool6_reshaped))

    # Gather all predictions
    mbox_loc = concatenate(loc_flats, axis=1, name='mbox_loc')
    mbox_conf = concatenate(conf_flats, axis=1, name='mbox_conf')
    mbox_priorbox = concatenate(priorboxes, axis=1, name='mbox_priorbox')

    # `int_shape` is a backend function, not a tensor attribute, so the
    # fallback must not be guarded by hasattr(tensor, 'int_shape'); doing so
    # left num_boxes unbound whenever `_keras_shape` was missing.
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox], axis=2,
                              name='predictions')

    model = Model(inputs=[input_layer, depth_input_layer],
                  outputs=predictions)
    return model
def __init__(self, input_shape, num_classes=21):
    """Instantiate every layer of the SSD300 network.

    Layers are only created and stored as attributes here; nothing is
    connected in this method.

    # Arguments
        input_shape: Shape of the input image; entries 0 and 1 are used to
            build the ``img_size`` given to every ``PriorBox``.
        num_classes: Number of classes per prior for the confidence layers.
    """
    super(SSD300, self).__init__()
    # NOTE(review): if the base class exposes `input_shape` as a read-only
    # property (Keras layers/models do), this assignment raises - confirm
    # against the class definition, which is not visible here.
    self.input_shape = input_shape
    self.num_classes = num_classes
    img_size = (self.input_shape[1], self.input_shape[0])

    # vgg16
    self.block1_conv_3x3x64 = ConvLayers2D(
        layers=2, filters=64, kernel_size=3, pool=True,
        name="block1_conv_3x3x64")
    self.block2_conv_3x3x128 = ConvLayers2D(
        layers=2, filters=128, kernel_size=3, pool=True,
        name="block2_conv_3x3x128")
    self.block3_conv_3x3x256 = ConvLayers2D(
        layers=3, filters=256, kernel_size=3, pool=True,
        name="block3_conv_3x3x256")
    self.block4_conv_3x3x512 = ConvLayers2D(
        layers=3, filters=512, kernel_size=3, pool=True,
        name="block4_conv_3x3x512")
    self.block5_conv_3x3x512 = ConvLayers2D(
        layers=3, filters=512, kernel_size=3, pool=True,
        pool_size=(3, 3), pool_strides=(1, 1),
        name="block5_conv_3x3x512")

    # Extra feature layers. Kernel sizes are always given as tuples and
    # strides via `strides=`: the Keras 1 forms `Conv2D(n, 3, 3, ...)` and
    # `subsample=` are either rejected or read as kernel_size=3, strides=3
    # by the Keras 2 `Conv2D` signature.
    self.block6_conv_3x3x1024 = Conv2D(
        1024, (3, 3), dilation_rate=(6, 6), activation='relu',
        padding='same', name='block6_conv_3x3x1024')
    self.block6_conv_1x1x1024 = Conv2D(
        1024, (1, 1), activation='relu', padding='same',
        name='block6_conv_1x1x1024')
    self.block7_conv_1x1x256 = Conv2D(
        256, (1, 1), activation='relu', padding='same',
        name='block7_conv_1x1x256')
    self.block7_conv_3x3x512 = Conv2D(
        512, (3, 3), strides=(2, 2), activation='relu', padding='same',
        name='block7_conv_3x3x512')
    self.block8_conv_1x1x128 = Conv2D(
        128, (1, 1), activation='relu', padding='same',
        name='block8_conv_1x1x128')
    self.block8_conv_3x3x256 = Conv2D(
        256, (3, 3), strides=(2, 2), activation='relu', padding='valid',
        name='block8_conv_3x3x256')
    self.block9_conv_1x1x128 = Conv2D(
        128, (1, 1), activation='relu', padding='same',
        name='block9_conv_1x1x128')
    self.block9_conv_3x3x256 = Conv2D(
        256, (3, 3), strides=(2, 2), activation='relu', padding='same',
        name='block9_conv_3x3x256')
    self.block10_conv_1x1x128 = Conv2D(
        128, (1, 1), activation='relu', padding='same',
        name='block10_conv_1x1x128')
    self.block10_conv_3x3x256 = Conv2D(
        256, (3, 3), strides=(2, 2), activation='relu', padding='same',
        name='block10_conv_3x3x256')

    self.flatten = Flatten()

    # Prediction layers for block4 (3 priors per location)
    num_priors = 3
    self.block4_norm_mbox_loc = Conv2D(
        num_priors * 4, (3, 3), padding='same',
        name='block4_norm_mbox_loc')
    self.block4_norm_mbox_conf = Conv2D(
        num_priors * self.num_classes, (3, 3), padding='same',
        name='block4_norm_mbox_conf')
    self.block4_norm_mbox_priorbox = PriorBox(
        img_size, 30.0, aspect_ratios=[2],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='block4_norm_mbox_priorbox')
    self.block4_norm = Normalize(20, name='block4_norm')

    # Prediction layers for block6..block10 (6 priors per location)
    num_priors = 6
    self.block6_mbox_loc = Conv2D(
        num_priors * 4, (3, 3), padding='same', name='block6_mbox_loc')
    self.block6_mbox_conf = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name='block6_mbox_conf')
    self.block6_mbox_priorbox = PriorBox(
        img_size, 60.0, max_size=114.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2], name='block6_mbox_priorbox')

    self.block7_mbox_loc = Conv2D(
        num_priors * 4, (3, 3), padding='same', name='block7_mbox_loc')
    self.block7_mbox_conf = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name='block7_mbox_conf')
    self.block7_mbox_priorbox = PriorBox(
        img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2], name='block7_mbox_priorbox')

    self.block8_mbox_loc = Conv2D(
        num_priors * 4, (3, 3), padding='same', name='block8_mbox_loc')
    self.block8_mbox_conf = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name='block8_mbox_conf')
    self.block8_mbox_priorbox = PriorBox(
        img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2], name='block8_mbox_priorbox')

    self.block9_mbox_loc = Conv2D(
        num_priors * 4, (3, 3), padding='same', name='block9_mbox_loc')
    self.block9_mbox_conf = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name='block9_mbox_conf')
    self.block9_mbox_priorbox = PriorBox(
        img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2], name='block9_mbox_priorbox')

    self.block10_mbox_loc_flat = Dense(
        num_priors * 4, name='block10_mbox_loc_flat')
    self.block10_mbox_conf_flat = Dense(
        num_priors * num_classes, name='block10_mbox_conf_flat')
    self.block10_reshape = Reshape((1, 1, 256), name='block10_reshape')
    self.block10_mbox_priorbox = PriorBox(
        img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2], name='block10_mbox_priorbox')

    # Layers that merge the per-block outputs
    self.concat_conf = Concatenate(axis=1, name='mbox_conf')
    self.concat_loc = Concatenate(axis=1, name='mbox_loc')
    self.concat_priorbox = Concatenate(axis=1, name='mbox_priorbox')
    self.concat_predictions = Concatenate(axis=2, name='mbox_predictions')

    # NOTE(review): both target shapes hard-code 4 rows; confirm this
    # matches the number of boxes the heads above produce.
    self.reshape_loc = Reshape((4, 4), name='mbox_loc_final')
    self.reshape_conf = Reshape((4, self.num_classes),
                                name='mbox_conf_logits')
    self.activate_softmax = Activation('softmax', name='mbox_conf_softmax')
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A ``Model`` mapping the input image to the ``predictions`` tensor:
        the localisation output reshaped to ``(num_boxes, 4)``, the softmax
        confidences reshaped to ``(num_boxes, num_classes)`` and the
        ``PriorBox`` outputs, concatenated on axis 2.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor

    # Blocks 1-5: stacks of 3x3 ReLU convolutions (sliding-window 2-D
    # convolution, border_mode 'same') each followed by max pooling over the
    # spatial dimensions.
    # (block number, filters, number of convs, pool size, pool strides)
    vgg_blocks = [
        (1, 64, 2, (2, 2), (2, 2)),
        (2, 128, 2, (2, 2), (2, 2)),
        (3, 256, 3, (2, 2), (2, 2)),
        (4, 512, 3, (2, 2), (2, 2)),
        (5, 512, 3, (3, 3), (1, 1)),
    ]
    x = net['input']
    for block, filters, num_convs, pool_size, pool_strides in vgg_blocks:
        for pos in range(1, num_convs + 1):
            conv_name = 'conv{}_{}'.format(block, pos)
            x = Convolution2D(filters, 3, 3,      # filters, 3x3 window
                              activation='relu',  # activation: ReLU
                              border_mode='same',  # padding mode: same/valid
                              name=conv_name)(x)
            net[conv_name] = x
        pool_name = 'pool{}'.format(block)
        x = MaxPooling2D(pool_size, strides=pool_strides,
                         border_mode='same', name=pool_name)(x)
        net[pool_name] = x

    # FC6: atrous (dilated, "with holes") convolution over the 2-D input.
    # The drop6 / drop7 Dropout layers are commented out in this variant.
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5'])
    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu',
                               border_mode='same', name='fc7')(net['fc6'])
    # Block 6
    net['conv6_1'] = Convolution2D(256, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv6_1')(net['fc7'])
    net['conv6_2'] = Convolution2D(512, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv6_2')(net['conv6_1'])
    # Block 7: explicit zero padding followed by a 'valid' strided conv.
    net['conv7_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv7_1')(net['conv6_2'])
    padded = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='valid',
                                   name='conv7_2')(padded)
    # Block 8
    net['conv8_1'] = Convolution2D(128, 1, 1, activation='relu',
                                   border_mode='same',
                                   name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Convolution2D(256, 3, 3, subsample=(2, 2),
                                   activation='relu', border_mode='same',
                                   name='conv8_2')(net['conv8_1'])
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # Prediction from conv4_3
    # NOTE(review): the previous comment here described
    # keras.layers.BatchNormalization; `Normalize` is a separate layer
    # defined outside this block, so confirm its behaviour there rather
    # than assuming batch normalisation.
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    # Confidence layers carry a class-count suffix in their layer name
    # whenever num_classes differs from 21.
    class_suffix = '' if num_classes == 21 else '_{}'.format(num_classes)

    # (source tensor key, num_priors, PriorBox min_size, max_size or None,
    #  aspect_ratios)
    conv_heads = [
        ('conv4_3_norm', 3, 30.0, None, [2]),
        ('fc7', 6, 60.0, 114.0, [2, 3]),
        ('conv6_2', 6, 114.0, 168.0, [2, 3]),
        ('conv7_2', 6, 168.0, 222.0, [2, 3]),
        ('conv8_2', 6, 222.0, 276.0, [2, 3]),
    ]
    for source, num_priors, min_size, max_size, aspect_ratios in conv_heads:
        features = net[source]

        loc_key = source + '_mbox_loc'
        net[loc_key] = Convolution2D(num_priors * 4, 3, 3,
                                     border_mode='same',
                                     name=loc_key)(features)
        net[loc_key + '_flat'] = Flatten(name=loc_key + '_flat')(net[loc_key])

        conf_key = source + '_mbox_conf'
        net[conf_key] = Convolution2D(num_priors * num_classes, 3, 3,
                                      border_mode='same',
                                      name=conf_key + class_suffix)(features)
        net[conf_key + '_flat'] = Flatten(
            name=conf_key + '_flat')(net[conf_key])

        # max_size is only passed when the head defines one, so PriorBox's
        # own default applies to the conv4_3 head exactly as before.
        prior_kwargs = {} if max_size is None else {'max_size': max_size}
        prior_key = source + '_mbox_priorbox'
        net[prior_key] = PriorBox(img_size, min_size,
                                  aspect_ratios=aspect_ratios,
                                  variances=[0.1, 0.1, 0.2, 0.2],
                                  name=prior_key,
                                  **prior_kwargs)(features)

    # Prediction from pool6: the globally pooled vector feeds Dense layers
    # directly and is reshaped before being given to PriorBox.
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes,
        name='pool6_mbox_conf_flat' + class_suffix)(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions
    head_sources = ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2',
                    'pool6']
    net['mbox_loc'] = merge(
        [net[source + '_mbox_loc_flat'] for source in head_sources],
        mode='concat', concat_axis=1, name='mbox_loc')
    net['mbox_conf'] = merge(
        [net[source + '_mbox_conf_flat'] for source in head_sources],
        mode='concat', concat_axis=1, name='mbox_conf')
    net['mbox_priorbox'] = merge(
        [net[source + '_mbox_priorbox'] for source in head_sources],
        mode='concat', concat_axis=1, name='mbox_priorbox')

    # `int_shape` is a backend function, not a tensor attribute, so the
    # fallback must not be guarded by hasattr(tensor, 'int_shape'); doing so
    # left num_boxes unbound whenever `_keras_shape` was missing.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat', concat_axis=2, name='predictions')

    model = Model(net['input'], net['predictions'])
    return model
def SSD(input_shape, num_classes):
    """SSD512-style detector on top of a truncated ResNet50.

    The ImageNet ResNet50 is cut at layer ``add_7``; its output is upsampled
    with a transposed convolution and followed by VGG-style blocks 4-8.
    Six prediction heads (conv4_3, fc7, conv6_2, conv7_2, conv8_2, pool6)
    each produce location offsets, class confidences and prior boxes.

    # Arguments
        input_shape: Sequence whose first two entries give the image size.
            The two entries are swapped and a channel count of 3 is always
            appended before the input layer is created; any further entries
            are ignored.
        num_classes: Number of classes including background.

    # Returns
        A Keras ``Model`` whose single output concatenates, along axis 2,
        the reshaped location predictions ``(num_boxes, 4)``, the softmax
        class confidences ``(num_boxes, num_classes)`` and the output of
        the ``PriorBox`` layers.

    # References
        https://arxiv.org/abs/1512.02325
    """
    def conf_layer_name(base):
        # Confidence layers carry a class-count suffix unless num_classes == 21.
        if num_classes != 21:
            return base + '_{}'.format(num_classes)
        return base

    # Block 1
    # The first two entries are swapped and the channel count is forced to 3.
    input_shape = (input_shape[1], input_shape[0], 3)
    input_tensor = Input(input_shape)

    # Feature extractor: ImageNet ResNet50 truncated at layer 'add_7'.
    resnet50 = ResNet50(input_shape=input_shape, include_top=False,
                        weights='imagenet')
    feature_extractor = Model(inputs=resnet50.input,
                              outputs=resnet50.get_layer('add_7').output)
    pool3 = feature_extractor(input_tensor)
    # `padding` is the Keras 2 spelling of the legacy `border_mode` keyword.
    conv4_0 = Conv2DTranspose(512, (2, 2), name='conv4_0', activation='relu',
                              padding='valid')(pool3)

    # Block 4
    conv4_1 = Conv2D(512, (3, 3), activation='relu', padding='same',
                     name='conv4_1')(conv4_0)
    conv4_2 = Conv2D(512, (3, 3), activation='relu', padding='same',
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3), activation='relu', padding='same',
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                         name='pool4')(conv4_3)

    # Block 5
    conv5_1 = Conv2D(512, (3, 3), name='conv5_1', padding='same',
                     activation='relu')(pool4)
    conv5_2 = Conv2D(512, (3, 3), name='conv5_2', padding='same',
                     activation='relu')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3), name='conv5_3', padding='same',
                     activation='relu')(conv5_2)
    pool5 = MaxPooling2D(name='pool5', pool_size=(3, 3), strides=(1, 1),
                         padding='same')(conv5_3)

    # FC6 (dilated convolution)
    fc6 = Conv2D(1024, (3, 3), name='fc6', dilation_rate=(6, 6),
                 padding='same', activation='relu')(pool5)

    # FC7
    fc7 = Conv2D(1024, (1, 1), name='fc7', padding='same',
                 activation='relu')(fc6)

    # Block 6
    conv6_1 = Conv2D(256, (1, 1), name='conv6_1', padding='same',
                     activation='relu')(fc7)
    conv6_2 = Conv2D(512, (3, 3), name='conv6_2', strides=(2, 2),
                     padding='same', activation='relu')(conv6_1)

    # Block 7 (explicit zero padding followed by a 'valid' strided conv)
    conv7_1 = Conv2D(128, (1, 1), name='conv7_1', padding='same',
                     activation='relu')(conv6_2)
    conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3), name='conv7_2', padding='valid',
                     strides=(2, 2), activation='relu')(conv7_1z)

    # Block 8
    conv8_1 = Conv2D(128, (1, 1), name='conv8_1', padding='same',
                     activation='relu')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3), name='conv8_2', padding='same',
                     strides=(2, 2), activation='relu')(conv8_1)

    # Last Pool
    pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    # Prediction from conv4_3
    num_priors = 3
    img_size = (input_shape[1], input_shape[0])
    name = conf_layer_name('conv4_3_norm_mbox_conf')
    conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)
    conv4_3_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                   name='conv4_3_norm_mbox_loc',
                                   padding='same')(conv4_3_norm)
    conv4_3_norm_mbox_loc_flat = Flatten(
        name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)
    conv4_3_norm_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                    name=name,
                                    padding='same')(conv4_3_norm)
    conv4_3_norm_mbox_conf_flat = Flatten(
        name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf)
    conv4_3_norm_mbox_priorbox = PriorBox(
        img_size, 30.0, name='conv4_3_norm_mbox_priorbox',
        aspect_ratios=[2],
        variances=[0.1, 0.1, 0.2, 0.2])(conv4_3_norm)

    # Prediction from fc7
    num_priors = 6
    name = conf_layer_name('fc7_mbox_conf')
    fc7_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                           padding='same', name=name)(fc7)
    fc7_mbox_conf_flat = Flatten(name='fc7_mbox_conf_flat')(fc7_mbox_conf)
    fc7_mbox_loc = Conv2D(num_priors * 4, (3, 3), name='fc7_mbox_loc',
                          padding='same')(fc7)
    fc7_mbox_loc_flat = Flatten(name='fc7_mbox_loc_flat')(fc7_mbox_loc)
    fc7_mbox_priorbox = PriorBox(
        img_size, 60.0, name='fc7_mbox_priorbox', max_size=114.0,
        aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2])(fc7)

    # Prediction from conv6_2
    num_priors = 6
    name = conf_layer_name('conv6_2_mbox_conf')
    conv6_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same', name=name)(conv6_2)
    conv6_2_mbox_conf_flat = Flatten(
        name='conv6_2_mbox_conf_flat')(conv6_2_mbox_conf)
    conv6_2_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                              name='conv6_2_mbox_loc',
                              padding='same')(conv6_2)
    conv6_2_mbox_loc_flat = Flatten(
        name='conv6_2_mbox_loc_flat')(conv6_2_mbox_loc)
    conv6_2_mbox_priorbox = PriorBox(
        img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv6_2_mbox_priorbox')(conv6_2)

    # Prediction from conv7_2
    num_priors = 6
    name = conf_layer_name('conv7_2_mbox_conf')
    conv7_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same', name=name)(conv7_2)
    conv7_2_mbox_conf_flat = Flatten(
        name='conv7_2_mbox_conf_flat')(conv7_2_mbox_conf)
    conv7_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                              name='conv7_2_mbox_loc')(conv7_2)
    conv7_2_mbox_loc_flat = Flatten(
        name='conv7_2_mbox_loc_flat')(conv7_2_mbox_loc)
    conv7_2_mbox_priorbox = PriorBox(
        img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv7_2_mbox_priorbox')(conv7_2)

    # Prediction from conv8_2
    num_priors = 6
    name = conf_layer_name('conv8_2_mbox_conf')
    conv8_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same', name=name)(conv8_2)
    conv8_2_mbox_conf_flat = Flatten(
        name='conv8_2_mbox_conf_flat')(conv8_2_mbox_conf)
    conv8_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                              name='conv8_2_mbox_loc')(conv8_2)
    conv8_2_mbox_loc_flat = Flatten(
        name='conv8_2_mbox_loc_flat')(conv8_2_mbox_loc)
    conv8_2_mbox_priorbox = PriorBox(
        img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv8_2_mbox_priorbox')(conv8_2)

    # Prediction from pool6
    num_priors = 6
    name = conf_layer_name('pool6_mbox_conf_flat')
    # The pooled vector is reshaped to a 1x1 feature map for PriorBox; the
    # position of the channel axis follows the backend's image data format.
    if K.image_data_format() == 'channels_last':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    pool6_mbox_loc_flat = Dense(num_priors * 4,
                                name='pool6_mbox_loc_flat')(pool6)
    pool6_mbox_conf_flat = Dense(num_priors * num_classes, name=name)(pool6)
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    pool6_mbox_priorbox = PriorBox(
        img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='pool6_mbox_priorbox')(pool6_reshaped)

    # Gather all predictions
    mbox_loc = concatenate([conv4_3_norm_mbox_loc_flat,
                            fc7_mbox_loc_flat,
                            conv6_2_mbox_loc_flat,
                            conv7_2_mbox_loc_flat,
                            conv8_2_mbox_loc_flat,
                            pool6_mbox_loc_flat],
                           axis=1, name='mbox_loc')
    mbox_conf = concatenate([conv4_3_norm_mbox_conf_flat,
                             fc7_mbox_conf_flat,
                             conv6_2_mbox_conf_flat,
                             conv7_2_mbox_conf_flat,
                             conv8_2_mbox_conf_flat,
                             pool6_mbox_conf_flat],
                            axis=1, name='mbox_conf')
    mbox_priorbox = concatenate([conv4_3_norm_mbox_priorbox,
                                 fc7_mbox_priorbox,
                                 conv6_2_mbox_priorbox,
                                 conv7_2_mbox_priorbox,
                                 conv8_2_mbox_priorbox,
                                 pool6_mbox_priorbox],
                                axis=1, name='mbox_priorbox')

    # Each box has 4 location values, so the flat length gives the box count.
    # Fall back to K.int_shape when the tensor carries no `_keras_shape`, so
    # that `num_boxes` is always defined.
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2, name='predictions')
    model = Model(input_tensor, outputs=predictions)
    return model
def SSD_300(
        input_shape,
        num_classes=17 + 1,
        min_scale=None,
        max_scale=None,
        aspect_ratios_per_layer=[[2.0], [2.0, 3.0], [2.0, 3.0], [2.0, 3.0],
                                 [2.0, 3.0], [2.0, 3.0]],
        variances=[0.1, 0.1, 0.2, 0.2],
        scales=[30, 60, 114, 168, 222, 276, 330],
        clip_boxes=True):
    """Build an SSD detector on a residual backbone with six prediction heads.

    Heads are attached to conv5_9 (normalized), res6_5, conv6_9, res7_2,
    res7_4 and the globally pooled pool7_5.

    # Arguments
        input_shape (tuple): ``(height, width, channels)`` of the input images.
        num_classes (int): Number of classes predicted per box.
        min_scale (float): Smallest scale; only used when `scales` is empty
            or None, in which case 7 values are spaced linearly between
            `min_scale` and `max_scale`.
        max_scale (float): Largest scale; see `min_scale`.
        aspect_ratios_per_layer (list): One list of aspect ratios per
            prediction head (6 entries). ``None`` selects the default list.
            NOTE(review): the heads hard-code 3 and 6 priors per location,
            which presumably matches the default ratios only - confirm
            against PriorBox before passing custom ratios.
        variances (list): Forwarded unchanged to every PriorBox layer.
        scales (list): 7 values; head ``i`` uses ``scales[i]`` as PriorBox
            ``min_size`` and (except for the first head) ``scales[i + 1]``
            as ``max_size``.
            NOTE(review): values are passed to PriorBox unscaled, also when
            derived from `min_scale`/`max_scale` - confirm expected units.
        clip_boxes (bool): Currently unused by this function.

    # Returns
        A Keras ``Model`` whose output concatenates, along axis 2, the
        location predictions ``(num_boxes, 4)``, the softmax confidences
        ``(num_boxes, num_classes)`` and the PriorBox outputs.

    # Raises
        ValueError: If `aspect_ratios_per_layer` does not have 6 entries,
            if `scales` is given but does not have 7 entries, or if
            `scales` is empty/None and `min_scale`/`max_scale` is missing.
    """
    # The number of predictor layers in the network is 6, as in the original SSD300.
    n_predictor_layers = 6

    if aspect_ratios_per_layer is None:
        # The error message below allows None; fall back to the defaults
        # instead of failing later when the list is indexed.
        aspect_ratios_per_layer = [[2.0], [2.0, 3.0], [2.0, 3.0],
                                   [2.0, 3.0], [2.0, 3.0], [2.0, 3.0]]
    elif len(aspect_ratios_per_layer) != n_predictor_layers:
        raise ValueError(
            "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
            .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        # No explicit list of scales was passed: compute it from
        # `min_scale` and `max_scale`, which are then both required.
        if min_scale is None or max_scale is None:
            raise ValueError(
                "Either `scales` or both `min_scale` and `max_scale` must be given.")
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    net = {}
    img_height, img_width, img_channels = (input_shape[0], input_shape[1],
                                           input_shape[2])
    image_size = (input_shape[1], input_shape[0])

    # NOTE: several layers below are unnamed, so their auto-generated names
    # depend on creation order; keep the order of layer construction stable.

    # Block 1
    input_tensor = Input(shape=(img_height, img_width, img_channels))
    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(32, kernel_size=3, activation='relu',
                                   padding='same')(net['input'])
    net['conv1_2'] = Convolution2D(64, kernel_size=3, strides=2,
                                   activation='relu', padding='valid',
                                   name='conv1_2')(net['conv1_1'])

    # Block 2
    net['res2_1'] = Residual_Block(32, net['conv1_2'], name='res2_1')
    net['conv3_1'] = Convolution2D(128, kernel_size=3, activation='relu',
                                   padding='valid', strides=2,
                                   name='conv3_1')(net['res2_1'])

    # Block 3
    net['res4_1'] = Residual_Block(64, net['conv3_1'], name='res4_1')
    net['res4_2'] = Residual_Block(64, net['res4_1'], name='res4_2')
    net['conv4_3'] = Convolution2D(256, kernel_size=3, activation='relu',
                                   padding='valid', strides=2,
                                   name='conv4_3')(net['res4_2'])

    # Block 4: res5_1 .. res5_8 chained, then a strided convolution
    previous = net['conv4_3']
    for idx in range(1, 9):
        key = 'res5_{}'.format(idx)
        net[key] = Residual_Block(128, previous, name=key)
        previous = net[key]
    net['conv5_9'] = Convolution2D(512, kernel_size=3, activation='relu',
                                   padding='valid', strides=2,
                                   name='conv5_9')(net['res5_8'])

    # Block 5: res6_1 .. res6_8 chained (res6_5 feeds a prediction head)
    previous = net['conv5_9']
    for idx in range(1, 9):
        key = 'res6_{}'.format(idx)
        net[key] = Residual_Block(256, previous, name=key)
        previous = net[key]
    net['conv6_9'] = Convolution2D(1024, kernel_size=3, activation='relu',
                                   padding='valid', strides=2,
                                   name='conv6_9')(net['res6_8'])

    # Block 6: res7_1 .. res7_4 chained (res7_2 and res7_4 feed heads)
    previous = net['conv6_9']
    for idx in range(1, 5):
        key = 'res7_{}'.format(idx)
        net[key] = Residual_Block(512, previous, name=key)
        previous = net[key]

    # Last pool
    net['pool7_5'] = GlobalAveragePooling2D(name='pool7_5')(net['res7_4'])

    # Prediction from conv5_9
    net['conv5_9_norm'] = Normalize(20)(net['conv5_9'])
    num_priors = 3
    net['conv5_9_norm_mbox_loc'] = Convolution2D(
        num_priors * 4, kernel_size=3,
        padding='same')(net['conv5_9_norm'])
    net['conv5_9_norm_mbox_loc_flat'] = Flatten()(
        net['conv5_9_norm_mbox_loc'])
    net['conv5_9_norm_mbox_conf'] = Convolution2D(
        num_priors * num_classes, kernel_size=3,
        padding='same')(net['conv5_9_norm'])
    net['conv5_9_norm_mbox_conf_flat'] = Flatten()(
        net['conv5_9_norm_mbox_conf'])
    net['conv5_9_norm_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[0],
        aspect_ratios=aspect_ratios_per_layer[0],
        variances=variances)(net['conv5_9_norm'])

    # Prediction from res6_5
    num_priors = 6
    net['res6_5_mbox_loc'] = Convolution2D(
        num_priors * 4, kernel_size=3, padding='same')(net['res6_5'])
    net['res6_5_mbox_loc_flat'] = Flatten()(net['res6_5_mbox_loc'])
    net['res6_5_mbox_conf'] = Convolution2D(
        num_priors * num_classes, kernel_size=3,
        padding='same')(net['res6_5'])
    net['res6_5_mbox_conf_flat'] = Flatten()(net['res6_5_mbox_conf'])
    net['res6_5_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[1],
        max_size=scales[2],
        aspect_ratios=aspect_ratios_per_layer[1],
        variances=variances)(net['res6_5'])

    # Prediction from conv6_9
    num_priors = 6
    net['conv6_9_mbox_loc'] = Convolution2D(
        num_priors * 4, kernel_size=3, padding='same')(net['conv6_9'])
    net['conv6_9_mbox_loc_flat'] = Flatten()(net['conv6_9_mbox_loc'])
    net['conv6_9_mbox_conf'] = Convolution2D(
        num_priors * num_classes, kernel_size=3,
        padding='same')(net['conv6_9'])
    net['conv6_9_mbox_conf_flat'] = Flatten()(net['conv6_9_mbox_conf'])
    net['conv6_9_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[2],
        max_size=scales[3],
        aspect_ratios=aspect_ratios_per_layer[2],
        variances=variances)(net['conv6_9'])

    # Prediction from res7_2
    num_priors = 6
    net['res7_2_mbox_loc'] = Convolution2D(
        num_priors * 4, kernel_size=3, padding='same')(net['res7_2'])
    net['res7_2_mbox_loc_flat'] = Flatten()(net['res7_2_mbox_loc'])
    net['res7_2_mbox_conf'] = Convolution2D(
        num_priors * num_classes, kernel_size=3,
        padding='same')(net['res7_2'])
    net['res7_2_mbox_conf_flat'] = Flatten()(net['res7_2_mbox_conf'])
    net['res7_2_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[3],
        max_size=scales[4],
        aspect_ratios=aspect_ratios_per_layer[3],
        variances=variances)(net['res7_2'])

    # Prediction from res7_4
    num_priors = 6
    net['res7_4_mbox_loc'] = Convolution2D(
        num_priors * 4, kernel_size=3, padding='same')(net['res7_4'])
    net['res7_4_mbox_loc_flat'] = Flatten()(net['res7_4_mbox_loc'])
    net['res7_4_mbox_conf'] = Convolution2D(
        num_priors * num_classes, kernel_size=3,
        padding='same')(net['res7_4'])
    net['res7_4_mbox_conf_flat'] = Flatten()(net['res7_4_mbox_conf'])
    net['res7_4_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[4],
        max_size=scales[5],
        aspect_ratios=aspect_ratios_per_layer[4],
        variances=variances)(net['res7_4'])

    # Prediction from pool7_5
    num_priors = 6
    net['pool7_5_mbox_loc_flat'] = Dense(num_priors * 4)(net['pool7_5'])
    net['pool7_5_mbox_conf_flat'] = Dense(num_priors * num_classes)(
        net['pool7_5'])
    # The pooled vector is reshaped to a 1x1 feature map for PriorBox; the
    # position of the channel axis follows the backend's image data format.
    if K.image_data_format() == 'channels_last':
        target_shape = (1, 1, 1024)
    else:
        target_shape = (1024, 1, 1)
    net['pool7_5_reshaped'] = Reshape(target_shape)(net['pool7_5'])
    net['pool7_5_mbox_priorbox'] = PriorBox(
        image_size,
        min_size=scales[5],
        max_size=scales[6],
        aspect_ratios=aspect_ratios_per_layer[5],
        variances=variances)(net['pool7_5_reshaped'])

    # Combine predictions: the flattened outputs of all heads are joined
    # along axis 1.
    # Localization: 4 values per box.
    net['mbox_loc'] = concatenate([
        net['conv5_9_norm_mbox_loc_flat'],
        net['res6_5_mbox_loc_flat'],
        net['conv6_9_mbox_loc_flat'],
        net['res7_2_mbox_loc_flat'],
        net['res7_4_mbox_loc_flat'],
        net['pool7_5_mbox_loc_flat']
    ], axis=1)
    # Confidence: `num_classes` values per box.
    net['mbox_conf'] = concatenate([
        net['conv5_9_norm_mbox_conf_flat'],
        net['res6_5_mbox_conf_flat'],
        net['conv6_9_mbox_conf_flat'],
        net['res7_2_mbox_conf_flat'],
        net['res7_4_mbox_conf_flat'],
        net['pool7_5_mbox_conf_flat']
    ], axis=1)
    # Prior boxes from every head.
    net['mbox_prior'] = concatenate([
        net['conv5_9_norm_mbox_priorbox'],
        net['res6_5_mbox_priorbox'],
        net['conv6_9_mbox_priorbox'],
        net['res7_2_mbox_priorbox'],
        net['res7_4_mbox_priorbox'],
        net['pool7_5_mbox_priorbox']
    ], axis=1)

    # Number of boxes, needed to un-flatten the predictions with Reshape.
    # Fall back to K.int_shape when the tensor carries no `_keras_shape`, so
    # that `num_boxes` is always defined.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    # Output shape of `mbox_loc`: (batch, num_boxes, 4)
    net['mbox_loc'] = Reshape((num_boxes, 4))(net['mbox_loc'])
    # Output shape of `mbox_conf`: (batch, num_boxes, num_classes)
    net['mbox_conf'] = Reshape((num_boxes, num_classes))(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_prior']], axis=2)

    model = Model(net['input'], net['predictions'])
    return model
def SSD300(input_shape, num_classes=2):
    """Build a VGG16-style SSD300 detector.

    # Arguments
        input_shape: Shape passed to the Keras ``Input`` layer. Entries 1
            and 0 (in that order) are forwarded to PriorBox as the image
            size.
        num_classes: Number of classes predicted per box. Confidence layers
            get a ``_<num_classes>`` name suffix unless this is 21.

    # Returns
        A Keras ``Model`` whose output concatenates, along axis 2, the
        location predictions ``(num_boxes, 4)``, the softmax confidences
        ``(num_boxes, num_classes)`` and the PriorBox outputs of the fc7,
        conv6_2, conv7_2, conv8_2 and pool6 heads. The conv4_3 head is
        built but is not fed into the concatenated outputs.
    """
    def conf_layer_name(base):
        # Confidence layers carry a class-count suffix unless num_classes == 21.
        if num_classes != 21:
            return base + '_{}'.format(num_classes)
        return base

    net = {}

    # Block 1
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Conv2D(64, (3, 3), activation='relu', padding='same',
                            name='conv1_1')(net['input'])
    net['conv1_2'] = Conv2D(64, (3, 3), activation='relu', padding='same',
                            name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool1')(net['conv1_2'])

    # Block 2
    net['conv2_1'] = Conv2D(128, (3, 3), activation='relu', padding='same',
                            name='conv2_1')(net['pool1'])
    net['conv2_2'] = Conv2D(128, (3, 3), activation='relu', padding='same',
                            name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool2')(net['conv2_2'])

    # Block 3
    net['conv3_1'] = Conv2D(256, (3, 3), activation='relu', padding='same',
                            name='conv3_1')(net['pool2'])
    net['conv3_2'] = Conv2D(256, (3, 3), activation='relu', padding='same',
                            name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Conv2D(256, (3, 3), activation='relu', padding='same',
                            name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool3')(net['conv3_3'])

    # Block 4
    net['conv4_1'] = Conv2D(512, (3, 3), activation='relu', padding='same',
                            name='conv4_1')(net['pool3'])
    net['conv4_2'] = Conv2D(512, (3, 3), activation='relu', padding='same',
                            name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Conv2D(512, (3, 3), activation='relu', padding='same',
                            name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                name='pool4')(net['conv4_3'])

    # Block 5
    net['conv5_1'] = Conv2D(512, (3, 3), activation='relu', padding='same',
                            name='conv5_1')(net['pool4'])
    net['conv5_2'] = Conv2D(512, (3, 3), activation='relu', padding='same',
                            name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Conv2D(512, (3, 3), activation='relu', padding='same',
                            name='conv5_3')(net['conv5_2'])
    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
                                name='pool5')(net['conv5_3'])

    # FC6 (dilated convolution)
    net['fc6'] = Conv2D(1024, (3, 3), activation="relu", name="fc6",
                        dilation_rate=(6, 6),
                        padding="same")(net['pool5'])

    # FC7
    net['fc7'] = Conv2D(1024, (1, 1), activation='relu', padding='same',
                        name='fc7')(net['fc6'])

    # Block 6
    net['conv6_1'] = Conv2D(256, (1, 1), activation='relu', padding='same',
                            name='conv6_1')(net['fc7'])
    net['conv6_2'] = Conv2D(512, (3, 3), strides=(2, 2), activation='relu',
                            padding='same',
                            name='conv6_2')(net['conv6_1'])

    # Block 7 (explicit zero padding followed by a 'valid' strided conv;
    # the padded tensor is stored under 'conv7_2' and then overwritten)
    net['conv7_1'] = Conv2D(128, (1, 1), activation='relu', padding='same',
                            name='conv7_1')(net['conv6_2'])
    net['conv7_2'] = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Conv2D(256, (3, 3), strides=(2, 2), activation='relu',
                            padding='valid',
                            name='conv7_2')(net['conv7_2'])

    # Block 8
    net['conv8_1'] = Conv2D(128, (1, 1), activation='relu', padding='same',
                            name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Conv2D(256, (3, 3), strides=(2, 2), activation='relu',
                            padding='same',
                            name='conv8_2')(net['conv8_1'])

    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # Prediction from conv4_3
    # NOTE: this head is built but not included in the concatenations below.
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 3
    net['conv4_3_norm_mbox_loc'] = Conv2D(
        num_priors * 4, (3, 3), padding='same',
        name='conv4_3_norm_mbox_loc')(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc_flat'] = Flatten(
        name='conv4_3_norm_mbox_loc_flat')(net['conv4_3_norm_mbox_loc'])
    net['conv4_3_norm_mbox_conf'] = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name=conf_layer_name('conv4_3_norm_mbox_conf'))(net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf_flat'] = Flatten(
        name='conv4_3_norm_mbox_conf_flat')(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size, 30.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # Prediction from fc7
    num_priors = 6
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4, (3, 3), padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    net['fc7_mbox_loc_flat'] = Flatten(
        name='fc7_mbox_loc_flat')(net['fc7_mbox_loc'])
    net['fc7_mbox_conf'] = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name=conf_layer_name('fc7_mbox_conf'))(net['fc7'])
    net['fc7_mbox_conf_flat'] = Flatten(
        name='fc7_mbox_conf_flat')(net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size, 60.0, max_size=114.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # Prediction from conv6_2
    num_priors = 6
    net['conv6_2_mbox_loc'] = Conv2D(
        num_priors * 4, (3, 3), padding='same',
        name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc_flat'] = Flatten(
        name='conv6_2_mbox_loc_flat')(net['conv6_2_mbox_loc'])
    net['conv6_2_mbox_conf'] = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name=conf_layer_name('conv6_2_mbox_conf'))(net['conv6_2'])
    net['conv6_2_mbox_conf_flat'] = Flatten(
        name='conv6_2_mbox_conf_flat')(net['conv6_2_mbox_conf'])
    priorbox = PriorBox(img_size, 114.0, max_size=168.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])

    # Prediction from conv7_2
    num_priors = 6
    net['conv7_2_mbox_loc'] = Conv2D(
        num_priors * 4, (3, 3), padding='same',
        name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc_flat'] = Flatten(
        name='conv7_2_mbox_loc_flat')(net['conv7_2_mbox_loc'])
    net['conv7_2_mbox_conf'] = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name=conf_layer_name('conv7_2_mbox_conf'))(net['conv7_2'])
    net['conv7_2_mbox_conf_flat'] = Flatten(
        name='conv7_2_mbox_conf_flat')(net['conv7_2_mbox_conf'])
    priorbox = PriorBox(img_size, 168.0, max_size=222.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])

    # Prediction from conv8_2
    num_priors = 6
    net['conv8_2_mbox_loc'] = Conv2D(
        num_priors * 4, (3, 3), padding='same',
        name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc_flat'] = Flatten(
        name='conv8_2_mbox_loc_flat')(net['conv8_2_mbox_loc'])
    net['conv8_2_mbox_conf'] = Conv2D(
        num_priors * num_classes, (3, 3), padding='same',
        name=conf_layer_name('conv8_2_mbox_conf'))(net['conv8_2'])
    net['conv8_2_mbox_conf_flat'] = Flatten(
        name='conv8_2_mbox_conf_flat')(net['conv8_2_mbox_conf'])
    priorbox = PriorBox(img_size, 222.0, max_size=276.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])

    # Prediction from pool6
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes,
        name=conf_layer_name('pool6_mbox_conf_flat'))(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # The pooled vector is reshaped to a 1x1 feature map for PriorBox; the
    # position of the channel axis follows the backend's image data format.
    if K.image_data_format() == 'channels_last':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions (conv4_3 entries are left out of each list)
    net['mbox_loc'] = concatenate(
        [
            # net['conv4_3_norm_mbox_loc_flat'],
            net['fc7_mbox_loc_flat'],
            net['conv6_2_mbox_loc_flat'],
            net['conv7_2_mbox_loc_flat'],
            net['conv8_2_mbox_loc_flat'],
            net['pool6_mbox_loc_flat']
        ],
        axis=1,
        name='mbox_loc')
    net['mbox_conf'] = concatenate(
        [
            # net['conv4_3_norm_mbox_conf_flat'],
            net['fc7_mbox_conf_flat'],
            net['conv6_2_mbox_conf_flat'],
            net['conv7_2_mbox_conf_flat'],
            net['conv8_2_mbox_conf_flat'],
            net['pool6_mbox_conf_flat']
        ],
        axis=1,
        name='mbox_conf')
    net['mbox_priorbox'] = concatenate(
        [
            # net['conv4_3_norm_mbox_priorbox'],
            net['fc7_mbox_priorbox'],
            net['conv6_2_mbox_priorbox'],
            net['conv7_2_mbox_priorbox'],
            net['conv8_2_mbox_priorbox'],
            net['pool6_mbox_priorbox']
        ],
        axis=1,
        name='mbox_priorbox')

    # Each box has 4 location values, so the flat length gives the box count.
    # Fall back to K.int_shape when the tensor carries no `_keras_shape`, so
    # that `num_boxes` is always defined.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
def _ssd_conv_head(net, source, prefix, num_priors, num_classes, img_size,
                   min_size, aspect_ratios, max_size=None, loc_kwargs=None,
                   psp_key=None):
    """Attach localisation, confidence and prior-box predictors to `source`.

    Results are stored in `net` under `<prefix>_mbox_loc_flat`,
    `<prefix>_mbox_conf_flat` and `<prefix>_mbox_priorbox`.

    # Arguments
        net: Dict collecting the tensors of the network (mutated in place).
        source: Feature-map tensor the predictors are applied to.
        prefix: Name prefix shared by the layers and `net` keys of this head.
        num_priors: Number of boxes predicted per feature-map cell.
        num_classes: Number of classes including background.
        img_size: Image size forwarded to `PriorBox`.
        min_size: `min_size` forwarded to `PriorBox`.
        aspect_ratios: `aspect_ratios` forwarded to `PriorBox`.
        max_size: Optional `max_size` forwarded to `PriorBox`; omitted from
            the call when None.
        loc_kwargs: Optional extra keyword arguments for the localisation
            `Conv2D` (e.g. a dilation rate).
        psp_key: When not None, the raw confidence map is resized to 60x60
            with `Interp` and stored in `net[psp_key]`.
    """
    loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                 name=prefix + '_mbox_loc', **(loc_kwargs or {}))(source)
    net[prefix + '_mbox_loc_flat'] = Flatten(name=prefix + '_mbox_loc_flat')(loc)

    conf = Conv2D(num_priors * num_classes, (3, 3), padding='same',
                  name=prefix + '_mbox_conf')(source)
    if psp_key is not None:
        net[psp_key] = Lambda(Interp, arguments={'shape': (60, 60)})(conf)
    net[prefix + '_mbox_conf_flat'] = Flatten(name=prefix + '_mbox_conf_flat')(conf)

    # Only pass max_size when one was requested, exactly as the explicit calls did.
    prior_kwargs = {}
    if max_size is not None:
        prior_kwargs['max_size'] = max_size
    net[prefix + '_mbox_priorbox'] = PriorBox(
        img_size, min_size,
        aspect_ratios=aspect_ratios,
        variances=[0.1, 0.1, 0.2, 0.2],
        name=prefix + '_mbox_priorbox',
        **prior_kwargs)(source)


def SSD(input_shape=(300, 300, 3), num_classes=21, segmentation_head=False, depth_head=False):
    """SSD architecture on a ResNet50 backbone with optional dense heads.

    Six feature maps feed the detection predictors, whose outputs are
    concatenated into a single prediction tensor:

        conv3_4  conv4_6  fc7  conv6_2  conv7_2  pool6
           |        |      |      |        |       |
           +--------+------+--+---+--------+-------+
                              |
                         Concatenate
                              |
                              v
                          prediction

    Note that the predictor layer names are shifted with respect to the
    feature map they read from (e.g. the head on `conv4_6` is named
    `fc7_mbox_*`) - presumably to stay compatible with existing weight
    files; the names are kept unchanged here.

    # Arguments
        input_shape: Shape of the input image, expected to be (300, 300, 3).
        num_classes: Number of classes including background.
        segmentation_head: If True, add a `segmentation` output computed
            from the resized per-head confidence maps.
        depth_head: If True, add a `depth_map` output computed from the same
            resized confidence maps. The maps are built whenever either
            head is requested.

    # Returns
        A Keras `Model`. Its output is the detection tensor `ssd_out`
        alone when no extra head is requested, otherwise the list
        `[ssd_out, segmentation?, depth_map?]` in that order.

    # References
        SSD: https://arxiv.org/abs/1512.02325
        Rainbow SSD: https://arxiv.org/abs/1705.09587
    """
    net = {}
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    # Both dense heads consume the concatenated 60x60 maps ("psp"), so the
    # maps must exist as soon as either head is enabled.
    build_psp = segmentation_head or depth_head

    ###########################################################################
    # ResNet50 backbone
    # Zero-padding is needed for backward compatibility.
    x = ZeroPadding2D((3, 3))(input_tensor)
    backbone = ResNet50(include_top=False, input_tensor=x)
    resnet_out = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
                              name='pool5v')(backbone.get_layer('activation_49').output)
    net['conv3_4'] = backbone.get_layer("activation_22").output
    net['conv4_6'] = backbone.get_layer("activation_40").output

    ###########################################################################
    # SSD extra layers
    # FC6
    net['fc6'] = Conv2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu',
                        padding='same', name='fc6')(resnet_out)
    # FC7
    net['fc7'] = Conv2D(1024, (1, 1), activation='relu', padding='same',
                        name='fc7')(net['fc6'])
    # Block 6
    net['conv6_1'] = Conv2D(256, (1, 1), activation='relu', padding='same',
                            name='conv6_1')(net['fc7'])
    net['conv6_2'] = Conv2D(512, (3, 3), strides=(2, 2), activation='relu',
                            padding='same', name='conv6_2')(net['conv6_1'])
    # Block 7
    net['conv7_1'] = Conv2D(128, (1, 1), activation='relu', padding='same',
                            name='conv7_1')(net['conv6_2'])
    net['conv7_2'] = Conv2D(256, (3, 3), strides=(2, 2), activation='relu',
                            padding='same', name='conv7_2')(net['conv7_1'])
    # Block 8
    net['conv8_1'] = Conv2D(128, (1, 1), activation='relu', padding='same',
                            name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Conv2D(256, (3, 3), strides=(2, 2), activation='relu',
                            padding='same', name='conv8_2')(net['conv8_1'])
    # Last pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    ###########################################################################
    asp0 = [1. / 2, 1, 1., 2.]
    asp1 = [1. / 3, 1. / 2, 1, 1., 2., 3.]
    scales = [0.1, 0.2, 0.38, 0.56, 0.74, 0.92, 1.1]

    if build_psp:
        net['psp1'] = Lambda(Interp, arguments={'shape': (60, 60)})(backbone.output)

    ###########################################################################
    # CLASSIFIER:1 LAYER: conv3_4
    cl1_input = Normalize(20, name='conv3_4_norm')(net['conv3_4'])
    _ssd_conv_head(net, cl1_input, 'conv3_4_norm', len(asp0), num_classes, img_size,
                   min_size=scales[0] * img_size[0],
                   aspect_ratios=asp0,
                   loc_kwargs={'strides': (1, 1), 'dilation_rate': (2, 2)},
                   psp_key='psp6' if build_psp else None)

    ###########################################################################
    # CLASSIFIER:2 LAYER: conv4_6
    cl2_input = net['conv4_6']
    _ssd_conv_head(net, cl2_input, 'fc7', len(asp1), num_classes, img_size,
                   min_size=scales[1] * img_size[0],
                   max_size=scales[2] * img_size[0],
                   aspect_ratios=asp1,
                   psp_key='psp5' if build_psp else None)

    ###########################################################################
    # CLASSIFIER:3 LAYER: fc7
    cl3_input = Conv2D(512, (1, 1), activation='relu', padding='same',
                       name='fc7_mbox_pre')(net['fc7'])
    _ssd_conv_head(net, cl3_input, 'conv6_2', len(asp1), num_classes, img_size,
                   min_size=scales[2] * img_size[0],
                   max_size=scales[3] * img_size[0],
                   aspect_ratios=asp1,
                   psp_key='psp4' if build_psp else None)

    ###########################################################################
    # CLASSIFIER:4 LAYER: conv6_2
    cl4_input = Conv2D(256, (1, 1), activation='relu', padding='same',
                       name='conv6_2_mbox_pre')(net['conv6_2'])
    _ssd_conv_head(net, cl4_input, 'conv7_2', len(asp1), num_classes, img_size,
                   min_size=scales[3] * img_size[0],
                   max_size=scales[4] * img_size[0],
                   aspect_ratios=asp1,
                   psp_key='psp3' if build_psp else None)

    ###########################################################################
    # CLASSIFIER:5 LAYER: conv7_2
    cl5_input = net['conv7_2']
    _ssd_conv_head(net, cl5_input, 'conv8_2', len(asp1), num_classes, img_size,
                   min_size=scales[4] * img_size[0],
                   max_size=scales[5] * img_size[0],
                   aspect_ratios=asp1,
                   psp_key='psp2' if build_psp else None)

    ###########################################################################
    # CLASSIFIER:6 LAYER: pool6 (globally pooled vector, hence Dense predictors)
    num_priors = len(asp0)
    cl6_input = net['pool6']
    net['pool6_mbox_loc_flat'] = Dense(num_priors * 4,
                                       name='pool6_mbox_loc_flat')(cl6_input)
    net['pool6_mbox_conf_flat'] = Dense(num_priors * num_classes,
                                        name="pool6_mbox_conf_flat")(cl6_input)
    # PriorBox is applied to a feature map, so the pooled vector is reshaped
    # to a 1x1 map first.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    pool6_map = Reshape(target_shape, name='pool6_reshaped')(cl6_input)
    net['pool6_mbox_priorbox'] = PriorBox(
        img_size, scales[5] * img_size[0],
        max_size=scales[6] * img_size[0],
        aspect_ratios=asp0,
        variances=[0.1, 0.1, 0.2, 0.2],
        name='pool6_mbox_priorbox')(pool6_map)

    ###########################################################################
    # Gather all predictions
    head_prefixes = ['conv3_4_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']
    net['mbox_loc'] = Concatenate(axis=1, name='mbox_loc')(
        [net[prefix + '_mbox_loc_flat'] for prefix in head_prefixes])
    net['mbox_conf'] = Concatenate(axis=1, name='mbox_conf')(
        [net[prefix + '_mbox_conf_flat'] for prefix in head_prefixes])
    net['mbox_priorbox'] = Concatenate(axis=1, name='mbox_priorbox')(
        [net[prefix + '_mbox_priorbox'] for prefix in head_prefixes])

    ###########################################################################
    # Optional dense heads on top of the concatenated 60x60 maps
    if build_psp:
        psp = Concatenate(axis=-1, name='psp')([
            net['psp1'], net['psp2'], net['psp3'],
            net['psp4'], net['psp5'], net['psp6'],
        ])
        # NOTE(review): this sets an attribute on the output tensor, not on a
        # layer, so it likely does not freeze anything - confirm the intent.
        psp.trainable = False

    if segmentation_head:
        x = Conv2D(256, (3, 3), strides=(1, 1), padding="same", name="seg_conv1_1")(psp)
        x = Activation('relu')(x)
        x = Conv2D(256, (3, 3), strides=(1, 1), padding="same", name="seg_conv1_2")(x)
        x = BatchNormalization(momentum=0.95, epsilon=1e-5, name="seg_conv1_2_bn")(x)
        x = Activation('relu')(x)
        x = Dropout(0.1)(x)
        x = Conv2D(num_classes, (1, 1), strides=(1, 1), name="seg_conv_last")(x)
        x = Lambda(Interp, arguments={'shape': (input_shape[0], input_shape[1])})(x)
        segmentation = Activation('sigmoid', name='segmentation')(x)

    if depth_head:
        # Built after `psp` exists; it used to be referenced before assignment.
        x = Conv2D(512, (3, 3), strides=(1, 1), padding="same",
                   name="depth_conv1_3", use_bias=False)(psp)
        x = BatchNormalization(momentum=0.95, epsilon=1e-5, name="depth_conv1_3_bn")(x)
        x = Activation('relu')(x)
        x = Dropout(0.1)(x)
        x = Conv2D(512, (3, 3), strides=(1, 1), padding="same",
                   name="depth_conv2_3", use_bias=False)(x)
        x = BatchNormalization(momentum=0.95, epsilon=1e-5, name="depth_conv2_3_bn")(x)
        x = Activation('relu')(x)
        # This layer previously reused the name "depth_conv2_3"; layer names
        # must be unique within a model.
        x = Conv2D(1, (3, 3), strides=(1, 1), padding="same",
                   name="depth_conv3_3", use_bias=False)(x)
        x = Lambda(Interp, arguments={'shape': (input_shape[0], input_shape[1])})(x)
        depth_map = Activation('relu', name="depth_map")(x)

    ###########################################################################
    # Reshape the flat predictions to one row per box and assemble the output
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        # Fall back to the backend helper so num_boxes is always defined.
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4), name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax', name='mbox_conf_final')(net['mbox_conf'])
    ssd_out = Concatenate(axis=2, name='ssd_out')([
        net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']])

    if not segmentation_head and not depth_head:
        return Model(input_tensor, ssd_out)

    outputs = [ssd_out]
    if segmentation_head:
        outputs.append(segmentation)
    if depth_head:
        outputs.append(depth_map)
    return Model(input_tensor, outputs)
def ssd512(input_shape=(512, 512, 3), num_classes=21, min_scale=0.1, max_scale=0.9,
           scales=None, aspect_ratios_global=None, aspect_ratios_per_layer=None,
           two_boxes_for_ar1=True, limit_boxes=True, variances=(0.1, 0.1, 0.2, 0.2),
           weights_path=None, frozen_layers=None, summary=False, plot=False):
    """Build an SSD512 model on a VGG-16 style base network.

    # Arguments
        input_shape: `(height, width, channels)` of the input image.
        num_classes: Number of classes including background.
        min_scale, max_scale: Used to compute the list of scaling factors
            with `np.linspace` when `scales` is not given.
        scales: Optional explicit sequence of 8 scaling factors (one per
            predictor layer plus one). Takes precedence over
            `min_scale`/`max_scale`.
        aspect_ratios_global: Optional list of aspect ratios used for every
            predictor layer.
        aspect_ratios_per_layer: Optional list of 7 aspect-ratio lists, one
            per predictor layer. Takes precedence over
            `aspect_ratios_global` when both are given. When neither is
            given, built-in defaults are used.
        two_boxes_for_ar1: If truthy, one extra box per cell is predicted
            for layers whose aspect ratios contain 1. Also forwarded to
            `PriorBox`.
        limit_boxes: Forwarded to `PriorBox`.
        variances: Sequence of 4 positive values, forwarded to `PriorBox`
            as a NumPy array.
        weights_path: Optional path of a weights file, loaded by layer name.
        frozen_layers: Optional collection of layer names to mark as
            non-trainable.
        summary: If True, print the model summary.
        plot: If True, write a plot of the model to 'SSD512.png'.

    # Returns
        A Keras `Model` whose output concatenates, per box, the 4 box
        offsets, the `num_classes` softmax confidences and the 8 prior-box
        values.

    # Raises
        ValueError: If `aspect_ratios_per_layer` or `scales` has the wrong
            length, if neither scales nor min/max scale are available, or
            if `variances` is not 4 positive values.
    """
    # SSD512 uses 7 predictor layers (the original SSD300 has 6).
    n_predictor_layers = 7
    default_aspect_ratios = [[0.5, 1.0, 2.0],
                             [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                             [0.5, 1.0, 2.0],
                             [0.5, 1.0, 2.0]]

    # Get a few exceptions out of the way first
    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        print(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` both are None. Default aspect ratios of the paper implementation are used.")
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}.".format(
                    n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError("Either `min_scale` and `max_scale` or `scales` need to be specified.")
    # Explicit None/length test instead of truthiness so that NumPy arrays
    # are accepted; an empty sequence still falls back to min/max scale.
    if scales is not None and len(scales) > 0:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError("It must be either scales is None or len(scales) == {}, but len(scales) == {}.".format(
                n_predictor_layers + 1, len(scales)))
    else:
        # No explicit scaling factors: compute them from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:
        raise ValueError("4 variance values must be pased, but {} values were received.".format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError("All variances must be >0, but the variances given are {}".format(variances))

    # Set the aspect ratios for each predictor layer. Per-layer ratios win
    # over the global ones; the defaults apply when neither is usable, so
    # `aspect_ratios` is always bound.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    elif aspect_ratios_global:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers
    else:
        aspect_ratios = default_aspect_ratios

    (aspect_ratios_conv4_3, aspect_ratios_fc7, aspect_ratios_conv6_2,
     aspect_ratios_conv7_2, aspect_ratios_conv8_2, aspect_ratios_conv9_2,
     aspect_ratios_conv10_2) = aspect_ratios[:n_predictor_layers]

    # Number of boxes predicted per cell for each predictor layer; this fixes
    # the channel count of the predictor convolutions.
    # +1 accounts for the second box for aspect ratio 1.
    n_boxes = [len(ratios) + 1 if (1 in ratios) and two_boxes_for_ar1 else len(ratios)
               for ratios in aspect_ratios]
    (n_boxes_conv4_3, n_boxes_fc7, n_boxes_conv6_2, n_boxes_conv7_2,
     n_boxes_conv8_2, n_boxes_conv9_2, n_boxes_conv10_2) = n_boxes[:n_predictor_layers]

    input_layer = Input(shape=input_shape)
    img_height, img_width = input_shape[0], input_shape[1]

    # Block 1 -----------------------------------------------
    conv1_1 = Conv2D(64, (3, 3), name='conv1_1', padding='same', activation='relu')(input_layer)
    conv1_2 = Conv2D(64, (3, 3), name='conv1_2', padding='same', activation='relu')(conv1_1)
    pool1 = MaxPooling2D(name='pool1', pool_size=(2, 2), strides=(2, 2), padding='same')(conv1_2)

    # Block 2 ----------------------------------------------
    conv2_1 = Conv2D(128, (3, 3), name='conv2_1', padding='same', activation='relu')(pool1)
    conv2_2 = Conv2D(128, (3, 3), name='conv2_2', padding='same', activation='relu')(conv2_1)
    pool2 = MaxPooling2D(name='pool2', pool_size=(2, 2), strides=(2, 2), padding='same')(conv2_2)

    # Block 3 ----------------------------------------------
    conv3_1 = Conv2D(256, (3, 3), name='conv3_1', padding='same', activation='relu')(pool2)
    conv3_2 = Conv2D(256, (3, 3), name='conv3_2', padding='same', activation='relu')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3), name='conv3_3', padding='same', activation='relu')(conv3_2)
    pool3 = MaxPooling2D(name='pool3', pool_size=(2, 2), strides=(2, 2), padding='same')(conv3_3)

    # Block 4 ---------------------------------------------
    conv4_1 = Conv2D(512, (3, 3), name='conv4_1', padding='same', activation='relu')(pool3)
    conv4_2 = Conv2D(512, (3, 3), name='conv4_2', padding='same', activation='relu')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3), name='conv4_3', padding='same', activation='relu')(conv4_2)
    pool4 = MaxPooling2D(name='pool4', pool_size=(2, 2), strides=(2, 2), padding='same')(conv4_3)

    # Block 5 --------------------------------------------
    conv5_1 = Conv2D(512, (3, 3), name='conv5_1', padding='same', activation='relu')(pool4)
    conv5_2 = Conv2D(512, (3, 3), name='conv5_2', padding='same', activation='relu')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3), name='conv5_3', padding='same', activation='relu')(conv5_2)
    pool5 = MaxPooling2D(name='pool5', pool_size=(3, 3), strides=(1, 1), padding='same')(conv5_3)

    # Block 6 --------------------------------------------
    fc6 = Conv2D(1024, (3, 3), name='fc6', dilation_rate=(6, 6), padding='same',
                 activation='relu')(pool5)

    # Block 7 --------------------------------------------
    fc7 = Conv2D(1024, (1, 1), name='fc7', padding='same', activation='relu')(fc6)

    # EXTRAS
    # Block 8 --------------------------------------------
    conv6_1 = Conv2D(256, (1, 1), name='conv6_1', padding='same', activation='relu')(fc7)
    conv6_1z = ZeroPadding2D(name='conv6_1z')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3), name='conv6_2', strides=(2, 2), padding='valid',
                     activation='relu')(conv6_1z)

    # Block 9 --------------------------------------------
    conv7_1 = Conv2D(128, (1, 1), name='conv7_1', padding='same', activation='relu')(conv6_2)
    conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3), name='conv7_2', padding='valid', strides=(2, 2),
                     activation='relu')(conv7_1z)

    # Block 10 -------------------------------------------
    conv8_1 = Conv2D(128, (1, 1), name='conv8_1', padding='same', activation='relu')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3), name='conv8_2', padding='valid', strides=(1, 1),
                     activation='relu')(conv8_1)

    # Block 11 -------------------------------------------
    conv9_1 = Conv2D(128, (1, 1), name='conv9_1', padding='same', activation='relu')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3), name='conv9_2', padding='valid', strides=(1, 1),
                     activation='relu')(conv9_1)

    # Block 12 -------------------------------------------
    conv10_1 = Conv2D(128, (1, 1), name='conv10_1', padding='same', activation='relu')(conv9_2)
    conv10_2 = Conv2D(256, (4, 4), name='conv10_2', padding='valid', strides=(1, 1),
                      activation='relu')(conv10_1)

    # NOTE(review): PriorBox is called here as (img_height, img_width,
    # this_scale=..., next_scale=...), which differs from the
    # (img_size, min_size, max_size=...) calls elsewhere in this file -
    # confirm which PriorBox implementation is in scope.

    # Prediction from conv4_3 ----------------------------
    conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)
    conv4_3_norm_mbox_loc = Conv2D(n_boxes_conv4_3 * 4, (3, 3), name='conv4_3_norm_mbox_loc',
                                   padding='same')(conv4_3_norm)
    conv4_3_norm_mbox_loc_flat = Flatten(name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)
    conv4_3_norm_mbox_conf = Conv2D(n_boxes_conv4_3 * num_classes, (3, 3),
                                    name='conv4_3_norm_mbox_conf', padding='same')(conv4_3_norm)
    conv4_3_norm_mbox_conf_flat = Flatten(name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf)
    conv4_3_norm_mbox_priorbox = PriorBox(img_height, img_width,
                                          this_scale=scales[0], next_scale=scales[1],
                                          aspect_ratios=aspect_ratios_conv4_3,
                                          two_boxes_for_ar1=two_boxes_for_ar1,
                                          limit_boxes=limit_boxes, variances=variances,
                                          name='conv4_3_norm_mbox_priorbox')(conv4_3_norm)

    # Prediction from fc7 ---------------------------------
    fc7_mbox_conf = Conv2D(n_boxes_fc7 * num_classes, (3, 3), padding='same',
                           name='fc7_mbox_conf')(fc7)
    fc7_mbox_conf_flat = Flatten(name='fc7_mbox_conf_flat')(fc7_mbox_conf)
    fc7_mbox_loc = Conv2D(n_boxes_fc7 * 4, (3, 3), name='fc7_mbox_loc', padding='same')(fc7)
    fc7_mbox_loc_flat = Flatten(name='fc7_mbox_loc_flat')(fc7_mbox_loc)
    fc7_mbox_priorbox = PriorBox(img_height, img_width,
                                 this_scale=scales[1], next_scale=scales[2],
                                 aspect_ratios=aspect_ratios_fc7,
                                 two_boxes_for_ar1=two_boxes_for_ar1,
                                 limit_boxes=limit_boxes, variances=variances,
                                 name='fc7_mbox_priorbox')(fc7)

    # Prediction from conv6_2 ------------------------------
    conv6_2_mbox_conf = Conv2D(n_boxes_conv6_2 * num_classes, (3, 3), padding='same',
                               name='conv6_2_mbox_conf')(conv6_2)
    conv6_2_mbox_conf_flat = Flatten(name='conv6_2_mbox_conf_flat')(conv6_2_mbox_conf)
    conv6_2_mbox_loc = Conv2D(n_boxes_conv6_2 * 4, (3, 3), name='conv6_2_mbox_loc',
                              padding='same')(conv6_2)
    conv6_2_mbox_loc_flat = Flatten(name='conv6_2_mbox_loc_flat')(conv6_2_mbox_loc)
    conv6_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[2], next_scale=scales[3],
                                     aspect_ratios=aspect_ratios_conv6_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes, variances=variances,
                                     name='conv6_2_mbox_priorbox')(conv6_2)

    # Prediction from conv7_2 --------------------------------
    conv7_2_mbox_conf = Conv2D(n_boxes_conv7_2 * num_classes, (3, 3), padding='same',
                               name='conv7_2_mbox_conf')(conv7_2)
    conv7_2_mbox_conf_flat = Flatten(name='conv7_2_mbox_conf_flat')(conv7_2_mbox_conf)
    conv7_2_mbox_loc = Conv2D(n_boxes_conv7_2 * 4, (3, 3), padding='same',
                              name='conv7_2_mbox_loc')(conv7_2)
    conv7_2_mbox_loc_flat = Flatten(name='conv7_2_mbox_loc_flat')(conv7_2_mbox_loc)
    conv7_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[3], next_scale=scales[4],
                                     aspect_ratios=aspect_ratios_conv7_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes, variances=variances,
                                     name='conv7_2_mbox_priorbox')(conv7_2)

    # Prediction from conv8_2 -------------------------------
    conv8_2_mbox_conf = Conv2D(n_boxes_conv8_2 * num_classes, (3, 3), padding='same',
                               name='conv8_2_mbox_conf')(conv8_2)
    conv8_2_mbox_conf_flat = Flatten(name='conv8_2_mbox_conf_flat')(conv8_2_mbox_conf)
    conv8_2_mbox_loc = Conv2D(n_boxes_conv8_2 * 4, (3, 3), padding='same',
                              name='conv8_2_mbox_loc')(conv8_2)
    conv8_2_mbox_loc_flat = Flatten(name='conv8_2_mbox_loc_flat')(conv8_2_mbox_loc)
    conv8_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[4], next_scale=scales[5],
                                     aspect_ratios=aspect_ratios_conv8_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes, variances=variances,
                                     name='conv8_2_mbox_priorbox')(conv8_2)

    # Prediction from conv9_2 -------------------------------
    conv9_2_mbox_conf = Conv2D(n_boxes_conv9_2 * num_classes, (3, 3), padding='same',
                               name='conv9_2_mbox_conf')(conv9_2)
    conv9_2_mbox_conf_flat = Flatten(name='conv9_2_mbox_conf_flat')(conv9_2_mbox_conf)
    conv9_2_mbox_loc = Conv2D(n_boxes_conv9_2 * 4, (3, 3), padding='same',
                              name='conv9_2_mbox_loc')(conv9_2)
    conv9_2_mbox_loc_flat = Flatten(name='conv9_2_mbox_loc_flat')(conv9_2_mbox_loc)
    conv9_2_mbox_priorbox = PriorBox(img_height, img_width,
                                     this_scale=scales[5], next_scale=scales[6],
                                     aspect_ratios=aspect_ratios_conv9_2,
                                     two_boxes_for_ar1=two_boxes_for_ar1,
                                     limit_boxes=limit_boxes, variances=variances,
                                     name='conv9_2_mbox_priorbox')(conv9_2)

    # Prediction from conv10_2 ------------------------------
    conv10_2_mbox_conf = Conv2D(n_boxes_conv10_2 * num_classes, (3, 3), padding='same',
                                name='conv10_2_mbox_conf')(conv10_2)
    conv10_2_mbox_conf_flat = Flatten(name='conv10_2_mbox_conf_flat')(conv10_2_mbox_conf)
    conv10_2_mbox_loc = Conv2D(n_boxes_conv10_2 * 4, (3, 3), padding='same',
                               name='conv10_2_mbox_loc')(conv10_2)
    conv10_2_mbox_loc_flat = Flatten(name='conv10_2_mbox_loc_flat')(conv10_2_mbox_loc)
    conv10_2_mbox_priorbox = PriorBox(img_height, img_width,
                                      this_scale=scales[6], next_scale=scales[7],
                                      aspect_ratios=aspect_ratios_conv10_2,
                                      two_boxes_for_ar1=two_boxes_for_ar1,
                                      limit_boxes=limit_boxes, variances=variances,
                                      name='conv10_2_mbox_priorbox')(conv10_2)

    # Gather all predictions -------------------------------------------
    mbox_loc = concatenate([conv4_3_norm_mbox_loc_flat,
                            fc7_mbox_loc_flat,
                            conv6_2_mbox_loc_flat,
                            conv7_2_mbox_loc_flat,
                            conv8_2_mbox_loc_flat,
                            conv9_2_mbox_loc_flat,
                            conv10_2_mbox_loc_flat],
                           axis=1, name='mbox_loc')
    mbox_conf = concatenate([conv4_3_norm_mbox_conf_flat,
                             fc7_mbox_conf_flat,
                             conv6_2_mbox_conf_flat,
                             conv7_2_mbox_conf_flat,
                             conv8_2_mbox_conf_flat,
                             conv9_2_mbox_conf_flat,
                             conv10_2_mbox_conf_flat],
                            axis=1, name='mbox_conf')

    # Reshape the anchor box tensors, yielding 3D tensors of shape
    # `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)
    conv10_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv10_2_mbox_priorbox_reshape')(conv10_2_mbox_priorbox)

    mbox_priorbox = concatenate([conv4_3_norm_mbox_priorbox_reshape,
                                 fc7_mbox_priorbox_reshape,
                                 conv6_2_mbox_priorbox_reshape,
                                 conv7_2_mbox_priorbox_reshape,
                                 conv8_2_mbox_priorbox_reshape,
                                 conv9_2_mbox_priorbox_reshape,
                                 conv10_2_mbox_priorbox_reshape],
                                axis=1, name='mbox_priorbox')

    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        # Fall back to the backend helper so num_boxes is always defined.
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes), name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)

    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox], axis=2, name='predictions')

    model = Model(inputs=input_layer, outputs=predictions)

    if weights_path is not None:
        model.load_weights(weights_path, by_name=True)

    if frozen_layers is not None:
        for layer in model.layers:
            if layer.name in frozen_layers:
                layer.trainable = False

    if summary:
        model.summary()

    if plot:
        plot_model(model, to_file='SSD512.png')
        SVG(model_to_dot(model).create(prog='dot', format='svg'))

    return model
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background. When it differs
            from 21, the confidence layers get a `_<num_classes>` name suffix.

    # Returns
        A Keras `Model` mapping the input image to the `predictions` tensor,
        which concatenates box offsets, softmax class confidences and prior
        boxes along the last axis.

    # References
        https://arxiv.org/abs/1512.02325
    """
    # The SSD network: every layer tensor is stored under its name.
    net = {}
    # Confidence layers are renamed when the class count is not the default 21.
    conf_suffix = '' if num_classes == 21 else '_{}'.format(num_classes)
    variances = [0.1, 0.1, 0.2, 0.2]

    def add_conv_head(source, prefix, num_priors, min_size, aspect_ratios, max_size=None):
        """Add loc/conf convolutions and prior boxes predicted from net[source]."""
        net[prefix + '_mbox_loc'] = Convolution2D(
            num_priors * 4, 3, 3, border_mode='same',
            name=prefix + '_mbox_loc')(net[source])
        flatten = Flatten(name=prefix + '_mbox_loc_flat')
        net[prefix + '_mbox_loc_flat'] = flatten(net[prefix + '_mbox_loc'])

        net[prefix + '_mbox_conf'] = Convolution2D(
            num_priors * num_classes, 3, 3, border_mode='same',
            name=prefix + '_mbox_conf' + conf_suffix)(net[source])
        flatten = Flatten(name=prefix + '_mbox_conf_flat')
        net[prefix + '_mbox_conf_flat'] = flatten(net[prefix + '_mbox_conf'])

        # Only pass max_size when one is given, as the explicit calls did.
        prior_kwargs = {}
        if max_size is not None:
            prior_kwargs['max_size'] = max_size
        priorbox = PriorBox(img_size, min_size, aspect_ratios=aspect_ratios,
                            variances=variances, name=prefix + '_mbox_priorbox',
                            **prior_kwargs)
        net[prefix + '_mbox_priorbox'] = priorbox(net[source])

    # Block 1
    # Input source
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    net['input'] = input_tensor
    net['conv1_1'] = Convolution2D(64, 3, 3, activation='relu', border_mode='same',
                                   name='conv1_1')(net['input'])
    net['conv1_2'] = Convolution2D(64, 3, 3, activation='relu', border_mode='same',
                                   name='conv1_2')(net['conv1_1'])
    net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool1')(net['conv1_2'])

    # Block 2
    net['conv2_1'] = Convolution2D(128, 3, 3, activation='relu', border_mode='same',
                                   name='conv2_1')(net['pool1'])
    net['conv2_2'] = Convolution2D(128, 3, 3, activation='relu', border_mode='same',
                                   name='conv2_2')(net['conv2_1'])
    net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool2')(net['conv2_2'])

    # Block 3
    net['conv3_1'] = Convolution2D(256, 3, 3, activation='relu', border_mode='same',
                                   name='conv3_1')(net['pool2'])
    net['conv3_2'] = Convolution2D(256, 3, 3, activation='relu', border_mode='same',
                                   name='conv3_2')(net['conv3_1'])
    net['conv3_3'] = Convolution2D(256, 3, 3, activation='relu', border_mode='same',
                                   name='conv3_3')(net['conv3_2'])
    net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool3')(net['conv3_3'])

    # Block 4
    net['conv4_1'] = Convolution2D(512, 3, 3, activation='relu', border_mode='same',
                                   name='conv4_1')(net['pool3'])
    net['conv4_2'] = Convolution2D(512, 3, 3, activation='relu', border_mode='same',
                                   name='conv4_2')(net['conv4_1'])
    net['conv4_3'] = Convolution2D(512, 3, 3, activation='relu', border_mode='same',
                                   name='conv4_3')(net['conv4_2'])
    net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), border_mode='same',
                                name='pool4')(net['conv4_3'])

    # Block 5
    net['conv5_1'] = Convolution2D(512, 3, 3, activation='relu', border_mode='same',
                                   name='conv5_1')(net['pool4'])
    net['conv5_2'] = Convolution2D(512, 3, 3, activation='relu', border_mode='same',
                                   name='conv5_2')(net['conv5_1'])
    net['conv5_3'] = Convolution2D(512, 3, 3, activation='relu', border_mode='same',
                                   name='conv5_3')(net['conv5_2'])
    net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
                                name='pool5')(net['conv5_3'])

    # FC6 (atrous / dilated convolution)
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6), activation='relu',
                                     border_mode='same', name='fc6')(net['pool5'])

    # FC7
    net['fc7'] = Convolution2D(1024, 1, 1, activation='relu', border_mode='same',
                               name='fc7')(net['fc6'])

    # Block 6
    net['conv6_1'] = Convolution2D(256, 1, 1, activation='relu', border_mode='same',
                                   name='conv6_1')(net['fc7'])
    net['conv6_2'] = Convolution2D(512, 3, 3, subsample=(2, 2), activation='relu',
                                   border_mode='same', name='conv6_2')(net['conv6_1'])

    # Block 7 (explicit zero padding followed by a 'valid' convolution)
    net['conv7_1'] = Convolution2D(128, 1, 1, activation='relu', border_mode='same',
                                   name='conv7_1')(net['conv6_2'])
    conv7_1_padded = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = Convolution2D(256, 3, 3, subsample=(2, 2), activation='relu',
                                   border_mode='valid', name='conv7_2')(conv7_1_padded)

    # Block 8
    net['conv8_1'] = Convolution2D(128, 1, 1, activation='relu', border_mode='same',
                                   name='conv8_1')(net['conv7_2'])
    net['conv8_2'] = Convolution2D(256, 3, 3, subsample=(2, 2), activation='relu',
                                   border_mode='same', name='conv8_2')(net['conv8_1'])

    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])

    # Prediction from conv4_3 (normalised first; no max_size for this head)
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    add_conv_head('conv4_3_norm', 'conv4_3_norm', 3, 30.0, [2])

    # Predictions from fc7, conv6_2, conv7_2 and conv8_2
    add_conv_head('fc7', 'fc7', 6, 60.0, [2, 3], max_size=114.0)
    add_conv_head('conv6_2', 'conv6_2', 6, 114.0, [2, 3], max_size=168.0)
    add_conv_head('conv7_2', 'conv7_2', 6, 168.0, [2, 3], max_size=222.0)
    add_conv_head('conv8_2', 'conv8_2', 6, 222.0, [2, 3], max_size=276.0)

    # Prediction from pool6 (globally pooled vector, hence Dense predictors)
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(num_priors * 4,
                                       name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(num_priors * num_classes,
                                        name='pool6_mbox_conf_flat' + conf_suffix)(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
                        variances=variances, name='pool6_mbox_priorbox')
    # PriorBox is applied to a feature map, so the pooled vector is reshaped
    # to a 1x1 map first.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape, name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions
    head_prefixes = ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']
    net['mbox_loc'] = merge(
        [net[prefix + '_mbox_loc_flat'] for prefix in head_prefixes],
        mode='concat', concat_axis=1, name='mbox_loc')
    net['mbox_conf'] = merge(
        [net[prefix + '_mbox_conf_flat'] for prefix in head_prefixes],
        mode='concat', concat_axis=1, name='mbox_conf')
    net['mbox_priorbox'] = merge(
        [net[prefix + '_mbox_priorbox'] for prefix in head_prefixes],
        mode='concat', concat_axis=1, name='mbox_priorbox')

    # Reshape and merge
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        # Fall back to the backend helper so num_boxes is always defined.
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4), name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax', name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat', concat_axis=2, name='predictions')

    # Build model
    model = Model(net['input'], net['predictions'])
    return model
def SSD300(input_shape, num_classes=21):
    """Build the SSD300 detector on a VGG16-style backbone.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A Keras `Model` mapping the input image to the `predictions`
        tensor: box locations reshaped to (num_boxes, 4), softmax class
        scores reshaped to (num_boxes, num_classes) and the PriorBox
        outputs, concatenated along axis 2.

    # Side effects
        Writes a plot of the model to "./ssdmodel.png" through
        `keras.utils.plot_model`.

    # References
        https://arxiv.org/abs/1512.02325
    """
    # NOTE(review): the MarkNet branch is built here but is not wired into
    # the SSD graph yet -- the merge at the pool6 confidences is disabled.
    net2 = MarkNet(input_shape=(64, 64, 3))
    net = {}
    net['input'] = Input(shape=input_shape)
    # Handed to every PriorBox layer below.
    img_size = (input_shape[1], input_shape[0])

    def _relu_conv(name, filters, kernel, source, **kwargs):
        """Add a ReLU convolution called `name`; border_mode defaults to 'same'."""
        kwargs.setdefault('border_mode', 'same')
        net[name] = Convolution2D(filters, kernel, kernel, activation='relu',
                                  name=name, **kwargs)(source)
        return net[name]

    def _max_pool(name, source, pool=(2, 2), strides=(2, 2)):
        """Add a 'same'-padded max pooling layer called `name`."""
        net[name] = MaxPooling2D(pool, strides=strides, border_mode='same',
                                 name=name)(source)
        return net[name]

    def _mbox_head(prefix, source, num_priors, min_size, max_size=None,
                   aspect_ratios=None):
        """Attach the loc / conf / priorbox layers named `prefix`_mbox_* to `source`."""
        loc = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                            name=prefix + '_mbox_loc')(source)
        net[prefix + '_mbox_loc'] = loc
        net[prefix + '_mbox_loc_flat'] = Flatten(
            name=prefix + '_mbox_loc_flat')(loc)
        # The conf layer name carries the class count when it differs from 21.
        conf_name = prefix + '_mbox_conf'
        if num_classes != 21:
            conf_name += '_{}'.format(num_classes)
        conf = Convolution2D(num_priors * num_classes, 3, 3,
                             border_mode='same', name=conf_name)(source)
        net[prefix + '_mbox_conf'] = conf
        net[prefix + '_mbox_conf_flat'] = Flatten(
            name=prefix + '_mbox_conf_flat')(conf)
        # Only forward max_size when one was given, as the first head omits it.
        prior_kwargs = {} if max_size is None else {'max_size': max_size}
        priorbox = PriorBox(img_size, min_size, aspect_ratios=aspect_ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_mbox_priorbox', **prior_kwargs)
        net[prefix + '_mbox_priorbox'] = priorbox(source)

    # Block 1
    x = _relu_conv('conv1_1', 64, 3, net['input'])
    x = _relu_conv('conv1_2', 64, 3, x)
    x = _max_pool('pool1', x)
    # Block 2
    x = _relu_conv('conv2_1', 128, 3, x)
    x = _relu_conv('conv2_2', 128, 3, x)
    x = _max_pool('pool2', x)
    # Block 3
    x = _relu_conv('conv3_1', 256, 3, x)
    x = _relu_conv('conv3_2', 256, 3, x)
    x = _relu_conv('conv3_3', 256, 3, x)
    x = _max_pool('pool3', x)
    # Block 4
    x = _relu_conv('conv4_1', 512, 3, x)
    x = _relu_conv('conv4_2', 512, 3, x)
    x = _relu_conv('conv4_3', 512, 3, x)
    x = _max_pool('pool4', x)
    # Block 5 (pool5 uses a 3x3 window with stride 1)
    x = _relu_conv('conv5_1', 512, 3, x)
    x = _relu_conv('conv5_2', 512, 3, x)
    x = _relu_conv('conv5_3', 512, 3, x)
    x = _max_pool('pool5', x, pool=(3, 3), strides=(1, 1))
    # FC6
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(x)
    # FC7
    x = _relu_conv('fc7', 1024, 1, net['fc6'])
    # Block 6
    x = _relu_conv('conv6_1', 256, 1, x)
    x = _relu_conv('conv6_2', 512, 3, x, subsample=(2, 2))
    # Block 7: explicit zero padding followed by a 'valid' strided convolution
    x = _relu_conv('conv7_1', 128, 1, x)
    x = ZeroPadding2D()(x)
    x = _relu_conv('conv7_2', 256, 3, x, subsample=(2, 2),
                   border_mode='valid')
    # Block 8
    x = _relu_conv('conv8_1', 128, 1, x)
    x = _relu_conv('conv8_2', 256, 3, x, subsample=(2, 2))
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(x)

    # Prediction from conv4_3 (normalized first)
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    _mbox_head('conv4_3_norm', net['conv4_3_norm'], 3, 30.0,
               aspect_ratios=[2])
    # Predictions from fc7, conv6_2, conv7_2 and conv8_2
    _mbox_head('fc7', net['fc7'], 6, 60.0, max_size=114.0,
               aspect_ratios=[2, 3])
    _mbox_head('conv6_2', net['conv6_2'], 6, 114.0, max_size=168.0,
               aspect_ratios=[2, 3])
    _mbox_head('conv7_2', net['conv7_2'], 6, 168.0, max_size=222.0,
               aspect_ratios=[2, 3])
    _mbox_head('conv8_2', net['conv8_2'], 6, 222.0, max_size=276.0,
               aspect_ratios=[2, 3])

    # Prediction from pool6: Dense heads instead of convolutions
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    # TODO: merge with the MarkNet output here,
    # e.g. Add()([x, net2['dense2m']]).
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes, name=name)(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is applied to pool6 reshaped into a 1x1 map with 256 channels.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions
    heads = ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']
    net['mbox_loc'] = concatenate(
        [net[head + '_mbox_loc_flat'] for head in heads],
        axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate(
        [net[head + '_mbox_conf_flat'] for head in heads],
        axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate(
        [net[head + '_mbox_priorbox'] for head in heads],
        axis=1, name='mbox_priorbox')

    # Prefer the `_keras_shape` attribute when the tensor has one; otherwise
    # ask the backend, so that num_boxes is always defined.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    # Final output
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2, name='predictions')
    model = Model(net['input'], net['predictions'])
    # Plot the model structure.
    keras.utils.plot_model(model, "./ssdmodel.png", show_shapes=True)
    return model
def SSD300(input_shape=(300, 300, 3), num_classes=21):
    """Build the SSD300 detector on a ResNet50 backbone.

    The prediction layers keep the layer names of the VGG variant
    (conv4_3_norm, fc7, conv6_2, ...) even though several of them are fed
    from different feature maps here; see the comments on each head.

    # Arguments
        input_shape: Shape of the input image,
            expected to be (300, 300, 3).
        num_classes: Number of classes including background.

    # Returns
        A Keras `Model` mapping the input image to the `predictions`
        tensor: box locations reshaped to (num_boxes, 4), softmax class
        scores reshaped to (num_boxes, num_classes) and the PriorBox
        outputs, concatenated along axis 2.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}
    # Input(shape=...) always yields a Keras tensor, so it is used directly.
    net['input'] = Input(shape=input_shape)
    # Handed to every PriorBox layer below.
    img_size = (input_shape[1], input_shape[0])

    def _relu_conv(name, filters, kernel, source, **kwargs):
        """Add a ReLU convolution called `name`; border_mode defaults to 'same'."""
        kwargs.setdefault('border_mode', 'same')
        net[name] = Convolution2D(filters, kernel, kernel, activation='relu',
                                  name=name, **kwargs)(source)
        return net[name]

    def _padded_strided_conv(name, filters, source):
        """Zero-pad `source`, then add a 3x3 stride-2 'valid' ReLU convolution."""
        padded = ZeroPadding2D()(source)
        return _relu_conv(name, filters, 3, padded, subsample=(2, 2),
                          border_mode='valid')

    def _residual_stage(stage, filters, blocks, source, **conv_kwargs):
        """Add conv_block 'a' plus one identity_block per letter in `blocks`.

        The outputs are stored as conv<stage>_1, conv<stage>_2, ...
        """
        x = conv_block(source, 3, list(filters), stage=stage, block='a',
                       **conv_kwargs)
        net['conv{}_1'.format(stage)] = x
        for index, block in enumerate(blocks, start=2):
            x = identity_block(x, 3, list(filters), stage=stage, block=block)
            net['conv{}_{}'.format(stage, index)] = x
        return x

    def _mbox_head(prefix, source, num_priors, min_size, max_size=None,
                   aspect_ratios=None):
        """Attach the loc / conf / priorbox layers named `prefix`_mbox_* to `source`."""
        loc = Convolution2D(num_priors * 4, 3, 3, border_mode='same',
                            name=prefix + '_mbox_loc')(source)
        net[prefix + '_mbox_loc'] = loc
        net[prefix + '_mbox_loc_flat'] = Flatten(
            name=prefix + '_mbox_loc_flat')(loc)
        # The conf layer name carries the class count when it differs from 21.
        conf_name = prefix + '_mbox_conf'
        if num_classes != 21:
            conf_name += '_{}'.format(num_classes)
        conf = Convolution2D(num_priors * num_classes, 3, 3,
                             border_mode='same', name=conf_name)(source)
        net[prefix + '_mbox_conf'] = conf
        net[prefix + '_mbox_conf_flat'] = Flatten(
            name=prefix + '_mbox_conf_flat')(conf)
        # Only forward max_size when one was given, as the first head omits it.
        prior_kwargs = {} if max_size is None else {'max_size': max_size}
        priorbox = PriorBox(img_size, min_size, aspect_ratios=aspect_ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_mbox_priorbox', **prior_kwargs)
        net[prefix + '_mbox_priorbox'] = priorbox(source)

    ###########################################################################
    # ResNet50 architecture
    # (adapted from https://github.com/fchollet/deep-learning-models/resnet50.py)
    if K.image_dim_ordering() == 'tf':
        bn_axis = 3
    else:
        bn_axis = 1
    # Block 1
    x = ZeroPadding2D((3, 3))(net['input'])
    net['conv1'] = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
    net['bn_conv1'] = BatchNormalization(axis=bn_axis,
                                         name='bn_conv1')(net['conv1'])
    x = Activation('relu')(net['bn_conv1'])
    x = ZeroPadding2D((1, 1))(x)
    net['pool1'] = MaxPooling2D((3, 3), strides=(2, 2))(x)
    # Blocks 2-5
    x = _residual_stage(2, [64, 64, 256], 'bc', net['pool1'], strides=(1, 1))
    x = _residual_stage(3, [128, 128, 512], 'bcd', x)
    x = _residual_stage(4, [256, 256, 1024], 'bcdef', x)
    x = _residual_stage(5, [512, 512, 2048], 'bc', x)
    # ResNet's own average pool feeds the classifier top; the VGG-style
    # pooling is used instead so that fc6 gets an appropriately sized input.
    net['pool5v'] = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same',
                                 name='pool5v')(x)
    # END ResNet50
    ###########################################################################

    # FC6
    net['fc6'] = AtrousConvolution2D(1024, 3, 3, atrous_rate=(6, 6),
                                     activation='relu', border_mode='same',
                                     name='fc6')(net['pool5v'])
    # FC7
    x = _relu_conv('fc7', 1024, 1, net['fc6'])
    # Block 6
    x = _relu_conv('conv6_1', 256, 1, x)
    x = _padded_strided_conv('conv6_2', 512, x)
    # Block 7
    x = _relu_conv('conv7_1', 128, 1, x)
    x = _padded_strided_conv('conv7_2', 256, x)
    # Block 8
    x = _relu_conv('conv8_1', 128, 1, x)
    x = _padded_strided_conv('conv8_2', 256, x)
    # Last Pool
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(x)

    # Prediction from conv3_4 (layers are still named conv4_3_*).
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv3_4'])
    _mbox_head('conv4_3_norm', net['conv4_3_norm'], 3, 30.0,
               aspect_ratios=[2])
    # Prediction from conv4_6 (layers are still named fc7_*).
    _mbox_head('fc7', net['conv4_6'], 6, 60.0, max_size=114.0,
               aspect_ratios=[2, 3])
    # Prediction from fc7, projected to 512 channels first
    # (layers are still named conv6_2_*).
    _relu_conv('fc7_mbox_pre', 512, 1, net['fc7'])
    _mbox_head('conv6_2', net['fc7_mbox_pre'], 6, 114.0, max_size=168.0,
               aspect_ratios=[2, 3])
    # Prediction from conv6_2, projected down to 256 channels first
    # (layers are still named conv7_2_*).
    _relu_conv('conv6_2_mbox_pre', 256, 1, net['conv6_2'])
    _mbox_head('conv7_2', net['conv6_2_mbox_pre'], 6, 168.0, max_size=222.0,
               aspect_ratios=[2, 3])
    # Prediction from conv7_2, no projection needed
    # (layers are still named conv8_2_*).
    _mbox_head('conv8_2', net['conv7_2'], 6, 222.0, max_size=276.0,
               aspect_ratios=[2, 3])

    # Prediction from pool6: Dense heads instead of convolutions
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes, name=name)(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is applied to pool6 reshaped into a 1x1 map with 256 channels.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # Gather all predictions
    heads = ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']
    net['mbox_loc'] = merge(
        [net[head + '_mbox_loc_flat'] for head in heads],
        mode='concat', concat_axis=1, name='mbox_loc')
    net['mbox_conf'] = merge(
        [net[head + '_mbox_conf_flat'] for head in heads],
        mode='concat', concat_axis=1, name='mbox_conf')
    net['mbox_priorbox'] = merge(
        [net[head + '_mbox_priorbox'] for head in heads],
        mode='concat', concat_axis=1, name='mbox_priorbox')

    # Prefer the `_keras_shape` attribute when the tensor has one; otherwise
    # ask the backend, so that num_boxes is always defined.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = merge(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        mode='concat', concat_axis=2, name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
def SSD(input_shape, num_classes):
    """Build an SSD detector on top of a truncated MobileNet.

    A MobileNet (created for 224x224x3 inputs with weights='imagenet',
    without its top) is cut at layer 'conv_dw_11_relu' and applied to the
    model input as a feature extractor. Extra convolution blocks
    conv11 .. conv17_2 follow, six of which carry prediction heads.

    # Arguments
        input_shape: Shape of the input image. Only the first two entries
            are read; the channel count is always set to 3.
        num_classes: Number of classes including background.

    # Returns
        A Keras `Model` mapping the input image to the `predictions`
        tensor: box locations reshaped to (num_boxes, 4), softmax class
        scores reshaped to (num_boxes, num_classes) and the PriorBox
        outputs, concatenated along axis 2.

    # References
        https://arxiv.org/abs/1512.02325
    """
    # Handed to every PriorBox layer below.
    img_size = (input_shape[1], input_shape[0])
    # NOTE(review): the first two entries of input_shape are swapped for the
    # Input layer as well; harmless for square images -- confirm for others.
    model_input_shape = (input_shape[1], input_shape[0], 3)
    mobilenet_input_shape = (224, 224, 3)
    net = {}
    net['input'] = Input(model_input_shape)

    def _conv_bn_relu(layer_cls, suffix, filters, kernel, source, **kwargs):
        """Add conv<suffix> -> bn<suffix> -> ReLU; store the result as conv<suffix>."""
        x = layer_cls(filters, kernel, padding='same', name='conv' + suffix,
                      **kwargs)(source)
        x = BatchNormalization(momentum=0.99, name='bn' + suffix)(x)
        net['conv' + suffix] = Activation('relu')(x)
        return net['conv' + suffix]

    def _mbox_head(prefix, num_priors, min_size, max_size, aspect_ratios):
        """Attach 1x1 loc / conf convolutions and a PriorBox to net[prefix]."""
        source = net[prefix]
        loc = Conv2D(num_priors * 4, (1, 1), padding='same',
                     name=prefix + '_mbox_loc')(source)
        net[prefix + '_mbox_loc'] = loc
        net[prefix + '_mbox_loc_flat'] = Flatten(
            name=prefix + '_mbox_loc_flat')(loc)
        # The conf layer name carries the class count when it differs from 21.
        conf_name = prefix + '_mbox_conf'
        if num_classes != 21:
            conf_name += '_{}'.format(num_classes)
        conf = Conv2D(num_priors * num_classes, (1, 1), padding='same',
                      name=conf_name)(source)
        net[prefix + '_mbox_conf'] = conf
        net[prefix + '_mbox_conf_flat'] = Flatten(
            name=prefix + '_mbox_conf_flat')(conf)
        priorbox = PriorBox(img_size, min_size, max_size=max_size,
                            aspect_ratios=aspect_ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_mbox_priorbox')
        net[prefix + '_mbox_priorbox'] = priorbox(source)

    # MobileNet truncated at conv_dw_11_relu, used as the feature extractor.
    mobilenet = MobileNet(input_shape=mobilenet_input_shape,
                          include_top=False, weights='imagenet')
    feature_extractor = Model(
        inputs=mobilenet.input,
        outputs=mobilenet.get_layer('conv_dw_11_relu').output)
    features = feature_extractor(net['input'])

    x = _conv_bn_relu(Conv2D, '11', 512, (1, 1), features)
    # Block #(19,19)
    x = _conv_bn_relu(SeparableConv2D, '12dw', 512, (3, 3), x,
                      strides=(2, 2))
    x = _conv_bn_relu(Conv2D, '12', 1024, (1, 1), x)
    x = _conv_bn_relu(SeparableConv2D, '13dw', 1024, (3, 3), x)
    x = _conv_bn_relu(Conv2D, '13', 1024, (1, 1), x)
    # Extra feature layers: each 1x1 convolution is followed by a
    # 3x3 stride-2 convolution.
    x = _conv_bn_relu(Conv2D, '14_1', 256, (1, 1), x)
    x = _conv_bn_relu(Conv2D, '14_2', 512, (3, 3), x, strides=(2, 2))
    x = _conv_bn_relu(Conv2D, '15_1', 128, (1, 1), x)
    x = _conv_bn_relu(Conv2D, '15_2', 256, (3, 3), x, strides=(2, 2))
    x = _conv_bn_relu(Conv2D, '16_1', 128, (1, 1), x)
    x = _conv_bn_relu(Conv2D, '16_2', 256, (3, 3), x, strides=(2, 2))
    x = _conv_bn_relu(Conv2D, '17_1', 64, (1, 1), x)
    x = _conv_bn_relu(Conv2D, '17_2', 128, (3, 3), x, strides=(2, 2))

    # Prediction from conv11
    _mbox_head('conv11', 3, 60, None, [2])
    # Prediction from conv13
    _mbox_head('conv13', 6, 105.0, 150.0, [2, 3])
    # Prediction from conv14_2
    _mbox_head('conv14_2', 6, 150, 195.0, [2, 3])
    # Prediction from conv15_2
    _mbox_head('conv15_2', 6, 195.0, 240.0, [2, 3])
    # Prediction from conv16_2
    _mbox_head('conv16_2', 6, 240.0, 285.0, [2, 3])
    # Prediction from conv17_2
    _mbox_head('conv17_2', 6, 285.0, 300.0, [2, 3])

    # Gather all predictions
    heads = ['conv11', 'conv13', 'conv14_2', 'conv15_2', 'conv16_2',
             'conv17_2']
    net['mbox_loc'] = concatenate(
        [net[head + '_mbox_loc_flat'] for head in heads],
        axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate(
        [net[head + '_mbox_conf_flat'] for head in heads],
        axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate(
        [net[head + '_mbox_priorbox'] for head in heads],
        axis=1, name='mbox_priorbox')

    # Prefer the `_keras_shape` attribute when the tensor has one; otherwise
    # ask the backend, so that num_boxes is always defined.
    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2, name='predictions')
    model = Model(net['input'], net['predictions'])
    return model
def SSD300v2(input_shape, num_classes=21, featurte_map=None):
    """SSD300 architecture built from explicitly declared VGG16-style layers.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.
        featurte_map: Optional name of an intermediate head output. When it
            equals 'conv4_3_norm_mbox_loc_flat', 'fc7_mbox_loc_flat',
            'conv4_3_norm_mbox_conf_flat' or 'fc7_mbox_conf_flat', the
            function returns `set_return_model(...)` for that tensor instead
            of the full detector. The misspelled parameter name is kept so
            existing keyword callers keep working.

    # Returns
        A `Model` from the input layer to the `predictions` tensor (box
        offsets, softmax class scores and prior boxes concatenated along
        axis 2), or the result of `set_return_model` when `featurte_map`
        selects one of the intermediate outputs listed above.

    # References
        https://arxiv.org/abs/1512.02325
    """
    def _static_shape(tensor):
        # Use the `_keras_shape` attribute when the tensor carries one and
        # ask the backend otherwise. The original code tested
        # `hasattr(tensor, 'int_shape')`, which is never true (int_shape is
        # a backend function), so `num_boxes` could be left unassigned.
        if hasattr(tensor, '_keras_shape'):
            return tensor._keras_shape
        return K.int_shape(tensor)

    # NOTE(review): the source was whitespace-collapsed, so the exact extent
    # of each `with tf.name_scope(...)` body below is inferred — confirm.
    input_layer = Input(shape=input_shape)

    # Block 1
    with tf.name_scope("Block1"):
        conv1_1 = Conv2D(64, (3, 3), name='conv1_1', padding='same',
                         activation='relu')(input_layer)
        conv1_2 = Conv2D(64, (3, 3), name='conv1_2', padding='same',
                         activation='relu')(conv1_1)
        pool1 = MaxPooling2D(name='pool1', pool_size=(2, 2), strides=(2, 2),
                             padding='same')(conv1_2)

    # Block 2
    with tf.name_scope("Block2"):
        conv2_1 = Conv2D(128, (3, 3), name='conv2_1', padding='same',
                         activation='relu')(pool1)
        conv2_2 = Conv2D(128, (3, 3), name='conv2_2', padding='same',
                         activation='relu')(conv2_1)
        pool2 = MaxPooling2D(name='pool2', pool_size=(2, 2), strides=(2, 2),
                             padding='same')(conv2_2)

    # Block 3
    with tf.name_scope("Block3"):
        conv3_1 = Conv2D(256, (3, 3), name='conv3_1', padding='same',
                         activation='relu')(pool2)
        conv3_2 = Conv2D(256, (3, 3), name='conv3_2', padding='same',
                         activation='relu')(conv3_1)
        conv3_3 = Conv2D(256, (3, 3), name='conv3_3', padding='same',
                         activation='relu')(conv3_2)
        pool3 = MaxPooling2D(name='pool3', pool_size=(2, 2), strides=(2, 2),
                             padding='same')(conv3_3)

    # Block 4
    with tf.name_scope("Block4"):
        conv4_1 = Conv2D(512, (3, 3), name='conv4_1', padding='same',
                         activation='relu')(pool3)
        conv4_2 = Conv2D(512, (3, 3), name='conv4_2', padding='same',
                         activation='relu')(conv4_1)
        conv4_3 = Conv2D(512, (3, 3), name='conv4_3', padding='same',
                         activation='relu')(conv4_2)
        pool4 = MaxPooling2D(name='pool4', pool_size=(2, 2), strides=(2, 2),
                             padding='same')(conv4_3)

    # Block 5
    with tf.name_scope("Block5"):
        conv5_1 = Conv2D(512, (3, 3), name='conv5_1', padding='same',
                         activation='relu')(pool4)
        conv5_2 = Conv2D(512, (3, 3), name='conv5_2', padding='same',
                         activation='relu')(conv5_1)
        conv5_3 = Conv2D(512, (3, 3), name='conv5_3', padding='same',
                         activation='relu')(conv5_2)
        # NOTE(review): SSD512 and SSD in this file pool with a 3x3 window
        # here; this builder uses 2x2 — confirm that is intentional.
        pool5 = MaxPooling2D(name='pool5', pool_size=(2, 2), strides=(1, 1),
                             padding='same')(conv5_3)

    # FC6 (dilated 3x3 convolution)
    with tf.name_scope("fc6"):
        fc6 = Conv2D(1024, (3, 3), name='fc6', dilation_rate=(6, 6),
                     padding='same', activation='relu')(pool5)

    # FC7
    with tf.name_scope("fc7"):
        fc7 = Conv2D(1024, (1, 1), name='fc7', padding='same',
                     activation='relu')(fc6)

    # Block 6
    with tf.name_scope("Block6"):
        conv6_1 = Conv2D(256, (1, 1), name='conv6_1', padding='same',
                         activation='relu')(fc7)
        conv6_2 = Conv2D(512, (3, 3), name='conv6_2', strides=(2, 2),
                         padding='same', activation='relu')(conv6_1)

    # Block 7 (explicit zero padding followed by a 'valid' strided conv)
    with tf.name_scope("Block7"):
        conv7_1 = Conv2D(128, (1, 1), name='conv7_1', padding='same',
                         activation='relu')(conv6_2)
        conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1)
        conv7_2 = Conv2D(256, (3, 3), name='conv7_2', padding='valid',
                         strides=(2, 2), activation='relu')(conv7_1z)

    # Block 8
    with tf.name_scope("Block8"):
        conv8_1 = Conv2D(128, (1, 1), name='conv8_1', padding='same',
                         activation='relu')(conv7_2)
        conv8_2 = Conv2D(256, (3, 3), name='conv8_2', padding='same',
                         strides=(2, 2), activation='relu')(conv8_1)

    # Last Pool
    with tf.name_scope("LastPool"):
        pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    # Prediction from conv4_3
    num_priors = 3
    img_size = (input_shape[1], input_shape[0])
    # Confidence layers get a class-count suffix unless the default 21
    # classes are used, so their names differ per class count.
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    with tf.name_scope("conv4_3"):
        conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)
        conv4_3_norm_mbox_loc = Conv2D(
            num_priors * 4, (3, 3), name='conv4_3_norm_mbox_loc',
            padding='same')(conv4_3_norm)
        conv4_3_norm_mbox_loc_flat = Flatten(
            name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)
        conv4_3_norm_mbox_conf = Conv2D(
            num_priors * num_classes, (3, 3), name=name,
            padding='same')(conv4_3_norm)
        conv4_3_norm_mbox_conf_flat = Flatten(
            name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf)
        conv4_3_norm_mbox_priorbox = PriorBox(
            img_size, 30.0, name='conv4_3_norm_mbox_priorbox',
            aspect_ratios=[2],
            variances=[0.1, 0.1, 0.2, 0.2])(conv4_3_norm)

    # Prediction from fc7
    num_priors = 6
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    with tf.name_scope("fc7"):
        fc7_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same', name=name)(fc7)
        fc7_mbox_conf_flat = Flatten(name='fc7_mbox_conf_flat')(fc7_mbox_conf)
        fc7_mbox_loc = Conv2D(num_priors * 4, (3, 3), name='fc7_mbox_loc',
                              padding='same')(fc7)
        fc7_mbox_loc_flat = Flatten(name='fc7_mbox_loc_flat')(fc7_mbox_loc)
        fc7_mbox_priorbox = PriorBox(
            img_size, 60.0, name='fc7_mbox_priorbox', max_size=114.0,
            aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2])(fc7)

    # Prediction from conv6_2
    num_priors = 6
    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    with tf.name_scope("conv6_2"):
        conv6_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                   padding='same', name=name)(conv6_2)
        conv6_2_mbox_conf_flat = Flatten(
            name='conv6_2_mbox_conf_flat')(conv6_2_mbox_conf)
        conv6_2_mbox_loc = Conv2D(num_priors * 4, (3, 3),
                                  name='conv6_2_mbox_loc',
                                  padding='same')(conv6_2)
        conv6_2_mbox_loc_flat = Flatten(
            name='conv6_2_mbox_loc_flat')(conv6_2_mbox_loc)
        conv6_2_mbox_priorbox = PriorBox(
            img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3],
            variances=[0.1, 0.1, 0.2, 0.2],
            name='conv6_2_mbox_priorbox')(conv6_2)

    # Prediction from conv7_2
    num_priors = 6
    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    with tf.name_scope("conv7_2"):
        conv7_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                   padding='same', name=name)(conv7_2)
        conv7_2_mbox_conf_flat = Flatten(
            name='conv7_2_mbox_conf_flat')(conv7_2_mbox_conf)
        conv7_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                                  name='conv7_2_mbox_loc')(conv7_2)
        conv7_2_mbox_loc_flat = Flatten(
            name='conv7_2_mbox_loc_flat')(conv7_2_mbox_loc)
        conv7_2_mbox_priorbox = PriorBox(
            img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3],
            variances=[0.1, 0.1, 0.2, 0.2],
            name='conv7_2_mbox_priorbox')(conv7_2)

    # Prediction from conv8_2
    num_priors = 6
    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    with tf.name_scope("conv8_2"):
        conv8_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                   padding='same', name=name)(conv8_2)
        conv8_2_mbox_conf_flat = Flatten(
            name='conv8_2_mbox_conf_flat')(conv8_2_mbox_conf)
        conv8_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                                  name='conv8_2_mbox_loc')(conv8_2)
        conv8_2_mbox_loc_flat = Flatten(
            name='conv8_2_mbox_loc_flat')(conv8_2_mbox_loc)
        conv8_2_mbox_priorbox = PriorBox(
            img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3],
            variances=[0.1, 0.1, 0.2, 0.2],
            name='conv8_2_mbox_priorbox')(conv8_2)

    # Prediction from pool6
    num_priors = 6
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    # pool6 is reshaped to a 1x1 map with 256 channels so PriorBox can be
    # applied to it; the channel axis position follows the dim ordering.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    with tf.name_scope("pool6"):
        pool6_mbox_loc_flat = Dense(num_priors * 4,
                                    name='pool6_mbox_loc_flat')(pool6)
        pool6_mbox_conf_flat = Dense(num_priors * num_classes,
                                     name=name)(pool6)
        pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
        pool6_mbox_priorbox = PriorBox(
            img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
            variances=[0.1, 0.1, 0.2, 0.2],
            name='pool6_mbox_priorbox')(pool6_reshaped)

    # Gather all predictions
    with tf.name_scope("mbox"):
        mbox_loc = concatenate([
            conv4_3_norm_mbox_loc_flat, fc7_mbox_loc_flat,
            conv6_2_mbox_loc_flat, conv7_2_mbox_loc_flat,
            conv8_2_mbox_loc_flat, pool6_mbox_loc_flat
        ], axis=1, name='mbox_loc')
        mbox_conf = concatenate([
            conv4_3_norm_mbox_conf_flat, fc7_mbox_conf_flat,
            conv6_2_mbox_conf_flat, conv7_2_mbox_conf_flat,
            conv8_2_mbox_conf_flat, pool6_mbox_conf_flat
        ], axis=1, name='mbox_conf')
        mbox_priorbox = concatenate([
            conv4_3_norm_mbox_priorbox, fc7_mbox_priorbox,
            conv6_2_mbox_priorbox, conv7_2_mbox_priorbox,
            conv8_2_mbox_priorbox, pool6_mbox_priorbox
        ], axis=1, name='mbox_priorbox')

    # Debug output of intermediate shapes.
    print('{} conv4_3_norm_mbox_loc_flat'.format(
        _static_shape(conv4_3_norm_mbox_loc_flat)))
    print('{} conv4_3_norm_mbox_conf_flat'.format(
        _static_shape(conv4_3_norm_mbox_conf_flat)))
    print('{} conv4_3_norm_mbox_priorbox'.format(conv4_3_norm_mbox_priorbox))

    # Every box contributes 4 values to the flattened localisation vector.
    num_boxes = _static_shape(mbox_loc)[-1] // 4
    print('{} num_boxes'.format(num_boxes))
    print('{} mbox_loc'.format(_static_shape(mbox_loc)))
    print('{} mbox_conf'.format(_static_shape(mbox_conf)))

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    print('{} locatation'.format(mbox_loc))
    print('{} conf'.format(mbox_conf))
    print('{} priorbox'.format(mbox_priorbox))

    # Optional early exit exposing a single intermediate head output.
    feature_outputs = (
        ('conv4_3_norm_mbox_loc_flat', conv4_3_norm_mbox_loc_flat),
        ('fc7_mbox_loc_flat', fc7_mbox_loc_flat),
        ('conv4_3_norm_mbox_conf_flat', conv4_3_norm_mbox_conf_flat),
        ('fc7_mbox_conf_flat', fc7_mbox_conf_flat),
    )
    for feature_name, feature_tensor in feature_outputs:
        if featurte_map == feature_name:
            return set_return_model(input_layer=input_layer,
                                    output_layer=feature_tensor)

    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2, name='predictions')
    print('{} predictions'.format(predictions.shape))
    print('{} predictions'.format(predictions))
    model = Model(inputs=input_layer, outputs=predictions)
    return model
def SSD512(input_shape, num_classes=21):
    """SSD512 architecture.

    Written against the same Keras 2 layer API (`Conv2D`, `concatenate`)
    that the other builders in this file use, instead of the legacy
    `Convolution2D(n, 3, 3, border_mode=...)` / `AtrousConvolution2D` /
    `merge(mode='concat')` call style. Layer names are unchanged.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (512, 512, 3) or (3, 512, 512)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A `Model` from the input tensor to the `predictions` tensor (box
        offsets, softmax class scores and prior boxes concatenated along
        axis 2).

    # References
        https://arxiv.org/abs/1512.02325
    """
    img_size = (input_shape[1], input_shape[0])

    def _conv(x, filters, kernel, name, strides=(1, 1), padding='same'):
        # ReLU convolution used throughout the base network.
        return Conv2D(filters, kernel, strides=strides, activation='relu',
                      padding=padding, name=name)(x)

    def _conf_name(base):
        # Confidence layers get a class-count suffix unless the default 21
        # classes are used.
        if num_classes != 21:
            return base + '_{}'.format(num_classes)
        return base

    def _conv_head(source, prefix, num_priors, min_size, max_size,
                   aspect_ratios):
        # Build the loc / conf / prior-box branch for one feature map and
        # return (loc_flat, conf_flat, priorbox).
        loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                     name=prefix + '_mbox_loc')(source)
        loc_flat = Flatten(name=prefix + '_mbox_loc_flat')(loc)
        conf = Conv2D(num_priors * num_classes, (3, 3), padding='same',
                      name=_conf_name(prefix + '_mbox_conf'))(source)
        conf_flat = Flatten(name=prefix + '_mbox_conf_flat')(conf)
        priorbox = PriorBox(img_size, min_size, max_size=max_size,
                            aspect_ratios=aspect_ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_mbox_priorbox')(source)
        return loc_flat, conf_flat, priorbox

    input_tensor = Input(shape=input_shape)

    # Block 1
    x = _conv(input_tensor, 64, (3, 3), 'conv1_1')
    x = _conv(x, 64, (3, 3), 'conv1_2')
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool1')(x)

    # Block 2
    x = _conv(x, 128, (3, 3), 'conv2_1')
    x = _conv(x, 128, (3, 3), 'conv2_2')
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool2')(x)

    # Block 3
    x = _conv(x, 256, (3, 3), 'conv3_1')
    x = _conv(x, 256, (3, 3), 'conv3_2')
    x = _conv(x, 256, (3, 3), 'conv3_3')
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool3')(x)

    # Block 4
    x = _conv(x, 512, (3, 3), 'conv4_1')
    x = _conv(x, 512, (3, 3), 'conv4_2')
    conv4_3 = _conv(x, 512, (3, 3), 'conv4_3')
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                     name='pool4')(conv4_3)

    # Block 5
    x = _conv(x, 512, (3, 3), 'conv5_1')
    x = _conv(x, 512, (3, 3), 'conv5_2')
    x = _conv(x, 512, (3, 3), 'conv5_3')
    x = MaxPooling2D((3, 3), strides=(1, 1), padding='same', name='pool5')(x)

    # FC6 (dilated 3x3 convolution; replaces the legacy atrous layer)
    fc6 = Conv2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu',
                 padding='same', name='fc6')(x)

    # FC7
    fc7 = _conv(fc6, 1024, (1, 1), 'fc7')

    # Block 6
    x = _conv(fc7, 256, (1, 1), 'conv6_1')
    conv6_2 = _conv(x, 512, (3, 3), 'conv6_2', strides=(2, 2))

    # Block 7 (explicit zero padding followed by a 'valid' strided conv)
    x = _conv(conv6_2, 128, (1, 1), 'conv7_1')
    x = ZeroPadding2D()(x)
    conv7_2 = _conv(x, 256, (3, 3), 'conv7_2', strides=(2, 2),
                    padding='valid')

    # Block 8
    x = _conv(conv7_2, 128, (1, 1), 'conv8_1')
    conv8_2 = _conv(x, 256, (3, 3), 'conv8_2', strides=(2, 2))

    # Block 9
    x = _conv(conv8_2, 128, (1, 1), 'conv9_1')
    conv9_2 = _conv(x, 256, (3, 3), 'conv9_2', strides=(2, 2))

    # Block 10
    x = _conv(conv9_2, 128, (1, 1), 'conv10_1')
    conv10_2 = _conv(x, 256, (3, 3), 'conv10_2', strides=(2, 2))

    # Last Pool
    pool6 = GlobalAveragePooling2D(name='pool6')(conv10_2)

    # Convolutional prediction heads, in the order they are concatenated.
    conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)
    heads = [
        _conv_head(conv4_3_norm, 'conv4_3_norm', 4, 35.84, 76.8, [2]),
        _conv_head(fc7, 'fc7', 6, 76.8, 153.6, [2, 3]),
        _conv_head(conv6_2, 'conv6_2', 6, 153.6, 230.4, [2, 3]),
        _conv_head(conv7_2, 'conv7_2', 6, 230.4, 307.2, [2, 3]),
        _conv_head(conv8_2, 'conv8_2', 6, 307.2, 384.0, [2, 3]),
        _conv_head(conv9_2, 'conv9_2', 4, 384.0, 460.8, [2]),
    ]

    # Prediction from pool6 (dense heads on the pooled vector)
    num_priors = 4
    pool6_loc_flat = Dense(num_priors * 4,
                           name='pool6_mbox_loc_flat')(pool6)
    pool6_conf_flat = Dense(num_priors * num_classes,
                            name=_conf_name('pool6_mbox_conf_flat'))(pool6)
    pool6_priorbox_layer = PriorBox(img_size, 460.8, max_size=537.6,
                                    aspect_ratios=[2],
                                    variances=[0.1, 0.1, 0.2, 0.2],
                                    name='pool6_mbox_priorbox')
    # pool6 is reshaped to a 1x1 map with 256 channels so PriorBox can be
    # applied to it; the channel axis position follows the dim ordering.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    heads.append((pool6_loc_flat, pool6_conf_flat,
                  pool6_priorbox_layer(pool6_reshaped)))

    # Gather all predictions
    loc_parts, conf_parts, priorbox_parts = (
        list(parts) for parts in zip(*heads))
    mbox_loc = concatenate(loc_parts, axis=1, name='mbox_loc')
    mbox_conf = concatenate(conf_parts, axis=1, name='mbox_conf')
    mbox_priorbox = concatenate(priorbox_parts, axis=1, name='mbox_priorbox')

    # Every box contributes 4 values to the flattened localisation vector.
    # The original fallback tested `hasattr(tensor, 'int_shape')`, which is
    # never true (int_shape is a backend function), so `num_boxes` could be
    # left unassigned; ask the backend directly instead.
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2, name='predictions')
    model = Model(input_tensor, predictions)
    return model
def SSD(input_shape, num_classes=21):
    """SSD300 architecture.

    NOTE(review): this file contains a later `def SSD(input_shape,
    num_classes)`; if both definitions live in the same module the later
    one rebinds the name and this builder becomes unreachable — confirm
    which is intended.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3) or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A `Model` from the input tensor to the `predictions` tensor (box
        offsets, softmax class scores and prior boxes concatenated along
        axis 2).

    # References
        https://arxiv.org/abs/1512.02325
    """
    img_size = (input_shape[1], input_shape[0])
    input_tensor = Input(shape=input_shape)

    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same',
               name='conv1_1')(input_tensor)
    x = Conv2D(64, (3, 3), activation='relu', padding='same',
               name='conv1_2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool1')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same',
               name='conv2_1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same',
               name='conv2_2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool2')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same',
               name='conv3_1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same',
               name='conv3_2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same',
               name='conv3_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool3')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same',
               name='conv4_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same',
               name='conv4_2')(x)
    conv4_3 = Conv2D(512, (3, 3), activation='relu', padding='same',
                     name='conv4_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                     name='pool4')(conv4_3)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same',
               name='conv5_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same',
               name='conv5_2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same',
               name='conv5_3')(x)
    x = MaxPooling2D((3, 3), strides=(1, 1), padding='same', name='pool5')(x)

    # FC6 (dilated 3x3 convolution)
    fc6 = Conv2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu',
                 padding='same', name='fc6')(x)

    # FC7
    fc7 = Conv2D(1024, (1, 1), activation='relu', padding='same',
                 name='fc7')(fc6)

    # Block 6
    conv6_1 = Conv2D(256, (1, 1), activation='relu', padding='same',
                     name='conv6_1')(fc7)
    conv6_2 = Conv2D(512, (3, 3), strides=(2, 2), activation='relu',
                     padding='same', name='conv6_2')(conv6_1)

    # Block 7 (explicit zero padding followed by a 'valid' strided conv)
    conv7_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
                     name='conv7_1')(conv6_2)
    conv7_1_padded = ZeroPadding2D()(conv7_1)
    conv7_2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu',
                     padding='valid', name='conv7_2')(conv7_1_padded)

    # Block 8
    conv8_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
                     name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu',
                     padding='same', name='conv8_2')(conv8_1)

    # Last Pool
    pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2)

    # Prediction from conv4_3
    # NOTE(review): SSD300v2 and SSD512 in this file pass a constant 20 to
    # Normalize; here the value is num_classes - 1, which equals 20 only
    # for the default 21 classes — confirm this coupling is intended.
    conv4_3_norm = Normalize(num_classes - 1, name='conv4_3_norm')(conv4_3)
    num_priors = 3
    conv4_3_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                                   name='conv4_3_norm_mbox_loc')(conv4_3_norm)
    conv4_3_norm_mbox_loc_flat = Flatten(
        name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)
    # Confidence layers get a class-count suffix unless the default 21
    # classes are used, so their names differ per class count.
    name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    conv4_3_norm_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                                    padding='same', name=name)(conv4_3_norm)
    conv4_3_norm_mbox_conf_flat = Flatten(
        name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf)
    conv4_3_norm_mbox_priorbox = PriorBox(
        img_size, 30.0, aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2],
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm)

    # Prediction from fc7
    num_priors = 6
    fc7_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                          name='fc7_mbox_loc')(fc7)
    fc7_mbox_loc_flat = Flatten(name='fc7_mbox_loc_flat')(fc7_mbox_loc)
    name = 'fc7_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    fc7_mbox_conf = Conv2D(num_priors * num_classes, (3, 3), padding='same',
                           name=name)(fc7)
    fc7_mbox_conf_flat = Flatten(name='fc7_mbox_conf_flat')(fc7_mbox_conf)
    fc7_mbox_priorbox = PriorBox(
        img_size, 60.0, max_size=114.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2], name='fc7_mbox_priorbox')(fc7)

    # Prediction from conv6_2
    num_priors = 6
    conv6_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                              name='conv6_2_mbox_loc')(conv6_2)
    conv6_2_mbox_loc_flat = Flatten(
        name='conv6_2_mbox_loc_flat')(conv6_2_mbox_loc)
    name = 'conv6_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    conv6_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same', name=name)(conv6_2)
    conv6_2_mbox_conf_flat = Flatten(
        name='conv6_2_mbox_conf_flat')(conv6_2_mbox_conf)
    conv6_2_mbox_priorbox = PriorBox(
        img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv6_2_mbox_priorbox')(conv6_2)

    # Prediction from conv7_2
    num_priors = 6
    conv7_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                              name='conv7_2_mbox_loc')(conv7_2)
    conv7_2_mbox_loc_flat = Flatten(
        name='conv7_2_mbox_loc_flat')(conv7_2_mbox_loc)
    name = 'conv7_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    conv7_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same', name=name)(conv7_2)
    conv7_2_mbox_conf_flat = Flatten(
        name='conv7_2_mbox_conf_flat')(conv7_2_mbox_conf)
    conv7_2_mbox_priorbox = PriorBox(
        img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv7_2_mbox_priorbox')(conv7_2)

    # Prediction from conv8_2
    num_priors = 6
    conv8_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same',
                              name='conv8_2_mbox_loc')(conv8_2)
    conv8_2_mbox_loc_flat = Flatten(
        name='conv8_2_mbox_loc_flat')(conv8_2_mbox_loc)
    name = 'conv8_2_mbox_conf'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    conv8_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3),
                               padding='same', name=name)(conv8_2)
    conv8_2_mbox_conf_flat = Flatten(
        name='conv8_2_mbox_conf_flat')(conv8_2_mbox_conf)
    conv8_2_mbox_priorbox = PriorBox(
        img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='conv8_2_mbox_priorbox')(conv8_2)

    # Prediction from pool6 (dense heads on the pooled vector)
    num_priors = 6
    pool6_mbox_loc_flat = Dense(num_priors * 4,
                                name='pool6_mbox_loc_flat')(pool6)
    name = 'pool6_mbox_conf_flat'
    if num_classes != 21:
        name += '_{}'.format(num_classes)
    pool6_mbox_conf_flat = Dense(num_priors * num_classes, name=name)(pool6)
    # pool6 is reshaped to a 1x1 map with 256 channels so PriorBox can be
    # applied to it; the channel axis position follows the dim ordering.
    if K.image_dim_ordering() == 'tf':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    pool6_mbox_priorbox = PriorBox(
        img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3],
        variances=[0.1, 0.1, 0.2, 0.2],
        name='pool6_mbox_priorbox')(pool6_reshaped)

    # Gather all predictions
    mbox_loc = concatenate([
        conv4_3_norm_mbox_loc_flat, fc7_mbox_loc_flat,
        conv6_2_mbox_loc_flat, conv7_2_mbox_loc_flat,
        conv8_2_mbox_loc_flat, pool6_mbox_loc_flat
    ], axis=1, name='mbox_loc')
    mbox_conf = concatenate([
        conv4_3_norm_mbox_conf_flat, fc7_mbox_conf_flat,
        conv6_2_mbox_conf_flat, conv7_2_mbox_conf_flat,
        conv8_2_mbox_conf_flat, pool6_mbox_conf_flat
    ], axis=1, name='mbox_conf')
    mbox_priorbox = concatenate([
        conv4_3_norm_mbox_priorbox, fc7_mbox_priorbox,
        conv6_2_mbox_priorbox, conv7_2_mbox_priorbox,
        conv8_2_mbox_priorbox, pool6_mbox_priorbox
    ], axis=1, name='mbox_priorbox')

    # Every box contributes 4 values to the flattened localisation vector.
    # The original fallback tested `hasattr(tensor, 'int_shape')`, which is
    # never true (int_shape is a backend function), so `num_boxes` could be
    # left unassigned; ask the backend directly instead.
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2, name='predictions')
    model = Model(input_tensor, predictions)
    return model
def SSD(input_shape, num_classes):
    """Build an SSD detector on a depthwise-separable convolution trunk.

    # Arguments
        input_shape: Sequence whose first two entries are read. The network
            input is created with shape
            `(input_shape[1], input_shape[0], 3)`, and
            `(input_shape[1], input_shape[0])` is handed to every
            `PriorBox` layer as the image size.
        num_classes: Number of confidence scores predicted per prior box.
            Presumably includes the background class -- TODO confirm.

    # Returns
        A Keras `Model` mapping the input to one `predictions` tensor that
        concatenates, on axis 2, the localisation values reshaped to
        `(num_boxes, 4)`, the softmax scores reshaped to
        `(num_boxes, num_classes)` and the concatenated `PriorBox` outputs.
    """
    img_size = (input_shape[1], input_shape[0])
    input_shape = (input_shape[1], input_shape[0], 3)
    alpha = 1.0
    depth_multiplier = 1

    # ---- Feature extractor -------------------------------------------------
    input0 = Input(input_shape)
    x = _conv_block(input0, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
                              strides=(2, 2), block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
                              strides=(2, 2), block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                              strides=(2, 2), block_id=6)
    for block_id in range(7, 11):
        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                                  block_id=block_id)

    # NOTE(review): block 11 passes `depth_multiplier` as the third argument
    # of `_conv_blockSSD_f` while block 13 passes `alpha`. Both equal 1 here;
    # confirm which one the helper actually expects.
    x = _depthwise_conv_block_f(x, depth_multiplier, strides=(1, 1),
                                block_id=11)
    x, conv11 = _conv_blockSSD_f(x, 512, depth_multiplier, kernel=(1, 1),
                                 strides=(1, 1), block_id=11)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                              strides=(2, 2), block_id=12)
    x = _depthwise_conv_block_f(x, depth_multiplier, strides=(1, 1),
                                block_id=13)
    x, conv13 = _conv_blockSSD_f(x, 512, alpha, kernel=(1, 1),
                                 strides=(1, 1), block_id=13)
    x, conv14_2 = _conv_blockSSD(x, 256, alpha, block_id=14)
    x, conv15_2 = _conv_blockSSD(x, 128, alpha, block_id=15)
    x, conv16_2 = _conv_blockSSD(x, 128, alpha, block_id=16)
    x, conv17_2 = _conv_blockSSD(x, 64, alpha, block_id=17)

    # ---- Prediction heads --------------------------------------------------
    # (feature map, layer-name prefix, priors per cell,
    #  PriorBox min_size, PriorBox max_size, PriorBox aspect_ratios)
    head_specs = [
        (conv11, 'conv11', 3, 60, None, [2]),
        (conv13, 'conv13', 6, 105.0, 150.0, [2, 3]),
        (conv14_2, 'conv14_2', 6, 150, 195.0, [2, 3]),
        (conv15_2, 'conv15_2', 6, 195.0, 240.0, [2, 3]),
        (conv16_2, 'conv16_2', 6, 240.0, 285.0, [2, 3]),
        (conv17_2, 'conv17_2', 6, 285.0, 300.0, [2, 3]),
    ]

    loc_flats = []
    conf_flats = []
    priorboxes = []
    for feature, prefix, num_priors, min_size, max_size, ratios in head_specs:
        # Four box coordinates per prior.
        loc = Conv2D(num_priors * 4, (1, 1), padding='same',
                     name=prefix + '_mbox_loc')(feature)
        loc_flats.append(Flatten(name=prefix + '_mbox_loc_flat')(loc))

        # One score per class per prior.
        conf = Conv2D(num_priors * num_classes, (1, 1), padding='same',
                      name=prefix + '_mbox_conf')(feature)
        conf_flats.append(Flatten(name=prefix + '_mbox_conf_flat')(conf))

        priorbox = PriorBox(img_size, min_size, max_size=max_size,
                            aspect_ratios=ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_mbox_priorbox')
        priorboxes.append(priorbox(feature))

    # ---- Gather all predictions --------------------------------------------
    mbox_loc = concatenate(loc_flats, axis=1, name='mbox_loc')
    mbox_conf = concatenate(conf_flats, axis=1, name='mbox_conf')
    mbox_priorbox = concatenate(priorboxes, axis=1, name='mbox_priorbox')

    # Prefer `_keras_shape` when the tensor carries it; otherwise ask the
    # backend for the static shape. The previous `hasattr(..., 'int_shape')`
    # test looked for a tensor attribute that is really a backend function,
    # so `num_boxes` could be left unbound.
    loc_shape = getattr(mbox_loc, '_keras_shape', None)
    if loc_shape is None:
        loc_shape = K.int_shape(mbox_loc)
    num_boxes = loc_shape[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name='mbox_conf_logits')(mbox_conf)
    mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf)
    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2, name='predictions')
    model = Model(inputs=input0, outputs=predictions)
    return model
def SSD300(input_shape=(300, 300, 3), num_classes=21):
    """Build an SSD300 network that spells out padding as separate layers.

    Every padded convolution is a `ZeroPadding2D` layer named `<conv>_zp`
    followed by a `Conv2D` that uses its default padding.

    NOTE(review): another `SSD300` is defined later in this file; being
    later, that definition is the one left bound to the name.

    # Arguments
        input_shape: Shape passed to the `Input` layer. The `PriorBox`
            layers are always built for a (300, 300) image size, regardless
            of this value.
        num_classes: Number of confidence scores predicted per prior box.

    # Returns
        A Keras `Model` from the `inputs` layer to the `detection_out`
        tensor, which concatenates on axis 2 the localisation values
        reshaped to `(num_boxes, 4)`, the softmax scores reshaped to
        `(num_boxes, num_classes)` and the concatenated `PriorBox` outputs.
    """
    net = {'inputs': Input(shape=input_shape, name='inputs')}

    def add_conv(name, source, filters, kernel_size, strides=(1, 1), pad=0,
                 **extra):
        """Store ReLU conv `name` in `net`, zero-padding `source` if `pad`."""
        tensor = net[source]
        if pad:
            pad_name = name + '_zp'
            net[pad_name] = ZeroPadding2D(padding=(pad, pad),
                                          name=pad_name)(tensor)
            tensor = net[pad_name]
        net[name] = Conv2D(filters, kernel_size, activation='relu',
                           strides=strides, name=name, **extra)(tensor)

    def add_vgg_block(index, source, filters, depth):
        """Chain `depth` padded 3x3 convs named `conv<index>_<k>`."""
        for k in range(1, depth + 1):
            name = 'conv{}_{}'.format(index, k)
            add_conv(name, source, filters, (3, 3), pad=1)
            source = name

    # ---- VGG-style trunk ---------------------------------------------------
    add_vgg_block(1, 'inputs', 64, 2)
    net['pool1'] = MaxPool2D(pool_size=(2, 2), strides=(2, 2),
                             name='pool1')(net['conv1_2'])
    add_vgg_block(2, 'pool1', 128, 2)
    net['pool2'] = MaxPool2D(pool_size=(2, 2), strides=(2, 2),
                             name='pool2')(net['conv2_2'])
    add_vgg_block(3, 'pool2', 256, 3)
    # pool3 is the only 2x2 pooling layer that uses 'same' padding.
    net['pool3'] = MaxPool2D(pool_size=(2, 2), strides=(2, 2),
                             padding='same', name='pool3')(net['conv3_3'])
    add_vgg_block(4, 'pool3', 512, 3)
    net['pool4'] = MaxPool2D(pool_size=(2, 2), strides=(2, 2),
                             name='pool4')(net['conv4_3'])
    add_vgg_block(5, 'pool4', 512, 3)
    net['pool5_zp'] = ZeroPadding2D(padding=(1, 1),
                                    name='pool5_zp')(net['conv5_3'])
    net['pool5'] = MaxPool2D(pool_size=(3, 3), strides=(1, 1),
                             name='pool5')(net['pool5_zp'])

    # ---- fc6 / fc7 as convolutions, then the extra feature layers ----------
    add_conv('fc6', 'pool5', 1024, (3, 3), pad=6, dilation_rate=(6, 6))
    add_conv('fc7', 'fc6', 1024, (1, 1))
    add_conv('conv6_1', 'fc7', 256, (1, 1))
    add_conv('conv6_2', 'conv6_1', 512, (3, 3), strides=(2, 2), pad=1)
    add_conv('conv7_1', 'conv6_2', 128, (1, 1))
    add_conv('conv7_2', 'conv7_1', 256, (3, 3), strides=(2, 2), pad=1)
    add_conv('conv8_1', 'conv7_2', 128, (1, 1))
    add_conv('conv8_2', 'conv8_1', 256, (3, 3))
    add_conv('conv9_1', 'conv8_2', 128, (1, 1))
    add_conv('conv9_2', 'conv9_1', 256, (3, 3))
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    # ---- Prediction heads --------------------------------------------------
    # (source layer, priors per cell, min_size, max_size, aspect_ratios)
    head_specs = [
        ('conv4_3_norm', 4, 30.0, 60.0, [2.0]),
        ('fc7', 6, 60.0, 111.0, [2.0, 3.0]),
        ('conv6_2', 6, 111.0, 162.0, [2.0, 3.0]),
        ('conv7_2', 6, 162.0, 213.0, [2.0, 3.0]),
        ('conv8_2', 4, 213.0, 264.0, [2.0]),
        ('conv9_2', 4, 264.0, 315.0, [2.0]),
    ]
    # NOTE(review): the loc/conf convolutions keep the ReLU activation of the
    # original code, which clamps box offsets and class logits to be
    # non-negative; the other SSD builders in this file use linear heads.
    # Left unchanged so existing weights behave the same -- confirm whether
    # this is intended.
    for source, num_priors, min_size, max_size, ratios in head_specs:
        head_channels = (('loc', 4 * num_priors),
                         ('conf', num_classes * num_priors))
        for kind, channels in head_channels:
            conv_name = '{}_mbox_{}'.format(source, kind)
            add_conv(conv_name, source, channels, (3, 3), pad=1)
            flat_name = conv_name + '_flat'
            net[flat_name] = Flatten(name=flat_name)(net[conv_name])

        prior_name = source + '_mbox_priorbox'
        net[prior_name] = PriorBox((300, 300), min_size=min_size,
                                   max_size=max_size, aspect_ratios=ratios,
                                   variances=[0.10, 0.10, 0.20, 0.20],
                                   flip=True, clip=False,
                                   name=prior_name)(net[source])

    # ---- Gather all predictions --------------------------------------------
    sources = [spec[0] for spec in head_specs]
    gathered_parts = (('mbox_loc', '_mbox_loc_flat'),
                      ('mbox_conf', '_mbox_conf_flat'),
                      ('mbox_priorbox', '_mbox_priorbox'))
    for gathered, suffix in gathered_parts:
        net[gathered] = concatenate(
            inputs=[net[source + suffix] for source in sources],
            axis=1, name=gathered)

    # Prefer `_keras_shape` when the tensor carries it; otherwise fall back
    # to the backend's static shape instead of raising AttributeError.
    loc_shape = getattr(net['mbox_loc'], '_keras_shape', None)
    if loc_shape is None:
        loc_shape = K.int_shape(net['mbox_loc'])
    num_boxes = loc_shape[-1] // 4

    net['mbox_conf_reshape'] = Reshape(
        target_shape=(num_boxes, num_classes),
        name='mbox_conf_reshape')(net['mbox_conf'])
    net['mbox_conf_softmax'] = Activation(
        'softmax', name='mbox_conf_softmax')(net['mbox_conf_reshape'])
    net['mbox_loc_reshape'] = Reshape(
        target_shape=(num_boxes, 4),
        name='mbox_loc_reshape')(net['mbox_loc'])
    net['detection_out'] = concatenate(
        inputs=[net['mbox_loc_reshape'],
                net['mbox_conf_softmax'],
                net['mbox_priorbox']],
        axis=2, name='detection_out')

    model = Model(net['inputs'], net['detection_out'])
    return model
def SSD300(input_shape, num_classes=21):
    """SSD300 architecture.

    NOTE(review): an earlier `SSD300` definition is visible in this file;
    this later one rebinds the name.

    # Arguments
        input_shape: Shape of the input image,
            expected to be either (300, 300, 3)
            or (3, 300, 300)(not tested).
        num_classes: Number of classes including background.

    # Returns
        A Keras `Model` whose single output `predictions` concatenates, on
        axis 2, the localisation values reshaped to `(num_boxes, 4)`, the
        softmax scores reshaped to `(num_boxes, num_classes)` and the
        concatenated `PriorBox` outputs.

    # References
        https://arxiv.org/abs/1512.02325
    """
    net = {}

    def relu_conv(name, source, filters, kernel_size, padding='same',
                  **extra):
        """Apply a named ReLU `Conv2D` to `source` and return the result."""
        layer = Conv2D(filters, kernel_size=kernel_size, activation='relu',
                       padding=padding, name=name, **extra)
        return layer(source)

    def conf_layer_name(base):
        """Append the class count to `base` unless it is the default 21."""
        if num_classes == 21:
            return base
        return '{}_{}'.format(base, num_classes)

    net['input'] = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])

    # ---- Blocks 1-5: stacks of 3x3 convs, each followed by a pooling layer --
    # (filters, number of conv layers) per block.
    vgg_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    x = net['input']
    for block_no, (filters, depth) in enumerate(vgg_blocks, start=1):
        for layer_no in range(1, depth + 1):
            conv_name = 'conv{}_{}'.format(block_no, layer_no)
            x = net[conv_name] = relu_conv(conv_name, x, filters, (3, 3))
        # pool5 is 3x3 with stride 1; the first four pools are 2x2 stride 2.
        if block_no == 5:
            pool_size, strides = (3, 3), (1, 1)
        else:
            pool_size, strides = (2, 2), (2, 2)
        pool_name = 'pool{}'.format(block_no)
        x = net[pool_name] = MaxPooling2D(pool_size=pool_size,
                                          strides=strides, padding='same',
                                          name=pool_name)(x)

    # ---- FC6 / FC7 as convolutions -----------------------------------------
    net['fc6'] = relu_conv('fc6', net['pool5'], 1024, (3, 3),
                           dilation_rate=(6, 6))
    net['fc7'] = relu_conv('fc7', net['fc6'], 1024, (1, 1))

    # ---- Blocks 6-8 ---------------------------------------------------------
    net['conv6_1'] = relu_conv('conv6_1', net['fc7'], 256, (1, 1))
    net['conv6_2'] = relu_conv('conv6_2', net['conv6_1'], 512, (3, 3),
                               strides=(2, 2))
    net['conv7_1'] = relu_conv('conv7_1', net['conv6_2'], 128, (1, 1))
    # conv7_2 pads explicitly and then convolves with 'valid' padding.
    conv7_1_padded = ZeroPadding2D()(net['conv7_1'])
    net['conv7_2'] = relu_conv('conv7_2', conv7_1_padded, 256, (3, 3),
                               padding='valid', strides=(2, 2))
    net['conv8_1'] = relu_conv('conv8_1', net['conv7_2'], 128, (1, 1))
    net['conv8_2'] = relu_conv('conv8_2', net['conv8_1'], 256, (3, 3),
                               strides=(2, 2))

    # ---- Last pool and conv4_3 normalisation -------------------------------
    net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2'])
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    # ---- Convolutional prediction heads ------------------------------------
    # (source layer, priors per cell, min_size, max_size, aspect_ratios);
    # a max_size of None means the keyword is not passed to PriorBox at all.
    conv_heads = [
        ('conv4_3_norm', 3, 30.0, None, [2]),
        ('fc7', 6, 60.0, 114.0, [2, 3]),
        ('conv6_2', 6, 114.0, 168.0, [2, 3]),
        ('conv7_2', 6, 168.0, 222.0, [2, 3]),
        ('conv8_2', 6, 222.0, 276.0, [2, 3]),
    ]
    for source, num_priors, min_size, max_size, ratios in conv_heads:
        feature = net[source]

        loc_name = source + '_mbox_loc'
        net[loc_name] = Conv2D(num_priors * 4, kernel_size=(3, 3),
                               padding='same', name=loc_name)(feature)
        net[loc_name + '_flat'] = Flatten(
            name=loc_name + '_flat')(net[loc_name])

        conf_name = source + '_mbox_conf'
        net[conf_name] = Conv2D(num_priors * num_classes,
                                kernel_size=(3, 3), padding='same',
                                name=conf_layer_name(conf_name))(feature)
        net[conf_name + '_flat'] = Flatten(
            name=conf_name + '_flat')(net[conf_name])

        prior_kwargs = {} if max_size is None else {'max_size': max_size}
        prior_name = source + '_mbox_priorbox'
        priorbox = PriorBox(img_size, min_size, aspect_ratios=ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prior_name, **prior_kwargs)
        net[prior_name] = priorbox(feature)

    # ---- Prediction from pool6 (dense heads on the pooled vector) ----------
    num_priors = 6
    net['pool6_mbox_loc_flat'] = Dense(
        num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6'])
    net['pool6_mbox_conf_flat'] = Dense(
        num_priors * num_classes,
        name=conf_layer_name('pool6_mbox_conf_flat'))(net['pool6'])
    priorbox = PriorBox(img_size, 276.0, max_size=330.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='pool6_mbox_priorbox')
    # PriorBox is fed a 1x1 map with 256 channels, laid out per data format.
    if K.image_data_format() == 'channels_last':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    net['pool6_reshaped'] = Reshape(target_shape,
                                    name='pool6_reshaped')(net['pool6'])
    net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped'])

    # ---- Gather all predictions --------------------------------------------
    sources = [spec[0] for spec in conv_heads] + ['pool6']
    gathered_parts = (('mbox_loc', '_mbox_loc_flat'),
                      ('mbox_conf', '_mbox_conf_flat'),
                      ('mbox_priorbox', '_mbox_priorbox'))
    for gathered, suffix in gathered_parts:
        net[gathered] = concatenate(
            [net[source + suffix] for source in sources],
            axis=1, name=gathered)

    # Prefer `_keras_shape` when the tensor carries it; otherwise ask the
    # backend for the static shape. The previous `hasattr(..., 'int_shape')`
    # test looked for a tensor attribute that is really a backend function,
    # so `num_boxes` could be left unbound.
    loc_shape = getattr(net['mbox_loc'], '_keras_shape', None)
    if loc_shape is None:
        loc_shape = K.int_shape(net['mbox_loc'])
    num_boxes = loc_shape[-1] // 4

    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2, name='predictions')

    model = Model(inputs=(net['input'], ), outputs=(net['predictions'], ))
    return model
def ssd_prior_box_layer(img_size, conv4_3_norm, fc7, conv6_2, conv7_2,
                        conv8_2, pool6):
    """Create the `PriorBox` layer for each SSD source and concatenate them.

    # Arguments
        img_size: Image size forwarded unchanged to every `PriorBox`.
        conv4_3_norm, fc7, conv6_2, conv7_2, conv8_2: Tensors each `PriorBox`
            is applied to directly.
        pool6: Tensor that is first reshaped to a 1x1 map with 256 channels
            (layout chosen from the backend image data format) and then
            passed to the last `PriorBox`.

    # Returns
        The six `PriorBox` outputs concatenated on axis 1 by a layer named
        `mbox_priorbox`.
    """
    # (feature map, layer-name prefix, min_size, max_size, aspect_ratios);
    # a max_size of None means the keyword is not passed to PriorBox at all.
    feature_specs = [
        (conv4_3_norm, 'conv4_3_norm', 30.0, None, [2]),
        (fc7, 'fc7', 60.0, 114.0, [2, 3]),
        (conv6_2, 'conv6_2', 114.0, 168.0, [2, 3]),
        (conv7_2, 'conv7_2', 168.0, 222.0, [2, 3]),
        (conv8_2, 'conv8_2', 222.0, 276.0, [2, 3]),
    ]

    priorboxes = []
    for feature, prefix, min_size, max_size, ratios in feature_specs:
        extra = {} if max_size is None else {'max_size': max_size}
        layer = PriorBox(img_size, min_size, aspect_ratios=ratios,
                         variances=[0.1, 0.1, 0.2, 0.2],
                         name=prefix + '_mbox_priorbox', **extra)
        priorboxes.append(layer(feature))

    # `image_data_format` replaces the legacy `image_dim_ordering() == 'tf'`
    # check; 'channels_last' is the same layout the old 'tf' value denoted.
    if K.image_data_format() == 'channels_last':
        target_shape = (1, 1, 256)
    else:
        target_shape = (256, 1, 1)
    pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6)
    pool6_mbox_priorbox = PriorBox(img_size, 276.0, max_size=330.0,
                                   aspect_ratios=[2, 3],
                                   variances=[0.1, 0.1, 0.2, 0.2],
                                   name='pool6_mbox_priorbox')(pool6_reshaped)
    priorboxes.append(pool6_mbox_priorbox)

    return concatenate(priorboxes, axis=1, name='mbox_priorbox')