def m2det(input_shape, num_classes=21, num_anchors=6):
    """Assemble the M2Det detector and return the compiled keras Model.

    input_shape: (H, W, C) of the input image.
    num_classes: number of object classes (including background).
    num_anchors: anchors per feature-map cell.
    Returns a Model whose output concatenates box regressions and
    class probabilities along the last axis.
    """
    inputs = keras.layers.Input(shape=input_shape)
    # Backbone: keep the last two effective VGG16 feature maps,
    #   C4: 40,40,512 and C5: 20,20,1024.
    C4, C5 = VGG16(inputs).outputs[2:]
    # FFMv1 fuses C4 and C5 into a 40,40,768 base feature.
    base_feature = FFMv1(C4, C5, feature_size_1=256, feature_size_2=512)
    # Four stacked TUM modules produce six pyramid levels; each level is the
    # concatenation of the four TUM outputs (4 * 128 = 512 channels):
    #   40x40, 20x20, 10x10, 5x5, 3x3, 1x1 — all 512 channels.
    feature_pyramid = _create_feature_pyramid(base_feature, stage=4)
    # SFAM adds channel attention over the merged pyramid levels.
    attended_levels = SFAM(feature_pyramid)
    # Per-level prediction heads: one conv for class scores, one for boxes.
    cls_outputs, reg_outputs = [], []
    for level in attended_levels:
        cls_head = keras.layers.Conv2D(filters=num_anchors * num_classes,
                                       kernel_size=3, strides=1,
                                       padding='same')(level)
        cls_head = keras.layers.Reshape((-1, num_classes))(cls_head)
        cls_outputs.append(keras.layers.Activation('softmax')(cls_head))
        reg_head = keras.layers.Conv2D(filters=num_anchors * 4,
                                       kernel_size=3, strides=1,
                                       padding='same')(level)
        reg_outputs.append(keras.layers.Reshape((-1, 4))(reg_head))
    cls_concat = keras.layers.Concatenate(axis=1,
                                          name="classification")(cls_outputs)
    reg_concat = keras.layers.Concatenate(axis=1,
                                          name="regression")(reg_outputs)
    # Final output layout: [regression | classification] on the last axis.
    pyramids = keras.layers.Concatenate(axis=-1,
                                        name="out")([reg_concat, cls_concat])
    return keras.models.Model(inputs=inputs, outputs=pyramids)
def siamese(input_shape):
    """Siamese comparison network: one shared VGG16 encoder, an L1 distance
    layer, and two dense layers ending in a sigmoid similarity score."""
    backbone = VGG16()
    image_a = Input(shape=input_shape)
    image_b = Input(shape=input_shape)
    # Both inputs pass through the same backbone object, so the two
    # branches share weights.
    features_a = backbone.call(image_a)
    features_b = backbone.call(image_b)
    # Element-wise absolute difference of the two embeddings.
    distance = Lambda(lambda pair: K.abs(pair[0] - pair[1]))(
        [features_a, features_b])
    # Two fully connected layers; the sigmoid pins the score to [0, 1].
    hidden = Dense(512, activation='relu')(distance)
    score = Dense(1, activation='sigmoid')(hidden)
    return Model([image_a, image_b], score)
def __init__(self, input_shape, pretrained=False):
    """Siamese head over a truncated VGG16 backbone.

    input_shape: image shape; the last element selects the backbone's
    input channels, the first two size the flattened feature vector.
    pretrained: whether the backbone loads pretrained weights.
    """
    super(Siamese, self).__init__()
    self.vgg = VGG16(pretrained, input_shape[-1])
    # Drop the classification tail — only convolutional features are used.
    del self.vgg.avgpool
    del self.vgg.classifier
    # 512 output channels times the spatial extent of the last feature map
    # (note the width/height argument order — presumably (W, H); verify
    # against get_img_output_length's signature).
    flattened = 512 * get_img_output_length(input_shape[1], input_shape[0])
    self.fully_connect1 = torch.nn.Linear(flattened, 512)
    self.fully_connect2 = torch.nn.Linear(512, 1)
def siamese(input_shape):
    """Siamese network with a shared VGG16 feature extractor and a
    sigmoid similarity output."""
    backbone = VGG16(input_shape)
    left = Input(shape=input_shape)
    right = Input(shape=input_shape)
    # Reusing the same backbone instance ties the weights of both branches.
    left_features = backbone(left)
    right_features = backbone(right)
    # Similarity features: |f(a) - f(b)| element-wise.
    distance = Lambda(lambda pair: K.abs(pair[0] - pair[1]))(
        [left_features, right_features])
    hidden = Dense(512, activation='relu')(distance)
    score = Dense(1, activation='sigmoid')(hidden)
    return Model([left, right], score)
def __init__(self, num_classes=21, in_channels=3, pretrained=False):
    """U-Net decoder built on a VGG16 encoder.

    num_classes: per-pixel output classes.
    in_channels: input image channels fed to the backbone.
    pretrained: whether the backbone loads pretrained weights.
    """
    super(Unet, self).__init__()
    self.vgg = VGG16(pretrained=pretrained, in_channels=in_channels)
    # Channel counts entering each decoder stage (skip connection
    # concatenated with the upsampled map) and leaving it.
    decoder_in = [192, 384, 768, 1024]
    decoder_out = [64, 128, 256, 512]
    # Upsampling stages, deepest first:
    self.up_concat4 = unetUp(decoder_in[3], decoder_out[3])  # -> 64,64,512
    self.up_concat3 = unetUp(decoder_in[2], decoder_out[2])  # -> 128,128,256
    self.up_concat2 = unetUp(decoder_in[1], decoder_out[1])  # -> 256,256,128
    self.up_concat1 = unetUp(decoder_in[0], decoder_out[0])  # -> 512,512,64
    # 1x1 conv mapping decoder features to class scores (no concat).
    self.final = nn.Conv2d(decoder_out[0], num_classes, 1)
def get_predict_model(num_classes, backbone, num_anchors=9):
    """Build the two inference-time Faster R-CNN models.

    Returns (model_rpn, model_classifier_only): the RPN model also emits the
    shared feature map, which is fed back into the classifier-only model
    together with the proposals.
    """
    inputs = Input(shape=(None, None, 3))
    roi_input = Input(shape=(None, 4))
    if backbone == 'vgg':
        # For a 600,600,3 input VGG16 yields a 37,37,512 shared feature map.
        feature_map_input = Input(shape=(None, None, 512))
        base_layers = VGG16(inputs)
        # The RPN adjusts the anchors into region proposals.
        rpn = get_rpn(base_layers, num_anchors)
        # The classifier head refines proposals into final boxes
        # (7x7 RoI pooling for VGG).
        classifier = get_vgg_classifier(feature_map_input, roi_input,
                                        7, num_classes)
    else:
        # For a 600,600,3 input ResNet50 yields a 38,38,1024 feature map.
        feature_map_input = Input(shape=(None, None, 1024))
        base_layers = ResNet50(inputs)
        rpn = get_rpn(base_layers, num_anchors)
        # 14x14 RoI pooling for the ResNet50 classifier head.
        classifier = get_resnet50_classifier(feature_map_input, roi_input,
                                             14, num_classes)
    model_rpn = Model(inputs, rpn + [base_layers])
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    return model_rpn, model_classifier_only
def get_model(num_classes, backbone, num_anchors=9,
              input_shape=[None, None, 3]):
    """Build the training-time Faster R-CNN models.

    Returns (model_rpn, model_all): the RPN alone, and the joint model that
    runs both the RPN and the classifier head on shared features.
    """
    inputs = Input(shape=input_shape)
    roi_input = Input(shape=(None, 4))
    if backbone == 'vgg':
        # For a 600,600,3 input VGG16 yields a 37,37,512 shared feature map.
        base_layers = VGG16(inputs)
        # The RPN adjusts the anchors into region proposals.
        rpn = get_rpn(base_layers, num_anchors)
        # The classifier head refines proposals into final boxes
        # (7x7 RoI pooling for VGG).
        classifier = get_vgg_classifier(base_layers, roi_input,
                                        7, num_classes)
    else:
        # For a 600,600,3 input ResNet50 yields a 38,38,1024 feature map.
        base_layers = ResNet50(inputs)
        rpn = get_rpn(base_layers, num_anchors)
        # 14x14 RoI pooling for the ResNet50 classifier head.
        classifier = get_resnet50_classifier(base_layers, roi_input,
                                             14, num_classes)
    model_rpn = Model(inputs, rpn)
    model_all = Model([inputs, roi_input], rpn + classifier)
    return model_rpn, model_all
def siamese(input_shape):
    """Twin-branch similarity network on a shared VGG16 encoder."""
    encoder = VGG16()
    branch_a = Input(shape=input_shape)
    branch_b = Input(shape=input_shape)
    # Run both inputs through the shared backbone.
    embedding_a = encoder.call(branch_a)
    embedding_b = encoder.call(branch_b)
    # Absolute difference between the two embeddings.
    abs_diff = Lambda(lambda pair: K.abs(pair[0] - pair[1]))
    merged = abs_diff([embedding_a, embedding_b])
    # Two dense layers; sigmoid keeps the output in [0, 1].
    dense_out = Dense(512, activation='relu')(merged)
    similarity = Dense(1, activation='sigmoid')(dense_out)
    return Model([branch_a, branch_b], similarity)
def SSD300(input_shape, num_classes=21, weight_decay=5e-4):
    """Build the SSD300 detector on a VGG16 backbone.

    input_shape: typically (300, 300, 3).
    num_classes: object classes including background.
    weight_decay: L2 regularization factor for the prediction convolutions.
    Returns a Model emitting, per prior box, 4 location offsets followed by
    num_classes softmax probabilities (8732 priors for the 300x300 input,
    giving an output of shape (8732, 4 + num_classes)).

    The six per-feature-map prediction heads were copy-pasted stanzas in the
    original; they are factored into one nested helper driven by a spec list.
    All layer names and the layer-creation order are preserved exactly.
    """
    def _add_head(source_key, num_priors):
        # Attach loc/conf prediction convs + flatten for one feature map.
        # num_priors is the number of prior boxes per grid cell; the loc
        # branch predicts 4 offsets (x, y, h, w) per prior, the conf branch
        # num_classes scores per prior.
        feature = net[source_key]
        net[source_key + '_mbox_loc'] = Conv2D(
            num_priors * 4, kernel_size=(3, 3), padding='same',
            kernel_regularizer=l2(weight_decay),
            name=source_key + '_mbox_loc')(feature)
        net[source_key + '_mbox_loc_flat'] = Flatten(
            name=source_key + '_mbox_loc_flat')(
            net[source_key + '_mbox_loc'])
        net[source_key + '_mbox_conf'] = Conv2D(
            num_priors * num_classes, kernel_size=(3, 3), padding='same',
            kernel_regularizer=l2(weight_decay),
            name=source_key + '_mbox_conf')(feature)
        net[source_key + '_mbox_conf_flat'] = Flatten(
            name=source_key + '_mbox_conf_flat')(
            net[source_key + '_mbox_conf'])

    input_tensor = Input(shape=input_shape)
    # net maps layer names to tensors for the whole SSD graph.
    net = VGG16(input_tensor, weight_decay=weight_decay)
    # L2-normalize the channels of conv4_3 (38,38,512) before prediction.
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    # (source feature key, priors per cell); spatial sizes for a 300x300
    # input noted alongside.
    head_specs = [
        ('conv4_3_norm', 4),  # 38,38,512
        ('fc7', 6),           # 19,19,1024
        ('conv6_2', 6),       # 10,10,512
        ('conv7_2', 6),       # 5,5,256
        ('conv8_2', 4),       # 3,3,256
        ('conv9_2', 4),       # 1,1,256
    ]
    for source_key, num_priors in head_specs:
        _add_head(source_key, num_priors)
    # Stack all heads' flattened predictions.
    net['mbox_loc'] = Concatenate(axis=1, name='mbox_loc')(
        [net[key + '_mbox_loc_flat'] for key, _ in head_specs])
    net['mbox_conf'] = Concatenate(axis=1, name='mbox_conf')(
        [net[key + '_mbox_conf_flat'] for key, _ in head_specs])
    # 8732,4
    net['mbox_loc'] = Reshape((-1, 4), name='mbox_loc_final')(net['mbox_loc'])
    # 8732,num_classes — softmax over classes per prior.
    net['mbox_conf'] = Reshape((-1, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    # 8732,4+num_classes
    net['predictions'] = Concatenate(
        axis=-1, name='predictions')([net['mbox_loc'], net['mbox_conf']])
    model = Model(net['input'], net['predictions'])
    return model