def fpn_classifier_graph(rois, feature_maps, image_meta,
                         pool_size, num_classes, train_bn=True,
                         fc_layers_size=1024):
    """Build the classifier and bounding-box regression heads on top of the FPN.

    Args:
        rois: Proposal boxes used to crop regions out of the feature maps.
        feature_maps: List of feature-map tensors; it is concatenated after
            ``[rois, image_meta]`` and handed to ``PyramidROIAlign``.
        image_meta: Image meta tensor forwarded to ``PyramidROIAlign``.
        pool_size: Side length of the square ROI-aligned crop; also the
            kernel size of the first convolution.
        num_classes: Number of classes predicted by both heads.
        train_bn: Value passed as ``training`` to the batch-norm wrappers.
        fc_layers_size: Number of filters in the two shared convolutions.

    Returns:
        Tuple ``(mrcnn_class_logits, mrcnn_probs, mrcnn_bbox)``. Per the shape
        annotations kept below, the probabilities are
        ``[batch, num_rois, num_classes]`` and the box output is
        ``[batch, num_rois, num_classes, 4]``.
    """
    # ROI pooling: crop the feature maps with the proposal boxes.
    # pooled : [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
    roi_align = PyramidROIAlign([pool_size, pool_size], name="roi_align_classifier")
    pooled = roi_align([rois, image_meta] + feature_maps)

    # Aggregate each crop with a pool_size x pool_size "valid" convolution.
    # hidden : [batch, num_rois, 1, 1, fc_layers_size]
    conv1 = TimeDistributed(
        Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
        name="mrcnn_class_conv1",
    )
    hidden = conv1(pooled)
    bn1 = TimeDistributed(BatchNormalization(), name="mrcnn_class_bn1")
    hidden = bn1(hidden, training=train_bn)
    hidden = Activation("relu")(hidden)

    # hidden : [batch, num_rois, 1, 1, fc_layers_size]
    conv2 = TimeDistributed(Conv2D(fc_layers_size, (1, 1)), name="mrcnn_class_conv2")
    hidden = conv2(hidden)
    bn2 = TimeDistributed(BatchNormalization(), name="mrcnn_class_bn2")
    hidden = bn2(hidden, training=train_bn)
    hidden = Activation("relu")(hidden)

    # Drop axes 3 and 2 (the two size-1 spatial axes).
    # shared : [batch, num_rois, fc_layers_size]
    squeeze = Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2), name="pool_squeeze")
    shared = squeeze(hidden)

    # Classifier head: which class the object inside each box belongs to.
    # mrcnn_probs : [batch, num_rois, num_classes]
    logits_layer = TimeDistributed(Dense(num_classes), name="mrcnn_class_logits")
    mrcnn_class_logits = logits_layer(shared)
    softmax_layer = TimeDistributed(Activation("softmax"), name="mrcnn_class")
    mrcnn_probs = softmax_layer(mrcnn_class_logits)

    # BBox head: per-class adjustments applied to the boxes.
    # mrcnn_bbox : [batch, num_rois, num_classes, 4]
    bbox_fc = TimeDistributed(
        Dense(num_classes * 4, activation="linear"),
        name="mrcnn_bbox_fc",
    )
    bbox_flat = bbox_fc(shared)
    mrcnn_bbox = Reshape((-1, num_classes, 4), name="mrcnn_bbox")(bbox_flat)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
def build_fpn_mask_graph(rois, feature_maps, image_meta,
                         pool_size, num_classes, train_bn=True):
    """Build the mask head on top of the FPN feature maps.

    Args:
        rois: Proposal boxes used to crop regions out of the feature maps.
        feature_maps: List of feature-map tensors; it is concatenated after
            ``[rois, image_meta]`` and handed to ``PyramidROIAlign``.
        image_meta: Image meta tensor forwarded to ``PyramidROIAlign``.
        pool_size: Side length of the square ROI-aligned crop.
        num_classes: Number of output channels of the final 1x1 convolution.
        train_bn: Value passed as ``training`` to the batch-norm wrappers.

    Returns:
        The output tensor of the sigmoid-activated ``mrcnn_mask`` layer, with
        ``num_classes`` channels.
    """
    # ROI pooling: crop the feature maps with the proposal boxes.
    # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
    roi_align = PyramidROIAlign([pool_size, pool_size], name="roi_align_mask")
    x = roi_align([rois, image_meta] + feature_maps)

    # Four identical 3x3 conv -> batch norm -> ReLU stages, numbered 1..4.
    # Shape after each: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
    for stage in range(1, 5):
        conv = Conv2D(256, (3, 3), padding="same")
        x = TimeDistributed(conv, name="mrcnn_mask_conv%d" % stage)(x)
        norm = TimeDistributed(BatchNormalization(), name="mrcnn_mask_bn%d" % stage)
        x = norm(x, training=train_bn)
        x = Activation("relu")(x)

    # Stride-2 transposed convolution.
    # Shape: [batch, num_rois, 2xMASK_POOL_SIZE, 2xMASK_POOL_SIZE, channels]
    deconv = Conv2DTranspose(256, (2, 2), strides=2, activation="relu")
    x = TimeDistributed(deconv, name="mrcnn_mask_deconv")(x)

    # After the deconvolution, a 1x1 convolution sets the channel count to
    # num_classes, one channel per class.
    mask_conv = Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid")
    return TimeDistributed(mask_conv, name="mrcnn_mask")(x)
def fpn_classifier_graph(rois, feature_maps, image_meta,
                         pool_size, num_classes, train_bn=True,
                         fc_layers_size=1024):
    """Build the classifier and bounding-box regression heads on top of the FPN.

    Args:
        rois: Proposal boxes.
        feature_maps: List of feature maps the crops are taken from (the
            original note lists [p2, p3, p4, p5]); it is concatenated after
            ``[rois, image_meta]`` and handed to ``PyramidROIAlign``.
        image_meta: Holds the image information; forwarded to
            ``PyramidROIAlign``.
        pool_size: Height/width of each local feature crop after resizing;
            also the kernel size of the first convolution.
        num_classes: Number of classes predicted by both heads.
        train_bn: Value passed as ``training`` to the batch-norm wrappers.
        fc_layers_size: Number of filters in the two shared convolutions
            (e.g. 1024).

    Returns:
        Tuple ``(mrcnn_class_logits, mrcnn_probs, mrcnn_bbox)``.
    """
    # ROI pooling: crop the feature maps with the proposal boxes.
    # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
    x = PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_classifier",
    )([rois, image_meta] + feature_maps)

    # Two conv -> batch norm -> ReLU stages acting on the last three axes; they
    # play the role of two fully connected layers.
    # Shape after each stage: [batch, num_rois, 1, 1, fc_layers_size]
    stage_specs = (
        ("1", (pool_size, pool_size), {"padding": "valid"}),
        ("2", (1, 1), {}),
    )
    for suffix, kernel_size, conv_kwargs in stage_specs:
        conv = Conv2D(fc_layers_size, kernel_size, **conv_kwargs)
        x = TimeDistributed(conv, name="mrcnn_class_conv" + suffix)(x)
        x = TimeDistributed(
            BatchNormalization(),
            name="mrcnn_class_bn" + suffix,
        )(x, training=train_bn)
        x = Activation("relu")(x)

    # Drop axes 3 and 2 (the two size-1 spatial axes).
    # Shape: [batch, num_rois, fc_layers_size]
    shared = Lambda(
        lambda feats: K.squeeze(K.squeeze(feats, 3), 2),
        name="pool_squeeze",
    )(x)

    # Classifier head: which class the object inside each box belongs to.
    # Shape: [batch, num_rois, num_classes]
    mrcnn_class_logits = TimeDistributed(
        Dense(num_classes),
        name="mrcnn_class_logits",
    )(shared)
    mrcnn_probs = TimeDistributed(
        Activation("softmax"),
        name="mrcnn_class",
    )(mrcnn_class_logits)

    # BBox head: per-class adjustments applied to the boxes.
    # Shape: [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
    box_deltas = TimeDistributed(
        Dense(num_classes * 4, activation="linear"),
        name="mrcnn_bbox_fc",
    )(shared)
    # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
    mrcnn_bbox = Reshape((-1, num_classes, 4), name="mrcnn_bbox")(box_deltas)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox