def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 14
        input_shape = (num_rois, 14, 14, 1024)
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (num_rois, 1024, 7, 7)

    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])
    out = classifier_layers(out_roi_pool, input_shape=input_shape, trainable=True)

    out = TimeDistributed(Flatten())(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
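# Usage sketch (not part of the original file): a minimal illustration of how a
# classifier head like the one above is wired into a Keras model. The helper
# `nn_base` (the shared feature extractor) and the placeholder names `img_input`
# and `roi_input` are assumptions for illustration, following the keras-frcnn
# convention of feeding the image and the ROI coordinates as separate inputs.
from keras.layers import Input
from keras.models import Model

img_input = Input(shape=(None, None, 3))   # variable-size image
roi_input = Input(shape=(None, 4))         # one (x, y, w, h) row per ROI
num_rois = 32

shared_layers = nn_base(img_input, trainable=True)  # assumed shared base network
out_class, out_regr = classifier(shared_layers, roi_input, num_rois, nb_classes=21)
model_classifier = Model([img_input, roi_input], [out_class, out_regr])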
def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False, alpha=1.0, depth_mult=1):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 14
        input_shape = (num_rois, 14, 14, 1024)
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (num_rois, 1024, 7, 7)

    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])
    # pass the width and depth multipliers through instead of hard-coding 1.0 and 1
    out = classifier_layers(out_roi_pool, trainable=True, alpha=alpha, depth_multiplier=depth_mult)

    out = TimeDistributed(AveragePooling2D(name='Global_average_Pooling_classifier_layer'), name='TimeDistributed_AVG')(out)
    out = TimeDistributed(Flatten(name='flatten'), name='TimeDistributed_flatten')(out)
    # out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
    # out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero', name='dense_class'), name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero', name='dense_regr'), name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    if K.backend() == 'tensorflow':
        pooling_regions = 7
        input_shape = (num_rois, 7, 7, 512)

    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    '''Construct an ROI classifier head from the base layers and input ROIs.

    Only the ROI classifier uses the TimeDistributed layer.

    # Args
    | base_layers: output tensor of the shared base network
    | input_rois: a placeholder for the input ROIs
    | num_rois: the number of ROIs to be processed at a time (? to be verified)
    | nb_classes: number of classes, including background

    # Return
    | out_class: (num_rois, nb_classes) softmax probabilities
    | out_regr: (num_rois, 4 * (nb_classes - 1)) regression result
    '''
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    # pooling_regions defines the fixed feature-map size produced by the ROI pooling layer
    # if K.backend() == 'tensorflow':
    #     pooling_regions = 7
    #     input_shape = (num_rois, 7, 7, 512)
    # elif K.backend() == 'theano':
    #     pooling_regions = 7
    #     input_shape = (num_rois, 512, 7, 7)
    pooling_regions = 7

    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    out = TimeDistributed(Flatten(name='flatten'), name='TimeDistributed_flatten')(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)

    # https://keras.io/layers/core/#dense
    # Dense input: nD tensor with shape (batch_size, ..., input_dim);
    # output has shape (batch_size, ..., units).
    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
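# Standalone shape check (illustrative, not from the original source): demonstrates
# the TimeDistributed(Dense) behavior the docstring relies on. Given an input of
# shape (batch, num_rois, features), the wrapped Dense is applied to each ROI slice
# independently, so only the last axis changes. The `_demo_*` names are hypothetical.
from keras.layers import Input, Dense, TimeDistributed
from keras.models import Model

_demo_in = Input(shape=(32, 25088))                    # 32 ROIs, 7*7*512 flattened features
_demo_out = TimeDistributed(Dense(4096, activation='relu'))(_demo_in)
_demo_model = Model(_demo_in, _demo_out)
print(_demo_model.output_shape)                        # (None, 32, 4096)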
def classifier(base_layers, input_rois, num_rois, nb_classes=11, trainable=False):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 7
        input_shape = (num_rois, 7, 7, 512)  # 512 = channel count of the base network's output
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (num_rois, 512, 7, 7)

    # RoiPoolingConv returns shape (1, 32, 7, 7, 512):
    # batch size, number of proposals, feature-map width, feature-map height, feature-map depth
    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    # TimeDistributed expects at least a 3D input tensor and treats the dimension at
    # index 1 as the time axis; here that axis is the 32 proposals, so the wrapped
    # layers classify each of the 32 pooled crops independently.
    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)

    # out_class shape: (?, 32, nb_classes)
    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    # out_regr shape: (?, 32, 4 * (nb_classes - 1))
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
    # Produces num_rois out_class and out_regr predictions. These feed two of the four
    # losses: Fast R-CNN classification and Fast R-CNN regression (proposal -> box).
    return [out_class, out_regr]
def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 14
        input_shape = (num_rois, 14, 14, 1024)
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (num_rois, 1024, 7, 7)

    # RoiPoolingConv resizes the feature-map region of each ROI to a fixed size (14, 14)
    # and concatenates the resized maps along axis 0. This brings ROIs of varying sizes
    # to a common shape, which the fully connected layers downstream require.
    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)(
        [base_layers, input_rois])  # shape=(1, num_rois, 14, 14, 1024) dtype=float32
    out = classifier_layers(
        out_roi_pool, input_shape=input_shape, trainable=True)  # 9 conv layers; shape=(1, num_rois, 1, 1, 2048) dtype=float32

    out = TimeDistributed(Flatten())(out)  # shape=(?, num_rois, 2048) dtype=float32

    out_class = TimeDistributed(
        Dense(nb_classes, activation='softmax', kernel_initializer='zero'),
        name='dense_class_{}'.format(nb_classes))(out)  # shape=(?, num_rois, nb_classes) dtype=float32
    # note: no regression target for bg class
    out_regr = TimeDistributed(
        Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'),
        name='dense_regress_{}'.format(nb_classes))(out)  # shape=(?, num_rois, 4 * (nb_classes - 1)) dtype=float32
    return [out_class, out_regr]
def classifier(base_layers, input_rois, num_rois, nb_classes=11, trainable=False):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    if K.backend() == 'tensorflow':
        pooling_regions = 14
        input_shape = (num_rois, 14, 14, 1024)  # 1024 = channel count of the base network's output
    elif K.backend() == 'theano':
        pooling_regions = 7
        input_shape = (num_rois, 1024, 7, 7)

    # This layer takes the feature maps and the ROI coordinates as input.
    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])
    # Produces a (None, num_rois, 2048) feature map once flattened below.
    out = classifier_layers(out_roi_pool, input_shape=input_shape, trainable=True)
    # Each of the num_rois feature maps is processed independently, so the layers
    # are wrapped in TimeDistributed.
    out = TimeDistributed(Flatten())(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    # TimeDistributed wraps Dense so that an independent fully connected transform
    # is applied at each time step (each ROI).
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)

    return [out_class, out_regr]  # num_rois out_class and out_regr predictions in total
def rpn(base_layers, num_anchors):
    x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)

    x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)
    x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)

    return [x_class, x_regr, base_layers]


def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround
    # -if K.backend() == 'tensorflow':
    if K.image_data_format() == 'channels_last':
        pooling_regions = 7
        input_shape = (num_rois, 7, 7, 512)
    # -elif K.backend() == 'theano':
    elif K.image_data_format() == 'channels_first':
        pooling_regions = 7
        input_shape = (num_rois, 512, 7, 7)

    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)
    out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)

    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)
    # note: no regression target for bg class
    out_regr = TimeDistributed(Dense(4 * (nb_classes - 1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
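# Training-model wiring (a sketch, not part of the original file): how rpn() and
# classifier() are commonly combined in keras-frcnn-style training scripts. The
# helper `nn_base` and the anchor/ROI counts are assumptions for illustration;
# rpn() returns [x_class, x_regr, base_layers], so rpn_out[:2] keeps the two
# trainable RPN outputs.
from keras.layers import Input
from keras.models import Model

img_input = Input(shape=(None, None, 3))
roi_input = Input(shape=(None, 4))
num_anchors = 9          # e.g. 3 scales x 3 aspect ratios
num_rois = 32

shared_layers = nn_base(img_input, trainable=True)   # assumed shared VGG base
rpn_out = rpn(shared_layers, num_anchors)
cls_out = classifier(shared_layers, roi_input, num_rois, nb_classes=21)

model_rpn = Model(img_input, rpn_out[:2])
model_classifier = Model([img_input, roi_input], cls_out)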