def define_stacked_model(members):
    # update all layers in all models to not be trainable
    for i in range(len(members)):
        model = members[i]
        for layer in model.layers:
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer._name = 'ensemble_' + str(i + 1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input for model in members]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in members]
    merge = concatenate(ensemble_outputs)
    hidden = Dense(10, activation='relu')(merge)
    output = Dense(3, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='model_graph.png')
    # compile
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
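# Usage sketch for define_stacked_model (illustrative, not from the original
# source): assumes the member models were saved beforehand with model.save()
# under hypothetical filenames 'models/model_1.h5', 'models/model_2.h5', ...
# The stacked model is multi-headed, so the same input array is fed once per member.
#
# from tensorflow.keras.models import load_model
# members = [load_model('models/model_%d.h5' % (i + 1)) for i in range(5)]
# stacked = define_stacked_model(members)
# stacked.fit([trainX] * len(members), trainy_one_hot, epochs=100, verbose=0)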
def CreateModel(self):
    '''
    Define the CNN/LSTM/CTC model using the Keras functional API.

    Input layer: sequence of 200-dimensional feature vectors; the maximum
        length of one utterance is set to 1600 frames (about 16 s).
    Hidden layer 1: 3x3 convolution layer
    Hidden layer 2: max-pooling layer, pool size 2
    Hidden layer 3: Dropout layer dropping 20% of connections, against overfitting
    Hidden layer 4: recurrent LSTM/GRU layer
    Hidden layer 5: Dropout layer dropping 20% of connections, against overfitting
    Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE units and
        softmax activation
    Output layer: custom CTC layer that uses the CTC loss, for connectionist
        temporal classification with multiple outputs
    '''
    # Each frame is represented by 13 MFCC coefficients plus their 13 first-order
    # and 13 second-order deltas; the maximum signal sequence length is 1500.
    input_data = Input(name='the_input',
                       shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

    layer_h1 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(input_data)  # convolution
    layer_h1 = Dropout(0.1)(layer_h1)
    layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h1)  # convolution
    layer_h3 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h2)  # pooling
    layer_h3 = Dropout(0.2)(layer_h3)  # randomly drop connections, against overfitting

    layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h3)  # convolution
    layer_h4 = Dropout(0.2)(layer_h4)
    layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h4)  # convolution
    layer_h6 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h5)  # pooling
    layer_h6 = Dropout(0.3)(layer_h6)

    layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h6)  # convolution
    layer_h7 = Dropout(0.3)(layer_h7)
    layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                      kernel_initializer='he_normal')(layer_h7)  # convolution
    layer_h9 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h8)  # pooling
    layer_h9 = Dropout(0.3)(layer_h9)

    layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                       kernel_initializer='he_normal')(layer_h9)  # convolution
    layer_h10 = Dropout(0.4)(layer_h10)
    layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                       kernel_initializer='he_normal')(layer_h10)  # convolution
    layer_h12 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h11)  # pooling

    #test = Model(inputs=input_data, outputs=layer_h12)
    #test.summary()

    layer_h13 = Reshape((200, 3200))(layer_h12)  # reshape to a time sequence
    layer_h13 = Dropout(0.4)(layer_h13)
    layer_h14 = Dense(128, activation="relu", use_bias=True,
                      kernel_initializer='he_normal')(layer_h13)  # fully connected
    layer_h14 = Dropout(0.4)(layer_h14)
    inner = layer_h14
    #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4)  # LSTM alternative

    # two stacked bidirectional GRU layers
    rnn_size = 128
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2 = concatenate([gru_2, gru_2b])

    layer_h15 = Dropout(0.4)(gru2)
    layer_h16 = Dense(128, activation="relu", use_bias=True,
                      kernel_initializer='he_normal')(layer_h15)  # fully connected
    layer_h16 = Dropout(0.5)(layer_h16)  # randomly drop connections, against overfitting
    layer_h17 = Dense(self.MS_OUTPUT_SIZE, use_bias=True,
                      kernel_initializer='he_normal')(layer_h16)  # fully connected
    y_pred = Activation('softmax', name='Activation0')(layer_h17)

    model_data = Model(inputs=input_data, outputs=y_pred)
    #model_data.summary()

    labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # tensorflow.keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    model.summary()

    # clipnorm seems to speed up convergence
    #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)
    # the 'ctc' Lambda layer already outputs the loss, so the compiled loss just passes it through
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=ada_d)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    print('[*Info] Model created and compiled successfully.')
    return model, model_data
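# self.ctc_lambda_func is wired into the Lambda layer above but is not shown in
# this section. A minimal sketch of the usual implementation as a method of the
# same class, assuming the standard Keras backend helper K.ctc_batch_cost;
# skipping the first few (often noisy) output frames is a common variant and an
# assumption here, left disabled:
def ctc_lambda_func(self, args):
    y_pred, labels, input_length, label_length = args
    #y_pred = y_pred[:, 2:, :]  # optional: drop the first two output frames
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)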
def CreateModel(self):
    '''
    Define the CNN/LSTM/CTC model using the Keras functional API.

    Input layer: sequence of 200-dimensional feature vectors; the maximum
        length of one utterance is set to 1600 frames (about 16 s).
    Hidden layers: convolution + pooling blocks, 3x3 kernels, pool size 2
    Hidden layer: fully connected layer
    Output layer: fully connected layer with self.MS_OUTPUT_SIZE units and
        softmax activation
    CTC layer: uses the CTC loss to implement connectionist temporal
        classification with multiple outputs
    '''
    input_data = Input(name='the_input',
                       shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

    layer_h = Conv2D(32, (3, 3), use_bias=False, activation='relu', padding='same',
                     kernel_initializer='he_normal')(input_data)  # convolution
    layer_h = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same',
                     kernel_initializer='he_normal')(layer_h)  # convolution
    layer_h = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h)  # pooling

    layer_h = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                     kernel_initializer='he_normal')(layer_h)  # convolution
    layer_h = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same',
                     kernel_initializer='he_normal')(layer_h)  # convolution
    layer_h = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h)  # pooling

    layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                     kernel_initializer='he_normal')(layer_h)  # convolution
    layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                     kernel_initializer='he_normal')(layer_h)  # convolution
    layer_h = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h)  # pooling

    layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                     kernel_initializer='he_normal')(layer_h)  # convolution
    layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                     kernel_initializer='he_normal')(layer_h)  # convolution
    layer_h = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h)  # pooling

    #test = Model(inputs=input_data, outputs=layer_h)
    #test.summary()

    layer_h = Reshape((200, 3200))(layer_h)  # reshape to a time sequence
    layer_h = Dense(128, activation="relu", use_bias=True,
                    kernel_initializer='he_normal')(layer_h)  # fully connected
    inner = layer_h
    #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4)  # LSTM alternative

    # two stacked bidirectional GRU layers
    rnn_size = 128
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2 = concatenate([gru_2, gru_2b])
    layer_h = gru2

    layer_h = Dense(128, activation="relu", use_bias=True,
                    kernel_initializer='he_normal')(layer_h)  # fully connected
    layer_h = Dense(self.MS_OUTPUT_SIZE, use_bias=True,
                    kernel_initializer='he_normal')(layer_h)  # fully connected
    y_pred = Activation('softmax', name='Activation0')(layer_h)

    model_data = Model(inputs=input_data, outputs=y_pred)
    #model_data.summary()

    labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    model.summary()

    # clipnorm seems to speed up convergence
    #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    #opt = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)
    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8)
    # the 'ctc' Lambda layer already outputs the loss, so the compiled loss just passes it through
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=opt)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    print('[*Info] Create Model Successful, Compiles Model Successful. ')
    return model, model_data
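# Training sketch (illustrative assumptions, not from the original source):
# the CTC-wrapped `model` consumes four inputs plus a dummy target, since the
# 'ctc' Lambda layer already outputs the per-sample loss; `model_data` is the
# inference model that emits the softmax sequence for decoding.
#
# import numpy as np
# x         : (batch, AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, 1) padded features
# y         : (batch, label_max_string_length) padded integer label sequences
# input_len : (batch, 1) lengths of the network output sequences (at most 200 here)
# label_len : (batch, 1) true label lengths
# model.fit([x, y, input_len, label_len], np.zeros((batch_size, 1)),
#           batch_size=batch_size, epochs=epochs)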
def build_UNet2D_4L(inp_shape, k_size=3):
    # feature maps are concatenated along the last axis (TensorFlow backend)
    merge_axis = -1
    data = Input(shape=inp_shape)

    # encoder: four levels of (conv, conv, pool)
    conv1 = Convolution2D(filters=32, kernel_size=k_size, padding='same', activation='relu')(data)
    conv1 = Convolution2D(filters=32, kernel_size=k_size, padding='same', activation='relu')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Convolution2D(filters=64, kernel_size=k_size, padding='same', activation='relu')(pool1)
    conv2 = Convolution2D(filters=64, kernel_size=k_size, padding='same', activation='relu')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Convolution2D(filters=64, kernel_size=k_size, padding='same', activation='relu')(pool2)
    conv3 = Convolution2D(filters=64, kernel_size=k_size, padding='same', activation='relu')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Convolution2D(filters=128, kernel_size=k_size, padding='same', activation='relu')(pool3)
    conv4 = Convolution2D(filters=128, kernel_size=k_size, padding='same', activation='relu')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    # bottleneck
    conv5 = Convolution2D(filters=256, kernel_size=k_size, padding='same', activation='relu')(pool4)

    # decoder: upsample, conv, then merge with the matching encoder level
    up1 = UpSampling2D(size=(2, 2))(conv5)
    conv6 = Convolution2D(filters=256, kernel_size=k_size, padding='same', activation='relu')(up1)
    conv6 = Convolution2D(filters=256, kernel_size=k_size, padding='same', activation='relu')(conv6)
    merged1 = concatenate([conv4, conv6], axis=merge_axis)
    conv6 = Convolution2D(filters=256, kernel_size=k_size, padding='same', activation='relu')(merged1)

    up2 = UpSampling2D(size=(2, 2))(conv6)
    conv7 = Convolution2D(filters=256, kernel_size=k_size, padding='same', activation='relu')(up2)
    conv7 = Convolution2D(filters=256, kernel_size=k_size, padding='same', activation='relu')(conv7)
    merged2 = concatenate([conv3, conv7], axis=merge_axis)
    conv7 = Convolution2D(filters=256, kernel_size=k_size, padding='same', activation='relu')(merged2)

    up3 = UpSampling2D(size=(2, 2))(conv7)
    conv8 = Convolution2D(filters=128, kernel_size=k_size, padding='same', activation='relu')(up3)
    conv8 = Convolution2D(filters=128, kernel_size=k_size, padding='same', activation='relu')(conv8)
    merged3 = concatenate([conv2, conv8], axis=merge_axis)
    conv8 = Convolution2D(filters=128, kernel_size=k_size, padding='same', activation='relu')(merged3)

    up4 = UpSampling2D(size=(2, 2))(conv8)
    conv9 = Convolution2D(filters=64, kernel_size=k_size, padding='same', activation='relu')(up4)
    conv9 = Convolution2D(filters=64, kernel_size=k_size, padding='same', activation='relu')(conv9)
    merged4 = concatenate([conv1, conv9], axis=merge_axis)
    conv9 = Convolution2D(filters=64, kernel_size=k_size, padding='same', activation='relu')(merged4)

    # single-channel sigmoid output for binary segmentation
    conv10 = Convolution2D(filters=1, kernel_size=k_size, padding='same', activation='sigmoid')(conv9)

    output = conv10
    model = Model(data, output)
    return model
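# Usage sketch (assumptions: 256x256 single-channel images with binary masks;
# the optimizer and loss are illustrative choices, not from the original).
# Note the input height and width must be divisible by 16, since the encoder
# applies four 2x2 poolings and each skip concatenation requires matching
# spatial shapes.
# model = build_UNet2D_4L((256, 256, 1))
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# model.fit(train_images, train_masks, batch_size=8, epochs=20, validation_split=0.1)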
def make_yolov3_model():
    input_image = Input(shape=(None, None, 3))

    # Layer  0 => 4
    x = _conv_block(input_image, [
        {'filter':  32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
        {'filter':  64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
        {'filter':  32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
        {'filter':  64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])
    # Layer  5 => 8
    x = _conv_block(x, [
        {'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])
    # Layer  9 => 11
    x = _conv_block(x, [
        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])
    # Layer 12 => 15
    x = _conv_block(x, [
        {'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])
    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [
            {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16 + i * 3},
            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17 + i * 3}])
    skip_36 = x
    # Layer 37 => 40
    x = _conv_block(x, [
        {'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])
    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [
            {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41 + i * 3},
            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42 + i * 3}])
    skip_61 = x
    # Layer 62 => 65
    x = _conv_block(x, [
        {'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
    # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [
            {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66 + i * 3},
            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67 + i * 3}])
    # Layer 75 => 79
    x = _conv_block(x, [
        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)
    # Layer 80 => 82: large-object detection head
    yolo_82 = _conv_block(x, [
        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
        {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)
    # Layer 83 => 86
    x = _conv_block(x, [
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])
    # Layer 87 => 91
    x = _conv_block(x, [
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)
    # Layer 92 => 94: medium-object detection head
    yolo_94 = _conv_block(x, [
        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
        {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)
    # Layer 95 => 98
    x = _conv_block(x, [
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 96}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])
    # Layer 99 => 106: small-object detection head
    yolo_106 = _conv_block(x, [
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
        {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)

    model = Model(input_image, [yolo_82, yolo_94, yolo_106])
    return model
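# _conv_block is used above but not defined in this section. A sketch matching
# the widely circulated keras-yolo3 helper (treat the details, notably the
# epsilon and padding choices, as assumptions); it needs Conv2D,
# BatchNormalization, LeakyReLU, ZeroPadding2D and add from keras.layers:
def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0
    for conv in convs:
        # keep the input to the second-to-last conv for the residual add
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        # darknet pads only on the left/top for strided convolutions
        if conv['stride'] > 1:
            x = ZeroPadding2D(((1, 0), (1, 0)))(x)
        x = Conv2D(conv['filter'], conv['kernel'],
                   strides=conv['stride'],
                   padding='valid' if conv['stride'] > 1 else 'same',
                   name='conv_' + str(conv['layer_idx']),
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']:
            x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
    return add([skip_connection, x]) if skip else x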
def architecture(config):
    # IMAGE_H, IMAGE_W, GRID_H, GRID_W, BOX, CLASS and TRUE_BOX_BUFFER are
    # expected to be module-level constants derived from the config.
    input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
    true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER, 4))

    # Layer 1
    x = Conv2D(32, (3, 3), strides=(1, 1), padding='same', name='conv_1', use_bias=False)(input_image)
    x = BatchNormalization(name='norm_1')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 2
    x = Conv2D(64, (3, 3), strides=(1, 1), padding='same', name='conv_2', use_bias=False)(x)
    x = BatchNormalization(name='norm_2')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 3
    x = Conv2D(128, (3, 3), strides=(1, 1), padding='same', name='conv_3', use_bias=False)(x)
    x = BatchNormalization(name='norm_3')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 4
    x = Conv2D(64, (1, 1), strides=(1, 1), padding='same', name='conv_4', use_bias=False)(x)
    x = BatchNormalization(name='norm_4')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 5
    x = Conv2D(128, (3, 3), strides=(1, 1), padding='same', name='conv_5', use_bias=False)(x)
    x = BatchNormalization(name='norm_5')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 6
    x = Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_6', use_bias=False)(x)
    x = BatchNormalization(name='norm_6')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 7
    x = Conv2D(128, (1, 1), strides=(1, 1), padding='same', name='conv_7', use_bias=False)(x)
    x = BatchNormalization(name='norm_7')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 8
    x = Conv2D(256, (3, 3), strides=(1, 1), padding='same', name='conv_8', use_bias=False)(x)
    x = BatchNormalization(name='norm_8')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 9
    x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_9', use_bias=False)(x)
    x = BatchNormalization(name='norm_9')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 10
    x = Conv2D(256, (1, 1), strides=(1, 1), padding='same', name='conv_10', use_bias=False)(x)
    x = BatchNormalization(name='norm_10')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 11
    x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_11', use_bias=False)(x)
    x = BatchNormalization(name='norm_11')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 12
    x = Conv2D(256, (1, 1), strides=(1, 1), padding='same', name='conv_12', use_bias=False)(x)
    x = BatchNormalization(name='norm_12')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 13
    x = Conv2D(512, (3, 3), strides=(1, 1), padding='same', name='conv_13', use_bias=False)(x)
    x = BatchNormalization(name='norm_13')(x)
    x = LeakyReLU(alpha=0.1)(x)

    skip_connection = x
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 14
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_14', use_bias=False)(x)
    x = BatchNormalization(name='norm_14')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 15
    x = Conv2D(512, (1, 1), strides=(1, 1), padding='same', name='conv_15', use_bias=False)(x)
    x = BatchNormalization(name='norm_15')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 16
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_16', use_bias=False)(x)
    x = BatchNormalization(name='norm_16')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 17
    x = Conv2D(512, (1, 1), strides=(1, 1), padding='same', name='conv_17', use_bias=False)(x)
    x = BatchNormalization(name='norm_17')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 18
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_18', use_bias=False)(x)
    x = BatchNormalization(name='norm_18')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 19
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_19', use_bias=False)(x)
    x = BatchNormalization(name='norm_19')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 20
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_20', use_bias=False)(x)
    x = BatchNormalization(name='norm_20')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 21: compress the skip branch and rearrange it to match x's resolution
    skip_connection = Conv2D(64, (1, 1), strides=(1, 1), padding='same', name='conv_21', use_bias=False)(skip_connection)
    skip_connection = BatchNormalization(name='norm_21')(skip_connection)
    skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)

    x = concatenate([skip_connection, x])

    # Layer 22
    x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_22', use_bias=False)(x)
    x = BatchNormalization(name='norm_22')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 23
    x = Conv2D(BOX * (4 + 1 + CLASS), (1, 1), strides=(1, 1), padding='same', name='conv_23')(x)
    output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

    # small hack to allow true_boxes to be registered when Keras builds the model
    # for more information: https://github.com/fchollet/keras/issues/2790
    output = Lambda(lambda args: args[0])([output, true_boxes])

    model = Model([input_image, true_boxes], output)
    return model
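# space_to_depth_x2 is referenced by the Lambda layer above but not defined in
# this section. A minimal sketch, assuming a TensorFlow backend (this mirrors
# the common YOLOv2 Keras ports):
import tensorflow as tf

def space_to_depth_x2(x):
    # rearrange each 2x2 spatial block into channels: (H, W, C) -> (H/2, W/2, 4C)
    return tf.nn.space_to_depth(x, block_size=2)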