Example #1
def define_stacked_model(members):
	for i in range(len(members)): # update all layers in all models to not be trainable
		model = members[i]
		for layer in model.layers:
			layer.trainable = False
			layer._name = 'ensemble_' + str(i+1) + '_' + layer.name # avoid 'unique layer name' issue
	
	ensemble_visible = [model.input for model in members]  # define multi-headed input
	ensemble_outputs = [model.output for model in members] # collect the output of each model for merging
	merge = concatenate(ensemble_outputs)
	hidden = Dense(10, activation='relu')(merge)
	output = Dense(3, activation='softmax')(hidden)
	model = Model(inputs=ensemble_visible, outputs=output)
	plot_model(model, show_shapes=True, to_file='model_graph.png') # plot graph of ensemble
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) # compile
	return model
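A quick usage sketch for the stacked ensemble above (hedged: members, trainX, and trainy are assumed to exist, with each member already trained on the same input shape; fit_stacked_model is an illustrative name, not from the original). Because the stacked model is multi-headed, the same input array is passed once per member:

from tensorflow.keras.utils import to_categorical

def fit_stacked_model(model, inputX, inputy, epochs=300):
	X = [inputX for _ in range(len(model.input))] # one copy of the input per ensemble head
	y = to_categorical(inputy)                    # one-hot targets for the 3-class softmax
	model.fit(X, y, epochs=epochs, verbose=0)

stacked_model = define_stacked_model(members)
fit_stacked_model(stacked_model, trainX, trainy)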
Example #2
    def CreateModel(self):
        '''
        Define the CNN/LSTM/CTC model using the Keras functional API.

        Input layer: sequence of 200-dimensional feature vectors; the maximum
            length of one utterance is set to 1600 frames (about 16 s)
        Hidden layer 1: 3x3 convolution layer
        Hidden layer 2: pooling layer with a pool size of 2
        Hidden layer 3: Dropout layer dropping 20% of the units, to prevent overfitting
        Hidden layer 4: recurrent layer, LSTM/GRU
        Hidden layer 5: Dropout layer dropping 20% of the units, to prevent overfitting
        Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE units and
            softmax activation
        Output layer: custom CTC layer that uses the CTC loss to produce the
            connectionist temporal classification output
        '''
        # Each frame is represented by 13 MFCC coefficients plus their 13
        # first-order and 13 second-order deltas; the maximum signal sequence
        # length is 1500.
        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h1 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(input_data)  # convolution layer
        layer_h1 = Dropout(0.1)(layer_h1)
        layer_h2 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h1)  # convolution layer
        layer_h3 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h2)  # 池化层
        #layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合
        layer_h3 = Dropout(0.2)(layer_h3)
        layer_h4 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h3)  # convolution layer
        layer_h4 = Dropout(0.2)(layer_h4)
        layer_h5 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h4)  # convolution layer
        layer_h6 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h5)  # 池化层

        layer_h6 = Dropout(0.3)(layer_h6)
        layer_h7 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h6)  # convolution layer
        layer_h7 = Dropout(0.3)(layer_h7)
        layer_h8 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h7)  # convolution layer
        layer_h9 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h8)  # 池化层

        layer_h9 = Dropout(0.3)(layer_h9)
        layer_h10 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h9)  # convolution layer
        layer_h10 = Dropout(0.4)(layer_h10)
        layer_h11 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h10)  # convolution layer
        layer_h12 = MaxPooling2D(pool_size=1, strides=None,
                                 padding="valid")(layer_h11)  # 池化层

        #test=Model(inputs = input_data, outputs = layer_h6)
        #test.summary()

        layer_h13 = Reshape((200, 3200))(layer_h12)  # Reshape layer

        layer_h13 = Dropout(0.4)(layer_h13)
        layer_h14 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h13)  # fully connected layer
        layer_h14 = Dropout(0.4)(layer_h14)
        inner = layer_h14
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer

        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        gru2 = concatenate([gru_2, gru_2b])
        #layer_h12 = GRU(128,activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11)

        layer_h15 = Dropout(0.4)(gru2)
        layer_h16 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h15)  # fully connected layer

        layer_h16 = Dropout(0.5)(layer_h16)  # randomly drop some connections to reduce overfitting
        layer_h17 = Dense(self.MS_OUTPUT_SIZE,
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h16)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h17)
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # tensorflow.keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,),
                          name='ctc')([y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speed up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)

        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                      optimizer=ada_d)

        # captures output of softmax so we can decode the output during visualization
        test_func = K.function([input_data], [y_pred])

        print('[*Info] Model created and compiled successfully.')
        return model, model_data
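Both CreateModel variants in this listing delegate the loss to self.ctc_lambda_func, which is not part of the excerpt. A minimal sketch of what such a method typically looks like, assuming the Keras backend's built-in CTC batch cost (the commented frame-skipping line is an optional variant seen in some CTC examples, not confirmed by this code):

    def ctc_lambda_func(self, args):
        # Assumes: from tensorflow.keras import backend as K (K is already used below).
        # The Lambda layer passes [y_pred, labels, input_length, label_length].
        y_pred, labels, input_length, label_length = args
        # Some implementations skip the first frames of y_pred here (e.g.
        # y_pred = y_pred[:, 2:, :]) because the earliest conv outputs are noisy.
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)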
Example #3
    def CreateModel(self):
        '''
        Define the CNN/LSTM/CTC model using the Keras functional API.

        Input layer: sequence of 200-dimensional feature vectors; the maximum
            length of one utterance is set to 1600 frames (about 16 s)
        Hidden layers: convolution/pooling blocks, 3x3 kernels, pool size 2
        Hidden layer: fully connected layer
        Output layer: fully connected layer with self.MS_OUTPUT_SIZE units and
            softmax activation
        CTC layer: uses the CTC loss to produce the connectionist temporal
            classification output
        '''

        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h = Conv2D(32, (3, 3),
                         use_bias=False,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(input_data)  # convolution layer
        #layer_h = Dropout(0.05)(layer_h)
        layer_h = Conv2D(32, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.05)(layer_h) # randomly drop some connections to reduce overfitting
        layer_h = Conv2D(64, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        #layer_h = Dropout(0.1)(layer_h)
        layer_h = Conv2D(64, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.1)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        #layer_h = Dropout(0.15)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.15)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        #layer_h = Dropout(0.2)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        layer_h = MaxPooling2D(pool_size=1, strides=None,
                               padding="valid")(layer_h)  # 池化层

        #layer_h = Dropout(0.2)(layer_h)
        #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer
        #layer_h = Dropout(0.2)(layer_h)
        #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer
        #layer_h = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h) # pooling layer

        #test=Model(inputs = input_data, outputs = layer_h)
        #test.summary()

        layer_h = Reshape((200, 3200))(layer_h)  # Reshape layer

        #layer_h16 = Dropout(0.3)(layer_h16) # randomly drop some connections to reduce overfitting
        layer_h = Dense(128,
                        activation="relu",
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        inner = layer_h
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer

        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        gru2 = concatenate([gru_2, gru_2b])

        layer_h = gru2
        #layer_h20 = Dropout(0.4)(gru2)
        layer_h = Dense(128,
                        activation="relu",
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        #layer_h17 = Dropout(0.3)(layer_h17)
        layer_h = Dense(self.MS_OUTPUT_SIZE,
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h)
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,),
                          name='ctc')([y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speed up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        #opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06)
        opt = Adam(lr=0.001,
                   beta_1=0.9,
                   beta_2=0.999,
                   decay=0.0,
                   epsilon=10e-8)
        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                      optimizer=opt)

        # captures output of softmax so we can decode the output during visualization
        test_func = K.function([input_data], [y_pred])

        print('[*Info] Model created and compiled successfully.')
        return model, model_data
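Because the compiled loss is just lambda y_true, y_pred: y_pred, the real CTC loss is the output of the 'ctc' Lambda layer, and the targets passed to training are dummies. A hedged usage sketch; every shape below is an illustrative assumption (batch size 4, label length 64), not a value taken from this code:

import numpy as np

batch_size = 4
x = np.zeros((batch_size, 1600, 200, 1))      # (AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, 1)
labels = np.zeros((batch_size, 64))           # assumes label_max_string_length == 64
input_length = np.full((batch_size, 1), 200)  # time steps remaining after the 2*2*2 pooling
label_length = np.full((batch_size, 1), 10)   # true length of each label sequence

model.train_on_batch(
    {'the_input': x, 'the_labels': labels,
     'input_length': input_length, 'label_length': label_length},
    {'ctc': np.zeros((batch_size,))})         # dummy target; the layer output is the loss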
Example #4
def build_UNet2D_4L(inp_shape, k_size=3):
    merge_axis = -1  # Feature maps are concatenated along last axis (for tf backend)
    data = Input(shape=inp_shape)
    conv1 = Convolution2D(filters=32,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(data)
    conv1 = Convolution2D(filters=32,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Convolution2D(filters=64,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(pool1)
    conv2 = Convolution2D(filters=64,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Convolution2D(filters=64,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(pool2)
    conv3 = Convolution2D(filters=64,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Convolution2D(filters=128,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(pool3)
    conv4 = Convolution2D(filters=128,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    conv5 = Convolution2D(filters=256,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(pool4)

    up1 = UpSampling2D(size=(2, 2))(conv5)
    conv6 = Convolution2D(filters=256,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(up1)
    conv6 = Convolution2D(filters=256,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv6)
    merged1 = concatenate([conv4, conv6], axis=merge_axis)
    conv6 = Convolution2D(filters=256,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(merged1)

    up2 = UpSampling2D(size=(2, 2))(conv6)
    conv7 = Convolution2D(filters=256,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(up2)
    conv7 = Convolution2D(filters=256,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv7)
    merged2 = concatenate([conv3, conv7], axis=merge_axis)
    conv7 = Convolution2D(filters=256,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(merged2)

    up3 = UpSampling2D(size=(2, 2))(conv7)
    conv8 = Convolution2D(filters=128,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(up3)
    conv8 = Convolution2D(filters=128,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv8)
    merged3 = concatenate([conv2, conv8], axis=merge_axis)
    conv8 = Convolution2D(filters=128,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(merged3)

    up4 = UpSampling2D(size=(2, 2))(conv8)
    conv9 = Convolution2D(filters=64,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(up4)
    conv9 = Convolution2D(filters=64,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(conv9)
    merged4 = concatenate([conv1, conv9], axis=merge_axis)
    conv9 = Convolution2D(filters=64,
                          kernel_size=k_size,
                          padding='same',
                          activation='relu')(merged4)

    conv10 = Convolution2D(filters=1,
                           kernel_size=k_size,
                           padding='same',
                           activation='sigmoid')(conv9)

    output = conv10
    model = Model(data, output)
    return model
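A hedged compile-and-run sketch for build_UNet2D_4L (the 256x256 single-channel input shape and the optimizer/loss choice are assumptions, not from this code). Any height and width divisible by 16 work, because the four 2x2 poolings must be exactly undone by the four 2x2 upsamplings before each skip concatenation:

model = build_UNet2D_4L(inp_shape=(256, 256, 1))  # H and W must be divisible by 16
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()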
Example #5
def make_yolov3_model():
    input_image = Input(shape=(None, None, 3))

    # Layer  0 => 4
    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])

    # Layer  5 => 8
    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])

    # Layer  9 => 11
    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])

    # Layer 12 => 15
    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])

    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])

    skip_36 = x

    # Layer 37 => 40
    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])

    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])

    skip_61 = x

    # Layer 62 => 65
    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])

    # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])

    # Layer 75 => 79
    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)

    # Layer 80 => 82
    yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
                              {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)

    # Layer 83 => 86
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)

    # Layer 92 => 94
    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)

    # Layer 95 => 98
    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106
    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)

    model = Model(input_image, [yolo_82, yolo_94, yolo_106])
    return model
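make_yolov3_model relies on a _conv_block helper that is outside this excerpt. The sketch below is one widely used implementation that matches the dict keys above ('filter', 'kernel', 'stride', 'bnorm', 'leaky', 'layer_idx') and the skip argument; the top/left ZeroPadding2D before stride-2 convolutions and the residual add are assumptions based on common YOLOv3 Keras ports, not confirmed by this code:

from tensorflow.keras.layers import (Conv2D, BatchNormalization, LeakyReLU,
                                     ZeroPadding2D, add)

def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0
    for conv in convs:
        # Remember the tensor two convs before the end as the residual branch.
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        if conv['stride'] > 1:
            # Darknet pads only top/left before a stride-2 convolution.
            x = ZeroPadding2D(((1, 0), (1, 0)))(x)
        x = Conv2D(conv['filter'], conv['kernel'], strides=conv['stride'],
                   padding='valid' if conv['stride'] > 1 else 'same',
                   name='conv_' + str(conv['layer_idx']),
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']:
            x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)
    return add([skip_connection, x]) if skip else x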
Example #6
def architecture(config):
    input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
    true_boxes = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER, 4))

    # Layer 1
    x = Conv2D(32, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_1',
               use_bias=False)(input_image)
    x = BatchNormalization(name='norm_1')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 2
    x = Conv2D(64, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_2',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_2')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 3
    x = Conv2D(128, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_3',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_3')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 4
    x = Conv2D(64, (1, 1),
               strides=(1, 1),
               padding='same',
               name='conv_4',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_4')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 5
    x = Conv2D(128, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_5',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_5')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 6
    x = Conv2D(256, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_6',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_6')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 7
    x = Conv2D(128, (1, 1),
               strides=(1, 1),
               padding='same',
               name='conv_7',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_7')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 8
    x = Conv2D(256, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_8',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_8')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 9
    x = Conv2D(512, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_9',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_9')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 10
    x = Conv2D(256, (1, 1),
               strides=(1, 1),
               padding='same',
               name='conv_10',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_10')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 11
    x = Conv2D(512, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_11',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_11')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 12
    x = Conv2D(256, (1, 1),
               strides=(1, 1),
               padding='same',
               name='conv_12',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_12')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 13
    x = Conv2D(512, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_13',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_13')(x)
    x = LeakyReLU(alpha=0.1)(x)

    skip_connection = x

    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Layer 14
    x = Conv2D(1024, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_14',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_14')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 15
    x = Conv2D(512, (1, 1),
               strides=(1, 1),
               padding='same',
               name='conv_15',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_15')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 16
    x = Conv2D(1024, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_16',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_16')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 17
    x = Conv2D(512, (1, 1),
               strides=(1, 1),
               padding='same',
               name='conv_17',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_17')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 18
    x = Conv2D(1024, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_18',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_18')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 19
    x = Conv2D(1024, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_19',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_19')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 20
    x = Conv2D(1024, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_20',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_20')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 21
    skip_connection = Conv2D(64, (1, 1),
                             strides=(1, 1),
                             padding='same',
                             name='conv_21',
                             use_bias=False)(skip_connection)
    skip_connection = BatchNormalization(name='norm_21')(skip_connection)
    skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)

    x = concatenate([skip_connection, x])

    # Layer 22
    x = Conv2D(1024, (3, 3),
               strides=(1, 1),
               padding='same',
               name='conv_22',
               use_bias=False)(x)
    x = BatchNormalization(name='norm_22')(x)
    x = LeakyReLU(alpha=0.1)(x)

    # Layer 23
    x = Conv2D(BOX * (4 + 1 + CLASS), (1, 1),
               strides=(1, 1),
               padding='same',
               name='conv_23')(x)
    output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

    # small hack to allow true_boxes to be registered when Keras builds the model
    # for more information: https://github.com/fchollet/keras/issues/2790
    output = Lambda(lambda args: args[0])([output, true_boxes])

    model = Model([input_image, true_boxes], output)
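The Lambda in Layer 21 wraps a space_to_depth_x2 helper that is not shown here. A minimal sketch, assuming the TensorFlow backend: this is the YOLOv2 "reorg" step that folds each 2x2 spatial block into channels, so the higher-resolution skip branch matches the coarse grid (e.g. 26x26 down to 13x13 for a 416x416 input) before the concatenate:

import tensorflow as tf

def space_to_depth_x2(x):
    # (batch, H, W, C) -> (batch, H/2, W/2, 4*C)
    return tf.nn.space_to_depth(x, block_size=2)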