Ejemplo n.º 1
0
def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None):
    """Sum ``input_feature`` with ``residual``, projecting the input if needed.

    When the two tensors differ in spatial size or in channel count, the
    shortcut branch goes through a strided 1x1 convolution followed by batch
    normalization so that its shape matches ``residual``. Otherwise the input
    is added unchanged (identity shortcut).

    Args:
        input_feature: tensor entering the residual block.
        residual: tensor produced by the residual branch.
        conv_name_base: optional name prefix; ``'1'`` is appended to name the
            projection convolution. ``None`` leaves the layer unnamed.
        bn_name_base: optional name prefix; ``'1'`` is appended to name the
            batch-normalization layer. ``None`` leaves the layer unnamed.

    Returns:
        The output of ``add([shortcut, residual])``.
    """
    in_shape = K.int_shape(input_feature)
    res_shape = K.int_shape(residual)

    # The ratios should already be integral when the architecture is
    # configured correctly; round() only absorbs floating-point noise.
    row_stride = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
    col_stride = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
    channels_differ = in_shape[CHANNEL_AXIS] != res_shape[CHANNEL_AXIS]

    # Identity shortcut: shapes already agree.
    if not (row_stride > 1 or col_stride > 1 or channels_differ):
        return add([input_feature, residual])

    # Projection shortcut: 1x1 convolution that strides and widens the input.
    print('reshaping via a convolution...')
    conv_name = None if conv_name_base is None else conv_name_base + '1'
    projection_conv = Conv2D(
        filters=res_shape[CHANNEL_AXIS],
        kernel_size=(1, 1),
        strides=(row_stride, col_stride),
        padding="valid",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(0.0001),
        name=conv_name,
    )
    projected = projection_conv(input_feature)

    bn_name = None if bn_name_base is None else bn_name_base + '1'
    projected = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)(projected)

    return add([projected, residual])
Ejemplo n.º 2
0
def ___conv4_block(input, k=1, dropout=0.0):
    """Residual block of two 3x3 convolutions with ``64 * k`` filters each.

    Args:
        input: tensor fed to the block.
        k: width multiplier; every convolution uses ``64 * k`` filters.
        dropout: dropout rate applied between the two convolutions; skipped
            when it is not greater than 0.

    Returns:
        Element-wise sum of the (possibly projected) input and the
        convolutional branch.
    """
    n_filters = 64 * k
    channel_axis = 1 if K.image_dim_ordering() == 'th' else -1

    # Shortcut branch: project with a linear 1x1 convolution only when the
    # input does not already carry ``n_filters`` channels.
    shortcut = input
    if shortcut._keras_shape[channel_axis] != n_filters:
        shortcut = Conv2D(n_filters, (1, 1), activation='linear',
                          padding='same')(shortcut)

    # Main branch: conv -> BN -> ReLU [-> dropout] -> conv -> BN -> ReLU.
    branch = Conv2D(n_filters, (3, 3), padding='same')(input)
    branch = BatchNormalization(axis=channel_axis)(branch)
    branch = Activation('relu')(branch)

    if dropout > 0.0:
        branch = Dropout(dropout)(branch)

    branch = Conv2D(n_filters, (3, 3), padding='same')(branch)
    branch = BatchNormalization(axis=channel_axis)(branch)
    branch = Activation('relu')(branch)

    return add([shortcut, branch])
Ejemplo n.º 3
0
def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0

    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1

        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top
        x = Conv2D(conv['filter'],
                   conv['kernel'],
                   strides=conv['stride'],
                   padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top
                   name='conv_' + str(conv['layer_idx']),
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)

    return add([skip_connection, x]) if skip else x
Ejemplo n.º 4
0
def _shortcut3d(input, residual):
    """Sum ``input`` with ``residual`` for 3D tensors, projecting if needed.

    If the two tensors differ along any of the three spatial dimensions or in
    channel count, ``input`` first goes through a strided 1x1x1 convolution
    so that it matches ``residual``; otherwise it is added unchanged.

    Args:
        input: tensor entering the residual block.
        residual: tensor produced by the residual branch.

    Returns:
        The output of ``add([shortcut, residual])``.
    """
    in_shape = input._keras_shape
    res_shape = residual._keras_shape

    # Per-dimension stride: input size over residual size, rounded up.
    strides = tuple(
        ceil(in_shape[axis] / res_shape[axis])
        for axis in (DIM1_AXIS, DIM2_AXIS, DIM3_AXIS)
    )
    same_channels = res_shape[CHANNEL_AXIS] == in_shape[CHANNEL_AXIS]

    # Identity shortcut when nothing needs adapting.
    if same_channels and not any(stride > 1 for stride in strides):
        return add([input, residual])

    projection = Conv3D(
        filters=res_shape[CHANNEL_AXIS],
        kernel_size=(1, 1, 1),
        strides=strides,
        kernel_initializer="he_normal",
        padding="valid",
        kernel_regularizer=l2(1e-4),
    )
    return add([projection(input), residual])
# Image branch: a 256-unit dense layer with ReLU on top of reg_1, which is
# connected to the inp layer defined above.
out_1 = Dense(256, activation='relu')(reg_1)

# Caption branch: input layer sized to the maximum caption length, feeding the LSTM.
inp_2 = Input(shape=(max_length, ))
# The embedding layer is a lookup table that maps every word index in the
# vocabulary to an embedding_dim-dimensional vector (200 in the original
# comment); each incoming word is replaced by its vector.
# Note: the embedding layer must be attached directly to the input layer.
# mask_zero=True marks the zero padding so that later layers ignore it, which
# allows variable-length captions.
hidden_1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inp_2)
reg_2 = Dropout(0.5)(hidden_1)
# LSTM with 256 output units, fed by the regularized embeddings.
out_2 = LSTM(256)(reg_2)

# Merge the two branches by adding their outputs; both have 256 units, so the
# merged tensor keeps that size.
decoder1 = add([out_1, out_2])
# Fully connected layer on top of the merged features.
decoder2 = Dense(256, activation='relu')(decoder1)
# Final output: a probability distribution over the vocabulary.
output = Dense(vocab_size, activation='softmax')(decoder2)

# Build the model from the two inputs and the single output.
model = Model(inputs=[inp_1, inp_2], outputs=output)
model.summary()

# Load the pre-trained embedding matrix and freeze it (trainable = False),
# since those weights are already trained.
# NOTE(review): assumes model.layers[2] is the Embedding layer — the index
# depends on layer ordering; confirm against the model.summary() output.
model.layers[2].set_weights([embedding_matrix])
model.layers[2].trainable = False
# Adam combines ideas from two widely used optimizers: AdaGrad and RMSprop.
# categorical_crossentropy is the multi-class counterpart of binary
# cross-entropy (which only handles 2 classes).
model.compile(loss='categorical_crossentropy', optimizer='adam')
    def CreateModel(self):
        '''Build and compile the CNN + GRU acoustic model trained with CTC loss.

        The model is defined with the Keras functional API:

          * input of shape (self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1);
          * four pairs of 3x3 Conv2D layers (32, 64, 128 and 128 filters)
            with Dropout in between, each pair followed by MaxPooling2D;
          * Reshape to (200, 3200), Dropout and a Dense(128) layer;
          * two stacked pairs of forward / backward GRU layers (the first
            pair is summed, the second pair is concatenated);
          * Dropout, Dense(128), Dropout, Dense(self.MS_OUTPUT_SIZE), softmax;
          * a Lambda layer wrapping self.ctc_lambda_func that outputs the
            CTC loss.

        The original docstring states that the input is a sequence of
        200-dimensional feature vectors and that the maximum length of one
        utterance is set to 1600 frames (about 16 s).

        Returns:
            A tuple ``(model, model_data)``: ``model`` is the compiled
            training model whose output is the CTC loss; ``model_data`` maps
            the audio input to the softmax output ``y_pred``.
        '''
        # NOTE(review): the original comment here said each frame uses
        # 13-dim MFCC features plus their 13-dim first- and second-order
        # deltas, with a maximum signal length of 1500. That disagrees with
        # the docstring (200-dim features, length 1600) and looks stale —
        # TODO confirm.
        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h1 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(input_data)  # convolution layer
        layer_h1 = Dropout(0.1)(layer_h1)
        layer_h2 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h1)  # convolution layer
        layer_h3 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h2)  # pooling layer
        #layer_h3 = Dropout(0.2)(layer_h2) # randomly drop connections to reduce overfitting
        layer_h3 = Dropout(0.2)(layer_h3)
        layer_h4 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h3)  # convolution layer
        layer_h4 = Dropout(0.2)(layer_h4)
        layer_h5 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h4)  # convolution layer
        layer_h6 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h5)  # pooling layer

        layer_h6 = Dropout(0.3)(layer_h6)
        layer_h7 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h6)  # convolution layer
        layer_h7 = Dropout(0.3)(layer_h7)
        layer_h8 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h7)  # convolution layer
        layer_h9 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h8)  # pooling layer

        layer_h9 = Dropout(0.3)(layer_h9)
        layer_h10 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h9)  # convolution layer
        layer_h10 = Dropout(0.4)(layer_h10)
        layer_h11 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h10)  # convolution layer
        # NOTE(review): pool_size=1 presumably leaves the spatial size
        # unchanged, so this pooling layer looks like a no-op — confirm.
        layer_h12 = MaxPooling2D(pool_size=1, strides=None,
                                 padding="valid")(layer_h11)  # pooling layer

        #test=Model(inputs = input_data, outputs = layer_h6)
        #test.summary()

        # Collapse the feature and channel axes into one vector per time step.
        # NOTE(review): the hard-coded (200, 3200) presumably assumes
        # AUDIO_LENGTH=1600 and AUDIO_FEATURE_LENGTH=200 (three 2x poolings
        # give 200 x 25 x 128, and 25 * 128 = 3200) — TODO confirm.
        layer_h13 = Reshape((200, 3200))(layer_h12)  # Reshape layer

        layer_h13 = Dropout(0.4)(layer_h13)
        layer_h14 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h13)  # fully connected layer
        layer_h14 = Dropout(0.4)(layer_h14)
        inner = layer_h14
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer

        # Two stacked recurrent stages, each built from a forward GRU and a
        # GRU created with go_backwards=True.
        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        # The first stage is merged by element-wise sum.
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        # The second stage is merged by concatenation.
        gru2 = concatenate([gru_2, gru_2b])
        #layer_h12 = GRU(128,activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11)

        layer_h15 = Dropout(0.4)(gru2)
        layer_h16 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h15)  # fully connected layer

        layer_h16 = Dropout(0.5)(layer_h16)  # randomly drop connections to reduce overfitting
        layer_h17 = Dense(self.MS_OUTPUT_SIZE,
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h16)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h17)
        # Inference model: audio features in, softmax output out.
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        # Extra inputs that are only consumed by the CTC Lambda layer below.
        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # tensorflow.keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                          name='ctc')(
                              [y_pred, labels, input_length, label_length])

        # Training model: its single output is the CTC loss itself.
        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speeds up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)

        # The 'ctc' layer already outputs the loss, so the Keras loss function
        # simply passes y_pred through and ignores y_true.
        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=ada_d)

        # captures output of softmax so we can decode the output during visualization
        # NOTE(review): test_func is neither used nor returned in this method.
        test_func = K.function([input_data], [y_pred])

        # Runtime message (Chinese): model created and compiled successfully.
        print('[*提示] 创建模型成功,模型编译成功')
        return model, model_data
Ejemplo n.º 7
0
    def CreateModel(self):
        '''Build and compile the CNN + GRU acoustic model trained with CTC loss.

        The model is defined with the Keras functional API:

          * input of shape (self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1);
          * four pairs of 3x3 Conv2D layers (32, 64, 128 and 128 filters),
            each pair followed by MaxPooling2D (no Dropout is active here);
          * Reshape to (200, 3200) and a Dense(128) layer;
          * two stacked pairs of forward / backward GRU layers (the first
            pair is summed, the second pair is concatenated);
          * Dense(128), Dense(self.MS_OUTPUT_SIZE) and softmax;
          * a Lambda layer wrapping self.ctc_lambda_func that outputs the
            CTC loss.

        The original docstring states that the input is a sequence of
        200-dimensional feature vectors and that the maximum length of one
        utterance is set to 1600 frames (about 16 s).

        Returns:
            A tuple ``(model, model_data)``: ``model`` is the compiled
            training model whose output is the CTC loss; ``model_data`` maps
            the audio input to the softmax output ``y_pred``.
        '''

        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        # Only this first convolution is created with use_bias=False.
        layer_h = Conv2D(32, (3, 3),
                         use_bias=False,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(input_data)  # convolution layer
        #layer_h = Dropout(0.05)(layer_h)
        layer_h = Conv2D(32, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # pooling layer

        #layer_h = Dropout(0.05)(layer_h) # randomly drop connections to reduce overfitting
        layer_h = Conv2D(64, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        #layer_h = Dropout(0.1)(layer_h)
        layer_h = Conv2D(64, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # pooling layer

        #layer_h = Dropout(0.1)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        #layer_h = Dropout(0.15)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        layer_h = MaxPooling2D(pool_size=2, strides=None,
                               padding="valid")(layer_h)  # pooling layer

        #layer_h = Dropout(0.15)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        #layer_h = Dropout(0.2)(layer_h)
        layer_h = Conv2D(128, (3, 3),
                         use_bias=True,
                         activation='relu',
                         padding='same',
                         kernel_initializer='he_normal')(layer_h)  # convolution layer
        # NOTE(review): pool_size=1 presumably leaves the spatial size
        # unchanged, so this pooling layer looks like a no-op — confirm.
        layer_h = MaxPooling2D(pool_size=1, strides=None,
                               padding="valid")(layer_h)  # pooling layer

        #layer_h = Dropout(0.2)(layer_h)
        #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer
        #layer_h = Dropout(0.2)(layer_h)
        #layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # convolution layer
        #layer_h = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h) # pooling layer

        #test=Model(inputs = input_data, outputs = layer_h)
        #test.summary()

        # Collapse the feature and channel axes into one vector per time step.
        # NOTE(review): the hard-coded (200, 3200) presumably assumes
        # AUDIO_LENGTH=1600 and AUDIO_FEATURE_LENGTH=200 (three 2x poolings
        # give 200 x 25 x 128, and 25 * 128 = 3200) — TODO confirm.
        layer_h = Reshape((200, 3200))(layer_h)  # Reshape layer

        #layer_h16 = Dropout(0.3)(layer_h16) # randomly drop connections to reduce overfitting
        layer_h = Dense(128,
                        activation="relu",
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        inner = layer_h
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer

        # Two stacked recurrent stages, each built from a forward GRU and a
        # GRU created with go_backwards=True.
        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        # The first stage is merged by element-wise sum.
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        # The second stage is merged by concatenation.
        gru2 = concatenate([gru_2, gru_2b])

        layer_h = gru2
        #layer_h20 = Dropout(0.4)(gru2)
        layer_h = Dense(128,
                        activation="relu",
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        #layer_h17 = Dropout(0.3)(layer_h17)
        layer_h = Dense(self.MS_OUTPUT_SIZE,
                        use_bias=True,
                        kernel_initializer='he_normal')(layer_h)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h)
        # Inference model: audio features in, softmax output out.
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        # Extra inputs that are only consumed by the CTC Lambda layer below.
        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                          name='ctc')(
                              [y_pred, labels, input_length, label_length])

        # Training model: its single output is the CTC loss itself.
        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speeds up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        #opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06)
        # NOTE: epsilon=10e-8 evaluates to 1e-7, not 1e-8.
        opt = Adam(lr=0.001,
                   beta_1=0.9,
                   beta_2=0.999,
                   decay=0.0,
                   epsilon=10e-8)
        # The 'ctc' layer already outputs the loss, so the Keras loss function
        # simply passes y_pred through and ignores y_true.
        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=opt)

        # captures output of softmax so we can decode the output during visualization
        # NOTE(review): test_func is neither used nor returned in this method.
        test_func = K.function([input_data], [y_pred])

        # An earlier revision printed this same message in Chinese.
        print('[*Info] Create Model Successful, Compiles Model Successful. ')
        return model, model_data