def flor(input_size, d_model, learning_rate):
    """Gated Convolutional Recurrent Neural Network by Flor et al.

    Builds a gated-conv CNN front-end followed by a bidirectional-GRU head
    and returns ``(input_tensor, output_tensor, optimizer)``.
    """

    def _gated_stage(tensor, filters, kernel, strides, constrained=False, dropout=None):
        # One stage: Conv -> PReLU -> BatchNorm(renorm) -> FullGatedConv
        # (optionally max-norm constrained), then optional dropout.
        tensor = Conv2D(filters=filters, kernel_size=kernel, strides=strides,
                        padding="same", kernel_initializer="he_uniform")(tensor)
        tensor = PReLU(shared_axes=[1, 2])(tensor)
        tensor = BatchNormalization(renorm=True)(tensor)
        if constrained:
            tensor = FullGatedConv2D(filters=filters, kernel_size=(3, 3), padding="same",
                                     kernel_constraint=MaxNorm(4, [0, 1, 2]))(tensor)
        else:
            tensor = FullGatedConv2D(filters=filters, kernel_size=(3, 3), padding="same")(tensor)
        if dropout is not None:
            tensor = Dropout(rate=dropout)(tensor)
        return tensor

    input_data = Input(name="input", shape=input_size)

    x = _gated_stage(input_data, 16, (3, 3), (2, 2))
    x = _gated_stage(x, 32, (3, 3), (1, 1))
    x = _gated_stage(x, 40, (2, 4), (2, 4), constrained=True, dropout=0.2)
    x = _gated_stage(x, 48, (3, 3), (1, 1), constrained=True, dropout=0.2)
    x = _gated_stage(x, 56, (2, 4), (2, 4), constrained=True, dropout=0.2)

    # Final conv stage has no gated conv; it ends in width-halving max pooling.
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
               padding="same", kernel_initializer="he_uniform")(x)
    x = PReLU(shared_axes=[1, 2])(x)
    x = BatchNormalization(renorm=True)(x)
    x = MaxPooling2D(pool_size=(1, 2), strides=(1, 2), padding="valid")(x)

    # Collapse the two trailing feature axes into one so the map becomes a
    # (batch, steps, features) sequence for the recurrent head.
    cnn_shape = x.get_shape()
    units = cnn_shape[2] * cnn_shape[3]

    seq = Reshape((cnn_shape[1], units))(x)
    seq = Bidirectional(GRU(units=units, return_sequences=True, dropout=0.5))(seq)
    seq = Dense(units=units * 2)(seq)
    seq = Bidirectional(GRU(units=units, return_sequences=True, dropout=0.5))(seq)
    output_data = Dense(units=d_model, activation="softmax")(seq)

    if learning_rate is None:
        learning_rate = 5e-4
    optimizer = RMSprop(learning_rate=learning_rate)

    return (input_data, output_data, optimizer)
def model_01(self, input_size, d_model):
    """Gated-conv CNN front-end with a fixed 128-unit BGRU head and a
    softmax projection onto ``d_model`` classes.

    Returns ``(input_tensor, output_tensor)``.
    """

    def _block(t, filters, kernel, strides, constrained=False, dropout=None):
        # Conv -> PReLU -> BatchNorm(renorm) -> FullGatedConv (+ optional dropout).
        t = Conv2D(filters=filters, kernel_size=kernel, strides=strides,
                   padding="same", kernel_initializer="he_uniform")(t)
        t = PReLU(shared_axes=[1, 2])(t)
        t = BatchNormalization(renorm=True)(t)
        if constrained:
            t = FullGatedConv2D(filters=filters, kernel_size=(3, 3), padding="same",
                                kernel_constraint=MaxNorm(4, [0, 1, 2]))(t)
        else:
            t = FullGatedConv2D(filters=filters, kernel_size=(3, 3), padding="same")(t)
        if dropout is not None:
            t = Dropout(rate=dropout)(t)
        return t

    input_data = Input(name="input", shape=input_size)

    feat = _block(input_data, 16, (3, 3), (2, 2))
    feat = _block(feat, 32, (3, 3), (1, 1))
    feat = _block(feat, 40, (2, 4), (2, 4), constrained=True, dropout=0.2)
    feat = _block(feat, 48, (3, 3), (1, 1), constrained=True, dropout=0.2)
    feat = _block(feat, 56, (2, 4), (2, 4), constrained=True, dropout=0.2)

    # Last conv stage: no gated conv, finished with max pooling instead.
    feat = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                  padding="same", kernel_initializer="he_uniform")(feat)
    feat = PReLU(shared_axes=[1, 2])(feat)
    feat = BatchNormalization(renorm=True)(feat)
    feat = MaxPooling2D(pool_size=(1, 2), strides=(1, 2), padding="valid")(feat)

    # Collapse the two trailing axes into one feature axis per step.
    shape = feat.get_shape()
    seq = Reshape((shape[1], shape[2] * shape[3]))(feat)

    seq = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(seq)
    seq = Dense(units=256)(seq)
    seq = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(seq)
    output_data = Dense(units=d_model, activation="softmax")(seq)

    return (input_data, output_data)
def sr_resnet_simp(input_shape, scale_ratio):
    """Simplified multi-scale super-resolution network.

    Three weight-normalized conv stages build a feature pyramid (full, 1/2,
    1/4 resolution); subpixel (pixel-shuffle) layers upsample back up, with
    the skip paths concatenated in, before a final 3-channel projection.

    Parameters
    ----------
    input_shape : tuple
        (H, W, C) of the input image. H and W should be divisible by 4
        because of the two stride-2 stages below.
    scale_ratio : int
        Per-stage upsampling factor.
        NOTE(review): this argument is currently IGNORED — it is overwritten
        with 2 below, and the hard-coded ``// 4`` / ``// 2`` shapes passed to
        the subpixel layers only make sense for a factor of 2. Kept as-is to
        preserve behavior; confirm before generalizing.

    Returns
    -------
    Model
        Keras model mapping the input image to a 3-channel output.
    """
    scale_ratio = 2     # overrides the parameter — see NOTE in the docstring
    reg_scale = 0       # l2(0): weight regularization effectively disabled
    test_initializer = 'he_normal'
    num_filters = [256, 128, 128, 80]  # last entry is currently unused

    inputs = Input(shape=input_shape)

    # Full-resolution features.
    x1 = Conv2DWeightNorm(num_filters[0], kernel_size=3, strides=1, padding='same',
                          kernel_initializer=test_initializer,
                          kernel_regularizer=l2(reg_scale))(inputs)
    x1 = PReLU(alpha_initializer='zero', shared_axes=[1, 2])(x1)

    # 1/2-resolution features.
    x2 = Conv2DWeightNorm(num_filters[1], kernel_size=3, strides=2, padding='same',
                          kernel_initializer=test_initializer,
                          kernel_regularizer=l2(reg_scale))(x1)
    x2 = PReLU(alpha_initializer='zero', shared_axes=[1, 2])(x2)

    # 1/4-resolution features.
    x3 = Conv2DWeightNorm(num_filters[2], kernel_size=3, strides=2, padding='same',
                          kernel_initializer=test_initializer,
                          kernel_regularizer=l2(reg_scale))(x2)
    x3 = PReLU(alpha_initializer='zero', shared_axes=[1, 2])(x3)

    # Upsample 1/4 -> 1/2 and merge with the 1/2-resolution skip path.
    x3_x2 = SubpixelConv2D([None, input_shape[0] // 4, input_shape[1] // 4],
                           scale=scale_ratio, name='sub_1')(x3)
    x2_concat = concatenate([x2, x3_x2])

    # Upsample 1/2 -> full and merge with the full-resolution skip path.
    x2_x2 = SubpixelConv2D([None, input_shape[0] // 2, input_shape[1] // 2],
                           scale=scale_ratio, name='sub_2')(x2_concat)
    x1_concat = concatenate([x1, x2_x2])

    # Final upsample: full -> 2x (the super-resolved output scale).
    x1_2x = SubpixelConv2D([None, input_shape[0], input_shape[1]],
                           scale=scale_ratio, name='sub_3')(x1_concat)

    # Project back to a 3-channel image.
    outputs = Conv2DWeightNorm(3, kernel_size=3, strides=1, padding='same',
                               kernel_initializer=test_initializer,
                               kernel_regularizer=l2(reg_scale))(x1_2x)

    return Model(inputs=inputs, outputs=outputs)
def ctc_interspeech_TIMIT_Model(d):
    """Build and compile the Interspeech TIMIT CTC model (real or quaternion).

    Parameters
    ----------
    d : config object
        Expected fields: ``num_layers``, ``start_filter``, ``act``, ``aact``,
        ``dropout``, ``l2``, ``model`` ("real" or "quaternion") and, for
        quaternion models, ``quat_init``.

    Returns
    -------
    (ctc_model, conv_shape)
        The compiled CTC training model (inputs: features, labels,
        input_length, label_length; output: per-sample CTC loss) and the
        static shape of the last convolutional feature map.
    """
    n = d.num_layers
    sf = d.start_filter
    activation = d.act
    advanced_act = d.aact
    drop_prob = d.dropout

    inputShape = (778, 3, 40)  # (3,41,None)
    filsize = (3, 5)
    max_num_class = 61

    # When an advanced activation (e.g. PReLU) is requested, the conv layers
    # themselves stay linear and the activation is applied as its own layer.
    if advanced_act != "none":
        activation = 'linear'

    convArgs = {
        "activation": activation,
        "data_format": "channels_first",
        "padding": "same",
        "bias_initializer": "zeros",
        "kernel_regularizer": l2(d.l2),
        "kernel_initializer": "random_uniform",
    }
    denseArgs = {
        "activation": d.act,
        "kernel_regularizer": l2(d.l2),
        "kernel_initializer": "random_uniform",
        "bias_initializer": "zeros",
        "use_bias": True
    }

    # Quaternion models use their dedicated kernel initializer.
    if d.model == "quaternion":
        convArgs.update({"kernel_initializer": d.quat_init})

    # Input layer: the quaternion input carries 4 channels instead of 3.
    if d.model == "quaternion":
        I = Input(shape=(778, 4, 40))
    else:
        I = Input(shape=inputShape)

    # Input stage.
    if d.model == "real":
        O = Conv2D(sf, filsize, name='conv', use_bias=True, **convArgs)(I)
        if advanced_act == "prelu":
            # NOTE(review): shared_axes includes axis 0 (batch) — confirm intent.
            O = PReLU(shared_axes=[1, 0])(O)
    else:
        O = QuaternionConv2D(sf, filsize, name='conv', use_bias=True, **convArgs)(I)
        if advanced_act == "prelu":
            O = PReLU(shared_axes=[1, 0])(O)

    # Pooling.
    O = MaxPooling2D(pool_size=(1, 3), padding='same')(O)

    # Stage 1: n//2 conv blocks with sf filters (names conv0 .. conv(n//2 - 1)).
    for i in range(0, n // 2):
        if d.model == "real":
            O = Conv2D(sf, filsize, name='conv' + str(i), use_bias=True, **convArgs)(O)
            if advanced_act == "prelu":
                O = PReLU(shared_axes=[1, 0])(O)
            O = Dropout(drop_prob)(O)
        else:
            O = QuaternionConv2D(sf, filsize, name='conv' + str(i), use_bias=True, **convArgs)(O)
            if advanced_act == "prelu":
                O = PReLU(shared_axes=[1, 0])(O)
            O = Dropout(drop_prob)(O)

    # Stage 2: n//2 conv blocks with 2*sf filters.
    # FIX: use integer division for the layer-name suffix — ``i + n / 2`` is a
    # float in Python 3 and produced names like "conv2.0".
    for i in range(0, n // 2):
        if d.model == "real":
            O = Conv2D(sf * 2, filsize, name='conv' + str(i + n // 2),
                       use_bias=True, **convArgs)(O)
            if advanced_act == "prelu":
                O = PReLU(shared_axes=[1, 0])(O)
            O = Dropout(drop_prob)(O)
        else:
            O = QuaternionConv2D(sf * 2, filsize, name='conv' + str(i + n // 2),
                                 use_bias=True, **convArgs)(O)
            if advanced_act == "prelu":
                O = PReLU(shared_axes=[1, 0])(O)
            O = Dropout(drop_prob)(O)

    conv_shape = O.get_shape()

    # Collapse the two trailing feature axes so the result is a
    # (batch, time, features) sequence for the dense/CTC head.
    O = tf.keras.layers.Reshape(
        target_shape=[-1, K.int_shape(O)[2] * K.int_shape(O)[3]])(O)

    # Dense head: three blocks, dropout between the first two pairs only.
    if d.model == "quaternion":
        O = TimeDistributed(QuaternionDense(256, **denseArgs))(O)
        if advanced_act == "prelu":
            O = PReLU(shared_axes=[1, 0])(O)
        O = Dropout(drop_prob)(O)
        O = TimeDistributed(QuaternionDense(256, **denseArgs))(O)
        if advanced_act == "prelu":
            O = PReLU(shared_axes=[1, 0])(O)
        O = Dropout(drop_prob)(O)
        O = TimeDistributed(QuaternionDense(256, **denseArgs))(O)
        if advanced_act == "prelu":
            O = PReLU(shared_axes=[1, 0])(O)
    else:
        O = TimeDistributed(Dense(1024, **denseArgs))(O)
        if advanced_act == "prelu":
            O = PReLU(shared_axes=[1, 0])(O)
        O = Dropout(drop_prob)(O)
        O = TimeDistributed(Dense(1024, **denseArgs))(O)
        if advanced_act == "prelu":
            O = PReLU(shared_axes=[1, 0])(O)
        O = Dropout(drop_prob)(O)
        O = TimeDistributed(Dense(1024, **denseArgs))(O)
        if advanced_act == "prelu":
            O = PReLU(shared_axes=[1, 0])(O)

    # Per-frame class posteriors over the 61 TIMIT phone classes.
    y_pred = TimeDistributed(
        Dense(61,
              activation="softmax",
              kernel_regularizer=l2(d.l2),
              use_bias=True,
              bias_initializer="zeros",
              kernel_initializer='random_uniform'))(O)

    # CTC loss is computed inside a Lambda layer because Keras loss functions
    # cannot take extra tensors (labels/lengths) as arguments.
    def ctc_lambda_func(args):
        y_pred, labels, input_length, label_length = args
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    labels = Input(shape=[max_num_class], dtype='float32')  # max_num_class=61
    input_length = Input(shape=[1], dtype='int64')
    label_length = Input(shape=[1], dtype='int64')

    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence.
    sgd = SGD(learning_rate=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    ctc_model = Model(inputs=[I, labels, input_length, label_length],
                      outputs=loss_out)

    # The real loss is emitted by the 'ctc' Lambda layer, so compile with a
    # pass-through dummy loss.
    ctc_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    return ctc_model, conv_shape
def QCNN_model():
    """Quaternion CNN + CTC training model over 61 phone classes.

    Returns the compiled CTC model together with the static shape of the
    last quaternion-convolution feature map.
    """
    kern = 8
    dropout_prob = 0.3
    advanced_act = 'prelu'
    max_str_len = 61
    denseArgs = {
        "kernel_regularizer": l2(1e-5),
        "kernel_initializer": "random_uniform",
        "bias_initializer": "zeros",
        "use_bias": True
    }

    input_data = Input(shape=(778, 4, 40))

    # Four QConv blocks with doubling widths: kern, 2*kern, 4*kern, 8*kern.
    net = input_data
    for width in (kern, kern * 2, kern * 4, kern * 8):
        net = QuaternionConv2D(width, 2, strides=(1, 1), padding="same",
                               use_bias=True)(net)
        net = PReLU(shared_axes=[1, 0])(net)
        net = Dropout(dropout_prob)(net)

    conv_shape = net.get_shape()

    # Collapse the trailing feature axes into a (batch, time, features) tensor.
    net = Reshape(target_shape=[
        K.int_shape(net)[1],
        K.int_shape(net)[2] * K.int_shape(net)[3]
    ])(net)

    # Single quaternion dense block.
    dense = TimeDistributed(QuaternionDense(256, **denseArgs))(net)
    if advanced_act == "prelu":
        dense = PReLU(shared_axes=[1, 0])(dense)
    dense = Dropout(dropout_prob)(dense)

    # Per-frame posteriors over the 61 phone classes.
    y_pred = TimeDistributed(Dense(61, activation="softmax"))(dense)

    # CTC loss lives in a Lambda layer since Keras losses cannot take the
    # label/length tensors as extra arguments.
    def ctc_lambda_func(args):
        y_pred, labels, input_length, label_length = args
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    labels = Input(shape=[max_str_len], dtype='float32')  # max_str_len=61
    input_length = Input(shape=[1], dtype='int64')
    label_length = Input(shape=[1], dtype='int64')

    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    opt = SGD(learning_rate=0.01, momentum=0.5)

    ctc_model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

    # Dummy pass-through loss: the 'ctc' layer already outputs the loss value.
    ctc_model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                      optimizer=opt)

    return ctc_model, conv_shape
def flor(input_size, output_size, learning_rate=5e-4):
    """Gated Convolutional Recurrent Neural Network by Flor (BLSTM variant).

    Builds a gated-conv CNN front-end and a bidirectional-LSTM head with a
    softmax over ``output_size`` classes; returns
    ``(input_tensor, output_tensor, optimizer)``.
    """

    def _stage(t, filters, kernel, strides, constrained=False, dropout=None):
        # Conv -> PReLU -> BatchNorm(renorm) -> FullGatedConv (+ optional dropout).
        t = Conv2D(filters=filters, kernel_size=kernel, strides=strides,
                   padding="same")(t)
        t = PReLU(shared_axes=[1, 2])(t)
        t = BatchNormalization(renorm=True)(t)
        if constrained:
            t = FullGatedConv2D(filters=filters, kernel_size=(3, 3), padding="same",
                                kernel_constraint=MaxNorm(4, [0, 1, 2]))(t)
        else:
            t = FullGatedConv2D(filters=filters, kernel_size=(3, 3), padding="same")(t)
        if dropout is not None:
            t = Dropout(rate=dropout)(t)
        return t

    input_data = Input(name="input", shape=input_size)

    feat = _stage(input_data, 16, (3, 3), (2, 2))
    feat = _stage(feat, 32, (3, 3), (1, 2))
    feat = _stage(feat, 40, (2, 4), (2, 2), constrained=True, dropout=0.2)
    feat = _stage(feat, 48, (3, 3), (1, 2), constrained=True, dropout=0.2)
    feat = _stage(feat, 56, (2, 4), (2, 2), constrained=True, dropout=0.2)

    # Last conv stage: no gated conv, finished with max pooling.
    feat = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                  padding="same")(feat)
    feat = PReLU(shared_axes=[1, 2])(feat)
    feat = BatchNormalization(renorm=True)(feat)
    feat = MaxPooling2D(pool_size=(1, 2), strides=(1, 2), padding="valid")(feat)

    # Collapse the two trailing axes into one feature axis per step.
    shape = feat.get_shape()
    seq = Reshape((shape[1], shape[2] * shape[3]))(feat)

    seq = Bidirectional(LSTM(units=128, return_sequences=True, dropout=0.5))(seq)
    seq = Dense(units=128)(seq)
    seq = Bidirectional(LSTM(units=128, return_sequences=True, dropout=0.5))(seq)
    seq = Dense(units=output_size)(seq)
    output_data = Activation(activation="softmax")(seq)

    optimizer = RMSprop(learning_rate=learning_rate)

    return (input_data, output_data, optimizer)