def build_model_multihead_attention_multiscaleCNN4_dropout(
        self,
        dim_attention,
        headnum,
        embedding_vec,
        load_weights=False,
        weight_dir=None,
        nb_filters=32,
        filters_length1=1,
        filters_length2=5,
        filters_length3=10,
        pooling_size=3,
        drop_input=0,
        drop_cnn=0.2,
        drop_flat=0,
        W_regularizer=0.0005,
        Att_regularizer_weight=0.0005,
        fc_dim=50,
        fcnum=0,
):
    """Build the multihead-attention model for mRNA localization.

    :param dim_attention: dimension of the attention projections
    :param headnum: number of attention heads
    :param embedding_vec: pretrained embedding matrix (one row per token id)
    :param load_weights: whether to load pretrained model weights
    :param weight_dir: path to the pretrained model weights
    :param nb_filters: number of CNN filters
    :param filters_length1, filters_length2, filters_length3: kernel lengths
        of the three multiscale CNN branches
    :param pooling_size: pool length for the 1D max pooling
    :param drop_input: dropout rate for the input layer
    :param drop_cnn: dropout rate for the CNN layers
    :param drop_flat: dropout rate for the flattened and fully connected layers
    :param W_regularizer: L2 weight applied to the attention matrices W1 and W2
    :param Att_regularizer_weight: weight of the attention regularization penalty
    :param fc_dim: width of each optional fully connected layer
    :param fcnum: number of optional fully connected layers (0 disables them)
    :return: the assembled model
    """
    print('Advanced Masking')
    input = Input(shape=(self.max_len, ), dtype='int8')
    embedding_layer = Embedding(len(embedding_vec),
                                len(embedding_vec[0]),
                                weights=[embedding_vec],
                                input_length=self.max_len,
                                trainable=False)
    embedding_output = Dropout(drop_input)(embedding_layer(input))

    with tf.name_scope('CNN'):
        # Three parallel branches with different kernel lengths extract
        # multiscale features from the embedded sequence.
        first_cnn = Convolution1D(nb_filters,
                                  filters_length1,
                                  border_mode='same',
                                  activation=gelu,
                                  use_bias=False,
                                  name='CNN1')(embedding_output)  # layer2
        first_cnn2 = Convolution1D(int(nb_filters / 2),
                                   filters_length1,
                                   border_mode='same',
                                   activation=gelu,
                                   use_bias=False)(first_cnn)  # layer3
        second_cnn = Convolution1D(nb_filters,
                                   filters_length2,
                                   border_mode='same',
                                   activation=gelu,
                                   use_bias=False,
                                   name='CNN2')(embedding_output)  # layer5
        second_cnn2 = Convolution1D(int(nb_filters / 2),
                                    filters_length2,
                                    border_mode='same',
                                    activation=gelu,
                                    use_bias=False)(second_cnn)  # layer4
        third_cnn2 = Convolution1D(int(nb_filters / 2),
                                   filters_length3,
                                   border_mode='same',
                                   activation=gelu,
                                   use_bias=False,
                                   name='CNN3')(embedding_output)

        cnn_output1 = Dropout(drop_cnn)(MaxPooling1D(
            pool_length=pooling_size, stride=pooling_size)(first_cnn2))
        cnn_output2 = Dropout(drop_cnn)(MaxPooling1D(
            pool_length=pooling_size, stride=pooling_size)(second_cnn2))
        cnn_output3 = Dropout(drop_cnn)(MaxPooling1D(
            pool_length=pooling_size, stride=pooling_size)(third_cnn2))

    with tf.name_scope('multihead_attention'):
        # One attention block per CNN branch; the hidden size is read from
        # that branch's own output so the projections match its width.
        att1, att1_A = Attention(
            hidden=cnn_output1.get_shape()[-1].value,
            da=dim_attention,
            r=headnum,
            att_weight=self.att_weight_var,
            init='glorot_uniform',
            activation='tanh',
            W1_regularizer=regularizers.l2(W_regularizer),
            W2_regularizer=regularizers.l2(W_regularizer),
            W1_constraint=None,
            W2_constraint=None,
            return_attention=True,
            attention_regularizer_weight=Att_regularizer_weight,
            name="att1")(cnn_output1)  # -5 layer
        att2, att2_A = Attention(
            hidden=cnn_output2.get_shape()[-1].value,
            da=dim_attention,
            r=headnum,
            att_weight=self.att_weight_var,
            init='glorot_uniform',
            activation='tanh',
            W1_regularizer=regularizers.l2(W_regularizer),
            W2_regularizer=regularizers.l2(W_regularizer),
            W1_constraint=None,
            W2_constraint=None,
            return_attention=True,
            attention_regularizer_weight=Att_regularizer_weight,
            name="att2")(cnn_output2)  # -4 layer
        att3, att3_A = Attention(
            hidden=cnn_output3.get_shape()[-1].value,
            da=dim_attention,
            r=headnum,
            att_weight=self.att_weight_var,
            init='glorot_uniform',
            activation='tanh',
            W1_regularizer=regularizers.l2(W_regularizer),
            W2_regularizer=regularizers.l2(W_regularizer),
            W1_constraint=None,
            W2_constraint=None,
            return_attention=True,
            attention_regularizer_weight=Att_regularizer_weight,
            name="att3")(cnn_output3)  # -3 layer

    output = Dropout(drop_flat)(Flatten()(concatenate(
        [att1, att2, att3])))  # -2 layer

    fc = output
    for _ in range(fcnum):
        fc = Dense(fc_dim, activation='relu')(fc)
        fc = Dropout(drop_flat)(fc)

    preds = Dense(self.nb_classes, activation='sigmoid')(fc)  # -1 layer
    self.model = Model(inputs=[input], outputs=preds)

    from keras import optimizers
    # The paper uses a decay rate alpha = alpha / sqrt(t), updated each
    # epoch t, for the logistic regression demonstration.
    optim = optimizers.Adam(lr=0.001, decay=5e-5)
    self.model.compile(loss='binary_crossentropy',
                       optimizer=optim,
                       metrics=['acc'])
    if load_weights:
        self.model.load_weights(weight_dir)
    self.is_built = True
    self.bn = False
    self.model.summary()
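# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): the builder
# above is a method on a model-wrapper class, so the wrapper name
# `LocalizationModel`, its constructor arguments, and the random embedding
# table below are assumptions; the method itself only reads self.max_len,
# self.nb_classes, and self.att_weight_var.
#
#     import numpy as np
#     embedding_vec = np.random.rand(5, 4).astype('float32')  # one row per token id
#     wrapper = LocalizationModel(max_len=1000, nb_classes=4)  # hypothetical class
#     wrapper.build_model_multihead_attention_multiscaleCNN4_dropout(
#         dim_attention=50, headnum=5, embedding_vec=embedding_vec)
#     wrapper.model.fit(...)  # then trained/used like any Keras model
# ---------------------------------------------------------------------------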
def resnet_model():
    input = Input(shape=(None, ), dtype='int8')
    embedding_layer = Embedding(len(encoding_vectors),
                                len(encoding_vectors[0]),
                                weights=[encoding_vectors],
                                input_length=None,
                                trainable=False)
    embedding_output = embedding_layer(input)

    with tf.name_scope('first_cnn_layer'):
        cnn_output = Dropout(0.2)(Convolution1D(32,
                                                10,
                                                border_mode='same',
                                                activation='relu',
                                                use_bias=False,
                                                strides=2)(embedding_output))

    with tf.name_scope('first_residual_block'):
        # Two stacked Conv1D layers followed by an identity shortcut.
        res_output_1 = Dropout(0.2)(Convolution1D(
            32, 3, border_mode='same', activation='relu',
            use_bias=False)(cnn_output))
        res_output_1 = Dropout(0.2)(Convolution1D(
            32, 3, border_mode='same', activation='relu',
            use_bias=False)(res_output_1))
        res_output_1 = Add()([cnn_output, res_output_1])

    # A second and third residual block at width 32 (the same
    # two-Conv1D(32, 3)-plus-Add() pattern) are disabled in this
    # configuration.

    with tf.name_scope('cnn_downsampling'):
        cnn_downsampling = Dropout(0.2)(Convolution1D(
            64, 3, border_mode='same', activation='relu',
            use_bias=False, strides=2)(res_output_1))
        cnn_downsampling = Dropout(0.2)(Convolution1D(
            64, 3, border_mode='same', activation='relu',
            use_bias=False)(cnn_downsampling))
        # A strided 1x1 convolution projects the shortcut to the new length
        # and width so it can be added to the main path.
        downsample_shortcut = Convolution1D(64,
                                            1,
                                            border_mode='same',
                                            activation='relu',
                                            use_bias=False,
                                            strides=2)(res_output_1)
        cnn_downsampling = Add()([downsample_shortcut, cnn_downsampling])
        # 1000, 64

    with tf.name_scope('fourth_residual_block'):
        res_output_4 = Dropout(0.2)(Convolution1D(
            64, 3, border_mode='same', activation='relu',
            use_bias=False)(cnn_downsampling))
        res_output_4 = Dropout(0.2)(Convolution1D(
            64, 3, border_mode='same', activation='relu',
            use_bias=False)(res_output_4))
        res_output_4 = Add()([cnn_downsampling, res_output_4])

    # A fifth and sixth residual block at width 64 are likewise disabled.

    with tf.name_scope('second_cnn_downsampling'):
        cnn_downsampling_2 = Dropout(0.2)(Convolution1D(
            128, 3, border_mode='same', activation='relu',
            use_bias=False, strides=2)(res_output_4))
        cnn_downsampling_2 = Dropout(0.2)(Convolution1D(
            128, 3, border_mode='same', activation='relu',
            use_bias=False)(cnn_downsampling_2))
        downsample_shortcut_2 = Convolution1D(128,
                                              1,
                                              border_mode='same',
                                              activation='relu',
                                              use_bias=False,
                                              strides=2)(res_output_4)
        cnn_downsampling_2 = Add()([downsample_shortcut_2, cnn_downsampling_2])
        # 500, 128

    with tf.name_scope('seventh_residual_block'):
        res_output_7 = Dropout(0.2)(Convolution1D(
            128, 3, border_mode='same', activation='relu',
            use_bias=False)(cnn_downsampling_2))
        res_output_7 = Dropout(0.2)(Convolution1D(
            128, 3, border_mode='same', activation='relu',
            use_bias=False)(res_output_7))
        res_output_7 = Add()([cnn_downsampling_2, res_output_7])

    # An eighth and ninth residual block at width 128 are likewise disabled.

    with tf.name_scope('third_cnn_downsampling'):
        cnn_downsampling_3 = Dropout(0.2)(Convolution1D(
            256, 3, border_mode='same', activation='relu',
            use_bias=False, strides=2)(res_output_7))
        cnn_downsampling_3 = Dropout(0.2)(Convolution1D(
            256, 3, border_mode='same', activation='relu',
            use_bias=False)(cnn_downsampling_3))
        downsample_shortcut_3 = Convolution1D(256,
                                              1,
                                              border_mode='same',
                                              activation='relu',
                                              use_bias=False,
                                              strides=2)(res_output_7)
        cnn_downsampling_3 = Add()([downsample_shortcut_3, cnn_downsampling_3])
        # 250, 256

    sequence_length = cnn_downsampling_3.get_shape()[1].value
    print('sequence length:', sequence_length)
    hidden_size = cnn_downsampling_3.get_shape()[2].value
    print('hidden size:', hidden_size)

    with tf.name_scope('attention_module'):
        # Project each time step into a 50-dim context space.
        context_weights = Dense(
            50,
            activation='tanh',
            input_shape=(None, hidden_size),
            kernel_initializer=random_normal(),
            bias_initializer=random_normal())(cnn_downsampling_3)
        # Collapse the context to one raw score per time step:
        # [batch_size, time_steps].
        scores = Lambda(lambda x: K.batch_flatten(x))(Dense(
            1,
            kernel_initializer=random_normal(),
            input_shape=(None, 50),
            use_bias=False)(context_weights))
        # Softmax probability distribution over the sequence,
        # [batch_size, sequence_length], re-expanded for broadcasting.
        attention_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
            Activation("softmax")(scores))
        # Multiply() behaves like tf.multiply(), which supports shape
        # broadcasting, so its output shape is
        # [batch_size, time_steps, hidden_size]; the Lambda below is
        # equivalent to tf.reduce_sum(axis=1) and yields [batch_size, hidden].
        output = Lambda(lambda x: K.sum(x, axis=1, keepdims=False))(
            Multiply()([cnn_downsampling_3, attention_weights]))

    preds = Dense(nb_classes, activation='softmax')(output)
    model = Model(inputs=[input], outputs=preds)
    model.compile(loss='kld', optimizer='adam', metrics=['acc'])
    return model
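# ---------------------------------------------------------------------------
# The attention module in resnet_model() reduces to: project each time step
# into a 50-dim context space (tanh), score it with a single linear unit,
# softmax over time, and take the attention-weighted sum of the features.
# Below is a standalone NumPy sketch of that pooling; the function and
# weight names are illustrative, but the shapes match the layers above.
# ---------------------------------------------------------------------------
import numpy as np


def attention_pool_sketch(h, w_ctx, b_ctx, w_score):
    """h: (batch, steps, hidden); w_ctx: (hidden, 50); b_ctx: (50,); w_score: (50,)."""
    u = np.tanh(h @ w_ctx + b_ctx)                # context projection, (batch, steps, 50)
    s = u @ w_score                               # raw score per time step, (batch, steps)
    a = np.exp(s - s.max(axis=1, keepdims=True))  # numerically stable softmax
    a /= a.sum(axis=1, keepdims=True)             # attention weights over time
    return (h * a[..., None]).sum(axis=1)         # weighted sum, (batch, hidden)


# Example: with all-zero weights the softmax is uniform and the pooling is a
# plain mean over time steps:
#     attention_pool_sketch(np.zeros((2, 250, 256)), np.zeros((256, 50)),
#                           np.zeros(50), np.zeros(50)).shape  # -> (2, 256)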