def build_model_multihead_attention_multiscaleCNN4_dropout(
        self,
        dim_attention,
        headnum,
        embedding_vec,
        load_weights=False,
        weight_dir=None,
        nb_filters=32,
        filters_length1=1,
        filters_length2=5,
        filters_length3=10,
        pooling_size=3,
        drop_input=0,
        drop_cnn=0.2,
        drop_flat=0,
        W_regularizer=0.0005,
        Att_regularizer_weight=0.0005,
        fc_dim=50,
        fcnum=0,
    ):
        """
        Build and compile the multihead attention model for mRNA localization.

        The integer-encoded input (length ``self.max_len``) is embedded with
        the frozen ``embedding_vec`` matrix and fed to three parallel CNN
        branches with different kernel lengths. Every branch is max-pooled,
        passed through dropout and summarised by its own ``Attention`` layer;
        the three attention outputs are concatenated, flattened and sent
        through ``fcnum`` dense layers to a sigmoid layer with
        ``self.nb_classes`` units.

        Reads ``self.max_len``, ``self.nb_classes`` and
        ``self.att_weight_var``. Sets ``self.model``, ``self.is_built`` and
        ``self.bn`` and prints the model summary.

        :param dim_attention: dim of attention (passed to ``Attention`` as ``da``)
        :param headnum: number of head (passed to ``Attention`` as ``r``)
        :param embedding_vec: embedding matrix; ``len(embedding_vec)`` rows of
            ``len(embedding_vec[0])`` values, used as fixed (non-trainable)
            embedding weights
        :param load_weights: whether to load the pretrained model weights
        :param weight_dir: pretrained model weights, required when
            ``load_weights`` is true
        :param nb_filters: number of CNN filters of the first layer of
            branches 1 and 2; the last layer of every branch uses
            ``int(nb_filters / 2)`` filters
        :param filters_length1, filters_length2,filters_length3: CNN filter
            length for multiscale CNN layers (branch 3 has a single layer)
        :param pooling_size: 1D maxpooling pool size, also used as stride
        :param drop_input: dropout ratio for input layer
        :param drop_cnn: dropout ratio for cnn layer
        :param drop_flat: dropout ratio for the flat layers and the fully
            connected layers
        :param W_regularizer: L2 factor for the ``W1``/``W2`` weights of each
            ``Attention`` layer
        :param Att_regularizer_weight: forwarded to ``Attention`` as
            ``attention_regularizer_weight``
        :param fc_dim: number of units of each fully connected layer
        :param fcnum: number of fully connected (dense + dropout) layers
            inserted before the output layer
        :return: None; the compiled model is stored in ``self.model``
        :raises ValueError: if ``load_weights`` is true but ``weight_dir`` is
            None
        """
        from keras import optimizers

        print('Advanced Masking')

        # Width of the last convolution in each branch, i.e. the feature
        # size every attention layer receives.
        half_filters = int(nb_filters / 2)

        def conv(filters, kernel_size, name=None):
            # Same-padded, bias-free GELU convolution shared by all branches.
            return Convolution1D(
                filters,
                kernel_size,  #kernel_regularizer=regularizers.l2(0.0001),
                padding='same',
                activation=gelu,
                use_bias=False,
                name=name)

        def pool_and_drop(features):
            # Non-overlapping max pooling followed by the CNN dropout.
            pooled = MaxPooling1D(
                pool_size=pooling_size, strides=pooling_size)(features)
            return Dropout(drop_cnn)(pooled)

        def attend(features, name):
            # One multihead attention layer; `hidden` is read from the tensor
            # it is applied to so each branch stays self-consistent.
            return Attention(
                hidden=features.get_shape()[-1].value,
                da=dim_attention,
                r=headnum,
                att_weight=self.att_weight_var,
                init='glorot_uniform',
                activation='tanh',
                W1_regularizer=regularizers.l2(W_regularizer),
                W2_regularizer=regularizers.l2(W_regularizer),
                W1_constraint=None,
                W2_constraint=None,
                return_attention=True,
                attention_regularizer_weight=Att_regularizer_weight,
                name=name)(features)

        seq_input = Input(shape=(self.max_len, ), dtype='int8')
        embedding_layer = Embedding(len(embedding_vec),
                                    len(embedding_vec[0]),
                                    weights=[embedding_vec],
                                    input_length=self.max_len,
                                    trainable=False)
        embedding_output = Dropout(drop_input)(
            embedding_layer(seq_input))  #layer2

        with tf.name_scope('CNN'):
            # Layers are created in the same order as before so that the
            # automatically generated layer names do not change.
            first_cnn = conv(nb_filters, filters_length1,
                             name='CNN1')(embedding_output)  #layer3
            first_cnn2 = conv(half_filters,
                              filters_length1)(first_cnn)  #layer5
            second_cnn = conv(nb_filters, filters_length2,
                              name='CNN2')(embedding_output)  #layer4
            second_cnn2 = conv(half_filters, filters_length2)(second_cnn)
            # The third branch is a single convolution on the embedding.
            third_cnn2 = conv(half_filters, filters_length3,
                              name='CNN3')(embedding_output)

            cnn_output1 = pool_and_drop(first_cnn2)
            cnn_output2 = pool_and_drop(second_cnn2)
            cnn_output3 = pool_and_drop(third_cnn2)

        with tf.name_scope('multihead_attention'):
            # The second return value (the attention matrix) is not used
            # when assembling the model.
            att1, att1_A = attend(cnn_output1, "att1")  #-5 layer
            att2, att2_A = attend(cnn_output2, "att2")  #-4 layer
            att3, att3_A = attend(cnn_output3, "att3")  #-3 layer

            output = Dropout(drop_flat)(Flatten()(concatenate(
                [att1, att2, att3])))  #-2 layer

        fc = output
        for _ in range(fcnum):
            fc = Dense(fc_dim, activation='relu')(fc)
            fc = Dropout(drop_flat)(fc)

        with tf.name_scope(''):
            preds = Dense(self.nb_classes, activation='sigmoid')(fc)  #-1 layer

        self.model = Model(inputs=[seq_input], outputs=preds)
        optim = optimizers.Adam(
            lr=0.001, decay=5e-5
        )  #The paper uses a decay rate alpha = alpha/sqrt(t) updated each epoch (t) for the logistic regression demonstration.
        self.model.compile(
            loss='binary_crossentropy',
            optimizer=optim,  # todo
            metrics=['acc'])

        if load_weights:
            if weight_dir is None:
                # Fail with a clear message instead of an opaque error from
                # inside the weight loader.
                raise ValueError(
                    'weight_dir must be provided when load_weights is True')
            self.model.load_weights(weight_dir)

        self.is_built = True
        self.bn = False
        self.model.summary()
# Example #2
# 0
def _conv1d_dropout(tensor, filters, kernel_size, strides=1):
    """Apply a same-padded, bias-free ReLU convolution followed by Dropout(0.2)."""
    conv = Convolution1D(filters,
                         kernel_size,
                         padding='same',
                         activation='relu',
                         use_bias=False,
                         strides=strides)(tensor)
    return Dropout(0.2)(conv)


def _residual_block(tensor, filters):
    """Stack two width-3 conv+dropout layers and add the block input back."""
    branch = _conv1d_dropout(tensor, filters, 3)
    # stack another cnn layer on top
    branch = _conv1d_dropout(branch, filters, 3)
    return Add()([tensor, branch])


def _downsampling_block(tensor, filters):
    """Downsample with stride 2 to `filters` channels, using a 1x1 strided conv as shortcut."""
    branch = _conv1d_dropout(tensor, filters, 3, strides=2)
    branch = _conv1d_dropout(branch, filters, 3)
    # The shortcut needs its own strided projection so that its length and
    # channel count match the main branch before the addition.
    shortcut = Convolution1D(filters,
                             1,
                             padding='same',
                             activation='relu',
                             use_bias=False,
                             strides=2)(tensor)
    return Add()([shortcut, branch])


def resnet_model():
    """Build and compile a 1D residual CNN with attention pooling.

    The network embeds a variable-length integer sequence with the frozen
    module-level ``encoding_vectors`` matrix, applies a strided stem
    convolution, then alternates residual blocks (32, 64 and 128 filters)
    with stride-2 downsampling blocks (64, 128 and 256 filters). The final
    feature map is reduced over the sequence axis by a softmax-weighted sum
    and classified by a softmax layer with the module-level ``nb_classes``
    units.

    Each stage currently holds a single residual block; the scope names
    (``first``/``fourth``/``seventh``) are kept unchanged.

    :return: the model, compiled with the ``'kld'`` loss, the ``'adam'``
        optimizer and the ``'acc'`` metric
    """
    token_ids = Input(shape=(None, ), dtype='int8')
    embedding_layer = Embedding(len(encoding_vectors),
                                len(encoding_vectors[0]),
                                weights=[encoding_vectors],
                                input_length=None,
                                trainable=False)
    embedding_output = embedding_layer(token_ids)

    with tf.name_scope('first_cnn_layer'):
        cnn_output = _conv1d_dropout(embedding_output, 32, 10, strides=2)

    with tf.name_scope('first_residual_block'):
        res_output_1 = _residual_block(cnn_output, 32)

    with tf.name_scope('cnn_downsampling'):
        cnn_downsampling = _downsampling_block(res_output_1, 64)

    with tf.name_scope('fourth_residual_block'):
        res_output_4 = _residual_block(cnn_downsampling, 64)

    with tf.name_scope('second_cnn_downsampling'):
        cnn_downsampling_2 = _downsampling_block(res_output_4, 128)

    with tf.name_scope('seventh_residual_block'):
        res_output_7 = _residual_block(cnn_downsampling_2, 128)

    with tf.name_scope('third_cnn_downsampling'):
        cnn_downsampling_3 = _downsampling_block(res_output_7, 256)

    # Static shape of the final feature map; the sequence length is taken
    # from the graph and is only printed for information.
    sequence_length = cnn_downsampling_3.get_shape()[1].value
    print('sequence length:', sequence_length)
    hidden_size = cnn_downsampling_3.get_shape()[2].value
    print('hidden size:', hidden_size)

    with tf.name_scope('attention_module'):
        context_weights = Dense(
            50,
            activation='tanh',
            input_shape=(None, hidden_size),
            kernel_initializer=random_normal(),
            bias_initializer=random_normal())(cnn_downsampling_3)
        # One score per time step, flattened to [batch_size, time_steps]
        scores = Lambda(lambda x: K.batch_flatten(x))(Dense(
            1,
            kernel_initializer=random_normal(),
            input_shape=(None, 50),
            use_bias=False)(context_weights))

        # softmax probability distribution, expanded to
        # [batch_size, time_steps, 1] so it broadcasts over the features
        attention_weights = Lambda(lambda x: K.expand_dims(x, axis=-1))(
            Activation("softmax")(scores))

        # Weight every time step and sum over the time axis (axis=1),
        # leaving [batch_size, hidden]
        output = Lambda(lambda x: K.sum(x, axis=1, keepdims=False))(
            Multiply()([cnn_downsampling_3, attention_weights]))

    preds = Dense(nb_classes, activation='softmax')(output)
    model = Model(inputs=[token_ids], outputs=preds)
    model.compile(loss='kld', optimizer='adam', metrics=['acc'])
    return model