Example #1
def base_network2(input_shape):
    input = Input(shape=input_shape)
    p = embedding_layer(input)
    p = LSTM(dim, return_sequences=True, dropout=0.5, name='f_input')(p)
    p = LSTM(dim, return_sequences=True, name='t_input')(p)
    multi_memory = AttentionWithContext()(p)
    
    return Model(input, multi_memory, name='review_base_nn')
Example #2
def base_network1(input_shape):
    input = Input(shape=input_shape)

    p = embedding_layer(input)
    p = LSTM(dim, return_sequences=True, dropout=0.5, name='f_input')(p)
    p = AttentionWithContext()(p)

    q = embedding_layer(input)     
    q = LSTM(dim, return_sequences=True, dropout=0.5, name='a_input')(q)
    q = LSTM(dim, return_sequences=False, name='b_input')(q)
    multi_memory_lstm = add([p,q])
    return Model(input, multi_memory_lstm, name='DFF')
def build_model(nb_classes, word_index, embedding_dim, seq_length, stamp):
    """
	"""

    embedding_matrix, nb_words = prepare_embeddings(word_index)

    input_layer = Input(shape=(seq_length, ), dtype='int32')

    embedding_layer = Embedding(input_dim=nb_words + 1,
                                output_dim=embedding_dim,
                                input_length=seq_length,
                                weights=[embedding_matrix],
                                embeddings_regularizer=regularizers.l2(0.00),
                                trainable=True)(input_layer)

    drop1 = SpatialDropout1D(0.3)(embedding_layer)

    lstm_1 = Bidirectional(LSTM(128,
                                name='blstm_1',
                                activation='tanh',
                                recurrent_activation='hard_sigmoid',
                                recurrent_dropout=0.0,
                                dropout=0.5,
                                kernel_initializer='glorot_uniform',
                                return_sequences=True),
                           merge_mode='concat')(drop1)
    lstm_1 = BatchNormalization()(lstm_1)

    att_layer = AttentionWithContext()(lstm_1)

    drop3 = Dropout(0.5)(att_layer)

    predictions = Dense(nb_classes, activation='sigmoid')(drop3)

    model = Model(inputs=input_layer, outputs=predictions)

    adam = Adam(lr=0.001, decay=0.0)

    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=[f1_score])

    model.summary()
    print(stamp)

    # Save the model.
    model_json = model.to_json()
    with open(stamp + ".json", "w") as json_file:
        json_file.write(model_json)

    return model
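A minimal usage sketch for the builder above. This is an illustration, not part of the original example: it assumes prepare_embeddings and f1_score come from the same project, that embedding_dim matches the pretrained vectors prepare_embeddings loads, and it uses a toy Tokenizer with random labels only to show the expected input shapes.

import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

texts = ["great movie with a strong cast", "weak plot and poor pacing"]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

seq_length = 100
X = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=seq_length)
y = np.array([[1, 0], [0, 1]])  # toy multi-hot labels for nb_classes=2

model = build_model(nb_classes=2,
                    word_index=tokenizer.word_index,
                    embedding_dim=300,  # assumed to match prepare_embeddings
                    seq_length=seq_length,
                    stamp='blstm_attention')
model.fit(X, y, epochs=1, batch_size=2)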
Example #4
    def create_model(self):
        """
        Creates the Hybrid Model, which consists of two components:
            * Left Component : computes the user-item interaction through
              matrix factorization (typical collaborative filtering).
            * Right Component : uses user history, item features, time, etc.
              to dynamically model user interests (collaborative + content features).
        """

        #Initialises input for left component
        user_embed = Input(shape=(self.embedding_size_useritem, ))
        item_embed = Input(shape=(self.embedding_size_useritem, ))

        #Initialises input for right component
        user_read = Input(shape=(self.history, self.embedding_size_article))
        user_case = Input(shape=(self.embedding_size_article, ))

        # Creates Layers for the left component
        concatenated_layer = concatenate([user_embed, item_embed])
        left_layer1 = Dense(128, activation='relu')(concatenated_layer)
        left_layer2 = Dense(64, activation='relu')(left_layer1)

        # Creates Layers for the right component
        lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(user_read)
        attention_layer = AttentionWithContext()(lstm_layer)

        right_layer_input = Dense(128, activation='relu')(user_case)

        elem_wise = multiply([attention_layer, right_layer_input])
        right_layer1 = Dense(64, activation='relu')(elem_wise)

        # Merges the left and right component
        merged_layer = concatenate([left_layer2, right_layer1])
        merged_layer1 = Dense(256, activation='relu')(merged_layer)
        merged_layer2 = Dense(128, activation='relu')(merged_layer1)
        merged_layer3 = Dense(64, activation='relu')(merged_layer2)
        output = Dense(1, activation='sigmoid')(merged_layer3)

        self.model = Model(inputs=[user_embed, item_embed] + [user_read] +
                           [user_case],
                           outputs=output)
        self.model.compile(optimizer='adadelta',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
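Because the functional Model above is built from four Input tensors, fit needs four arrays in the same order as the inputs list. The sketch below is an assumption-laden illustration: m stands for an instance of this class after m.create_model(), with embedding_size_useritem=32, embedding_size_article=300 and history=10, and the data is random placeholder content.

import numpy as np

n = 256  # placeholder number of training interactions
m.model.fit(
    [np.random.rand(n, 32),        # user_embed
     np.random.rand(n, 32),        # item_embed
     np.random.rand(n, 10, 300),   # user_read: history of article embeddings
     np.random.rand(n, 300)],      # user_case: candidate article embedding
    np.random.randint(0, 2, size=(n, 1)),  # binary interaction labels
    epochs=1, batch_size=32)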
def build_hatt(word_vocab_size, classes):
    MAX_SENT_LENGTH = 100
    MAX_SENTS = 5
    sentence_input = Input(shape=(MAX_SENT_LENGTH, ), dtype='int32')
    embedded_sequences = Embedding(
        input_dim=word_vocab_size,
        output_dim=WORD_DIM,
        input_length=MAX_SENT_LENGTH)(sentence_input)
    l_lstm = Bidirectional(GRU(150, return_sequences=True))(embedded_sequences)
    l_dense = TimeDistributed(Dense(300))(l_lstm)
    l_att = AttentionWithContext()(l_dense)
    sentEncoder = Model(sentence_input, l_att)
    # print("sentEncoder Shape:", l_att._keras_shape)
    review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
    review_encoder = TimeDistributed(sentEncoder)(review_input)
    # print("RevewEncoder Shape:", review_encoder._keras_shape)
    l_lstm_sent = Bidirectional(GRU(150,
                                    return_sequences=True))(review_encoder)
    l_dense_sent = TimeDistributed(Dense(300))(l_lstm_sent)
    l_att_sent = AttentionWithContext()(l_dense_sent)
    preds = Dense(classes, activation='softmax')(l_att_sent)
    model = Model(review_input, preds)
    model.summary()
    return model
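The hierarchical model returned by build_hatt expects integer word indices already grouped into sentences, i.e. an input of shape (batch, MAX_SENTS, MAX_SENT_LENGTH) = (batch, 5, 100). A hedged usage sketch with random indices; the vocabulary size, class count and compile settings are illustrative, and WORD_DIM is assumed to be defined at module level.

import numpy as np

vocab_size, n_classes = 20000, 4  # assumed values
model = build_hatt(vocab_size, n_classes)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])

X = np.random.randint(1, vocab_size, size=(16, 5, 100))          # 16 documents
y = np.eye(n_classes)[np.random.randint(0, n_classes, size=16)]  # one-hot labels
model.fit(X, y, epochs=1, batch_size=4)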
Example #6
    def create_model(self):

        title_words = Input(shape=(self.title_max, self.word_embed_size))
        
        lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(title_words)
        dropout = Dropout(0.3)(lstm_layer)
        attention_layer = AttentionWithContext()(dropout)
        dropout2 = Dropout(0.3)(attention_layer)
        dense = Dense(64, activation='relu')(dropout2)
        dropout3 = Dropout(0.3)(dense)
        #dense = Dense(32, activation='relu')(attention_layer)
        output = Dense(4, activation='sigmoid')(dropout3)

        self.model = Model(inputs=[title_words], outputs=output)
        self.model.compile(optimizer='adam',
                           loss='categorical_crossentropy',
                           metrics=['mse', 'accuracy'])
    def create_model(self):

        title_words = Input(shape=(self.title_max, self.word_embed_size))

        #lstm_layer = LSTM(64, return_sequences=False)(title_words)
        lstm_layer = Bidirectional(LSTM(64,
                                        return_sequences=True))(title_words)
        attention_layer = AttentionWithContext()(lstm_layer)
        dropout = Dropout(0.25)(attention_layer)
        dense = Dense(64, activation='relu')(dropout)
        dense = Dense(32, activation='relu')(dense)
        output = Dense(1, activation='sigmoid')(dense)

        self.model = Model(inputs=[title_words], outputs=output)
        self.model.compile(optimizer='adadelta',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
Example #8
    def create_right_only(self):
        """
        Only creates the right side of the Model.
        Specs remain the same.
        """

        user_read = Input(shape=(self.history, self.embedding_size_article))
        user_case = Input(shape=(self.embedding_size_article, ))

        lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(user_read)
        attention_layer = AttentionWithContext()(lstm_layer)

        right_layer_input = Dense(128, activation='relu')(user_case)

        elem_wise = multiply([attention_layer, right_layer_input])
        right_layer1 = Dense(64, activation='relu')(elem_wise)

        output = Dense(1, activation='sigmoid')(right_layer1)

        self.model = Model(inputs=[user_read] + [user_case], outputs=output)
        self.model.compile(optimizer='adadelta',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
Example #9
    def create_model(self):

        user_read = Input(shape=(self.history, self.embedding_size_article))
        user_case = Input(shape=(self.embedding_size_article, ))

        if self.rlg == 0:
            recurrent_layer = SimpleRNN(128, return_sequences=True)(user_read)
            recurrent_layer2 = SimpleRNN(128,
                                         return_sequences=False)(user_read)
        elif self.rlg == 1:
            recurrent_layer = LSTM(128, return_sequences=True)(user_read)
            recurrent_layer2 = LSTM(128, return_sequences=False)(user_read)
        else:
            recurrent_layer = GRU(128, return_sequences=True)(user_read)
            recurrent_layer2 = GRU(128, return_sequences=False)(user_read)

        attention_layer = AttentionWithContext()(recurrent_layer)
        concat_layer = concatenate([attention_layer, recurrent_layer2])

        if self.layers >= 1:
            left_layer = Dense(128, activation='relu')(concat_layer)
            right_layer = Dense(128, activation='relu')(user_case)
        if self.layers >= 2:
            left_layer = Dense(64, activation='relu')(left_layer)
            right_layer = Dense(64, activation='relu')(right_layer)
        if self.layers >= 3:
            left_layer = Dense(32, activation='relu')(left_layer)
            right_layer = Dense(32, activation='relu')(right_layer)

        elem_wise = multiply([left_layer, right_layer])

        output = Dense(1, activation='sigmoid')(elem_wise)

        self.model = Model(inputs=[user_read] + [user_case], outputs=output)
        self.model.compile(optimizer='adadelta',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])
    def create_model(self):
        """ 
        Initialises the input and layers for the model.
        =============================================================
        Model Component 1 (title word embeddings)
        * Input to this component is the word embeddings of the title.
          The length of the title is fixed to a particular value. Padding
          is done in case the title falls short, or truncated if the title becomes
          too long.
        * A BiLSTM layer with attention follows.
        * Sigmoid is then used to predict the class.

        Model Component 2 (title embeddings + document embedings)
        This is similar to the siamese approach of training.
        Both title and body are brought to the same vector space for comparison
        =============================================================
        """

        title_words = Input(shape=(self.title_max, self.word_embedding_size))
        text_embed_input = Input(shape=(self.doc2vec_size, ))
        title_embed_input = Input(shape=(self.doc2vec_size, ))
        image_embed_input = Input(shape=(self.image_size, ))
        image_small = Dense(300, activation=self.activation)(image_embed_input)

        # Layers for the Model Component 1
        lstm_layer = Bidirectional(LSTM(64,
                                        return_sequences=True))(title_words)
        lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(lstm_layer)
        lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(lstm_layer)
        attention_layer = AttentionWithContext()(lstm_layer)
        dropout1 = Dropout(0.2)(attention_layer)
        left_hidden_layer1 = Dense(64, activation=self.activation)(dropout1)
        dropout2 = Dropout(0.2)(left_hidden_layer1)
        left_hidden_layer2 = Dense(32, activation=self.activation)(dropout2)

        # Layers for the Model Component 2 (weights are shared)
        shared_hidden_layer1 = Dense(128, activation=self.activation)
        text_hid1 = shared_hidden_layer1(text_embed_input)
        title_hid1 = shared_hidden_layer1(title_embed_input)

        shared_hidden_layer2 = Dense(64, activation=self.activation)
        text_hid2 = shared_hidden_layer2(text_hid1)
        title_hid2 = shared_hidden_layer2(title_hid1)

        shared_hidden_layer3 = Dense(32, activation=self.activation)
        text_hid3 = shared_hidden_layer3(text_hid2)
        title_hid3 = shared_hidden_layer3(title_hid2)

        elem_wise_vector = multiply([text_hid3, title_hid3])

        # Layers for the Model Component 3 (weights are shared)
        shared_hidden_layer_p1 = Dense(128, activation=self.activation)
        image_hid1 = shared_hidden_layer_p1(image_small)
        title_hid_p1 = shared_hidden_layer_p1(title_embed_input)

        shared_hidden_layer_p2 = Dense(64, activation=self.activation)
        image_hid2 = shared_hidden_layer_p2(image_hid1)
        title_hid_p2 = shared_hidden_layer_p2(title_hid_p1)

        shared_hidden_layer_p3 = Dense(32, activation=self.activation)
        image_hid3 = shared_hidden_layer_p3(image_hid2)
        title_hid_p3 = shared_hidden_layer_p3(title_hid_p2)

        elem_wise_vector2 = multiply([image_hid3, title_hid_p3])

        # Combines both the left and the right component
        combined1 = concatenate(
            [left_hidden_layer2, elem_wise_vector, elem_wise_vector2])
        dropout_overall1 = Dropout(0.2)(combined1)
        combined2 = Dense(32, activation=self.activation)(dropout_overall1)

        # Predicts
        output = Dense(1, activation='sigmoid')(combined2)
        #        output = Dense(1, activation='sigmoid')(elem_wise_vector2)

        #        self.model = Model(inputs=[title_embed_input] + [image_embed_input], outputs=output)
        self.model = Model(inputs=[title_words] + [text_embed_input] +
                           [title_embed_input] + [image_embed_input],
                           outputs=output)
        self.model.compile(optimizer='adadelta',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])

        self.model.summary()
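As with the earlier hybrid model, fit takes the four inputs in the order they are passed to Model(...): title word embeddings, text doc2vec embedding, title doc2vec embedding and image embedding. A sketch with placeholder shapes, assuming an instance clf with title_max=20, word_embedding_size=300, doc2vec_size=100 and image_size=2048, after clf.create_model() has run.

import numpy as np

n = 64  # placeholder number of examples
clf.model.fit(
    [np.random.rand(n, 20, 300),   # title_words
     np.random.rand(n, 100),       # text_embed_input
     np.random.rand(n, 100),       # title_embed_input
     np.random.rand(n, 2048)],     # image_embed_input
    np.random.randint(0, 2, size=(n, 1)),  # binary labels
    epochs=1, batch_size=16)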
def build_sentence_rnn(real_vocab_number,
                       word_vocab_size=10,
                       char_vocab_size=10,
                       classes=2,
                       attention=False,
                       dropout=0,
                       word=True,
                       char=False,
                       char_shape=True,
                       model="rnn",
                       cnn_encoder=True,
                       highway=None,
                       nohighway=None,
                       shape_filter=True,
                       char_filter=True):
    # build the rnn of words, use the output of build_word_feature as the feature of each word
    if char_shape:
        word_feature_encoder = build_word_feature_shape(
            vocab_size=real_vocab_number,
            cnn_encoder=cnn_encoder,
            highway=highway,
            nohighway=nohighway,
            shape_filter=shape_filter,
            char_filter=char_filter)
        sentence_input = Input(shape=(MAX_SENTENCE_LENGTH,
                                      COMP_WIDTH * MAX_WORD_LENGTH),
                               dtype='int32')
        word_feature_sequence = TimeDistributed(word_feature_encoder)(
            sentence_input)
        # print(word_feature_sequence._keras_shape)
    if word:
        sentence_word_input = Input(shape=(MAX_SENTENCE_LENGTH, ),
                                    dtype='int32')
        word_embedding_sequence = Embedding(
            input_dim=word_vocab_size,
            output_dim=WORD_DIM)(sentence_word_input)
    if char:
        word_feature_encoder = build_word_feature_char(
            vocab_size=char_vocab_size,
            cnn_encoder=cnn_encoder,
            highway=highway)
        char_input = Input(shape=(MAX_SENTENCE_LENGTH, MAX_WORD_LENGTH),
                           dtype='int32')
        word_feature_sequence = TimeDistributed(word_feature_encoder)(
            char_input)
    if char_shape and word and not char:
        word_feature_sequence = concatenate(
            [word_feature_sequence, word_embedding_sequence], axis=2)
    if word and not char_shape and not char:
        word_feature_sequence = word_embedding_sequence
    # print(word_feature_sequence._keras_shape)
    if model == "rnn":
        if attention:
            lstm_rnn = Bidirectional(
                LSTM(150, dropout=dropout,
                     return_sequences=True))(word_feature_sequence)
            if highway:
                lstm_rnn = TimeDistributed(
                    Highway(activation=highway))(lstm_rnn)
            elif nohighway:
                lstm_rnn = TimeDistributed(
                    Dense(units=300, activation=nohighway))(lstm_rnn)
            lstm_rnn = AttentionWithContext()(lstm_rnn)
        else:
            lstm_rnn = Bidirectional(
                LSTM(150, dropout=dropout,
                     return_sequences=False))(word_feature_sequence)
        x = lstm_rnn
    if classes < 2:
        print("class number cannot less than 2")
        exit(1)
    else:
        preds = Dense(classes, activation='softmax')(x)
    if char_shape and not word and not char:
        sentence_model = Model(sentence_input, preds)
    if word and not char_shape and not char:
        sentence_model = Model(sentence_word_input, preds)
    if word and char_shape and not char:
        sentence_model = Model([sentence_input, sentence_word_input], preds)
    if char and not word and not char_shape:
        sentence_model = Model(char_input, preds)
    sentence_model.summary()
    return sentence_model
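Which inputs the returned model expects depends on the char_shape / word / char flags. Below is a hedged sketch of the word-only configuration, which takes a single integer input of shape (MAX_SENTENCE_LENGTH,); the vocabulary size, sequence length and compile settings are assumptions for illustration.

import numpy as np

model = build_sentence_rnn(real_vocab_number=0,  # unused when char_shape=False
                           word_vocab_size=10000,
                           classes=3,
                           attention=True,
                           word=True,
                           char=False,
                           char_shape=False)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])

X = np.random.randint(1, 10000, size=(8, 30))   # assumes MAX_SENTENCE_LENGTH == 30
y = np.eye(3)[np.random.randint(0, 3, size=8)]
model.fit(X, y, epochs=1, batch_size=4)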
Example #12
def build_model(nb_classes,
                word_index,
                embedding_dim,
                seq_length,
                stamp,
                multilabel=True):
    """
	"""

    embedding_matrix, nb_words = prepare_embeddings(word_index)

    input_layer = Input(shape=(MAX_SEQ_LEN, MAX_SENT_LEN), dtype='int32')

    sentence_input = Input(shape=(MAX_SENT_LEN, ), dtype='int32')
    embedding_layer = Embedding(input_dim=nb_words + 1,
                                output_dim=embedding_dim,
                                input_length=MAX_SENT_LEN,
                                weights=[embedding_matrix],
                                embeddings_regularizer=regularizers.l2(0.00),
                                trainable=True)(sentence_input)

    drop1 = SpatialDropout1D(0.3)(embedding_layer)

    sent_lstm = Bidirectional(LSTM(100,
                                   name='blstm_1',
                                   activation='tanh',
                                   recurrent_activation='hard_sigmoid',
                                   recurrent_dropout=0.0,
                                   dropout=0.4,
                                   kernel_initializer='glorot_uniform',
                                   return_sequences=True),
                              merge_mode='concat')(drop1)

    sent_att_layer = AttentionWithContext()(sent_lstm)
    sentEncoder = Model(sentence_input, sent_att_layer)
    sentEncoder.summary()

    textEncoder = TimeDistributed(sentEncoder)(input_layer)

    drop2 = Dropout(0.4)(textEncoder)

    lstm_1 = Bidirectional(LSTM(100,
                                name='blstm_2',
                                activation='tanh',
                                recurrent_activation='hard_sigmoid',
                                recurrent_dropout=0.0,
                                dropout=0.4,
                                kernel_initializer='glorot_uniform',
                                return_sequences=True),
                           merge_mode='concat')(drop2)
    lstm_1 = BatchNormalization()(lstm_1)

    att_layer = AttentionWithContext()(lstm_1)

    drop3 = Dropout(0.5)(att_layer)

    if multilabel:
        predictions = Dense(nb_classes, activation='sigmoid')(drop3)

        model = Model(inputs=input_layer, outputs=predictions)

        adam = Adam(lr=0.001, decay=0.0)

        model.compile(loss='binary_crossentropy',
                      optimizer=adam,
                      metrics=[f1_score])

    else:
        predictions = Dense(nb_classes, activation='softmax')(drop3)

        model = Model(inputs=input_layer, outputs=predictions)

        adam = Adam(lr=0.001, decay=0.0)

        model.compile(loss='categorical_crossentropy',
                      optimizer=adam,
                      metrics=['accuracy'])

    model.summary()
    print(stamp)

    # Save the model.
    model_json = model.to_json()
    with open(stamp + ".json", "w") as json_file:
        json_file.write(model_json)

    return model
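Unlike the earlier flat build_model, this hierarchical variant takes a 3-D batch of word indices shaped (batch, MAX_SEQ_LEN, MAX_SENT_LEN). A hedged sketch with random data; MAX_SEQ_LEN=10 and MAX_SENT_LEN=50 are assumed values, word_index is expected to come from a fitted tokenizer, and prepare_embeddings and f1_score from the surrounding project.

import numpy as np

nb_classes = 6
model = build_model(nb_classes=nb_classes,
                    word_index=word_index,
                    embedding_dim=300,
                    seq_length=50,   # appears unused; the module constants drive the shapes
                    stamp='han_blstm',
                    multilabel=True)

X = np.random.randint(1, len(word_index), size=(32, 10, 50))  # documents as sentences
y = np.random.randint(0, 2, size=(32, nb_classes))            # multi-hot labels
model.fit(X, y, epochs=1, batch_size=8)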
def creat_model(wordindex, wordindex1, matrix0, maxlen0, X_train, X_test,
                y_train, y_test):
    embedding_layer0 = Embedding(len(wordindex) + len(wordindex1) + 2,
                                 256,
                                 weights=[matrix0],
                                 input_length=maxlen0)
    main_input0 = Input(shape=(maxlen0, ), dtype='float64')
    embed = embedding_layer0(main_input0)
    # embedding_layer1 = Embedding(len(wordindex1) + 1, 256, weights=[embedding_matrix1], input_length=maxlen1)
    # main_input1 = Input(shape=(maxlen1,), dtype='float64')
    # embed1 = embedding_layer1(main_input1)
    # Word embedding (using pretrained word vectors)
    # embed = concatenate([embed0, embed1], axis=-1)
    # Convolution window sizes are 3, 4 and 5
    cnn1 = Convolution1D(100,
                         kernel_size=3,
                         padding='same',
                         strides=1,
                         activation='relu')(embed)
    cnn1 = MaxPool1D(pool_size=int(cnn1.shape[1]))(cnn1)
    drop1 = Dropout(0.5)(cnn1)
    cnn1 = Bidirectional(LSTM(256, return_sequences=True))(drop1)

    # att_layer1=AttentionWithContext()(cnn1)
    cnn2 = Convolution1D(100,
                         kernel_size=4,
                         padding='same',
                         strides=1,
                         activation='relu')(embed)
    cnn2 = MaxPool1D(pool_size=int(cnn2.shape[1]))(cnn2)
    drop2 = Dropout(0.5)(cnn2)
    cnn2 = Bidirectional(LSTM(256, return_sequences=True))(drop2)
    # att_layer2=AttentionWithContext()(cnn2)
    cnn3 = Convolution1D(100,
                         kernel_size=5,
                         padding='same',
                         strides=1,
                         activation='relu')(embed)
    cnn3 = MaxPool1D(pool_size=int(cnn3.shape[1]))(cnn3)
    drop3 = Dropout(0.5)(cnn3)
    cnn3 = Bidirectional(LSTM(256, return_sequences=True))(drop3)
    # att_layer3=AttentionWithContext()(cnn3)
    # Merge the output vectors of the three branches
    cnn = concatenate([cnn1, cnn2, cnn3], axis=-1)
    drop4 = Dropout(0.5)(cnn)
    # flat = Flatten()(cnn)
    att_layer2 = AttentionWithContext()(drop4)
    main_output = Dense(7, activation='softmax')(att_layer2)
    model = Model(inputs=main_input0, outputs=main_output)
    optimizer = optimizers.Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    # model.compile(loss='binary_crossentropy',
    # 		optimizer=optimizer,
    # 		metrics=[f1_score])
    model.summary()
    earlystopping = EarlyStopping(monitor='val_acc',
                                  min_delta=1e-2,
                                  patience=3,
                                  verbose=2,
                                  mode='auto')
    model.fit(X_train,
              y_train,
              verbose=1,
              batch_size=batch_size,
              epochs=n_epoch,
              validation_data=(X_test, y_test),
              callbacks=[earlystopping])
    filepath = "./model/sen_model12_yh_theme.h5"
    model.save(filepath=filepath, include_optimizer=True)
    score, acc = model.evaluate(X_test,
                                y_test,
                                verbose=1,
                                batch_size=batch_size)
    return model
def creat_model(wordindex, wordindex1, matrix0, maxlen0, X_train, X_test,
                y_train, y_test):
    embedding_layer0 = Embedding(len(wordindex) + len(wordindex1) + 2,
                                 256,
                                 weights=[matrix0],
                                 input_length=maxlen0)
    main_input0 = Input(shape=(maxlen0,), dtype='float64')
    embed = embedding_layer0(main_input0)
    # embedding_layer1 = Embedding(len(wordindex1) + 1, 256, weights=[embedding_matrix1], input_length=maxlen1)
    # main_input1 = Input(shape=(maxlen1,), dtype='float64')
    # embed1 = embedding_layer1(main_input1)
    # Word embedding (using pretrained word vectors)
    # embed = concatenate([embed0, embed1], axis=-1)
    # Convolution window sizes are 3, 4 and 5
    # cnn1 = Convolution1D(100,kernel_size=3 , padding='same', strides=1, activation='relu')(embed)
    # cnn1 = MaxPool1D(pool_size=int(cnn1.shape[1]))(cnn1)
    # drop1 = Dropout(0.5)(cnn1)
    # cnn1 = Bidirectional(LSTM(256, return_sequences=True))(drop1)
    #
    # # att_layer1=AttentionWithContext()(cnn1)
    # cnn2 = Convolution1D(100,kernel_size=4, padding='same', strides=1, activation='relu')(embed)
    # cnn2 = MaxPool1D(pool_size=int(cnn2.shape[1]))(cnn2)
    # drop2 = Dropout(0.5)(cnn2)
    # cnn2 = Bidirectional(LSTM(256, return_sequences=True))(drop2)
    # # att_layer2=AttentionWithContext()(cnn2)
    cnn = Bidirectional(LSTM(256, return_sequences=True))(embed)
    drop3 = Dropout(0.5)(cnn)
    cnn1 = Bidirectional(LSTM(256, return_sequences=True))(drop3)
    # cnn = Convolution1D(100, kernel_size=3, padding='same', strides=1, activation='relu')(drop3)
    # cnn = MaxPool1D(pool_size=int(cnn.shape[1]))(cnn)
    # att_layer3=AttentionWithContext()(cnn3)
    # Merge the output vectors of the three branches
    # cnn = concatenate([cnn1, cnn2, cnn3], axis=-1)
    # drop4 = Dropout(0.5)(cnn)
    # flat = Flatten()(cnn)
    att_layer2 = AttentionWithContext()(cnn1)
    main_output = Dense(3, activation='softmax')(att_layer2)
    model = Model(inputs=main_input0, outputs=main_output)
    optimizer = optimizers.Adam(lr=0.01)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    # plot_model(model, to_file='model_text_cnn.png', show_shapes=True)
    # model.compile(loss='binary_crossentropy',
    # 		optimizer=optimizer,
    # 		metrics=[f1_score])
    model.summary()
    earlystopping = EarlyStopping(monitor='val_acc', min_delta=1e-2,
                                  patience=3, verbose=2, mode='auto')
    history = model.fit(X_train, y_train, verbose=1, batch_size=batch_size,
                        epochs=n_epoch, validation_data=(X_test, y_test),
                        callbacks=[earlystopping])
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

    # Plot the training & validation loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()
    filepath = "./model/sen_model11_bilstm_cnn.h5"
    model.save(filepath=filepath, include_optimizer=True)
    score, acc = model.evaluate(X_test, y_test, verbose=1, batch_size=batch_size)
    return model
    # (fragment) annotate every cell of the confusion matrix cm with its count
    for x in range(width):
        for y in range(height):
            ax.annotate(str(cm[x][y]), xy=(y, x), horizontalalignment='center',
                        verticalalignment='center', color=getFontColor(cm[x][y]))

    # add genres as ticks
    alphabet = mods
    plt.xticks(range(width), alphabet[:width], rotation=30)
    plt.yticks(range(height), alphabet[:height])
    return plt
from keras import Sequential, layers
from keras import regularizers
from attention import AttentionWithContext

model = Sequential()
model.add(layers.LSTM(128, return_sequences=True, input_shape=(128, 2),
                      kernel_regularizer=regularizers.l2(0.001)))
model.add(AttentionWithContext())

model.add(layers.Dense(len(mods), activation="sigmoid"))

from keras import optimizers
# adam0 = optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

nb_epoch = 60     # number of epochs to train on
batch_size = 128  # training batch size
###################################################train the network#################################################

history = model.fit(X_train,
    Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=1,