def base_network2(input_shape):
    # `embedding_layer` and `dim` are expected to be defined at module level.
    inputs = Input(shape=input_shape)
    p = embedding_layer(inputs)
    p = LSTM(dim, return_sequences=True, dropout=0.5, name='f_input')(p)
    p = LSTM(dim, return_sequences=True, name='t_input')(p)
    multi_memory = AttentionWithContext()(p)
    return Model(inputs, multi_memory, name='review_base_nn')

def base_network1(input_shape):
    # Two parallel branches over the same input: an attention branch and a
    # stacked-LSTM branch, merged by element-wise addition.
    inputs = Input(shape=input_shape)
    p = embedding_layer(inputs)
    p = LSTM(dim, return_sequences=True, dropout=0.5, name='f_input')(p)
    p = AttentionWithContext()(p)
    q = embedding_layer(inputs)
    q = LSTM(dim, return_sequences=True, dropout=0.5, name='a_input')(q)
    q = LSTM(dim, return_sequences=False, name='b_input')(q)
    multi_memory_lstm = add([p, q])
    return Model(inputs, multi_memory_lstm, name='DFF')

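# A minimal sketch of wiring base_network1 into a binary classifier, assuming
# the module-level `embedding_layer` and `dim` used above exist; `seq_length`
# and the Dense head/optimizer here are illustrative, not from the original.
seq_length = 100  # hypothetical title/review length
base = base_network1((seq_length,))
clf_input = Input(shape=(seq_length,), dtype='int32')
clf_output = Dense(1, activation='sigmoid')(base(clf_input))
classifier = Model(clf_input, clf_output)
classifier.compile(optimizer='adam', loss='binary_crossentropy',
                   metrics=['accuracy'])
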
def build_model(nb_classes, word_index, embedding_dim, seq_length, stamp):
    """Builds a BiLSTM + attention classifier over pretrained embeddings."""
    embedding_matrix, nb_words = prepare_embeddings(word_index)

    input_layer = Input(shape=(seq_length,), dtype='int32')
    embedding_layer = Embedding(input_dim=nb_words + 1,
                                output_dim=embedding_dim,
                                input_length=seq_length,
                                weights=[embedding_matrix],
                                embeddings_regularizer=regularizers.l2(0.00),
                                trainable=True)(input_layer)
    drop1 = SpatialDropout1D(0.3)(embedding_layer)
    lstm_1 = Bidirectional(LSTM(128,
                                name='blstm_1',
                                activation='tanh',
                                recurrent_activation='hard_sigmoid',
                                recurrent_dropout=0.0,
                                dropout=0.5,
                                kernel_initializer='glorot_uniform',
                                return_sequences=True),
                           merge_mode='concat')(drop1)
    lstm_1 = BatchNormalization()(lstm_1)
    att_layer = AttentionWithContext()(lstm_1)
    drop3 = Dropout(0.5)(att_layer)
    predictions = Dense(nb_classes, activation='sigmoid')(drop3)

    model = Model(inputs=input_layer, outputs=predictions)
    adam = Adam(lr=0.001, decay=0.0)
    model.compile(loss='binary_crossentropy', optimizer=adam,
                  metrics=[f1_score])
    model.summary()
    print(stamp)

    # Save the model architecture.
    model_json = model.to_json()
    with open(stamp + ".json", "w") as json_file:
        json_file.write(model_json)

    return model

def create_model(self):
    """
    Creates the Hybrid Model. Consists of two components:
    * Left Component  : Computes the user-item interaction through matrix
      factorization (typical collaborative filtering).
    * Right Component : Uses user history, item features, time, etc. to
      dynamically model the user's interests (collaborative + content
      features).
    """
    # Initialise inputs for the left component.
    user_embed = Input(shape=(self.embedding_size_useritem,))
    item_embed = Input(shape=(self.embedding_size_useritem,))

    # Initialise inputs for the right component.
    user_read = Input(shape=(self.history, self.embedding_size_article))
    user_case = Input(shape=(self.embedding_size_article,))

    # Layers for the left component.
    concatenated_layer = concatenate([user_embed, item_embed])
    left_layer1 = Dense(128, activation='relu')(concatenated_layer)
    left_layer2 = Dense(64, activation='relu')(left_layer1)

    # Layers for the right component.
    lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(user_read)
    attention_layer = AttentionWithContext()(lstm_layer)
    right_layer_input = Dense(128, activation='relu')(user_case)
    elem_wise = multiply([attention_layer, right_layer_input])
    right_layer1 = Dense(64, activation='relu')(elem_wise)

    # Merge the left and right components.
    merged_layer = concatenate([left_layer2, right_layer1])
    merged_layer1 = Dense(256, activation='relu')(merged_layer)
    merged_layer2 = Dense(128, activation='relu')(merged_layer1)
    merged_layer3 = Dense(64, activation='relu')(merged_layer2)
    output = Dense(1, activation='sigmoid')(merged_layer3)

    self.model = Model(inputs=[user_embed, item_embed, user_read, user_case],
                       outputs=output)
    self.model.compile(optimizer='adadelta',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

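# A minimal sketch of exercising the hybrid create_model() above, assuming a
# hypothetical config holder exposing the attributes the method reads; the
# sizes are illustrative only.
class HybridConfig(object):
    embedding_size_useritem = 32
    embedding_size_article = 300
    history = 10

cfg = HybridConfig()
create_model(cfg)   # builds and compiles cfg.model
cfg.model.summary()
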
def build_hatt(word_vocab_size, classes):
    MAX_SENT_LENGTH = 100
    MAX_SENTS = 5

    # Word-level encoder: embeds a single sentence and applies attention.
    sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    embedded_sequences = Embedding(
        input_dim=word_vocab_size,
        output_dim=WORD_DIM,
        input_length=MAX_SENT_LENGTH)(sentence_input)
    l_lstm = Bidirectional(GRU(150, return_sequences=True))(embedded_sequences)
    l_dense = TimeDistributed(Dense(300))(l_lstm)
    l_att = AttentionWithContext()(l_dense)
    sentEncoder = Model(sentence_input, l_att)
    # print("sentEncoder Shape:", l_att._keras_shape)

    # Sentence-level encoder: applies the word-level encoder per sentence.
    review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
    review_encoder = TimeDistributed(sentEncoder)(review_input)
    # print("ReviewEncoder Shape:", review_encoder._keras_shape)
    l_lstm_sent = Bidirectional(GRU(150, return_sequences=True))(review_encoder)
    l_dense_sent = TimeDistributed(Dense(300))(l_lstm_sent)
    l_att_sent = AttentionWithContext()(l_dense_sent)

    preds = Dense(classes, activation='softmax')(l_att_sent)
    model = Model(review_input, preds)
    model.summary()
    return model

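# A minimal sketch of training the hierarchical attention network above,
# assuming the module-level WORD_DIM constant it reads (300 here is a guess)
# and random, purely illustrative data of shape
# (num_docs, MAX_SENTS, MAX_SENT_LENGTH).
import numpy as np

WORD_DIM = 300  # assumed module-level constant used by build_hatt
X = np.random.randint(0, 5000, size=(64, 5, 100))
y = np.eye(3)[np.random.randint(0, 3, size=64)]  # one-hot labels

hatt = build_hatt(word_vocab_size=5000, classes=3)
hatt.compile(optimizer='adam', loss='categorical_crossentropy',
             metrics=['accuracy'])
hatt.fit(X, y, batch_size=16, epochs=1)
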
def create_model(self):
    title_words = Input(shape=(self.title_max, self.word_embed_size))
    lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(title_words)
    dropout = Dropout(0.3)(lstm_layer)
    attention_layer = AttentionWithContext()(dropout)
    dropout2 = Dropout(0.3)(attention_layer)
    dense = Dense(64, activation='relu')(dropout2)
    dropout3 = Dropout(0.3)(dense)
    # dense = Dense(32, activation='relu')(attention_layer)
    # Note: categorical_crossentropy normally pairs with a softmax output;
    # the sigmoid here is kept as in the original.
    output = Dense(4, activation='sigmoid')(dropout3)

    self.model = Model(inputs=[title_words], outputs=output)
    self.model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['mse', 'accuracy'])

def create_model(self):
    title_words = Input(shape=(self.title_max, self.word_embed_size))
    # lstm_layer = LSTM(64, return_sequences=False)(title_words)
    lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(title_words)
    attention_layer = AttentionWithContext()(lstm_layer)
    dropout = Dropout(0.25)(attention_layer)
    dense = Dense(64, activation='relu')(dropout)
    dense = Dense(32, activation='relu')(dense)
    output = Dense(1, activation='sigmoid')(dense)

    self.model = Model(inputs=[title_words], outputs=output)
    self.model.compile(optimizer='adadelta',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

def create_right_only(self):
    """Only creates the right side of the model; specs remain the same."""
    user_read = Input(shape=(self.history, self.embedding_size_article))
    user_case = Input(shape=(self.embedding_size_article,))

    lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(user_read)
    attention_layer = AttentionWithContext()(lstm_layer)
    right_layer_input = Dense(128, activation='relu')(user_case)
    elem_wise = multiply([attention_layer, right_layer_input])
    right_layer1 = Dense(64, activation='relu')(elem_wise)
    output = Dense(1, activation='sigmoid')(right_layer1)

    self.model = Model(inputs=[user_read, user_case], outputs=output)
    self.model.compile(optimizer='adadelta',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

def create_model(self):
    user_read = Input(shape=(self.history, self.embedding_size_article))
    user_case = Input(shape=(self.embedding_size_article,))

    # self.rlg selects the recurrent cell: 0 = SimpleRNN, 1 = LSTM, else GRU.
    if self.rlg == 0:
        recurrent_layer = SimpleRNN(128, return_sequences=True)(user_read)
        recurrent_layer2 = SimpleRNN(128, return_sequences=False)(user_read)
    elif self.rlg == 1:
        recurrent_layer = LSTM(128, return_sequences=True)(user_read)
        recurrent_layer2 = LSTM(128, return_sequences=False)(user_read)
    else:
        recurrent_layer = GRU(128, return_sequences=True)(user_read)
        recurrent_layer2 = GRU(128, return_sequences=False)(user_read)

    # Attention over the full sequence, concatenated with the final state.
    attention_layer = AttentionWithContext()(recurrent_layer)
    concat_layer = concatenate([attention_layer, recurrent_layer2])

    # self.layers controls the depth of the two dense towers.
    if self.layers >= 1:
        left_layer = Dense(128, activation='relu')(concat_layer)
        right_layer = Dense(128, activation='relu')(user_case)
    if self.layers >= 2:
        left_layer = Dense(64, activation='relu')(left_layer)
        right_layer = Dense(64, activation='relu')(right_layer)
    if self.layers >= 3:
        left_layer = Dense(32, activation='relu')(left_layer)
        right_layer = Dense(32, activation='relu')(right_layer)

    elem_wise = multiply([left_layer, right_layer])
    output = Dense(1, activation='sigmoid')(elem_wise)

    self.model = Model(inputs=[user_read, user_case], outputs=output)
    self.model.compile(optimizer='adadelta',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

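# A minimal sketch of building all three recurrent variants of the model
# above, assuming a hypothetical config holder; the attribute values are
# illustrative (note self.layers must be >= 1 for the dense towers to exist).
class RecConfig(object):
    history = 10
    embedding_size_article = 300
    layers = 2

for cell_choice in (0, 1, 2):  # SimpleRNN, LSTM, GRU
    cfg = RecConfig()
    cfg.rlg = cell_choice
    create_model(cfg)
    print(cfg.rlg, cfg.model.count_params())
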
def create_model(self):
    """
    Initialises the input and layers for the model.
    =============================================================
    Model Component 1 (title word embeddings)
    * Input to this component is the word embeddings of the title. The
      length of the title is fixed to a particular value: padding is done
      if the title falls short, truncation if it is too long.
    * A BiLSTM layer with attention follows.
    * A sigmoid is then used to predict the class.

    Model Component 2 (title embeddings + document embeddings)
    This is similar to the siamese approach of training. Both title and
    body are brought to the same vector space for comparison.

    Model Component 3 (title embeddings + image embeddings)
    The same shared-weight scheme, comparing the title against the image
    representation.
    =============================================================
    """
    title_words = Input(shape=(self.title_max, self.word_embedding_size))
    text_embed_input = Input(shape=(self.doc2vec_size,))
    title_embed_input = Input(shape=(self.doc2vec_size,))
    image_embed_input = Input(shape=(self.image_size,))
    image_small = Dense(300, activation=self.activation)(image_embed_input)

    # Layers for Model Component 1.
    lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(title_words)
    lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(lstm_layer)
    lstm_layer = Bidirectional(LSTM(64, return_sequences=True))(lstm_layer)
    attention_layer = AttentionWithContext()(lstm_layer)
    dropout1 = Dropout(0.2)(attention_layer)
    left_hidden_layer1 = Dense(64, activation=self.activation)(dropout1)
    dropout2 = Dropout(0.2)(left_hidden_layer1)
    left_hidden_layer2 = Dense(32, activation=self.activation)(dropout2)

    # Layers for Model Component 2 (weights are shared).
    shared_hidden_layer1 = Dense(128, activation=self.activation)
    text_hid1 = shared_hidden_layer1(text_embed_input)
    title_hid1 = shared_hidden_layer1(title_embed_input)
    shared_hidden_layer2 = Dense(64, activation=self.activation)
    text_hid2 = shared_hidden_layer2(text_hid1)
    title_hid2 = shared_hidden_layer2(title_hid1)
    shared_hidden_layer3 = Dense(32, activation=self.activation)
    text_hid3 = shared_hidden_layer3(text_hid2)
    title_hid3 = shared_hidden_layer3(title_hid2)
    elem_wise_vector = multiply([text_hid3, title_hid3])

    # Layers for Model Component 3 (weights are shared).
    shared_hidden_layer_p1 = Dense(128, activation=self.activation)
    image_hid1 = shared_hidden_layer_p1(image_small)
    title_hid_p1 = shared_hidden_layer_p1(title_embed_input)
    shared_hidden_layer_p2 = Dense(64, activation=self.activation)
    image_hid2 = shared_hidden_layer_p2(image_hid1)
    title_hid_p2 = shared_hidden_layer_p2(title_hid_p1)
    shared_hidden_layer_p3 = Dense(32, activation=self.activation)
    image_hid3 = shared_hidden_layer_p3(image_hid2)
    title_hid_p3 = shared_hidden_layer_p3(title_hid_p2)
    elem_wise_vector2 = multiply([image_hid3, title_hid_p3])

    # Combine all three components.
    combined1 = concatenate(
        [left_hidden_layer2, elem_wise_vector, elem_wise_vector2])
    dropout_overall1 = Dropout(0.2)(combined1)
    combined2 = Dense(32, activation=self.activation)(dropout_overall1)

    # Predict.
    output = Dense(1, activation='sigmoid')(combined2)
    # output = Dense(1, activation='sigmoid')(elem_wise_vector2)
    # self.model = Model(inputs=[title_embed_input] + [image_embed_input],
    #                    outputs=output)

    self.model = Model(inputs=[title_words, text_embed_input,
                               title_embed_input, image_embed_input],
                       outputs=output)
    self.model.compile(optimizer='adadelta',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    self.model.summary()

def build_sentence_rnn(real_vocab_number, word_vocab_size=10,
                       char_vocab_size=10, classes=2, attention=False,
                       dropout=0, word=True, char=False, char_shape=True,
                       model="rnn", cnn_encoder=True, highway=None,
                       nohighway=None, shape_filter=True, char_filter=True):
    # Build the RNN over words, using the output of build_word_feature_*
    # as the feature of each word.
    if char_shape:
        word_feature_encoder = build_word_feature_shape(
            vocab_size=real_vocab_number,
            cnn_encoder=cnn_encoder,
            highway=highway,
            nohighway=nohighway,
            shape_filter=shape_filter,
            char_filter=char_filter)
        sentence_input = Input(shape=(MAX_SENTENCE_LENGTH,
                                      COMP_WIDTH * MAX_WORD_LENGTH),
                               dtype='int32')
        word_feature_sequence = TimeDistributed(word_feature_encoder)(
            sentence_input)
        # print(word_feature_sequence._keras_shape)
    if word:
        sentence_word_input = Input(shape=(MAX_SENTENCE_LENGTH,),
                                    dtype='int32')
        word_embedding_sequence = Embedding(
            input_dim=word_vocab_size,
            output_dim=WORD_DIM)(sentence_word_input)
    if char:
        word_feature_encoder = build_word_feature_char(
            vocab_size=char_vocab_size,
            cnn_encoder=cnn_encoder,
            highway=highway)
        char_input = Input(shape=(MAX_SENTENCE_LENGTH, MAX_WORD_LENGTH),
                           dtype='int32')
        word_feature_sequence = TimeDistributed(word_feature_encoder)(
            char_input)
    if char_shape and word and not char:
        word_feature_sequence = concatenate(
            [word_feature_sequence, word_embedding_sequence], axis=2)
    if word and not char_shape and not char:
        word_feature_sequence = word_embedding_sequence
    # print(word_feature_sequence._keras_shape)

    if model == "rnn":
        if attention:
            lstm_rnn = Bidirectional(
                LSTM(150, dropout=dropout,
                     return_sequences=True))(word_feature_sequence)
            if highway:
                lstm_rnn = TimeDistributed(
                    Highway(activation=highway))(lstm_rnn)
            elif nohighway:
                lstm_rnn = TimeDistributed(
                    Dense(units=300, activation=nohighway))(lstm_rnn)
            lstm_rnn = AttentionWithContext()(lstm_rnn)
        else:
            lstm_rnn = Bidirectional(
                LSTM(150, dropout=dropout,
                     return_sequences=False))(word_feature_sequence)
        x = lstm_rnn

    if classes < 2:
        print("class number cannot be less than 2")
        exit(1)
    else:
        preds = Dense(classes, activation='softmax')(x)

    if char_shape and not word and not char:
        sentence_model = Model(sentence_input, preds)
    if word and not char_shape and not char:
        sentence_model = Model(sentence_word_input, preds)
    if word and char_shape and not char:
        sentence_model = Model([sentence_input, sentence_word_input], preds)
    if char and not word and not char_shape:
        sentence_model = Model(char_input, preds)

    sentence_model.summary()
    return sentence_model

def build_model(nb_classes, word_index, embedding_dim, seq_length, stamp,
                multilabel=True):
    """Builds a hierarchical BiLSTM + attention classifier: a sentence-level
    encoder applied per sentence, followed by a document-level encoder."""
    embedding_matrix, nb_words = prepare_embeddings(word_index)

    input_layer = Input(shape=(MAX_SEQ_LEN, MAX_SENT_LEN), dtype='int32')

    # Sentence-level encoder.
    sentence_input = Input(shape=(MAX_SENT_LEN,), dtype='int32')
    embedding_layer = Embedding(input_dim=nb_words + 1,
                                output_dim=embedding_dim,
                                input_length=MAX_SENT_LEN,
                                weights=[embedding_matrix],
                                embeddings_regularizer=regularizers.l2(0.00),
                                trainable=True)(sentence_input)
    drop1 = SpatialDropout1D(0.3)(embedding_layer)
    sent_lstm = Bidirectional(LSTM(100,
                                   name='blstm_1',
                                   activation='tanh',
                                   recurrent_activation='hard_sigmoid',
                                   recurrent_dropout=0.0,
                                   dropout=0.4,
                                   kernel_initializer='glorot_uniform',
                                   return_sequences=True),
                              merge_mode='concat')(drop1)
    sent_att_layer = AttentionWithContext()(sent_lstm)
    sentEncoder = Model(sentence_input, sent_att_layer)
    sentEncoder.summary()

    # Document-level encoder.
    textEncoder = TimeDistributed(sentEncoder)(input_layer)
    drop2 = Dropout(0.4)(textEncoder)
    lstm_1 = Bidirectional(LSTM(100,
                                name='blstm_2',
                                activation='tanh',
                                recurrent_activation='hard_sigmoid',
                                recurrent_dropout=0.0,
                                dropout=0.4,
                                kernel_initializer='glorot_uniform',
                                return_sequences=True),
                           merge_mode='concat')(drop2)
    lstm_1 = BatchNormalization()(lstm_1)
    att_layer = AttentionWithContext()(lstm_1)
    drop3 = Dropout(0.5)(att_layer)

    if multilabel:
        predictions = Dense(nb_classes, activation='sigmoid')(drop3)
        model = Model(inputs=input_layer, outputs=predictions)
        adam = Adam(lr=0.001, decay=0.0)
        model.compile(loss='binary_crossentropy', optimizer=adam,
                      metrics=[f1_score])
    else:
        predictions = Dense(nb_classes, activation='softmax')(drop3)
        model = Model(inputs=input_layer, outputs=predictions)
        adam = Adam(lr=0.001, decay=0.0)
        model.compile(loss='categorical_crossentropy', optimizer=adam,
                      metrics=['accuracy'])

    model.summary()
    print(stamp)

    # Save the model architecture.
    model_json = model.to_json()
    with open(stamp + ".json", "w") as json_file:
        json_file.write(model_json)

    return model

def creat_model(wordindex, wordindex1, matrix0, maxlen0,
                X_train, X_test, y_train, y_test):
    # `batch_size` and `n_epoch` are module-level settings.
    embedding_layer0 = Embedding(len(wordindex) + len(wordindex1) + 2, 256,
                                 weights=[matrix0], input_length=maxlen0)
    main_input0 = Input(shape=(maxlen0,), dtype='float64')
    # Word embedding (pretrained word vectors).
    embed = embedding_layer0(main_input0)
    # embedding_layer1 = Embedding(len(wordindex1) + 1, 256,
    #                              weights=[embedding_matrix1],
    #                              input_length=maxlen1)
    # main_input1 = Input(shape=(maxlen1,), dtype='float64')
    # embed1 = embedding_layer1(main_input1)
    # embed = concatenate([embed0, embed1], axis=-1)

    # Three CNN branches with window sizes 3, 4 and 5.
    cnn1 = Convolution1D(100, kernel_size=3, padding='same', strides=1,
                         activation='relu')(embed)
    cnn1 = MaxPool1D(pool_size=int(cnn1.shape[1]))(cnn1)
    drop1 = Dropout(0.5)(cnn1)
    cnn1 = Bidirectional(LSTM(256, return_sequences=True))(drop1)
    # att_layer1 = AttentionWithContext()(cnn1)

    cnn2 = Convolution1D(100, kernel_size=4, padding='same', strides=1,
                         activation='relu')(embed)
    cnn2 = MaxPool1D(pool_size=int(cnn2.shape[1]))(cnn2)
    drop2 = Dropout(0.5)(cnn2)
    cnn2 = Bidirectional(LSTM(256, return_sequences=True))(drop2)
    # att_layer2 = AttentionWithContext()(cnn2)

    cnn3 = Convolution1D(100, kernel_size=5, padding='same', strides=1,
                         activation='relu')(embed)
    cnn3 = MaxPool1D(pool_size=int(cnn3.shape[1]))(cnn3)
    drop3 = Dropout(0.5)(cnn3)
    cnn3 = Bidirectional(LSTM(256, return_sequences=True))(drop3)
    # att_layer3 = AttentionWithContext()(cnn3)

    # Merge the output vectors of the three branches.
    cnn = concatenate([cnn1, cnn2, cnn3], axis=-1)
    drop4 = Dropout(0.5)(cnn)
    # flat = Flatten()(cnn)
    att_layer2 = AttentionWithContext()(drop4)
    main_output = Dense(7, activation='softmax')(att_layer2)

    model = Model(inputs=main_input0, outputs=main_output)
    optimizer = optimizers.Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    # model.compile(loss='binary_crossentropy',
    #               optimizer=optimizer,
    #               metrics=[f1_score])
    model.summary()

    earlystopping = EarlyStopping(monitor='val_acc', min_delta=1e-2,
                                  patience=3, verbose=2, mode='auto')
    model.fit(X_train, y_train,
              verbose=1,
              batch_size=batch_size,
              epochs=n_epoch,
              validation_data=(X_test, y_test),
              callbacks=[earlystopping])

    filepath = "./model/sen_model12_yh_theme.h5"
    model.save(filepath=filepath, include_optimizer=True)
    score, acc = model.evaluate(X_test, y_test, verbose=1,
                                batch_size=batch_size)
    return model

def creat_model(wordindex, wordindex1, matrix0, maxlen0,
                X_train, X_test, y_train, y_test):
    embedding_layer0 = Embedding(len(wordindex) + len(wordindex1) + 2, 256,
                                 weights=[matrix0], input_length=maxlen0)
    main_input0 = Input(shape=(maxlen0,), dtype='float64')
    # Word embedding (pretrained word vectors).
    embed = embedding_layer0(main_input0)

    # The three CNN + BiLSTM branches of the previous variant are disabled
    # here; this variant stacks two BiLSTMs directly on the embeddings.
    cnn = Bidirectional(LSTM(256, return_sequences=True))(embed)
    drop3 = Dropout(0.5)(cnn)
    cnn1 = Bidirectional(LSTM(256, return_sequences=True))(drop3)

    att_layer2 = AttentionWithContext()(cnn1)
    main_output = Dense(3, activation='softmax')(att_layer2)

    model = Model(inputs=main_input0, outputs=main_output)
    optimizer = optimizers.Adam(lr=0.01)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    # plot_model(model, to_file='model_text_cnn.png', show_shapes=True)
    model.summary()

    earlystopping = EarlyStopping(monitor='val_acc', min_delta=1e-2,
                                  patience=3, verbose=2, mode='auto')
    history = model.fit(X_train, y_train,
                        verbose=1,
                        batch_size=batch_size,
                        epochs=n_epoch,
                        validation_data=(X_test, y_test),
                        callbacks=[earlystopping])

    # Plot training & validation accuracy.
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

    # Plot training & validation loss.
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

    filepath = "./model/sen_model11_bilstm_cnn.h5"
    model.save(filepath=filepath, include_optimizer=True)
    score, acc = model.evaluate(X_test, y_test, verbose=1,
                                batch_size=batch_size)
    return model

# Tail of a confusion-matrix plotting helper: annotates each cell and labels
# the axes with the modulation classes in `mods` (the enclosing def and outer
# x loop are not part of this excerpt).
    for y in range(height):
        ax.annotate(str(cm[x][y]), xy=(y, x),
                    horizontalalignment='center',
                    verticalalignment='center',
                    color=getFontColor(cm[x][y]))
    # Add modulation classes as ticks.
    alphabet = mods
    plt.xticks(range(width), alphabet[:width], rotation=30)
    plt.yticks(range(height), alphabet[:height])
    return plt


from keras import Sequential, layers, optimizers, regularizers
from attention import AttentionWithContext

model = Sequential()
model.add(layers.LSTM(128, return_sequences=True, input_shape=(128, 2),
                      kernel_regularizer=regularizers.l2(0.001)))
model.add(AttentionWithContext())
model.add(layers.Dense(len(mods), activation="sigmoid"))

# adam0 = optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer="adam",
              metrics=['accuracy'])

nb_epoch = 60     # number of epochs to train on
batch_size = 128  # training batch size

# Train the network.
history = model.fit(X_train, Y_train,
                    batch_size=batch_size,
                    epochs=nb_epoch,
                    verbose=1)  # remaining fit arguments truncated in the source

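# For reference, a minimal sketch of the AttentionWithContext layer these
# snippets import. This is an assumption based on the widely shared Keras
# implementation of the attention mechanism from Yang et al. (2016),
# "Hierarchical Attention Networks for Document Classification"; the actual
# attention.py used by these projects may differ (masking is omitted here
# for brevity).
from keras import backend as K
from keras import initializers
from keras.layers import Layer


class AttentionWithContext(Layer):
    """Attention with a learned context vector:
    (batch, steps, features) -> (batch, features)."""

    def __init__(self, **kwargs):
        self.init = initializers.get('glorot_uniform')
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        dim = input_shape[-1]
        self.W = self.add_weight(shape=(dim, dim), initializer=self.init,
                                 name='W')
        self.b = self.add_weight(shape=(dim,), initializer='zero', name='b')
        self.u = self.add_weight(shape=(dim,), initializer=self.init,
                                 name='u')
        super(AttentionWithContext, self).build(input_shape)

    def call(self, x, mask=None):
        # uit = tanh(W x + b), computed per timestep.
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        # One scalar score per timestep via the context vector u,
        # normalised over timesteps with a softmax.
        ait = K.exp(K.sum(uit * self.u, axis=-1))
        a = ait / K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(),
                         K.floatx())
        # Weighted sum of the timestep features.
        return K.sum(x * K.expand_dims(a), axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])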