def fit_lstm_model(X_train, y_train, n_words, n_tags, seq_len, class_weights, epochs):
    '''Set up LSTM model with one input - equal length sequences of encoded text.

    Assumes Input, Embedding, Dropout, Bidirectional, LSTM, TimeDistributed, Dense,
    and Model are imported from Keras elsewhere in this file, and that
    embedding_matrix, X_test, and y_test are defined in the enclosing scope.
    '''
    input_seq = Input(shape=(seq_len,))

    # Pass the GloVe pretrained weights into the embedding layer
    embedding = Embedding(input_dim=n_words,
                          output_dim=300,
                          weights=[embedding_matrix],
                          trainable=True)(input_seq)
    embedding = Dropout(0.1)(embedding)

    # Add bidirectional LSTM layer, dense hidden layer, and final output layer
    model = Bidirectional(
        LSTM(units=64, return_sequences=True, recurrent_dropout=0.1))(embedding)
    model = TimeDistributed(Dense(64, activation='relu'))(model)
    output = Dense(n_tags, activation="softmax")(model)

    # Compile and fit the network (class_weight expects a dict, not a list)
    model = Model(inputs=input_seq, outputs=output)
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    history = model.fit(X_train, y_train,
                        epochs=epochs,
                        batch_size=32,
                        validation_split=0.1,
                        verbose=1,
                        class_weight=class_weights)

    # Simple performance report on the held-out test set
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f'Model test loss was {test_loss}')
    print(f'Model test accuracy was {test_acc}')

    return model, history
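# Hedged usage sketch (not part of the original script): tiny random arrays just to
# show the expected shapes. In the real pipeline, X_* hold padded word indices,
# y_* hold one-hot tag labels, and embedding_matrix holds the GloVe vectors.
# class_weights is left as None here because Keras generally rejects class_weight
# for per-token (3-D) targets; sample weighting is the usual workaround.
import numpy as np

n_words, n_tags, seq_len = 10000, 17, 50
embedding_matrix = np.random.rand(n_words, 300)   # stand-in for the GloVe weights
X_train = np.random.randint(0, n_words, size=(256, seq_len))
y_train = np.eye(n_tags)[np.random.randint(0, n_tags, size=(256, seq_len))]
X_test = np.random.randint(0, n_words, size=(64, seq_len))
y_test = np.eye(n_tags)[np.random.randint(0, n_tags, size=(64, seq_len))]

model, history = fit_lstm_model(X_train, y_train, n_words, n_tags,
                                seq_len, class_weights=None, epochs=2)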
def train(self, epochs, embedding=None):
    # Embedded Words
    txt_input = Input(shape=(None,), name='txt_input')
    txt_embed = Embedding(input_dim=self.num_words,
                          output_dim=MAX_LEN,
                          input_length=None,
                          name='txt_embedding',
                          trainable=False,
                          weights=[embedding])(txt_input)
    txt_drpot = Dropout(0.1, name='txt_dropout')(txt_embed)

    # Embedded Part of Speech
    pos_input = Input(shape=(None,), name='pos_input')
    pos_embed = Embedding(input_dim=self.num_pos,
                          output_dim=MAX_LEN,
                          input_length=None,
                          name='pos_embedding')(pos_input)
    pos_drpot = Dropout(0.1, name='pos_dropout')(pos_embed)

    # Embedded Characters
    char_in = Input(shape=(None, MAX_LEN_CHAR), name="char_input")
    emb_char = TimeDistributed(
        Embedding(input_dim=self.num_chars,
                  output_dim=MAX_LEN_CHAR,
                  input_length=None))(char_in)
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False, recurrent_dropout=0.5))(emb_char)

    # Concatenate inputs
    x = concatenate([txt_drpot, pos_drpot, char_enc], axis=2)
    x = SpatialDropout1D(0.3)(x)

    # Deep Layers
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(x)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)

    # Output
    out = TimeDistributed(Dense(self.num_entities, activation="softmax"))(model)

    model = Model(inputs=[txt_input, pos_input, char_in], outputs=[out])
    model.compile(optimizer="rmsprop",
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    plot_model(model, to_file=self.save_path + 'model_structure.png')
    print(model.summary())

    history = model.fit(
        [self.X_train, self.train_pos, self.train_characters],
        np.array(self.Y_train),
        batch_size=32,
        epochs=epochs,
        validation_data=(
            [self.X_validation, self.valid_pos, self.valid_characters],
            np.array(self.Y_validation)),
        verbose=1)

    model.save(self.save_path + 'model_ner')

    test_eval = model.evaluate(
        [self.X_test, self.test_pos, self.test_characters],
        np.array(self.Y_test))
    print('Test loss:', test_eval[0])
    print('Test accuracy:', test_eval[1])

    return model, history
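# Hedged shape sketch (illustrative, not from the original class): what the three
# inputs to train() are expected to look like; attribute and constructor names
# outside this method are hypothetical.
#   self.X_train          -> (num_sents, MAX_LEN)                 padded word indices
#   self.train_pos        -> (num_sents, MAX_LEN)                 padded POS indices
#   self.train_characters -> (num_sents, MAX_LEN, MAX_LEN_CHAR)   padded char indices
#   self.Y_train          -> (num_sents, MAX_LEN, num_entities)   one-hot entity tags
#
# ner = NerModel(...)                              # hypothetical constructor
# model, history = ner.train(epochs=10, embedding=glove_matrix)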
# set titles
sub_fig1.set_title('Accuracy')
sub_fig2.set_title('Loss')
print(hist)

# set values and labels
sub_fig1.plot(hist["crf_viterbi_accuracy"], label='acc')
sub_fig1.plot(hist["val_crf_viterbi_accuracy"], label='val_acc')
sub_fig1.legend(loc="lower right")
sub_fig2.plot(hist["loss"], label='loss')
sub_fig2.plot(hist["val_loss"], label='val_loss')
sub_fig2.legend(loc="upper right")
plt.xlabel('epoch')

# show figure
plt.show()

score = model.evaluate(
    [X_w_te, np.array(X_c_te).reshape((len(X_c_te), max_len, max_len_char))],
    np.array(y_te),
    batch_size=batch_size,
    verbose=1)
print(model.metrics_names)
print("Score:")
print(score)

# ## Prediction on test set
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

# print("Input:")
# print(X_te[0])
# print("Supposed output:")
# print(y_te)
# print(np.array(y_te))
test_pred = model.predict(
    [X_w_te, np.array(X_c_te).reshape((len(X_c_te), max_len, max_len_char))],
    verbose=1)
# print("Prediction result:")
# print(test_pred[0])

idx2tag = {i: w for w, i in tags2idx.items()}
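# Hedged sketch of the usual next step (not in the original cell): map the softmax
# outputs and the one-hot ground truth back to tag strings with idx2tag so seqeval
# can score them. Padding positions are not masked out here, which a real
# evaluation would likely want to handle.
def pred2label(pred):
    '''Convert (num_sents, max_len, n_tags) probabilities/one-hots to tag strings.'''
    return [[idx2tag[np.argmax(token)] for token in sentence] for sentence in pred]

pred_labels = pred2label(test_pred)
true_labels = pred2label(np.array(y_te))

print("F1-score: {:.1%}".format(f1_score(true_labels, pred_labels)))
print(classification_report(true_labels, pred_labels))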