def train(self, epochs, embedding=None):
    # Embedded words (embedding is a pretrained weight matrix, frozen here)
    txt_input = Input(shape=(None,), name='txt_input')
    txt_embed = Embedding(input_dim=self.num_words,
                          output_dim=MAX_LEN,
                          input_length=None,
                          name='txt_embedding',
                          trainable=False,
                          weights=[embedding])(txt_input)
    txt_drpot = Dropout(0.1, name='txt_dropout')(txt_embed)

    # Embedded part of speech
    pos_input = Input(shape=(None,), name='pos_input')
    pos_embed = Embedding(input_dim=self.num_pos,
                          output_dim=MAX_LEN,
                          input_length=None,
                          name='pos_embedding')(pos_input)
    pos_drpot = Dropout(0.1, name='pos_dropout')(pos_embed)

    # Embedded characters: an LSTM encodes each word's character sequence
    char_in = Input(shape=(None, MAX_LEN_CHAR), name='char_input')
    emb_char = TimeDistributed(
        Embedding(input_dim=self.num_chars,
                  output_dim=MAX_LEN_CHAR,
                  input_length=None))(char_in)
    char_enc = TimeDistributed(
        LSTM(units=20, return_sequences=False,
             recurrent_dropout=0.5))(emb_char)

    # Concatenate the three feature streams along the feature axis
    x = concatenate([txt_drpot, pos_drpot, char_enc], axis=2)
    x = SpatialDropout1D(0.3)(x)

    # Deep layers: stacked bidirectional LSTMs
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(x)
    model = Bidirectional(
        LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)

    # Output: per-token softmax over entity labels
    out = TimeDistributed(
        Dense(self.num_entities, activation='softmax'))(model)

    model = Model(inputs=[txt_input, pos_input, char_in], outputs=[out])
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    plot_model(model, to_file=self.save_path + 'model_structure.png')
    model.summary()  # summary() prints itself; wrapping it in print() shows "None"

    history = model.fit(
        [self.X_train, self.train_pos, self.train_characters],
        np.array(self.Y_train),
        batch_size=32,
        epochs=epochs,
        validation_data=(
            [self.X_validation, self.valid_pos, self.valid_characters],
            np.array(self.Y_validation)),
        verbose=1)
    model.save(self.save_path + 'model_ner')

    test_eval = model.evaluate(
        [self.X_test, self.test_pos, self.test_characters],
        np.array(self.Y_test))
    print('Test loss:', test_eval[0])
    print('Test accuracy:', test_eval[1])

    return model, history
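# The trainer above assumes self.train_characters is a 3-D tensor of shape
# (num_sentences, sentence_len, MAX_LEN_CHAR). A minimal sketch of building
# that tensor; build_char_input, sentences, and char2idx are hypothetical
# names, not part of the original class:
import numpy as np

def build_char_input(sentences, char2idx, max_len, max_len_char):
    """Encode each word of each sentence as a fixed-length row of char ids."""
    X_char = np.zeros((len(sentences), max_len, max_len_char), dtype='int32')
    for s, sentence in enumerate(sentences):
        for w, word in enumerate(sentence[:max_len]):
            for c, ch in enumerate(word[:max_len_char]):
                X_char[s, w, c] = char2idx.get(ch, 0)  # 0 is the padding id
    return X_char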
model = Model(inputs=inputs, outputs=model)
# model = multi_gpu_model(model, gpus=2)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              sample_weight_mode="temporal",
              metrics=['accuracy'])
model.summary()

# Train on videos with per-timestep sample weighting
# model.fit(x=X_train_m,
#           y=Y_train_,
#           validation_data=(X_vali_m, Y_vali_, M_vali[:, :, 0]),
#           epochs=nb_epoch,
#           batch_size=batch_size,
#           verbose=1,
#           # sample_weight=M_train[:, :, 0],
#           sample_weight=sample_weights,
#           callbacks=[lr_reducer, early_stopper, tensor_board, checkpointer])

model.fit_generator(train_generator(X_train, Y_train),
                    verbose=1,
                    epochs=nb_epoch,
                    steps_per_epoch=50,
                    validation_steps=10,
                    validation_data=vali_generator(X_vali, Y_vali),
                    callbacks=[lr_reducer, early_stopper, tensor_board,
                               checkpointer])
model.save('trained/' + model_name + '.h5')
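# train_generator and vali_generator are not defined in this snippet; Keras
# only requires an iterator that yields (batch_x, batch_y) pairs forever.
# A minimal sketch under that assumption (batch_size is an assumed parameter):
import numpy as np

def train_generator(X, Y, batch_size=16):
    n = len(X)
    while True:  # generators passed to fit_generator must never terminate
        order = np.random.permutation(n)
        for start in range(0, n - batch_size + 1, batch_size):
            batch = order[start:start + batch_size]
            yield X[batch], Y[batch]

# vali_generator can be the same function without the shuffling step.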
def bilstm(X_train, X_test, Y_train, Y_test, wordembeddings):
    np.random.seed(1234)
    tf.random.set_seed(1234)
    random.seed(1234)

    max_length_sentence = X_train.str.split().str.len().max()
    tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n\'',
                          lower=True)
    tokenizer.fit_on_texts(X_train)
    word_index = tokenizer.word_index
    EMBEDDING_DIM = 300
    vocabulary_size = len(word_index) + 1
    print('Found %s unique tokens.' % len(word_index))

    sequences_train = tokenizer.texts_to_sequences(X_train)
    sequences_valid = tokenizer.texts_to_sequences(X_test)
    X_train = pad_sequences(sequences_train, maxlen=max_length_sentence)
    X_val = pad_sequences(sequences_valid, maxlen=X_train.shape[1])
    y_train = np.asarray(Y_train)
    y_val = np.asarray(Y_test)
    # print(word_index)
    '''
    print('Shape of X_train tensor:', X_train.shape)
    print('Shape of X_val tensor:', X_val.shape)
    print('Shape of y_train tensor:', y_train.shape)
    print('Shape of y_val tensor:', y_val.shape)
    print(X_train)
    print("*" * 100)
    print(X_val)
    print("*" * 100)
    print(y_train)
    print("*" * 100)
    print(y_val)
    '''

    # Build the pretrained embedding matrix; fall back to the title-cased,
    # then upper-cased form, and finally to a random vector.
    embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))
    for word, i in word_index.items():
        if word in wordembeddings.keys():
            embedding_vector = wordembeddings[word]
            if len(embedding_vector) == 0:  # if the array is empty
                embedding_vector = wordembeddings[word.title()]
                if len(embedding_vector) == 0:
                    embedding_vector = wordembeddings[word.upper()]
                    if len(embedding_vector) == 0:
                        embedding_vector = np.array(
                            [round(np.random.rand(), 8) for _ in range(300)])
        else:
            # print("WORD NOT IN DICT", word)
            embedding_vector = np.array(
                [round(np.random.rand(), 8) for _ in range(300)])
        if len(embedding_vector) != 0:
            embedding_matrix[i] = embedding_vector

    # NOTE: this layer is built but never used; the model below creates its
    # own Embedding with the same weights.
    embedding_layer = Embedding(vocabulary_size,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=False)  # try with trainable=True

    inputs = Input(shape=(X_train.shape[1],))
    model = Embedding(vocabulary_size,
                      EMBEDDING_DIM,
                      input_length=max_length_sentence,
                      weights=[embedding_matrix])(inputs)
    model = Bidirectional(GRU(64))(model)  # CHANGE THIS FOR OTHER MODELS
    model = Dense(900, activation='relu')(model)
    model = Dense(400, activation='relu')(model)
    model = Dense(250, activation='relu')(model)
    model = Dense(204, activation='softmax')(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    callbacks = [EarlyStopping(monitor='val_loss')]
    hist_adam = model.fit(X_train,
                          y_train,
                          batch_size=1000,  # CHANGE BATCH SIZE TO 1000
                          epochs=200,       # change epochs to 200
                          verbose=1,
                          validation_data=(X_val, y_val),
                          callbacks=callbacks)

    model.save(config.bigru_prepocessed_dataset1_chai)  # CHANGE THIS FOR OTHER MODELS

    y_pred = model.predict(X_val)
    print(y_pred)
    y_val_class = pd.DataFrame(y_val).idxmax(axis=1)
    print(y_val_class)
    y_val_class_argmax = np.argmax(y_val, axis=1)
    y_pred_class_argmax = np.argmax(y_pred, axis=1)
    y_pred_class = pd.DataFrame(y_pred).idxmax(axis=1)
    print(y_pred_class)
    print(classification_report(y_val_class, y_pred_class))

    plt.suptitle('Optimizer : Adam', fontsize=10)
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=14)
    plt.plot(hist_adam.history['loss'], color='b', label='Training Loss')
    plt.plot(hist_adam.history['val_loss'], color='r',
             label='Validation Loss')
    plt.legend(loc='upper right')
    plt.savefig(
        '/home/ubuntu/asset_classification/results/bigru_model_dataset1_preprocessed_chai.png'
    )  # CHANGE THIS FOR OTHER MODELS

    tf.keras.utils.plot_model(model,
                              to_file=config.bigru_architecture,
                              show_shapes=True)  # CHANGE THIS FOR OTHER MODELS

    return (y_pred, y_val_class, y_pred_class, y_val_class_argmax,
            y_pred_class_argmax)
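# Hedged usage sketch for bilstm(); load_glove, df, and labels_onehot are
# placeholders, not part of the original code:
#
#   wordembeddings = load_glove('glove.6B.300d.txt')  # word -> 300-d vector
#   X_tr, X_te, Y_tr, Y_te = train_test_split(
#       df['text'], labels_onehot, test_size=0.2, random_state=1234)
#   (y_pred, y_val_class, y_pred_class,
#    y_val_argmax, y_pred_argmax) = bilstm(X_tr, X_te, Y_tr, Y_te,
#                                          wordembeddings)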
# pu.db
Y_test_dense = np.reshape(Y_test, (Y_test.shape[0], Y_test.shape[1]))
Y_test_dense = np.argmax(Y_test_dense, axis=-1)
# Y_train_dense mirrors Y_test_dense; it is referenced below but was never
# computed in the original snippet.
Y_train_dense = np.reshape(Y_train, (Y_train.shape[0], Y_train.shape[1]))
Y_train_dense = np.argmax(Y_train_dense, axis=-1)

for i in range(100):  # Python 3: range/print() instead of xrange/print
    print(i)
    model.fit(all_train, Y_train, batch_size=1000, epochs=5, verbose=1)
    Y_pred_train = model.predict(all_train, batch_size=1000)
    Y_pred_test = model.predict(all_test, batch_size=1000)

    Y_pred_train_dense = np.reshape(
        Y_pred_train, (Y_pred_train.shape[0], Y_pred_train.shape[1]))
    Y_pred_train_dense = np.argmax(Y_pred_train_dense, axis=-1)
    Y_pred_test_dense = np.reshape(
        Y_pred_test, (Y_pred_test.shape[0], Y_pred_test.shape[1]))
    Y_pred_test_dense = np.argmax(Y_pred_test_dense, axis=-1)

    train_acc = np.sum(Y_pred_train_dense == Y_train_dense) * 100.0 / len(
        Y_pred_train_dense)
    val_acc = np.sum(Y_pred_test_dense == Y_test_dense) * 100.0 / len(
        Y_pred_test_dense)
    print("Train acc: ", train_acc)
    print("Val acc: ", val_acc)
    model.save("check.h5")
pu.db
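# The manual accuracy above can be cross-checked with scikit-learn; an
# equivalent alternative, not the original author's code:
from sklearn.metrics import accuracy_score

train_acc = 100.0 * accuracy_score(Y_train_dense, Y_pred_train_dense)
val_acc = 100.0 * accuracy_score(Y_test_dense, Y_pred_test_dense)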