def demo_lstm():
    """Train and score the LSTM text model on the demo data set.

    The demo frame is split 80/20 by row order into train and validation
    parts. Column 1 is used as the text; every column from index 2 onwards
    is used as a separate label column. For each label column the model is
    fitted, scored on the validation rows with macro F1, and saved to
    ``lstm_model_demo_<column>.h5`` (a path relative to the working
    directory). The mean F1 over all columns is printed at the end.

    NOTE(review): the same ``ChatTextLSTM`` instance is fitted once per
    column; whether ``train`` re-initialises the weights is not visible
    from here -- confirm.

    Returns:
        bool: always ``True``.
    """
    demo_data = load_demo_data()

    # First 80% of the rows train the model, the remainder validates it.
    split_at = int(0.8 * demo_data.shape[0])
    demo_train = demo_data.iloc[:split_at]
    demo_valid = demo_data.iloc[split_at:]

    text_fullset = demo_data.iloc[:, 1]
    x_cut = word_cut(text_fullset)
    token_counts = x_cut.map(len)
    print(token_counts)
    print(max(token_counts))

    index_dict, word_vectors, x_combined = word2vec_train(x_cut)
    print('Text Words Length:{}'.format(len(index_dict)))
    n_symbols, embedding_weights = get_model_data(index_dict, word_vectors)
    # Previously printed the training shape twice; show both partitions.
    print(demo_train.shape, demo_valid.shape)

    text_lstm = ChatTextLSTM(
        input_dim=n_symbols,
        embedding_dim=setting.VOCABULARY_VECTOR_DIM,
        embedding_weights=embedding_weights)

    # x_combined is row-aligned with demo_data, so the same cut point applies.
    x_train = x_combined[:split_at]
    x_valid = x_combined[split_at:]
    n_valid = x_valid.shape[0]

    f1_score_dict = {}
    for col in demo_data.columns[2:]:
        # Labels are shifted by +2 before training (presumably so the
        # smallest label becomes class 0 -- confirm against the data set).
        y_train = demo_train[col] + 2
        y_valid = demo_valid[col] + 2
        text_lstm.train(x_train, y_train, x_valid, y_valid)

        # One batched forward pass instead of one predict() call per row.
        # Each row is flattened exactly as the former per-row
        # ``reshape(1, -1)`` did, and the argmax is taken per row.
        if n_valid:
            scores = np.asarray(
                text_lstm.model.predict(x_valid.reshape(n_valid, -1)))
            predicted = scores.reshape(n_valid, -1).argmax(axis=1)
        else:
            predicted = np.zeros(0, dtype=int)
        y_valid_pred = pd.Series(predicted)
        print(y_valid_pred)

        f1_score_dict[col] = f1_score(y_valid, y_valid_pred, average='macro')
        print('{} F1 Score:{}'.format(col, f1_score_dict[col]))
        text_lstm.model.save('lstm_model_demo_{}.h5'.format(col))

    f1_score_mn = np.mean(list(f1_score_dict.values()))
    print(f1_score_mn)
    return True
def cnn_train(X):
    """Fit a ``ChatTextCNN`` on ``X`` and return its underlying model.

    ``X`` is tokenised with ``word_cut``, embedded with ``word2vec_train``
    and split into train/test arrays by ``get_model_data`` before the CNN
    is trained.

    NOTE(review): the other callers in this file pass two arguments to
    ``get_model_data`` and unpack two values; this call passes four and
    unpacks six -- confirm the helper still supports this form.
    NOTE(review): ``X.iloc[:, 1]`` is handed over in the label position,
    while ``X`` as a whole goes to ``word_cut`` -- verify the expected
    column layout of ``X``.

    Args:
        X: pandas object providing ``.iloc`` with at least two columns.

    Returns:
        The ``model`` attribute of the trained ``ChatTextCNN``.
    """
    tokens = word_cut(X)
    index_dict, word_vectors, x_combined = word2vec_train(tokens)

    model_data = get_model_data(
        index_dict, word_vectors, x_combined, X.iloc[:, 1])
    (n_symbols, embedding_weights,
     x_train, y_train, x_test, y_test) = model_data

    cnn_kwargs = {
        'input_dim': n_symbols,
        'embedding_dim': setting.VOCABULARY_VECTOR_DIM,
        'embedding_weights': embedding_weights,
    }
    text_cnn = ChatTextCNN(**cnn_kwargs)
    text_cnn.train(x_train, y_train, x_test, y_test)
    return text_cnn.model
def sentiment_train_manager():
    """Run the full training pipeline, saving one model per label column.

    Steps:
      1. ``train_preprocess`` supplies tokenised train/validation text and
         the matching train/validation frames.
      2. Word embeddings are trained on the training text; the validation
         text is converted with ``input_transform``.
      3. The model chosen by ``ALGORITHM`` is built via ``model_select``.
      4. For every column in ``train_set.columns[2:]`` the model is fitted,
         scored on the validation set with macro F1, and saved under
         ``MODELS_SAVE_DIR`` as ``<algorithm>_model_<column>_<VERSION>.h5``.
      5. Per-column scores are written to ``F1_SCORE_PATH`` as
         ``column:score`` lines and the mean score is printed.

    NOTE(review): the same model object is fitted once per column; whether
    ``train`` re-initialises the weights is not visible from here --
    confirm.

    Returns:
        bool: always ``True``.
    """
    print('Train Text Preprocess')
    x_train_cut, x_valid_cut, train_set, valid_set = train_preprocess()

    print('Train Text Word Embedding')
    index_dict, word_vectors, x_combined = word2vec_train(x_train_cut)
    n_symbols, embedding_weights = get_model_data(index_dict, word_vectors)
    x_valid = input_transform(x_valid_cut)
    x_train = x_combined
    n_valid = x_valid.shape[0]
    print('Text Words Length:{}'.format(len(index_dict)))

    print('Model Select:{}'.format(ALGORITHM))
    text_train_model = model_select(n_symbols, embedding_weights)

    f1_score_dict = {}
    print('Start Model Training....')
    label_columns = train_set.columns[2:]
    col_len = len(label_columns)
    for i, col in enumerate(label_columns):
        # The separator in this message was mojibake (a corrupted
        # full-width comma); it is replaced with a plain ASCII comma.
        print('{} column is training, finish {}%!'.format(
            col, float(i) / float(col_len) * 100))

        # Labels are shifted by +2 before training (presumably so the
        # smallest label becomes class 0 -- confirm against the data set).
        y_train = train_set[col] + 2
        y_valid = valid_set[col] + 2
        text_train_model.train(x_train, y_train, x_valid, y_valid)

        # One batched forward pass instead of one predict() call per row.
        # Each row is flattened exactly as the former per-row
        # ``reshape(1, -1)`` did, and the argmax is taken per row.
        if n_valid:
            scores = np.asarray(
                text_train_model.model.predict(x_valid.reshape(n_valid, -1)))
            predicted = scores.reshape(n_valid, -1).argmax(axis=1)
        else:
            predicted = np.zeros(0, dtype=int)
        y_valid_pred = pd.Series(predicted)

        f1_score_dict[col] = f1_score(y_valid, y_valid_pred, average='macro')
        print('{} F1 Score:{}'.format(col, f1_score_dict[col]))

        model_save_path = os.path.join(
            MODELS_SAVE_DIR,
            '{}_model_{}_{}.h5'.format(ALGORITHM.lower(), col, VERSION))
        text_train_model.model.save(model_save_path)

    f1_score_mn = np.mean(list(f1_score_dict.values()))
    with open(F1_SCORE_PATH, 'w', encoding='utf-8') as fp:
        for col, f1_score_ in f1_score_dict.items():
            # write(), not writelines(): writelines() on a str emits it one
            # character at a time.
            fp.write('{}:{}\n'.format(col, f1_score_))
    print('Train Finished, F1 Score:{}'.format(f1_score_mn))
    return True