def demo_lstm():
    """Train and score one LSTM classifier per label column of the demo data.

    The demo frame returned by ``load_demo_data()`` is split positionally
    into the first 80% of rows (training) and the remaining rows
    (validation). The second column (``iloc[:, 1]``) is tokenised with
    ``word_cut`` and turned into model inputs by ``word2vec_train``. Every
    column from the third onwards is treated as a label column: a model is
    trained on it, the macro F1 score on the validation rows is printed, and
    the model is saved to ``lstm_model_demo_<column>.h5`` in the current
    working directory. The mean F1 score over all label columns is printed
    last.

    Returns:
        bool: Always ``True``.
    """
    demo_data = load_demo_data()

    # Compute the split point once so both halves are guaranteed to agree.
    split_at = int(0.8 * demo_data.shape[0])
    demo_train = demo_data.iloc[:split_at]
    demo_valid = demo_data.iloc[split_at:]

    text_fullset = demo_data.iloc[:, 1]
    x_cut = word_cut(text_fullset)
    token_counts = x_cut.map(len)
    print(token_counts)
    print(max(token_counts))

    index_dict, word_vectors, x_combined = word2vec_train(x_cut)
    print('Text Words Length:{}'.format(len(index_dict)))
    n_symbols, embedding_weights = get_model_data(index_dict, word_vectors)
    # The original printed the training shape twice; report both splits.
    print(demo_train.shape, demo_valid.shape)

    text_lstm = ChatTextLSTM(
        input_dim=n_symbols,
        embedding_dim=setting.VOCABULARY_VECTOR_DIM,
        embedding_weights=embedding_weights,
    )

    # x_combined is sliced with the same row count as the frame split, so it
    # is presumably row-aligned with demo_data -- TODO confirm against
    # word2vec_train.
    x_train = x_combined[:demo_train.shape[0]]
    x_valid = x_combined[demo_train.shape[0]:]

    f1_score_dict = {}
    for col in demo_data.columns[2:]:
        # Labels are shifted by +2 before training; presumably this maps the
        # raw label values onto non-negative class indices -- TODO confirm.
        y_train = demo_train[col] + 2
        y_valid = demo_valid[col] + 2

        # NOTE(review): the same ChatTextLSTM instance is trained for every
        # column. Whether train() re-initialises the weights is not visible
        # here -- confirm that columns do not leak into each other.
        text_lstm.train(x_train, y_train, x_valid, y_valid)

        # Predict the whole validation set in one call instead of one call
        # per row. Each row's output is flattened before argmax so the
        # result matches np.argmax over a single-row prediction.
        scores = np.asarray(text_lstm.model.predict(x_valid))
        y_valid_pred = pd.Series(
            np.argmax(scores.reshape(scores.shape[0], -1), axis=1)
        )
        print(y_valid_pred)

        f1_score_dict[col] = f1_score(y_valid, y_valid_pred, average='macro')
        print('{} F1 Score:{}'.format(col, f1_score_dict[col]))
        text_lstm.model.save('lstm_model_demo_{}.h5'.format(col))

    f1_score_mn = np.mean(list(f1_score_dict.values()))
    print(f1_score_mn)
    return True
def cnn_train(X):
    """Fit a ``ChatTextCNN`` on ``X`` and return its underlying model.

    ``X`` is passed to ``word_cut`` as a whole, and its second column
    (``X.iloc[:, 1]``) is handed to ``get_model_data`` as the fourth
    argument.

    NOTE(review): ``demo_lstm`` in this file calls ``get_model_data`` with
    two arguments and unpacks two values, whereas this function passes four
    arguments and unpacks six. Confirm ``get_model_data`` supports both call
    shapes.

    Args:
        X: Object exposing ``.iloc`` (presumably a pandas DataFrame --
            verify against callers).

    Returns:
        The ``model`` attribute of the trained ``ChatTextCNN``.
    """
    tokens = word_cut(X)
    vocab_index, vectors, sequences = word2vec_train(tokens)

    prepared = get_model_data(vocab_index, vectors, sequences, X.iloc[:, 1])
    n_symbols, embedding_weights, x_train, y_train, x_test, y_test = prepared

    classifier = ChatTextCNN(
        input_dim=n_symbols,
        embedding_dim=setting.VOCABULARY_VECTOR_DIM,
        embedding_weights=embedding_weights,
    )
    classifier.train(x_train, y_train, x_test, y_test)
    return classifier.model