def train(gan, inputs, epochs=4000, batch_size=32):
    """Adversarially train a GAN: alternate one discriminator batch and one
    generator batch per epoch.

    Parameters
    ----------
    gan : object
        Must expose ``generator``, ``discriminator``, ``gan_model`` (Keras-style
        models with ``predict`` / ``train_on_batch``), ``noise_size``, and a
        ``losses`` dict with ``"dis_loss"`` / ``"gen_loss"`` lists.  Optionally
        ``embedding_size`` (rank-3 noise) and ``word2embedded`` (maps word-id
        samples to embeddings).
    inputs : ndarray
        Pool of real samples; rows are drawn at random each epoch.
    epochs : int
        Number of alternating training rounds.
    batch_size : int
        Samples per batch for both networks.

    Side effects: appends per-epoch losses to ``gan.losses`` and prints
    progress.  Returns None.
    """
    def _sample_noise():
        # Embedding-aware generators consume rank-3 noise
        # (batch, noise, embedding); plain generators consume rank-2.
        if hasattr(gan, "embedding_size"):
            shape = (batch_size, gan.noise_size, gan.embedding_size)
        else:
            shape = (batch_size, gan.noise_size)
        return standard_normal(size=shape).astype(dtype="float32")

    for epoch in range(epochs):
        # ------------------------------------------------------------------
        # Train discriminator on a half-real / half-fake batch.
        # ------------------------------------------------------------------
        fake_samples = gan.generator.predict(_sample_noise())
        batch_index = sample(range(inputs.shape[0]), batch_size)
        if hasattr(gan, "word2embedded"):
            # Map raw word ids to embedding vectors before mixing with fakes.
            real_samples = gan.word2embedded(inputs[batch_index])
        else:
            real_samples = inputs[batch_index]
        cat_samples = np_concate((real_samples, fake_samples), axis=0)
        # One-hot targets: column 1 marks "real" (first half), column 0
        # marks "fake" (second half).
        target = zeros(shape=(batch_size * 2, 2), dtype="int32")
        target[:batch_size, 1] = 1
        target[batch_size:, 0] = 1
        # Feed the concatenated samples and matching targets to the
        # discriminator, with its weights unfrozen.
        fix_model(gan.discriminator, is_trainable=True)
        dis_loss = gan.discriminator.train_on_batch(x=cat_samples, y=target)
        gan.losses["dis_loss"].append(dis_loss[0])
        print(('epoch: {}, training discriminator, '
               'loss: {:.2f}, accuracy: {:.2f}').format(epoch + 1, *dis_loss))

        # ------------------------------------------------------------------
        # Train generator through the combined model, discriminator frozen.
        # ------------------------------------------------------------------
        target = zeros([batch_size, 2], dtype="int32")
        target[:, 1] = 1  # generator is rewarded when fakes are scored "real"
        fix_model(gan.discriminator, is_trainable=False)
        gen_loss = gan.gan_model.train_on_batch(x=_sample_noise(), y=target)
        gan.losses["gen_loss"].append(gen_loss[0])
        print(("epoch: {}, training generator, "
               "loss: {:.2f}, accuracy: {:.2f}").format(epoch + 1, *gen_loss))
        print('-' * 60)
# NOTE(review): the next three statements are the tail of a training function
# (presumably train_lstm) whose definition starts outside this chunk.
# `Y_test` here vs `y_test` two lines below looks inconsistent — confirm which
# name the enclosing function actually binds; one of them may be a NameError.
                         validation_data=(X_test, Y_test))
    model.save('/Users/leegho/Documents/pape/nlstm/sentiment_lstm_model.h5')
    model.evaluate(X_test, y_test, verbose=True, batch_size=32)


if __name__ == '__main__':
    # NOTE(review): sklearn.externals.joblib was deprecated in scikit-learn
    # 0.21 and removed in 0.23; modern environments need `import joblib`.
    from sklearn.externals import joblib
    # Load data: pickled embedding tensors for the positive and negative sets,
    # 10000 samples each (positive labelled 0, negative labelled 1).
    with open('/Users/leegho/Desktop/pos_embedding_tensor.pkl', 'rb') as fr:
        embedding_tensor_pos = joblib.load(fr)
    labels_pos = repeat(0, 10000)
    with open('/Users/leegho/Desktop/neg_embedding_tensor.pkl', 'rb') as fr:
        embedding_tensor_neg = joblib.load(fr)
    labels_neg = repeat(1, 10000)
    # Stack both classes into a single feature tensor / label vector.
    x = np_concate((embedding_tensor_pos, embedding_tensor_neg), axis=0)
    x = x.astype("float32")
    y = np_concate((labels_pos, labels_neg), axis=0)
    y = y.astype("int32")
    # Split the dataset (stratified; test_size=0.7 keeps only 30% for training
    # — presumably intentional, but worth confirming).
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.7, stratify=y)
    # One-hot encode labels for the softmax/categorical loss.
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    # Train the model.
    train_lstm(X_train, y_train, X_test, y_test)
    model_filepath = '/Users/leegho/Documents/pape/nlstm/sentiment_lstm_model.h5'
if __name__ == '__main__':
    # Load data: raw texts pre-split into train/test halves.
    x_train, x_test = load_file_and_preprocessing()
    # Build word vectors for both splits.
    get_train_vecs(x_train, x_test)
    # Fetch the vectorised train/test sets and their labels.
    train_vecs, y_train, test_vecs, y_test = get_data()

    # Single source of truth for the persisted model path (the original
    # repeated this literal four times).
    model_filepath = '/Users/leegho/Documents/pape/bayes_data/sentiment_bayes_model.m'

    def _predict_and_report(clf_):
        """Predict on the test split, print a classification report, run
        evaluate_bayes, and return the predictions."""
        preds = clf_.predict(test_vecs)
        print(classification_report(y_test, preds))
        evaluate_bayes(model_filepath, test_vecs, y_test)
        return preds

    # Train the model and evaluate it on the held-out test set.
    train_bayes(train_vecs, y_train)
    clf = joblib.load(model_filepath)
    # Fix: the original called clf.predict(test_vecs) twice in a row with
    # identical inputs; predict once and reuse the result below.
    y_pred = _predict_and_report(clf)

    # Self-training pass: append the test set, labelled with the model's own
    # predictions, to the training set, retrain, and evaluate again.
    xt = np_concate((train_vecs, test_vecs), axis=0)
    yt = np_concate((y_train, y_pred), axis=0)
    train_bayes(xt, yt)
    clf = joblib.load(model_filepath)
    _predict_and_report(clf)