def _mean_ap_and_mrr(qa_pairs):
    """Compute mean average precision and mean reciprocal rank.

    Args:
        qa_pairs: mapping from a question string to a list of
            ``(answer, label, score)`` tuples; a label equal to 1 marks a
            correct answer.

    Returns:
        ``(MAP, MRR)`` averaged over all questions in ``qa_pairs``. A question
        without any positive candidate contributes 0 to both sums (the
        previous inline code divided by zero in that case). An empty mapping
        yields ``(0.0, 0.0)``.
    """
    if not qa_pairs:
        return 0.0, 0.0

    ap_total, rr_total = 0.0, 0.0
    for candidates in qa_pairs.values():
        # Rank the candidates of one question by descending score.
        ranked = sorted(candidates, key=lambda item: item[-1], reverse=True)

        hits, precision_sum = 0, 0.0
        for rank, (_, label, _) in enumerate(ranked, start=1):
            if label != 1:
                continue
            if hits == 0:
                # Only the first correct answer counts towards MRR.
                rr_total += 1 / rank
            hits += 1
            precision_sum += hits / rank

        if hits:
            ap_total += precision_sum / hits

    num_questions = len(qa_pairs)
    return ap_total / num_questions, rr_total / num_questions


def test(w, l2_reg, epoch, max_len, model_type, num_layers, data_type, classifier, word2vec, num_classes=2):
    """Evaluate the checkpoints of epochs 1..epoch and report MAP / MRR.

    For every epoch the checkpoint ``<model_path>-<epoch>`` is restored, each
    test pair is scored one at a time, and MAP / MRR are computed over the
    candidates grouped by their first sentence. The per-epoch scores are
    printed and finally written as a tab-separated table to the path returned
    by ``build_path("./experiments/", ...)``.

    Args:
        w, l2_reg, model_type, num_layers, num_classes: forwarded to ``ABCNN``.
        epoch: number of saved epochs to evaluate; must be >= 1.
        max_len: sentence length passed to the dataset and to ``ABCNN`` as ``s``.
        data_type: ``"WikiQA"`` selects ``WikiQA``; anything else selects ``MSRP``.
        classifier: ``"LR"`` or ``"SVM"`` re-scores the pair with the pickled
            classifier saved for that epoch; any other value keeps the network
            prediction. The value is also used in the result file name.
        word2vec: embedding object forwarded to the dataset.

    Raises:
        ValueError: if ``epoch`` is smaller than 1 (previously this surfaced
            as an opaque ``max()`` error after all the set-up work).
    """
    if epoch < 1:
        raise ValueError("epoch must be >= 1")

    if data_type == "WikiQA":
        test_data = WikiQA(word2vec=word2vec, max_len=max_len)
    else:
        test_data = MSRP(word2vec=word2vec, max_len=max_len)

    test_data.open_file(mode="test")

    model = ABCNN(s=max_len, w=w, l2_reg=l2_reg, model_type=model_type,
                  num_features=test_data.num_features, num_classes=num_classes,
                  num_layers=num_layers)

    model_path = build_path("./models/", data_type, model_type, num_layers)
    MAPs, MRRs = [], []

    print("=" * 50)
    print("test data size:", test_data.data_size)

    # Build the saver once: constructing a new Saver per epoch keeps adding
    # save/restore ops to the same graph.
    saver = tf.train.Saver()
    use_clf = classifier == "LR" or classifier == "SVM"

    for e in range(1, epoch + 1):
        test_data.reset_index()

        with tf.Session() as sess:
            checkpoint = model_path + "-" + str(e)
            saver.restore(sess, checkpoint)
            print(checkpoint, "restored.")

            if use_clf:
                clf_path = build_path("./models/", data_type, model_type, num_layers,
                                      "-" + str(e) + "-" + classifier + ".pkl")
                # NOTE(review): joblib.load unpickles the file; only load
                # classifier files produced by a trusted training run.
                clf = joblib.load(clf_path)
                print(clf_path, "restored.")

            QA_pairs = {}
            s1s, s2s, labels, features = test_data.next_batch(
                batch_size=test_data.data_size)

            for i in range(test_data.data_size):
                # Score one pair at a time (each input gets a leading axis of size 1).
                pred, clf_input = sess.run(
                    [model.prediction, model.output_features],
                    feed_dict={
                        model.x1: np.expand_dims(s1s[i], axis=0),
                        model.x2: np.expand_dims(s2s[i], axis=0),
                        model.y: np.expand_dims(labels[i], axis=0),
                        model.features: np.expand_dims(features[i], axis=0)
                    })

                if classifier == "LR":
                    pred = clf.predict_proba(clf_input)[:, 1]
                elif classifier == "SVM":
                    pred = clf.decision_function(clf_input)

                s1 = " ".join(test_data.s1s[i])
                s2 = " ".join(test_data.s2s[i])

                # ``.item()`` replaces ``np.asscalar``, which was removed from NumPy.
                QA_pairs.setdefault(s1, []).append((s2, labels[i], pred.item()))

            # Calculate MAP and MRR for comparing performance
            MAP, MRR = _mean_ap_and_mrr(QA_pairs)

            MAPs.append(MAP)
            MRRs.append(MRR)

            print("[Epoch " + str(e) + "] MAP:", MAP, "/ MRR:", MRR)

    print("=" * 50)
    print("max MAP:", max(MAPs), "max MRR:", max(MRRs))
    print("=" * 50)

    exp_path = build_path("./experiments/", data_type, model_type, num_layers,
                          "-" + classifier + ".txt")
    with open(exp_path, "w", encoding="utf-8") as f:
        print("Epoch\tMAP\tMRR", file=f)
        # Iterate the collected scores directly instead of relying on the
        # loop variable that leaked out of the epoch loop.
        for epoch_no, (map_score, mrr_score) in enumerate(zip(MAPs, MRRs), start=1):
            print(str(epoch_no) + "\t" + str(map_score) + "\t" + str(mrr_score), file=f)
#----------------------------- define a logger ------------------------------- logger = logging.getLogger("test") logger.setLevel(logging.INFO) fh = logging.FileHandler("./test.log", mode="w") fh.setLevel(logging.INFO) logger.addHandler(fh) #----------------------------- define a logger end ---------------------------------- #------------------------------------load data ------------------------------- #load data word2Vec = Word2Vec() data_type = "WikiQA" if data_type == "WikiQA": test_data = WikiQA(word2vec=word2Vec) else: test_data = MSRP(word2vec=word2Vec) test_data.open_file(mode="test") #----------------------------------- load data end ---------------------- #----------------------------------- begin to train ----------------------------------- with tf.Graph().as_default(): with tf.device("/gpu:1"): gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.gpu_options) session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement, gpu_options=gpu_options) with tf.Session(config=session_conf).as_default() as sess:
def train(lr, w, l2_reg, epoch, batch_size, model_type, num_layers, data_type, word2vec, num_classes=2):
    """Train an ABCNN model and save a checkpoint plus LR / SVM classifiers per epoch.

    Each epoch runs the Adagrad update over every training batch, collects the
    ``model.output_features`` produced for the batches, saves a checkpoint with
    ``global_step`` set to the epoch number, and then fits a logistic
    regression and a linear SVM on the collected features against
    ``train_data.labels``. Both classifiers are dumped with joblib.

    Args:
        lr: learning rate for the Adagrad optimizer.
        w, l2_reg, model_type, num_layers, num_classes: forwarded to ``ABCNN``.
        epoch: number of passes over the training data.
        batch_size: number of examples requested per batch.
        data_type: ``"WikiQA"`` selects ``WikiQA``; anything else selects ``MSRP``.
        word2vec: embedding object forwarded to the dataset.
    """
    dataset_cls = WikiQA if data_type == "WikiQA" else MSRP
    train_data = dataset_cls(word2vec=word2vec)
    train_data.open_file(mode="train")

    banner = "=" * 50
    print(banner)
    print("training data size:", train_data.data_size)
    print("training max len:", train_data.max_len)
    print(banner)

    model = ABCNN(s=train_data.max_len, w=w, l2_reg=l2_reg, model_type=model_type,
                  num_features=train_data.num_features, num_classes=num_classes,
                  num_layers=num_layers)

    train_op = tf.train.AdagradOptimizer(lr, name="optimizer").minimize(model.cost)

    # Variable initializer and checkpoint writer (keeps up to 100 checkpoints).
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=100)

    # The summary op is still evaluated every step although its value is unused.
    fetches = [model.merged, train_op, model.cost, model.output_features]

    with tf.Session() as sess:
        sess.run(init)

        print(banner)
        for e in range(1, epoch + 1):
            print("[Epoch " + str(e) + "]")

            train_data.reset_index()
            step = 0

            # Fresh classifiers every epoch, fitted on this epoch's features only.
            logistic = linear_model.LogisticRegression()
            linear_svc = svm.LinearSVC()
            feature_chunks = []

            while train_data.is_available():
                step += 1

                x1, x2, y, extra = train_data.next_batch(batch_size=batch_size)
                feed = {
                    model.x1: x1,
                    model.x2: x2,
                    model.y: y,
                    model.features: extra
                }
                _summary, _, cost, batch_feats = sess.run(fetches, feed_dict=feed)

                feature_chunks.append(batch_feats)

                if not step % 100:
                    print("[batch " + str(step) + "] cost:", cost)

            checkpoint_prefix = build_path("./models/", data_type, model_type, num_layers)
            save_path = saver.save(sess, checkpoint_prefix, global_step=e)
            print("model saved as", save_path)

            stacked = np.concatenate(feature_chunks)

            # (estimator, file suffix, report prefix) -- LR first, then SVM.
            artifacts = (
                (logistic, "-LR.pkl", "LR saved as"),
                (linear_svc, "-SVM.pkl", "SVM saved as"),
            )
            for estimator, _, _ in artifacts:
                estimator.fit(stacked, train_data.labels)

            out_paths = [
                build_path("./models/", data_type, model_type, num_layers,
                           "-" + str(e) + suffix)
                for _, suffix, _ in artifacts
            ]
            for (estimator, _, _), out_path in zip(artifacts, out_paths):
                joblib.dump(estimator, out_path)
            for (_, _, report), out_path in zip(artifacts, out_paths):
                print(report, out_path)

        print("training finished!")
        print(banner)
def _pearson(predicted, expected):
    """Return the Pearson correlation of two arrays after squeezing singleton axes."""
    return np.corrcoef(np.squeeze(predicted), np.squeeze(expected))[0][1]


def train(lr, epoch, batch_size, data_type, word2vec, model_type):
    """Train a MixModel and, each epoch, evaluate it and three regressors on the test split.

    Per epoch the function:
      1. shuffles the training data and runs ``model.optimizer`` on every
         batch, printing the batch loss and the batch Pearson correlation;
      2. evaluates the network on the whole test split and prints the Pearson
         correlation of its predictions;
      3. fits gradient boosting, XGBoost and random forest regressors on the
         network's ``output_features`` of the training batches and prints the
         test Pearson correlation of each, followed by that of their average.

    Args:
        lr: learning rate forwarded to ``MixModel``.
        epoch: number of passes over the training data.
        batch_size: number of examples requested per batch.
        data_type: ``"WikiQA"`` or ``'Med'`` selects that dataset for training;
            anything else selects ``MSRP``.
        word2vec: embedding object forwarded to the datasets.
        model_type: forwarded to ``MixModel``.
    """
    if data_type == "WikiQA":
        train_data = WikiQA(word2vec=word2vec)
    elif data_type == 'Med':
        train_data = Med(word2vec=word2vec)
    else:
        train_data = MSRP(word2vec=word2vec)
    # NOTE(review): the test split always comes from Med, whatever data_type
    # is -- confirm this is intended.
    test_data = Med(word2vec=word2vec)

    train_data.open_file(mode="train")
    test_data.open_file(mode='test')

    print("=" * 50)
    print("training data size:", train_data.data_size)
    print("training max len:", train_data.max_len)
    print("=" * 50)

    model = MixModel(train_data.max_len, lr, model_type, train_data.num_features)

    # Initialize all variables
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        print("=" * 50)
        for e in range(1, epoch + 1):
            print("[Epoch " + str(e) + "]")

            train_data.shuffle_data()
            train_data.reset_index()
            i = 0

            # Fresh regressors every epoch, fitted on this epoch's features only.
            rfr_model = ensemble.RandomForestRegressor(n_estimators=20)
            gbrt = ensemble.GradientBoostingRegressor(n_estimators=50)
            xgb_model = xgb.XGBRegressor(colsample_bytree=0.4603, gamma=0.0468,
                                         learning_rate=0.05, max_depth=3,
                                         min_child_weight=1.7817, n_estimators=50,
                                         reg_alpha=0.4640, reg_lambda=0.8571,
                                         subsample=0.5213, silent=1,
                                         random_state=7, nthread=-1)
            all_features = []

            while train_data.is_available():
                i += 1

                batch_x1, batch_x2, batch_y, batch_features = train_data.next_batch(
                    batch_size=batch_size)

                _, c, predictions, output_features = sess.run(
                    [
                        model.optimizer, model.mse_loss, model.predictions,
                        model.output_features
                    ],
                    feed_dict={
                        model.embedded_x1: batch_x1,
                        model.embedded_x2: batch_x2,
                        model.labels: batch_y,
                        model.features: batch_features,
                    })

                pearson = _pearson(predictions, batch_y)
                all_features.append(output_features)

                # Progress is reported for every batch.
                print("[batch " + str(i) + "] cost:", c, 'training pearson:', pearson)

            # Evaluate the network itself on the whole test split.
            test_x1, test_x2, test_y, test_feature = test_data.get_data()
            test_predictions, test_features = sess.run(
                [model.predictions, model.output_features],
                feed_dict={
                    model.embedded_x1: test_x1,
                    model.embedded_x2: test_x2,
                    model.labels: test_y,
                    model.features: test_feature,
                })
            test_pearson = _pearson(test_predictions, test_y)
            print('epoch: ', e, 'pearson:', 'test_pearson: ', test_pearson)

            # NOTE(review): the features are collected in shuffled batch order
            # but fitted against train_data.labels -- this assumes
            # shuffle_data() reorders the labels in the same way; confirm.
            all_features = np.concatenate(all_features)

            # (report header, regressor), in the order they are fitted and reported.
            members = (
                ('============gbrt============', gbrt),
                ('============xgb============', xgb_model),
                ('============rfr============', rfr_model),
            )
            for _, regressor in members:
                regressor.fit(all_features, train_data.labels)

            member_preds = []
            for header, regressor in members:
                print(header)
                ml_pre = regressor.predict(test_features)
                test_pearson = _pearson(ml_pre, test_y)
                print('epoch: ', e, 'pearson:', 'ml_test_pearson: ', test_pearson)
                member_preds.append(ml_pre)

            # Average the predictions of the three ML models.
            print('===============模型average结果==================')
            average_pred = (member_preds[0] + member_preds[1] + member_preds[2]) / 3.0
            test_pearson = _pearson(average_pred, test_y)
            print('epoch: ', e, 'pearson:', 'ml_test_pearson: ', test_pearson)

            # Dump the raw network predictions for the test split.
            print(test_predictions)

        print("training finished!")
        print("=" * 50)