Example #1
import numpy as np
import tensorflow as tf
import joblib  # older scikit-learn exposed this as sklearn.externals.joblib

# Project-local modules; the paths assume the layout of the surrounding
# ABCNN repository (ABCNN.py, preprocess.py, utils.py)
from ABCNN import ABCNN
from preprocess import MSRP, WikiQA
from utils import build_path


def test(w,
         l2_reg,
         epoch,
         max_len,
         model_type,
         num_layers,
         data_type,
         classifier,
         word2vec,
         num_classes=2):
    if data_type == "WikiQA":
        test_data = WikiQA(word2vec=word2vec, max_len=max_len)
    else:
        test_data = MSRP(word2vec=word2vec, max_len=max_len)

    test_data.open_file(mode="test")

    model = ABCNN(s=max_len,
                  w=w,
                  l2_reg=l2_reg,
                  model_type=model_type,
                  num_features=test_data.num_features,
                  num_classes=num_classes,
                  num_layers=num_layers)

    model_path = build_path("./models/", data_type, model_type, num_layers)
    MAPs, MRRs = [], []

    print("=" * 50)
    print("test data size:", test_data.data_size)

    # Due to GTX 970 memory issues
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    for e in range(1, epoch + 1):
        test_data.reset_index()

        #with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        with tf.Session() as sess:
            saver = tf.train.Saver()
            saver.restore(sess, model_path + "-" + str(e))
            print(model_path + "-" + str(e), "restored.")

            if classifier == "LR" or classifier == "SVM":
                clf_path = build_path("./models/", data_type, model_type,
                                      num_layers,
                                      "-" + str(e) + "-" + classifier + ".pkl")
                clf = joblib.load(clf_path)
                print(clf_path, "restored.")

            QA_pairs = {}
            s1s, s2s, labels, features = test_data.next_batch(
                batch_size=test_data.data_size)

            for i in range(test_data.data_size):
                pred, clf_input = sess.run(
                    [model.prediction, model.output_features],
                    feed_dict={
                        model.x1: np.expand_dims(s1s[i], axis=0),
                        model.x2: np.expand_dims(s2s[i], axis=0),
                        model.y: np.expand_dims(labels[i], axis=0),
                        model.features: np.expand_dims(features[i], axis=0)
                    })

                if classifier == "LR":
                    clf_pred = clf.predict_proba(clf_input)[:, 1]
                    pred = clf_pred
                elif classifier == "SVM":
                    clf_pred = clf.decision_function(clf_input)
                    pred = clf_pred

                s1 = " ".join(test_data.s1s[i])
                s2 = " ".join(test_data.s2s[i])

                if s1 in QA_pairs:
                    QA_pairs[s1].append((s2, labels[i], np.asscalar(pred)))
                else:
                    QA_pairs[s1] = [(s2, labels[i], np.asscalar(pred))]

            # Calculate MAP and MRR for comparing performance
            MAP, MRR = 0, 0
            for s1 in QA_pairs:
                p, AP = 0, 0
                MRR_check = False

                QA_pairs[s1] = sorted(QA_pairs[s1],
                                      key=lambda x: x[-1],
                                      reverse=True)

                for idx, (s2, label, prob) in enumerate(QA_pairs[s1]):
                    if label == 1:
                        if not MRR_check:
                            MRR += 1 / (idx + 1)
                            MRR_check = True

                        p += 1
                        AP += p / (idx + 1)

                # A question with no positive answers would otherwise divide by zero
                if p > 0:
                    AP /= p
                MAP += AP

            num_questions = len(QA_pairs)
            MAP /= num_questions
            MRR /= num_questions

            MAPs.append(MAP)
            MRRs.append(MRR)

            print("[Epoch " + str(e) + "] MAP:", MAP, "/ MRR:", MRR)

    print("=" * 50)
    print("max MAP:", max(MAPs), "max MRR:", max(MRRs))
    print("=" * 50)

    exp_path = build_path("./experiments/", data_type, model_type, num_layers,
                          "-" + classifier + ".txt")
    with open(exp_path, "w", encoding="utf-8") as f:
        print("Epoch\tMAP\tMRR", file=f)
        for i in range(epoch):
            print(str(i + 1) + "\t" + str(MAPs[i]) + "\t" + str(MRRs[i]),
                  file=f)
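
The MAP/MRR bookkeeping above is easy to sanity-check in isolation. Below is a minimal, self-contained sketch on made-up scores and labels; the map_mrr helper is not part of the original code:

def map_mrr(qa_pairs):
    """qa_pairs maps a question to a list of (answer, label, score) tuples,
    where label == 1 marks a relevant answer."""
    MAP, MRR = 0.0, 0.0
    for candidates in qa_pairs.values():
        p, AP = 0, 0.0
        mrr_done = False
        # rank candidates by descending score, exactly as in the example above
        ranked = sorted(candidates, key=lambda x: x[-1], reverse=True)
        for idx, (_, label, _) in enumerate(ranked):
            if label == 1:
                if not mrr_done:
                    MRR += 1 / (idx + 1)  # reciprocal rank of the first hit
                    mrr_done = True
                p += 1
                AP += p / (idx + 1)  # precision at each relevant position
        if p > 0:
            AP /= p
        MAP += AP
    n = len(qa_pairs)
    return MAP / n, MRR / n

# The single relevant answer is ranked second, so MAP = MRR = 0.5
print(map_mrr({"q1": [("a1", 0, 0.9), ("a2", 1, 0.4)]}))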
Example #2
import logging

import tensorflow as tf

# Project-local modules; the paths assume the layout of the surrounding repository
from preprocess import Word2Vec, MSRP, WikiQA

# FLAGS (gpu_options, allow_soft_placement, log_device_placement) is assumed
# to be defined elsewhere in the full script, e.g. via tf.flags.DEFINE_*
FLAGS = tf.flags.FLAGS

#----------------------------- define a logger -------------------------------
logger = logging.getLogger("test")
logger.setLevel(logging.INFO)

fh = logging.FileHandler("./test.log", mode="w")
fh.setLevel(logging.INFO)

logger.addHandler(fh)
#----------------------------- define a logger end ----------------------------------

#------------------------------------ load data ------------------------------
word2Vec = Word2Vec()
data_type = "WikiQA"
if data_type == "WikiQA":
    test_data = WikiQA(word2vec=word2Vec)
else:
    test_data = MSRP(word2vec=word2Vec)
test_data.open_file(mode="test")
#----------------------------------- load data end ----------------------

#----------------------------------- begin to train -----------------------------------
with tf.Graph().as_default():
    with tf.device("/gpu:1"):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_options)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        with tf.Session(config=session_conf).as_default() as sess:
            ...  # the example is truncated here in the source
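
The snippet above breaks off inside the session block. For reference, here is a self-contained sketch of the same device-pinning and memory-capping pattern under TF 1.x; the concrete values are illustrative stand-ins for FLAGS.gpu_options, FLAGS.allow_soft_placement, and FLAGS.log_device_placement:

import tensorflow as tf

with tf.Graph().as_default():
    # pin the graph's ops to the second GPU
    with tf.device("/gpu:1"):
        total = tf.constant(1.0) + tf.constant(2.0)

    # cap this process at 70% of GPU memory (stand-in for FLAGS.gpu_options)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
    session_conf = tf.ConfigProto(allow_soft_placement=True,  # fall back to CPU if needed
                                  log_device_placement=False,
                                  gpu_options=gpu_options)
    with tf.Session(config=session_conf) as sess:
        print(sess.run(total))  # 3.0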
Example #3
import numpy as np
import tensorflow as tf
import joblib  # older scikit-learn exposed this as sklearn.externals.joblib
from sklearn import linear_model, svm

# Project-local modules; the paths assume the layout of the surrounding
# ABCNN repository (ABCNN.py, preprocess.py, utils.py)
from ABCNN import ABCNN
from preprocess import MSRP, WikiQA, Word2Vec
from utils import build_path


def train(lr,
          w,
          l2_reg,
          epoch,
          batch_size,
          model_type,
          num_layers,
          data_type,
          word2vec,
          num_classes=2):
    if data_type == "WikiQA":
        train_data = WikiQA(word2vec=word2vec)
    else:
        train_data = MSRP(word2vec=word2vec)

    train_data.open_file(mode="train")

    print("=" * 50)
    print("training data size:", train_data.data_size)
    print("training max len:", train_data.max_len)
    print("=" * 50)

    model = ABCNN(s=train_data.max_len,
                  w=w,
                  l2_reg=l2_reg,
                  model_type=model_type,
                  num_features=train_data.num_features,
                  num_classes=num_classes,
                  num_layers=num_layers)

    optimizer = tf.train.AdagradOptimizer(lr, name="optimizer").minimize(
        model.cost)

    # Due to GTX 970 memory issues
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    # Initialize all variables
    init = tf.global_variables_initializer()

    # model(parameters) saver
    saver = tf.train.Saver(max_to_keep=100)

    #with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    with tf.Session() as sess:
        #train_summary_writer = tf.summary.FileWriter("C:/tf_logs/train", sess.graph)

        sess.run(init)

        print("=" * 50)
        for e in range(1, epoch + 1):
            print("[Epoch " + str(e) + "]")

            train_data.reset_index()
            i = 0

            LR = linear_model.LogisticRegression()
            SVM = svm.LinearSVC()
            clf_features = []

            while train_data.is_available():
                i += 1

                batch_x1, batch_x2, batch_y, batch_features = train_data.next_batch(
                    batch_size=batch_size)

                merged, _, c, features = sess.run(
                    [
                        model.merged, optimizer, model.cost,
                        model.output_features
                    ],
                    feed_dict={
                        model.x1: batch_x1,
                        model.x2: batch_x2,
                        model.y: batch_y,
                        model.features: batch_features
                    })

                clf_features.append(features)

                if i % 100 == 0:
                    print("[batch " + str(i) + "] cost:", c)
                #train_summary_writer.add_summary(merged, i)

            save_path = saver.save(sess,
                                   build_path("./models/", data_type,
                                              model_type, num_layers),
                                   global_step=e)
            print("model saved as", save_path)

            clf_features = np.concatenate(clf_features)
            LR.fit(clf_features, train_data.labels)
            SVM.fit(clf_features, train_data.labels)

            LR_path = build_path("./models/", data_type, model_type,
                                 num_layers, "-" + str(e) + "-LR.pkl")
            SVM_path = build_path("./models/", data_type, model_type,
                                  num_layers, "-" + str(e) + "-SVM.pkl")
            joblib.dump(LR, LR_path)
            joblib.dump(SVM, SVM_path)

            print("LR saved as", LR_path)
            print("SVM saved as", SVM_path)

        print("training finished!")
        print("=" * 50)
Example #4
import numpy as np
import tensorflow as tf
import xgboost as xgb
from sklearn import ensemble

# Project-local modules; the module paths below are assumptions about the
# surrounding repository (MixModel and the Med dataset are project-specific)
from mix_model import MixModel
from preprocess import Med, MSRP, WikiQA, Word2Vec


def train(lr, epoch, batch_size, data_type, word2vec, model_type):
    if data_type == "WikiQA":
        train_data = WikiQA(word2vec=word2vec)
    elif data_type == 'Med':
        train_data = Med(word2vec=word2vec)
    else:
        train_data = MSRP(word2vec=word2vec)

    test_data = Med(word2vec=word2vec)
    train_data.open_file(mode="train")
    test_data.open_file(mode='test')

    print("=" * 50)
    print("training data size:", train_data.data_size)
    print("training max len:", train_data.max_len)
    print("=" * 50)

    model = MixModel(train_data.max_len, lr, model_type,
                     train_data.num_features)

    # Due to GTX 970 memory issues
    #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    # Initialize all variables
    init = tf.global_variables_initializer()

    # model(parameters) saver
    # saver = tf.train.Saver(max_to_keep=100)

    #with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    with tf.Session() as sess:
        # train_summary_writer = tf.summary.FileWriter("C:/tf_logs/train", sess.graph)

        sess.run(init)

        print("=" * 50)
        for e in range(1, epoch + 1):
            print("[Epoch " + str(e) + "]")
            train_data.shuffle_data()
            train_data.reset_index()
            i = 0

            # LR = linear_model.LogisticRegression()
            rfr_model = ensemble.RandomForestRegressor(n_estimators=20)
            ada = ensemble.AdaBoostRegressor(n_estimators=50)
            gbrt = ensemble.GradientBoostingRegressor(n_estimators=50)
            xgb_model = xgb.XGBRegressor(colsample_bytree=0.4603,
                                         gamma=0.0468,
                                         learning_rate=0.05,
                                         max_depth=3,
                                         min_child_weight=1.7817,
                                         n_estimators=50,
                                         reg_alpha=0.4640,
                                         reg_lambda=0.8571,
                                         subsample=0.5213,
                                         silent=1,
                                         random_state=7,
                                         nthread=-1)
            clf_features = []
            all_features = []
            test_all_features = []
            while train_data.is_available():
                i += 1

                batch_x1, batch_x2, batch_y, batch_features = train_data.next_batch(
                    batch_size=batch_size)

                # print(batch_x1.shape)
                # print(batch_x2.shape)
                # print(batch_y)
                # print(batch_features)
                _, c, predictions, output_features = sess.run(
                    [
                        model.optimizer, model.mse_loss, model.predictions,
                        model.output_features
                    ],
                    feed_dict={
                        model.embedded_x1: batch_x1,
                        model.embedded_x2: batch_x2,
                        model.labels: batch_y,
                        model.features: batch_features,
                    })

                # print('predictions:', predictions)
                pearson = np.corrcoef(np.squeeze(predictions),
                                      np.squeeze(batch_y))[0][1]

                all_features.append(output_features)
                # clf_features.apppend()

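                # note: i % 1 == 0 is always true, so every batch gets logged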
                if i % 1 == 0:
                    print("[batch " + str(i) + "] cost:", c,
                          'training pearson:', pearson)
            # train_x1, train_x2, train_y, train_feature, train_ml_features = train_data.get_data()
            # c, pre = sess.run([model.mse_loss, model.predictions], feed_dict={
            #     model.embedded_x1:train_x1,
            #     model.embedded_x2:train_x2,
            #     model.labels:train_y,
            #     model.features: train_feature,
            #     model.ml_features:train_ml_features
            #     })
            # pearson = np.corrcoef(np.squeeze(pre), np.squeeze(train_y))[0][1]
            # print("[epoch " + str(e) + "] cost:", c, 'training pearson:', pearson)

            test_x1, test_x2, test_y, test_feature = test_data.get_data()
            test, test_features = sess.run(
                [model.predictions, model.output_features],
                feed_dict={
                    model.embedded_x1: test_x1,
                    model.embedded_x2: test_x2,
                    model.labels: test_y,
                    model.features: test_feature,
                })
            test_pearson = np.corrcoef(np.squeeze(test),
                                       np.squeeze(test_y))[0][1]
            print('epoch:', e, 'test_pearson:', test_pearson)

            # result_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-test-result.pkl")
            # joblib.dump(test, result_path)
            # train_summary_writer.add_summary(merged, i)

            # test_all_features.append(test_features)

            # save_path = saver.save(sess, build_path("./models/", data_type, model_type, model.num_layers), global_step=e)
            # print("model saved as", save_path)
            all_features = np.concatenate(all_features)

            # test_all_features = np.concatenate(test_all_features)
            #train_data.labels
            #print('train.labels: ', train_data.labels.dtype)
            # ada.fit(clf_features, train_data.labels)
            gbrt.fit(all_features, train_data.labels)
            xgb_model.fit(all_features, train_data.labels)
            rfr_model.fit(all_features, train_data.labels)

            # all_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-all_features.pkl")
            # joblib.dump(all_features, all_path)
            # clf_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-clf.pkl")
            # joblib.dump(clf_features, clf_path)
            # test_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-test_features.pkl")
            # joblib.dump(test_all_features, test_path)

            print('============gbrt============')
            ml_pre_gbrt = gbrt.predict(test_features)
            test_pearson = np.corrcoef(np.squeeze(ml_pre_gbrt),
                                       np.squeeze(test_y))[0][1]
            print('epoch:', e, 'ml_test_pearson:', test_pearson)

            # print('============ada============')
            # ml_pre_ada = ada.predict(test_features)
            # test_pearson = np.corrcoef(np.squeeze(ml_pre_ada), np.squeeze(test_y))[0][1]
            # print('epoch: ', e, 'pearson:', 'ml_test_pearson: ', test_pearson)

            print('============xgb============')
            ml_pre_xgb = xgb_model.predict(test_features)
            test_pearson = np.corrcoef(np.squeeze(ml_pre_xgb),
                                       np.squeeze(test_y))[0][1]
            print('epoch:', e, 'ml_test_pearson:', test_pearson)

            print('============rfr============')
            ml_pre_rfr = rfr_model.predict(test_features)
            test_pearson = np.corrcoef(np.squeeze(ml_pre_rfr),
                                       np.squeeze(test_y))[0][1]
            print('epoch:', e, 'ml_test_pearson:', test_pearson)

            # average the predictions of the three ML models
            print('=============== model averaging results ==================')
            average_pred = (ml_pre_gbrt + ml_pre_xgb + ml_pre_rfr) / 3.0
            test_pearson = np.corrcoef(np.squeeze(average_pred),
                                       np.squeeze(test_y))[0][1]
            print('epoch:', e, 'ml_test_pearson:', test_pearson)

        print(test)
        print("training finished!")
        print("=" * 50)