def main(model_name):
    print('model name', model_name)
    if model_name == 'bimpm':
        model = bimpm()
    elif model_name == 'drmmt':
        model = drmm_tks()
    elif model_name == 'cnn':
        model = model_conv1D_()
    elif model_name == 'slstm':
        model = Siamese_LSTM()
    elif model_name == 'esim':
        model = esim()
    elif model_name == 'dam':
        model = decomposable_attention()
    elif model_name == 'abcnn':
        model = ABCNN(
            left_seq_len=config.word_maxlen,
            right_seq_len=config.word_maxlen,
            depth=3,
            nb_filter=100,
            filter_widths=[5, 4, 3],
            collect_sentence_representations=True,
            abcnn_1=True,
            abcnn_2=True,
            # mode="euclidean",
            mode="cos",
            # mode='dot'
        )
    do_train_cv(model_name, model, epoch_nums=1, kfolds=5)
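# The entry point that calls main() is not shown in this snippet. A minimal,
# assumed CLI wrapper (the sys.argv handling below is an illustration, not
# part of the original code):
import sys

if __name__ == '__main__':
    # e.g. `python train_cv.py esim` selects the model built in main()
    model_name = sys.argv[1] if len(sys.argv) > 1 else 'cnn'
    main(model_name)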
def get_model(model_name):
    lr = 0.001
    if model_name == 'bimpm':  # 3, no
        model = bimpm()
    elif model_name == 'drmmt':  # 3, yes, but all 1
        model = drmm_tks(num_layer=3, hidden_sizes=[100, 80, 1], topk=20)
    elif model_name == 'msrnn':
        model = MATCHSRNN()
    elif model_name == 'dssm':  # 5
        model = dssm()
    elif model_name == 'arc2':
        model = arc2()
    elif model_name == 'test':
        model = test()
    elif model_name == 'cnn':
        lr = 0.01
        model = model_conv1D_()
    elif model_name == 'rnn':
        model = rnn_v1()
    elif model_name == 'rnn0':  # 3, yes
        model = my_rnn()
    elif model_name == 'slstm':  # 5, no
        model = Siamese_LSTM()
    elif model_name == 'scnn':  # does not exist
        model = Siamese_CNN()
    elif model_name == 'esim':  # 5, no
        lr = 0.01
        model = esim()
    elif model_name == 'dam':  # 3, yes
        model = decomposable_attention()
    elif model_name == 'abcnn':
        model = ABCNN(
            left_seq_len=config.word_maxlen,
            right_seq_len=config.word_maxlen,
            depth=2,
            nb_filter=100,
            filter_widths=[5, 3],
            collect_sentence_representations=False,
            abcnn_1=True,
            abcnn_2=True,
            # mode="euclidean",
            # mode="cos",
            mode='dot')
    return model, lr
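# A hedged usage sketch for get_model(), assuming the returned objects are
# Keras models and that the per-model learning rate lr is meant to configure
# the optimizer. The compile settings below are assumptions, not taken from
# the original code.
from keras.optimizers import Adam

model, lr = get_model('esim')
model.compile(optimizer=Adam(lr=lr),
              loss='binary_crossentropy',  # assumed binary matching objective
              metrics=['accuracy'])
model.summary()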
def main(model_name):
    print('model name', model_name)
    x_train, y_train, x_dev, y_dev = load_data()
    if model_name == 'bimpm':
        model = bimpm()
    elif model_name == 'cnn':
        model = model_conv1D_()
    elif model_name == 'slstm':
        model = Siamese_LSTM()
    elif model_name == 'esim':
        model = esim()
    elif model_name == 'dam':
        model = decomposable_attention()
    elif model_name == 'abcnn':
        model = ABCNN(
            left_seq_len=config.word_maxlen,
            right_seq_len=config.word_maxlen,
            depth=3,
            nb_filter=100,
            filter_widths=[5, 4, 3],
            collect_sentence_representations=True,
            abcnn_1=True,
            abcnn_2=True,
            # mode="euclidean",
            mode="cos",
            # mode='dot'
        )
    train(x_train, y_train, x_dev, y_dev, model_name, model)
def train(lr, w, l2_reg, epoch, batch_size, model_type, num_layers, data_type,
          word2vec, num_classes=2):
    if data_type == 'BioASQ':
        train_data = BioASQ(word2vec=word2vec)
    else:
        print("Wrong dataset...")
        return  # bail out so train_data is never used uninitialized

    ################################
    # Open the train text file in train mode
    train_data.open_file(mode="train")

    ################################
    print("=" * 50)
    # Total number of lines (QA pairs) in the train text file
    print("training data size:", train_data.data_size)
    print("training max len:", train_data.max_len)
    print("=" * 50)

    ################################
    model = ABCNN(
        s=train_data.max_len,
        w=w,
        l2_reg=l2_reg,
        model_type=model_type,
        d0=30,
        num_features=train_data.num_features,
        num_classes=num_classes,
        num_layers=num_layers
    )

    ################################
    # Adagrad optimizer for the training process
    optimizer = tf.train.AdagradOptimizer(lr, name="optimizer").minimize(model.cost)

    ################################
    # Initialize all variables
    init = tf.global_variables_initializer()

    ################################
    # The Saver class adds ops to save and restore variables to and from checkpoints.
    # Checkpoints are binary files in a proprietary format which map variable names
    # to tensor values. The best way to examine the contents of a checkpoint is to
    # load it using a Saver.
    saver = tf.train.Saver(max_to_keep=100)

    ################################
    # A Session object encapsulates the environment in which Operation objects are
    # executed and Tensor objects are evaluated.
    with tf.Session() as sess:
        sess.run(init)  # initialize variables

        print("=" * 50)
        print('Params:')
        print(np.sum([np.prod(v.shape) for v in tf.trainable_variables()]))
        print("=" * 50)

        # Training loop over the requested number of epochs
        for e in range(1, epoch + 1):
            print("[Epoch " + str(e) + "]")

            train_data.reset_index()
            i = 0
            LR = linear_model.LogisticRegression()
            SVM = svm.LinearSVC()
            clf_features = []

            # While there is training data left in this epoch
            while train_data.is_available():
                i += 1

                # Retrieve the next training batch:
                # batch_x1: question sentence
                # batch_x2: candidate answer sentence
                # batch_y: label (0 or 1)
                # batch_features: additional batch features
                batch_x1, batch_x2, batch_y, batch_features = train_data.next_batch(batch_size=batch_size)

                merged, _, c, features = sess.run(
                    [model.merged, optimizer, model.cost, model.output_features],
                    feed_dict={model.x1: batch_x1,
                               model.x2: batch_x2,
                               model.y: batch_y,
                               model.features: batch_features})

                clf_features.append(features)

                if i % 100 == 0:
                    print("[batch " + str(i) + "] cost:", c)

            # Save the checkpoint for this epoch
            save_path = saver.save(sess,
                                   build_path("./models/", data_type, model_type, num_layers),
                                   global_step=e)
            print("model saved as", save_path)

            ##################################
            clf_features = np.concatenate(clf_features)
            LR.fit(clf_features, train_data.labels)
            SVM.fit(clf_features, train_data.labels)

            ##################################
            LR_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-LR.pkl")
            SVM_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-SVM.pkl")
            joblib.dump(LR, LR_path)
            joblib.dump(SVM, SVM_path)

            ##################################
            print("LR saved as", LR_path)
            print("SVM saved as", SVM_path)

        # The training loop is finished; the model has been trained
        print("training finished!")
        print("=" * 50)
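# build_path() is called throughout but not defined in this snippet. Based on
# how it is used (prefix, dataset name, model type, layer count, and an
# optional suffix such as "-1-LR.pkl"), it presumably concatenates these parts
# into a single path. A minimal sketch under that assumption:
def build_path(prefix, data_type, model_type, num_layers, postfix=""):
    # Assumed helper: composes paths like "./models/BioASQ-ABCNN3-2"
    # or "./models/BioASQ-ABCNN3-2-1-LR.pkl".
    return prefix + data_type + "-" + str(model_type) + "-" + str(num_layers) + postfix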
def test(w, l2_reg, epoch, max_len, model_type, num_layers, data_type,
         classifier, word2vec, num_classes=2):
    if data_type == "WikiQA":
        test_data = WikiQA(word2vec=word2vec, max_len=max_len)
    else:
        test_data = MSRP(word2vec=word2vec, max_len=max_len)

    test_data.open_file(mode="test")

    model = ABCNN(s=max_len, w=w, l2_reg=l2_reg, model_type=model_type,
                  num_features=test_data.num_features, num_classes=num_classes,
                  num_layers=num_layers)

    model_path = build_path("./models/", data_type, model_type, num_layers)

    MAPs, MRRs = [], []

    print("=" * 50)
    print("test data size:", test_data.data_size)

    # Due to GTX 970 memory issues
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    for e in range(1, epoch + 1):
        test_data.reset_index()

        # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        with tf.Session() as sess:
            saver = tf.train.Saver()
            saver.restore(sess, model_path + "-" + str(e))
            print(model_path + "-" + str(e), "restored.")

            if classifier == "LR" or classifier == "SVM":
                clf_path = build_path("./models/", data_type, model_type, num_layers,
                                      "-" + str(e) + "-" + classifier + ".pkl")
                clf = joblib.load(clf_path)
                print(clf_path, "restored.")

            QA_pairs = {}

            s1s, s2s, labels, features = test_data.next_batch(batch_size=test_data.data_size)

            for i in range(test_data.data_size):
                pred, clf_input = sess.run(
                    [model.prediction, model.output_features],
                    feed_dict={
                        model.x1: np.expand_dims(s1s[i], axis=0),
                        model.x2: np.expand_dims(s2s[i], axis=0),
                        model.y: np.expand_dims(labels[i], axis=0),
                        model.features: np.expand_dims(features[i], axis=0)
                    })

                if classifier == "LR":
                    clf_pred = clf.predict_proba(clf_input)[:, 1]
                    pred = clf_pred
                elif classifier == "SVM":
                    clf_pred = clf.decision_function(clf_input)
                    pred = clf_pred

                s1 = " ".join(test_data.s1s[i])
                s2 = " ".join(test_data.s2s[i])

                if s1 in QA_pairs:
                    QA_pairs[s1].append((s2, labels[i], np.asscalar(pred)))
                else:
                    QA_pairs[s1] = [(s2, labels[i], np.asscalar(pred))]

            # Calculate MAP and MRR for comparing performance
            MAP, MRR = 0, 0
            for s1 in QA_pairs.keys():
                p, AP = 0, 0
                MRR_check = False

                QA_pairs[s1] = sorted(QA_pairs[s1], key=lambda x: x[-1], reverse=True)

                for idx, (s2, label, prob) in enumerate(QA_pairs[s1]):
                    if label == 1:
                        if not MRR_check:
                            MRR += 1 / (idx + 1)
                            MRR_check = True

                        p += 1
                        AP += p / (idx + 1)

                AP /= p
                MAP += AP

            num_questions = len(QA_pairs.keys())
            MAP /= num_questions
            MRR /= num_questions

            MAPs.append(MAP)
            MRRs.append(MRR)

            print("[Epoch " + str(e) + "] MAP:", MAP, "/ MRR:", MRR)

    print("=" * 50)
    print("max MAP:", max(MAPs), "max MRR:", max(MRRs))
    print("=" * 50)

    exp_path = build_path("./experiments/", data_type, model_type, num_layers, "-" + classifier + ".txt")
    with open(exp_path, "w", encoding="utf-8") as f:
        print("Epoch\tMAP\tMRR", file=f)
        for i in range(e):
            print(str(i + 1) + "\t" + str(MAPs[i]) + "\t" + str(MRRs[i]), file=f)
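# The MAP/MRR computation above is repeated in every test function. For
# clarity, here are the same ranking metrics as a self-contained helper; the
# function name and signature are illustrative, not part of the original code.
def mean_average_precision_and_mrr(qa_pairs):
    """qa_pairs maps a question string to a list of (answer, label, score)
    tuples, exactly as built inside test() above."""
    MAP, MRR = 0.0, 0.0
    for question, candidates in qa_pairs.items():
        # Rank candidates by model score, highest first.
        ranked = sorted(candidates, key=lambda x: x[-1], reverse=True)
        p, AP = 0, 0.0
        mrr_found = False
        for idx, (_, label, _) in enumerate(ranked):
            if label == 1:
                if not mrr_found:
                    MRR += 1 / (idx + 1)  # reciprocal rank of the first correct answer
                    mrr_found = True
                p += 1
                AP += p / (idx + 1)       # precision at each relevant rank
        if p:
            AP /= p                       # average precision for this question
        MAP += AP
    n = len(qa_pairs)
    return MAP / n, MRR / n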
def train(lr, w, l2_reg, epoch, batch_size, model_type, num_layers, data_type,
          word2vec, num_classes=2):
    if data_type == "WikiQA":
        train_data = WikiQA(word2vec=word2vec)
    else:
        train_data = MSRP(word2vec=word2vec)

    train_data.open_file(mode="train")

    print("=" * 50)
    print("training data size:", train_data.data_size)
    print("training max len:", train_data.max_len)
    print("=" * 50)

    model = ABCNN(s=train_data.max_len, w=w, l2_reg=l2_reg, model_type=model_type,
                  num_features=train_data.num_features, num_classes=num_classes,
                  num_layers=num_layers)

    optimizer = tf.train.AdagradOptimizer(lr, name="optimizer").minimize(model.cost)

    # Due to GTX 970 memory issues
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    # Initialize all variables
    init = tf.global_variables_initializer()

    # model (parameters) saver
    saver = tf.train.Saver(max_to_keep=100)

    # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    with tf.Session() as sess:
        # train_summary_writer = tf.summary.FileWriter("C:/tf_logs/train", sess.graph)
        sess.run(init)

        print("=" * 50)
        for e in range(1, epoch + 1):
            print("[Epoch " + str(e) + "]")

            train_data.reset_index()
            i = 0
            LR = linear_model.LogisticRegression()
            SVM = svm.LinearSVC()
            clf_features = []

            while train_data.is_available():
                i += 1

                batch_x1, batch_x2, batch_y, batch_features = train_data.next_batch(batch_size=batch_size)

                merged, _, c, features = sess.run(
                    [model.merged, optimizer, model.cost, model.output_features],
                    feed_dict={model.x1: batch_x1,
                               model.x2: batch_x2,
                               model.y: batch_y,
                               model.features: batch_features})

                clf_features.append(features)

                if i % 100 == 0:
                    print("[batch " + str(i) + "] cost:", c)

                # train_summary_writer.add_summary(merged, i)

            save_path = saver.save(sess,
                                   build_path("./models/", data_type, model_type, num_layers),
                                   global_step=e)
            print("model saved as", save_path)

            clf_features = np.concatenate(clf_features)
            LR.fit(clf_features, train_data.labels)
            SVM.fit(clf_features, train_data.labels)

            LR_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-LR.pkl")
            SVM_path = build_path("./models/", data_type, model_type, num_layers, "-" + str(e) + "-SVM.pkl")
            joblib.dump(LR, LR_path)
            joblib.dump(SVM, SVM_path)

            print("LR saved as", LR_path)
            print("SVM saved as", SVM_path)

        print("training finished!")
        print("=" * 50)
def test(w, l2_reg, epoch, max_len, model_type, num_layers, data_type,
         classifier, word2vec, num_classes=2):
    if data_type == 'BioASQ':
        test_data = BioASQ(word2vec=word2vec, max_len=max_len)
    else:
        print("Wrong dataset...")
        return  # bail out so test_data is never used uninitialized

    test_data.open_file(mode="test")

    model = ABCNN(s=max_len, w=w, l2_reg=l2_reg, model_type=model_type,
                  num_features=test_data.num_features, num_classes=num_classes,
                  num_layers=num_layers)

    model_path = build_path("./models/", data_type, model_type, num_layers)

    MAPs, MRRs = [], []

    print("=" * 50)
    print("test data size:", test_data.data_size)

    test_data.reset_index()

    with tf.Session() as sess:
        max_epoch = 2  # Enter the best epoch found during evaluation on the validation set

        saver = tf.train.Saver()
        saver.restore(sess, model_path + "-" + str(max_epoch))
        print(model_path + "-" + str(max_epoch), "restored.")

        if classifier == "LR" or classifier == "SVM":
            clf_path = build_path("./models/", data_type, model_type, num_layers,
                                  "-" + str(max_epoch) + "-" + classifier + ".pkl")
            clf = joblib.load(clf_path)
            print(clf_path, "restored.")

        QA_pairs = {}
        Bio_pairs = {}

        s1s, s2s, labels, features = test_data.next_batch(batch_size=test_data.data_size)
        qids, old_qs, old_as, starts, ends, dids = test_data.getMoreInfo()

        for i in range(test_data.data_size):
            pred, clf_input = sess.run(
                [model.prediction, model.output_features],
                feed_dict={model.x1: np.expand_dims(s1s[i], axis=0),
                           model.x2: np.expand_dims(s2s[i], axis=0),
                           model.y: np.expand_dims(labels[i], axis=0),
                           model.features: np.expand_dims(features[i], axis=0)})

            if classifier == "LR":
                clf_pred = clf.predict_proba(clf_input)[:, 1]
                pred = clf_pred
            elif classifier == "SVM":
                clf_pred = clf.decision_function(clf_input)
                pred = clf_pred

            s1 = " ".join(test_data.s1s[i])
            s2 = " ".join(test_data.s2s[i])

            if s1 in QA_pairs:
                QA_pairs[s1].append((s2, labels[i], np.asscalar(pred)))
                Bio_pairs[(s1, qids[i], old_qs[i])].append(
                    (s2, np.asscalar(pred), old_qs[i], old_as[i], starts[i], ends[i], dids[i]))
            else:
                QA_pairs[s1] = [(s2, labels[i], np.asscalar(pred))]
                Bio_pairs[(s1, qids[i], old_qs[i])] = [
                    (s2, np.asscalar(pred), old_qs[i], old_as[i], starts[i], ends[i], dids[i])]

        createJsonFile(Bio_pairs)

        # Calculate MAP and MRR
        MAP, MRR = 0, 0
        for s1 in QA_pairs.keys():
            p, AP = 0, 0
            MRR_check = False

            QA_pairs[s1] = sorted(QA_pairs[s1], key=lambda x: x[-1], reverse=True)

            for idx, (s2, label, prob) in enumerate(QA_pairs[s1]):
                if label == 1:
                    if not MRR_check:
                        MRR += 1 / (idx + 1)
                        MRR_check = True

                    p += 1
                    AP += p / (idx + 1)

            if p != 0:
                AP /= p
            else:
                AP = 0
            MAP += AP

        num_questions = len(QA_pairs.keys())
        MAP /= num_questions
        MRR /= num_questions

        MAPs.append(MAP)
        MRRs.append(MRR)
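# createJsonFile() is not defined in this snippet. Given that Bio_pairs maps
# (question, qid, original question) to tuples carrying the score, the original
# answer text, its start/end offsets, and the document id, it presumably ranks
# the candidate snippets per question and dumps them in a BioASQ-style JSON
# file. Everything in the sketch below (file name, field names, the top-10
# cut-off) is an assumption, not the original implementation.
import json

def createJsonFile(bio_pairs, epoch=None, top_k=10):
    # Hypothetical reconstruction: rank candidate snippets per question by
    # model score and write a BioASQ-style JSON file.
    questions = []
    for (s1, qid, old_q), candidates in bio_pairs.items():
        ranked = sorted(candidates, key=lambda c: c[1], reverse=True)[:top_k]
        snippets = [{"text": old_a,
                     "offsetInBeginSection": start,
                     "offsetInEndSection": end,
                     "document": did}
                    for (_, score, _, old_a, start, end, did) in ranked]
        questions.append({"id": qid, "body": old_q, "snippets": snippets})
    out_path = "predictions.json" if epoch is None else "predictions-" + str(epoch) + ".json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump({"questions": questions}, f, indent=2)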
def test(mode, w, l2_reg, max_len, model_type, num_layers, data_type,
         classifier, word2vec, num_classes=2, epoch_from=1, epoch_to=2):
    # Note: epoch_from / epoch_to were referenced but not defined in the
    # original snippet; they are assumed here as parameters bounding the
    # range of saved checkpoints to evaluate.
    if data_type == 'BioASQ':
        test_data = BioASQ(word2vec=word2vec, max_len=max_len)
    else:
        print("Wrong dataset...")
        return  # bail out so test_data is never used uninitialized

    ################################
    test_data.open_file(mode=mode)

    # Collect one MAP/MRR score per evaluated checkpoint (moved outside the
    # epoch loop so the scores accumulate instead of being reset each epoch)
    MAPs, MRRs = [], []

    for epoch in range(epoch_from, epoch_to):
        tf.reset_default_graph()

        ################################
        model = ABCNN(s=max_len, w=w, l2_reg=l2_reg, model_type=model_type, d0=30,
                      num_features=test_data.num_features, num_classes=num_classes,
                      num_layers=num_layers)

        ################################
        model_path = build_path("./models/", data_type, model_type, num_layers)

        ################################
        print("=" * 50)
        print("test data size:", test_data.data_size)

        ################################
        test_data.reset_index()

        ################################
        with tf.Session() as sess:
            # max_epoch = 2  # Enter the best epoch found during evaluation on the validation set
            max_epoch = epoch

            saver = tf.train.Saver()
            saver.restore(sess, model_path + "-" + str(max_epoch))
            print(model_path + "-" + str(max_epoch), "restored.")

            ################################
            if classifier == "LR" or classifier == "SVM":
                clf_path = build_path("./models/", data_type, model_type, num_layers,
                                      "-" + str(max_epoch) + "-" + classifier + ".pkl")
                clf = joblib.load(clf_path)
                print(clf_path, "restored.")

            ################################
            # Count the trainable parameters of the restored model
            total_parameters = 0
            for variable in tf.trainable_variables():
                # shape is an array of tf.Dimension
                shape = variable.get_shape()
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            print('total_parameters: {}'.format(total_parameters))

            ################################
            QA_pairs = {}
            Bio_pairs = {}

            ################################
            s1s, s2s, labels, features = test_data.next_batch(batch_size=test_data.data_size)
            qids, old_qs, old_as, starts, ends, dids = test_data.getMoreInfo()

            ################################
            for i in range(test_data.data_size):
                pred, clf_input = sess.run(
                    [model.prediction, model.output_features],
                    feed_dict={model.x1: np.expand_dims(s1s[i], axis=0),
                               model.x2: np.expand_dims(s2s[i], axis=0),
                               model.y: np.expand_dims(labels[i], axis=0),
                               model.features: np.expand_dims(features[i], axis=0)})

                ################################
                if classifier == "LR":
                    clf_pred = clf.predict_proba(clf_input)[:, 1]
                    pred = clf_pred
                elif classifier == "SVM":
                    clf_pred = clf.decision_function(clf_input)
                    pred = clf_pred

                ################################
                s1 = " ".join(test_data.s1s[i])
                s2 = " ".join(test_data.s2s[i])

                ################################
                if s1 in QA_pairs:
                    QA_pairs[s1].append((s2, labels[i], np.asscalar(pred)))
                    Bio_pairs[(s1, qids[i], old_qs[i])].append(
                        (s2, np.asscalar(pred), old_qs[i], old_as[i], starts[i], ends[i], dids[i]))
                else:
                    QA_pairs[s1] = [(s2, labels[i], np.asscalar(pred))]
                    Bio_pairs[(s1, qids[i], old_qs[i])] = [
                        (s2, np.asscalar(pred), old_qs[i], old_as[i], starts[i], ends[i], dids[i])]

            ################################
            createJsonFile(Bio_pairs, epoch)

            # Calculate MAP and MRR
            MAP, MRR = 0, 0
            for s1 in QA_pairs.keys():
                p, AP = 0, 0
                MRR_check = False

                ################################
                QA_pairs[s1] = sorted(QA_pairs[s1], key=lambda x: x[-1], reverse=True)

                for idx, (s2, label, prob) in enumerate(QA_pairs[s1]):
                    if label == 1:
                        if not MRR_check:
                            MRR += 1 / (idx + 1)
                            MRR_check = True

                        p += 1
                        AP += p / (idx + 1)

                if p != 0:
                    AP /= p
                else:
                    AP = 0
                MAP += AP

            ################################
            num_questions = len(QA_pairs.keys())
            MAP /= num_questions
            MRR /= num_questions

            ################################
            MAPs.append(MAP)
            MRRs.append(MRR)
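# Since the original snippet never defines epoch_from / epoch_to, the rewrite
# above takes them as parameters. A purely illustrative call, evaluating the
# checkpoints saved after epochs 1..10 on the BioASQ test split and re-ranking
# with the per-epoch logistic-regression classifier (every concrete value
# below is an assumption, not from the original code):
test(mode="test", w=4, l2_reg=0.0004, max_len=40, model_type="ABCNN3",
     num_layers=2, data_type="BioASQ", classifier="LR", word2vec=word2vec,
     epoch_from=1, epoch_to=11)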