def main(train_path, test_path, accuracyOnt, trainAccuracyOnt, test_size, remaining_size,
         learning_rate=FLAGS.learning_rate, keep_prob=FLAGS.keep_prob1, momentum=0.9,
         l2=FLAGS.l2_reg, number_of_heads=FLAGS.heads):
    """Train the ``lcr_rot`` graph for ``FLAGS.n_iter`` epochs, evaluating after every epoch.

    After each epoch the model is scored on the full test file, on
    ``FLAGS.remaining_test_path`` and on ``FLAGS.remaining_train_path``; the
    two "remaining" scores are blended with ``accuracyOnt`` /
    ``trainAccuracyOnt`` into combined accuracies.

    Args:
        train_path: Training file passed to ``load_inputs_twitter``.
        test_path: Test file passed to ``load_inputs_twitter``.
        accuracyOnt: Accuracy credited to the test examples that are in
            ``test_path`` but not in ``FLAGS.remaining_test_path``
            (presumably the ontology's accuracy -- confirm with the caller).
        trainAccuracyOnt: Same role as ``accuracyOnt`` for the training data.
        test_size: Not used by this function.
        remaining_size: Not used by this function.
        learning_rate: Decayed locally each epoch, but see the NOTE(review)
            at the optimizer: the optimizer does not read this value.
        keep_prob: Value fed to both keep-probability placeholders while
            training (1.0 is fed during evaluation).
        momentum: Momentum of the ``MomentumOptimizer``.
        l2: Forwarded to ``lcr_rot``.
        number_of_heads: Forwarded to ``lcr_rot``.

    Returns:
        A 10-tuple: training accuracy, test accuracy of the last epoch,
        combined training accuracy, combined test accuracy, accuracy on the
        remaining test file, a 0/1 array that is 1 where the prediction
        differs from the label, and the four attention outputs of the last
        evaluated test batch as nested lists.
    """
    print_config()

    word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
    word_embedding = tf.constant(w2v, name='word_embedding')

    keep_prob1 = tf.placeholder(tf.float32)
    keep_prob2 = tf.placeholder(tf.float32)

    with tf.name_scope('inputs'):
        x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
        sen_len = tf.placeholder(tf.int32, None)

        x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        sen_len_bw = tf.placeholder(tf.int32, [None])

        target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
        tar_len = tf.placeholder(tf.int32, [None])

    inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
    inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
    target = tf.nn.embedding_lookup(word_embedding, target_words)

    prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
        inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
        keep_prob1, keep_prob2, l2, 'all', number_of_heads)

    loss = loss_func(y, prob)
    acc_num, acc_prob = acc_func(y, prob)
    global_step = tf.Variable(0, name='tr_global_step', trainable=False)
    # NOTE(review): the optimizer is built from FLAGS.learning_rate, not from the
    # `learning_rate` argument, so neither a caller-supplied rate nor the
    # per-epoch decay below reaches it. Confirm whether that is intended.
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=FLAGS.learning_rate, momentum=momentum
    ).minimize(loss, global_step=global_step)
    true_y = tf.argmax(y, 1)
    pred_y = tf.argmax(prob, 1)

    title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
        FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
        FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
        FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True
    with tf.Session(config=session_config) as sess:
        # These two placeholders are never fed or fetched; they are still
        # created so the graph contains the same nodes as before.
        tf.placeholder(tf.float32)
        tf.placeholder(tf.float32)

        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        def load_split(path):
            """Load one data file and keep the seven arrays that feed the graph."""
            (x_f, len_f, x_b, len_b, labels, targets, target_lens,
             _, _, _) = load_inputs_twitter(
                path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
                FLAGS.max_target_len)
            return x_f, len_f, x_b, len_b, labels, targets, target_lens

        train_split = load_split(train_path)
        test_split = load_split(test_path)
        remaining_test_split = load_split(FLAGS.remaining_test_path)
        remaining_train_split = load_split(FLAGS.remaining_train_path)

        def iter_batches(split, batch_size, kp1, kp2, is_shuffle=True):
            """Yield ``(feed_dict, batch_length)`` for every batch of ``split``."""
            x_f, len_f, x_b, len_b, labels, targets, target_lens = split
            for index in batch_index(len(labels), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: labels[index],
                    sen_len: len_f[index],
                    sen_len_bw: len_b[index],
                    target_words: targets[index],
                    tar_len: target_lens[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        def count_correct(split):
            """Return ``(correct, total)`` over ``split`` with dropout disabled."""
            correct, total = 0., 0
            for feed, size in iter_batches(split, 2000, 1.0, 1.0, False):
                correct += sess.run(acc_num, feed_dict=feed)
                total += size
            return correct, total

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None

        for epoch in range(FLAGS.n_iter):
            # Has no effect on training (see NOTE(review) at the optimizer).
            learning_rate = (0.99) * learning_rate

            # ---- one pass over the training data ----
            train_correct, train_total, train_loss_sum = 0., 0, 0.
            for feed, size in iter_batches(train_split, FLAGS.batch_size,
                                           keep_prob, keep_prob):
                _, _, batch_correct, batch_loss = sess.run(
                    [optimizer, global_step, acc_num, loss], feed_dict=feed)
                train_correct += batch_correct
                train_total += size
                train_loss_sum += batch_loss * size

            # ---- full test set ----
            test_correct, test_loss_sum, test_total = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for feed, size in iter_batches(test_split, 2000, 1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                         alpha_t_r, true_y, pred_y, prob],
                        feed_dict=feed)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [loss, acc_num, true_y, pred_y, prob, alpha_fw,
                         alpha_bw, alpha_t_l, alpha_t_r],
                        feed_dict=feed)
                # Overwritten on every batch: only the last batch is kept.
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                test_correct += _acc
                test_loss_sum += _loss * size
                test_total += size

            # ---- "remaining" subsets ----
            rem_test_correct, rem_test_total = count_correct(remaining_test_split)
            rem_train_correct, rem_train_total = count_correct(remaining_train_split)

            print(
                'number of training examples={}, correct training examples={}, number of test examples={}, correct test examples={}, number of examples without onto = {}'
                .format(train_total, train_correct, test_total, test_correct,
                        rem_test_total))

            training_accuracy = train_correct / train_total
            test_accuracy = test_correct / test_total
            test_accuracy_ont = rem_test_correct / rem_test_total
            train_accuracy_ont = rem_train_correct / rem_train_total
            totalacc_train = (
                (train_accuracy_ont * rem_train_total) +
                (trainAccuracyOnt * (train_total - rem_train_total))
            ) / train_total
            totalacc = (
                (test_accuracy_ont * rem_test_total) +
                (accuracyOnt * (test_total - rem_test_total))
            ) / test_total
            average_test_loss = test_loss_sum / test_total
            average_training_loss = train_loss_sum / train_total

            print(
                'Epoch {}: average training loss={:.6f}, train acc={:.6f}, average test loss={:.6f}, test acc={:.6f}, combined acc={:.6f}, accuracy without onto={:.6f}, in-sample with onto = {}'
                .format(epoch, average_training_loss, training_accuracy,
                        average_test_loss, test_accuracy, totalacc,
                        test_accuracy_ont, totalacc_train))

            # NOTE(review): despite the names, these hold the values of the most
            # recent epoch, not of the best one.
            max_acc = test_accuracy
            max_fw = np.squeeze(fw)
            max_bw = np.squeeze(bw)
            max_tl = np.squeeze(tl)
            max_tr = np.squeeze(tr)
            max_ty = ty
            max_py = py
            max_prob = p

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        misclassified = np.where(np.subtract(max_py, max_ty) == 0, 0, 1)
        return (training_accuracy, max_acc, totalacc_train, totalacc,
                test_accuracy_ont, misclassified, max_fw.tolist(),
                max_bw.tolist(), max_tl.tolist(), max_tr.tolist())
def main(train_path, test_path, accuracyOnt, test_size, remaining_size,
         learning_rate=0.09, keep_prob=0.5, momentum=0.9, l2=0.0001):
    """Build the ``lcr_rot`` graph and print its attention outputs for the first test batch.

    No optimizer is created here, so nothing is trained: each of the
    ``FLAGS.n_iter`` iterations only steps through the training batches
    fetching ``global_step`` and then prints ``attention`` and
    ``attention_masked`` (values and shapes) for the first test batch of size 3.

    Args:
        train_path: Training file passed to ``load_inputs_twitter``.
        test_path: Test file passed to ``load_inputs_twitter``.
        accuracyOnt: Not used by this function.
        test_size: Not used by this function.
        remaining_size: Not used by this function.
        learning_rate: Not used (no optimizer is built).
        keep_prob: Value fed to both keep-probability placeholders during the
            pass over the training data.
        momentum: Not used (no optimizer is built).
        l2: Forwarded to ``lcr_rot``.

    Returns:
        None.

    Raises:
        NotImplementedError: If ``FLAGS.method`` is ``'TD-ATT'`` or ``'IAN'``.
            That branch used to fetch ``output_old`` / ``output``, which are
            not defined anywhere in this function, and so failed with a
            ``NameError`` at the same point.
    """
    print_config()

    word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
    word_embedding = tf.constant(w2v, name='word_embedding')

    keep_prob1 = tf.placeholder(tf.float32)
    keep_prob2 = tf.placeholder(tf.float32)

    with tf.name_scope('inputs'):
        x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
        sen_len = tf.placeholder(tf.int32, None)

        x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
        sen_len_bw = tf.placeholder(tf.int32, [None])

        target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
        tar_len = tf.placeholder(tf.int32, [None])

    inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
    inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
    target = tf.nn.embedding_lookup(word_embedding, target_words)

    attention, attention_masked = lcr_rot(
        inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
        keep_prob1, keep_prob2, l2, 'all')

    global_step = tf.Variable(0, name='tr_global_step', trainable=False)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        def load_split(path):
            """Load one data file and keep the seven arrays that feed the graph."""
            (x_f, len_f, x_b, len_b, labels, targets, target_lens,
             _, _, _) = load_inputs_twitter(
                path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
                FLAGS.max_target_len)
            return x_f, len_f, x_b, len_b, labels, targets, target_lens

        train_split = load_split(train_path)
        test_split = load_split(test_path)
        # NOTE(review): this file is loaded but its data is never used below;
        # the load is kept only so the function performs the same I/O as before.
        load_split(FLAGS.remaining_test_path)

        def get_batch_data(split, batch_size, kp1, kp2, is_shuffle=True):
            """Yield ``(feed_dict, batch_length)`` for every batch of ``split``."""
            x_f, len_f, x_b, len_b, labels, targets, target_lens = split
            for index in batch_index(len(labels), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: labels[index],
                    sen_len: len_f[index],
                    sen_len_bw: len_b[index],
                    target_words: targets[index],
                    tar_len: target_lens[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        for _ in range(FLAGS.n_iter):
            # Only `global_step` is fetched, so this pass updates no weights.
            for train, _numtrain in get_batch_data(
                    train_split, FLAGS.batch_size, keep_prob, keep_prob):
                sess.run([global_step], feed_dict=train)

            for test, _num in get_batch_data(test_split, 3, 1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    raise NotImplementedError(
                        "attention dump is not implemented for FLAGS.method={!r}: "
                        "this graph exposes only 'attention' and "
                        "'attention_masked'".format(FLAGS.method))
                _attention, _attention_masked = sess.run(
                    [attention, attention_masked], feed_dict=test)
                print(_attention)
                print(np.shape(_attention))
                print(_attention_masked)
                print(np.shape(_attention_masked))
                # Only the first test batch is inspected per iteration.
                break
def main(train_path, test_path, accuracyOnt, test_size, remaining_size, sort, num_buckets, l2=0.0001):
    """Train the ``lcr_rot`` graph bucket by bucket and plot loss/accuracy curves.

    ``sort`` is split into ``num_buckets`` parts with ``np.array_split``. Each
    bucket is shuffled, split 80/20 into training and validation indices, and
    trained with its own learning rate, keep probability and momentum until
    the validation accuracy stops improving or ``FLAGS.n_iter`` iterations
    have run. After every iteration the model is also scored on the test
    file and on the complete training file.

    Args:
        train_path: Training file passed to ``load_inputs_twitter``.
        test_path: Test file passed to ``load_inputs_twitter``.
        accuracyOnt: Not used by this function.
        test_size: Not used by this function.
        remaining_size: Not used by this function.
        sort: Array-like of indices into the training arrays (presumably
            ordered by difficulty for curriculum learning -- confirm with the
            caller). NOTE(review): the buckets are shuffled in place, which
            may reorder the caller's array when ``sort`` is an ndarray.
        num_buckets: Number of buckets ``sort`` is split into.
        l2: Forwarded to ``lcr_rot``.

    Returns:
        Test accuracy (fraction correct) after the last executed iteration.
    """
    import time

    # (learning rate, keep probability, momentum) per 1-based bucket number.
    # A bucket number that is not listed keeps the previous bucket's values.
    bucket_hyperparameters = {
        1: (0.01, 0.7, 0.85),
        2: (0.01, 0.7, 0.85),
        3: (0.02, 0.6, 0.95),
        4: (0.08, 0.3, 0.9),
        5: (0.07, 0.4, 0.99),
        6: (0.02, 0.6, 0.9),
        7: (0.05, 0.4, 0.9),
        8: (0.05, 0.4, 0.95),
        9: (0.01, 0.5, 0.99),
        10: (0.01, 0.6, 0.9),  # all the instances
    }

    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)
        # Fed per batch so every bucket can use its own optimizer settings.
        learning_rate = tf.placeholder(tf.float32)
        momentum = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        # The four attention outputs are not needed by this training loop.
        prob, _, _, _, _ = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=momentum
        ).minimize(loss, global_step=global_step)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        (train_summary_op, test_summary_op, validate_summary_op,
         train_summary_writer, test_summary_writer,
         validate_summary_writer) = summary_func(
            loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        def load_split(path):
            """Load one data file and keep the seven arrays that feed the graph."""
            (x_f, len_f, x_b, len_b, labels, targets, target_lens,
             _, _, _) = load_inputs_twitter(
                path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
                FLAGS.max_target_len)
            return x_f, len_f, x_b, len_b, labels, targets, target_lens

        train_split = load_split(train_path)
        test_split = load_split(test_path)

        def make_feed(split, index, kp1, kp2):
            """Build the feed dict for the rows of ``split`` selected by ``index``."""
            x_f, len_f, x_b, len_b, labels, targets, target_lens = split
            return {
                x: x_f[index],
                x_bw: x_b[index],
                y: labels[index],
                sen_len: len_f[index],
                sen_len_bw: len_b[index],
                target_words: targets[index],
                tar_len: target_lens[index],
                keep_prob1: kp1,
                keep_prob2: kp2,
            }

        def get_batch_data(split, batch_size, kp1, kp2, is_shuffle=True):
            """Yield ``(feed_dict, batch_length)`` over every row of ``split``."""
            for index in batch_index(len(split[4]), batch_size, 1, is_shuffle):
                yield make_feed(split, index, kp1, kp2), len(index)

        def curr_get_batch_data(split, batch_size, kp1, kp2, learning, moment,
                                bucket, is_shuffle=True):
            """Yield training batches restricted to ``bucket``, with optimizer settings."""
            for index in curr_batch_index(bucket, batch_size, is_shuffle):
                feed_dict = make_feed(split, index, kp1, kp2)
                feed_dict[learning_rate] = learning
                feed_dict[momentum] = moment
                yield feed_dict, len(index)

        def eval_get_batch_data(split, batch_size, kp1, kp2, bucket,
                                is_shuffle=True):
            """Yield evaluation batches restricted to ``bucket``."""
            for index in curr_batch_index(bucket, batch_size, is_shuffle):
                yield make_feed(split, index, kp1, kp2), len(index)

        train_time = 0
        total_iter = 0
        cost_func_test = []
        cost_func_train = []
        cost_func_eval = []
        acc_func_train = []
        acc_func_test = []
        acc_func_eval = []
        alltrainacc = []

        # split data in num_buckets
        buckets = np.array_split(sort, num_buckets)
        bucket_number = 1
        lowest_val = 100
        best_train = 0.
        best_test = 0.
        best_iteration = 0

        for bucket in buckets:  # for every bucket of data
            print("bucket number:{}".format(bucket_number))
            np.random.shuffle(bucket)
            tmp = int(round(0.8 * len(bucket)))
            traindata = bucket[:tmp]
            evaldata = bucket[tmp:]

            # update the hyperparameters for every bucket, depending on num_buckets
            if bucket_number in bucket_hyperparameters:
                lr, keep_prob, mom = bucket_hyperparameters[bucket_number]

            print("Training instances: {}, validation instances: {}".format(
                len(traindata), len(evaldata)))

            i = 0
            converged = False
            all_evalloss = []
            all_evalacc = []
            # until convergence or until certain amount of iterations
            while i < FLAGS.n_iter and not converged:
                # ---- train on this bucket's training part ----
                trainacc, trainloss, traincnt = 0., 0., 0
                start_time = time.time()
                for train, numtrain in curr_get_batch_data(
                        train_split, FLAGS.batch_size, keep_prob, keep_prob,
                        lr, mom, traindata):
                    _, _trainloss, step, summary, _trainacc = sess.run(
                        [optimizer, loss, global_step, train_summary_op, acc_num],
                        feed_dict=train)
                    train_summary_writer.add_summary(summary, step)
                    trainacc += _trainacc
                    traincnt += numtrain
                    trainloss += _trainloss * numtrain
                elapsed_time = time.time() - start_time
                train_time += elapsed_time

                # ---- validate on this bucket's held-out part ----
                evalacc, evalcost, evalcnt = 0., 0., 0
                for eva, evalnum in eval_get_batch_data(
                        train_split, 2000, 1.0, 1.0, evaldata, False):
                    _evalloss, _evalacc = sess.run([loss, acc_num], feed_dict=eva)
                    evalacc += _evalacc
                    evalcost += _evalloss * evalnum
                    evalcnt += evalnum

                # ---- test set: only loss and number correct are needed ----
                acc, cost, cnt = 0., 0., 0
                for test, num in get_batch_data(test_split, 2000, 1.0, 1.0, False):
                    _loss, _acc = sess.run([loss, acc_num], feed_dict=test)
                    acc += _acc
                    cost += _loss * num
                    cnt += num

                # ---- in-sample accuracy over the complete training file ----
                comacc, comcnt = 0., 0
                for com, comnum in get_batch_data(
                        train_split, FLAGS.batch_size, 1.0, 1.0, False):
                    comacc += sess.run(acc_num, feed_dict=com)
                    comcnt += comnum

                print(
                    'all samples={}, correct prediction={}, training time={}, training time so far={}'
                    .format(cnt, acc, elapsed_time, train_time))

                trainacc = trainacc / traincnt
                acc = acc / cnt
                evalacc = evalacc / evalcnt
                comacc = comacc / comcnt
                alltrainacc.append(comacc)
                cost = cost / cnt
                trainloss = trainloss / traincnt
                evalcost = evalcost / evalcnt
                cost_func_test.append(cost)
                cost_func_train.append(trainloss)
                cost_func_eval.append(evalcost)
                acc_func_test.append(acc)
                acc_func_train.append(trainacc)
                acc_func_eval.append(evalacc)

                print(
                    'Iter {}: mini-batch loss validation set={:.6f}, train loss={:.6f}, train acc={:.6f}, '
                    'validation acc={:6f} test acc={:.6f}, total train acc={:6f}'
                    .format(i, evalcost, trainloss, trainacc, evalacc, acc, comacc))

                summary = sess.run(test_summary_op,
                                   feed_dict={test_loss: cost, test_acc: acc})
                test_summary_writer.add_summary(summary, step)

                all_evalloss.append(evalcost)
                all_evalacc.append(evalacc)

                # Converged when the validation accuracy gained less than 0.001
                # in each of the last three iterations. Four values are needed,
                # hence i >= 3: with the former `i > 1` guard, `i - 3` was -1 at
                # i == 2 and silently wrapped around to the newest entry.
                if i >= 3:
                    recent_gains = (
                        all_evalacc[i] - all_evalacc[i - 1],
                        all_evalacc[i - 1] - all_evalacc[i - 2],
                        all_evalacc[i - 2] - all_evalacc[i - 3],
                    )
                    if all(gain < 0.001 for gain in recent_gains):
                        converged = True

                # Track the best iteration (lowest validation loss) of the last bucket.
                if bucket_number == num_buckets:
                    if evalcost < lowest_val:
                        lowest_val = evalcost
                        best_test = acc
                        best_train = comacc
                        best_iteration = i
                i += 1
                total_iter += 1
            bucket_number += 1

        # Plotting chart of training and testing losses as a function of iterations
        iterations = list(range(total_iter))
        plt.plot(iterations, cost_func_train, label='Cost func train')
        plt.plot(iterations, cost_func_test, label='Cost func test')
        plt.plot(iterations, cost_func_eval, label='Cost func validation')
        plt.title('Model loss k={}'.format(num_buckets))
        plt.ylabel('Loss')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        # Plotting chart of training and testing accuracies as a function of iterations
        plt.plot(iterations, acc_func_train, label='Acc func train')
        plt.plot(iterations, acc_func_test, label='Acc func test')
        plt.plot(iterations, acc_func_eval, label='Acc func validation')
        plt.title('Model accuracy k={}'.format(num_buckets))
        plt.ylabel('Accuracy')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        print(
            'Optimization Finished! Iteration:{}: Validation loss={}, validation accuracy={}, test accuracy={}, in-sample acc={}'
            .format(total_iter, evalcost, evalacc, acc, comacc))
        print(
            "Lowest validation loss:{}, at iteration:{}, with out-of-sample acc:{} and in-sample acc:{}"
            .format(lowest_val, best_iteration, best_test, best_train))

        print('iter_num={}, batch_size={}, hidden_num={}, l2={}'.format(
            total_iter, FLAGS.batch_size, FLAGS.n_hidden, l2))
        print(acc_func_train)
        print(acc_func_test)
        print(acc_func_eval)
        print(cost_func_eval)
        print(alltrainacc)

        return acc
def main(train_path, test_path, accuracyOnt, test_size, remaining_size,
         learning_rate=0.09, keep_prob=0.3, momentum=0.85, l2=0.00001):
    """Train the ``lcr_rot`` graph, keep the best test epoch and dump its results to disk.

    After training, the per-iteration accuracies are saved as CSV, per-class
    precision/recall/F1 of the best epoch are printed, and the class
    probabilities and attention weights of the best epoch are written to
    ``FLAGS.prob_file`` and ``FLAGS.prob_file + '_fw' / '_bw' / '_tl' / '_tr'``.

    Args:
        train_path: Training file passed to ``load_inputs_twitter``.
        test_path: Test file passed to ``load_inputs_twitter``.
        accuracyOnt: Accuracy credited to the ``test_size - remaining_size``
            test examples not scored by this model (presumably the
            ontology's accuracy -- confirm with the caller).
        test_size: Denominator of the combined accuracy.
        remaining_size: Weight of this model's accuracy in the combined accuracy.
        learning_rate: Learning rate of the ``MomentumOptimizer``.
        keep_prob: Value fed to both keep-probability placeholders while training.
        momentum: Momentum of the ``MomentumOptimizer``.
        l2: Forwarded to ``lcr_rot``.

    Returns:
        A 6-tuple: best test accuracy, a 0/1 array that is 1 where the
        prediction differs from the label, and the four attention outputs of
        the best epoch as nested lists.
    """
    import time
    from datetime import datetime

    print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=momentum
        ).minimize(loss, global_step=global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        (train_summary_op, test_summary_op, validate_summary_op,
         train_summary_writer, test_summary_writer,
         validate_summary_writer) = summary_func(
            loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        def load_split(path):
            """Load one data file and keep the seven arrays that feed the graph."""
            (x_f, len_f, x_b, len_b, labels, targets, target_lens,
             _, _, _) = load_inputs_twitter(
                path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
                FLAGS.max_target_len)
            return x_f, len_f, x_b, len_b, labels, targets, target_lens

        train_split = load_split(train_path)
        test_split = load_split(test_path)

        def get_batch_data(split, batch_size, kp1, kp2, is_shuffle=True):
            """Yield ``(feed_dict, batch_length)`` for every batch of ``split``."""
            x_f, len_f, x_b, len_b, labels, targets, target_lens = split
            for index in batch_index(len(labels), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: labels[index],
                    sen_len: len_f[index],
                    sen_len_bw: len_b[index],
                    target_words: targets[index],
                    tar_len: target_lens[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        # One column per iteration: [iteration number, train acc, test acc].
        # The first column is the all-zero column created here.
        Results_File = np.zeros((3, 1))

        for i in range(FLAGS.n_iter):
            trainacc, traincnt = 0., 0
            for train, numtrain in get_batch_data(
                    train_split, FLAGS.batch_size, keep_prob, keep_prob):
                _, step, summary, _trainacc = sess.run(
                    [optimizer, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                trainacc += _trainacc
                traincnt += numtrain

            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(test_split, 2000, 1.0, 1.0, False):
                # The 'TD-ATT'/'IAN' and default paths fetched exactly these
                # tensors (in a different order), so a single call serves both.
                _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                    [loss, acc_num, true_y, pred_y, prob, alpha_fw, alpha_bw,
                     alpha_t_l, alpha_t_r],
                    feed_dict=test)
                # Overwritten on every batch: only the last batch is kept.
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num

            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt * (test_size - remaining_size))) / test_size
            cost = cost / cnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train acc={:.6f}, test acc={:.6f}, combined acc={:.6f}'
                .format(i, cost, trainacc, acc, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={test_loss: cost, test_acc: acc})
            test_summary_writer.add_summary(summary, step)

            if acc > max_acc:
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

            Added = [[i + 1], [trainacc], [acc]]
            Results_File = np.concatenate((Results_File, Added), 1)

        # Saving training information as csv file
        # NOTE(review): the output directory is a hard-coded absolute path on
        # one developer's machine; consider making it configurable.
        dateTimeObj = datetime.now()
        save_dir = '/Users/ronhochstenbach/Desktop/Ectrie Thesis/Venv_Thesis/Results_Run_Adversarial/Run_' + str(
            dateTimeObj) + '_lcrrot_' + str(FLAGS.year) + '.csv'
        np.savetxt(save_dir, Results_File, delimiter=",")

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        # Every output file is written under `with` so the handle is flushed
        # and closed even if a write fails.
        with open(FLAGS.prob_file, 'w') as fp:
            for item in max_prob:
                fp.write(' '.join([str(it) for it in item]) + '\n')

        attention_dumps = (
            ('_fw', max_fw),
            ('_bw', max_bw),
            ('_tl', max_tl),
            ('_tr', max_tr),
        )
        for suffix, weights in attention_dumps:
            with open(FLAGS.prob_file + suffix, 'w') as fp:
                for y1, y2, ws in zip(max_ty, max_py, weights):
                    fp.write(
                        str(y1) + ' ' + str(y2) + ' ' +
                        ' '.join([str(w) for w in ws[0]]) + '\n')

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return (max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0, 1),
                max_fw.tolist(), max_bw.tolist(), max_tl.tolist(),
                max_tr.tolist())
def run(self):
    """Build the graph, train on the training file and report test accuracy.

    The mean embedding of the target words is broadcast over every time step
    and concatenated to both the forward and the backward sentence embeddings
    before they are handed to ``self.bi_dynamic_lstm``.  After each pass of
    the outer loop the test file is evaluated; the best accuracy seen is
    printed once all ``self.n_iter`` iterations are done.

    Reads ``FLAGS.train_file_path`` and ``FLAGS.test_file_path`` and writes
    TensorBoard event files below ``logs/``.  Returns ``None``.
    """
    import time

    inputs_fw = tf.nn.embedding_lookup(self.word_embedding, self.x)
    inputs_bw = tf.nn.embedding_lookup(self.word_embedding, self.x_bw)
    # Average the target-word embeddings over axis 1 (kept as a size-1 axis),
    # then broadcast the result to max_sentence_len steps by adding it to a
    # zero tensor of the full shape.
    target = tf.reduce_mean(
        tf.nn.embedding_lookup(self.word_embedding, self.target_words),
        1, keep_dims=True)
    batch_size = tf.shape(inputs_bw)[0]
    target = tf.zeros(
        [batch_size, self.max_sentence_len, self.embedding_dim]) + target
    inputs_fw = tf.concat([inputs_fw, target], 2)
    inputs_bw = tf.concat([inputs_bw, target], 2)
    prob = self.bi_dynamic_lstm(inputs_fw, inputs_bw)

    with tf.name_scope('loss'):
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=prob, labels=self.y))

    with tf.name_scope('train'):
        global_step = tf.Variable(0, name="tr_global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(
                cost, global_step=global_step)

    with tf.name_scope('predict'):
        correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
        # `accuracy` is the COUNT of correct predictions in a batch;
        # `mean_acc` is the fraction and is only used for the summaries.
        accuracy = tf.reduce_sum(tf.cast(correct_pred, tf.int32))
        mean_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.Session() as sess:
        summary_loss = tf.summary.scalar('loss', cost)
        summary_acc = tf.summary.scalar('acc', mean_acc)
        train_summary_op = tf.summary.merge([summary_loss, summary_acc])
        # The validate op/writer are created (the writer also creates its log
        # directory) but nothing is evaluated with them in this method.
        validate_summary_op = tf.summary.merge([summary_loss, summary_acc])
        test_summary_op = tf.summary.merge([summary_loss, summary_acc])

        timestamp = str(int(time.time()))
        _dir = ('logs/' + str(timestamp) + '_' + self.type_
                + '_r' + str(self.learning_rate)
                + '_b' + str(self.batch_size)
                + '_l' + str(self.l2_reg))
        train_summary_writer = tf.summary.FileWriter(_dir + '/train', sess.graph)
        test_summary_writer = tf.summary.FileWriter(_dir + '/test', sess.graph)
        validate_summary_writer = tf.summary.FileWriter(_dir + '/validate', sess.graph)

        try:
            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word = load_inputs_twitter(
                FLAGS.train_file_path,
                self.word_id_mapping,
                self.max_sentence_len,
                self.type_
            )
            te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word = load_inputs_twitter(
                FLAGS.test_file_path,
                self.word_id_mapping,
                self.max_sentence_len,
                self.type_
            )

            sess.run(tf.global_variables_initializer())

            max_acc = 0.
            for _iteration in range(self.n_iter):
                # The last argument (0.5 here, 1.0 for testing) is presumably
                # the dropout keep probability - confirm in get_batch_data.
                for train, _ in self.get_batch_data(tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw,
                                                    tr_y, tr_target_word, self.batch_size, 0.5):
                    _, step, summary = sess.run(
                        [optimizer, global_step, train_summary_op], feed_dict=train)
                    train_summary_writer.add_summary(summary, step)

                acc, loss, cnt = 0., 0., 0
                for test, num in self.get_batch_data(te_x, te_sen_len, te_x_bw, te_sen_len_bw,
                                                     te_y, te_target_word, 2000, 1.0):
                    batch_loss, batch_correct, summary = sess.run(
                        [cost, accuracy, test_summary_op], feed_dict=test)
                    acc += batch_correct
                    loss += batch_loss * num
                    cnt += num
                print(cnt)
                print(acc)
                # Written once per iteration (the summary of the last test
                # batch); the original wrote the identical event twice.
                test_summary_writer.add_summary(summary, step)
                print('Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(
                    step, loss / cnt, acc / cnt))
                if acc / cnt > max_acc:
                    max_acc = acc / cnt

            print('Optimization Finished! Max acc={}'.format(max_acc))
            print('Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'.format(
                self.learning_rate,
                self.n_iter,
                self.batch_size,
                self.n_hidden,
                self.l2_reg
            ))
        finally:
            # Flush and release the event files even if training fails.
            for writer in (train_summary_writer, test_summary_writer,
                           validate_summary_writer):
                writer.close()
def run(self):
    """Build the graph, train on the training file and report test accuracy.

    The mean embedding of the target words is broadcast over every time step
    and concatenated to both the forward and the backward sentence embeddings
    before they are handed to ``self.bi_dynamic_lstm``.  After each pass of
    the outer loop the test file is evaluated; the best accuracy seen is
    printed once all ``self.n_iter`` iterations are done.

    This method was written against Python 2 and the pre-1.0 TensorFlow API
    (``print`` statements, ``xrange``, ``tf.concat(axis, values)``,
    ``tf.scalar_summary`` ...).  It now uses Python 3 syntax and the
    TensorFlow 1.x names so the module can be imported by a Python 3
    interpreter.

    Reads ``FLAGS.train_file_path`` and ``FLAGS.test_file_path`` and writes
    TensorBoard event files below ``logs/``.  Returns ``None``.
    """
    import time

    inputs_fw = tf.nn.embedding_lookup(self.word_embedding, self.x)
    inputs_bw = tf.nn.embedding_lookup(self.word_embedding, self.x_bw)
    # Average the target-word embeddings over axis 1 (kept as a size-1 axis),
    # then broadcast the result to max_sentence_len steps by adding it to a
    # zero tensor of the full shape.
    target = tf.reduce_mean(
        tf.nn.embedding_lookup(self.word_embedding, self.target_words),
        1, keep_dims=True)
    batch_size = tf.shape(inputs_bw)[0]
    target = tf.zeros(
        [batch_size, self.max_sentence_len, self.embedding_dim]) + target
    # TF 1.x signature is tf.concat(values, axis); the old one was (axis, values).
    inputs_fw = tf.concat([inputs_fw, target], 2)
    inputs_bw = tf.concat([inputs_bw, target], 2)
    prob = self.bi_dynamic_lstm(inputs_fw, inputs_bw)

    with tf.name_scope('loss'):
        # TF 1.x only accepts keyword arguments here; the old positional
        # order was (logits, labels).
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=prob, labels=self.y))

    with tf.name_scope('train'):
        global_step = tf.Variable(0, name="tr_global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(
                cost, global_step=global_step)

    with tf.name_scope('predict'):
        correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
        # `accuracy` is the COUNT of correct predictions in a batch;
        # `mean_acc` is the fraction and is only used for the summaries.
        accuracy = tf.reduce_sum(tf.cast(correct_pred, tf.int32))
        mean_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.Session() as sess:
        summary_loss = tf.summary.scalar('loss', cost)
        summary_acc = tf.summary.scalar('acc', mean_acc)
        train_summary_op = tf.summary.merge([summary_loss, summary_acc])
        # The validate op/writer are created (the writer also creates its log
        # directory) but nothing is evaluated with them in this method.
        validate_summary_op = tf.summary.merge([summary_loss, summary_acc])
        test_summary_op = tf.summary.merge([summary_loss, summary_acc])

        timestamp = str(int(time.time()))
        _dir = ('logs/' + str(timestamp) + '_' + self.type_
                + '_r' + str(self.learning_rate)
                + '_b' + str(self.batch_size)
                + '_l' + str(self.l2_reg))
        train_summary_writer = tf.summary.FileWriter(_dir + '/train', sess.graph)
        test_summary_writer = tf.summary.FileWriter(_dir + '/test', sess.graph)
        validate_summary_writer = tf.summary.FileWriter(_dir + '/validate', sess.graph)

        try:
            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word = load_inputs_twitter(
                FLAGS.train_file_path,
                self.word_id_mapping,
                self.max_sentence_len,
                self.type_)
            te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word = load_inputs_twitter(
                FLAGS.test_file_path,
                self.word_id_mapping,
                self.max_sentence_len,
                self.type_)

            sess.run(tf.global_variables_initializer())

            max_acc = 0.
            for _iteration in range(self.n_iter):
                # The last argument (0.5 here, 1.0 for testing) is presumably
                # the dropout keep probability - confirm in get_batch_data.
                for train, _ in self.get_batch_data(tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw,
                                                    tr_y, tr_target_word, self.batch_size, 0.5):
                    _, step, summary = sess.run(
                        [optimizer, global_step, train_summary_op], feed_dict=train)
                    train_summary_writer.add_summary(summary, step)

                acc, loss, cnt = 0., 0., 0
                for test, num in self.get_batch_data(te_x, te_sen_len, te_x_bw, te_sen_len_bw,
                                                     te_y, te_target_word, 2000, 1.0):
                    batch_loss, batch_correct, summary = sess.run(
                        [cost, accuracy, test_summary_op], feed_dict=test)
                    acc += batch_correct
                    loss += batch_loss * num
                    cnt += num
                print(cnt)
                print(acc)
                # Written once per iteration (the summary of the last test
                # batch); the original wrote the identical event twice.
                test_summary_writer.add_summary(summary, step)
                print('Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(
                    step, loss / cnt, acc / cnt))
                if acc / cnt > max_acc:
                    max_acc = acc / cnt

            print('Optimization Finished! Max acc={}'.format(max_acc))
            print('Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'.format(
                self.learning_rate, self.n_iter, self.batch_size,
                self.n_hidden, self.l2_reg))
        finally:
            # Flush and release the event files even if training fails.
            for writer in (train_summary_writer, test_summary_writer,
                           validate_summary_writer):
                writer.close()
def main(train_path, test_path, accuracyOnt, test_size, remaining_size,
         learning_rate=0.09, keep_prob=0.3, momentum=0.85, l2=0.0001):
    """Train the ``lcr_rot`` model with early stopping on the test loss.

    Training stops as soon as the test loss has increased by more than 1e-5
    in three consecutive iterations, when the loss becomes NaN, or after
    ``FLAGS.n_iter`` iterations.

    Args:
        train_path: file passed to ``load_inputs_twitter`` for training data.
        test_path: file passed to ``load_inputs_twitter`` for test data.
        accuracyOnt, test_size, remaining_size: kept for interface
            compatibility; they are not used by this function.
        learning_rate: learning rate of the momentum optimizer.
        keep_prob: value fed to both keep-probability placeholders while
            training (1.0 is fed during evaluation).
        momentum: momentum of the optimizer.
        l2: regularisation weight forwarded to ``lcr_rot``.

    Returns:
        The test accuracy of the last evaluated iteration, or 0 when the loss
        became NaN or the iteration budget ``FLAGS.n_iter`` was used up.
    """
    import time

    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        # Only the class probabilities are needed here; the four attention
        # outputs of lcr_rot are not evaluated in this variant.
        prob, _, _, _, _ = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=momentum).minimize(
                loss, global_step=global_step)

        # NOTE: the title uses the FLAGS values, not the keyword arguments.
        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        (train_summary_op, test_summary_op, _, train_summary_writer,
         test_summary_writer, _) = summary_func(
            loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = \
            load_inputs_twitter(train_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = \
            load_inputs_twitter(test_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)

        def get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi, target, tl,
                           batch_size, kp1, kp2, is_shuffle=True):
            """Yield ``(feed_dict, batch_length)`` for every index batch."""
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        step = None
        train_time = 0
        i = 0
        converged = False
        all_testloss = []
        max_testloss = 100
        # Initialised up front so the final report cannot hit an unbound name
        # when the loss never drops below max_testloss (e.g. NaN right away)
        # or when FLAGS.n_iter is 0.
        max_testacc = None
        max_iter = None
        acc = 0.

        while i < FLAGS.n_iter and not converged:
            trainacc, trainloss, traincnt = 0., 0., 0
            start_time = time.time()
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw,
                                                  tr_y, tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob, keep_prob):
                _, _trainloss, step, summary, _trainacc = sess.run(
                    [optimizer, loss, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                trainacc += _trainacc
                traincnt += numtrain
                trainloss += _trainloss * numtrain

            elapsed_time = time.time() - start_time
            train_time += elapsed_time

            # Evaluate on the test set without shuffling and with both keep
            # probabilities set to 1.0.
            acc, cost, cnt = 0., 0., 0
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw, te_sen_len_bw,
                                            te_y, te_target_word, te_tar_len,
                                            2000, 1.0, 1.0, False):
                _loss, _acc = sess.run([loss, acc_num], feed_dict=test)
                acc += _acc
                cost += _loss * num
                cnt += num

            print('training samples= {}'.format(traincnt))
            print(
                'all samples={}, correct prediction={}, training time={}, training time so far={}'
                .format(cnt, acc, elapsed_time, train_time))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            cost = cost / cnt
            trainloss = trainloss / traincnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train loss={:.6f}, train acc={:.6f}, test acc={:.6f}'
                .format(i, cost, trainloss, trainacc, acc))
            summary = sess.run(test_summary_op,
                               feed_dict={test_loss: cost, test_acc: acc})
            test_summary_writer.add_summary(summary, step)

            all_testloss.append(cost)
            # Early stopping: the test loss rose three iterations in a row.
            if i > 2:
                if (all_testloss[i] - all_testloss[i - 1] > 0.00001) and (
                        all_testloss[i - 1] - all_testloss[i - 2] > 0.00001) \
                        and (all_testloss[i - 2] - all_testloss[i - 3] > 0.00001):
                    converged = True
            # A NaN loss aborts training and is reported as accuracy 0.
            if np.isnan(cost):
                acc = 0
                converged = True

            if cost < max_testloss:
                max_testloss = cost
                max_testacc = acc
                max_iter = i
            i += 1

        # The iteration budget was exhausted without converging earlier:
        # report accuracy 0.
        if i == FLAGS.n_iter:
            acc = 0

        print(
            'Optimization Finished! Iteration:{}:Minimal test loss={}, test accuracy={}'
            .format(max_iter, max_testloss, max_testacc))
        # Only the last iteration counts for the return value (translated
        # from the original Dutch note, which relates this to the
        # hyper-parameter optimisation that calls this function).
        return acc
def main(train_path, eval_path, test_path, complete_path, accuracyOnt, test_size,
         remaining_size, l2=0.0001, learning_rate=0.02, keep_prob=0.7, momentum=0.95):
    """Train the ``lcr_rot`` model and track train/validation/test/complete accuracy.

    After every training pass the model is evaluated on the validation set
    (``eval_path``), the test set (``test_path``) and the complete set
    (``complete_path``).  Training stops when the validation accuracy has
    improved by less than 0.001 in three consecutive iterations, or after
    ``FLAGS.n_iter`` iterations.  Loss and accuracy curves are then shown
    with matplotlib (``plt.show()`` blocks until the window is closed).

    Args:
        train_path, eval_path, test_path, complete_path: files passed to
            ``load_inputs_twitter``.
        accuracyOnt, test_size, remaining_size: kept for interface
            compatibility; they are not used by this function.
        l2: regularisation weight forwarded to ``lcr_rot``.
        learning_rate: value fed to the learning-rate placeholder
            (previously hard-coded to 0.02).
        keep_prob: value fed to both keep-probability placeholders while
            training (previously hard-coded to 0.7).
        momentum: value fed to the momentum placeholder (previously
            hard-coded to 0.95).

    Returns:
        The test accuracy of the last evaluated iteration (0.0 if no
        iteration ran).
    """
    import time

    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)
        # Learning rate and momentum are fed per batch through placeholders.
        lr_ph = tf.placeholder(tf.float32)
        momentum_ph = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        # Only the class probabilities are needed here; the four attention
        # outputs of lcr_rot are not used by this variant.
        prob, _, _, _, _ = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=lr_ph, momentum=momentum_ph).minimize(
                loss, global_step=global_step)

        # NOTE: the title uses the FLAGS values, not the keyword arguments.
        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        (train_summary_op, test_summary_op, _, train_summary_writer,
         test_summary_writer, _) = summary_func(
            loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = \
            load_inputs_twitter(train_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)
        ev_x, ev_sen_len, ev_x_bw, ev_sen_len_bw, ev_y, ev_target_word, ev_tar_len, _, _, _ = \
            load_inputs_twitter(eval_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = \
            load_inputs_twitter(test_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)
        co_x, co_sen_len, co_x_bw, co_sen_len_bw, co_y, co_target_word, co_tar_len, _, _, _ = \
            load_inputs_twitter(complete_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)

        def get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi, target, tl,
                           batch_size, kp1, kp2, is_shuffle=True):
            """Yield ``(feed_dict, batch_length)`` for evaluation batches."""
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        def train_get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi, target, tl,
                                 batch_size, kp1, kp2, learning, moment,
                                 is_shuffle=True):
            """Like ``get_batch_data`` but also feeds learning rate and momentum."""
            for feed_dict, size in get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi,
                                                  target, tl, batch_size, kp1, kp2,
                                                  is_shuffle):
                feed_dict[lr_ph] = learning
                feed_dict[momentum_ph] = moment
                yield feed_dict, size

        step = None
        train_time = 0
        print("number of training instances: {}, number of test instances: {}".format(
            len(tr_y), len(te_y)))

        cost_func_test = []
        cost_func_train = []
        cost_func_eval = []
        acc_func_train = []
        acc_func_test = []
        acc_func_eval = []

        i = 0
        converged = False
        # Defined up front so the final report works even if no iteration runs.
        acc, evalcost, evalacc, comacc = 0., 0., 0., 0.

        while i < FLAGS.n_iter and not converged:
            trainacc, trainloss, traincnt = 0., 0., 0
            start_time = time.time()
            for train, numtrain in train_get_batch_data(
                    tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word,
                    tr_tar_len, FLAGS.batch_size, keep_prob, keep_prob,
                    learning_rate, momentum):
                _, _trainloss, step, summary, _trainacc = sess.run(
                    [optimizer, loss, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                trainacc += _trainacc
                traincnt += numtrain
                trainloss += _trainloss * numtrain

            elapsed_time = time.time() - start_time
            train_time += elapsed_time

            # Validation set.
            evalacc, evalcost, evalcnt = 0., 0., 0
            for eva, evalnum in get_batch_data(ev_x, ev_sen_len, ev_x_bw, ev_sen_len_bw,
                                               ev_y, ev_target_word, ev_tar_len,
                                               2000, 1.0, 1.0, False):
                _evalloss, _evalacc = sess.run([loss, acc_num], feed_dict=eva)
                evalacc += _evalacc
                evalcost += _evalloss * evalnum
                evalcnt += evalnum

            # Test set.
            acc, cost, cnt = 0., 0., 0
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw, te_sen_len_bw,
                                            te_y, te_target_word, te_tar_len,
                                            2000, 1.0, 1.0, False):
                _loss, _acc = sess.run([loss, acc_num], feed_dict=test)
                acc += _acc
                cost += _loss * num
                cnt += num

            # Complete set: only the number of correct predictions is needed.
            comacc, comcnt = 0., 0
            for com, comnum in get_batch_data(co_x, co_sen_len, co_x_bw, co_sen_len_bw,
                                              co_y, co_target_word, co_tar_len,
                                              FLAGS.batch_size, 1.0, 1.0, False):
                _comacc = sess.run(acc_num, feed_dict=com)
                comacc += _comacc
                comcnt += comnum

            print(
                'all samples={}, correct prediction={}, training time={}, training time so far={}'
                .format(cnt, acc, elapsed_time, train_time))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            evalacc = evalacc / evalcnt
            comacc = comacc / comcnt
            cost = cost / cnt
            trainloss = trainloss / traincnt
            evalcost = evalcost / evalcnt
            cost_func_test.append(cost)
            cost_func_train.append(trainloss)
            cost_func_eval.append(evalcost)
            acc_func_eval.append(evalacc)
            acc_func_test.append(acc)
            acc_func_train.append(trainacc)
            print(
                'Iter {}: mini-batch loss validation set={:.6f}, train loss={:.6f}, train acc={:.6f}, validation acc={:6f} test acc={:.6f}, total training acc={:6f}'
                .format(i, evalcost, trainloss, trainacc, evalacc, acc, comacc))
            summary = sess.run(test_summary_op,
                               feed_dict={test_loss: cost, test_acc: acc})
            test_summary_writer.add_summary(summary, step)

            # Stop when the VALIDATION accuracy gained less than 0.001 in each
            # of the last three iterations.
            if i > 2:
                if (acc_func_eval[i] - acc_func_eval[i - 1] < 0.001) and (
                        acc_func_eval[i - 1] - acc_func_eval[i - 2] < 0.001) \
                        and (acc_func_eval[i - 2] - acc_func_eval[i - 3] < 0.001):
                    converged = True
            i += 1

        # Accuracy on the complete set after the last iteration (the original
        # printed the label without a value).
        print("total train acc = {}".format(comacc))

        # Loss as a function of the iteration.
        iterations = list(range(i))
        plt.plot(iterations, cost_func_train, label='Cost func train')
        plt.plot(iterations, cost_func_test, label='Cost func test')
        plt.plot(iterations, cost_func_eval, label='Cost func validation')
        plt.title('Model loss k=1')
        plt.ylabel('Loss')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        # Accuracy as a function of the iteration.
        plt.plot(iterations, acc_func_train, label='Acc func train')
        plt.plot(iterations, acc_func_test, label='Acc func test')
        plt.plot(iterations, acc_func_eval, label='Acc func validation')
        plt.title('Model accuracy k=1')
        plt.ylabel('Accuracy')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test', 'eval'], loc='upper left')
        plt.show()

        print(
            'Optimization Finished! Iteration:{}: Validation loss={}, validation accuracy={}, test accuracy={}'
            .format(i, evalcost, evalacc, acc))
        print(acc_func_train)
        print(acc_func_test)
        return acc
def main(train_path, test_path, accuracyOnt, test_size, remaining_size,
         learning_rate=0.09, keep_prob=0.3, momentum=0.85, l2=0.00001):
    """Evaluate a restored ``lcr_rot`` checkpoint on the test set.

    The graph and optimizer are built as in the training variants, but no
    training step is executed here: a checkpoint from
    ``'savedModel' + str(FLAGS.year)`` is restored and the test set is
    evaluated.  Train accuracy therefore stays 0, so the stopping rule in the
    loop fires once ``i > 2`` and the test set is evaluated at most four
    times.  Predictions and attention weights of the best evaluation are
    written to ``FLAGS.prob_file`` (plus the suffixes ``_fw``, ``_bw``,
    ``_tl``, ``_tr``) and two matplotlib figures are shown.

    Args:
        train_path: training file; it is loaded but only its size is printed.
        test_path: test file that is evaluated.
        accuracyOnt, test_size, remaining_size: kept for interface
            compatibility; they are not used by this function.
        learning_rate, momentum: used to build the (never executed) optimizer.
        keep_prob: unused because training is disabled.
        l2: regularisation weight forwarded to ``lcr_rot``.

    Returns:
        A tuple ``(acc, errors, fw, bw, tl, tr)``: the last test accuracy, an
        array with 0 where prediction and label agree and 1 elsewhere, and
        the four attention-weight arrays converted to lists.
    """
    import time

    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        # Built so the graph contains the same variables as during training;
        # the op itself is never run in this function.
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=momentum).minimize(
                loss, global_step=global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        # NOTE: the title uses the FLAGS values, not the keyword arguments.
        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        (_, test_summary_op, _, _, test_summary_writer, _) = summary_func(
            loss, acc_prob, test_loss, test_acc, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        # Restore the saved model.
        # NOTE(review): import_meta_graph adds the saved graph to the current
        # one; confirm that the variables evaluated below are the ones that
        # receive the checkpoint values.
        restore_pth = 'savedModel' + str(FLAGS.year)
        meta = '-2444'
        restore_path = restore_pth + '/'
        restore_meta_path = restore_pth + '/' + meta + '.meta'
        restore = tf.train.import_meta_graph(restore_meta_path)
        restore.restore(sess, tf.train.latest_checkpoint(restore_path))

        is_r = FLAGS.is_r == '1'

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = \
            load_inputs_twitter(train_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = \
            load_inputs_twitter(test_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)

        def get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi, target, tl,
                           batch_size, kp1, kp2, is_shuffle=True):
            """Yield ``(feed_dict, batch_length)`` for every index batch."""
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        # No training step runs, so there is no global step to attach to the
        # test summaries.
        step = None
        train_time = 0
        print("number of training instances: {}, number of test instances: {}".format(
            len(tr_y), len(te_y)))

        cost_func_test = []
        cost_func_train = []
        acc_func_train = []
        acc_func_test = []

        i = 0
        converged = False
        all_trainacc = []

        while i < FLAGS.n_iter and not converged:
            # Training is disabled: the train metrics stay 0 and traincnt is 1
            # so the divisions below do not fail.
            trainacc, trainloss, traincnt = 0., 0., 1
            start_time = time.time()
            elapsed_time = time.time() - start_time
            train_time += elapsed_time

            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw, te_sen_len_bw,
                                            te_y, te_target_word, te_tar_len,
                                            2000, 1.0, 1.0, False):
                if FLAGS.method == 'TD-ATT' or FLAGS.method == 'IAN':
                    _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                        [loss, acc_num, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r,
                         true_y, pred_y, prob],
                        feed_dict=test)
                    fw += list(_fw)
                    bw += list(_bw)
                    tl += list(_tl)
                    tr += list(_tr)
                else:
                    _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr = sess.run(
                        [loss, acc_num, true_y, pred_y, prob, alpha_fw, alpha_bw,
                         alpha_t_l, alpha_t_r],
                        feed_dict=test)
                # NOTE(review): these assignments replace whatever was
                # collected before, so only the arrays of the LAST batch are
                # kept - fine while the test set fits in one batch of 2000.
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num

            print(
                'all samples={}, correct prediction={}, training time={}, training time so far={}'
                .format(cnt, acc, elapsed_time, train_time))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            cost = cost / cnt
            trainloss = trainloss / traincnt
            cost_func_test.append(cost)
            cost_func_train.append(trainloss)
            acc_func_test.append(acc)
            acc_func_train.append(trainacc)
            print(
                'Iter {}: mini-batch loss={:.6f}, train loss={:.6f}, train acc={:.6f}, test acc={:.6f}'
                .format(i, cost, trainloss, trainacc, acc))
            summary = sess.run(test_summary_op,
                               feed_dict={test_loss: cost, test_acc: acc})
            test_summary_writer.add_summary(summary, step)

            all_trainacc.append(trainacc)
            # Stop when the train accuracy gained less than 0.001 in each of
            # the last three iterations.
            if i > 2:
                if (all_trainacc[i] - all_trainacc[i - 1] < 0.001) and (
                        all_trainacc[i - 1] - all_trainacc[i - 2] < 0.001) \
                        and (all_trainacc[i - 2] - all_trainacc[i - 3] < 0.001):
                    converged = True
            i += 1

            if acc > max_acc:
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        # Loss as a function of the iteration.
        iterations = list(range(i))
        plt.plot(iterations, cost_func_train, label='Cost func train')
        plt.plot(iterations, cost_func_test, label='Cost func test')
        plt.title('Model loss k=1')
        plt.ylabel('Loss')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

        # Accuracy as a function of the iteration.
        plt.plot(iterations, acc_func_train, label='Acc func train')
        plt.plot(iterations, acc_func_test, label='Acc func test')
        plt.title('Model accuracy k=1')
        plt.ylabel('Accuracy')
        plt.xlabel('Iterations')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

        # Class probabilities of the best evaluation, one row per sample.
        with open(FLAGS.prob_file, 'w') as fp:
            for item in max_prob:
                fp.write(' '.join([str(it) for it in item]) + '\n')

        # One file per attention output: true label, predicted label and the
        # first row of the sample's weights.
        for suffix, weights in (('_fw', max_fw), ('_bw', max_bw),
                                ('_tl', max_tl), ('_tr', max_tr)):
            with open(FLAGS.prob_file + suffix, 'w') as fp:
                for y1, y2, ws in zip(max_ty, max_py, weights):
                    fp.write(
                        str(y1) + ' ' + str(y2) + ' '
                        + ' '.join([str(w) for w in ws[0]]) + '\n')

        print('Optimization Finished! Final acc={}'.format(acc))
        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return acc, np.where(np.subtract(max_py, max_ty) == 0, 0, 1), max_fw.tolist(), max_bw.tolist(), \
            max_tl.tolist(), max_tr.tolist()
def main(train_path, test_path, accuracyOnt, test_size, remaining_size, momentum=0.85):
    """Train and evaluate the joint aspect-level / sentence-level model.

    The graph is built around ``lcr_rot`` and trained with a momentum optimizer
    on ``(1 - FLAGS.lambda_0) * aspect_loss + FLAGS.lambda_0 * sentence_loss``
    for ``FLAGS.n_iter`` iterations.  The test data is evaluated after every
    iteration and the results of the iteration with the highest test micro-F1
    are kept.  After training, the encoder outputs for the full training and
    test sets are saved under ``results/embeddings/`` and the best attention
    weights, labels and predictions are pickled to ``FLAGS.prob_file``.

    Args:
        train_path: Path of the training data, passed to ``load_inputs_twitter``.
        test_path: Path of the test data, passed to ``load_inputs_twitter``.
        accuracyOnt: Accuracy used for the ``test_size - remaining_size``
            instances that are not evaluated here (only used for the
            "combined acc" that is printed).
        test_size: Total number of test instances.
        remaining_size: Number of test instances evaluated by this model.
        momentum: Momentum of the optimizer.

    Returns:
        A tuple ``(max_acc, errors, fw, bw, tl, tr)``: the test accuracy of the
        best iteration, an array that is 0 where prediction and label agree and
        1 elsewhere, and the four attention arrays of that iteration as lists.
    """
    l2 = FLAGS.l2_reg
    learning_rate = FLAGS.learning_rate

    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        # Constants rather than placeholders: the same keep probabilities are
        # used for training runs and for test runs.
        keep_prob1 = tf.constant(FLAGS.keep_prob1, tf.float32)
        keep_prob2 = tf.constant(FLAGS.keep_prob2, tf.float32)
        lambda_0 = tf.constant(FLAGS.lambda_0, tf.float32)
        lambda_1 = tf.constant(1 - FLAGS.lambda_0, tf.float32)

        with tf.name_scope('inputs'):
            y_sen = tf.placeholder(tf.float32, [None, FLAGS.n_class], name='y_sentence_level')
            n_asp = tf.placeholder(tf.int32, [None], name='n_asp')
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len], name='x')
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class], name='y')
            sen_len = tf.placeholder(tf.int32, None, name='sentence_length')
            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len], name='x_backwards')
            sen_len_bw = tf.placeholder(tf.int32, [None], name='sentence_length_backwards')
            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len], name='target_words')
            tar_len = tf.placeholder(tf.int32, [None], name='target_length')

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        # NOTE(review): `outputs` is evaluated after training but was never
        # bound; it is assumed to be the third value returned by lcr_rot
        # (previously discarded as `_`) -- confirm against lcr_rot.
        prob, prob_sen, outputs, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            n_asp, inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss_asp = loss_func(y, prob)
        loss_sen = loss_func(y_sen, prob_sen)
        loss = lambda_1 * loss_asp + lambda_0 * loss_sen
        acc_num, acc_prob, f1_micro, f1_macro, f1_weighted = acc_func(
            y, prob, y_sen, prob_sen, thre=FLAGS.threshold)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=momentum).minimize(
                loss, global_step=global_step)

        # Labels/predictions reported at the end are the sentence-level ones.
        true_y = y_sen
        pred_y = tf.cast(tf.math.greater_equal(prob_sen, [FLAGS.threshold]), tf.int32)

        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import datetime
        import os

        timestamp = datetime.datetime.now().isoformat()
        _dir = str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        test_f1_micro = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
            validate_summary_writer = summary_func(loss, acc_prob, f1_micro, test_loss, test_acc,
                                                   test_f1_micro, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_y_sen, tr_target_word, tr_tar_len, _, _, _, tr_n_asp = \
            load_inputs_twitter(train_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_y_sen, te_target_word, te_tar_len, _, _, _, te_n_asp = \
            load_inputs_twitter(test_path, word_id_mapping, FLAGS.max_sentence_len,
                                'TC', is_r, FLAGS.max_target_len)

        def make_feed(x_f, sen_len_f, x_b, sen_len_b, rows_per_sentence, yi, y_sen_i, target, tl, index):
            """Build the feed dict for the sentences selected by `index`.

            `rows_per_sentence[j]` holds the aspect-row indices of sentence j.
            Returns the feed dict and the number of aspect rows it contains.
            """
            # A list comprehension (instead of itemgetter(*index)) also works
            # when `index` holds a single sentence.
            selected_rows = [rows_per_sentence[j] for j in index]
            r_index = [row for rows in selected_rows for row in rows]
            _n_asp = np.asarray([len(rows) for rows in selected_rows if len(rows) != 0])
            feed_dict = {
                x: x_f[r_index],
                x_bw: x_b[r_index],
                y: yi[r_index],
                y_sen: y_sen_i[index],
                n_asp: _n_asp,
                sen_len: sen_len_f[r_index],
                sen_len_bw: sen_len_b[r_index],
                target_words: target[r_index],
                tar_len: tl[r_index]
            }
            return feed_dict, len(r_index)

        def get_batch_data(x_f, sen_len_f, x_b, sen_len_b, n_asp_b, yi, y_sen_i, target, tl,
                           batch_size, is_shuffle=True):
            """Yield (feed_dict, number_of_aspect_rows) for batches of sentences."""
            rows_per_sentence = list(n_asp_b.values())
            for index in batch_index(len(n_asp_b), batch_size, 1, is_shuffle):
                yield make_feed(x_f, sen_len_f, x_b, sen_len_b, rows_per_sentence,
                                yi, y_sen_i, target, tl, index)

        max_acc = 0.
        max_f1 = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        step = None
        for i in range(FLAGS.n_iter):
            trainacc, trainf1, traincnt, train_batchcnt = 0., 0., 0, 0
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_n_asp, tr_y,
                                                  tr_y_sen, tr_target_word, tr_tar_len, FLAGS.batch_size):
                _, step, summary, _trainacc, _trainf1 = sess.run(
                    [optimizer, global_step, train_summary_op, acc_num, f1_micro],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                trainacc += _trainacc
                trainf1 += _trainf1
                traincnt += numtrain
                train_batchcnt += 1

            acc, f1, cost, cnt, test_batchcnt = 0., 0., 0., 0, 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_n_asp, te_y,
                                            te_y_sen, te_target_word, te_tar_len, 2000, False):
                # One fetch for every FLAGS.method: the former TD-ATT/IAN
                # branch did not fetch f1_micro (leaving `_f1` unbound) and
                # its list accumulation was overwritten right below.
                _loss, _acc, _f1, _ty, _py, _fw, _bw, _tl, _tr = sess.run(
                    [loss, acc_num, f1_micro, true_y, pred_y,
                     alpha_fw, alpha_bw, alpha_t_l, alpha_t_r],
                    feed_dict=test)
                # NOTE: these hold the values of the most recent test batch only.
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                f1 += _f1
                cost += _loss
                cnt += num
                test_batchcnt += 1

            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc = trainacc / traincnt
            trainf1 = trainf1 / train_batchcnt
            acc = acc / cnt
            f1 = f1 / test_batchcnt
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt * (test_size - remaining_size))) / test_size
            # Loss and F1 are averaged per batch, accuracy per instance.
            cost = cost / test_batchcnt
            print('Iter {}: mini-batch loss={:.6f}, train acc={:.6f}, train_f1_micro={:.6f}, '
                  'test acc={:.6f}, test_f1_micro={:.6f}, combined acc={:.6f}'.format(
                      i, cost, trainacc, trainf1, acc, f1, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc,
                                   test_f1_micro: f1
                               })
            test_summary_writer.add_summary(summary, step)

            # Model selection is on micro-F1, not on accuracy.
            if f1 > max_f1:
                max_acc = acc
                max_f1 = f1
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py

        # Encode the full training and test data with the final weights.  The
        # feeds are built like the batches above so that the rows and the
        # per-sentence aspect counts stay consistent (rows follow the order of
        # the n_asp mapping).
        train_feed_dict, _ = make_feed(
            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, list(tr_n_asp.values()),
            tr_y, tr_y_sen, tr_target_word, tr_tar_len, list(range(len(tr_n_asp))))
        test_feed_dict, _ = make_feed(
            te_x, te_sen_len, te_x_bw, te_sen_len_bw, list(te_n_asp.values()),
            te_y, te_y_sen, te_target_word, te_tar_len, list(range(len(te_n_asp))))
        tr_outputs = sess.run([outputs], feed_dict=train_feed_dict)
        te_outputs = sess.run([outputs], feed_dict=test_feed_dict)

        os.makedirs("results/embeddings", exist_ok=True)
        with open("results/embeddings/train_emb.npy", 'wb') as f:
            np.save(f, tr_outputs)
        with open("results/embeddings/test_emb.npy", 'wb') as f:
            np.save(f, te_outputs)

        # average='micro' yields one scalar per metric, so there is no
        # per-class vector to sum and average.
        P = precision_score(max_ty, max_py, average='micro')
        R = recall_score(max_ty, max_py, average='micro')
        F1 = f1_score(max_ty, max_py, average='micro')
        print('(Individual aspect) P:', P)
        print('(Individual aspect) R:', R)
        print('(Individual aspect) F1:', F1)

        prob_data = {
            'forward_att': max_fw,
            'backward_att': max_bw,
            'target_left_att': max_tl,
            'target_right_att': max_tr,
            'true': max_ty,
            'predict': max_py
        }
        # pickle writes bytes, so the file has to be opened in binary mode.
        with open(FLAGS.prob_file, 'wb') as fp:
            pickle.dump(prob_data, fp)

        print('Optimization Finished! Max acc={}, Max micro f1={}'.format(
            max_acc, max_f1))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0, 1), \
            max_fw.tolist(), max_bw.tolist(), max_tl.tolist(), max_tr.tolist()
def run(self):
    """Build the bidirectional LSTM graph, train it and report test accuracy.

    The loss is the mean softmax cross-entropy plus the collected
    regularization losses, minimised with Adam.  After every training
    iteration the test data is evaluated and the best accuracy is tracked.
    Training summaries are written below ``logs/``; nothing is returned.
    """
    embedded_fw = tf.nn.embedding_lookup(self.word_embedding, self.x)
    embedded_bw = tf.nn.embedding_lookup(self.word_embedding, self.x_bw)
    prob = self.bi_dynamic_lstm(embedded_fw, embedded_bw)

    with tf.name_scope('loss'):
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=prob, labels=self.y)
        cost = tf.reduce_mean(cross_entropy) + sum(reg_loss)

    with tf.name_scope('train'):
        global_step = tf.Variable(0, name="tr_global_step", trainable=False)
        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        optimizer = adam.minimize(cost, global_step=global_step)

    with tf.name_scope('predict'):
        correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
        true_y = tf.argmax(self.y, 1)
        pred_y = tf.argmax(prob, 1)
        # Number of correct predictions in a batch / fraction of correct ones.
        accuracy = tf.reduce_sum(tf.cast(correct_pred, tf.int32))
        acc_ = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.Session() as sess:
        title = '-b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.batch_size, FLAGS.learning_rate, FLAGS.l2_reg,
            FLAGS.max_sentence_len, FLAGS.embedding_dim, FLAGS.n_hidden,
            FLAGS.n_class)
        summary_loss = tf.summary.scalar('loss' + title, cost)
        summary_acc = tf.summary.scalar('acc' + title, acc_)
        train_summary_op = tf.summary.merge([summary_loss, summary_acc])
        validate_summary_op = tf.summary.merge([summary_loss, summary_acc])

        import time
        timestamp = str(int(time.time()))
        _dir = ''.join([
            'logs/', str(timestamp), '_', self.type_,
            '_r', str(self.learning_rate),
            '_b', str(self.batch_size),
            '_l', str(self.l2_reg),
        ])
        train_summary_writer = tf.summary.FileWriter(_dir + '/train', sess.graph)
        # The test/validate writers and the saver are created but not written to.
        test_summary_writer = tf.summary.FileWriter(_dir + '/test', sess.graph)
        validate_summary_writer = tf.summary.FileWriter(_dir + '/validate', sess.graph)

        saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)

        sess.run(tf.global_variables_initializer())

        save_dir = 'models/' + _dir + '/'
        import os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        train_set = load_inputs_twitter(FLAGS.train_file_path,
                                        self.word_id_mapping,
                                        self.max_sentence_len, self.type_)
        test_set = load_inputs_twitter(FLAGS.test_file_path,
                                       self.word_id_mapping,
                                       self.max_sentence_len, self.type_)
        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y = train_set
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y = test_set

        max_acc = 0.
        max_ty, max_py = None, None
        for i in range(self.n_iter):
            train_batches = self.get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                tr_sen_len_bw, tr_y,
                                                self.batch_size, 1.0)
            for train, _ in train_batches:
                _, step, summary = sess.run(
                    [optimizer, global_step, train_summary_op],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)

            correct, weighted_loss, cnt = 0., 0., 0
            ty, py = None, None
            test_batches = self.get_batch_data(te_x, te_sen_len, te_x_bw,
                                               te_sen_len_bw, te_y, 3062, 1.0)
            for test, num in test_batches:
                # ty/py keep the labels and predictions of the latest batch.
                batch_loss, batch_correct, _summary, step, ty, py = sess.run(
                    [cost, accuracy, validate_summary_op, global_step,
                     true_y, pred_y],
                    feed_dict=test)
                correct += batch_correct
                weighted_loss += batch_loss * num
                cnt += num

            print('all samples={}, correct prediction={}'.format(cnt, correct))
            mean_loss = weighted_loss / cnt
            test_accuracy = correct / cnt
            print('Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(
                step, mean_loss, test_accuracy))
            if test_accuracy > max_acc:
                max_acc, max_ty, max_py = test_accuracy, ty, py

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(self.learning_rate, self.n_iter, self.batch_size,
                    self.n_hidden, self.l2_reg))
def main(train_path,
         test_path,
         learning_rate=FLAGS.learning_rate,
         keep_prob=FLAGS.keep_prob1,
         l2=FLAGS.l2_reg,
         beta=0.9,
         number_epochs=100):
    """Train the LCR-Rot model and record per-epoch loss/accuracy curves.

    The learning rate is multiplied by 0.99 at the start of every epoch.  It
    is fed to the optimizer through a placeholder: an optimizer built from a
    plain Python float keeps its initial rate no matter how the Python
    variable is changed afterwards.

    Args:
        train_path: Path of the training data, passed to ``load_inputs_twitter``.
        test_path: Path of the test data, passed to ``load_inputs_twitter``.
        learning_rate: Initial learning rate of the momentum optimizer.
        keep_prob: Value fed to both keep-probability placeholders in training
            (1.0 is fed for evaluation).
        l2: Regularization argument passed to ``lcr_rot``.
        beta: Momentum of the optimizer.
        number_epochs: Number of passes over the training data.

    Returns:
        ``(min_training_loss, max_training_accuracy, min_test_loss,
        max_test_accuracy, all_training_losses, all_training_accuracies,
        all_test_losses, all_test_accuracies)`` where the ``all_*`` lists hold
        one value per epoch.
    """
    print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)
        # Fed on every training step so the per-epoch decay takes effect.
        lr = tf.placeholder(tf.float32, shape=[], name='learning_rate')

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        # Only the class probabilities are needed here; the attention weights
        # returned by lcr_rot are not used by this variant.
        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=lr, momentum=beta).minimize(loss, global_step=global_step)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi, target, tl,
                           batch_size, kp1, kp2, is_shuffle=True):
            """Yield (feed_dict, batch_length) for every batch of the data."""
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        all_training_losses, all_training_accuracies = [], []
        all_test_losses, all_test_accuracies = [], []
        for i in range(number_epochs):
            # Decay before the epoch, so the first epoch already runs at
            # 0.99 * the initial rate.
            learning_rate = (0.99) * learning_rate
            number_of_training_examples_correct, number_of_training_examples, training_loss = 0., 0, 0.
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):
                train[lr] = learning_rate
                _, _trainacc, _training_loss = sess.run(
                    [optimizer, acc_num, loss], feed_dict=train)
                number_of_training_examples_correct += _trainacc
                number_of_training_examples += numtrain
                training_loss += _training_loss * numtrain

            number_of_test_examples_correct, test_loss, number_of_test_examples = 0., 0., 0
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                # Only loss and the number of correct predictions are used;
                # labels, predictions and attention weights are not fetched.
                _loss, _acc = sess.run([loss, acc_num], feed_dict=test)
                number_of_test_examples_correct += _acc
                test_loss += _loss * num
                number_of_test_examples += num

            print(
                'number of training examples={}, correct training examples={}, number of test examples={}, correct test examples={}'
                .format(number_of_training_examples,
                        number_of_training_examples_correct,
                        number_of_test_examples,
                        number_of_test_examples_correct))
            training_accuracy = number_of_training_examples_correct / number_of_training_examples
            test_accuracy = number_of_test_examples_correct / number_of_test_examples
            average_test_loss = test_loss / number_of_test_examples
            average_training_loss = training_loss / number_of_training_examples
            all_training_losses.append(average_training_loss)
            all_training_accuracies.append(training_accuracy)
            all_test_losses.append(average_test_loss)
            all_test_accuracies.append(test_accuracy)
            print(
                'Epoch {}: average training loss={:.6f}, train acc={:.6f}, average test loss={:.6f}, test acc={:.6f}'
                .format(i, average_training_loss, training_accuracy,
                        average_test_loss, test_accuracy))

        min_training_loss = min(all_training_losses)
        max_training_accuracy = max(all_training_accuracies)
        min_test_loss = min(all_test_losses)
        max_test_accuracy = max(all_test_accuracies)

        # Report the best test accuracy (this used to print a value that was
        # initialised to 0 and never updated).
        print('Optimization Finished! Max acc={}'.format(max_test_accuracy))

        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(learning_rate, number_epochs, FLAGS.batch_size,
                    FLAGS.n_hidden, l2))

        return min_training_loss, max_training_accuracy, min_test_loss, max_test_accuracy, \
            all_training_losses, all_training_accuracies, all_test_losses, all_test_accuracies
def main(train_path, test_path, accuracyOnt, test_size, remaining_size,
         learning_rate_dis, learning_rate_gen, keep_prob, momentum_dis,
         momentum_gen, l2, k, WriteFile):
    """Adversarially train the LCR-Rot model and evaluate it every iteration.

    A discriminator (variables under scope ``var_D``) minimises the
    adversarial loss while a generator (variables under ``var_G``) maximises
    it.  ``k`` controls how often each network is updated:

    * ``k >= 1``: the discriminator is updated every iteration, the generator
      only in iterations where ``i % k == 0``.
    * ``k < 1``: the generator is updated every iteration, the discriminator
      only in iterations where ``i % (1 / k) == 0``.

    Training stops early when the test loss becomes NaN.

    Args:
        train_path: Path of the training data, passed to ``load_inputs_twitter``.
        test_path: Path of the test data, passed to ``load_inputs_twitter``.
        accuracyOnt: Accuracy used for the ``test_size - remaining_size``
            instances not evaluated here (only used for "combined acc").
        test_size: Total number of test instances.
        remaining_size: Number of test instances evaluated by this model.
        learning_rate_dis: Learning rate of the discriminator optimizer.
        learning_rate_gen: Learning rate of the generator optimizer.
        keep_prob: Value fed to both keep-probability placeholders in training.
        momentum_dis: Momentum of the discriminator optimizer.
        momentum_gen: Momentum of the generator optimizer.
        l2: Regularization argument passed to ``lcr_rot``.
        k: Update ratio between discriminator and generator (see above).
        WriteFile: If true, per-iteration statistics are collected and saved
            as a CSV file after training.

    Returns:
        A tuple ``(max_acc, errors, fw, bw, tl, tr)``: the best test accuracy,
        an array that is 0 where prediction and label agree and 1 elsewhere,
        and the four attention arrays of the best iteration as lists.
    """
    print_config()
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path, FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x_real = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y_real = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32, [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x_real)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        # NOTE: `l2` is rebound to the fifth value returned by lcr_rot and
        # that value is what generator() and discriminator() receive.
        l, r, t_l, t_r, l2, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')
        gen_l, gen_r, gen_t_l, gen_t_r = generator(l2)

        # The discriminator is applied twice (real and generated data), so its
        # variables are reused.
        with tf.variable_scope("var_D", reuse=tf.AUTO_REUSE):
            prob_real = discriminator(l, r, t_l, t_r, keep_prob2, l2)
            prob_generated = discriminator(gen_l, gen_r, gen_t_l, gen_t_r,
                                           keep_prob2, l2)

        loss = loss_func_adversarial(prob_real, prob_generated, y_real)
        acc_num_real, acc_prob_real, acc_num_gen, acc_prob_gen = acc_func_adversarial(
            prob_real, prob_generated, y_real)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)

        var_list_D = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='var_D')
        var_list_G = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='var_G')

        # Min-max problem: D minimises the loss over its variables, G
        # maximises it (minimises -loss) over its own.  Only the D step
        # increments global_step.
        opti_min = tf.train.MomentumOptimizer(
            learning_rate=learning_rate_dis, momentum=momentum_dis).minimize(
                loss, var_list=var_list_D, global_step=global_step)
        opti_max = tf.train.MomentumOptimizer(
            learning_rate=learning_rate_gen, momentum=momentum_gen).minimize(
                -loss, var_list=var_list_G)

        true_y = tf.argmax(y_real, 1)
        pred_y = tf.argmax(prob_real, 1)

        title = '-d1-{}d2-{}b-{}rd-{}rg-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            learning_rate_dis, learning_rate_gen, FLAGS.l2_reg,
            FLAGS.max_sentence_len, FLAGS.embedding_dim, FLAGS.n_hidden,
            FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \
            validate_summary_writer = summary_func_adversarial(loss, acc_prob_real, acc_prob_gen, test_loss,
                                                               test_acc, _dir, title, sess)

        is_r = FLAGS.is_r == '1'

        tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y, tr_target_word, tr_tar_len, _, _, _ = load_inputs_twitter(
            train_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)
        te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word, te_tar_len, _, _, _ = load_inputs_twitter(
            test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
            FLAGS.max_target_len)

        def get_batch_data(x_f, sen_len_f, x_b, sen_len_b, yi, target, tl,
                           batch_size, kp1, kp2, is_shuffle=True):
            """Yield (feed_dict, batch_length) for every batch of the data."""
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x_real: x_f[index],
                    x_bw: x_b[index],
                    y_real: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        def run_epoch(train_ops):
            """Run `train_ops` on every training batch.

            Returns the summed real/generated correct counts, the number of
            training instances seen and the last global step (None when there
            was no batch).
            """
            correct_real, correct_gen, seen, last_step = 0., 0., 0, None
            fetches = list(train_ops) + [
                global_step, train_summary_op, acc_num_real, acc_num_gen
            ]
            for train, numtrain in get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                  tr_sen_len_bw, tr_y,
                                                  tr_target_word, tr_tar_len,
                                                  FLAGS.batch_size, keep_prob,
                                                  keep_prob):
                results = sess.run(fetches, feed_dict=train)
                last_step, summary, _trainacc_real, _trainacc_gen = results[len(train_ops):]
                train_summary_writer.add_summary(summary, last_step)
                correct_real += _trainacc_real
                correct_gen += _trainacc_gen
                seen += numtrain
            return correct_real, correct_gen, seen, last_step

        def write_attention(suffix, weights):
            """Write label, prediction and attention weights, one line per instance."""
            with open(FLAGS.prob_file + suffix, 'w') as out:
                for y1, y2, ws in zip(max_ty, max_py, weights):
                    out.write(str(y1) + ' ' + str(y2) + ' ' +
                              ' '.join([str(w) for w in ws[0]]) + '\n')

        max_acc = 0.
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None

        # One column per iteration with the rows: iteration, loss, real train
        # accuracy, test accuracy, average generated probability.  The first
        # column is the all-zero initial column.
        Results_File = np.zeros((5, 1))
        for i in range(1, FLAGS.n_iter + 1):
            if k >= 1:
                # Update D more often than G.
                update_both = (i % k == 0)
                single_ops, single_name = [opti_min], 'D'
            else:
                # Update G more often than D.
                # NOTE(review): float modulo; only matches when 1 / k is a
                # whole number.
                k_inv = 1 / k
                update_both = (i % k_inv == 0)
                single_ops, single_name = [opti_max], 'G'

            if update_both:
                print('In iter ' + str(i) + ' we update both G and D.')
                train_ops = [opti_max, opti_min]
            else:
                print('In iter ' + str(i) + ' we update only ' + single_name + '.')
                train_ops = single_ops

            trainacc_real, trainacc_gen, traincnt, last_step = run_epoch(train_ops)
            if last_step is not None:
                step = last_step

            # Testing happens in every iteration, whichever networks were
            # updated.
            acc, cost, cnt = 0., 0., 0
            fw, bw, tl, tr, ty, py = [], [], [], [], [], []
            p = []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                # One fetch for every FLAGS.method: the former TD-ATT/IAN
                # branch fetched the undefined `acc_num` and did not fetch
                # prob_generated, which is needed below.
                _loss, _acc, _ty, _py, _p, _fw, _bw, _tl, _tr, _p_g = sess.run(
                    [loss, acc_num_real, true_y, pred_y, prob_real,
                     alpha_fw, alpha_bw, alpha_t_l, alpha_t_r, prob_generated],
                    feed_dict=test)
                # NOTE: these hold the values of the most recent test batch only.
                ty = np.asarray(_ty)
                py = np.asarray(_py)
                p = np.asarray(_p)
                fw = np.asarray(_fw)
                bw = np.asarray(_bw)
                tl = np.asarray(_tl)
                tr = np.asarray(_tr)
                acc += _acc
                cost += _loss * num
                cnt += num
            p_g = np.asarray(_p_g)

            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc_real = trainacc_real / traincnt
            trainacc_gen = trainacc_gen / traincnt
            acc = acc / cnt
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt * (test_size - remaining_size))) / test_size
            cost = cost / cnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train acc real ={:.6f}, test acc={:.6f}, combined acc={:.6f}'
                .format(i, cost, trainacc_real, acc, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)
            if acc > max_acc:
                max_acc = acc
                max_fw = fw
                max_bw = bw
                max_tl = tl
                max_tr = tr
                max_ty = ty
                max_py = py
                max_prob = p

            if WriteFile:
                # Column 3 of the mean generated-sample probabilities;
                # presumably the class of the generated samples -- TODO confirm.
                avg_p_gen = np.mean(p_g, axis=0)[3]
                Added = [[i], [cost], [trainacc_real], [acc], [avg_p_gen]]
                Results_File = np.concatenate((Results_File, Added), 1)

            if np.isnan(_loss):
                print('Ohw shit we obtained an NaN bro!!')
                # Penalise diverged runs (used for hyperparameter
                # optimisation): the earlier the NaN, the lower the score.
                max_acc = max_acc * ((i / 200)**2)
                break

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        # Files are written inside `with` blocks so they are flushed and closed.
        with open(FLAGS.prob_file, 'w') as fp:
            for item in max_prob:
                fp.write(' '.join([str(it) for it in item]) + '\n')
        write_attention('_fw', max_fw)
        write_attention('_bw', max_bw)
        write_attention('_tl', max_tl)
        write_attention('_tr', max_tr)

        print('Optimization Finished! Max acc={}'.format(max_acc))

        print(
            'Learning_rate_dis={},Learning_rate_gen={}, momentum_dis={},momentum_gen={}, iter_num={}, batch_size={}, hidden_num={}, l2={},k={}'
            .format(learning_rate_dis, learning_rate_gen, momentum_dis,
                    momentum_gen, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg, k))

        if WriteFile:
            # Save the collected training information as a CSV file.
            dateTimeObj = datetime.now()
            save_dir = '/Results_Run_Adversarial/Run_' + str(
                dateTimeObj) + '_lr' + str(learning_rate_dis) + '_lrg' + str(
                    learning_rate_gen) + '_kp' + str(
                        keep_prob) + '_mom_d' + str(
                            momentum_dis) + '_mom_g' + str(
                                momentum_gen) + '_k' + str(k) + '.csv'
            np.savetxt(save_dir, Results_File, delimiter=",")

        return max_acc, np.where(np.subtract(max_py, max_ty) == 0, 0, 1), \
            max_fw.tolist(), max_bw.tolist(), max_tl.tolist(), max_tr.tolist()


if __name__ == '__main__':
    tf.app.run()
def main(train_path,
         test_path,
         accuracyOnt,
         test_size,
         remaining_size,
         augment_data,
         augmentation_file_path,
         ct,
         learning_rate=0.09,
         keep_prob=0.3,
         momentum=0.85,
         l2=0.00001):
    """Train the LCR-Rot model, evaluate it every epoch and log the best run.

    The graph is built from `lcr_rot`, optimised with a momentum optimizer
    and evaluated on the test file after every epoch.  The epoch with the
    highest test accuracy is kept; its metrics are appended to
    `FLAGS.results_file` together with the augmentation-related flags.

    Args:
        train_path: Training file handed to `load_inputs_twitter`.
        test_path: Test file handed to `load_inputs_twitter`.
        accuracyOnt: Accuracy mixed into the combined accuracy with weight
            `test_size - remaining_size`.
        test_size: Denominator of the combined accuracy.
        remaining_size: Weight of this model's accuracy in the combined
            accuracy.
        augment_data: Forwarded to `load_inputs_twitter`; also gates the
            word-mixup step.
        augmentation_file_path: Forwarded to `load_inputs_twitter`.
        ct: Stored verbatim in the results file as `pre_embed_aug`.
        learning_rate: Learning rate of the momentum optimizer.
        keep_prob: Value fed to both keep-probability placeholders while
            training (evaluation feeds 1.0).
        momentum: Momentum of the optimizer.
        l2: Regularisation argument forwarded to `lcr_rot`.

    Returns:
        A 6-tuple for the best epoch: test accuracy, a 0/1 array that is 1
        where prediction and label differ, and the four attention-weight
        arrays (forward, backward, target-left, target-right) as nested
        lists.

    Raises:
        RuntimeError: If no evaluation pass ran (`FLAGS.n_iter` < 1).
    """
    print_config()
    augmenter = Augmentation(FLAGS.EDA_type, need_mixup=True)

    # Graph construction is pinned to the second GPU; the session below is
    # created with allow_soft_placement=True.
    with tf.device('/gpu:1'):
        word_id_mapping, w2v = load_w2v(FLAGS.embedding_path,
                                        FLAGS.embedding_dim)
        word_embedding = tf.constant(w2v, name='word_embedding')

        keep_prob1 = tf.placeholder(tf.float32)
        keep_prob2 = tf.placeholder(tf.float32)

        with tf.name_scope('inputs'):
            x = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            y = tf.placeholder(tf.float32, [None, FLAGS.n_class])
            sen_len = tf.placeholder(tf.int32, None)

            x_bw = tf.placeholder(tf.int32, [None, FLAGS.max_sentence_len])
            sen_len_bw = tf.placeholder(tf.int32, [None])

            target_words = tf.placeholder(tf.int32,
                                          [None, FLAGS.max_target_len])
            tar_len = tf.placeholder(tf.int32, [None])

        inputs_fw = tf.nn.embedding_lookup(word_embedding, x)
        inputs_bw = tf.nn.embedding_lookup(word_embedding, x_bw)
        target = tf.nn.embedding_lookup(word_embedding, target_words)

        prob, alpha_fw, alpha_bw, alpha_t_l, alpha_t_r = lcr_rot(
            inputs_fw, inputs_bw, sen_len, sen_len_bw, target, tar_len,
            keep_prob1, keep_prob2, l2, 'all')

        loss = loss_func(y, prob)
        acc_num, acc_prob = acc_func(y, prob)
        global_step = tf.Variable(0, name='tr_global_step', trainable=False)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=momentum).minimize(loss, global_step=global_step)
        true_y = tf.argmax(y, 1)
        pred_y = tf.argmax(prob, 1)

        # NOTE(review): the title (and the final log line) report
        # FLAGS.learning_rate / FLAGS.l2_reg, not the `learning_rate` / `l2`
        # arguments actually used above -- confirm this is intended.
        title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format(
            FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size,
            FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len,
            FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        import time
        timestamp = str(int(time.time()))
        _dir = 'summary/' + str(timestamp) + '_' + title
        test_loss = tf.placeholder(tf.float32)
        test_acc = tf.placeholder(tf.float32)
        (train_summary_op, test_summary_op, validate_summary_op,
         train_summary_writer, test_summary_writer,
         validate_summary_writer) = summary_func(loss, acc_prob, test_loss,
                                                 test_acc, _dir, title, sess)

        sess.run(tf.global_variables_initializer())

        is_r = FLAGS.is_r == '1'

        (len_non_augmented, tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
         tr_target_word, tr_tar_len, _, _, _) = load_inputs_twitter(
             train_path,
             word_id_mapping,
             FLAGS.max_sentence_len,
             'TC',
             is_r,
             FLAGS.max_target_len,
             augment_data=augment_data,
             augmentation_file_path=augmentation_file_path)
        (_, te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y, te_target_word,
         te_tar_len, _, _, _) = load_inputs_twitter(
             test_path, word_id_mapping, FLAGS.max_sentence_len, 'TC', is_r,
             FLAGS.max_target_len)

        # Mixup either spans every training record or only the records that
        # precede the augmented ones.
        max_records_mixup = (len(tr_x) if FLAGS.mixup_on_augmentations > 0
                             else len_non_augmented)
        if augment_data and FLAGS.use_word_mixup > 0:
            print("The amount of records on which mixup is applied: {}".format(
                max_records_mixup))
            # NOTE(review): `- 1` leaves the last eligible record out of the
            # pairing -- confirm whether that is intentional.
            rand_mixup = np.array(range(max_records_mixup - 1))
            print("applying mixup...")
            for _ in range(FLAGS.use_word_mixup):
                random.shuffle(rand_mixup)
                # Walk the shuffled indices two at a time; an odd leftover
                # index is silently dropped by zip.
                for i, j in tqdm(zip(*[iter(rand_mixup)] * 2)):
                    first = (tr_x[i], tr_sen_len[i], tr_x_bw[i],
                             tr_sen_len_bw[i], tr_y[i], tr_target_word[i],
                             tr_tar_len[i])
                    second = (tr_x[j], tr_sen_len[j], tr_x_bw[j],
                              tr_sen_len_bw[j], tr_y[j], tr_target_word[j],
                              tr_tar_len[j])
                    # NOTE(review): the return value is discarded here;
                    # presumably word_mixup records its result on the
                    # augmenter -- verify.
                    augmenter.word_mixup(first, second)
            print("Word mixup embeddings: {}".format(augmenter.counter))

        def get_batch_data(x_f,
                           sen_len_f,
                           x_b,
                           sen_len_b,
                           yi,
                           target,
                           tl,
                           batch_size,
                           kp1,
                           kp2,
                           is_shuffle=True):
            """Yield (feed_dict, batch_length) pairs for one pass over the data."""
            for index in batch_index(len(yi), batch_size, 1, is_shuffle):
                feed_dict = {
                    x: x_f[index],
                    x_bw: x_b[index],
                    y: yi[index],
                    sen_len: sen_len_f[index],
                    sen_len_bw: sen_len_b[index],
                    target_words: target[index],
                    tar_len: tl[index],
                    keep_prob1: kp1,
                    keep_prob2: kp2,
                }
                yield feed_dict, len(index)

        max_acc = 0.
        # Bound up front so the reporting code below never hits an unbound
        # name when no epoch improves on the initial accuracy.
        max_trainacc, max_totalacc, iteration = 0., 0., None
        max_fw, max_bw = None, None
        max_tl, max_tr = None, None
        max_ty, max_py = None, None
        max_prob = None
        step = None
        cnt = 0

        for i in range(FLAGS.n_iter):
            trainacc, traincnt = 0., 0
            for train, numtrain in get_batch_data(
                    tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y,
                    tr_target_word, tr_tar_len, FLAGS.batch_size, keep_prob,
                    keep_prob):
                _, step, summary, _trainacc = sess.run(
                    [optimizer, global_step, train_summary_op, acc_num],
                    feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                trainacc += _trainacc
                traincnt += numtrain

            acc, cost, cnt = 0., 0., 0
            # Per-batch outputs are collected and concatenated afterwards so
            # that the stored labels/predictions cover the whole test set,
            # not just the final batch.  The fetch list is the same for every
            # FLAGS.method, so no per-method branching is required.
            fw, bw, tl, tr, ty, py, p = [], [], [], [], [], [], []
            for test, num in get_batch_data(te_x, te_sen_len, te_x_bw,
                                            te_sen_len_bw, te_y,
                                            te_target_word, te_tar_len, 2000,
                                            1.0, 1.0, False):
                _loss, _acc, _fw, _bw, _tl, _tr, _ty, _py, _p = sess.run(
                    [
                        loss, acc_num, alpha_fw, alpha_bw, alpha_t_l,
                        alpha_t_r, true_y, pred_y, prob
                    ],
                    feed_dict=test)
                fw.append(np.asarray(_fw))
                bw.append(np.asarray(_bw))
                tl.append(np.asarray(_tl))
                tr.append(np.asarray(_tr))
                ty.append(np.asarray(_ty))
                py.append(np.asarray(_py))
                p.append(np.asarray(_p))
                acc += _acc
                cost += _loss * num
                cnt += num

            print('all samples={}, correct prediction={}'.format(cnt, acc))
            trainacc = trainacc / traincnt
            acc = acc / cnt
            # Weighted mix of this model's accuracy and `accuracyOnt`.
            totalacc = ((acc * remaining_size) +
                        (accuracyOnt *
                         (test_size - remaining_size))) / test_size
            cost = cost / cnt
            print(
                'Iter {}: mini-batch loss={:.6f}, train acc={:.6f}, test acc={:.6f}, combined acc={:.6f}'
                .format(i, cost, trainacc, acc, totalacc))
            summary = sess.run(test_summary_op,
                               feed_dict={
                                   test_loss: cost,
                                   test_acc: acc
                               })
            test_summary_writer.add_summary(summary, step)

            # Always keep the first evaluation so a run whose accuracy never
            # exceeds 0 still has results to report.
            if max_ty is None or acc > max_acc:
                max_trainacc = trainacc
                max_totalacc = totalacc
                iteration = i
                max_acc = acc
                max_fw = np.concatenate(fw)
                max_bw = np.concatenate(bw)
                max_tl = np.concatenate(tl)
                max_tr = np.concatenate(tr)
                max_ty = np.concatenate(ty)
                max_py = np.concatenate(py)
                max_prob = np.concatenate(p)

        if max_ty is None:
            raise RuntimeError(
                'No evaluation was performed; FLAGS.n_iter must be >= 1.')

        P = precision_score(max_ty, max_py, average=None)
        R = recall_score(max_ty, max_py, average=None)
        F1 = f1_score(max_ty, max_py, average=None)
        print('P:', P, 'avg=', sum(P) / FLAGS.n_class)
        print('R:', R, 'avg=', sum(R) / FLAGS.n_class)
        print('F1:', F1, 'avg=', sum(F1) / FLAGS.n_class)

        keys_to_save = set(
            'year EDA_type EDA_deletion EDA_replacement original_multiplier EDA_insertion EDA_swap EDA_pct backtranslation_langs use_word_mixup mixup_beta mixup_on_augmentations'
            .split(' '))
        try:
            df = pd.read_json(FLAGS.results_file)
            print('adding outcome to {}'.format(FLAGS.results_file))
        except (ValueError, FileNotFoundError):
            # Older pandas raises ValueError for a missing file, newer
            # versions raise FileNotFoundError.
            print(
                'did not find an existing result file, creating a new one...')
            df = pd.DataFrame([])

        new_experiment = {}
        for k, v in sorted(FLAGS.flag_values_dict().items()):
            if k in keys_to_save:
                new_experiment[k] = v
        new_experiment['in_sample'] = max_trainacc
        new_experiment['out_of_sample'] = max_acc
        new_experiment['ontology_acc'] = accuracyOnt
        new_experiment['total_acc'] = max_totalacc
        new_experiment['at_iteration'] = iteration
        new_experiment['#of_test'] = cnt
        new_experiment['#of_train'] = len(tr_x)
        new_experiment['pre_embed_aug'] = ct
        new_experiment['post_embed_aug'] = augmenter.counter
        # DataFrame.append was removed in pandas 2.0; concat works on both
        # old and new versions.
        df = pd.concat([df, pd.DataFrame([new_experiment])],
                       ignore_index=True)
        df.to_json(FLAGS.results_file)

        print('Optimization Finished! Max acc={}'.format(max_acc))
        print(
            'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
            .format(FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size,
                    FLAGS.n_hidden, FLAGS.l2_reg))

        return (max_acc,
                np.where(np.subtract(max_py, max_ty) == 0, 0, 1),
                max_fw.tolist(), max_bw.tolist(), max_tl.tolist(),
                max_tr.tolist())
def run(self):
    """Build the training graph, train for `self.n_iter` epochs and print test accuracy.

    The forward and backward inputs are embedded, passed through
    `self.bi_dynamic_lstm`, and optimised with Adam on softmax cross-entropy
    plus the collected regularisation losses.  After every epoch the test
    file is evaluated and the best accuracy so far is tracked; the result is
    only printed, nothing is returned.
    """
    inputs_fw = tf.nn.embedding_lookup(self.word_embedding, self.x)
    inputs_bw = tf.nn.embedding_lookup(self.word_embedding, self.x_bw)
    prob = self.bi_dynamic_lstm(inputs_fw, inputs_bw)

    with tf.name_scope('loss'):
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # NOTE(review): `prob` is passed as logits, so it is presumably
        # un-normalised scores despite its name -- confirm against
        # bi_dynamic_lstm.
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=prob, labels=self.y)) + sum(reg_loss)

    with tf.name_scope('train'):
        global_step = tf.Variable(0, name="tr_global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(
                cost, global_step=global_step)

    with tf.name_scope('predict'):
        correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
        # `accuracy` counts correct predictions per batch; `acc_` is their
        # mean and only feeds the scalar summary.
        accuracy = tf.reduce_sum(tf.cast(correct_pred, tf.int32))
        acc_ = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.Session() as sess:
        # The scalar summaries are defined but not written yet; only the
        # graph is exported through the writers below.
        summary_loss = tf.summary.scalar('loss', cost)
        summary_acc = tf.summary.scalar('acc', acc_)

        import time
        timestamp = str(int(time.time()))
        _dir = 'logs/' + str(timestamp) + '_' + self.type_ + '_r' + str(
            self.learning_rate) + '_b' + str(self.batch_size) + '_l' + str(
                self.l2_reg)

        summary_writers = []
        try:
            for split in ('train', 'test', 'validate'):
                summary_writers.append(
                    tf.summary.FileWriter(_dir + '/' + split, sess.graph))

            tr_x, tr_sen_len, tr_x_bw, tr_sen_len_bw, tr_y = load_inputs_twitter(
                FLAGS.train_file_path, self.word_id_mapping,
                self.max_sentence_len, self.type_)
            te_x, te_sen_len, te_x_bw, te_sen_len_bw, te_y = load_inputs_twitter(
                FLAGS.test_file_path, self.word_id_mapping,
                self.max_sentence_len, self.type_)

            init = tf.global_variables_initializer()
            sess.run(init)

            max_acc = 0.
            # Bound up front so the progress line works even if the training
            # set yields no batch.
            step = 0
            for i in range(self.n_iter):
                for train, _ in self.get_batch_data(tr_x, tr_sen_len, tr_x_bw,
                                                    tr_sen_len_bw, tr_y,
                                                    self.batch_size, 1.0):
                    _, step = sess.run([optimizer, global_step],
                                       feed_dict=train)

                acc, loss, cnt = 0., 0., 0
                for test, num in self.get_batch_data(te_x, te_sen_len,
                                                     te_x_bw, te_sen_len_bw,
                                                     te_y, 2000, 1.0):
                    _loss, _acc = sess.run([cost, accuracy], feed_dict=test)
                    acc += _acc
                    loss += _loss * num
                    cnt += num
                print(cnt)
                print(acc)
                print(
                    'Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(
                        step, loss / cnt, acc / cnt))
                if acc / cnt > max_acc:
                    max_acc = acc / cnt

            print('Optimization Finished! Max acc={}'.format(max_acc))
            print(
                'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'
                .format(self.learning_rate, self.n_iter, self.batch_size,
                        self.n_hidden, self.l2_reg))
        finally:
            # Flush and release the event files even when training fails.
            for writer in summary_writers:
                writer.close()