def dev(ckpt_path, k=30, mode=tf.estimator.ModeKeys.PREDICT):
    print("read data...")
    ans = read_ans("data/id_data_sort", 16)
    dev_data = read_dev("data/id_dev_2w", 16)
    print("read data done")
    with tf.Graph().as_default():
        with tf.device("/gpu:0"):
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            session_conf.gpu_options.allow_growth = True
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                model = APCNN(FLAGS, mode)
                saver = tf.train.Saver(tf.global_variables())
                sess.run(tf.global_variables_initializer())
                saver.restore(sess=sess, save_path=ckpt_path)
                cnt = 0
                dev_count = 0
                for userq in dev_data:
                    print("\tEvaluation step:", dev_count)
                    dev_count += 1
                    q = userq.strip().split()
                    q = q[:FLAGS.max_sequence_length]
                    q = q + [1] * (FLAGS.max_sequence_length - len(q))
                    devs = [(q, a) for a in ans]
                    scores = []
                    batches = data_loader.batch_iter(devs, FLAGS.batch_size,
                                                     1, False)
                    for batch in batches:
                        feed_dict = {
                            model.usrq: batch[:, 0],
                            model.pos: batch[:, 1],
                            model.dropout_keep_prob: 1.0,
                            model.is_training: False
                        }
                        score = sess.run(model.score, feed_dict)
                        # Accumulate scores in numpy instead of building new
                        # graph ops (tf.reshape / tf.nn.top_k) on every batch.
                        scores.append(np.reshape(score, [-1]))
                    scores = np.concatenate(scores)
                    # Indices of the top-k scores, highest first.
                    index = np.argsort(-scores)[:k]
                    recalls = np.array(ans)[index]  # recalled similar questions
                    for recall in recalls:
                        recall = list(recall)
                        if recall in dev_data[userq]:
                            cnt += 1
                            break  # stop as soon as one recalled question hits
    # Float division so this also works under Python 2.
    return cnt / float(len(dev_data))
def train(): print("Configuring TensorBoard and Saver...") # 配置 Tensorboard,重新训练时,请将tensorboard文件夹删除,不然图会覆盖 tensorboard_dir = 'tensorboard/rnn' if not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) merged = tf.summary.merge_all() writer = tf.summary.FileWriter(tensorboard_dir) # 配置 Saver saver = tf.train.Saver() if not os.path.exists(save_dir): os.makedirs(save_dir) # 创建session sess = tf.Session() sess.run(tf.global_variables_initializer()) writer.add_graph(sess.graph) # 读取数据 print("Loading training data: %.1f.csv" % f) tarin_dir = os.path.join(base_dir, data_dir) x_train, y_train = process_file(tarin_dir, config) x_normlize = x_train y_normlize = y_train # 开始训练 print('Training and evaluating...') for epoch in range(config.epoches): x_batch, y_batch = batch_iter(x_normlize, y_normlize, config) for iteration in range(config.num_iterations): # print('Epoch: ', epoch + 1) # batch_train = batch_iter(x_train, y_train, config.batch_size) feed_dict = { model.xs: x_batch, model.ys: y_batch, # create initial state } _, cost, state, pred = sess.run([ model.train_op, model.cost, model.cell_final_state, model.pred ], feed_dict=feed_dict) # plotting # if v % 20 == 0: # plt.figure(v) # plt.plot(t[0, :], (v/10)*y_batch[0].flatten(), 'r', t[0, :], (v/10)*pred.flatten()[:config.time_steps], 'b--', # t[0, :], (v/10) *x_batch[0].flatten(), 'k-.') # plt.ylim((-16, 16)) # plt.draw() # plt.pause(0.3) if iteration % 20 == 0: print('cost: ', round(cost, 4)) result = sess.run(merged, feed_dict) writer.add_summary(result, iteration) # 保存网络 saver.save(sess=sess, save_path=save_path)
def evaluate(sess, x, y):
    data_len = len(x)
    batch_eval = batch_iter(x, y, 128)
    total_acc = 0.0
    total_loss = 0.0
    for x_batch, y_batch in batch_eval:
        feed_dict = feed_data(x_batch, y_batch, 1.0)
        # The loss and acc computed here are means over this batch
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss = total_loss + loss * len(x_batch)
        total_acc = total_acc + acc * len(x_batch)
    return total_loss / data_len, total_acc / data_len
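# The evaluate() variants in this file all rely on a feed_data() helper that
# maps a batch onto the model's placeholders but is defined elsewhere. A
# minimal sketch of the assumed three-argument form (placeholder names such
# as input_x / input_y / keep_prob are assumptions taken from the other
# snippets below, not a confirmed signature):
def feed_data(x_batch, y_batch, keep_prob):
    # Map one batch of inputs, labels, and the dropout keep probability
    # onto the model's placeholders.
    return {
        model.input_x: x_batch,
        model.input_y: y_batch,
        model.keep_prob: keep_prob,
    }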
def get_genre():
    try:
        with graph.as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph(
                    "{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)

                albums = request.args.get('albums')
                x_raw = albums.split(',')
                all_predictions = []
                x_test = np.array(list(vocab_processor.transform(x_raw)))

                # Get the placeholders from the graph by name
                input_x = graph.get_operation_by_name("input_x").outputs[0]
                dropout_keep_prob = graph.get_operation_by_name(
                    "dropout_keep_prob").outputs[0]

                # Tensors we want to evaluate
                predictions = graph.get_operation_by_name(
                    "output/predictions").outputs[0]

                # Generate batches for one epoch
                batches = data_loader.batch_iter(list(x_test),
                                                 FLAGS.batch_size, 1,
                                                 shuffle=False)
                for x_test_batch in batches:
                    batch_predictions = sess.run(predictions, {
                        input_x: x_test_batch,
                        dropout_keep_prob: 1.0
                    })
                    all_predictions = np.concatenate(
                        [all_predictions, batch_predictions])
                # Use a list (not a lazy map object) so jsonify can
                # serialize the results.
                return jsonify({
                    'results': [data_loader.genre_ids[int(x)]
                                for x in all_predictions]
                })
    except Exception as e:
        print(e)
def evaluate(sess, x_, y_):
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, 128)
    total_loss = 0.0
    total_acc = 0.0
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = feed_data(x_batch, y_batch, 1.0)
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss += loss * batch_len
        total_acc += acc * batch_len
    return total_loss / data_len, total_acc / data_len
def evaluate(sess, x_, y_):
    """Evaluate accuracy and loss on a dataset."""
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, 128)
    total_loss = 0.0
    total_acc = 0.0
    y_pred_class = []
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = feed_data(x_batch, y_batch, 1.0)
        batch_pred, loss, acc = sess.run(
            [model.y_pred_cls, model.loss, model.acc], feed_dict=feed_dict)
        # Collect predictions from every batch, not just the last one.
        y_pred_class.append(batch_pred)
        total_loss += loss * batch_len
        total_acc += acc * batch_len
    y_pred_class = np.concatenate(y_pred_class)
    return y_pred_class, total_loss / data_len, total_acc / data_len
def evaluate(sess, x_, y_):
    """Evaluate accuracy and loss on a dataset."""
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, config.batch_size)
    total_loss = 0.0
    total_acc = 0.0
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = feed_data(x_batch, y_batch, 1.0, False)
        loss, pred_prob = sess.run([model.loss, model.pred_prob],
                                   feed_dict=feed_dict)
        total_loss += loss * batch_len
        acc = get_acc(pred_prob, y_batch)
        total_acc += acc * batch_len
    return total_loss / data_len, total_acc / data_len
def evaluate(sess, x_, y_):
    """Evaluate accuracy and loss on a dataset."""
    data_len = len(x_)
    # Get an iterable batch generator
    batch_eval = batch_iter(x_, y_, 128)
    total_loss = 0.0
    total_acc = 0.0
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = feed_data(x_batch, y_batch, 1.0)
        # Compute loss and accuracy under the current model
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss += loss * batch_len
        total_acc += acc * batch_len
    # Return the averaged loss and accuracy
    return total_loss / data_len, total_acc / data_len
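# The (x, y, batch_size) form of batch_iter used by the evaluate() variants
# above is not defined in this file. A minimal sketch of what it is assumed
# to look like: a generator yielding aligned (x_batch, y_batch) slices.
def batch_iter(x, y, batch_size=64):
    data_len = len(x)
    num_batch = int((data_len - 1) / batch_size) + 1
    for i in range(num_batch):
        start = i * batch_size
        end = min((i + 1) * batch_size, data_len)
        yield x[start:end], y[start:end]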
def test():
    # v = random.randint(start, end)
    # Load data
    print("Loading test data: ")
    # train_data = os.path.join(base_dir, dir)
    train_dir = os.path.join(base_dir, data_dir)
    x_data, y_data = process_file(train_dir, config)
    # x_max = max(abs(x_test))
    # y_max = max(y_train)
    x_normlize = x_data
    y_normlize = y_data

    # Create session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=sess, save_path=save_path)  # load the saved model

    print('Testing...')
    x_batch, y_batch = batch_iter(x_normlize, y_normlize, config)
    x_test = x_batch[1].reshape(1, 1000, 1)
    y_test = y_batch[1].reshape(1, 1000, 1)
    feed_dict = {
        model.xs: x_test,
        model.ys: y_test,
        # create initial state
    }
    state, pred = sess.run([model.cell_final_state, model.pred],
                           feed_dict=feed_dict)
    # plotting
    # plt.figure(v)
    plt.plot(x_test.flatten(), 'r', y_test.flatten(), 'b--',
             state.flatten(), 'k-.')
    plt.ylim((-16, 16))
    plt.draw()
    plt.pause(0.3)
    os.system("pause")
def train(): print "Loading data..." data = data_loader.read_data2(FLAGS.train_file, FLAGS.max_sequence_length, FLAGS.pad_id) print "Data Size:", len(data) print "Loading dev data..." dev_data = load_dev_data(FLAGS.dev_data, FLAGS.max_sequence_length, FLAGS.pad_id) # assert len(dev_data) == 800 print "Dev data Size:", len(dev_data) with tf.device('/gpu:4'): with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = True sess = tf.Session(config=session_conf) #sess = tf_debug.LocalCLIDebugWrapperSession(sess) with sess.as_default(): cnn = Transformer(sequence_length=FLAGS.max_sequence_length, word_vocab_size=FLAGS.vocab_size, hidden_units=FLAGS.hidden_units, tag_vocab_size=FLAGS.tag_vocab_size, num_blocks=FLAGS.num_blocks, num_heads=FLAGS.num_heads, margin=FLAGS.margin) global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) grads_and_vars = optimizer.compute_gradients(cnn.loss) capped_gvs = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in grads_and_vars] train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", FLAGS.model_name, timestamp)) print("Writing to {}\n".format(out_dir)) # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) # Initialize all variables sess.run(tf.global_variables_initializer()) ############restore embedding################## if FLAGS.restore_pretrained_embedding: embedding_var_name = "embedding/embedding_W:0" # 得到该网络中,所有可以加载的参数 variables = tf.contrib.framework.get_variables_to_restore() variables_to_resotre = [ v for v in variables if v.name == embedding_var_name ] saver = tf.train.Saver(variables_to_resotre) saver.restore(sess, FLAGS.pretrained_embeddings_path) print "Restore embeddings from", FLAGS.pretrained_embeddings_path saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) restore = FLAGS.restore_model if restore: saver.restore(sess, FLAGS.model_path) print( "*" * 20 + "\nReading model parameters from %s \n" % FLAGS.model_path + "*" * 20) else: print("*" * 20 + "\nCreated model with fresh parameters.\n" + "*" * 20) def train_step(q_batch, pos_batch, neg_batch, q_tag_batch, pos_tag_batch, neg_tag_batch, epoch): """ A single training step """ feed_dict = { cnn.input_x_1: q_batch, cnn.input_x_2: pos_batch, cnn.input_x_3: neg_batch, cnn.input_x_11: q_tag_batch, cnn.input_x_22: pos_tag_batch, cnn.input_x_33: neg_tag_batch, cnn.dropout_prob: FLAGS.dropout_prob, cnn.is_training: True } _, step, loss = sess.run([train_op, global_step, cnn.loss], feed_dict) time_str = datetime.datetime.now().isoformat() print "{}: Epoch {} step {}, loss {:g}".format( time_str, epoch, step, loss) def dev_step(): index = [] for sample in dev_data: usrq, usrq_tag = sample[0] ess = sample[1:] usrqs = [] usrq_tags = [] esqs = [] esq_tags = [] for esq in ess: q, tag = esq usrqs.append(usrq) usrq_tags.append(usrq_tag) esqs.append(q) esq_tags.append(tag) feed_dict = { cnn.input_x_1: usrqs, cnn.input_x_2: esqs, cnn.input_x_11: usrq_tags, cnn.input_x_22: esq_tags, 
cnn.dropout_prob: 0.0, cnn.is_training: False } score = tf.reshape(cnn.output_prob, [-1]) ind = tf.argmax(score, 0) i = sess.run(ind, feed_dict) index.append(i) assert len(index) == len(dev_data) result = get_result(index) return result # Generate batches batches = data_loader.batch_iter(data, FLAGS.batch_size, FLAGS.max_epoch, True) num_batches_per_epoch = int((len(data)) / FLAGS.batch_size) + 1 # Training loop. For each batch... epoch = 0 max_dev_res = 0 max_step = 0 for batch in batches: q_batch = batch[:, 0] pos_batch = batch[:, 1] neg_batch = batch[:, 2] q_tag_batch = batch[:, 3] pos_tag_batch = batch[:, 4] neg_tag_batch = batch[:, 5] train_step(q_batch, pos_batch, neg_batch, q_tag_batch, pos_tag_batch, neg_tag_batch, epoch) current_step = tf.train.global_step(sess, global_step) if current_step % num_batches_per_epoch == 0: epoch += 1 if current_step % FLAGS.checkpoint_every == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path)) dev_res = dev_step() print "Evaluation result is: ", dev_res if dev_res > max_dev_res: max_dev_res = dev_res max_step = current_step print "Untill now, the max dev result is", max_dev_res, "in", max_step, "step."
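# The (query, positive, negative) triplets fed above suggest a margin-based
# ranking loss inside the Transformer/APCNN models, consistent with the
# margin=FLAGS.margin argument. The models' internals are not shown in this
# file; the following is only a minimal sketch of such a loss, assuming
# precomputed similarity scores sim_pos and sim_neg between the query and
# each candidate (names hypothetical):
import tensorflow as tf

def margin_ranking_loss(sim_pos, sim_neg, margin):
    # Hinge loss: push sim_pos above sim_neg by at least `margin`.
    return tf.reduce_mean(tf.maximum(0., margin - sim_pos + sim_neg))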
    for f in F1:
        print('\t'.join(['%0.1f' % f[0], str(f[2]), str(f[3]),
                         str(f[4]), str(f[5])]))
    return auc, F1


if __name__ == '__main__':
    tf.reset_default_graph()
    base_dir = sys.argv[1]
    save_dir = sys.argv[2]
    ckpt_dir = sys.argv[3]
    train_dir = os.path.join(base_dir, 'train.txt')
    test_dir = os.path.join(base_dir, 'test.txt')
    val_dir = os.path.join(base_dir, 'val.txt')
    vocab_dir = os.path.join(base_dir, 'vocab.txt')
    predict_dir = os.path.join(base_dir, 'predict.txt')
    # Path for saving the best validation model
    save_path = os.path.join(save_dir, 'best_validation')
    if len(sys.argv) > 4:
        option = sys.argv[4]
    else:
        option = 'train'
    print('Configuring RNN model...')
    config = TRNNConfig()
    tokenizer = Tokenizer(vocab_dir)
    config.vocab_size = len(tokenizer.vocab)
    model = TextRNN(config)
    print('Total number of parameters: %d' %
          np.sum([np.prod(v.get_shape().as_list())
                  for v in tf.trainable_variables()]))
    if option == 'train':
        iter = batch_iter(train_dir, tokenizer, epochs=config.num_epochs)
        iter_test = batch_iter_test(val_dir, tokenizer)
        train()
    else:
        test()
def train(mode):
    print("Loading data...")
    data = data_loader.read_data(FLAGS.train_file, FLAGS.max_sequence_length)
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = APCNN(FLAGS, mode)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", FLAGS.model_name,
                             timestamp))
            print("Writing to {}\n".format(out_dir))

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            restore = FLAGS.restore_model
            if restore:
                saver.restore(sess, FLAGS.model_path)
                print("*" * 20 +
                      "\nReading model parameters from %s \n" %
                      FLAGS.model_path + "*" * 20)
            else:
                print("*" * 20 + "\nCreated model with fresh parameters.\n" +
                      "*" * 20)

            def train_step(q_batch, pos_batch, neg_batch, epoch):
                """A single training step."""
                feed_dict = {
                    cnn.usrq: q_batch,
                    cnn.pos: pos_batch,
                    cnn.neg: neg_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.is_training: True
                }
                _, step, loss = sess.run(
                    [cnn.update, cnn.global_step, cnn.loss], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: Epoch {} step {}, loss {:g}".format(
                    time_str, epoch, step, loss))

            # Generate batches
            batches = data_loader.batch_iter(data, FLAGS.batch_size,
                                             FLAGS.max_epoch, True)
            num_batches_per_epoch = int(len(data) / FLAGS.batch_size) + 1
            # Training loop. For each batch...
            epoch = 0
            for batch in batches:
                q_batch = batch[:, 0]
                pos_batch = batch[:, 1]
                neg_batch = batch[:, 2]
                train_step(q_batch, pos_batch, neg_batch, epoch)
                current_step = tf.train.global_step(sess, cnn.global_step)
                if current_step % num_batches_per_epoch == 0:
                    epoch += 1
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(): print("Configuring TensorBoard and Saver...") # 配置 Tensorboard,重新训练时,请将tensorboard文件夹删除,不然图会覆盖 tensorboard_dir = 'tensorboard/textcnn' if not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) tf.summary.scalar("loss", model.loss) #tf.summary.scalar("accuracy", model.acc) merged_summary = tf.summary.merge_all() writer = tf.summary.FileWriter(tensorboard_dir) print("Loading training and validation data...") # 载入训练集与验证集 start_time = time.time() x_train, y_train = process_file(train_dir, word_to_id, cat_to_id, config.seq_length) x_val, y_val = process_file(val_dir, word_to_id, cat_to_id, config.seq_length) time_dif = get_time_dif(start_time) print("Time usage:", time_dif) tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True # 创建session session = tf.Session(config=tf_config) session.run(tf.global_variables_initializer()) writer.add_graph(session.graph) # 配置 Saver saver = tf.train.Saver(tf.global_variables()) if not os.path.exists(save_dir): os.makedirs(save_dir) else: saver.restore(session, save_path=save_path) print('Training and evaluating...') start_time = time.time() total_batch = 0 # 总批次 best_acc_val = 0.0 # 最佳验证集准确率 last_improved = 0 # 记录上一次提升批次 require_improvement = 1000 # 如果超过1000轮未提升,提前结束训练 flag = False for epoch in range(config.num_epochs): print('Epoch:', epoch + 1) batch_train = batch_iter(x_train, y_train, config.batch_size) for x_batch, y_batch in batch_train: feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob, True) session.run(model.optim, feed_dict=feed_dict) # 运行优化 #ot = session.run(model.rnn_output,feed_dict=feed_dict) #aot = session.run(model.att_out,feed_dict=feed_dict) #pools = session.run(model.pools,feed_dict=feed_dict) #pools2 = session.run(model.pool2, feed_dict=feed_dict) #alls = session.run(model.all,feed_dict=feed_dict) #convs = session.run(model.convs,feed_dict=feed_dict) if total_batch % config.save_per_batch == 0: # 每多少轮次将训练结果写入tensorboard scalar s = session.run(merged_summary, feed_dict=feed_dict) writer.add_summary(s, total_batch) if total_batch % config.print_per_batch == 0: # 每多少轮次输出在训练集和验证集上的性能 feed_dict[model.keep_prob] = 1.0 # feed_dict[model.istraining] = False loss_train, pred_prob_train = session.run( [model.loss, model.pred_prob], feed_dict=feed_dict) train_acc = get_acc(pred_prob_train, y_batch) loss_val, acc_val = evaluate(session, x_val, y_val) # todo if acc_val > best_acc_val: # 保存最好结果 best_acc_val = acc_val last_improved = total_batch saver.save(sess=session, save_path=save_path) improved_str = '*' else: improved_str = '' time_dif = get_time_dif(start_time) msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \ + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}' print( msg.format(total_batch, loss_train, train_acc, loss_val, acc_val, time_dif, improved_str)) total_batch += 1 if total_batch - last_improved > require_improvement: config.decay_steps = total_batch + 1 config.decay_rate = config.decay_rate * 0.1
def main(_):
    print("Loading data...")
    x, y, sequence_length = data_loader.read_data(FLAGS.pos_data,
                                                  FLAGS.neg_data,
                                                  FLAGS.max_word_length,
                                                  FLAGS.max_seq_length)
    print("Data Size:", len(y))
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    seq_shuffled = sequence_length[shuffle_indices]

    dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y)))
    x_train, x_dev = (x_shuffled[:dev_sample_index],
                      x_shuffled[dev_sample_index:])
    y_train, y_dev = (y_shuffled[:dev_sample_index],
                      y_shuffled[dev_sample_index:])
    seq_train, seq_dev = (seq_shuffled[:dev_sample_index],
                          seq_shuffled[dev_sample_index:])
    del x, y, sequence_length, x_shuffled, y_shuffled, seq_shuffled
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        # session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = CharCNN(char_vocab_size=FLAGS.char_vocab_size,
                          char_embed_size=FLAGS.char_embed_size,
                          batch_size=FLAGS.batch_size,
                          max_word_length=FLAGS.max_word_length,
                          max_seq_length=FLAGS.max_seq_length,
                          filters=eval(FLAGS.filters),
                          filter_sizes=eval(FLAGS.filter_sizes),
                          num_classes=FLAGS.num_classes,
                          rnn_size=FLAGS.rnn_size,
                          attention_size=FLAGS.attention_size)

            save_path = os.path.join(FLAGS.save_path)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            saver = tf.train.Saver(tf.trainable_variables())
            for v in tf.trainable_variables():
                print("Save:", v.name)
            sess.run(tf.global_variables_initializer())

            check_point_dir = os.path.join(FLAGS.save_path)
            ckpt = tf.train.get_checkpoint_state(check_point_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")

            batches = data_loader.batch_iter(
                list(zip(x_train, y_train, seq_train)), FLAGS.batch_size,
                FLAGS.num_epochs)
            global_max_acc = 0
            for batch in batches:
                x_batch, y_batch, seq_batch = zip(*batch)
                train_step(x_batch, y_batch, seq_batch, sess, cnn)
                current_step = tf.train.global_step(sess, cnn.global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    max_dev_acc = 0
                    print("\nEvaluation:")
                    batches_dev = data_loader.batch_iter(
                        list(zip(x_dev, y_dev, seq_dev)), FLAGS.batch_size, 1)
                    for batch_dev in batches_dev:
                        x_batch_dev, y_batch_dev, seq_batch_dev = zip(
                            *batch_dev)
                        max_dev_acc = dev_step(x_batch_dev, y_batch_dev,
                                               seq_batch_dev, sess, cnn,
                                               max_dev_acc)
                    print("During this evaluation phase, the max accuracy is:",
                          max_dev_acc)
                    if max_dev_acc > global_max_acc:
                        global_max_acc = max_dev_acc
                    print("\nUntil now, the max accuracy is:", global_max_acc)
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, os.path.join(save_path, "model"),
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    feed_dict = {
        cnn.input_x: x_batch,
        cnn.input_y: y_batch,
        cnn.dropout_keep_prob: 1.0
    }
    step, summaries, loss, accuracy = sess.run(
        [global_step, dev_summary_op, cnn.loss, cnn.accuracy], feed_dict)
    time_str = datetime.datetime.now().isoformat()
    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss,
                                                    accuracy))
    if writer:
        writer.add_summary(summaries, step)

# Generate batches
batches = data_loader.batch_iter(list(zip(x_train, y_train)),
                                 FLAGS.batch_size, FLAGS.num_epochs)
# Training loop. For each batch...
for batch in batches:
    x_batch, y_batch = zip(*batch)
    train_step(x_batch, y_batch)
    current_step = tf.train.global_step(sess, global_step)
    if current_step % FLAGS.evaluate_every == 0:
        print("\nEvaluation:")
        dev_step(x_dev, y_dev, writer=dev_summary_writer)
        print("")
    if current_step % FLAGS.checkpoint_every == 0:
        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
        print("Saved model checkpoint to {}\n".format(path))
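# data_loader.batch_iter(data, batch_size, num_epochs, shuffle) is called
# throughout these snippets but defined elsewhere. A minimal sketch of the
# usual TF-tutorial implementation it appears to follow: a generator that
# yields shuffled numpy batches for the given number of epochs.
import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Reshuffle the data at the start of each epoch if requested.
        if shuffle:
            shuffled = data[np.random.permutation(np.arange(data_size))]
        else:
            shuffled = data
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, data_size)
            yield shuffled[start:end]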
###############################################################################
input_model = InputModule(word_dim, word_dim, sent_maxlen).cuda()
question_model = InputModule(word_dim, word_dim, 1).cuda()
episodic_model = EpisodicModule(word_dim, word_dim).cuda()
answer_model = AnswerModule(word_dim, len(train_dict), word_dim).cuda()
loss_model = nn.CrossEntropyLoss().cuda()
# parameters() is assumed to gather the parameters of the four modules above.
optimizer = torch.optim.SGD(parameters(), lr=learning_rate, momentum=momentum)

for i in range(num_epochs):
    if i == 5:
        beta = 1
    # Training
    train_batches = data_loader.batch_iter(
        list(zip(train_input, train_input_len, train_question,
                 train_question_len, train_target, train_gate)), batch_size)
    total_loss = 0.
    total_hc = 0.
    for j, train_batch in enumerate(train_batches):
        (batch_input, batch_input_len, batch_question, batch_question_len,
         batch_target, batch_gate) = zip(*train_batch)
        batch_hc, batch_loss = run(batch_input, batch_input_len,
                                   batch_question, batch_question_len,
                                   batch_target, batch_gate, step=1)
        total_hc += batch_hc
        total_loss += batch_loss
def inference(word_list, user_dict, train, ckpt_path, k=30,
              mode=tf.estimator.ModeKeys.PREDICT):
    k = int(k)
    tokenizer = jieba.Tokenizer()
    tokenizer.load_userdict(user_dict)
    # In vocab the tokens are unicode, in id2tok they are str;
    # the ids are ints in both.
    vocab, id2tok = build_vocab(word_list)
    print("read data")
    alist = read_ans(train, FLAGS.max_sequence_length)  # a 2-D list
    with tf.Graph().as_default():
        with tf.device("/gpu:0"):
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            session_conf.gpu_options.allow_growth = True
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                model = APCNN(FLAGS, mode)
                saver = tf.train.Saver(tf.global_variables())
                sess.run(tf.global_variables_initializer())
                saver.restore(sess=sess, save_path=ckpt_path)
                while True:
                    print("Please input query:")
                    line = sys.stdin.readline().strip()
                    if not line:
                        line = "小米蓝牙手柄能连接手机玩吗"
                    ws = tokenizer.cut(line)  # each cut token is unicode
                    ws = list(ws)
                    q = "_".join(ws)
                    ws_enc = [tok.encode("utf-8") for tok in ws]
                    q_enc = "_".join(ws_enc)
                    print("tokenized query is:", q_enc)
                    q = tok2id(q, FLAGS.max_sequence_length, vocab)  # a list
                    print("id q is:", q)
                    devs = [(q, a) for a in alist]
                    scores = []
                    batches = data_loader.batch_iter(devs, FLAGS.batch_size,
                                                     1, False)
                    for batch in batches:
                        feed_dict = {
                            model.usrq: batch[:, 0],
                            model.pos: batch[:, 1],
                            model.dropout_keep_prob: 1.0,
                            model.is_training: False
                        }
                        score = sess.run(model.score, feed_dict)
                        # Accumulate scores in numpy; avoid creating new
                        # graph ops inside the loop.
                        scores.append(np.reshape(score, [-1]))
                    scores = np.concatenate(scores)
                    index = np.argsort(-scores)[:k]  # top-k indices
                    recalls = np.array(alist)[index]  # recalled similar questions
                    print("Recall results are: \n")
                    for recall in recalls:
                        line = de_id(recall, id2tok)
                        print(line, "\n")
def train(X_train, X_dev, x_test, y_train, y_dev, y_test):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(
                sequence_length=max_document_length,
                num_classes=2,
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                embedding_table=W,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            # grad_summaries = []
            # for g, v in grads_and_vars:
            #     if g is not None:
            #         grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
            #         sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
            #         grad_summaries.append(grad_hist_summary)
            #         grad_summaries.append(sparsity_summary)
            # grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs",
                                                   timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy (pre-1.0 summary API)
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)

            # Train/dev summary writers, checkpointing, and vocabulary saving
            # were left commented out here.
            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it.
            # checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            # checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            # if not os.path.exists(checkpoint_dir):
            #     os.makedirs(checkpoint_dir)
            # saver = tf.train.Saver(tf.all_variables())
            # Write vocabulary
            # vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch):
                """A single training step."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch, writer=None):
                """Evaluates model on a dev set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            # Generate batches
            batches = data_loader.batch_iter(list(zip(X_train, y_train)),
                                             FLAGS.batch_size,
                                             FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(X_dev, y_dev, writer=None)
                    print("")

            # Test loop: generate batches for one epoch
            batches = data_loader.batch_iter(list(x_test), FLAGS.batch_size,
                                             1, shuffle=False)
            # Collect the predictions here
            all_predictions = []
            for x_test_batch in batches:
                batch_predictions = sess.run(cnn.predictions, {
                    cnn.input_x: x_test_batch,
                    cnn.dropout_keep_prob: 1.0
                })
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])
            correct_predictions = float(
                sum(all_predictions == np.argmax(y_test, axis=1)))
            print("Total number of test examples: {}".format(len(y_test)))
            print("Accuracy: {:g}".format(
                correct_predictions / float(len(y_test))))
            out_path = os.path.abspath(os.path.join(os.path.curdir, "runs",
                                                    "test"))
            open(out_path, 'a').write("{:g},".format(
                correct_predictions / float(len(y_test))))
            print("\nWritten successfully!\n")
            def predict_step(x_batch, y_batch):
                """Use the trained model to predict on a test set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, output, scores = sess.run(
                    [global_step, cnn.predictions, cnn.scores], feed_dict)
                return output, scores

            # Generate batches
            batches = data_loader.batch_iter(list(zip(x_train, y_train)),
                                             FLAGS.batch_size,
                                             FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_acc = dev_step(x_dev, y_dev,
                                       writer=dev_summary_writer)
                    if dev_acc > 0.9:
                        if dev_acc > max_acc:
                            max_acc = dev_acc
                            max_checkpoint = current_step
                        print("The max acc now is:", colored(max_acc, "red"),
                              "step is:", colored(max_checkpoint, "red"))
                        predictions, scores = predict_step(x_test, y_test)
def train(x_train, y_train, y_text_train, vocab_processor, x_dev, y_dev,
          y_text_dev):
    # Training
    # ==================================================
    # vocab_size = len(vocab_processor.vocabulary_)
    mask_y_text_dev = np.ones([len(y_text_dev), len(y_text_dev[0])])
    mask_y_text_dev[[i == 1 for i in y_dev], :] = 0
    mask_y_text_dev[y_text_dev == 0] = 0
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=2,
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs",
                                                   timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                         sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch, y_text_batch, mask_y_text):
                """A single training step."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.input_y_text: y_text_batch,
                    cnn.mask_y_text: mask_y_text,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss,
                     cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, y_text_batch, mask_y_text,
                         writer=None):
                """Evaluates model on a dev set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.input_y_text: y_text_batch,
                    cnn.mask_y_text: mask_y_text,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_loader.batch_iter(
                list(zip(x_train, y_train, y_text_train)), FLAGS.batch_size,
                FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch, y_text_batch = zip(*batch)
                # Build the batch mask the same way as the dev mask above:
                # zero out rows of class-1 examples and padded positions.
                mask_y_text = np.ones([len(y_text_batch),
                                       len(y_text_batch[0])])
                mask_y_text[[i == 1 for i in y_batch], :] = 0
                mask_y_text[np.array(y_text_batch) == 0] = 0
                train_step(x_batch, y_batch, y_text_batch, mask_y_text)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, y_text_dev, mask_y_text_dev,
                             writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
def train(train_dir, val_dir):
    # Path for saving the best validation model
    save_dir = "./best_model"
    save_path = os.path.join(save_dir, "best_validation")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.train.Saver()

    # Configure TensorBoard: collect loss and accuracy during training
    tensorboard_dir = "./tensorboard"
    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Load training and validation data
    print("Loading train and val data...................")
    start_time = time.time()
    x_train, y_train = word_to_id(train_dir, vocab_size=config.vocab_size,
                                  max_length=config.seq_length)
    x_val, y_val = word_to_id(val_dir, vocab_size=config.vocab_size,
                              max_length=config.seq_length)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    print("Training and evaluating................................")
    start_time = time.time()
    total_batch = 0           # training batches seen
    best_val_acc = 0.0        # best validation accuracy
    last_improved_batch = 0   # batch at which validation accuracy last improved
    require_improved = 800    # stop early after 800 batches without improvement
    flag = False              # whether to stop training early
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        for epoch in range(config.n_epochs):
            print("Epoch:", epoch)
            batch_train = batch_iter(x_train, y_train, config.batch_size)
            for x_batch, y_batch in batch_train:
                feed_dict = feed_data(x_batch, y_batch,
                                      dropout_prob=config.dropout_keep_prob)
                # Periodically write summaries to TensorBoard
                if total_batch % config.save_per_batch == 0:
                    summary = sess.run(merged_summary, feed_dict=feed_dict)
                    writer.add_summary(summary, total_batch)
                # Periodically report loss and accuracy on train and val sets
                if total_batch % config.print_per_batch == 0:
                    # Training-set loss and accuracy
                    feed_dict[model.dropout_prob] = 1.0
                    train_loss, train_acc = sess.run([model.loss, model.acc],
                                                     feed_dict=feed_dict)
                    # Validation-set loss and accuracy
                    val_loss, val_acc = evaluate(sess, x_val, y_val)
                    # Save the best model so far
                    if val_acc > best_val_acc:
                        best_val_acc = val_acc
                        saver.save(sess, save_path)
                        last_improved_batch = total_batch
                        improved_str = "***"
                    else:
                        improved_str = ""
                    time_dif = get_time_dif(start_time)
                    msg = ("Iter:{0:>4},Train loss:{1:>6.2}, "
                           "Train accuracy:{2:>6.2%}, Val loss:{3:>6.2}, "
                           "Val accuracy:{4:>6.2%}, Time usage:{5} {6}")
                    print(msg.format(total_batch, train_loss, train_acc,
                                     val_loss, val_acc, time_dif,
                                     improved_str))
                # Optimize
                sess.run(model.optim, feed_dict=feed_dict)
                total_batch = total_batch + 1
                del x_batch
                del y_batch
                gc.collect()
                if total_batch - last_improved_batch == require_improved:
                    # Validation accuracy has not improved for a long time:
                    # stop training early.
                    print("No improvement for a long time, "
                          "auto-stopping..................")
                    flag = True
                    break
            if flag:
                break
def train(): print("Configuring TensorBoard and Saver...") # 配置 Tensorboard,重新训练时,请将tensorboard文件夹删除,不然图会覆盖 tensorboard_dir = os.path.join(base_dir, str(train_ratio) + '/tensorboard/textrnn') if not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) tf.summary.scalar("loss", model.loss) tf.summary.scalar("accuracy", model.acc) merged_summary = tf.summary.merge_all() writer = tf.summary.FileWriter(tensorboard_dir) # 配置 Saver saver = tf.train.Saver() if not os.path.exists(save_dir): os.makedirs(save_dir) #print("Loading training and validation data...") # 载入训练集与验证集 #start_time = time.time() #x_train, y_train = process_file(train_dir, word_to_id, cat_to_id,config.seq_length) #x_val, y_val = process_file(val_dir, word_to_id, cat_to_id, config.seq_length) #time_dif = get_time_dif(start_time) #print("Time usage:", time_dif) # 创建session session = tf.Session() session.run(tf.global_variables_initializer()) writer.add_graph(session.graph) print('Training and evaluating...') start_time = time.time() total_batch = 0 # 总批次 best_acc_val = 0.0 # 最佳验证集准确率 last_improved = 0 # 记录上一次提升批次 require_improvement = 1000 # 如果超过1000轮未提升,提前结束训练 flag = False for epoch in range(config.num_epochs): print('Epoch:', epoch + 1) batch_train = batch_iter(x_train, y_train, config.batch_size) for x_batch, y_batch in batch_train: feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob) if total_batch % config.save_per_batch == 0: # 每多少轮次将训练结果写入tensorboard scalar s = session.run(merged_summary, feed_dict=feed_dict) writer.add_summary(s, total_batch) if total_batch % config.print_per_batch == 0: # 每多少轮次输出在训练集和验证集上的性能 feed_dict[model.keep_prob] = 1.0 loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=feed_dict) loss_val, acc_val = evaluate(session, x_val, y_val) # todo if acc_val > best_acc_val: # 保存最好结果 best_acc_val = acc_val last_improved = total_batch saver.save(sess=session, save_path=save_path) improved_str = '*' else: improved_str = '' time_dif = get_time_dif(start_time) msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \ + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}' print( msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str)) session.run(model.optim, feed_dict=feed_dict) # 运行优化 total_batch += 1 if total_batch - last_improved > require_improvement: # 验证集正确率长期不提升,提前结束训练 print("No optimization for a long time, auto-stopping...") flag = True break # 跳出循环 if flag: # 同上 break
def train(): print("Configuring TensorBoard and Saver...") # tensorboard_dir = 'tensorboard/cnn' if not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) tf.summary.scalar("loss", model.loss) tf.summary.scalar("accuracy", model.acc) merged_summary = tf.summary.merge_all() writer = tf.summary.FileWriter(tensorboard_dir) # saver = tf.train.Saver() if not os.path.exists(save_dir): os.makedirs(save_dir) print("Loading training and validation data...") # start_time = time.time() x_train, y_train = process_file(train_dir, cat_to_id, config.seq_length) x_val, y_val = process_file(val_dir, cat_to_id, config.seq_length) # session = tf.Session() session.run(tf.global_variables_initializer()) writer.add_graph(session.graph) print('Training and evaluating...') start_time = time.time() total_batch = 0 # best_acc_val = 0.0 # last_improved = 0 # require_improvement = 10000 # flag = False for epoch in range(config.num_epochs): print('Epoch:', epoch + 1) batch_train = batch_iter(x_train, y_train, config.batch_size) for x_batch, y_batch in batch_train: feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob) if total_batch % config.save_per_batch == 0: #print(feed_dict) s = session.run(merged_summary, feed_dict=feed_dict) writer.add_summary(s, total_batch) if total_batch % config.print_per_batch == 0: # feed_dict[model.keep_prob] = 1.0 loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=feed_dict) loss_val, acc_val = evaluate(session, x_val, y_val) # todo if acc_val > best_acc_val: # best_acc_val = acc_val last_improved = total_batch saver.save(sess=session, save_path=save_path) improved_str = '*' else: improved_str = '' time_dif = get_time_dif(start_time) msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \ + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}' print( msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str)) session.run(model.optim, feed_dict=feed_dict) total_batch += 1 if total_batch - last_improved > require_improvement: # print("No optimization for a long time, auto-stopping...") flag = True break # if flag: # break
    if step == 3:
        np.save("composer_weight.npy",
                cnn_model.comp_weight.weight.data.cpu().numpy())
        np.save("id2comp.npy", id2comp)

###############################################################################
linear_model = LinearModule(feat_dim * max_comp_cnt, len(id2cult),
                            len(id2comp)).cuda()
loss_model = nn.CrossEntropyLoss().cuda()
# optimizer = torch.optim.SGD(parameters(), lr=learning_rate, momentum=momentum)
optimizer = torch.optim.Adam(parameters(), lr=learning_rate, betas=momentum)

for i in range(num_epochs):
    # Training
    train_batches = data_loader.batch_iter(
        list(zip(train_cult, train_comp, train_comp_len)), batch_size)
    total_hc = 0.
    total_loss = 0.
    for j, train_batch in enumerate(train_batches):
        batch_cult, batch_comp, batch_comp_len = zip(*train_batch)
        batch_hc, batch_loss = run(batch_cult, batch_comp, batch_comp_len,
                                   step=1)
        total_hc += batch_hc
        total_loss += batch_loss
        if (j + 1) % 1000 == 0:
            print("batch #{:d}:".format(j + 1),
                  "batch_loss:", total_loss / j,
                  "acc.:", total_hc / batch_size / j * 100,
                  datetime.datetime.now())
def train():
    # Data Preparation
    # Load data
    print("Loading data...")
    x, y = data_loader.read_data(FLAGS.pos_data, FLAGS.neg_data,
                                 FLAGS.max_sequence_length)
    print("Data Size:", len(y))
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y)))
    x_train, x_dev = (x_shuffled[:dev_sample_index],
                      x_shuffled[dev_sample_index:])
    y_train, y_dev = (y_shuffled[:dev_sample_index],
                      y_shuffled[dev_sample_index:])
    del x, y, x_shuffled, y_shuffled
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
    num_batches_per_epoch = int((len(x_train) - 1) / FLAGS.batch_size) + 1
    print("Loading data succeeded...")

    # ConvNet
    acc_list = [0]
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    session_conf.gpu_options.allow_growth = True
    # session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
    sess = tf.Session(config=session_conf)
    cnn = VDCNN(num_classes=y_train.shape[1],
                num_quantized_chars=FLAGS.vocab_size,
                depth=FLAGS.depth,
                sequence_max_length=FLAGS.max_sequence_length,
                downsampling_type=FLAGS.downsampling_type,
                use_he_uniform=FLAGS.use_he_uniform,
                optional_shortcut=FLAGS.optional_shortcut)

    # Optimizer and LR Decay
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(
            FLAGS.learning_rate, global_step,
            FLAGS.num_epochs * num_batches_per_epoch, 0.95, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)

    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss and accuracy
    loss_summary = tf.summary.scalar("loss", cnn.loss)
    acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary, acc_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                 sess.graph)

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

    # Checkpoint directory. Tensorflow assumes this directory already exists
    # so we need to create it.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=FLAGS.num_checkpoints)

    # Initialize Graph
    sess.run(tf.global_variables_initializer())
    # sess = tfdbg.LocalCLIDebugWrapperSession(sess)  # session wrapped by the debugger
    # sess.add_tensor_filter("has_inf_or_nan", tfdbg.has_inf_or_nan)  # add a debugger filter rule

    # Train Step and Test Step
    def train_step(x_batch, y_batch):
        """A single training step."""
        feed_dict = {cnn.input_x: x_batch, cnn.input_y: y_batch,
                     cnn.is_training: True}
        _, step, summaries, loss, accuracy = sess.run(
            [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
            feed_dict)
        train_summary_writer.add_summary(summaries, step)
        time_str = datetime.datetime.now().isoformat()
        print("{}: Step {}, Epoch {}, Loss {:g}, Acc {:g}".format(
            time_str, step, int(step // num_batches_per_epoch) + 1, loss,
            accuracy))
        # if step % FLAGS.evaluate_every == 0 and FLAGS.enable_tensorboard:
        #     summaries = sess.run(train_summary_op, feed_dict)
        #     train_summary_writer.add_summary(summaries, global_step=step)

    def test_step(x_batch, y_batch):
        """Evaluates model on a dev set."""
        feed_dict = {cnn.input_x: x_batch, cnn.input_y: y_batch,
                     cnn.is_training: False}
        summaries_dev, loss, preds, step = sess.run(
            [dev_summary_op, cnn.loss, cnn.predictions, global_step],
            feed_dict)
        dev_summary_writer.add_summary(summaries_dev, step)
        return preds, loss

    # Generate batches
    batches = data_loader.batch_iter(list(zip(x_train, y_train)),
                                     FLAGS.batch_size, FLAGS.num_epochs)
    # Training loop. For each batch...
    for train_batch in batches:
        x_batch, y_batch = zip(*train_batch)
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)
        # Testing loop
        if current_step % FLAGS.evaluate_every == 0:
            print("\nEvaluation:")
            i = 0
            index = 0
            sum_loss = 0
            test_batches = data_loader.batch_iter(list(zip(x_dev, y_dev)),
                                                  FLAGS.batch_size, 1,
                                                  shuffle=False)
            y_preds = np.ones(shape=len(y_dev), dtype=np.int)
            for test_batch in test_batches:
                x_test_batch, y_test_batch = zip(*test_batch)
                preds, test_loss = test_step(x_test_batch, y_test_batch)
                sum_loss += test_loss
                res = np.absolute(preds - np.argmax(y_test_batch, axis=1))
                y_preds[index:index + len(res)] = res
                i += 1
                index += len(res)
            time_str = datetime.datetime.now().isoformat()
            # Float division so this also works under Python 2.
            acc = np.count_nonzero(y_preds == 0) / float(len(y_preds))
            acc_list.append(acc)
            print("{}: Evaluation Summary, Loss {:g}, Acc {:g}".format(
                time_str, sum_loss / i, acc))
            print("{}: Current Max Acc {:g} in Iteration {}".format(
                time_str, max(acc_list),
                int(acc_list.index(max(acc_list)) * FLAGS.evaluate_every)))
        if current_step % FLAGS.checkpoint_every == 0:
            path = saver.save(sess, checkpoint_prefix,
                              global_step=current_step)
            print("Saved model checkpoint to {}\n".format(path))
def train(): print("Configuring TensorBoard and Saver...") # 配置 Tensorboard,重新训练时,请将tensorboard文件夹删除,不然图会覆盖 tensorboard_dir = 'tensorboard/tb30641000v01/' if not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) train_loss_summ = tf.summary.scalar("train_loss", model.loss) train_acc_summ = tf.summary.scalar("trian_accuracy", model.acc) val_loss_summ = tf.summary.scalar("validation_loss", model.loss) val_acc_summ = tf.summary.scalar("validation_accuracy", model.acc) writer = tf.summary.FileWriter(tensorboard_dir) # 配置 Saver saver = tf.train.Saver() if not os.path.exists(save_dir): os.makedirs(save_dir) print("Loading training and validation data...") # 载入训练集与验证集 start_time = time.time() x_train, y_train = process_file(train_txt_dirs, seq_length, word_to_id, cat_to_id, config.seq_length) print "训练样本总数是{}".format(len(x_train)) x_val, y_val = process_file(test_txt_dirs, seq_length, word_to_id, cat_to_id, config.seq_length) print "测试集样本总数是{}".format(len(x_val)) time_dif = get_time_dif(start_time) print("Time usage:", time_dif) # 创建session session = tf.Session() session.run(tf.global_variables_initializer()) writer.add_graph(session.graph) print('Training and evaluating...') start_time = time.time() total_batch = 0 # 总批次 best_acc_val = 0.0 # 最佳验证集准确率 last_improved = 0 # 记录上一次提升批次 require_improvement = 1000 # 如果超过1000轮未提升,提前结束训练 # flag = False for epoch in range(config.num_epochs): print('Epoch:', epoch + 1) batch_train = batch_iter(x_train, y_train, config.batch_size) for x_batch, y_batch in batch_train: feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob) if total_batch % config.print_per_batch == 0: # 每多少轮次输出在训练集和验证集上的性能 # feed_dict[model.keep_prob] = 1.0 loss_train, acc_train = session.run([model.loss, model.acc], feed_dict=feed_dict) train_loss_summ_, train_acc_summ_ = session.run([train_loss_summ, train_acc_summ], feed_dict=feed_dict) writer.add_summary(train_loss_summ_, total_batch) writer.add_summary(train_acc_summ_, total_batch) loss_val, acc_val = evaluate(session, x_val, y_val) # todo val_loss_summ_, val_acc_summ_ = session.run([val_loss_summ, val_acc_summ], feed_dict={model.input_x: x_val, model.input_y: y_val, model.keep_prob: 1.0}) writer.add_summary(val_loss_summ_, total_batch) writer.add_summary(val_acc_summ_, total_batch) if acc_val > best_acc_val: # 保存最好结果 best_acc_val = acc_val last_improved = total_batch saver.save(sess=session, save_path=save_path) improved_str = '*' else: improved_str = '' time_dif = get_time_dif(start_time) msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \ + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}' print(msg.format(total_batch, loss_train, acc_train, loss_val, acc_val, time_dif, improved_str)) session.run(model.optim, feed_dict=feed_dict) # 运行优化 total_batch += 1
def train(net='cnn', epoch=20):
    # Load the training set
    x_train, y_train = None, None
    for i in range(1, 6):
        x, y = process_file('data_batch_%d' % i)
        if x_train is None:
            x_train = x
            y_train = y
        else:
            x_train = np.append(x_train, x, axis=0)
            y_train = np.append(y_train, y, axis=0)
        del x, y
    x_dev, y_dev = process_file('test_batch')

    x = tf.placeholder(tf.float32, [
        None,
        config.image_width * config.image_height * config.image_channel
    ])
    x_reshape = tf.reshape(
        x, [-1, config.image_channel, config.image_height,
            config.image_width])
    # [batch, depth, height, width] => [batch, height, width, depth]
    x_reshape = tf.transpose(x_reshape, [0, 2, 3, 1])

    if net == 'cnn':
        model = CNN()
    elif net == 'vgg':
        model = VGG()
    else:
        pass
    out = model.output(input=x_reshape)

    y_ = tf.placeholder(tf.float32, [None, config.classes])
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y_))
    tf.summary.scalar('loss', loss)
    correct_prediction = tf.equal(tf.argmax(out, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)
    optimizer = tf.train.AdamOptimizer(config.learning_rate).minimize(loss)

    with tf.Session() as sess:
        writer = tf.summary.FileWriter("logs/", sess.graph)
        merged = tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())
        '''
        variable_names = [v.name for v in tf.trainable_variables()]
        values = sess.run(variable_names)
        for k, v in zip(variable_names, values):
            print("Variable: ", k)
            print("Shape: ", v.shape)
        print_num_of_total_parameters()
        '''
        step = 1
        best_acc = 0.
        start_time = time.time()
        for e in range(1, epoch + 1):
            for x_batch, y_batch in batch_iter(x=x_train, y=y_train,
                                               batch_size=config.batch_size):
                step = step + 1
                _, trainloss, train_acc = sess.run(
                    [optimizer, loss, accuracy],
                    feed_dict={
                        x: x_batch,
                        y_: y_batch,
                        model.keep_prob: 0.5,
                        model.is_training: True
                    })
                if step % 20 == 0:
                    pass
                    # print('Iterator:%d loss:%f train acc:%f' % (step, trainloss, train_acc))
                if step % 781 == 0:
                    train_acc, summary = sess.run(
                        [accuracy, merged],
                        feed_dict={
                            x: x_train[:10000],
                            y_: y_train[:10000],
                            model.keep_prob: 1.,
                            model.is_training: False
                        })
                    writer.add_summary(summary, e)
                    acc = sess.run(accuracy,
                                   feed_dict={
                                       x: x_dev,
                                       y_: y_dev,
                                       model.keep_prob: 1.,
                                       model.is_training: False
                                   })
                    print('Iterator:%d loss:%f train acc:%f' %
                          (step, trainloss, train_acc))
                    elapsed_time = time.time() - start_time
                    print('\033[1;32mepoch:%d/%d' % (e, config.epoch))
                    print('\033[1;32mvalidation accuracy:%f\033[0m' % acc,
                          end='')
                    if acc > best_acc:
                        best_acc = acc
                        print('\033[1;35m(new best acc!)\033[0m')
                    else:
                        print('')
""" feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0 } loss, preds = sess.run([cnn.loss, cnn.predictions], feed_dict) time_str = datetime.datetime.now().isoformat() return preds, loss # In[23]: batch_size = 32 num_epochs = 75 train_batches = data_loader.batch_iter(list(zip(train_data, train_label)), batch_size, num_epochs) # In[ ]: folder_to_test = 1 for train_batch in train_batches: x_batch, y_batch = zip(*train_batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) evaluate_every = 30 # Testing loop if current_step % evaluate_every == 0: print("\nEvaluation:") i = 0 index = 0 sum_loss = 0
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)

# Get the placeholders from the graph by name
input_x = graph.get_operation_by_name("input_x").outputs[0]
# input_y = graph.get_operation_by_name("input_y").outputs[0]
dropout_keep_prob = graph.get_operation_by_name(
    "dropout_keep_prob").outputs[0]

# Tensors we want to evaluate
predictions = graph.get_operation_by_name("output/predictions").outputs[0]

# Generate batches for one epoch
batches = data_loader.batch_iter(list(x_test), FLAGS.batch_size, 1,
                                 shuffle=False)

# Collect the predictions here
all_predictions = []
for x_test_batch in batches:
    batch_predictions = sess.run(predictions, {
        input_x: x_test_batch,
        dropout_keep_prob: 1.0
    })
    all_predictions = np.concatenate([all_predictions, batch_predictions])
print(all_predictions)