def train(x_train, y_train, vocab_processor, x_dev, y_dev):
    # Training
    rnn = TextRNN(
        sequence_length=config.timesteps,
        num_classes=config.num_classes,
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=config.embedding_dim,
        num_hidden=config.num_hidden,
        l2_reg_lambda=config.l2_reg_lambda,
        keep_prob=config.dropout_keep_prob,
        attention_size=config.attention_size)

    # Define training procedure
    optimizer = tf.train.AdamOptimizer(config.learning_rate)
    train_op = optimizer.minimize(rnn.loss)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        # Initialize all variables
        sess.run(init)

        # Generate batches
        batches = data_helpers.batch_iter(
            list(zip(x_train, y_train)), config.batch_size, config.training_steps)

        eval_min_loss = float('inf')
        early_stop_steps = 0

        # Training loop. Each iteration consumes one batch.
        for step, batch in enumerate(batches, 1):
            x_batch, y_batch = zip(*batch)
            feed_dict_train = {
                rnn.input_x: x_batch,
                rnn.input_y: y_batch,
            }
            _, loss_, accuracy_ = sess.run([train_op, rnn.loss, rnn.accuracy], feed_dict_train)

            if step % 50 == 0:
                print("step {}: loss {:g}, acc {:g}".format(step, loss_, accuracy_))

            if step % 100 == 0:
                print("\nEvaluation:")
                feed_dict_eval = {
                    rnn.input_x: x_dev,
                    rnn.input_y: y_dev,
                }
                loss_, accuracy_ = sess.run([rnn.loss, rnn.accuracy], feed_dict_eval)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}\n".format(time_str, step, loss_, accuracy_))

                # Early stopping: reset the patience counter whenever the dev loss improves.
                if loss_ < eval_min_loss:
                    eval_min_loss = loss_
                    early_stop_steps = 0
                else:
                    early_stop_steps += 1
                    if early_stop_steps == config.early_stop_steps:
                        print('Eval loss did not improve, stopping early.')
                        break
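# -----------------------------------------------------------------------------
# Several snippets in this section call a `data_helpers.batch_iter` helper
# without showing it (the exact signature even varies between scripts).
# The function below is a minimal sketch of what such a generator typically
# looks like; per-epoch shuffling and the partial final batch are assumptions,
# not taken from the original helpers.
# -----------------------------------------------------------------------------
import numpy as np


def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield mini-batches over `data` for `num_epochs` passes."""
    data = np.array(data, dtype=object)
    data_size = len(data)
    num_batches_per_epoch = (data_size - 1) // batch_size + 1
    for _ in range(num_epochs):
        order = np.random.permutation(data_size) if shuffle else np.arange(data_size)
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield data[order[start:end]]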
def train():
    train_data, DATA_SIZE, VOCAB_SIZE = dh.load_model()

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            rnn = TextRNN(
                avg_seq_len=FLAGS.avg_seq_len,
                vocab_size=VOCAB_SIZE,
                hidden_size=FLAGS.hidden_size,        # value missing in the original; assumed to be a flag
                num_hidden_layers=FLAGS.num_hidden_layers,
                embedding_size=FLAGS.embedding_size,  # value missing in the original; assumed to be a flag
                l2_reg_lambda=FLAGS.l2_reg_lambda,
            )
            initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

            with tf.variable_scope("model", reuse=None, initializer=initializer):
                m = Model.Model(is_training=True, config=config)

            tf.global_variables_initializer().run()
            model_saver = tf.train.Saver(tf.global_variables())

            for i in range(FLAGS.num_epochs):
                logging.info("Training Epoch: %d ..." % (i + 1))
                train_perplexity = run_epoch(sess, m, train_data, m.train_op)
                logging.info("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
                if (i + 1) % config.save_freq == 0:
                    print('model saving ...')
                    model_saver.save(sess, config.model_path + '-%d' % (i + 1))
                    print('Done!')
CORPUS_DIR = '../data/'
epochs = 1
EMBEDDING_SIZE = 768
batch_size = 64

print('Loading data...')
# Load data, labels, and the pre-trained embedding matrix
data = np.load("../npy/data.npy")
labels = np.load("../npy/labels.npy")
embeddings = np.load("../npy/embeddings.npy")
num_words = len(embeddings)

print('Build model...')
model = TextRNN(embedding_matrix=embeddings,
                maxlen=data.shape[1],
                max_features=num_words,
                embedding_dims=EMBEDDING_SIZE,
                class_num=labels.shape[1]).get_model()
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

print('Train...')
model.fit(data, labels, batch_size=batch_size, epochs=epochs, verbose=1)

# Extract the LSTM layer output for every example (learning phase 0 = inference)
lstm_layer = K.function([model.get_layer("input").input, K.learning_phase()],
                        [model.get_layer("lstm").output])
lstm_layer_vec = lstm_layer([data, 0])[0]
print(lstm_layer_vec)

# Dump each feature vector together with its label index as CSV-like text
with open("rnn.txt", "w", encoding="utf-8") as f:
    for i, j in enumerate(lstm_layer_vec):
        for k in j:
            f.write(str(k) + ",")
        f.write(str(list(np.nonzero(labels[i]))[0][0]))
        f.write("\n")
# Parameters
learning_rate = 0.01
training_iters = 1000
batch_size = 64
display_step = 10
keep = 0.5

# Network Parameters
n_input = 1
n_steps = 56         # sentence length
n_hidden = 256       # number of hidden units
n_classes = 2
embedding_size = 128

# Build model
myModel = TextRNN(learning_rate, n_input, n_steps, n_hidden, n_classes,
                  embedding_size, vocab_size, True, final_embeddings)

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    # Writing directory information
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "rnn_runs", timestamp))
    print("Writing to {}\n".format(out_dir))
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
lengths_train, lengths_dev = lengths_shuffled[:-1000], lengths_shuffled[-1000:] print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN(max_seq_length=x_train.shape[1], num_classes=FLAGS.num_classes, vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, num_lstm_layers=FLAGS.num_lstm_layers, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(rnn.losses) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.histogram_summary(
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # 打印切分的比例 # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN( sequence_length=x.shape[1], num_classes=y.shape[1], vocab_size=len(vocab_processor.vocabulary_), # 计算单词的数目 embedding_size=FLAGS.embedding_dim, l2_reg_lambda=FLAGS.l2_reg_lambda, hidden_dim=FLAGS.hidden_dim, num_layers=FLAGS.num_layers) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) # 首先将step定义为变量初始化为0 optimizer = tf.train.AdamOptimizer(1e-3)# 定义优化器使用adam优化 grads_and_vars = optimizer.compute_gradients(rnn.loss) # 将使用卷积神经网络计算出来的损失函数最小化。 该方法会返回list[(gradients,variable)] train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # 使用优化器更新参数,每进行一次参数更新就加一次global step # Keep track of gradient values and sparsity (optional) grad_summaries = [] # 每一步都存参数,tensorboard可以看 for g, v in grads_and_vars: if g is not None:
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) print("squence length is %d" % x_train.shape[1]) with sess.as_default(): rnn = TextRNN(sequence_length=x_train.shape[1], num_classes=2, vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, cell_size=FLAGS.cell_size) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-4) grads_and_vars = optimizer.compute_gradients(rnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.histogram_summary(
def train(x_train, y_train, vocab_processor, x_dev, y_dev): # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN(sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, num_hidden=FLAGS.num_hidden, batch_size=FLAGS.batch_size, init_state=FLAGS.init_state, cell_type=FLAGS.cell_type) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(rnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram( "{}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", timestamp)) print("Writing to {}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", rnn.loss) acc_summary = tf.summary.scalar("accuracy", rnn.accuracy) # Train Summaries train_summary_op = tf.summary.merge( [loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. 
Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # Write vocabulary vocab_processor.save(os.path.join(out_dir, "vocab")) # Initialize all variables sess.run(tf.global_variables_initializer()) def train_step(x_batch, y_batch): """ A single training step """ feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: FLAGS.dropout_keep_prob } _, step, summaries, loss, accuracy = sess.run([ train_op, global_step, train_summary_op, rnn.loss, rnn.accuracy ], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format( time_str, step, loss, accuracy)) train_summary_writer.add_summary(summaries, step) def dev_step(x_batch, y_batch): """ Evaluates model on a dev set """ feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, rnn.loss, rnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format( time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) def my_debugging(): # Generate batches batches = data_helpers.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch in batches: x_batch, y_batch = zip(*batch) feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: FLAGS.dropout_keep_prob } # print(np.shape(x_batch)) _input_x, _embedded_words, _outputs, _scores, _input_y = sess.run( [ rnn.input_x, rnn.embedded_words, rnn.outputs, rnn.scores, rnn.input_y ], feed_dict) # print(np.shape(_input_x), '_input_x: ', _input_x) # print(np.shape(_embedded_words), '_embedd_words: ', _embedded_words) # print(np.shape(_outputs), '_outputs', _outputs) # print(np.shape(_scores), '_scores', _scores) # print(np.shape(_input_y), '_input_y', _input_y) print('----- print shape -----') print(np.shape(x_batch), 'x_batch') print(np.shape(_input_x), '_input_x') print(np.shape(_embedded_words), '_embedded_words') print(np.shape(_outputs), '_outputs') print(np.shape(_scores), '_scores') print(np.shape(_input_y), '_input_y') return 0 def do_train(): # Generate batches batches = data_helpers.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) ## Use Initial State if FLAGS.init_state is True: dev_batches = data_helpers.batch_iter( list(zip(x_dev, y_dev)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... 
for batch in batches: if FLAGS.init_state is True: if len(batch) != FLAGS.batch_size: continue x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluation:") ## Use Initial State if FLAGS.init_state is True: for dev_batch in dev_batches: if len(dev_batch) != FLAGS.batch_size: continue x_dev_batch, y_dev_batch = zip(*dev_batch) dev_step(x_dev_batch, y_dev_batch, writer=dev_summary_writer) else: ## Do Not Use Initial State dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if current_step % FLAGS.checkpoint_every == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path)) # do # my_debugging() do_train()
elif FLAGS.using_nn_type == 'textcnn': nn = TextCNN(model_type=FLAGS.model_type, sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=embedding_dimension, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.using_nn_type == 'textrnn': nn = TextRNN( model_type=FLAGS.model_type, sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=embedding_dimension, rnn_size=FLAGS.rnn_size, num_layers=FLAGS.num_rnn_layers, # batch_size=FLAGS.batch_size, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.using_nn_type == 'textbirnn': nn = TextBiRNN( model_type=FLAGS.model_type, sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=embedding_dimension, rnn_size=FLAGS.rnn_size, num_layers=FLAGS.num_rnn_layers, # batch_size=FLAGS.batch_size, l2_reg_lambda=FLAGS.l2_reg_lambda)
def train_rnn(): """Training RNN model.""" # Print parameters used for the model dh.tab_printer(args, logger) # Load sentences, labels, and training parameters logger.info("Loading data...") logger.info("Data processing...") train_data = dh.load_data_and_labels(args.train_file, args.num_classes, args.word2vec_file, data_aug_flag=False) val_data = dh.load_data_and_labels(args.validation_file, args.num_classes, args.word2vec_file, data_aug_flag=False) logger.info("Data padding...") x_train, y_train = dh.pad_data(train_data, args.pad_seq_len) x_val, y_val = dh.pad_data(val_data, args.pad_seq_len) # Build vocabulary VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix( args.word2vec_file) # Build a graph and rnn object with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=args.allow_soft_placement, log_device_placement=args.log_device_placement) session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN(sequence_length=args.pad_seq_len, vocab_size=VOCAB_SIZE, embedding_type=args.embedding_type, embedding_size=EMBEDDING_SIZE, lstm_hidden_size=args.lstm_dim, fc_hidden_size=args.fc_dim, num_classes=args.num_classes, l2_reg_lambda=args.l2_lambda, pretrained_embedding=pretrained_word2vec_matrix) # Define training procedure with tf.control_dependencies( tf.get_collection(tf.GraphKeys.UPDATE_OPS)): learning_rate = tf.train.exponential_decay( learning_rate=args.learning_rate, global_step=rnn.global_step, decay_steps=args.decay_steps, decay_rate=args.decay_rate, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) grads, vars = zip(*optimizer.compute_gradients(rnn.loss)) grads, _ = tf.clip_by_global_norm(grads, clip_norm=args.norm_ratio) train_op = optimizer.apply_gradients( zip(grads, vars), global_step=rnn.global_step, name="train_op") # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in zip(grads, vars): if g is not None: grad_hist_summary = tf.summary.histogram( "{0}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{0}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries out_dir = dh.get_out_dir(OPTION, logger) checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) best_checkpoint_dir = os.path.abspath( os.path.join(out_dir, "bestcheckpoints")) # Summaries for loss loss_summary = tf.summary.scalar("loss", rnn.loss) # Train summaries train_summary_op = tf.summary.merge( [loss_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Validation summaries validation_summary_op = tf.summary.merge([loss_summary]) validation_summary_dir = os.path.join(out_dir, "summaries", "validation") validation_summary_writer = tf.summary.FileWriter( validation_summary_dir, sess.graph) saver = tf.train.Saver(tf.global_variables(), max_to_keep=args.num_checkpoints) best_saver = cm.BestCheckpointSaver(save_dir=best_checkpoint_dir, num_to_keep=3, maximize=True) if OPTION == 'R': # Load rnn model logger.info("Loading model...") checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir) logger.info(checkpoint_file) # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph( 
"{0}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) if OPTION == 'T': if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) # Embedding visualization config config = projector.ProjectorConfig() embedding_conf = config.embeddings.add() embedding_conf.tensor_name = "embedding" embedding_conf.metadata_path = args.metadata_file projector.visualize_embeddings(train_summary_writer, config) projector.visualize_embeddings(validation_summary_writer, config) # Save the embedding visualization saver.save( sess, os.path.join(out_dir, "embedding", "embedding.ckpt")) current_step = sess.run(rnn.global_step) def train_step(x_batch, y_batch): """A single training step""" feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: args.dropout_rate, rnn.is_training: True } _, step, summaries, loss = sess.run( [train_op, rnn.global_step, train_summary_op, rnn.loss], feed_dict) logger.info("step {0}: loss {1:g}".format(step, loss)) train_summary_writer.add_summary(summaries, step) def validation_step(x_val, y_val, writer=None): """Evaluates model on a validation set""" batches_validation = dh.batch_iter(list(zip(x_val, y_val)), args.batch_size, 1) # Predict classes by threshold or topk ('ts': threshold; 'tk': topk) eval_counter, eval_loss = 0, 0.0 eval_pre_tk = [0.0] * args.topK eval_rec_tk = [0.0] * args.topK eval_F1_tk = [0.0] * args.topK true_onehot_labels = [] predicted_onehot_scores = [] predicted_onehot_labels_ts = [] predicted_onehot_labels_tk = [[] for _ in range(args.topK)] for batch_validation in batches_validation: x_batch_val, y_batch_val = zip(*batch_validation) feed_dict = { rnn.input_x: x_batch_val, rnn.input_y: y_batch_val, rnn.dropout_keep_prob: 1.0, rnn.is_training: False } step, summaries, scores, cur_loss = sess.run([ rnn.global_step, validation_summary_op, rnn.scores, rnn.loss ], feed_dict) # Prepare for calculating metrics for i in y_batch_val: true_onehot_labels.append(i) for j in scores: predicted_onehot_scores.append(j) # Predict by threshold batch_predicted_onehot_labels_ts = \ dh.get_onehot_label_threshold(scores=scores, threshold=args.threshold) for k in batch_predicted_onehot_labels_ts: predicted_onehot_labels_ts.append(k) # Predict by topK for top_num in range(args.topK): batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk( scores=scores, top_num=top_num + 1) for i in batch_predicted_onehot_labels_tk: predicted_onehot_labels_tk[top_num].append(i) eval_loss = eval_loss + cur_loss eval_counter = eval_counter + 1 if writer: writer.add_summary(summaries, step) eval_loss = float(eval_loss / eval_counter) # Calculate Precision & Recall & F1 eval_pre_ts = precision_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_ts), average='micro') eval_rec_ts = recall_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_ts), average='micro') eval_F1_ts = f1_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_ts), average='micro') for top_num in range(args.topK): eval_pre_tk[top_num] = precision_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_tk[top_num]), average='micro') eval_rec_tk[top_num] = recall_score( y_true=np.array(true_onehot_labels), y_pred=np.array(predicted_onehot_labels_tk[top_num]), average='micro') eval_F1_tk[top_num] = f1_score( y_true=np.array(true_onehot_labels), 
y_pred=np.array(predicted_onehot_labels_tk[top_num]), average='micro') # Calculate the average AUC eval_auc = roc_auc_score( y_true=np.array(true_onehot_labels), y_score=np.array(predicted_onehot_scores), average='micro') # Calculate the average PR eval_prc = average_precision_score( y_true=np.array(true_onehot_labels), y_score=np.array(predicted_onehot_scores), average='micro') return eval_loss, eval_auc, eval_prc, eval_pre_ts, eval_rec_ts, eval_F1_ts, \ eval_pre_tk, eval_rec_tk, eval_F1_tk # Generate batches batches_train = dh.batch_iter(list(zip(x_train, y_train)), args.batch_size, args.epochs) num_batches_per_epoch = int( (len(x_train) - 1) / args.batch_size) + 1 # Training loop. For each batch... for batch_train in batches_train: x_batch_train, y_batch_train = zip(*batch_train) train_step(x_batch_train, y_batch_train) current_step = tf.train.global_step(sess, rnn.global_step) if current_step % args.evaluate_steps == 0: logger.info("\nEvaluation:") eval_loss, eval_auc, eval_prc, \ eval_pre_ts, eval_rec_ts, eval_F1_ts, eval_pre_tk, eval_rec_tk, eval_F1_tk = \ validation_step(x_val, y_val, writer=validation_summary_writer) logger.info( "All Validation set: Loss {0:g} | AUC {1:g} | AUPRC {2:g}" .format(eval_loss, eval_auc, eval_prc)) # Predict by threshold logger.info( "Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}" .format(eval_pre_ts, eval_rec_ts, eval_F1_ts)) # Predict by topK logger.info("Predict by topK:") for top_num in range(args.topK): logger.info( "Top{0}: Precision {1:g}, Recall {2:g}, F1 {3:g}". format(top_num + 1, eval_pre_tk[top_num], eval_rec_tk[top_num], eval_F1_tk[top_num])) best_saver.handle(eval_prc, sess, current_step) if current_step % args.checkpoint_steps == 0: checkpoint_prefix = os.path.join(checkpoint_dir, "model") path = saver.save(sess, checkpoint_prefix, global_step=current_step) logger.info("Saved model checkpoint to {0}\n".format(path)) if current_step % num_batches_per_epoch == 0: current_epoch = current_step // num_batches_per_epoch logger.info( "Epoch {0} has finished!".format(current_epoch)) logger.info("All Done.")
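# -----------------------------------------------------------------------------
# The validation step above calls `dh.get_onehot_label_threshold` and
# `dh.get_onehot_label_topk`, which are not shown here. The sketches below are
# assumed implementations inferred from how they are called (scores is a
# (batch, num_classes) array of per-class probabilities), not the original code.
# -----------------------------------------------------------------------------
import numpy as np


def get_onehot_label_threshold(scores, threshold=0.5):
    """Multi-label prediction: mark every class whose score reaches the threshold."""
    scores = np.asarray(scores)
    predicted = (scores >= threshold).astype(int)
    # If no class clears the threshold, fall back to the single best class.
    for row, score_row in zip(predicted, scores):
        if row.sum() == 0:
            row[np.argmax(score_row)] = 1
    return predicted.tolist()


def get_onehot_label_topk(scores, top_num=1):
    """Multi-label prediction: mark the `top_num` highest-scoring classes."""
    scores = np.asarray(scores)
    predicted = np.zeros_like(scores, dtype=int)
    for row, score_row in zip(predicted, scores):
        row[np.argsort(score_row)[-top_num:]] = 1
    return predicted.tolist()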
def train_rnn(): """Training RNN model.""" # Load sentences, labels, and training parameters logger.info('✔︎ Loading data...') logger.info('✔︎ Training data processing...') train_data = data_helpers.load_data_and_labels(FLAGS.training_data_file, FLAGS.num_classes, FLAGS.embedding_dim) logger.info('✔︎ Validation data processing...') validation_data = \ data_helpers.load_data_and_labels(FLAGS.validation_data_file, FLAGS.num_classes, FLAGS.embedding_dim) logger.info('Recommand padding Sequence length is: {}'.format( FLAGS.pad_seq_len)) logger.info('✔︎ Training data padding...') x_train, y_train = data_helpers.pad_data(train_data, FLAGS.pad_seq_len) logger.info('✔︎ Validation data padding...') x_validation, y_validation = data_helpers.pad_data(validation_data, FLAGS.pad_seq_len) y_validation_bind = validation_data.labels_bind # Build vocabulary VOCAB_SIZE = data_helpers.load_vocab_size(FLAGS.embedding_dim) pretrained_word2vec_matrix = data_helpers.load_word2vec_matrix( VOCAB_SIZE, FLAGS.embedding_dim) # Build a graph and rnn object with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN(sequence_length=FLAGS.pad_seq_len, num_classes=FLAGS.num_classes, vocab_size=VOCAB_SIZE, hidden_size=FLAGS.hidden_size, fc_hidden_size=FLAGS.fc_hidden_size, embedding_size=FLAGS.embedding_dim, embedding_type=FLAGS.embedding_type, l2_reg_lambda=FLAGS.l2_reg_lambda, pretrained_embedding=pretrained_word2vec_matrix) # Define Training procedure # learning_rate = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate, global_step=cnn.global_step, # decay_steps=FLAGS.decay_steps, decay_rate=FLAGS.decay_rate, # staircase=True) optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) grads_and_vars = optimizer.compute_gradients(rnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=rnn.global_step, name="train_op") # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram( "{}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries if FLAGS.train_or_restore == 'R': MODEL = input( "☛ Please input the checkpoints model you want to restore, " "it should be like(1490175368): " ) # The model you want to restore while not (MODEL.isdigit() and len(MODEL) == 10): MODEL = input( '✘ The format of your input is illegal, please re-input: ' ) logger.info( '✔︎ The format of your input is legal, now loading to next step...' 
) checkpoint_dir = 'runs/' + MODEL + '/checkpoints/' out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", MODEL)) logger.info("✔︎ Writing to {}\n".format(out_dir)) else: timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", timestamp)) logger.info("✔︎ Writing to {}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", rnn.loss) # acc_summary = tf.summary.scalar("accuracy", rnn.accuracy) # Train Summaries train_summary_op = tf.summary.merge( [loss_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Validation summaries validation_summary_op = tf.summary.merge([loss_summary]) validation_summary_dir = os.path.join(out_dir, "summaries", "validation") validation_summary_writer = tf.summary.FileWriter( validation_summary_dir, sess.graph) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) if FLAGS.train_or_restore == 'R': # Load rnn model logger.info("✔ Loading model...") checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir) logger.info(checkpoint_file) # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) else: checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) current_step = sess.run(rnn.global_step) def train_step(x_batch, y_batch): """A single training step""" feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: FLAGS.dropout_keep_prob, rnn.is_training: True } _, step, summaries, loss = sess.run( [train_op, rnn.global_step, train_summary_op, rnn.loss], feed_dict) time_str = datetime.datetime.now().isoformat() logger.info("{}: step {}, loss {:g}".format( time_str, step, loss)) train_summary_writer.add_summary(summaries, step) def validation_step(x_validation, y_validation, y_validation_bind, writer=None): """Evaluates model on a validation set""" batches_validation = data_helpers.batch_iter( list(zip(x_validation, y_validation, y_validation_bind)), FLAGS.batch_size, FLAGS.num_epochs) eval_loss, eval_rec, eval_acc, eval_counter = 0.0, 0.0, 0.0, 0 for batch_validation in batches_validation: x_batch_validation, y_batch_validation, y_batch_validation_bind = zip( *batch_validation) feed_dict = { rnn.input_x: x_batch_validation, rnn.input_y: y_batch_validation, rnn.dropout_keep_prob: 1.0, rnn.is_training: False } step, summaries, logits, cur_loss = sess.run([ rnn.global_step, validation_summary_op, rnn.logits, rnn.loss ], feed_dict) if FLAGS.use_classbind_or_not == 'Y': predicted_labels = data_helpers.get_label_using_logits_and_classbind( logits, y_batch_validation_bind, top_number=FLAGS.top_num) if FLAGS.use_classbind_or_not == 'N': predicted_labels = data_helpers.get_label_using_logits( logits, top_number=FLAGS.top_num) cur_rec, cur_acc = 0.0, 0.0 for index, predicted_label in enumerate(predicted_labels): rec_inc, acc_inc = data_helpers.cal_rec_and_acc( predicted_label, y_batch_validation[index]) cur_rec, cur_acc = cur_rec + rec_inc, cur_acc + acc_inc cur_rec = cur_rec / len(y_batch_validation) cur_acc = cur_acc / len(y_batch_validation) eval_loss, eval_rec, eval_acc, eval_counter = eval_loss + cur_loss, eval_rec + cur_rec, \ eval_acc + 
cur_acc, eval_counter + 1 logger.info("✔︎ validation batch {} finished.".format( eval_counter)) if writer: writer.add_summary(summaries, step) eval_loss = float(eval_loss / eval_counter) eval_rec = float(eval_rec / eval_counter) eval_acc = float(eval_acc / eval_counter) return eval_loss, eval_rec, eval_acc # Generate batches batches_train = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch_train in batches_train: x_batch_train, y_batch_train = zip(*batch_train) train_step(x_batch_train, y_batch_train) current_step = tf.train.global_step(sess, rnn.global_step) if current_step % FLAGS.evaluate_every == 0: logger.info("\nEvaluation:") eval_loss, eval_rec, eval_acc = validation_step( x_validation, y_validation, y_validation_bind, writer=validation_summary_writer) time_str = datetime.datetime.now().isoformat() logger.info( "{}: step {}, loss {:g}, rec {:g}, acc {:g}".format( time_str, current_step, eval_loss, eval_rec, eval_acc)) if current_step % FLAGS.checkpoint_every == 0: checkpoint_prefix = os.path.join(checkpoint_dir, "model") path = saver.save(sess, checkpoint_prefix, global_step=current_step) logger.info( "✔︎ Saved model checkpoint to {}\n".format(path)) logger.info("✔︎ Done.")
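# -----------------------------------------------------------------------------
# `data_helpers.get_label_using_logits` and `data_helpers.cal_rec_and_acc` are
# used in the validation step above but never defined in this section. The
# functions below are plausible sketches based on the call sites; the real
# helpers may differ.
# -----------------------------------------------------------------------------
import numpy as np


def get_label_using_logits(logits, top_number=1):
    """Indices of the `top_number` highest-scoring classes for each example."""
    logits = np.asarray(logits)
    return np.argsort(-logits, axis=1)[:, :top_number]


def cal_rec_and_acc(predicted_label, true_onehot_label):
    """Per-example recall and precision increments for multi-label prediction."""
    true_indices = set(np.nonzero(true_onehot_label)[0])
    pred_indices = set(np.asarray(predicted_label).tolist())
    hit = len(true_indices & pred_indices)
    rec = hit / len(true_indices) if true_indices else 0.0
    acc = hit / len(pred_indices) if pred_indices else 0.0
    return rec, acc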
import tensorflow as tf

from text_rnn import TextRNN

sess = tf.Session()

# Restore the graph and the latest checkpoint that was saved during training
new_saver = tf.train.import_meta_graph('./10000examples_lr0.001_epochs100/rnn.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./10000examples_lr0.001_epochs100/'))

# List the trainable variables that were restored
all_vars = tf.trainable_variables()
for v in all_vars:
    print(v)

# Rebuilding the model adds a second, uninitialized copy of the graph; it also
# requires the original preprocessing objects (x_train, y_train, vocab_processor).
rnn = TextRNN(x_train.shape[1], y_train.shape[1], 100,
              len(vocab_processor.vocabulary_), 200, l2_reg=0.0)
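# -----------------------------------------------------------------------------
# After `import_meta_graph` + `restore`, the usual alternative to rebuilding
# TextRNN is to fetch the restored placeholders and output ops by name. The
# tensor names below ("input_x", "dropout_keep_prob", "output/predictions") are
# assumptions and must match the names used when the graph was originally built.
# -----------------------------------------------------------------------------
graph = tf.get_default_graph()
input_x = graph.get_operation_by_name("input_x").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
predictions = graph.get_operation_by_name("output/predictions").outputs[0]

# `x_test_batch` is a placeholder name for whatever padded id matrix is being scored:
# batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})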
def main(_): # FLAGS._parse_flags() print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value)) print("") # Data Preparation # ================================================== # Load data print("Loading data...") x_text, y = data_helpers.load_data_and_labels(FLAGS.train_file, FLAGS.num_class) # Build vocabulary max_document_length = 64 vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) x = np.array(list(vocab_processor.fit_transform(x_text))) # Randomly shuffle data np.random.seed(10) shuffle_indices = np.random.permutation(np.arange(len(y))) x_shuffled = x[shuffle_indices] y_shuffled = y[shuffle_indices] # Split train/test set # TODO: This is very crude, should use cross-validation dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y))) x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:] y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:] del x, y, x_shuffled, y_shuffled print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN( sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_dim=FLAGS.embedding_dim, hidden_size=256, multi_layer=1) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(0.01) grads_and_vars = optimizer.compute_gradients(rnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) print("Writing to {}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", rnn.loss) acc_summary = tf.summary.scalar("accuracy", rnn.accuracy) # Train Summaries train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. 
Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # Write vocabulary vocab_processor.save(os.path.join(out_dir, "vocab")) # Initialize all variables sess.run(tf.global_variables_initializer()) def train_step(x_batch, y_batch): """ A single training step """ feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: FLAGS.dropout_keep_prob } _, step, summaries, loss, accuracy = sess.run( [train_op, global_step, train_summary_op, rnn.loss, rnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) train_summary_writer.add_summary(summaries, step) def dev_step(x_batch, y_batch, writer=None): """ Evaluates model on a dev set """ feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: 1.0 } step, summaries, loss, accuracy = sess.run( [global_step, dev_summary_op, rnn.loss, rnn.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy)) if writer: writer.add_summary(summaries, step) # Generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... for batch in batches: x_batch, y_batch = zip(*batch) train_step(x_batch, y_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluation:") dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") if current_step % FLAGS.checkpoint_every == 0: path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path))
def train_rnn(): # Data Preparation # ================================================== if FLAGS.init_embedding_path is not None: embedding = np.load(FLAGS.init_embedding_path) print("Using pre-trained word embedding which shape is {}\n".format(embedding.shape)) FLAGS.vocab_size = embedding.shape[0] FLAGS.embedding_size = embedding.shape[1] if FLAGS.init_model_path is not None: assert os.path.isdir(FLAGS.init_model_path), "init_model_path must be a directory\n" ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path) assert ckpt, "No checkpoint found in {}\n".format(FLAGS.init_model_path) assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n" # Create root directory timestamp = str(int(time.time())) root_dir = os.path.join(os.path.curdir, 'runs', 'textrnn', 'trained_result_' + timestamp) os.makedirs(root_dir) # Load data # print("Loading data...\n") # x, y = data_helpers.load_data(FLAGS.data_file, FLAGS.sequence_length, FLAGS.vocab_size, root_dir=root_dir) # FLAGS.num_classes = len(y[0]) print("Loading data...\n") x_data = np.loadtxt(FLAGS.x_data_file) y_data = np.loadtxt(FLAGS.y_data_file) print("data load finished") # Split dataset # x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=FLAGS.test_size, stratify=y_data, random_state=0) # x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5, random_state=0) # Training # ================================================== with tf.Graph().as_default(): tf_config = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth with tf.Session(config=tf_config).as_default() as sess: rnn = TextRNN( vocab_size=FLAGS.vocab_size, embedding_size=FLAGS.embedding_size, sequence_length=FLAGS.sequence_length, rnn_size=FLAGS.rnn_size, num_layers=FLAGS.num_layers, attention_size=FLAGS.attention_size, num_classes=FLAGS.num_classes, learning_rate=FLAGS.learning_rate, grad_clip=FLAGS.grad_clip) # Output directory for models and summaries out_dir = os.path.abspath(root_dir) print("Writing to {}...\n".format(out_dir)) # Summaries for loss and accuracy tf.summary.scalar("loss", rnn.loss) tf.summary.scalar("accuracy", rnn.accuracy) merged_summary = tf.summary.merge_all() # Summaries dictionary train_summary_dir = os.path.join(out_dir, 'summaries', 'train') val_summary_dir = os.path.join(out_dir, 'summaries', 'val') train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph) # Checkpoint directory, will not create itself checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoints')) checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt') if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=1) # Initialize all variables sess.run(tf.global_variables_initializer()) # Using pre-trained word embedding if FLAGS.init_embedding_path is not None: sess.run(rnn.embedding.assign(embedding)) del embedding # Continue training from saved model if FLAGS.init_model_path is not None: saver.restore(sess, ckpt.model_checkpoint_path) # Training start print("Start training...\n") best_at_step = 0 best_val_accuracy = 0 train_batches = data_utils.batch_iter(list(zip(x_data, y_data)), FLAGS.batch_size) start = time.time() rnn_feature_temp = [] for batch in train_batches: # Training model on x_batch and y_batch x_batch, 
y_batch = zip(*batch) # seq_len_train = data_helpers.real_len(x_batch) seq_len_train = data_utils.real_len(x_batch) feed_dict = {rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.seq_len: seq_len_train, rnn.keep_prob: FLAGS.dropout_keep_prob} attention_output, _, global_step, train_summaries, train_loss, train_accuracy = sess.run([rnn.attention_output,rnn.train_op, rnn.global_step, merged_summary, rnn.loss, rnn.accuracy], feed_dict=feed_dict) rnn_feature_temp.append(attention_output.tolist()) print(rnn_feature_temp[0:2]) print(len(rnn_feature_temp)) np.savetxt("../data/word_data/word_dim/word_rnn_attention_embeddings_600_dim256.txt", np.array(rnn_feature_temp).reshape(20480,200))
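# -----------------------------------------------------------------------------
# `data_utils.real_len` feeds the true (un-padded) sequence lengths into
# `rnn.seq_len` above, but its body is not shown. A minimal sketch, assuming the
# inputs are word-id sequences padded with 0:
# -----------------------------------------------------------------------------
import numpy as np


def real_len(batch, pad_id=0):
    """Length of each sequence before padding (pad_id marks padding positions)."""
    batch = np.asarray(batch)
    return np.sum(batch != pad_id, axis=1)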
print("# Loading training data") training_data_raw = open(config['TRAINING_DATA_LOCATION'],'r',encoding='latin-1').readlines() random.shuffle(training_data_raw) num_examples = config['NUM_EXAMPLES'] training_data_raw= training_data_raw[:num_examples] print("# Processing training data") x_train, y_train, vocab_processor = util.load_training_data(training_data_raw) print(" Loading and Processing testing data") testing_data_raw = open(config['TESTING_DATA_LOCATION'],'r',encoding='latin-1').readlines() x_test, y_test = util.load_testing_data(testing_data_raw, vocab_processor) print("# Creating RNN") rnn = TextRNN(x_train.shape[1], y_train.shape[1], config['HIDDEN_LAYER_SIZE'], len(vocab_processor.vocabulary_), config['WORD_VECTOR_DIM'], l2_reg=0.0) optimizer = tf.train.AdamOptimizer(config['LEARNING_RATE']) minimizer = optimizer.minimize(rnn.loss) print("# Initializing Tensorflow") init_op = tf.global_variables_initializer() sess = tf.Session() sess.run(init_op) saver = tf.train.Saver() print("# Training") batch_size = config['BATCH_SIZE'] no_of_batches = int(len(training_data_raw)/batch_size) epoch = config['NUM_EPOCHS'] losses = []
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = TextRNN( sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, rnn_size=FLAGS.rnn_size, batch_size=FLAGS.batch_size, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) # RNN中常用的梯度截断,防止出现梯度过大难以求导的现象 tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(cnn.loss, tvars), FLAGS.grad_clip) grads_and_vars = tuple(zip(grads, tvars)) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # grads_and_vars = optimizer.compute_gradients(cnn.loss)
def train_rnn(): """Training RNN model.""" # Load sentences, labels, and training parameters logger.info("✔︎ Loading data...") logger.info("✔︎ Training data processing...") train_data = dh.load_data_and_labels(FLAGS.training_data_file, FLAGS.embedding_dim) logger.info("✔︎ Validation data processing...") validation_data = dh.load_data_and_labels(FLAGS.validation_data_file, FLAGS.embedding_dim) logger.info("Recommended padding Sequence length is: {0}".format( FLAGS.pad_seq_len)) logger.info("✔︎ Training data padding...") x_train_front, x_train_behind, y_train = dh.pad_data( train_data, FLAGS.pad_seq_len) logger.info("✔︎ Validation data padding...") x_validation_front, x_validation_behind, y_validation = dh.pad_data( validation_data, FLAGS.pad_seq_len) # Build vocabulary VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix( FLAGS.embedding_dim) # Build a graph and rnn object with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN(sequence_length=FLAGS.pad_seq_len, num_classes=y_train.shape[1], vocab_size=VOCAB_SIZE, lstm_hidden_size=FLAGS.lstm_hidden_size, fc_hidden_size=FLAGS.fc_hidden_size, embedding_size=FLAGS.embedding_dim, embedding_type=FLAGS.embedding_type, l2_reg_lambda=FLAGS.l2_reg_lambda, pretrained_embedding=pretrained_word2vec_matrix) # Define training procedure with tf.control_dependencies( tf.get_collection(tf.GraphKeys.UPDATE_OPS)): learning_rate = tf.train.exponential_decay( learning_rate=FLAGS.learning_rate, global_step=rnn.global_step, decay_steps=FLAGS.decay_steps, decay_rate=FLAGS.decay_rate, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) grads, vars = zip(*optimizer.compute_gradients(rnn.loss)) grads, _ = tf.clip_by_global_norm(grads, clip_norm=FLAGS.norm_ratio) train_op = optimizer.apply_gradients( zip(grads, vars), global_step=rnn.global_step, name="train_op") # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in zip(grads, vars): if g is not None: grad_hist_summary = tf.summary.histogram( "{0}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{0}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries if FLAGS.train_or_restore == 'R': MODEL = input( "☛ Please input the checkpoints model you want to restore, " "it should be like(1490175368): " ) # The model you want to restore while not (MODEL.isdigit() and len(MODEL) == 10): MODEL = input( "✘ The format of your input is illegal, please re-input: " ) logger.info( "✔︎ The format of your input is legal, now loading to next step..." 
) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", MODEL)) logger.info("✔︎ Writing to {0}\n".format(out_dir)) else: timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", timestamp)) logger.info("✔︎ Writing to {0}\n".format(out_dir)) checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) best_checkpoint_dir = os.path.abspath( os.path.join(out_dir, "bestcheckpoints")) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", rnn.loss) acc_summary = tf.summary.scalar("accuracy", rnn.accuracy) # Train summaries train_summary_op = tf.summary.merge( [loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Validation summaries validation_summary_op = tf.summary.merge( [loss_summary, acc_summary]) validation_summary_dir = os.path.join(out_dir, "summaries", "validation") validation_summary_writer = tf.summary.FileWriter( validation_summary_dir, sess.graph) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) best_saver = cm.BestCheckpointSaver(save_dir=best_checkpoint_dir, num_to_keep=3, maximize=True) if FLAGS.train_or_restore == 'R': # Load rnn model logger.info("✔︎ Loading model...") checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir) logger.info(checkpoint_file) # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph( "{0}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) else: if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) # Embedding visualization config config = projector.ProjectorConfig() embedding_conf = config.embeddings.add() embedding_conf.tensor_name = "embedding" embedding_conf.metadata_path = FLAGS.metadata_file projector.visualize_embeddings(train_summary_writer, config) projector.visualize_embeddings(validation_summary_writer, config) # Save the embedding visualization saver.save( sess, os.path.join(out_dir, "embedding", "embedding.ckpt")) current_step = sess.run(rnn.global_step) def train_step(x_batch_front, x_batch_behind, y_batch): """A single training step""" feed_dict = { rnn.input_x_front: x_batch_front, rnn.input_x_behind: x_batch_behind, rnn.input_y: y_batch, rnn.dropout_keep_prob: FLAGS.dropout_keep_prob, rnn.is_training: True } _, step, summaries, loss, accuracy = sess.run([ train_op, rnn.global_step, train_summary_op, rnn.loss, rnn.accuracy ], feed_dict) logger.info("step {0}: loss {1:g}, acc {2:g}".format( step, loss, accuracy)) train_summary_writer.add_summary(summaries, step) def validation_step(x_batch_front, x_batch_behind, y_batch, writer=None): """Evaluates model on a validation set""" feed_dict = { rnn.input_x_front: x_batch_front, rnn.input_x_behind: x_batch_behind, rnn.input_y: y_batch, rnn.dropout_keep_prob: 1.0, rnn.is_training: False } step, summaries, loss, accuracy, recall, precision, f1, auc = sess.run( [ rnn.global_step, validation_summary_op, rnn.loss, rnn.accuracy, rnn.recall, rnn.precision, rnn.F1, rnn.AUC ], feed_dict) logger.info( "step {0}: loss {1:g}, acc {2:g}, recall {3:g}, precision {4:g}, f1 {5:g}, AUC {6}" .format(step, loss, accuracy, recall, precision, f1, auc)) if writer: writer.add_summary(summaries, step) return accuracy # Generate batches batches = dh.batch_iter( list(zip(x_train_front, x_train_behind, y_train)), 
FLAGS.batch_size, FLAGS.num_epochs) num_batches_per_epoch = int( (len(x_train_front) - 1) / FLAGS.batch_size) + 1 # Training loop. For each batch... for batch in batches: x_batch_front, x_batch_behind, y_batch = zip(*batch) train_step(x_batch_front, x_batch_behind, y_batch) current_step = tf.train.global_step(sess, rnn.global_step) if current_step % FLAGS.evaluate_every == 0: logger.info("\nEvaluation:") accuracy = validation_step( x_validation_front, x_validation_behind, y_validation, writer=validation_summary_writer) best_saver.handle(accuracy, sess, current_step) if current_step % FLAGS.checkpoint_every == 0: checkpoint_prefix = os.path.join(checkpoint_dir, "model") path = saver.save(sess, checkpoint_prefix, global_step=current_step) logger.info( "✔︎ Saved model checkpoint to {0}\n".format(path)) if current_step % num_batches_per_epoch == 0: current_epoch = current_step // num_batches_per_epoch logger.info( "✔︎ Epoch {0} has finished!".format(current_epoch)) logger.info("✔︎ Done.")
def main(_):
    # Load data
    print("Loading data...")
    x_, y = data_helpers.build_train_data(FLAGS.label_file, FLAGS.train_file)
    # Save the word-to-index mapping
    train_int_to_vab, train_to_int = data_helpers.cret_dict(x_)
    # Persist the full vocabulary so it can be loaded again at test time
    pickle.dump(train_int_to_vab, open('./vocab_index.pkl', 'wb'))
    train_ids = [[train_to_int.get(term, train_to_int['<UNK>']) for term in line]
                 for line in x_]
    x_ = data_helpers.pad_sentences(train_ids, 20)

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x_[shuffle_indices]
    y = np.array(y)
    y_shuffled = y[shuffle_indices]
    folids_list = data_helpers.cross_validation_split_for_smp(x_shuffled, y_shuffled)

    for i in range(10):
        if not os.path.exists('save_model/' + str(i) + '/'):
            os.makedirs(os.path.join('save_model', str(i)))
        else:
            continue

    for i in range(10):
        best_acc = 0.0
        print(i)
        print('##################')
        x_train, y_train, x_dev, y_dev = folids_list[i]
        y_train = np_utils.to_categorical(y_train)
        y_dev = np_utils.to_categorical(y_dev)

        # ==================================================
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                rnn = TextRNN(sequence_length=x_train.shape[1],
                              num_classes=y_train.shape[1],
                              vocab_size=len(train_int_to_vab),
                              batch_size=FLAGS.batch_size,
                              embedding_size=FLAGS.embedding_dim,
                              hidden_size=FLAGS.hidden_size,
                              num_layers=FLAGS.num_layers
                              # word_embedding_matrix=embeding_matric
                              )

                # Define Training procedure
                global_step = tf.Variable(0, name="global_step", trainable=False)
                optimizer = tf.train.AdamOptimizer(1e-3)
                grads_and_vars = optimizer.compute_gradients(rnn.loss)
                train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

                # Initialize all variables
                sess.run(tf.global_variables_initializer())

                def train_step(x_batch, y_batch):
                    """A single training step"""
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, step, loss, accuracy = sess.run(
                        [train_op, global_step, rnn.loss, rnn.accuracy], feed_dict)
                    return step, loss, accuracy

                def dev_step(x_batch, y_batch):
                    """Evaluates model on a dev set"""
                    feed_dict = {
                        rnn.input_x: x_batch,
                        rnn.input_y: y_batch,
                        rnn.dropout_keep_prob: 1.0
                    }
                    step, loss, accuracy = sess.run(
                        [global_step, rnn.loss, rnn.accuracy], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print('dev')
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                    return accuracy

                def save_best_model(sess, path):
                    path = saver.save(sess, path)

                for epoch in range(FLAGS.num_epochs):
                    print('epoch', epoch)
                    # Generate batches
                    for batch_i, (x_batch, y_batch) in enumerate(
                            data_helpers.get_batches(y_train, x_train, FLAGS.batch_size)):
                        step, train_loss, train_accuracy = train_step(x_batch, y_batch)
                        # print('step', step)
                        if batch_i % FLAGS.evaluate_every == 0:
                            time_str = datetime.datetime.now().isoformat()
                            print("{}: step {}, loss {:g}, acc {:g}".format(
                                time_str, step, train_loss, train_accuracy))
                            # =====================
                            accuracy = dev_step(x_dev, y_dev)
                            if accuracy > best_acc:
                                best_acc = accuracy
                                print('save_model' + str(i) + '/best_model.ckpt')
                                save_best_model(sess, 'save_model/' + str(i) + '/best_model.ckpt')
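# -----------------------------------------------------------------------------
# `data_helpers.cross_validation_split_for_smp` above returns a list of ten
# (x_train, y_train, x_dev, y_dev) folds but is not shown. Below is a minimal
# sketch of a plain 10-fold split; any class stratification the real helper
# performs is ignored here.
# -----------------------------------------------------------------------------
import numpy as np


def cross_validation_split_for_smp(x, y, n_folds=10):
    """Split shuffled data into n_folds (x_train, y_train, x_dev, y_dev) tuples."""
    x, y = np.asarray(x), np.asarray(y)
    fold_size = len(x) // n_folds
    folds = []
    for k in range(n_folds):
        dev_idx = np.arange(k * fold_size, (k + 1) * fold_size)
        train_idx = np.setdiff1d(np.arange(len(x)), dev_idx)
        folds.append((x[train_idx], y[train_idx], x[dev_idx], y[dev_idx]))
    return folds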
maxlen = 400
batch_size = 32
embedding_dims = 50
epochs = 10
max_features = 5000  # vocabulary size; not defined in the original snippet, value assumed

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)...')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = TextRNN(maxlen, max_features, embedding_dims).get_model()
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print('Train...')
early_stopping = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=(x_test, y_test))

print('Test...')
result = model.predict(x_test)
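# -----------------------------------------------------------------------------
# The Keras `TextRNN(...).get_model()` class used above (and in the
# embedding-dump snippet earlier) is not included in this section. Below is an
# assumed minimal implementation: Embedding -> LSTM -> Dense, with a sigmoid
# output for the binary IMDB task and a softmax otherwise. The layer names
# "input" and "lstm" mirror the get_layer() calls made earlier; this is a
# sketch, not the original class.
# -----------------------------------------------------------------------------
from keras.layers import Dense, Embedding, Input, LSTM
from keras.models import Model


class TextRNN(object):
    def __init__(self, maxlen, max_features, embedding_dims, class_num=1):
        self.maxlen = maxlen
        self.max_features = max_features
        self.embedding_dims = embedding_dims
        self.class_num = class_num

    def get_model(self):
        inputs = Input(shape=(self.maxlen,), name="input")
        x = Embedding(self.max_features, self.embedding_dims,
                      input_length=self.maxlen)(inputs)
        x = LSTM(128, name="lstm")(x)  # last hidden state as the sentence representation
        activation = 'sigmoid' if self.class_num == 1 else 'softmax'
        outputs = Dense(self.class_num, activation=activation)(x)
        return Model(inputs=inputs, outputs=outputs)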
del x vocabsize = len(vocab_processor.vocabulary_) print("Vocabulary Size: {:d}".format(vocabsize)) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN(sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=vocabsize, embedding_size=FLAGS.embedding_dim) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(rnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram( "{}/grad/hist".format(v.name), g)
train_data, train_label, test_data, test_label = data_helpers.data_processing(
    FLAGS.positive_data_file, FLAGS.negative_data_file)
print("training...")

# Training
# =======================================================
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Build the text RNN model
        rnn = TextRNN(sequence_length=len(train_data[0]),
                      num_classes=len(train_label[0]),
                      embedding_size=FLAGS.embedding_dim,
                      l2_reg_lambda=FLAGS.l2_reg_lambda,
                      hidden_dim=FLAGS.hidden_dim,
                      num_layers=FLAGS.num_layers)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(rnn.loss)  # compute gradients
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)  # apply the parameter update

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
def evaluate(): if FLAGS.checkpoint_dir == None or not os.path.exists( FLAGS.checkpoint_dir): raise IOError("checkpoint_dir not found") if FLAGS.model_type == None or not FLAGS.model_type in ['CNN', 'RNN']: raise ValueError("model_type must be CNN or RNN") root_dir = os.path.join(FLAGS.checkpoint_dir, '..') + '/' # Create result directory eval_dir = os.path.join(root_dir, 'eval') if not os.path.exists(eval_dir): os.mkdir(eval_dir) # Load parameters print("Loading parameters...\n") params = json.loads(open(root_dir + 'parameters.json').read()) # Load data print("Loading data...\n") x_eval, y_eval = data_helpers.load_data(FLAGS.eval_data, params['sequence_length'], root_dir=root_dir, has_label=FLAGS.has_label, is_training=False) # Evaluating # ================================================== with tf.Graph().as_default(): tf_config = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth with tf.Session(config=tf_config).as_default() as sess: # Model initialization if FLAGS.model_type == 'CNN': model = TextCNN(vocab_size=params['vocab_size'], embedding_size=params['embedding_size'], sequence_length=params['sequence_length'], filter_sizes=list( map(int, params['filter_sizes'].split(","))), num_filters=params['num_filters'], num_classes=params['num_classes'], learning_rate=params['learning_rate'], grad_clip=params['grad_clip'], l2_reg_lambda=params['l2_reg_lambda']) feed_dict = {model.keep_prob: 1.0, model.is_training: False} elif FLAGS.model_type == 'RNN': model = TextRNN(vocab_size=params['vocab_size'], embedding_size=params['embedding_size'], sequence_length=params['sequence_length'], rnn_size=params['rnn_size'], num_layers=params['num_layers'], attention_size=params['attention_size'], num_classes=params['num_classes'], learning_rate=params['learning_rate'], grad_clip=params['grad_clip']) feed_dict = {model.keep_prob: 1.0} saver = tf.train.Saver(tf.global_variables()) sess.run(tf.global_variables_initializer()) # Restore all variables from checkpoint ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: best_model_path = os.path.join( '/'.join(ckpt.model_checkpoint_path.split("/")[:-1]), 'best_model') saver.restore(sess, best_model_path) # Evaluate the model print("Start evaluating...\n") y_logits = [] start = time.time() data_size = len(x_eval) # Generate eval batches eval_batches = data_helpers.batch_iter(x_eval, FLAGS.batch_size, shuffle=False) for x_batch in eval_batches: feed_dict[model.input_x] = x_batch if FLAGS.model_type == 'RNN': feed_dict[model.seq_len] = data_helpers.real_len(x_batch) batch_predictions = sess.run(model.logits, feed_dict=feed_dict) y_logits.extend(batch_predictions) print( "Mission complete, total number of eval examples: {}, evaluating speed: {:.0f} examples/sec\n" .format(data_size, data_size / (time.time() - start))) label_transformer = joblib.load( os.path.join(root_dir, 'label_transformer.pkl')) y_logits_original = label_transformer.inverse_transform( np.array(y_logits)) # Print accuracy if eval examples have label if FLAGS.has_label == True: df = pd.DataFrame([ line.strip().split("\t") for line in open( FLAGS.eval_data, 'r', encoding='UTF-8').readlines() if len(line.strip().split("\t")) == 2 ], columns=['content', 'real_label']) y_eval_original = label_transformer.inverse_transform(y_eval) eval_accuracy = sum( y_logits_original == y_eval_original) / data_size print("Evaluating Accuracy: 
{:.3f}\n".format(eval_accuracy)) print( "Precision, Recall and F1-Score:\n\n", classification_report(y_eval_original, y_logits_original)) else: df = pd.DataFrame([ line.strip() for line in open( FLAGS.eval_data, 'r', encoding='UTF-8').readlines() if line.strip() ], columns=['content']) # Save prediction result timestamp = str(int(time.time())) save_path = os.path.abspath( os.path.join(eval_dir, 'predicted_result_' + timestamp + '.csv')) df['predicted_label'] = y_logits_original print("Writing prediction result to {}...\n".format(save_path)) df.to_csv(save_path, header=True, index=False, sep='\t', encoding='utf-8')
# Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = True sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN( embedding_mat=word2vec_helpers.wordvector.astype(np.float32), non_static=FLAGS.non_static, GRU=FLAGS.GRU, sequence_length=max_document_length, num_classes=y_train.shape[1], hidden_layer_size=FLAGS.hidden_layer_size, vocab_size=word2vec_helpers.vocab_size, embedding_size=FLAGS.embedding_dim, attention_size=FLAGS.attention_size, l2_reg_lambda=FLAGS.l2_reg_lambda, ) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(rnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = []
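# The non_static flag passed to TextRNN above usually decides whether the
# pre-trained word2vec matrix is fine-tuned. A common TF1 pattern (a sketch,
# not necessarily this repo's implementation) exposes it through the trainable
# argument of the embedding variable:
import tensorflow as tf

def build_embedding_layer(embedding_mat, input_x, non_static):
    """Look up word vectors; freeze them when non_static is False."""
    embedding_W = tf.Variable(embedding_mat, trainable=non_static, name="embedding_W")
    return tf.nn.embedding_lookup(embedding_W, input_x)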
def train_rnn(): """Training RNN model.""" # Load sentences, labels, and training parameters logger.info('✔︎ Loading data...') logger.info('✔︎ Training data processing...') train_data = dh.load_data_and_labels(FLAGS.training_data_file, FLAGS.num_classes, FLAGS.embedding_dim) logger.info('✔︎ Validation data processing...') validation_data = \ dh.load_data_and_labels(FLAGS.validation_data_file, FLAGS.num_classes, FLAGS.embedding_dim) logger.info('Recommended padding Sequence length is: {0}'.format(FLAGS.pad_seq_len)) logger.info('✔︎ Training data padding...') x_train, y_train = dh.pad_data(train_data, FLAGS.pad_seq_len) logger.info('✔︎ Validation data padding...') x_validation, y_validation = dh.pad_data(validation_data, FLAGS.pad_seq_len) # Build vocabulary VOCAB_SIZE = dh.load_vocab_size(FLAGS.embedding_dim) pretrained_word2vec_matrix = dh.load_word2vec_matrix(VOCAB_SIZE, FLAGS.embedding_dim) # Build a graph and rnn object with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth sess = tf.Session(config=session_conf) with sess.as_default(): rnn = TextRNN( sequence_length=FLAGS.pad_seq_len, num_classes=FLAGS.num_classes, vocab_size=VOCAB_SIZE, lstm_hidden_size=FLAGS.lstm_hidden_size, fc_hidden_size=FLAGS.fc_hidden_size, embedding_size=FLAGS.embedding_dim, embedding_type=FLAGS.embedding_type, l2_reg_lambda=FLAGS.l2_reg_lambda, pretrained_embedding=pretrained_word2vec_matrix) # Define training procedure with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): learning_rate = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate, global_step=rnn.global_step, decay_steps=FLAGS.decay_steps, decay_rate=FLAGS.decay_rate, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) grads, vars = zip(*optimizer.compute_gradients(rnn.loss)) grads, _ = tf.clip_by_global_norm(grads, clip_norm=FLAGS.norm_ratio) train_op = optimizer.apply_gradients(zip(grads, vars), global_step=rnn.global_step, name="train_op") # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in zip(grads, vars): if g is not None: grad_hist_summary = tf.summary.histogram("{0}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar("{0}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries if FLAGS.train_or_restore == 'R': MODEL = input("☛ Please input the checkpoints model you want to restore, " "it should be like(1490175368): ") # The model you want to restore while not (MODEL.isdigit() and len(MODEL) == 10): MODEL = input('✘ The format of your input is illegal, please re-input: ') logger.info('✔︎ The format of your input is legal, now loading to next step...') checkpoint_dir = 'runs/' + MODEL + '/checkpoints/' out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", MODEL)) logger.info("✔︎ Writing to {0}\n".format(out_dir)) else: timestamp = str(int(time.time())) out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) logger.info("✔︎ Writing to {0}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", rnn.loss) # Train summaries train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, 
"summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # Validation summaries validation_summary_op = tf.summary.merge([loss_summary]) validation_summary_dir = os.path.join(out_dir, "summaries", "validation") validation_summary_writer = tf.summary.FileWriter(validation_summary_dir, sess.graph) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) if FLAGS.train_or_restore == 'R': # Load rnn model logger.info("✔ Loading model...") checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir) logger.info(checkpoint_file) # Load the saved meta graph and restore variables saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file)) saver.restore(sess, checkpoint_file) else: checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) # Embedding visualization config config = projector.ProjectorConfig() embedding_conf = config.embeddings.add() embedding_conf.tensor_name = 'embedding' embedding_conf.metadata_path = FLAGS.metadata_file projector.visualize_embeddings(train_summary_writer, config) projector.visualize_embeddings(validation_summary_writer, config) # Save the embedding visualization saver.save(sess, os.path.join(out_dir, 'embedding', 'embedding.ckpt')) current_step = sess.run(rnn.global_step) def train_step(x_batch, y_batch): """A single training step""" feed_dict = { rnn.input_x: x_batch, rnn.input_y: y_batch, rnn.dropout_keep_prob: FLAGS.dropout_keep_prob, rnn.is_training: True } _, step, summaries, loss = sess.run( [train_op, rnn.global_step, train_summary_op, rnn.loss], feed_dict) logger.info("step {0}: loss {1:g}".format(step, loss)) train_summary_writer.add_summary(summaries, step) def validation_step(x_validation, y_validation, writer=None): """Evaluates model on a validation set""" batches_validation = dh.batch_iter( list(zip(x_validation, y_validation)), FLAGS.batch_size, 1) # Predict classes by threshold or topk ('ts': threshold; 'tk': topk) eval_counter, eval_loss, eval_rec_ts, eval_acc_ts, eval_F_ts = 0, 0.0, 0.0, 0.0, 0.0 eval_rec_tk = [0.0] * FLAGS.top_num eval_acc_tk = [0.0] * FLAGS.top_num eval_F_tk = [0.0] * FLAGS.top_num for batch_validation in batches_validation: x_batch_validation, y_batch_validation = zip(*batch_validation) feed_dict = { rnn.input_x: x_batch_validation, rnn.input_y: y_batch_validation, rnn.dropout_keep_prob: 1.0, rnn.is_training: False } step, summaries, scores, cur_loss = sess.run( [rnn.global_step, validation_summary_op, rnn.scores, rnn.loss], feed_dict) # Predict by threshold predicted_labels_threshold, predicted_values_threshold = \ dh.get_label_using_scores_by_threshold(scores=scores, threshold=FLAGS.threshold) cur_rec_ts, cur_acc_ts, cur_F_ts = 0.0, 0.0, 0.0 for index, predicted_label_threshold in enumerate(predicted_labels_threshold): rec_inc_ts, acc_inc_ts = dh.cal_metric(predicted_label_threshold, y_batch_validation[index]) cur_rec_ts, cur_acc_ts = cur_rec_ts + rec_inc_ts, cur_acc_ts + acc_inc_ts cur_rec_ts = cur_rec_ts / len(y_batch_validation) cur_acc_ts = cur_acc_ts / len(y_batch_validation) cur_F_ts = dh.cal_F(cur_rec_ts, cur_acc_ts) eval_rec_ts, eval_acc_ts = eval_rec_ts + cur_rec_ts, eval_acc_ts + cur_acc_ts # Predict by topK topK_predicted_labels = [] for top_num in range(FLAGS.top_num): predicted_labels_topk, predicted_values_topk = \ dh.get_label_using_scores_by_topk(scores=scores, 
top_num=top_num+1) topK_predicted_labels.append(predicted_labels_topk) cur_rec_tk = [0.0] * FLAGS.top_num cur_acc_tk = [0.0] * FLAGS.top_num cur_F_tk = [0.0] * FLAGS.top_num for top_num, predicted_labels_topK in enumerate(topK_predicted_labels): for index, predicted_label_topK in enumerate(predicted_labels_topK): rec_inc_tk, acc_inc_tk = dh.cal_metric(predicted_label_topK, y_batch_validation[index]) cur_rec_tk[top_num], cur_acc_tk[top_num] = \ cur_rec_tk[top_num] + rec_inc_tk, cur_acc_tk[top_num] + acc_inc_tk cur_rec_tk[top_num] = cur_rec_tk[top_num] / len(y_batch_validation) cur_acc_tk[top_num] = cur_acc_tk[top_num] / len(y_batch_validation) cur_F_tk[top_num] = dh.cal_F(cur_rec_tk[top_num], cur_acc_tk[top_num]) eval_rec_tk[top_num], eval_acc_tk[top_num] = \ eval_rec_tk[top_num] + cur_rec_tk[top_num], eval_acc_tk[top_num] + cur_acc_tk[top_num] eval_loss = eval_loss + cur_loss eval_counter = eval_counter + 1 logger.info("✔︎ validation batch {0}: loss {1:g}".format(eval_counter, cur_loss)) logger.info("︎☛ Predict by threshold: recall {0:g}, accuracy {1:g}, F {2:g}" .format(cur_rec_ts, cur_acc_ts, cur_F_ts)) logger.info("︎☛ Predict by topK:") for top_num in range(FLAGS.top_num): logger.info("Top{0}: recall {1:g}, accuracy {2:g}, F {3:g}" .format(top_num + 1, cur_rec_tk[top_num], cur_acc_tk[top_num], cur_F_tk[top_num])) if writer: writer.add_summary(summaries, step) eval_loss = float(eval_loss / eval_counter) eval_rec_ts = float(eval_rec_ts / eval_counter) eval_acc_ts = float(eval_acc_ts / eval_counter) eval_F_ts = dh.cal_F(eval_rec_ts, eval_acc_ts) for top_num in range(FLAGS.top_num): eval_rec_tk[top_num] = float(eval_rec_tk[top_num] / eval_counter) eval_acc_tk[top_num] = float(eval_acc_tk[top_num] / eval_counter) eval_F_tk[top_num] = dh.cal_F(eval_rec_tk[top_num], eval_acc_tk[top_num]) return eval_loss, eval_rec_ts, eval_acc_ts, eval_F_ts, eval_rec_tk, eval_acc_tk, eval_F_tk # Generate batches batches_train = dh.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs) num_batches_per_epoch = int((len(x_train) - 1) / FLAGS.batch_size) + 1 # Training loop. For each batch... for batch_train in batches_train: x_batch_train, y_batch_train = zip(*batch_train) train_step(x_batch_train, y_batch_train) current_step = tf.train.global_step(sess, rnn.global_step) if current_step % FLAGS.evaluate_every == 0: logger.info("\nEvaluation:") eval_loss, eval_rec_ts, eval_acc_ts, eval_F_ts, eval_rec_tk, eval_acc_tk, eval_F_tk = \ validation_step(x_validation, y_validation, writer=validation_summary_writer) logger.info("All Validation set: Loss {0:g}".format(eval_loss)) # Predict by threshold logger.info("︎☛ Predict by threshold: Recall {0:g}, Accuracy {1:g}, F {2:g}" .format(eval_rec_ts, eval_acc_ts, eval_F_ts)) # Predict by topK logger.info("︎☛ Predict by topK:") for top_num in range(FLAGS.top_num): logger.info("Top{0}: Recall {1:g}, Accuracy {2:g}, F {3:g}" .format(top_num+1, eval_rec_tk[top_num], eval_acc_tk[top_num], eval_F_tk[top_num])) if current_step % FLAGS.checkpoint_every == 0: checkpoint_prefix = os.path.join(checkpoint_dir, "model") path = saver.save(sess, checkpoint_prefix, global_step=current_step) logger.info("✔︎ Saved model checkpoint to {0}\n".format(path)) if current_step % num_batches_per_epoch == 0: current_epoch = current_step // num_batches_per_epoch logger.info("✔︎ Epoch {0} has finished!".format(current_epoch)) logger.info("✔︎ Done.")
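# dh.cal_F above folds the averaged recall and accuracy into an F-score. The
# helper is not shown; a minimal sketch consistent with that usage is the
# harmonic mean with a zero-division guard (assumed; the real helper may weight
# the terms differently):
def cal_F(rec, acc):
    """F-measure of recall and accuracy/precision."""
    if rec + acc == 0:
        return 0.0
    return 2.0 * rec * acc / (rec + acc)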
def train(x_train, y_train, vocab_processor, x_dev, y_dev, x_real_len_train, x_real_len_dev, sorted_label): # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): if FLAGS.model_type == "cnnrnn": obj = TextCNNRNN(sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), hidden_unit=FLAGS.hidden_unit, embedding_size=FLAGS.embedding_dim, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.model_type == "rnncnn": obj = TextRNNCNN(sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), hidden_unit=FLAGS.hidden_unit, embedding_size=FLAGS.embedding_dim, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.model_type == "rnnandcnn": obj = TextRNNandCNN( sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), hidden_unit=FLAGS.hidden_unit, embedding_size=FLAGS.embedding_dim, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.model_type == "rnn": obj = TextRNN(sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), hidden_unit=FLAGS.hidden_unit, embedding_size=FLAGS.embedding_dim, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.model_type == "dan": obj = TextDAN(sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.model_type == "attn_cnn": obj = TextAttnCNN(sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, num_heads=FLAGS.num_heads, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) elif FLAGS.model_type == "dpcnn": obj = TextDPCNN(sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, num_blocks=FLAGS.num_blocks, l2_reg_lambda=FLAGS.l2_reg_lambda) else: obj = TextCNN(sequence_length=FLAGS.max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, filter_sizes=list( map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): grads_and_vars = optimizer.compute_gradients(obj.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for 
g, v in grads_and_vars: if g is not None: grad_hist_summary = tf.summary.histogram( "{}/grad/hist".format(v.name), g) sparsity_summary = tf.summary.scalar( "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) grad_summaries.append(grad_hist_summary) grad_summaries.append(sparsity_summary) grad_summaries_merged = tf.summary.merge(grad_summaries) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath( os.path.join(os.path.curdir, "runs", FLAGS.model_version)) print("Writing to {}\n".format(out_dir)) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", obj.loss) acc_summary = tf.summary.scalar("accuracy", obj.accuracy) # Train Summaries train_summary_op = tf.summary.merge( [loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # Save train params since eval.py needs them trained_dir = os.path.abspath( os.path.join(out_dir, "trained_results")) if not os.path.exists(trained_dir): os.makedirs(trained_dir) with open(trained_dir + '/sorted_label.json', 'w') as outfile: json.dump(sorted_label, outfile, indent=4, ensure_ascii=False) with open(trained_dir + '/train_params.json', 'w') as outfile: json.dump({"max_document_length": FLAGS.max_document_length}, outfile, indent=4, ensure_ascii=False) # Write vocabulary vocab_processor.save(os.path.join(out_dir, "vocab")) # Initialize all variables sess.run(tf.global_variables_initializer()) def train_step(x_batch, y_batch, x_real_len_batch): """ A single training step """ if FLAGS.model_type == "cnn" or FLAGS.model_type == "dan" or FLAGS.model_type == "attn_cnn" or FLAGS.model_type == "dpcnn": feed_dict = { obj.input_x: x_batch, obj.input_y: y_batch, obj.dropout_keep_prob: FLAGS.dropout_keep_prob, obj.is_training: True } else: feed_dict = { obj.input_x: x_batch, obj.input_y: y_batch, obj.dropout_keep_prob: FLAGS.dropout_keep_prob, obj.real_len: x_real_len_batch } _, step, summaries, loss, accuracy = sess.run([ train_op, global_step, train_summary_op, obj.loss, obj.accuracy ], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format( time_str, step, loss, accuracy)) train_summary_writer.add_summary(summaries, step) def overfit(dev_loss, eva_num=3): n = len(dev_loss) if n < eva_num: return False for i in xrange(n - eva_num + 1, n): if dev_loss[i] > dev_loss[i - 1]: return False return True def dev_step(x_batch, y_batch, x_real_len_batch, writer=None): """ Evaluates model on a dev set """ dev_batches = data_helpers.batch_iter(list( zip(x_batch, y_batch, x_real_len_batch)), FLAGS.batch_size, 1, shuffle=False) all_pred = [] correct_total_num = 0 for batch in dev_batches: x_dev_batch, y_dev_batch, x_real_len_dev_batch = zip( *batch) if FLAGS.model_type == "cnn" or FLAGS.model_type == "dan" or FLAGS.model_type == "attn_cnn" or 
FLAGS.model_type == "dpcnn": feed_dict = { obj.input_x: x_dev_batch, obj.input_y: y_dev_batch, obj.dropout_keep_prob: 1.0, obj.is_training: False } else: feed_dict = { obj.input_x: x_dev_batch, obj.input_y: y_dev_batch, obj.dropout_keep_prob: 1.0, obj.real_len: x_real_len_dev_batch } step, summaries, pred, correct_pred_num = sess.run([ global_step, dev_summary_op, obj.predictions, obj.correct_pred_num ], feed_dict) all_pred = np.concatenate([all_pred, pred]) correct_total_num += correct_pred_num if writer: writer.add_summary(summaries, step) dev_acc = 1.0 * correct_total_num / len(y_batch) print("right_sample {}, dev_sample {}, dev_acc {:g}".format( correct_total_num, len(y_batch), dev_acc)) return dev_acc # Generate batches batches = data_helpers.batch_iter( list(zip(x_train, y_train, x_real_len_train)), FLAGS.batch_size, FLAGS.num_epochs) # Training loop. For each batch... dev_acc = [] for batch in batches: x_batch, y_batch, x_real_len_batch = zip(*batch) train_step(x_batch, y_batch, x_real_len_batch) current_step = tf.train.global_step(sess, global_step) if current_step % FLAGS.evaluate_every == 0: print("\nEvaluation:", current_step) cur_acc = dev_step(x_dev, y_dev, x_real_len_dev, writer=dev_summary_writer) path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path)) dev_acc.append(cur_acc) if overfit(dev_acc): print("current accuracy drop and stop train..\n") sys.exit(0) print("")
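# data_helpers.batch_iter is used by most scripts in this collection but never
# defined here. A sketch of the common implementation (the real helper may
# differ in how it shuffles or handles the final partial batch):
import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield mini-batches of `data` for `num_epochs` passes over the dataset."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for _ in range(num_epochs):
        shuffled = data[np.random.permutation(data_size)] if shuffle else data
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield shuffled[start:end]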
def train_rnn(): # Data Preparation # ================================================== if FLAGS.init_embedding_path is not None: embedding = np.load(FLAGS.init_embedding_path) print("Using pre-trained word embedding which shape is {}\n".format( embedding.shape)) FLAGS.vocab_size = embedding.shape[0] FLAGS.embedding_size = embedding.shape[1] if FLAGS.init_model_path is not None: assert os.path.isdir( FLAGS.init_model_path), "init_model_path must be a directory\n" ckpt = tf.train.get_checkpoint_state(FLAGS.init_model_path) assert ckpt, "No checkpoint found in {}\n".format( FLAGS.init_model_path) assert ckpt.model_checkpoint_path, "No model_checkpoint_path found in checkpoint\n" # Create root directory timestamp = str(int(time.time())) root_dir = os.path.join(os.path.curdir, 'runs', 'textrnn', 'trained_result_' + timestamp) os.makedirs(root_dir) # Load data print("Loading data...\n") x, y = data_helpers.load_data(FLAGS.data_file, FLAGS.sequence_length, FLAGS.vocab_size, root_dir=root_dir) FLAGS.num_classes = len(y[0]) # Split dataset x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=FLAGS.test_size, stratify=y, random_state=0) x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5, random_state=0) # Training # ================================================== with tf.Graph().as_default(): tf_config = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth with tf.Session(config=tf_config).as_default() as sess: rnn = TextRNN(vocab_size=FLAGS.vocab_size, embedding_size=FLAGS.embedding_size, sequence_length=FLAGS.sequence_length, rnn_size=FLAGS.rnn_size, num_layers=FLAGS.num_layers, attention_size=FLAGS.attention_size, num_classes=FLAGS.num_classes, learning_rate=FLAGS.learning_rate, grad_clip=FLAGS.grad_clip) # Output directory for models and summaries out_dir = os.path.abspath(root_dir) print("Writing to {}...\n".format(out_dir)) # Summaries for loss and accuracy tf.summary.scalar("loss", rnn.loss) tf.summary.scalar("accuracy", rnn.accuracy) merged_summary = tf.summary.merge_all() # Summaries dictionary train_summary_dir = os.path.join(out_dir, 'summaries', 'train') val_summary_dir = os.path.join(out_dir, 'summaries', 'val') train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) val_summary_writer = tf.summary.FileWriter(val_summary_dir, sess.graph) # Checkpoint directory, will not create itself checkpoint_dir = os.path.abspath( os.path.join(out_dir, 'checkpoints')) checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt') if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=1) # Initialize all variables sess.run(tf.global_variables_initializer()) # Using pre-trained word embedding if FLAGS.init_embedding_path is not None: sess.run(rnn.embedding.assign(embedding)) del embedding # Continue training from saved model if FLAGS.init_model_path is not None: saver.restore(sess, ckpt.model_checkpoint_path) # Training start print("Start training...\n") best_at_step = 0 best_val_accuracy = 0 for epoch in range(FLAGS.num_epochs): # Generate train batches train_batches = data_helpers.batch_iter( list(zip(x_train, y_train)), FLAGS.batch_size) start = time.time() for batch in train_batches: # Training model on x_batch and y_batch x_batch, y_batch = zip(*batch) seq_len_train = data_helpers.real_len(x_batch) feed_dict = { rnn.input_x: x_batch, 
rnn.input_y: y_batch, rnn.seq_len: seq_len_train, rnn.keep_prob: FLAGS.dropout_keep_prob } _, global_step, train_summaries, train_loss, train_accuracy = sess.run( [ rnn.train_op, rnn.global_step, merged_summary, rnn.loss, rnn.accuracy ], feed_dict=feed_dict) # Evaluates model on val set if global_step % FLAGS.evaluate_every == 0: end = time.time() train_summary_writer.add_summary( train_summaries, global_step) seq_len_val = data_helpers.real_len(x_val) feed_dict = { rnn.input_x: x_val, rnn.input_y: y_val, rnn.seq_len: seq_len_val, rnn.keep_prob: 1.0 } val_summaries, val_loss, val_accuracy = sess.run( [merged_summary, rnn.loss, rnn.accuracy], feed_dict=feed_dict) val_summary_writer.add_summary(val_summaries, global_step) print( "Epoch: {}, global step: {}, training speed: {:.3f}sec/batch" .format(epoch, global_step, (end - start) / FLAGS.evaluate_every)) print( "train loss: {:.3f}, train accuracy: {:.3f}, val loss: {:.3f}, val accuracy: {:.3f}\n" .format(train_loss, train_accuracy, val_loss, val_accuracy)) # If improved, save the model if val_accuracy > best_val_accuracy: print( "Get a best val accuracy at step {}, model saving...\n" .format(global_step)) saver.save(sess, checkpoint_prefix, global_step=global_step) best_val_accuracy = val_accuracy best_at_step = global_step start = time.time() # Rename the checkpoint best_model_prefix = checkpoint_prefix + '-' + str(best_at_step) os.rename(best_model_prefix + '.index', os.path.join(checkpoint_dir, 'best_model.index')) os.rename(best_model_prefix + '.meta', os.path.join(checkpoint_dir, 'best_model.meta')) os.rename( best_model_prefix + '.data-00000-of-00001', os.path.join(checkpoint_dir, 'best_model.data-00000-of-00001')) # Testing on test set print( "\nTraining complete, testing the best model on test set...\n") saver.restore(sess, os.path.join(checkpoint_dir, 'best_model')) seq_len_test = data_helpers.real_len(x_test) feed_dict = { rnn.input_x: x_test, rnn.input_y: y_test, rnn.seq_len: seq_len_test, rnn.keep_prob: 1.0 } y_logits, test_accuracy = sess.run([rnn.logits, rnn.accuracy], feed_dict=feed_dict) print("Testing Accuracy: {:.3f}\n".format(test_accuracy)) label_transformer = joblib.load( os.path.join(out_dir, 'label_transformer.pkl')) y_test_original = label_transformer.inverse_transform(y_test) y_logits_original = label_transformer.inverse_transform(y_logits) print("Precision, Recall and F1-Score:\n\n", classification_report(y_test_original, y_logits_original)) # Save parameters print("Parameters saving...\n") params = {} for param, value in FLAGS.__flags.items(): params[param] = value with open(os.path.join(out_dir, 'parameters.json'), 'w') as outfile: json.dump(params, outfile, indent=4, sort_keys=True, ensure_ascii=False) # Save word embedding print("Word embedding saving...\n") np.save(os.path.join(out_dir, 'embedding.npy'), sess.run(rnn.embedding))
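# Note on the parameter dump above: on newer TF 1.x builds FLAGS.__flags maps
# names to Flag objects instead of plain values, which breaks json.dump. A
# version-tolerant sketch (flag_values_dict() is the absl-flags accessor):
def flags_to_dict(FLAGS):
    if hasattr(FLAGS, "flag_values_dict"):
        return FLAGS.flag_values_dict()
    return dict(FLAGS.__flags)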
lambda x: int(x) if x != "inf" else max_document_length, FLAGS.filter_sizes_char.split(","))), ) elif FLAGS.architecture == 7: neural_net = TextRNN( sequence_length=max_document_length, num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, embedding_dim_char=FLAGS.embedding_dim_char, num_filters_char=FLAGS.num_filters_char, num_neurons_fc=FLAGS.num_neurons_fc, num_neurons_fc_2=FLAGS.num_neurons_fc_2, margin=FLAGS.margin, rnn_num_layers=FLAGS.rnn_num_layers, loss_function=FLAGS.loss_function, rnn_hidden_size=FLAGS.rnn_hidden_size, max_token_length=max_token_length, char_vocab_size=len(char_vocabulary), filter_sizes_char=list( map( lambda x: int(x) if x != "inf" else max_document_length, FLAGS.filter_sizes_char.split(","))), ) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) # optimizer = tf.train.AdagradOptimizer(1e-3) optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
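# The excerpt above defines the optimizer, but the apply step falls outside it.
# A typical completion with global-norm clipping, as used by the other RNN
# scripts in this collection (assumptions: the TextRNN exposes a .loss tensor
# like the other models here, and 5.0 is a placeholder clip norm):
grads, variables = zip(*optimizer.compute_gradients(neural_net.loss))
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = optimizer.apply_gradients(zip(clipped_grads, variables),
                                     global_step=global_step)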
sess1 = tf.Session(config=session_conf)
sess2 = tf.Session(config=session_conf)
# Chain the context managers with a comma; `and` would only enter one of them
with sess1.as_default(), sess2.as_default():
    tf.set_random_seed(seed_value + number_of_network)
    rnn = TextRNN(
        sequence_length=x_train.shape[1],
        num_classes=y_train.shape[1],
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=embedding_dimension,
        l2_reg_lambda=FLAGS.l2_reg_lambda,
        weights=class_weights_for_cross_entropy,
        rel_pos_embedding_size=FLAGS.rel_pos_embedding_size,
        rel_pos_cardinality=relative_positions_cardinality,
        lstm_units=FLAGS.lstm_units,
        pos_tags_embedding_size=FLAGS.pos_tags_embedding_size,
        pos_tags_cardinality=pos_tags_cardinality,
        with_eye_tracking=cfg["features"]["gaze"],
        et_features_size=et.shape[2],
        et_number_of_bins=cfg["features"]["binned"],
        et_embedding_dimension=FLAGS.et_embedding_dimension,
        with_eeg=cfg["features"]["eeg"],
        eeg_features_size=eeg.shape[2],
        use_normalization_layer=True)
    cnn = TextCNN(
        sequence_length=x_train.shape[1],
        num_classes=y_train.shape[1],
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=embedding_dimension,
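# Even with both as_default() calls chained, only the innermost session is the
# implicit default inside the block, so anything meant for a specific session
# is safer to run explicitly (usage sketch; the op and feed names are
# placeholders):
#     loss_rnn = sess1.run(rnn.loss, feed_dict)
#     loss_cnn = sess2.run(cnn.loss, feed_dict)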