import csv
import datetime
import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn


def Predict(data_dir, checkpoint_dir, document_length_limit, batch_size,
            is_line_as_word):
    # Restore the vocabulary saved next to the checkpoints during training
    vocab_path = os.path.join(checkpoint_dir, "..", "vocab")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
        vocab_path)

    # Load data and map tokens to vocabulary ids
    x_raw, filenames = load_data_and_filenames(data_dir,
                                               document_length_limit,
                                               is_line_as_word)
    x_test = np.array(list(vocab_processor.transform(x_raw)))

    # Evaluation: restore the latest checkpoint and fetch graph tensors by name
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            input_x = graph.get_operation_by_name("x_input").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            mode = graph.get_operation_by_name("mode").outputs[0]
            fc = graph.get_operation_by_name("fc/fc").outputs[0]
            predictions = graph.get_operation_by_name(
                "predictions").outputs[0]

            batches = data_iter(list(x_test), batch_size, 1, shuffle=False)
            all_predictions = []
            all_fc_scores = None
            for x_test_batch in batches:
                batch_fc_score, batch_predictions = sess.run(
                    [fc, predictions], {
                        input_x: x_test_batch,
                        dropout_keep_prob: 1.0,  # no dropout at inference
                        mode: False
                    })
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])
                if all_fc_scores is None:
                    all_fc_scores = batch_fc_score
                else:
                    all_fc_scores = np.concatenate(
                        [all_fc_scores, batch_fc_score])
    return all_predictions
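# Hypothetical usage sketch for Predict(); the paths, length limit, and batch
# size are illustrative assumptions, not values taken from this repo.
def example_predict_run():
    preds = Predict(data_dir="./data/unlabeled",
                    checkpoint_dir="./runs/textcnn/checkpoints",
                    document_length_limit=1000,
                    batch_size=64,
                    is_line_as_word=False)
    print("predicted class ids:", preds[:10])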
def train(x_train, y_train, vocab_processor, x_dev, y_dev, num_classes,
          embedding_size, filter_sizes, stride_h, num_filters, keep_prob_rate,
          learning_rate_val, decay_steps, decay_rate, l2_lambda, batch_size,
          epoch_num, evaluate_every, checkpoint_every, is_finetune, is_bn,
          out_dir):
    # Parameters
    sequence_length = x_train.shape[1]
    vocab_size = len(vocab_processor.vocabulary_)
    print("debug sequence len:", sequence_length)

    # Graph input
    x_input = tf.placeholder(tf.int32, [None, sequence_length],
                             name="x_input")
    y_input = tf.placeholder(tf.float32, [None, num_classes], name="y_input")
    keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    is_training = tf.placeholder(tf.bool, name="mode")

    # Global step
    global_step = tf.Variable(0, name="global_step", trainable=False)

    # Build the model
    pred = Model.cnn_text(x_input, sequence_length, vocab_size,
                          embedding_size, stride_h, filter_sizes, num_filters,
                          num_classes, keep_prob, l2_lambda, is_bn,
                          is_training)

    # Loss
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=pred,
                                                     labels=y_input)
    loss = tf.reduce_mean(losses)
    if l2_lambda > 0:
        # Only sum the "losses" collection when L2 terms were actually added;
        # with l2_lambda == 0 the collection holds nothing to add.
        tf.add_to_collection("losses", loss)
        loss = tf.add_n(tf.get_collection("losses"))

    # Learning rate decay
    learning_rate = tf.train.exponential_decay(
        learning_rate=learning_rate_val,
        global_step=global_step,
        decay_steps=decay_steps,
        decay_rate=decay_rate,
        staircase=True)

    # Optimizer (batch-norm update ops must run before the gradient step)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        grads_and_vars = optimizer.compute_gradients(loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

    # Evaluation
    predictions = tf.argmax(pred, 1, name="predictions")  # needed in test
    correct_pred = tf.equal(predictions, tf.argmax(y_input, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"), name="accuracy")

    # Init
    init = tf.global_variables_initializer()

    # Summaries
    dev_ratio = x_dev.shape[0] * 1.0 / (x_train.shape[0] + x_dev.shape[0])
    train_summary_path = os.path.join(out_dir, "summaries", "train_v1")
    dev_summary_path = os.path.join(out_dir, "summaries", "dev")
    if not os.path.exists(train_summary_path):
        os.makedirs(train_summary_path)
    if not os.path.exists(dev_summary_path):
        os.makedirs(dev_summary_path)
    train_summary_writer = tf.summary.FileWriter(train_summary_path)
    dev_summary_writer = tf.summary.FileWriter(dev_summary_path)

    grad_summaries = []
    for g, v in grads_and_vars:
        if g is not None:
            grad_hist_summary = tf.summary.histogram(
                "{}/grad/hist".format(v.name), g)
            sparsity_summary = tf.summary.scalar(
                "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    grad_summaries_merged = tf.summary.merge(grad_summaries)

    loss_summary = tf.summary.scalar("loss", loss)
    acc_summary = tf.summary.scalar("accuracy", accuracy)
    train_summary_op = tf.summary.merge(
        [loss_summary, acc_summary, grad_summaries_merged])
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])

    # Saver: also keep batch-norm moving statistics, which are not trainable
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()
    bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
    bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
    var_list += bn_moving_vars
    saver = tf.train.Saver(var_list=var_list, max_to_keep=5)

    # Write vocabulary
    vocab_processor.save(os.path.join(out_dir, "vocab"))

    def train_step(x_batch, y_batch):
        feed_dict = {
            x_input: x_batch,
            y_input: y_batch,
            keep_prob: keep_prob_rate,
            is_training: True
        }
        _, step, summaries, train_loss, train_accuracy = sess.run(
            [train_op, global_step, train_summary_op, loss, accuracy],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("[train_v1] {}: step {}, loss {}, acc {}".format(
            time_str, step, train_loss, train_accuracy))
        train_summary_writer.add_summary(summaries, step)

    def dev_step(x_batch, y_batch):
        feed_dict = {
            x_input: x_batch,
            y_input: y_batch,
            keep_prob: 1.0,  # no dropout at evaluation time
            is_training: False
        }
        step, summaries, test_loss, test_accuracy = sess.run(
            [global_step, dev_summary_op, loss, accuracy], feed_dict)
        time_str = datetime.datetime.now().isoformat()
        print("[test] {}: step {}, loss {}, acc {}".format(
            time_str, step, test_loss, test_accuracy))
        dev_summary_writer.add_summary(summaries, step)

    with tf.Session() as sess:
        sess.run(init)
        train_summary_writer.add_graph(sess.graph)
        dev_summary_writer.add_graph(sess.graph)
        if is_finetune:
            print("Load pre-trained word vector")
            init_weight(np.concatenate([x_train, x_dev]), sess,
                        embedding_size, vocab_size)
        # Materialize the zip so data_iter can reshuffle it each epoch
        train_data = list(zip(x_train, y_train))
        print("number of training examples:", len(train_data))
        batches = data_iter(train_data, batch_size, epoch_num)
        for batch_x_y in batches:
            x_batch, y_batch = zip(*batch_x_y)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(x_dev, y_dev)
                print("")
            if current_step % checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix,
                                  global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
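# Minimal sketch of wiring up train(): builds a vocabulary with the same
# VocabularyProcessor used above and calls train(). The 10% dev split and all
# hyperparameter values below are illustrative assumptions; supply x_text
# (raw documents) and y (one-hot labels) from your own data pipeline.
def example_train_run(x_text, y, out_dir):
    max_document_length = max(len(x.split(" ")) for x in x_text)
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))
    dev_count = int(0.1 * len(x))  # hold out 10% for evaluation
    x_train, x_dev = x[:-dev_count], x[-dev_count:]
    y_train, y_dev = y[:-dev_count], y[-dev_count:]
    train(x_train, y_train, vocab_processor, x_dev, y_dev,
          num_classes=y.shape[1], embedding_size=128, filter_sizes=[3, 4, 5],
          stride_h=1, num_filters=128, keep_prob_rate=0.5,
          learning_rate_val=1e-3, decay_steps=1000, decay_rate=0.95,
          l2_lambda=0.0, batch_size=64, epoch_num=10, evaluate_every=100,
          checkpoint_every=100, is_finetune=False, is_bn=True,
          out_dir=out_dir)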
def test(data_dirs, checkpoint_dir, document_length_limit, batch_size,
         is_line_as_word):
    # Restore vocabulary
    vocab_path = os.path.join(checkpoint_dir, "..", "vocab")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(
        vocab_path)

    # Load data
    x_raw, y_test, filenames = load_data_label_and_filenames(
        data_dirs, document_length_limit, is_line_as_word)
    x_test = np.array(list(vocab_processor.transform(x_raw)))
    y_test = np.argmax(y_test, axis=1)

    # Evaluation
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            input_x = graph.get_operation_by_name("x_input").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            mode = graph.get_operation_by_name("mode").outputs[0]
            fc = graph.get_operation_by_name("fc/fc").outputs[0]
            predictions = graph.get_operation_by_name(
                "predictions").outputs[0]

            batches = data_iter(list(x_test), batch_size, 1, shuffle=False)
            all_predictions = []
            all_fc_scores = None
            for x_test_batch in batches:
                batch_fc_score, batch_predictions = sess.run(
                    [fc, predictions], {
                        input_x: x_test_batch,
                        dropout_keep_prob: 1.0,
                        mode: False
                    })
                all_predictions = np.concatenate(
                    [all_predictions, batch_predictions])
                if all_fc_scores is None:
                    all_fc_scores = batch_fc_score
                else:
                    all_fc_scores = np.concatenate(
                        [all_fc_scores, batch_fc_score])

    correct_predictions = float(sum(all_predictions == y_test))
    print("Total number of test examples: {}".format(len(y_test)))
    print("Accuracy: {:g}".format(correct_predictions / float(len(y_test))))

    # Save results: one row of filename, prediction, label, fc scores
    predictions_human_readable = np.column_stack(
        (np.array(filenames), all_predictions, y_test, all_fc_scores))
    out_path = os.path.join(checkpoint_dir, "..", "prediction.csv")
    print("Saving evaluation to {0}".format(out_path))
    with open(out_path, "w") as f:
        csv.writer(f).writerows(predictions_human_readable)

    wrong_predictions_human_readable = predictions_human_readable[
        all_predictions != y_test]
    wrong_path = os.path.join(checkpoint_dir, "..", "wrong.csv")
    print("Saving wrong evaluation to {0}".format(wrong_path))
    with open(wrong_path, "w") as f:
        csv.writer(f).writerows(wrong_predictions_human_readable)
def test_step(input_, label_, d, fw):
    totals_ = 0
    corrects_ = 0
    feed_dict = {lstm.input: input_, lstm.dropout: d}
    targets = sess.run(lstm.targets, feed_dict)
    lens = sess.run(lstm.sen_lens)
    # Compare predicted tags with gold labels token by token, writing
    # "gold predicted" pairs, one sentence per block
    for targets_, l_, lens_ in zip(targets, label_, lens):
        t = targets_[:lens_]
        l = l_[:lens_]
        for k in range(lens_):
            if t[k] == l[k]:
                corrects_ += 1
            fw.write(label_hotone[l[k]] + ' ' + label_hotone[t[k]] + '\n')
        totals_ += lens_
        fw.write('\n')
    return corrects_, totals_


data = data_iter(doc, label, sparse, num_epoch, batch_size)
for input_, label_ in data:
    train_step(input_, label_, True)

corrects = 0
totals = 0
test_data = data_iter(doc_t, label_t, sparse_t, 1, 1)
for input_, label_ in test_data:
    corrects_, totals_ = test_step(input_, label_, False, fw)
    corrects += corrects_
    totals += totals_
print('accuracy: ' + str(1.0 * corrects / totals))
fw.close()
def sgd(params, params_grads, lr, batch_size):
    # Vanilla mini-batch SGD: param <- param - lr * grad / batch_size
    out_params = []
    for param, grad in zip(params, params_grads):
        param = param - lr * grad / batch_size
        out_params.append(param)
    return out_params


grad_w, grad_b = ad.gradients(loss, [weight, bias])
loss_total = ad.reduce_sum_op(loss)
executor = ad.Executor([loss_total, grad_w, grad_b])

weight_val = np.zeros((1, feature_dim))
bias_val = np.zeros((1,))
# weight_val = true_w.reshape((1, feature_dim))
# bias_val = true_b

for epoch in range(epoches):
    for batch_idx, feat_val, label_val in data_iter(batch_size, features,
                                                    labels):
        label_val = label_val.reshape((batch_size, 1))
        loss_val, grad_w_val, grad_b_val = executor.run(feed_dict={
            x: feat_val,
            label: label_val,
            weight: weight_val,
            bias: bias_val
        })
        if batch_idx % 100 == 0:
            print("[Epoch {}, Batch {}] loss : {}".format(
                epoch, batch_idx, loss_val))
        params = [weight_val, bias_val]
        params_grads = [grad_w_val.T, np.sum(grad_b_val)]
        weight_val, bias_val = sgd(params, params_grads, lr, batch_size)
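# The mini-batch iterator used above is defined elsewhere in the repo; a
# hypothetical, signature-compatible sketch (yielding a batch index, a feature
# batch, and a label batch over shuffled data) might look like this:
def data_iter_sketch(batch_size, features, labels):
    num_examples = features.shape[0]
    indices = np.random.permutation(num_examples)
    for batch_idx in range(num_examples // batch_size):
        idx = indices[batch_idx * batch_size:(batch_idx + 1) * batch_size]
        yield batch_idx, features[idx], labels[idx]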