def vdcnn_model(X_train, y_train, X_test, y_test, sequence_max_length):
    """Train a VDCNN classifier and return test accuracy (in percent) and predictions."""
    model = VDCNN(num_classes=y_train.shape[1], sequence_length=sequence_max_length)
    # Single-column labels are treated as binary, otherwise multi-class.
    if y_train.shape[1] == 1:
        model.compile(loss='binary_crossentropy',
                      optimizer=SGD(lr=0.0001, momentum=0.9),
                      metrics=['accuracy'])
    else:
        model.compile(loss='categorical_crossentropy',
                      optimizer=SGD(lr=0.0001, momentum=0.9),
                      metrics=['accuracy'])
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=50, batch_size=128)
    scores = model.evaluate(X_test, y_test)
    accuracy = scores[1] * 100
    y_pred = model.predict(X_test)
    return accuracy, y_pred
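# A minimal usage sketch for vdcnn_model above. The shapes, the 69-character
# vocabulary, and the 4-class setup are illustrative assumptions, not values
# taken from the original code.
import numpy as np

seq_len = 1024
X_train = np.random.randint(0, 69, size=(100, seq_len))   # padded char-index sequences
y_train = np.eye(4)[np.random.randint(0, 4, size=100)]    # one-hot labels
X_test = np.random.randint(0, 69, size=(20, seq_len))
y_test = np.eye(4)[np.random.randint(0, 4, size=20)]

accuracy, y_pred = vdcnn_model(X_train, y_train, X_test, y_test, seq_len)
print("test accuracy: {:.2f}%".format(accuracy))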
def __init__(self, task='ag'):
    sequence_max_length = 200
    downsampling_type = 'maxpool'
    depth = 9
    num_layers = 4
    use_he_uniform = True
    optional_shortcut = True
    # Resolve all checkpoint paths relative to this file so the loader works
    # regardless of the current working directory.
    current_path = '/'.join(os.path.realpath(__file__).split('/')[:-1])
    model_root = opj(current_path, 'pretrained_models')
    model_path = {
        'ag': opj(model_root, 'ag', 'model-step92000.ckpt'),
        'yelp': opj(model_root, 'yelp', 'model-step282000.ckpt'),
        'ag-cam': opj(model_root, 'ag-cam', 'model-step10000.ckpt'),
        'yelp-cam': opj(model_root, 'yelp-cam', 'model-step54000.ckpt'),
        'dbpedia': opj(model_root, 'dbpedia', 'model-step40000.ckpt'),
    }
    num_classes = {'ag': 4, 'yelp': 2, 'ag-cam': 4, 'yelp-cam': 2, 'dbpedia': 14}
    self.cnn = VDCNN(num_classes=num_classes[task],
                     depth=depth,
                     sequence_max_length=sequence_max_length,
                     use_he_uniform=use_he_uniform,
                     optional_shortcut=optional_shortcut)
    self.data_helper = data_helper(sequence_max_length=sequence_max_length)
    self.sess = tf.Session()
    saver = tf.train.Saver()
    saver.restore(self.sess, model_path[task])
    # Collect the output tensor of each residual stage so intermediate
    # feature maps can be fetched by name later.
    self.features = {}
    for layer_index in range(num_layers):
        if layer_index == 0:
            layer_tensor_name = 'add:0'
        else:
            layer_tensor_name = 'add_%d:0' % (layer_index * 2)
        layer_name = 'conv_%d' % layer_index
        layer_tensor = tf.get_default_graph().get_tensor_by_name(layer_tensor_name)
        self.features[layer_name] = layer_tensor
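# Hedged usage sketch for the loader above. `VDCNNFeatures` is a hypothetical
# name for the enclosing class, and `char2vec` stands in for whatever encoding
# method the real data_helper exposes; both names are assumptions.
extractor = VDCNNFeatures(task='ag')
encoded = extractor.data_helper.char2vec("some news headline")   # assumed encoder
feature_maps = extractor.sess.run(
    extractor.features,
    feed_dict={extractor.cnn.input_x: [encoded],
               extractor.cnn.is_training: False})
for name, fmap in feature_maps.items():
    print(name, fmap.shape)   # one feature map per residual stage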
def predict_label_vdcnn(model_file, unlabeled_file, resulting_labels, num_classes,
                        classes_weights=tf.constant([1.0, 1.0])):
    tf.reset_default_graph()

    # Load the unlabeled data
    data_helper_cl = DataHelper(sequence_max_length=FLAGS.sequence_max_length)
    unlabeled_data = data_helper_cl.load_unlabeled_data(unlabeled_file)

    # ConvNet
    sess = tf.Session()
    cnn = VDCNN(num_classes=num_classes,
                depth=FLAGS.depth,
                sequence_max_length=FLAGS.sequence_max_length,
                downsampling_type=FLAGS.downsampling_type,
                use_he_uniform=FLAGS.use_he_uniform,
                optional_shortcut=FLAGS.optional_shortcut,
                keep_prob=FLAGS.keep_prob,
                classes_weights=classes_weights)

    def prediction_step(x_batch):
        feed_dict = {cnn.input_x: x_batch, cnn.is_training: False}
        softmax_output = sess.run([cnn.softmax_output], feed_dict)
        return softmax_output

    saver = tf.train.Saver()
    saver.restore(sess, model_file)

    # Predict labels batch by batch and append each softmax row as a CSV line.
    unlabeled_batches = data_helper_cl.batch_iter(unlabeled_data, FLAGS.batch_size,
                                                  1, shuffle=False)
    for unlabeled_batch in unlabeled_batches:
        softmax_output = prediction_step(unlabeled_batch)
        with open(resulting_labels, 'a') as f:
            for row in softmax_output[0]:
                f.write(",".join(str(item) for item in row) + "\n")
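# Example invocation (sketch): the paths are placeholders, and num_classes must
# match the restored checkpoint; the two-class default weights are kept here.
predict_label_vdcnn(model_file='checkpoints/model-best.ckpt',      # hypothetical path
                    unlabeled_file='data/unlabeled.txt',           # hypothetical path
                    resulting_labels='data/predicted_labels.csv',  # hypothetical path
                    num_classes=2)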
# Map the requested depth to the number of convolutional blocks per stage;
# the total number of conv layers is sum(depth) * 2 + 1.
if depth == 9:
    depth = [1, 1, 1, 1]
elif depth == 17:
    depth = [2, 2, 2, 2]
elif depth == 29:
    depth = [5, 5, 2, 2]
else:
    print('depth must be one of (9, 17, 29)')
    sys.exit()
print('VDCNN setting: emb_dim={}, n_out={}, depth={}'.format(
    len(char2id) + 1, kind, sum(depth) * 2 + 1))
gpu_id = args.gpu
model = VDCNN(len(char2id) + 1, kind, depth)
if gpu_id >= 0:
    model.to_gpu(gpu_id)
print(mode, train_x.shape, train_y.shape, test_x.shape, test_y.shape)
train = datasets.TupleDataset(train_x, train_y)
test = datasets.TupleDataset(test_x, test_y)
batch_size = 128
train_iter = iterators.SerialIterator(train, batch_size)
test_iter = iterators.SerialIterator(test, batch_size, False, False)
epoch_size = 5000
max_epoch = 15
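# A sketch of the Chainer training driver that would typically follow this
# setup. The Classifier wrapper (softmax cross-entropy over the VDCNN logits),
# the learning rate, and the output directory are assumptions.
from chainer import links, optimizers, training
from chainer.training import extensions

classifier = links.Classifier(model)             # assumes VDCNN outputs logits
optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(classifier)
updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(test_iter, classifier, device=gpu_id))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'main/accuracy',
     'validation/main/loss', 'validation/main/accuracy']))
trainer.run()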
# Data Preparation
# Load data
print("Loading data...")
data_helper = data_helper(sequence_max_length=FLAGS.sequence_max_length,
                          use_title=FLAGS.use_title)
train_data, train_label, train_texts, test_data, test_label, test_texts = \
    data_helper.load_dataset(FLAGS.database_path)
num_batches_per_epoch = int((len(train_data) - 1) / FLAGS.batch_size) + 1
print("Loading data succeeded.")

# ConvNet
acc_list = [0]
sess = tf.Session()
cnn = VDCNN(num_classes=train_label.shape[1],
            depth=FLAGS.depth,
            sequence_max_length=FLAGS.sequence_max_length,
            downsampling_type=FLAGS.downsampling_type,
            use_he_uniform=FLAGS.use_he_uniform,
            optional_shortcut=FLAGS.optional_shortcut)

# Optimizer and LR decay; batch-norm update ops must run before each train step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    global_step = tf.Variable(0, name="global_step", trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                               FLAGS.num_epochs * num_batches_per_epoch,
                                               0.95, staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
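    # The fragment above stops mid-setup. A typical continuation, mirroring the
    # other training scripts in this section (the clip norm of 5.0 is an assumption):
    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
    train_op = optimizer.apply_gradients(zip(gradients, variables),
                                         global_step=global_step)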
# Training
# ==================================================
# Graph construction phase
# Input data.
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = VDCNN()

        # Ensure the update ops (e.g. batch norm) execute before the train op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        # Initialize all variables
        print("START %s" % datetime.datetime.now())
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
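        # The snippet ends after building the graph. A sketch of the train step
        # that would typically follow (the feed names mirror the other scripts
        # in this section and are assumptions for this minimal VDCNN()):
        def train_step(x_batch, y_batch):
            feed_dict = {cnn.input_x: x_batch,
                         cnn.input_y: y_batch,
                         cnn.is_training: True}
            _, step, loss = sess.run([train_op, global_step, cnn.loss], feed_dict)
            return step, loss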
def train():
    # Data Preparation
    # Load data
    print("Loading data...")
    x, y = data_loader.read_data(FLAGS.pos_data, FLAGS.neg_data,
                                 FLAGS.max_sequence_length)
    print("Data Size:", len(y))

    # Shuffle, then hold out the last dev_percentage of the data as the dev set.
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    del x, y, x_shuffled, y_shuffled
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
    num_batches_per_epoch = int((len(x_train) - 1) / FLAGS.batch_size) + 1
    print("Loading data succeeded.")

    # ConvNet
    acc_list = [0]
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    session_conf.gpu_options.allow_growth = True
    # session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
    sess = tf.Session(config=session_conf)
    cnn = VDCNN(num_classes=y_train.shape[1],
                num_quantized_chars=FLAGS.vocab_size,
                depth=FLAGS.depth,
                sequence_max_length=FLAGS.max_sequence_length,
                downsampling_type=FLAGS.downsampling_type,
                use_he_uniform=FLAGS.use_he_uniform,
                optional_shortcut=FLAGS.optional_shortcut)

    # Optimizer and LR decay; batch-norm update ops run before each train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(
            FLAGS.learning_rate, global_step,
            FLAGS.num_epochs * num_batches_per_epoch, 0.95, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)

    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss and accuracy
    loss_summary = tf.summary.scalar("loss", cnn.loss)
    acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

    # Train summaries
    train_summary_op = tf.summary.merge([loss_summary, acc_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

    # Checkpoint directory. TensorFlow assumes this directory already exists,
    # so we need to create it.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=FLAGS.num_checkpoints)

    # Initialize graph
    sess.run(tf.global_variables_initializer())
    # sess = tfdbg.LocalCLIDebugWrapperSession(sess)  # session wrapped by the debugger
    # sess.add_tensor_filter("has_inf_or_nan", tfdbg.has_inf_or_nan)  # debugger filter rule

    # Train step and test step
    def train_step(x_batch, y_batch):
        """A single training step."""
        feed_dict = {cnn.input_x: x_batch,
                     cnn.input_y: y_batch,
                     cnn.is_training: True}
        _, step, summaries, loss, accuracy = sess.run(
            [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
            feed_dict)
        train_summary_writer.add_summary(summaries, step)
        time_str = datetime.datetime.now().isoformat()
        print("{}: Step {}, Epoch {}, Loss {:g}, Acc {:g}".format(
            time_str, step, int(step // num_batches_per_epoch) + 1, loss, accuracy))
        # if step % FLAGS.evaluate_every == 0 and FLAGS.enable_tensorboard:
        #     summaries = sess.run(train_summary_op, feed_dict)
        #     train_summary_writer.add_summary(summaries, global_step=step)

    def test_step(x_batch, y_batch):
        """Evaluates the model on a dev batch."""
        feed_dict = {cnn.input_x: x_batch,
                     cnn.input_y: y_batch,
                     cnn.is_training: False}
        summaries_dev, loss, preds, step = sess.run(
            [dev_summary_op, cnn.loss, cnn.predictions, global_step], feed_dict)
        dev_summary_writer.add_summary(summaries_dev, step)
        return preds, loss

    # Generate batches
    batches = data_loader.batch_iter(list(zip(x_train, y_train)),
                                     FLAGS.batch_size, FLAGS.num_epochs)

    # Training loop. For each batch...
    for train_batch in batches:
        x_batch, y_batch = zip(*train_batch)
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)

        # Evaluation loop: a prediction equal to the argmax label leaves a zero
        # in y_preds, so accuracy is the fraction of zeros.
        if current_step % FLAGS.evaluate_every == 0:
            print("\nEvaluation:")
            i = 0
            index = 0
            sum_loss = 0
            test_batches = data_loader.batch_iter(list(zip(x_dev, y_dev)),
                                                  FLAGS.batch_size, 1,
                                                  shuffle=False)
            y_preds = np.ones(shape=len(y_dev), dtype=np.int)
            for test_batch in test_batches:
                x_test_batch, y_test_batch = zip(*test_batch)
                preds, test_loss = test_step(x_test_batch, y_test_batch)
                sum_loss += test_loss
                res = np.absolute(preds - np.argmax(y_test_batch, axis=1))
                y_preds[index:index + len(res)] = res
                i += 1
                index += len(res)
            time_str = datetime.datetime.now().isoformat()
            acc = np.count_nonzero(y_preds == 0) / len(y_preds)
            acc_list.append(acc)
            print("{}: Evaluation Summary, Loss {:g}, Acc {:g}".format(
                time_str, sum_loss / i, acc))
            print("{}: Current Max Acc {:g} in Iteration {}".format(
                time_str, max(acc_list),
                int(acc_list.index(max(acc_list)) * FLAGS.evaluate_every)))

        if current_step % FLAGS.checkpoint_every == 0:
            path = saver.save(sess, checkpoint_prefix, global_step=current_step)
            print("Saved model checkpoint to {}\n".format(path))
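# For reference, a minimal sketch of how the FLAGS consumed by train() are
# typically declared in these TF 1.x scripts. The flag names match the usage
# above; every default value below is an illustrative assumption.
tf.flags.DEFINE_string("pos_data", "./data/pos.txt", "Path to positive examples")
tf.flags.DEFINE_string("neg_data", "./data/neg.txt", "Path to negative examples")
tf.flags.DEFINE_integer("max_sequence_length", 1024, "Maximum sequence length")
tf.flags.DEFINE_float("dev_percentage", 0.1, "Fraction of data held out as dev set")
tf.flags.DEFINE_integer("vocab_size", 69, "Number of quantized characters")
tf.flags.DEFINE_integer("batch_size", 128, "Batch size")
tf.flags.DEFINE_integer("num_epochs", 50, "Number of training epochs")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate on dev set every N steps")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save a checkpoint every N steps")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to keep")
# (plus depth, learning_rate, downsampling_type, use_he_uniform, optional_shortcut, ...)
FLAGS = tf.flags.FLAGS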
def train_vdcnn(dataset_path, model_file, classes_weights=tf.constant([1.0, 1.0])):
    tf.reset_default_graph()
    max_accuracy = 0
    restore = True
    results_output = dataset_path + 'vdcnn_res.txt'

    # Load data
    data_helper = DataHelper(sequence_max_length=FLAGS.sequence_max_length)
    train_data, train_label, test_data, test_label = data_helper.load_dataset(dataset_path)
    num_batches_per_epoch = int((len(train_data) - 1) / FLAGS.batch_size) + 1

    # ConvNet
    acc_list = [0]
    sess = tf.Session()
    cnn = VDCNN(num_classes=train_label.shape[1],
                depth=FLAGS.depth,
                sequence_max_length=FLAGS.sequence_max_length,
                downsampling_type=FLAGS.downsampling_type,
                use_he_uniform=FLAGS.use_he_uniform,
                optional_shortcut=FLAGS.optional_shortcut,
                keep_prob=FLAGS.keep_prob,
                classes_weights=classes_weights)

    # Optimizer and LR decay
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(
            FLAGS.learning_rate, global_step,
            FLAGS.num_epochs * num_batches_per_epoch, 0.95, staircase=True)
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 7.0)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)

    # Either resume from an existing checkpoint or start fresh.
    if restore:
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, os.path.join(os.getcwd(), model_file))
    else:
        sess.run(tf.global_variables_initializer())

    with open(results_output, 'a') as f:
        f.write("================================\n"
                " New round of training \n"
                "================================\n")
        f.write(str(FLAGS.optional_shortcut) + ' ' + str(FLAGS.keep_prob) + '\n')

    # Train step and test step
    def train_step(x_batch, y_batch):
        """A single training step."""
        feed_dict = {cnn.input_x: x_batch,
                     cnn.input_y: y_batch,
                     cnn.is_training: True}
        _, step, loss, accuracy = sess.run(
            [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)
        time_str = datetime.datetime.now().isoformat()
        with open(results_output, 'a') as f:
            f.write("{}: Step {}, Epoch {}, Loss {:g}, Acc {:g}\n".format(
                time_str, step, int(step // num_batches_per_epoch) + 1,
                loss, accuracy))

    def test_step(x_batch, y_batch):
        """Evaluates the model on a dev batch."""
        feed_dict = {cnn.input_x: x_batch,
                     cnn.input_y: y_batch,
                     cnn.is_training: False}
        loss, preds = sess.run([cnn.loss, cnn.predictions], feed_dict)
        return preds, loss

    # Generate batches
    train_batches = data_helper.batch_iter(list(zip(train_data, train_label)),
                                           FLAGS.batch_size, FLAGS.num_epochs)

    # Training loop. For each batch...
    for train_batch in train_batches:
        x_batch, y_batch = zip(*train_batch)
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)

        # Evaluation loop
        if current_step % FLAGS.evaluate_every == 0:
            with open(results_output, 'a') as f:
                f.write("\nEvaluation:\n")
            i = 0
            index = 0
            sum_loss = 0
            test_batches = data_helper.batch_iter(list(zip(test_data, test_label)),
                                                  FLAGS.batch_size, 1,
                                                  shuffle=False)
            y_preds = np.ones(shape=len(test_label), dtype=np.int)
            for test_batch in test_batches:
                x_test_batch, y_test_batch = zip(*test_batch)
                preds, test_loss = test_step(x_test_batch, y_test_batch)
                sum_loss += test_loss
                res = np.absolute(preds - np.argmax(y_test_batch, axis=1))
                y_preds[index:index + len(res)] = res
                i += 1
                index += len(res)
            time_str = datetime.datetime.now().isoformat()
            acc = np.count_nonzero(y_preds == 0) / len(y_preds)
            acc_list.append(acc)
            with open(results_output, 'a') as f:
                # Keep only the best-scoring model on disk.
                if acc > max_accuracy:
                    max_accuracy = acc
                    saver = tf.train.Saver(tf.global_variables())
                    saver.save(sess, model_file)
                    f.write("New best model is at step " + str(current_step) + "\n")
                f.write("{}: Evaluation Summary, Loss {:g}, Acc {:g}\n".format(
                    time_str, sum_loss / i, acc))
                f.write("{}: Current Max Acc {:g} in Iteration {}\n".format(
                    time_str, max(acc_list),
                    int(acc_list.index(max(acc_list)) * FLAGS.evaluate_every)))
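# Example call (sketch): both paths are placeholders. The class weights feed the
# model's weighted loss; upweighting the rarer class is a typical use.
train_vdcnn(dataset_path='data/yelp/',                   # hypothetical path
            model_file='checkpoints/vdcnn_best.ckpt',    # hypothetical path
            classes_weights=tf.constant([1.0, 2.5]))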
# Data Preparation
# Load data
print("Loading data...")
data_helper = data_helper(sequence_max_length=FLAGS.sequence_max_length)
train_data, train_label, test_data, test_label = data_helper.load_dataset(
    FLAGS.database_path)
num_batches_per_epoch = int((len(train_data) - 1) / FLAGS.batch_size) + 1
print("Loading data succeeded.")

# ConvNet
acc_list = [0]
sess = tf.Session()
cnn = VDCNN(num_classes=train_label.shape[1],
            sequence_max_length=FLAGS.sequence_max_length,
            downsampling_type=FLAGS.downsampling_type,
            weight_decay=FLAGS.weight_decay,
            use_he_uniform=FLAGS.use_he_uniform,
            use_bias=FLAGS.use_bias,
            num_filters=list(map(int, FLAGS.num_filters.split(","))),
            num_layers=list(map(int, FLAGS.num_layers.split(","))))

# Optimizer and LR decay
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    global_step = tf.Variable(0, name="global_step", trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                               FLAGS.num_epochs * num_batches_per_epoch,
                                               0.95, staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
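# This variant sizes the network from comma-separated flags instead of a fixed
# depth preset; e.g. the standard 9-layer VDCNN would correspond to
# (illustrative values, not the original defaults):
#   --num_filters "64,128,256,512" --num_layers "2,2,2,2"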
def train(x_train, y_train, x_test, y_test):
    session_ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    log_dir = str(pathlib.Path(FLAGS.train_log_dir) / session_ts)

    if FLAGS.dataset_type == "embeddings":
        embedding_input = True
        embedding_dim = x_train.shape[-1]
    else:
        embedding_input = False
        embedding_dim = 16

    # Build model
    model = VDCNN(
        num_classes=y_train.shape[1],
        depth=FLAGS.depth,
        sequence_length=FLAGS.sequence_length,
        shortcut=FLAGS.shortcut,
        pool_type=FLAGS.pool_type,
        sort=FLAGS.sort,
        use_bias=FLAGS.use_bias,
        embedding_input=embedding_input,
        embedding_dim=embedding_dim,
    )
    model.compile(
        optimizer=tf.keras.optimizers.SGD(lr=FLAGS.lr, momentum=0.9),
        loss="categorical_crossentropy",
        metrics=["acc"],
    )

    # Save model architecture
    model_json = model.to_json()
    with open("vdcnn_model.json", "w") as json_file:
        json_file.write(model_json)
    time_str = datetime.datetime.now().isoformat()
    print("{}: Model saved as json.".format(time_str))
    print("")

    # Trainer
    # TensorBoard and an extra callback to support per-step history
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                 histogram_freq=50,
                                                 write_graph=True,
                                                 write_images=True)
    checkpointer = tf.keras.callbacks.ModelCheckpoint(
        filepath="./checkpoints/vdcnn_weights_val_acc_{val_acc:.4f}.h5",
        save_freq="epoch",
        verbose=1,
        save_best_only=True,
        mode="max",
        monitor="val_acc",
    )
    loss_history = custom_callbacks.LossHistory(model, tensorboard, logdir=log_dir)
    evaluate_step = custom_callbacks.EvaluateStep(
        model,
        checkpointer,
        tensorboard,
        FLAGS.evaluate_every,
        FLAGS.batch_size,
        x_test,
        y_test,
        log_dir,
    )

    # Fit model
    model.fit(
        x_train,
        y_train,
        batch_size=FLAGS.batch_size,
        epochs=FLAGS.num_epochs,
        validation_data=(x_test, y_test),
        verbose=1,
        callbacks=[
            checkpointer,
            tensorboard,
            # loss_history,
            evaluate_step,
        ],
    )

    print("-" * 30)
    time_str = datetime.datetime.now().isoformat()
    print("{}: Done training.".format(time_str))
    tf.keras.backend.clear_session()
    print("-" * 30)
    print()
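# Example entry point (sketch): `load_data` is a hypothetical helper standing in
# for whatever preprocessing produces padded index sequences and one-hot labels.
if __name__ == "__main__":
    x_train, y_train, x_test, y_test = load_data(FLAGS.data_path)  # hypothetical
    train(x_train, y_train, x_test, y_test)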