            capped_gvs, global_step=self.global_step, name="train_op")

    def summary(self):
        self.merged = tf.summary.merge_all()

    def train(self, session, batch_x, batch_y, dropout):
        feed_dict = {
            self.input_x: batch_x,
            self.input_y: batch_y,
            self.dkp: dropout
        }
        _, step, loss, summary, accuracy, mask, correct_labels, lengths, losses, loss1, scores = session.run(
            [
                self.train_op, self.global_step, self.loss, self.merged,
                self.accuracy, self.mask, self.correct_labels,
                self.sequence_lengths, self.losses, self.loss1, self.scores
            ], feed_dict)
        return step, loss, summary, accuracy, mask, correct_labels, lengths, losses, loss1, scores

    def predict(self, session, x, y):
        # Dropout keep probability is pinned to 1.0 at inference time.
        feed_dict = {self.input_x: x, self.input_y: y, self.dkp: 1.0}
        loss, accuracy, predictions = session.run(
            [self.loss, self.accuracy, self.predictions], feed_dict)
        return loss, accuracy, predictions


if __name__ == '__main__':
    # Smoke test: build the graph with random 20-dimensional embeddings for a
    # vocabulary of 100 tokens. The positional arguments follow the keyword
    # order used in test() below: sequence_length, embedding, cell_size,
    # num_classes, hls, verbose.
    emb = du.initialize_random_embeddings(100, 20)
    sl = SequenceLabeler(20, emb, 16, 5, 12, True)
    sl.build_network()
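    # A minimal sketch of driving the freshly built labeler, assuming the
    # call order used by train_and_test() below (build_train_ops -> summary
    # -> variable init) and assuming one-hot labels, which is what the
    # du.get_training_data(..., len(cl)) calls elsewhere in this file
    # suggest. Shapes follow the constructor call above; the learning rate,
    # batch size and dropout value are illustrative placeholders:
    #
    #   sl.build_train_ops(0.001)
    #   sl.summary()
    #   with tf.Session() as s:
    #       s.run(tf.global_variables_initializer())
    #       batch_x = np.random.randint(1, 100, size=(4, 20))           # token ids
    #       batch_y = np.eye(5)[np.random.randint(0, 5, size=(4, 20))]  # one-hot tags
    #       step, loss = sl.train(s, batch_x, batch_y, 0.5)[:2]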
def train_and_test(train_path, data_save_path, embedding_path, conf_path,
                   model_save_path, model_name, summaries_dir):
    with tf.Graph().as_default():
        np.random.seed(1337)
        tf.set_random_seed(1337)
        config = configparser.ConfigParser()
        config.read(conf_path)
        sentence_f = config.get("Data", "sentence_field")
        token_f = config.get("Data", "token_field")
        label_f = config.get("Data", "categories_field")
        processors_num = int(config.get("Training", "processors"))
        batch_size = int(config.get("Training", "batch_size"))
        epochs = int(config.get("Training", "epochs"))
        patience = int(config.get("Training", "patience"))
        validation_split = float(config.get("Training", "val_split"))
        learning_rate = float(config.get("Training", "learning_rate"))
        embedding_size = int(config.get("Network", "embedding_size"))
        cell_rnn_size = int(config.get("Network", "cell_rnn_size"))
        dropout = float(config.get("Network", "dropout"))
        hidden_layer_size = int(config.get("Network", "hidden_layer_size"))
        data, vocab, max_length, nc, cl, cl_inv = du.load_data(
            train_path,
            text_field=sentence_f,
            category_field=label_f,
            feature_field=token_f)
        params = [
            vocab, nc, max_length, cl, cl_inv, sentence_f, label_f, token_f
        ]
        du.save_params(params, data_save_path)
        train_data, valid_data = du.split(data, validation_split)
        session_conf = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            inter_op_parallelism_threads=processors_num,
            intra_op_parallelism_threads=processors_num)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            print("Initializing Embedding")
            embedding = du.load_embeddings(
                embedding_path, vocab
            ) if embedding_path != 'random' else du.initialize_random_embeddings(
                len(vocab), embedding_size)
            print("Building nn_model")
            sequence_labeler = CRFSequenceLabeler(sequence_length=max_length,
                                                  embedding=embedding,
                                                  cell_size=cell_rnn_size,
                                                  num_classes=len(cl),
                                                  hls=hidden_layer_size)
            sequence_labeler.build_network()
            print("Building training operations")
            sequence_labeler.build_train_ops(learning_rate)
            sequence_labeler.summary()
            tf.global_variables_initializer().run()
            valid_x, valid_y = du.get_training_data(valid_data, len(cl))
            saver = tf.train.Saver(max_to_keep=1)
            # Resume from an existing checkpoint if one is present.
            if os.path.exists(model_save_path):
                saver.restore(sess=sess,
                              save_path=model_save_path + "/" + model_name)
            else:
                os.mkdir(model_save_path)
            best_vd_accuracy = 0.0
            best_vd_loss = 50.0  # sentinel, overwritten on first improvement
            num_without_improvement = 0
            # writer = tf.summary.FileWriter(summaries_dir + "/train",
            #                                sess.graph)
            print("Start training")
            for epoch in range(epochs):
                # Early stopping: quit once validation accuracy has stalled
                # for more than `patience` epochs.
                if num_without_improvement > patience:
                    break
                np.random.shuffle(train_data)
                batches = du.get_training_batches(train_data, batch_size)
                # Training on batches
                for batch in batches:
                    train_x, train_y = du.get_training_data(batch, len(cl))
                    step, loss, summary = sequence_labeler.train(
                        sess, train_x, train_y, dropout)
                    # writer.add_summary(summary, step)
                    print(
                        "Training: epoch\t{:g}\tstep\t{:g}\tloss\t{:g}".format(
                            epoch, step, loss))
                # Evaluate on the validation set
                vd_loss, vd_scores, vd_accuracy, transition_params, tagged_sequences = sequence_labeler.predict(
                    sess, valid_x, valid_y)
                print("Validation: loss\t{:g}\taccuracy\t{:g}".format(
                    vd_loss, vd_accuracy))
                if vd_accuracy > best_vd_accuracy:
                    best_vd_accuracy = vd_accuracy
                    best_vd_loss = vd_loss
                    print("Saving nn_model")
                    saver.save(sess, model_save_path + "/" + model_name)
                    num_without_improvement = 0
                else:
                    num_without_improvement += 1
            print("Best Validation: loss\t{:g}\taccuracy\t{:g}".format(
                best_vd_loss, best_vd_accuracy))
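# A sketch of the INI configuration consumed by train_and_test() above and
# test() below. Section and option names are taken verbatim from the
# config.get(...) calls; every value shown is an illustrative placeholder:
#
#   [Data]
#   sentence_field = sentence
#   token_field = tokens
#   categories_field = categories
#
#   [Training]
#   processors = 4
#   batch_size = 32
#   epochs = 100
#   patience = 10
#   val_split = 0.1
#   learning_rate = 0.001
#
#   [Network]
#   embedding_size = 100
#   cell_rnn_size = 100
#   dropout = 0.5
#   hidden_layer_size = 50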
def test(test_path, data_save_path, conf_path, model_save_path, model_name,
         embedding_path, out_file_path):
    ow = open(out_file_path, "w")
    with tf.Graph().as_default():
        np.random.seed(1337)
        tf.set_random_seed(1337)
        config = configparser.ConfigParser()
        config.read(conf_path)
        processors_num = int(config.get("Training", "processors"))
        embedding_size = int(config.get("Network", "embedding_size"))
        cell_rnn_size = int(config.get("Network", "cell_rnn_size"))
        hidden_layer_size = int(config.get("Network", "hidden_layer_size"))
        vocab, num_classes, max_length, cl, cl_inv, text_field, category_field, feature_field = du.load_params(
            data_save_path)
        test_data = du.load_data_with_maps(test_path,
                                           vocab=vocab,
                                           max_length=max_length,
                                           text_field=text_field,
                                           category_field=category_field,
                                           feature_field=feature_field,
                                           cl_map=cl)
        session_conf = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            inter_op_parallelism_threads=processors_num,
            intra_op_parallelism_threads=processors_num)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            print("Initializing Embedding")
            embedding = du.load_embeddings(
                embedding_path, vocab
            ) if embedding_path != 'random' else du.initialize_random_embeddings(
                len(vocab), embedding_size)
            print("Building nn_model")
            sequence_labeler = SequenceLabeler(sequence_length=max_length,
                                               embedding=embedding,
                                               cell_size=cell_rnn_size,
                                               num_classes=len(cl),
                                               hls=hidden_layer_size,
                                               verbose=False)
            sequence_labeler.build_network()
            tf.global_variables_initializer().run()
            test_x, test_y = du.get_training_data(test_data, len(cl))
            saver = tf.train.Saver(max_to_keep=1)
            saver.restore(sess=sess,
                          save_path=model_save_path + "/" + model_name)
            loss, accuracy, predictions = sequence_labeler.predict(
                sess, test_x, test_y)
            # Write "token gold_label predicted_label" for the unpadded part
            # of every sentence, with a blank line between sentences.
            for i in range(len(test_data)):
                l = test_data[i].original_length
                tokens = test_data[i].original_tokens[0:l]
                golds = test_data[i].labels[0:l]
                predic = predictions[i]
                pred_nums = predic[0:l]
                pred_labels = [cl_inv[x] for x in pred_nums]
                for a in zip(tokens, golds, pred_labels):
                    ow.write(" ".join(a) + "\n")
                ow.write("\n")
    ow.close()
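# Hypothetical invocation (all paths are placeholders): train_and_test() and
# test() share the same saved parameters and checkpoint directory. Note that
# train_and_test() builds a CRFSequenceLabeler while test() builds a
# SequenceLabeler, so the restored checkpoint must be compatible with the
# graph built here. Because each output line holds "token gold predicted"
# and sentences are separated by blank lines, the result can be scored with
# CoNLL-style evaluation scripts:
#
#   train_and_test("train.data", "params.pkl", "random", "conf.ini",
#                  "saved_model", "labeler", "summaries")
#   test("test.data", "params.pkl", "conf.ini", "saved_model", "labeler",
#        "random", "predictions.txt")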