def dynamic_rnn():
    # load data
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))

    embedding = tf.get_variable("embedding", [FLAGS.emb_size, FLAGS.word_dim],
                                tf.float32)

    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [None])
        mask = tf.placeholder(tf.int32, [None])
        output_keep_prob = tf.placeholder(tf.float32)

    # x:[batch_size,n_steps,n_input]
    x = tf.nn.embedding_lookup(embedding, x_)

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden,
                                             state_is_tuple=True,
                                             activation=tf.nn.relu)
    # wire dropout to the placeholder so it can be switched off (fed as 1.0) at eval time
    lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_cell, output_keep_prob=output_keep_prob)

    # Get lstm cell output
    outputs, _ = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
    outputs = tf.reshape(outputs, [-1, FLAGS.n_hidden])

    # define weights and biases of logistic layer
    with tf.variable_scope('linear'):
        weights = tf.get_variable("weight", [FLAGS.n_hidden, FLAGS.n_classes],
                                  tf.float32)
        biases = tf.get_variable("biases", [FLAGS.n_classes], tf.float32)
        logits = tf.matmul(outputs, weights) + biases

    # loss = tf.reduce_mean(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=logits) *
    #     tf.cast(mask, tf.float32))
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=logits))
    train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)

    y = tf.cast(tf.nn.in_top_k(logits, y_, 1), tf.int32) * mask
    correct = tf.reduce_sum(y)

    with tf.Session(config=util.gpu_config()) as sess:
        sess.run(tf.global_variables_initializer())
        FLAGS.epoch_size = train_data.numbers() // FLAGS.batch_size
        for step in range(FLAGS.epoch_size * FLAGS.epoch_step):
            batch_x, batch_y, mask_feed = util.padding(
                *train_data.next_batch(FLAGS.batch_size))
            sess.run(train_op,
                     feed_dict={
                         x_: batch_x,
                         y_: batch_y,
                         output_keep_prob: 1 - FLAGS.dropout,
                         mask: mask_feed
                     })
            if step % FLAGS.epoch_size == 0:
                evalution(sess, correct, x_, y_, mask, output_keep_prob,
                          dev_data)
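The dynamic_rnn example above relies on a util.padding helper that is not shown in this listing. A minimal sketch of what it might look like, assuming next_batch returns parallel lists of word-id and tag-id sequences and that the mask marks the real (non-pad) token positions:

import numpy as np

def padding(batch_words, batch_tags):
    # Hypothetical sketch, not the original repo's implementation:
    # pad every sentence in the batch to the longest one and return a
    # flat 0/1 mask over the padded positions.
    batch_size = len(batch_words)
    max_len = max(len(sent) for sent in batch_words)
    batch_x = np.zeros([batch_size, max_len], np.int32)
    batch_y = np.zeros([batch_size, max_len], np.int32)
    mask = np.zeros([batch_size, max_len], np.int32)
    for i, (words, tags) in enumerate(zip(batch_words, batch_tags)):
        batch_x[i, :len(words)] = words
        batch_y[i, :len(tags)] = tags
        mask[i, :len(words)] = 1
    # y_ and mask are consumed as flat [batch_size * max_len] vectors above
    return batch_x, batch_y.reshape(-1), mask.reshape(-1)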
Example #2
    def train():
        global max_f1
        with tf.Session(config=util.gpu_config()) as session:
            session.run(tf.global_variables_initializer())
            model.start_enqueue_thread(session)
            accumulated_loss = 0.0

            ckpt = tf.train.get_checkpoint_state(log_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print("Restoring from: {}".format(ckpt.model_checkpoint_path))
                saver.restore(session, ckpt.model_checkpoint_path)

            initial_time = time.time()

            while True:
                tf_loss, tf_global_step, _ = session.run(
                    [model.loss, model.global_step, model.train_op])
                accumulated_loss += tf_loss

                if tf_global_step % report_frequency == 0:
                    total_time = time.time() - initial_time
                    steps_per_second = tf_global_step / total_time

                    average_loss = accumulated_loss / report_frequency
                    print("[{}] loss={:.2f}, steps/s={:.2f}".format(
                        tf_global_step, average_loss, steps_per_second))
                    writer.add_summary(
                        util.make_summary({"loss": average_loss}),
                        tf_global_step)
                    accumulated_loss = 0.0

                if tf_global_step % save_frequency == 0:
                    saver.save(session,
                               os.path.join(log_dir, "model"),
                               global_step=tf_global_step)

                if tf_global_step % eval_frequency == 0:
                    eval_summary, eval_f1 = model.evaluate(session)

                    if eval_f1 > max_f1:
                        max_f1 = eval_f1
                        util.copy_checkpoint(
                            os.path.join(log_dir,
                                         "model-{}".format(tf_global_step)),
                            os.path.join(log_dir, "model.max.ckpt"))

                    writer.add_summary(eval_summary, tf_global_step)
                    writer.add_summary(
                        util.make_summary({"max_eval_f1": max_f1}),
                        tf_global_step)

                    print("[{}] evaL_f1={:.2f}, max_f1={:.2f}".format(
                        tf_global_step, eval_f1, max_f1))
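The training loop above writes scalar summaries through util.make_summary. A minimal sketch of such a helper, assuming it only needs to wrap a dict of Python scalars into a tf.Summary proto for FileWriter.add_summary:

import tensorflow as tf

def make_summary(value_dict):
    # Hypothetical sketch of util.make_summary: wrap plain scalar values
    # in a tf.Summary proto so they can be passed to add_summary.
    return tf.Summary(value=[
        tf.Summary.Value(tag=tag, simple_value=value)
        for tag, value in value_dict.items()
    ])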
def eval(model, vocab):
    datapath = model.hps.data_path.replace('train', 'test')
    data_loader = Batcher(vocab, datapath, args)
    assert 'eval' in model.hps.mode
    assert data_loader.single_pass

    acc_list = []

    with tf.Session(config=util.gpu_config()) as sess:
        util.load_ckpt(model.hps, model.saver, sess)
        print("Running evaluation...\n")
        while True:
            if acc_list:
                print(len(acc_list), np.mean(acc_list))
            batch = data_loader.next_batch()
            if batch == 'FINISH': break

            res = model.run_eval(batch, sess)
            acc = float(res['accuracy'])
            acc_list.append(acc)
        print("FINAL ACCURACY: {}".format(
            round(100 * sum(acc_list) / len(acc_list), 2)))
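Every session in these examples is created with util.gpu_config(). A minimal sketch of such a helper, assuming it only enables soft placement and on-demand GPU memory growth:

import tensorflow as tf

def gpu_config():
    # Hypothetical sketch of util.gpu_config: let TensorFlow fall back to the
    # CPU when an op has no GPU kernel, and grow GPU memory on demand instead
    # of reserving it all up front.
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    return config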
Example #4
def bi_lstm():
    tf.set_random_seed(1)
    # load data
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))
    with tf.device('/cpu:0'):
        embedding = tf.get_variable("embedding",
                                    [FLAGS.emb_size, FLAGS.word_dim],
                                    tf.float32)

    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [None])
        mask = tf.placeholder(tf.int32, [None])
        output_keep_prob = tf.placeholder(tf.float32)
        seq_len = tf.placeholder(tf.int32, [None])
    # x:[batch_size,n_steps,n_input]
    x = tf.nn.embedding_lookup(embedding, x_)
    with tf.device('/gpu:2'):
        # lstm cell
        lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
        lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)

        # dropout is controlled by the output_keep_prob placeholder so it can
        # be disabled (fed as 1.0) during evaluation
        lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_fw, output_keep_prob=output_keep_prob)
        lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell_bw, output_keep_prob=output_keep_prob)

        # Get lstm cell output
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_cell_fw,
                                                     lstm_cell_bw,
                                                     x,
                                                     sequence_length=seq_len,
                                                     dtype=tf.float32)
        outputs = tf.concat(outputs, 2)
        outputs = tf.reshape(outputs, [-1, 2 * FLAGS.n_hidden])

        # define weights and biases of logistic layer
        with tf.variable_scope('linear'):
            weights = tf.get_variable("weight",
                                      [2 * FLAGS.n_hidden, FLAGS.n_classes],
                                      tf.float32)
            biases = tf.get_variable("biases", [FLAGS.n_classes], tf.float32)
            logits = tf.matmul(outputs, weights) + biases

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_,
                                                           logits=logits) *
            tf.cast(mask, tf.float32))
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)

        y = tf.cast(tf.nn.in_top_k(logits, y_, 1), tf.int32) * mask
        correct = tf.reduce_sum(y)

        with tf.Session(config=util.gpu_config()) as sess:
            sess.run(tf.global_variables_initializer())
            FLAGS.epoch_size = train_data.numbers() // FLAGS.batch_size
            for step in range(FLAGS.epoch_size * FLAGS.epoch_step):
                batch_x, batch_y, mask_feed = util.padding(
                    *train_data.next_batch(FLAGS.batch_size))
                sequence_length = batch_x.shape[1] * np.ones(
                    [FLAGS.batch_size], np.int32)
                sess.run(train_op,
                         feed_dict={
                             x_: batch_x,
                             y_: batch_y,
                             output_keep_prob: 1 - FLAGS.dropout,
                             mask: mask_feed,
                             seq_len: sequence_length
                         })
                if step % 100 == 0:
                    evalution(sess, correct, x_, y_, mask, output_keep_prob,
                              seq_len, dev_data)
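The bi_lstm example calls an evalution helper (spelling as in the call above) that is not included in this listing. A minimal sketch of what it might do, assuming it reuses FLAGS, util.padding and the graph nodes passed in, and reports token-level accuracy on the dev set with dropout disabled:

import numpy as np

def evalution(sess, correct, x_, y_, mask, output_keep_prob, seq_len, dev_data):
    # Hypothetical sketch: sum the fetched correct-token counter over the dev
    # set and divide by the number of real (unmasked) tokens.
    total_correct, total_tokens = 0, 0
    for _ in range(dev_data.numbers() // FLAGS.batch_size):
        batch_x, batch_y, mask_feed = util.padding(
            *dev_data.next_batch(FLAGS.batch_size))
        sequence_length = batch_x.shape[1] * np.ones([FLAGS.batch_size], np.int32)
        total_correct += sess.run(correct,
                                  feed_dict={
                                      x_: batch_x,
                                      y_: batch_y,
                                      output_keep_prob: 1.0,  # no dropout at eval time
                                      mask: mask_feed,
                                      seq_len: sequence_length
                                  })
        total_tokens += int(np.sum(mask_feed))
    print('dev accuracy: {:.4f}'.format(total_correct / float(total_tokens)))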
def train(model, vocab, pretrain_vardicts=None):
    print('train function called.')
    print(model.hps.data_path)
    devpath = model.hps.data_path.replace('train', 'dev')
    assert model.hps.data_path != devpath and os.path.exists(
        model.hps.data_path) and os.path.exists(devpath)

    train_data_loader = Batcher(vocab, model.hps.data_path, args)
    valid_data_loader = Batcher(vocab, devpath, args)

    print(train_data_loader.example_queue.qsize())
    print(valid_data_loader.example_queue.qsize())
    print(train_data_loader.batch_queue.qsize())
    print(valid_data_loader.batch_queue.qsize())

    with tf.Session(config=util.gpu_config()) as sess:
        train_logdir = os.path.join(args.model_path, 'logdir/train')
        dev_logdir = os.path.join(args.model_path, 'logdir/dev')
        train_savedir = os.path.join(args.model_path, 'train/')
        print("[*] Train save directory is: {}".format(train_savedir))
        if not os.path.exists(train_logdir): os.makedirs(train_logdir)
        if not os.path.exists(dev_logdir): os.makedirs(dev_logdir)
        if not os.path.exists(train_savedir): os.makedirs(train_savedir)

        summary_writer1 = tf.summary.FileWriter(train_logdir, sess.graph)
        summary_writer2 = tf.summary.FileWriter(dev_logdir, sess.graph)
        """
        Initialize with pretrain variables
        """
        if 'esim' not in model.hps.mode:
            assign_ops, uninitialized_varlist = util.assign_pretrain_weights(
                pretrain_vardicts)
            sess.run(assign_ops)
            sess.run(tf.variables_initializer(uninitialized_varlist))
        else:
            sess.run(tf.global_variables_initializer())
        print("Variable initialization end.")
        step = 0
        while True:
            beg_time = time()

            batch = train_data_loader.next_batch()
            sample_per_epoch = 550153 if 'esim' in model.hps.mode else 1211

            res = model.run_step(batch,
                                 sess,
                                 fcn_keeprate=model.hps.fcn_keep_rate,
                                 rnn_keeprate=model.hps.rnn_keep_rate,
                                 is_train=True)

            loss, summaries, step = res['loss'], res['summaries'], res[
                'global_step']

            end_time = time()
            print("{} epoch, {} step, {}sec, {} loss".format(
                int(step * model.hps.batch_size / sample_per_epoch), step,
                round(end_time - beg_time, 3), round(float(loss), 3)))
            summary_writer1.add_summary(summaries, step)

            if step % 5 == 0:
                dev_batch = valid_data_loader.next_batch()
                res = model.run_step(dev_batch,
                                     sess,
                                     fcn_keeprate=-1,
                                     rnn_keeprate=-1,
                                     is_train=False)
                loss, summaries, step = res['loss'], res['summaries'], res[
                    'global_step']
                assert step % 5 == 0
                print("[VALID] {} loss".format(round(loss, 3)))
                summary_writer2.add_summary(summaries, step)

            if step == 10 or step % 10000 == 0:
                model.saver.save(sess, train_savedir, global_step=step)

            if int(step * model.hps.batch_size /
                   sample_per_epoch) > model.hps.max_epoch:
                model.saver.save(sess, train_savedir, global_step=step)
                print("training end")
                break
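When the mode is not 'esim', the loop above initializes the graph from pretrained weights via util.assign_pretrain_weights. A minimal sketch of such a helper, assuming pretrain_vardicts maps variable names to numpy values:

import tensorflow as tf

def assign_pretrain_weights(pretrain_vardicts):
    # Hypothetical sketch: build assign ops for every global variable whose
    # name appears in the pretrained dict, and return the remaining variables
    # so they can be initialized separately.
    assign_ops, uninitialized = [], []
    for var in tf.global_variables():
        if var.op.name in pretrain_vardicts:
            assign_ops.append(tf.assign(var, pretrain_vardicts[var.op.name]))
        else:
            uninitialized.append(var)
    return assign_ops, uninitialized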
Example #6
def bi_lstm_crf():
    # load data
    print('start reading dataset')
    train_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.train.pos'))
    dev_data = read_dataset(os.path.join(FLAGS.data_path, 'penn.devel.pos'))
    dev_data.fake_data(FLAGS.batch_size)
    print('finished reading dataset')

    tf.set_random_seed(1)

    # Keeping the word embeddings on the CPU saves GPU memory
    with tf.device('/cpu:0'):
        with tf.variable_scope('embedding') as scope:
            random_embedding = tf.get_variable(
                name="random_embedding",
                shape=[FLAGS.emb_size, FLAGS.word_dim],
                dtype=tf.float32)

    with tf.name_scope('placeholder'):
        x_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        y_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
        output_keep_prob = tf.placeholder(tf.float32)

    sequence_length = tf.reduce_sum(tf.sign(x_), axis=1)
    sequence_length = tf.cast(sequence_length, tf.int32)

    with tf.device('/gpu:2'):
        with tf.variable_scope('input_layer'):
            # x:[batch_size,n_steps,n_input]
            x = tf.nn.embedding_lookup(random_embedding, x_)
        # lstm cell
        with tf.name_scope('bi_lstm_layer'):
            lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)
            lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.n_hidden)

        # Get lstm cell output
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell_fw,
            cell_bw=lstm_cell_bw,
            inputs=x,
            sequence_length=sequence_length,
            dtype=tf.float32)
        outputs = tf.concat(outputs, 2)
        outputs = tf.reshape(outputs, [-1, 2 * FLAGS.n_hidden])

        outputs = tf.nn.dropout(outputs, keep_prob=output_keep_prob)
        with tf.variable_scope('Softmax'):
            weights = tf.get_variable(
                name="weights",
                shape=[2 * FLAGS.n_hidden, FLAGS.n_classes],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.01))
            biases = tf.get_variable(name="biases",
                                     shape=[FLAGS.n_classes],
                                     dtype=tf.float32)
        matricized_unary_scores = tf.matmul(outputs, weights) + biases
        unary_scores = tf.reshape(matricized_unary_scores,
                                  [FLAGS.batch_size, -1, FLAGS.n_classes])

        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            unary_scores, y_, sequence_length)
        l2_loss = tf.nn.l2_loss(weights) * FLAGS.beta
        loss = tf.reduce_mean(-log_likelihood) + l2_loss
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)

    saver = tf.train.Saver()
    best_acc = 0
    if FLAGS.is_training == 1:
        with tf.Session(config=util.gpu_config()) as sess:
            sess.run(tf.global_variables_initializer())
            epoch_size = train_data.numbers() // FLAGS.batch_size
            for step in range(epoch_size * FLAGS.epoch_step):
                batch_x, batch_y, _ = util.padding(
                    *train_data.next_batch(FLAGS.batch_size))
                sess.run(
                    [l2_loss, loss, train_op],
                    feed_dict={
                        x_: batch_x,
                        y_: batch_y.reshape([FLAGS.batch_size, -1]),
                        output_keep_prob: 1 - FLAGS.dropout
                    })
                if step % 100 == 0:
                    cur_acc = evalution(sess, transition_params, dev_data, x_,
                                        y_, output_keep_prob, unary_scores,
                                        sequence_length)
                    if cur_acc > best_acc:
                        best_acc = cur_acc
                        #saver.save(sess,'best.model')
        print('best_acc: ' + str(best_acc))
    else:
        pass
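bi_lstm_crf only builds the CRF training loss; decoding happens inside the evalution helper, which is not shown here. A minimal sketch of the decode step, assuming the emission scores and transition matrix are fetched as numpy arrays and each sentence is Viterbi-decoded up to its true length:

import tensorflow as tf

def crf_decode_batch(sess, unary_scores, transition_params, feed_dict, lengths):
    # Hypothetical sketch: fetch per-token emission scores and the learned
    # transition matrix, then run Viterbi decoding sentence by sentence.
    scores, trans = sess.run([unary_scores, transition_params],
                             feed_dict=feed_dict)
    predictions = []
    for score, length in zip(scores, lengths):
        # drop padded time steps before decoding
        viterbi_seq, _ = tf.contrib.crf.viterbi_decode(score[:length], trans)
        predictions.append(viterbi_seq)
    return predictions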
Example #7
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import tensorflow as tf
import coref_model as cm
import util
import time

if __name__ == "__main__":
    config = util.initialize_from_env()
    model = cm.CorefModel(config)
    while True:
        try:
            with tf.Session(config=util.gpu_config()) as session:
                model.restore(session)
                model.evaluate(session, official_stdout=True)
        except tf.errors.ResourceExhaustedError:
            print("OOM")
            time.sleep(3)
            continue
        break