Example #1
0
def main(args):
    """Train or evaluate a multi-layer LSTM NMT model.

    args.mode selects 'train' (build dictionaries from the training data,
    train, save model + dictionaries + loss plots) or 'eval' (restore a
    saved model and dictionaries).  Both modes finish by decoding the test
    set and writing input/prediction sentence files next to the model.
    Raises ValueError for any other mode.
    """
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_directory = '%s/examples/model/multi_layer_nmt' % ROOT
    model_path = '%s/model' % model_directory
    dictionary_path = {
        'source': '%s/source_dictionary.pickle' % model_directory,
        'source_reverse':
        '%s/source_reverse_dictionary.pickle' % model_directory,
        'target': '%s/target_dictionary.pickle' % model_directory,
        'target_reverse':
        '%s/target_reverse_dictionary.pickle' % model_directory
    }
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # read data
    if args.mode == 'train':
        source_dictionary, source_reverse_dictionary = build_dictionary(
            read_words(source_train_data_path), vocabulary_size)
        source_train_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_train_data_path)
        ]
        target_dictionary, target_reverse_dictionary = build_dictionary(
            read_words(target_train_data_path), vocabulary_size)
        target_train_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_train_data_path)
        ]

        source_valid_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_valid_data_path)
        ]
        target_valid_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_valid_data_path)
        ]

        if args.debug:
            # Truncate BOTH sides to the same 1000 sentence pairs.
            # (Bug fix: target side was previously truncated from the
            # source data, silently corrupting the training pairs.)
            source_train_datas = source_train_datas[:1000]
            target_train_datas = target_train_datas[:1000]
    else:
        # eval mode: reload the dictionaries pickled at training time
        with open(dictionary_path['source'], 'rb') as f1, \
             open(dictionary_path['source_reverse'], 'rb') as f2, \
             open(dictionary_path['target'], 'rb') as f3, \
             open(dictionary_path['target_reverse'], 'rb') as f4:
            source_dictionary = pickle.load(f1)
            source_reverse_dictionary = pickle.load(f2)
            target_dictionary = pickle.load(f3)
            target_reverse_dictionary = pickle.load(f4)

    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholder
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed (single embedding table shared by encoder and decoder)
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder
    encoder_units = hidden_units
    encoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers)
    encoder_output, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_cell,
        encoder_inputs_embedded,
        dtype=tf.float32,
        time_major=True)
    # only the final state is fed to the decoder; free the outputs
    del encoder_output

    # decoder (initialized from the encoder's final state)
    decoder_units = encoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)

    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    minibatch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            global_max_step = train_step * (
                len(source_train_datas) // batch_size + 1)
            # sample the validation loss ~100 times over the whole run
            loss_freq = global_max_step // 100 if global_max_step > 100 else 1
            loss_log = []
            batch_loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(global_max_step)
            sess.run(tf.global_variables_initializer())
            global_step = 0
            stop_flag = False
            for batch in range(train_step):
                if stop_flag:
                    break
                current_batch_loss_log = []
                while True:  # minibatch process
                    m.monitor(global_step, loss_suffix)
                    source_train_batch, _ = batchnize(source_train_datas,
                                                      batch_size,
                                                      minibatch_idx['train'])
                    target_train_batch, minibatch_idx['train'] = batchnize(
                        target_train_datas, batch_size, minibatch_idx['train'])
                    batch_data = seq2seq(source_train_batch,
                                         target_train_batch, max_time,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                    if global_step % loss_freq == 0:
                        source_valid_batch, _ = batchnize(
                            source_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        target_valid_batch, minibatch_idx['valid'] = batchnize(
                            target_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        batch_data = seq2seq(source_valid_batch,
                                             target_valid_batch, max_time,
                                             vocabulary_size)
                        feed_dict = {
                            encoder_inputs: batch_data['encoder_inputs'],
                            decoder_inputs: batch_data['decoder_inputs'],
                            decoder_labels: batch_data['decoder_labels']
                        }
                        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                        loss_log.append(loss_val)
                        current_batch_loss_log.append(loss_val)
                        loss_suffix = 'loss: %f' % loss_val
                        es_status = es(loss_val)
                        # only allow early stopping in the second half of training
                        if batch > train_step // 2 and es_status:
                            print('early stopping at step: %d' % global_step)
                            stop_flag = True
                            break
                    global_step += 1
                    # batchnize wraps the index back to 0 at the end of an epoch
                    if minibatch_idx['train'] == 0:
                        batch_loss = np.mean(current_batch_loss_log)
                        batch_loss_log.append(batch_loss)
                        print('Batch: {}/{}, batch loss: {}'.format(
                            batch + 1, train_step, batch_loss))
                        break

            # save tf.graph and variables
            saver.save(sess, model_path)
            print('save at %s' % model_path)

            # save plot of loss
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_global_loss.png' % model_path)
            plt.figure()
            plt.plot(np.arange(len(batch_loss_log)), batch_loss_log)
            plt.savefig('%s_batch_loss.png' % model_path)

            # save dictionary
            with open(dictionary_path['source'], 'wb') as f1, \
                 open(dictionary_path['source_reverse'], 'wb') as f2, \
                 open(dictionary_path['target'], 'wb') as f3, \
                 open(dictionary_path['target_reverse'], 'wb') as f4:
                pickle.dump(source_dictionary, f1)
                pickle.dump(source_reverse_dictionary, f2)
                pickle.dump(target_dictionary, f3)
                pickle.dump(target_reverse_dictionary, f4)

        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)

        else:
            # bug fix: a bare `raise` outside an except block raises an
            # opaque RuntimeError; raise an explicit error instead
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             minibatch_idx['test'])
            target_test_batch, minibatch_idx['test'] = batchnize(
                target_test_datas, batch_size, minibatch_idx['test'])
            batch_data = seq2seq(source_test_batch, target_test_batch,
                                 max_time, vocabulary_size)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            # transpose time-major (max_time, batch) to (batch, max_time)
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        # decode id vectors back to sentences, dropping PAD tokens
        input_sentences = ''
        predict_sentences = ''
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector] for vector in input_vector
                if not vector == PAD
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector] for vector in predict_vector
                if not vector == PAD
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
             open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)

        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')
def main(args):
    """Train or evaluate a multi-layer LSTM seq2seq autoencoder-style demo.

    Uses the `through` batch generator (identity seq2seq task) rather than
    real parallel data.  args.mode selects 'train' (train, save model and a
    loss plot) or 'eval' (restore a saved model); any other mode raises
    ValueError.  Both modes finish by printing one decoded batch.
    """
    tf.reset_default_graph()

    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_path = '%s/examples/model/multi_layer_seq2seq/model' % ROOT
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    datas = []

    # placeholder
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed (single embedding table shared by encoder and decoder)
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder
    encoder_units = hidden_units
    encoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers)
    encoder_output, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_cell,
        encoder_inputs_embedded,
        dtype=tf.float32,
        time_major=True)
    # only the final state is fed to the decoder; free the outputs
    del encoder_output

    # decoder (initialized from the encoder's final state)
    decoder_units = encoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)

    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            # sample the loss ~100 times over the run; guard against
            # train_step < 100, which previously made loss_freq == 0 and
            # crashed `i % loss_freq` with ZeroDivisionError
            loss_freq = train_step // 100 if train_step > 100 else 1
            loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(train_step)
            sess.run(tf.global_variables_initializer())
            for i in range(train_step):
                m.monitor(i, loss_suffix)
                batch_data = through(datas, max_time, batch_size,
                                     vocabulary_size)
                feed_dict = {
                    encoder_inputs: batch_data['encoder_inputs'],
                    decoder_inputs: batch_data['decoder_inputs'],
                    decoder_labels: batch_data['decoder_labels']
                }
                sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                if i % loss_freq == 0:
                    batch_data = through(datas, max_time, batch_size,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                    loss_log.append(loss_val)
                    loss_suffix = 'loss: %f' % loss_val
                    es_status = es(loss_val)
                    # only allow early stopping in the second half of training
                    if i > train_step // 2 and es_status:
                        print('early stopping at step: %d' % i)
                        break
            saver.save(sess, model_path)
            print('save at %s' % model_path)
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_loss.png' % model_path)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            # bug fix: a bare `raise` outside an except block raises an
            # opaque RuntimeError; raise an explicit error instead
            raise ValueError('args.mode should be train or eval')

        # evaluate: decode one batch and show inputs vs. predictions
        batch_data = through(datas, max_time, batch_size, vocabulary_size)
        feed_dict = {
            encoder_inputs: batch_data['encoder_inputs'],
            decoder_inputs: batch_data['decoder_inputs'],
            decoder_labels: batch_data['decoder_labels']
        }
        pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
        input_ = batch_data['encoder_inputs']
        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)

        print('input sequences...\n{}'.format(input_))
        print('predict sequences...\n{}'.format(pred))
        print('loss: %f' % loss_val)

    print('finish.')
Example #3
0
def main(args):
    """Train or evaluate a single-layer LSTM NMT model (basic_nmt).

    Dictionaries are always rebuilt from the training data (unlike the
    multi-layer variant, nothing is pickled).  args.mode selects 'train'
    (train, save model and a loss plot) or 'eval' (restore a saved model);
    any other mode raises ValueError.  Both modes finish by decoding the
    test set and writing input/prediction sentence files.
    """
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_path = '%s/examples/model/basic_nmt/model' % ROOT
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # read data
    source_dictionary, source_reverse_dictionary = build_dictionary(
        read_words(source_train_data_path), vocabulary_size)
    source_train_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_train_data_path)
    ]
    target_dictionary, target_reverse_dictionary = build_dictionary(
        read_words(target_train_data_path), vocabulary_size)
    target_train_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_train_data_path)
    ]

    source_valid_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_valid_data_path)
    ]
    target_valid_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_valid_data_path)
    ]
    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholder
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed (single embedding table shared by encoder and decoder)
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder (single LSTM layer; only the final state is kept)
    encoder_units = hidden_units
    encoder_cell = tf.contrib.rnn.LSTMCell(encoder_units)
    _, encoder_final_state = tf.nn.dynamic_rnn(encoder_cell,
                                               encoder_inputs_embedded,
                                               dtype=tf.float32,
                                               time_major=True)

    # decoder (initialized from the encoder's final state)
    decoder_units = encoder_units
    decoder_cell = tf.contrib.rnn.LSTMCell(decoder_units)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)

    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    batch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            # sample the validation loss ~100 times over the run; guard
            # against train_step < 100, which previously made loss_freq == 0
            # and crashed `i % loss_freq` with ZeroDivisionError
            loss_freq = train_step // 100 if train_step > 100 else 1
            loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(train_step)
            sess.run(tf.global_variables_initializer())
            for i in range(train_step):
                m.monitor(i, loss_suffix)
                source_train_batch, _ = batchnize(source_train_datas,
                                                  batch_size,
                                                  batch_idx['train'])
                target_train_batch, batch_idx['train'] = batchnize(
                    target_train_datas, batch_size, batch_idx['train'])
                batch_data = seq2seq(source_train_batch, target_train_batch,
                                     max_time, vocabulary_size)
                feed_dict = {
                    encoder_inputs: batch_data['encoder_inputs'],
                    decoder_inputs: batch_data['decoder_inputs'],
                    decoder_labels: batch_data['decoder_labels']
                }
                sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                if i % loss_freq == 0:
                    source_valid_batch, _ = batchnize(source_valid_datas,
                                                      batch_size,
                                                      batch_idx['valid'])
                    target_valid_batch, batch_idx['valid'] = batchnize(
                        target_valid_datas, batch_size, batch_idx['valid'])
                    batch_data = seq2seq(source_valid_batch,
                                         target_valid_batch, max_time,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                    loss_log.append(loss_val)
                    loss_suffix = 'loss: %f' % loss_val
                    es_status = es(loss_val)
                    # only allow early stopping in the second half of training
                    if i > train_step // 2 and es_status:
                        print('early stopping at step: %d' % i)
                        break
            saver.save(sess, model_path)
            print('save at %s' % model_path)
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_loss.png' % model_path)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            # bug fix: a bare `raise` outside an except block raises an
            # opaque RuntimeError; raise an explicit error instead
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             batch_idx['test'])
            target_test_batch, batch_idx['test'] = batchnize(
                target_test_datas, batch_size, batch_idx['test'])
            batch_data = seq2seq(source_test_batch, target_test_batch,
                                 max_time, vocabulary_size)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            # transpose time-major (max_time, batch) to (batch, max_time)
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        # decode id vectors back to sentences, dropping PAD tokens
        input_sentences = ''
        predict_sentences = ''
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector] for vector in input_vector
                if not vector == PAD
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector] for vector in predict_vector
                if not vector == PAD
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
             open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)

        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')
Example #4
0
def main(args):
    """Train or evaluate a multi-layer bidirectional seq2seq NMT model.

    Args:
        args: parsed CLI namespace with attributes
            ``config`` (path to config file), ``mode`` ('train' or 'eval'),
            and ``debug`` (truncate training data when truthy).

    Side effects:
        Creates/overwrites the model output directory (interactive
        confirmation if it already exists), writes model checkpoints,
        loss plots, pickled dictionaries, and evaluation output files.
    """
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    output = c.option.get('output', 'examples/model/buf')
    model_directory = '%s/%s' % (ROOT, output)
    model_path = '%s/model' % model_directory
    dictionary_path = {
        'source': '%s/source_dictionary.pickle' % model_directory,
        'source_reverse':
        '%s/source_reverse_dictionary.pickle' % model_directory,
        'target': '%s/target_dictionary.pickle' % model_directory,
        'target_reverse':
        '%s/target_reverse_dictionary.pickle' % model_directory
    }
    PAD = c.const['PAD']
    BOS = c.const['BOS']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # initialize output directory
    if pathlib.Path(model_directory).exists():
        # BUGFIX: the original printed a bare '%s' placeholder because the
        # format argument was missing.
        print('Warning: model %s already exists.' % model_directory)
        print('Old model will be overwritten.')
        while True:
            print('Do you wanna continue? [yes|no]')
            command = input('> ')
            if command == 'yes':
                shutil.rmtree(model_directory)
                break
            elif command == 'no':
                sys.exit()
            else:
                print('You can only input "yes" or "no".')

    print('Make new model: %s' % model_directory)
    # parents=True so a missing intermediate directory does not crash mkdir
    pathlib.Path(model_directory).mkdir(parents=True)

    # read data
    if args.mode == 'train':
        source_dictionary, source_reverse_dictionary = build_dictionary(
            read_words(source_train_data_path), vocabulary_size)
        source_train_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_train_data_path)
        ]
        target_dictionary, target_reverse_dictionary = build_dictionary(
            read_words(target_train_data_path), vocabulary_size)
        target_train_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_train_data_path)
        ]

        source_valid_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_valid_data_path)
        ]
        target_valid_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_valid_data_path)
        ]

        if args.debug:
            # BUGFIX: the original truncated target data from the SOURCE
            # list, silently training on mismatched sentence pairs.
            source_train_datas = source_train_datas[:1000]
            target_train_datas = target_train_datas[:1000]
    else:
        with open(dictionary_path['source'], 'rb') as f1, \
             open(dictionary_path['source_reverse'], 'rb') as f2, \
             open(dictionary_path['target'], 'rb') as f3, \
             open(dictionary_path['target_reverse'], 'rb') as f4:
            source_dictionary = pickle.load(f1)
            source_reverse_dictionary = pickle.load(f2)
            target_dictionary = pickle.load(f3)
            target_reverse_dictionary = pickle.load(f4)

    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholder (time-major: shape is (max_time, batch) — see time_major=True
    # in the RNN/decoder calls below)
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed: a single embedding table shared by encoder and decoder inputs
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder with bidirection
    encoder_units = hidden_units
    encoder_layers_fw = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell_fw = tf.contrib.rnn.MultiRNNCell(encoder_layers_fw)
    encoder_layers_bw = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell_bw = tf.contrib.rnn.MultiRNNCell(encoder_layers_bw)
    (encoder_output_fw,
     encoder_output_bw), encoder_state = tf.nn.bidirectional_dynamic_rnn(
         encoder_cell_fw,
         encoder_cell_bw,
         encoder_inputs_embedded,
         dtype=tf.float32,
         time_major=True)
    # concatenate fw/bw outputs and per-layer states so the decoder
    # (with 2x units) can consume them directly
    encoder_outputs = tf.concat((encoder_output_fw, encoder_output_bw), 2)
    encoder_state = tuple(
        tf.contrib.rnn.LSTMStateTuple(
            tf.concat((encoder_state[0][layer].c,
                       encoder_state[1][layer].c), 1),
            tf.concat((encoder_state[0][layer].h,
                       encoder_state[1][layer].h), 1))
        for layer in range(layers))

    # decoder with attention
    decoder_units = encoder_units * 2  # matches concatenated fw+bw encoder
    attention_units = decoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)

    sequence_length = tf.cast([max_time] * batch_size, dtype=tf.int32)
    # beam_width is fixed to 1, so tile_batch is currently a no-op kept for
    # future beam-search support
    beam_width = 1
    tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
        encoder_outputs, multiplier=beam_width)
    tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
        encoder_state, multiplier=beam_width)
    tiled_sequence_length = tf.contrib.seq2seq.tile_batch(
        sequence_length, multiplier=beam_width)
    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
        num_units=attention_units,
        memory=tiled_encoder_outputs,
        memory_sequence_length=tiled_sequence_length)
    attention_cell = tf.contrib.seq2seq.AttentionWrapper(
        cell, attention_mechanism, attention_layer_size=256)
    decoder_initial_state = attention_cell.zero_state(dtype=tf.float32,
                                                      batch_size=batch_size *
                                                      beam_width)
    decoder_initial_state = decoder_initial_state.clone(
        cell_state=tiled_encoder_final_state)

    # teacher forcing at train time; greedy decoding at eval time
    if args.mode == 'train':
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_inputs_embedded,
            sequence_length=tf.cast([max_time] * batch_size, dtype=tf.int32),
            time_major=True)
    elif args.mode == 'eval':
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=embeddings,
            start_tokens=tf.tile([BOS], [batch_size]),
            end_token=EOS)

    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=attention_cell,
        helper=helper,
        initial_state=decoder_initial_state)
    decoder_outputs = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder,
        output_time_major=True,
        impute_finished=False,
        maximum_iterations=max_time)

    decoder_logits = tf.contrib.layers.linear(decoder_outputs[0][0],
                                              vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    # NOTE(review): coefficient 0.0 makes this regularizer a no-op; it is
    # kept to preserve the graph structure — confirm before removing.
    regularizer = 0.0 * tf.nn.l2_loss(decoder_outputs[0][0])
    train_op = tf.train.AdamOptimizer().minimize(loss + regularizer)

    saver = tf.train.Saver()
    minibatch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            global_max_step = train_step * (
                len(source_train_datas) // batch_size + 1)
            loss_freq = global_max_step // 100 if global_max_step > 100 else 1
            loss_log = []
            batch_loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(global_max_step)
            log = Logger('%s/log' % model_directory)
            sess.run(tf.global_variables_initializer())
            global_step = 0
            stop_flag = False
            for batch in range(train_step):
                if stop_flag:
                    break
                current_batch_loss_log = []
                while True:  # minibatch process
                    m.monitor(global_step, loss_suffix)
                    source_train_batch, _ = batchnize(source_train_datas,
                                                      batch_size,
                                                      minibatch_idx['train'])
                    target_train_batch, minibatch_idx['train'] = batchnize(
                        target_train_datas, batch_size, minibatch_idx['train'])
                    batch_data = seq2seq(source_train_batch,
                                         target_train_batch,
                                         max_time,
                                         vocabulary_size,
                                         reverse=True)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    sess.run(fetches=[train_op, loss], feed_dict=feed_dict)

                    # periodically record validation loss
                    if global_step % loss_freq == 0:
                        source_valid_batch, _ = batchnize(
                            source_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        target_valid_batch, minibatch_idx['valid'] = batchnize(
                            target_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        batch_data = seq2seq(source_valid_batch,
                                             target_valid_batch,
                                             max_time,
                                             vocabulary_size,
                                             reverse=True)
                        feed_dict = {
                            encoder_inputs: batch_data['encoder_inputs'],
                            decoder_inputs: batch_data['decoder_inputs'],
                            decoder_labels: batch_data['decoder_labels']
                        }
                        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                        loss_log.append(loss_val)
                        current_batch_loss_log.append(loss_val)
                        loss_suffix = 'loss: %f' % loss_val
                    global_step += 1
                    # minibatch_idx wraps to 0 when one full epoch is done
                    if minibatch_idx['train'] == 0:
                        batch_loss = np.mean(current_batch_loss_log)
                        batch_loss_log.append(batch_loss)
                        loss_msg = 'Batch: {}/{}, batch loss: {}'.format(
                            batch + 1, train_step, batch_loss)
                        print(loss_msg)
                        log(loss_msg)
                        es_status = es(batch_loss)
                        # only allow early stopping in the second half of
                        # training
                        if batch > train_step // 2 and es_status:
                            print('early stopping at step: %d' % global_step)
                            stop_flag = True
                        break

            # save tf.graph and variables
            saver.save(sess, model_path)
            print('save at %s' % model_path)

            # save plot of loss
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_global_loss.png' % model_path)
            plt.figure()
            plt.plot(np.arange(len(batch_loss_log)), batch_loss_log)
            plt.savefig('%s_batch_loss.png' % model_path)

            # save dictionary
            with open(dictionary_path['source'], 'wb') as f1, \
                 open(dictionary_path['source_reverse'], 'wb') as f2, \
                 open(dictionary_path['target'], 'wb') as f3, \
                 open(dictionary_path['target_reverse'], 'wb') as f4:
                pickle.dump(source_dictionary, f1)
                pickle.dump(source_reverse_dictionary, f2)
                pickle.dump(target_dictionary, f3)
                pickle.dump(target_reverse_dictionary, f4)

        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)

        else:
            # BUGFIX: a bare `raise` outside an except block raises a
            # confusing RuntimeError; raise an explicit error instead.
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             minibatch_idx['test'])
            target_test_batch, minibatch_idx['test'] = batchnize(
                target_test_datas, batch_size, minibatch_idx['test'])
            batch_data = seq2seq(source_test_batch,
                                 target_test_batch,
                                 max_time,
                                 vocabulary_size,
                                 reverse=True)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            # transpose back from time-major to batch-major before stacking
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        input_sentences = ''
        predict_sentences = ''
        ignore_token = EOS
        # slicing drops the padding rows added by the last partial batch
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector] for vector in input_vector
                if not vector == ignore_token
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector] for vector in predict_vector
                if not vector == ignore_token
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
             open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)

        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')