Example #1
0
def main(args):
    """Train or evaluate a multi-layer LSTM NMT model.

    args.mode selects 'train' (build dictionaries from the training data,
    train, save model + dictionaries + loss plots) or 'eval' (restore a
    saved model and dictionaries).  Both modes finish by decoding the test
    set and writing input/prediction sentence files next to the model.
    Raises ValueError for any other mode.
    """
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_directory = '%s/examples/model/multi_layer_nmt' % ROOT
    model_path = '%s/model' % model_directory
    dictionary_path = {
        'source': '%s/source_dictionary.pickle' % model_directory,
        'source_reverse':
        '%s/source_reverse_dictionary.pickle' % model_directory,
        'target': '%s/target_dictionary.pickle' % model_directory,
        'target_reverse':
        '%s/target_reverse_dictionary.pickle' % model_directory
    }
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # read data
    if args.mode == 'train':
        source_dictionary, source_reverse_dictionary = build_dictionary(
            read_words(source_train_data_path), vocabulary_size)
        source_train_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_train_data_path)
        ]
        target_dictionary, target_reverse_dictionary = build_dictionary(
            read_words(target_train_data_path), vocabulary_size)
        target_train_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_train_data_path)
        ]

        source_valid_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_valid_data_path)
        ]
        target_valid_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_valid_data_path)
        ]

        if args.debug:
            # Truncate BOTH sides to the same 1000 sentence pairs.
            # (Bug fix: target side was previously truncated from the
            # source data, silently corrupting the training pairs.)
            source_train_datas = source_train_datas[:1000]
            target_train_datas = target_train_datas[:1000]
    else:
        # eval mode: reload the dictionaries pickled at training time
        with open(dictionary_path['source'], 'rb') as f1, \
             open(dictionary_path['source_reverse'], 'rb') as f2, \
             open(dictionary_path['target'], 'rb') as f3, \
             open(dictionary_path['target_reverse'], 'rb') as f4:
            source_dictionary = pickle.load(f1)
            source_reverse_dictionary = pickle.load(f2)
            target_dictionary = pickle.load(f3)
            target_reverse_dictionary = pickle.load(f4)

    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholder
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed (single embedding table shared by encoder and decoder)
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder
    encoder_units = hidden_units
    encoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers)
    encoder_output, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_cell,
        encoder_inputs_embedded,
        dtype=tf.float32,
        time_major=True)
    # only the final state is fed to the decoder; free the outputs
    del encoder_output

    # decoder (initialized from the encoder's final state)
    decoder_units = encoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)

    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    minibatch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            global_max_step = train_step * (
                len(source_train_datas) // batch_size + 1)
            # sample the validation loss ~100 times over the whole run
            loss_freq = global_max_step // 100 if global_max_step > 100 else 1
            loss_log = []
            batch_loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(global_max_step)
            sess.run(tf.global_variables_initializer())
            global_step = 0
            stop_flag = False
            for batch in range(train_step):
                if stop_flag:
                    break
                current_batch_loss_log = []
                while True:  # minibatch process
                    m.monitor(global_step, loss_suffix)
                    source_train_batch, _ = batchnize(source_train_datas,
                                                      batch_size,
                                                      minibatch_idx['train'])
                    target_train_batch, minibatch_idx['train'] = batchnize(
                        target_train_datas, batch_size, minibatch_idx['train'])
                    batch_data = seq2seq(source_train_batch,
                                         target_train_batch, max_time,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                    if global_step % loss_freq == 0:
                        source_valid_batch, _ = batchnize(
                            source_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        target_valid_batch, minibatch_idx['valid'] = batchnize(
                            target_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        batch_data = seq2seq(source_valid_batch,
                                             target_valid_batch, max_time,
                                             vocabulary_size)
                        feed_dict = {
                            encoder_inputs: batch_data['encoder_inputs'],
                            decoder_inputs: batch_data['decoder_inputs'],
                            decoder_labels: batch_data['decoder_labels']
                        }
                        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                        loss_log.append(loss_val)
                        current_batch_loss_log.append(loss_val)
                        loss_suffix = 'loss: %f' % loss_val
                        es_status = es(loss_val)
                        # only allow early stopping in the second half of training
                        if batch > train_step // 2 and es_status:
                            print('early stopping at step: %d' % global_step)
                            stop_flag = True
                            break
                    global_step += 1
                    # batchnize wraps the index back to 0 at the end of an epoch
                    if minibatch_idx['train'] == 0:
                        batch_loss = np.mean(current_batch_loss_log)
                        batch_loss_log.append(batch_loss)
                        print('Batch: {}/{}, batch loss: {}'.format(
                            batch + 1, train_step, batch_loss))
                        break

            # save tf.graph and variables
            saver.save(sess, model_path)
            print('save at %s' % model_path)

            # save plot of loss
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_global_loss.png' % model_path)
            plt.figure()
            plt.plot(np.arange(len(batch_loss_log)), batch_loss_log)
            plt.savefig('%s_batch_loss.png' % model_path)

            # save dictionary
            with open(dictionary_path['source'], 'wb') as f1, \
                 open(dictionary_path['source_reverse'], 'wb') as f2, \
                 open(dictionary_path['target'], 'wb') as f3, \
                 open(dictionary_path['target_reverse'], 'wb') as f4:
                pickle.dump(source_dictionary, f1)
                pickle.dump(source_reverse_dictionary, f2)
                pickle.dump(target_dictionary, f3)
                pickle.dump(target_reverse_dictionary, f4)

        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)

        else:
            # bug fix: a bare `raise` outside an except block raises an
            # opaque RuntimeError; raise an explicit error instead
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             minibatch_idx['test'])
            target_test_batch, minibatch_idx['test'] = batchnize(
                target_test_datas, batch_size, minibatch_idx['test'])
            batch_data = seq2seq(source_test_batch, target_test_batch,
                                 max_time, vocabulary_size)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            # transpose time-major (max_time, batch) to (batch, max_time)
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        # decode id vectors back to sentences, dropping PAD tokens
        input_sentences = ''
        predict_sentences = ''
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector] for vector in input_vector
                if not vector == PAD
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector] for vector in predict_vector
                if not vector == PAD
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
             open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)

        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')
def main(args):
    """Train or evaluate a multi-layer LSTM seq2seq autoencoder-style demo.

    Uses the `through` batch generator (identity seq2seq task) rather than
    real parallel data.  args.mode selects 'train' (train, save model and a
    loss plot) or 'eval' (restore a saved model); any other mode raises
    ValueError.  Both modes finish by printing one decoded batch.
    """
    tf.reset_default_graph()

    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_path = '%s/examples/model/multi_layer_seq2seq/model' % ROOT
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    datas = []

    # placeholder
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed (single embedding table shared by encoder and decoder)
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder
    encoder_units = hidden_units
    encoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell = tf.contrib.rnn.MultiRNNCell(encoder_layers)
    encoder_output, encoder_final_state = tf.nn.dynamic_rnn(
        encoder_cell,
        encoder_inputs_embedded,
        dtype=tf.float32,
        time_major=True)
    # only the final state is fed to the decoder; free the outputs
    del encoder_output

    # decoder (initialized from the encoder's final state)
    decoder_units = encoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    decoder_cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)

    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            # sample the loss ~100 times over the run; guard against
            # train_step < 100, which previously made loss_freq == 0 and
            # crashed `i % loss_freq` with ZeroDivisionError
            loss_freq = train_step // 100 if train_step > 100 else 1
            loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(train_step)
            sess.run(tf.global_variables_initializer())
            for i in range(train_step):
                m.monitor(i, loss_suffix)
                batch_data = through(datas, max_time, batch_size,
                                     vocabulary_size)
                feed_dict = {
                    encoder_inputs: batch_data['encoder_inputs'],
                    decoder_inputs: batch_data['decoder_inputs'],
                    decoder_labels: batch_data['decoder_labels']
                }
                sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                if i % loss_freq == 0:
                    batch_data = through(datas, max_time, batch_size,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                    loss_log.append(loss_val)
                    loss_suffix = 'loss: %f' % loss_val
                    es_status = es(loss_val)
                    # only allow early stopping in the second half of training
                    if i > train_step // 2 and es_status:
                        print('early stopping at step: %d' % i)
                        break
            saver.save(sess, model_path)
            print('save at %s' % model_path)
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_loss.png' % model_path)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            # bug fix: a bare `raise` outside an except block raises an
            # opaque RuntimeError; raise an explicit error instead
            raise ValueError('args.mode should be train or eval')

        # evaluate: decode one batch and show inputs vs. predictions
        batch_data = through(datas, max_time, batch_size, vocabulary_size)
        feed_dict = {
            encoder_inputs: batch_data['encoder_inputs'],
            decoder_inputs: batch_data['decoder_inputs'],
            decoder_labels: batch_data['decoder_labels']
        }
        pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
        input_ = batch_data['encoder_inputs']
        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)

        print('input sequences...\n{}'.format(input_))
        print('predict sequences...\n{}'.format(pred))
        print('loss: %f' % loss_val)

    print('finish.')
Example #3
0
def main(args):
    """Train or evaluate a single-layer LSTM NMT model (basic_nmt).

    Dictionaries are always rebuilt from the training data (unlike the
    multi-layer variant, nothing is pickled).  args.mode selects 'train'
    (train, save model and a loss plot) or 'eval' (restore a saved model);
    any other mode raises ValueError.  Both modes finish by decoding the
    test set and writing input/prediction sentence files.
    """
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    model_path = '%s/examples/model/basic_nmt/model' % ROOT
    PAD = c.const['PAD']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # read data
    source_dictionary, source_reverse_dictionary = build_dictionary(
        read_words(source_train_data_path), vocabulary_size)
    source_train_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_train_data_path)
    ]
    target_dictionary, target_reverse_dictionary = build_dictionary(
        read_words(target_train_data_path), vocabulary_size)
    target_train_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_train_data_path)
    ]

    source_valid_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_valid_data_path)
    ]
    target_valid_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_valid_data_path)
    ]
    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholder
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed (single embedding table shared by encoder and decoder)
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder (single LSTM layer; only the final state is kept)
    encoder_units = hidden_units
    encoder_cell = tf.contrib.rnn.LSTMCell(encoder_units)
    _, encoder_final_state = tf.nn.dynamic_rnn(encoder_cell,
                                               encoder_inputs_embedded,
                                               dtype=tf.float32,
                                               time_major=True)

    # decoder (initialized from the encoder's final state)
    decoder_units = encoder_units
    decoder_cell = tf.contrib.rnn.LSTMCell(decoder_units)
    decoder_output, decoder_final_state = tf.nn.dynamic_rnn(
        decoder_cell,
        decoder_inputs_embedded,
        initial_state=encoder_final_state,
        scope="plain_decoder",
        dtype=tf.float32,
        time_major=True)

    decoder_logits = tf.contrib.layers.linear(decoder_output, vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    batch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            # sample the validation loss ~100 times over the run; guard
            # against train_step < 100, which previously made loss_freq == 0
            # and crashed `i % loss_freq` with ZeroDivisionError
            loss_freq = train_step // 100 if train_step > 100 else 1
            loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(train_step)
            sess.run(tf.global_variables_initializer())
            for i in range(train_step):
                m.monitor(i, loss_suffix)
                source_train_batch, _ = batchnize(source_train_datas,
                                                  batch_size,
                                                  batch_idx['train'])
                target_train_batch, batch_idx['train'] = batchnize(
                    target_train_datas, batch_size, batch_idx['train'])
                batch_data = seq2seq(source_train_batch, target_train_batch,
                                     max_time, vocabulary_size)
                feed_dict = {
                    encoder_inputs: batch_data['encoder_inputs'],
                    decoder_inputs: batch_data['decoder_inputs'],
                    decoder_labels: batch_data['decoder_labels']
                }
                sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
                if i % loss_freq == 0:
                    source_valid_batch, _ = batchnize(source_valid_datas,
                                                      batch_size,
                                                      batch_idx['valid'])
                    target_valid_batch, batch_idx['valid'] = batchnize(
                        target_valid_datas, batch_size, batch_idx['valid'])
                    batch_data = seq2seq(source_valid_batch,
                                         target_valid_batch, max_time,
                                         vocabulary_size)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                    loss_log.append(loss_val)
                    loss_suffix = 'loss: %f' % loss_val
                    es_status = es(loss_val)
                    # only allow early stopping in the second half of training
                    if i > train_step // 2 and es_status:
                        print('early stopping at step: %d' % i)
                        break
            saver.save(sess, model_path)
            print('save at %s' % model_path)
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_loss.png' % model_path)
        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)
        else:
            # bug fix: a bare `raise` outside an except block raises an
            # opaque RuntimeError; raise an explicit error instead
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             batch_idx['test'])
            target_test_batch, batch_idx['test'] = batchnize(
                target_test_datas, batch_size, batch_idx['test'])
            batch_data = seq2seq(source_test_batch, target_test_batch,
                                 max_time, vocabulary_size)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            # transpose time-major (max_time, batch) to (batch, max_time)
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        # decode id vectors back to sentences, dropping PAD tokens
        input_sentences = ''
        predict_sentences = ''
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector] for vector in input_vector
                if not vector == PAD
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector] for vector in predict_vector
                if not vector == PAD
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
             open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)

        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')
Example #4
0
def main(args):
    """Train or evaluate a multi-layer bidirectional seq2seq NMT model.

    Args:
        args: parsed CLI namespace with attributes
            ``config`` (path to config file), ``mode`` ('train' or 'eval'),
            and ``debug`` (truncate training data when truthy).

    Side effects:
        Creates/overwrites the model output directory (interactive
        confirmation if it already exists), writes model checkpoints,
        loss plots, pickled dictionaries, and evaluation output files.
    """
    # process config
    c = Configs(args.config)
    ROOT = os.environ['TENSOROFLOW']
    output = c.option.get('output', 'examples/model/buf')
    model_directory = '%s/%s' % (ROOT, output)
    model_path = '%s/model' % model_directory
    dictionary_path = {
        'source': '%s/source_dictionary.pickle' % model_directory,
        'source_reverse':
        '%s/source_reverse_dictionary.pickle' % model_directory,
        'target': '%s/target_dictionary.pickle' % model_directory,
        'target_reverse':
        '%s/target_reverse_dictionary.pickle' % model_directory
    }
    PAD = c.const['PAD']
    BOS = c.const['BOS']
    EOS = c.const['EOS']
    train_step = c.option['train_step']
    max_time = c.option['max_time']
    batch_size = c.option['batch_size']
    vocabulary_size = c.option['vocabulary_size']
    input_embedding_size = c.option['embedding_size']
    hidden_units = c.option['hidden_units']
    layers = c.option['layers']
    source_train_data_path = c.data['source_train_data']
    target_train_data_path = c.data['target_train_data']
    source_valid_data_path = c.data['source_valid_data']
    target_valid_data_path = c.data['target_valid_data']
    source_test_data_path = c.data['source_test_data']
    target_test_data_path = c.data['target_test_data']

    # initialize output directory
    if pathlib.Path(model_directory).exists():
        # BUGFIX: the original printed a bare '%s' placeholder because the
        # format argument was missing.
        print('Warning: model %s already exists.' % model_directory)
        print('Old model will be overwritten.')
        while True:
            print('Do you wanna continue? [yes|no]')
            command = input('> ')
            if command == 'yes':
                shutil.rmtree(model_directory)
                break
            elif command == 'no':
                sys.exit()
            else:
                print('You can only input "yes" or "no".')

    print('Make new model: %s' % model_directory)
    # parents=True so a missing intermediate directory does not crash mkdir
    pathlib.Path(model_directory).mkdir(parents=True)

    # read data
    if args.mode == 'train':
        source_dictionary, source_reverse_dictionary = build_dictionary(
            read_words(source_train_data_path), vocabulary_size)
        source_train_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_train_data_path)
        ]
        target_dictionary, target_reverse_dictionary = build_dictionary(
            read_words(target_train_data_path), vocabulary_size)
        target_train_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_train_data_path)
        ]

        source_valid_datas = [
            sentence_to_onehot(lines, source_dictionary)
            for lines in read_data(source_valid_data_path)
        ]
        target_valid_datas = [
            sentence_to_onehot(lines, target_dictionary)
            for lines in read_data(target_valid_data_path)
        ]

        if args.debug:
            # BUGFIX: the original truncated target data from the SOURCE
            # list, silently training on mismatched sentence pairs.
            source_train_datas = source_train_datas[:1000]
            target_train_datas = target_train_datas[:1000]
    else:
        with open(dictionary_path['source'], 'rb') as f1, \
             open(dictionary_path['source_reverse'], 'rb') as f2, \
             open(dictionary_path['target'], 'rb') as f3, \
             open(dictionary_path['target_reverse'], 'rb') as f4:
            source_dictionary = pickle.load(f1)
            source_reverse_dictionary = pickle.load(f2)
            target_dictionary = pickle.load(f3)
            target_reverse_dictionary = pickle.load(f4)

    source_test_datas = [
        sentence_to_onehot(lines, source_dictionary)
        for lines in read_data(source_test_data_path)
    ]
    target_test_datas = [
        sentence_to_onehot(lines, target_dictionary)
        for lines in read_data(target_test_data_path)
    ]

    # placeholder (time-major: shape is (max_time, batch) — see time_major=True
    # in the RNN/decoder calls below)
    encoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    decoder_inputs = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_labels = tf.placeholder(shape=(None, None),
                                    dtype=tf.int32,
                                    name='decoder_labels')

    # embed: a single embedding table shared by encoder and decoder inputs
    embeddings = tf.Variable(tf.random_uniform(
        [vocabulary_size, input_embedding_size], -1.0, 1.0),
                             dtype=tf.float32,
                             name='embeddings')
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    # encoder with bidirection
    encoder_units = hidden_units
    encoder_layers_fw = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell_fw = tf.contrib.rnn.MultiRNNCell(encoder_layers_fw)
    encoder_layers_bw = [
        tf.contrib.rnn.LSTMCell(size) for size in [encoder_units] * layers
    ]
    encoder_cell_bw = tf.contrib.rnn.MultiRNNCell(encoder_layers_bw)
    (encoder_output_fw,
     encoder_output_bw), encoder_state = tf.nn.bidirectional_dynamic_rnn(
         encoder_cell_fw,
         encoder_cell_bw,
         encoder_inputs_embedded,
         dtype=tf.float32,
         time_major=True)
    # concatenate fw/bw outputs and per-layer states so the decoder
    # (with 2x units) can consume them directly
    encoder_outputs = tf.concat((encoder_output_fw, encoder_output_bw), 2)
    encoder_state = tuple(
        tf.contrib.rnn.LSTMStateTuple(
            tf.concat((encoder_state[0][layer].c,
                       encoder_state[1][layer].c), 1),
            tf.concat((encoder_state[0][layer].h,
                       encoder_state[1][layer].h), 1))
        for layer in range(layers))

    # decoder with attention
    decoder_units = encoder_units * 2  # matches concatenated fw+bw encoder
    attention_units = decoder_units
    decoder_layers = [
        tf.contrib.rnn.LSTMCell(size) for size in [decoder_units] * layers
    ]
    cell = tf.contrib.rnn.MultiRNNCell(decoder_layers)

    sequence_length = tf.cast([max_time] * batch_size, dtype=tf.int32)
    # beam_width is fixed to 1, so tile_batch is currently a no-op kept for
    # future beam-search support
    beam_width = 1
    tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
        encoder_outputs, multiplier=beam_width)
    tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
        encoder_state, multiplier=beam_width)
    tiled_sequence_length = tf.contrib.seq2seq.tile_batch(
        sequence_length, multiplier=beam_width)
    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
        num_units=attention_units,
        memory=tiled_encoder_outputs,
        memory_sequence_length=tiled_sequence_length)
    attention_cell = tf.contrib.seq2seq.AttentionWrapper(
        cell, attention_mechanism, attention_layer_size=256)
    decoder_initial_state = attention_cell.zero_state(dtype=tf.float32,
                                                      batch_size=batch_size *
                                                      beam_width)
    decoder_initial_state = decoder_initial_state.clone(
        cell_state=tiled_encoder_final_state)

    # teacher forcing at train time; greedy decoding at eval time
    if args.mode == 'train':
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_inputs_embedded,
            sequence_length=tf.cast([max_time] * batch_size, dtype=tf.int32),
            time_major=True)
    elif args.mode == 'eval':
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=embeddings,
            start_tokens=tf.tile([BOS], [batch_size]),
            end_token=EOS)

    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=attention_cell,
        helper=helper,
        initial_state=decoder_initial_state)
    decoder_outputs = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder,
        output_time_major=True,
        impute_finished=False,
        maximum_iterations=max_time)

    decoder_logits = tf.contrib.layers.linear(decoder_outputs[0][0],
                                              vocabulary_size)
    decoder_prediction = tf.argmax(
        decoder_logits, 2)  # max_time: axis=0, batch: axis=1, vocab: axis=2

    # optimizer
    stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(decoder_labels,
                          depth=vocabulary_size,
                          dtype=tf.float32),
        logits=decoder_logits,
    )

    loss = tf.reduce_mean(stepwise_cross_entropy)
    # NOTE(review): coefficient 0.0 makes this regularizer a no-op; it is
    # kept to preserve the graph structure — confirm before removing.
    regularizer = 0.0 * tf.nn.l2_loss(decoder_outputs[0][0])
    train_op = tf.train.AdamOptimizer().minimize(loss + regularizer)

    saver = tf.train.Saver()
    minibatch_idx = {'train': 0, 'valid': 0, 'test': 0}
    with tf.Session() as sess:
        if args.mode == 'train':
            # train
            global_max_step = train_step * (
                len(source_train_datas) // batch_size + 1)
            loss_freq = global_max_step // 100 if global_max_step > 100 else 1
            loss_log = []
            batch_loss_log = []
            loss_suffix = ''
            es = EarlyStopper(max_size=5, edge_threshold=0.1)
            m = Monitor(global_max_step)
            log = Logger('%s/log' % model_directory)
            sess.run(tf.global_variables_initializer())
            global_step = 0
            stop_flag = False
            for batch in range(train_step):
                if stop_flag:
                    break
                current_batch_loss_log = []
                while True:  # minibatch process
                    m.monitor(global_step, loss_suffix)
                    source_train_batch, _ = batchnize(source_train_datas,
                                                      batch_size,
                                                      minibatch_idx['train'])
                    target_train_batch, minibatch_idx['train'] = batchnize(
                        target_train_datas, batch_size, minibatch_idx['train'])
                    batch_data = seq2seq(source_train_batch,
                                         target_train_batch,
                                         max_time,
                                         vocabulary_size,
                                         reverse=True)
                    feed_dict = {
                        encoder_inputs: batch_data['encoder_inputs'],
                        decoder_inputs: batch_data['decoder_inputs'],
                        decoder_labels: batch_data['decoder_labels']
                    }
                    sess.run(fetches=[train_op, loss], feed_dict=feed_dict)

                    # periodically record validation loss
                    if global_step % loss_freq == 0:
                        source_valid_batch, _ = batchnize(
                            source_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        target_valid_batch, minibatch_idx['valid'] = batchnize(
                            target_valid_datas, batch_size,
                            minibatch_idx['valid'])
                        batch_data = seq2seq(source_valid_batch,
                                             target_valid_batch,
                                             max_time,
                                             vocabulary_size,
                                             reverse=True)
                        feed_dict = {
                            encoder_inputs: batch_data['encoder_inputs'],
                            decoder_inputs: batch_data['decoder_inputs'],
                            decoder_labels: batch_data['decoder_labels']
                        }
                        loss_val = sess.run(fetches=loss, feed_dict=feed_dict)
                        loss_log.append(loss_val)
                        current_batch_loss_log.append(loss_val)
                        loss_suffix = 'loss: %f' % loss_val
                    global_step += 1
                    # minibatch_idx wraps to 0 when one full epoch is done
                    if minibatch_idx['train'] == 0:
                        batch_loss = np.mean(current_batch_loss_log)
                        batch_loss_log.append(batch_loss)
                        loss_msg = 'Batch: {}/{}, batch loss: {}'.format(
                            batch + 1, train_step, batch_loss)
                        print(loss_msg)
                        log(loss_msg)
                        es_status = es(batch_loss)
                        # only allow early stopping in the second half of
                        # training
                        if batch > train_step // 2 and es_status:
                            print('early stopping at step: %d' % global_step)
                            stop_flag = True
                        break

            # save tf.graph and variables
            saver.save(sess, model_path)
            print('save at %s' % model_path)

            # save plot of loss
            plt.plot(np.arange(len(loss_log)) * loss_freq, loss_log)
            plt.savefig('%s_global_loss.png' % model_path)
            plt.figure()
            plt.plot(np.arange(len(batch_loss_log)), batch_loss_log)
            plt.savefig('%s_batch_loss.png' % model_path)

            # save dictionary
            with open(dictionary_path['source'], 'wb') as f1, \
                 open(dictionary_path['source_reverse'], 'wb') as f2, \
                 open(dictionary_path['target'], 'wb') as f3, \
                 open(dictionary_path['target_reverse'], 'wb') as f4:
                pickle.dump(source_dictionary, f1)
                pickle.dump(source_reverse_dictionary, f2)
                pickle.dump(target_dictionary, f3)
                pickle.dump(target_reverse_dictionary, f4)

        elif args.mode == 'eval':
            saver.restore(sess, model_path)
            print('load from %s' % model_path)

        else:
            # BUGFIX: a bare `raise` outside an except block raises a
            # confusing RuntimeError; raise an explicit error instead.
            raise ValueError('args.mode should be train or eval')

        # evaluate
        loss_val = []
        input_vectors = None
        predict_vectors = None
        for i in range(len(source_test_datas) // batch_size + 1):
            source_test_batch, _ = batchnize(source_test_datas, batch_size,
                                             minibatch_idx['test'])
            target_test_batch, minibatch_idx['test'] = batchnize(
                target_test_datas, batch_size, minibatch_idx['test'])
            batch_data = seq2seq(source_test_batch,
                                 target_test_batch,
                                 max_time,
                                 vocabulary_size,
                                 reverse=True)
            feed_dict = {
                encoder_inputs: batch_data['encoder_inputs'],
                decoder_inputs: batch_data['decoder_inputs'],
                decoder_labels: batch_data['decoder_labels']
            }
            pred = sess.run(fetches=decoder_prediction, feed_dict=feed_dict)
            # transpose back from time-major to batch-major before stacking
            if predict_vectors is None:
                predict_vectors = pred.T
            else:
                predict_vectors = np.vstack((predict_vectors, pred.T))
            input_ = batch_data['encoder_inputs']
            if input_vectors is None:
                input_vectors = input_.T
            else:
                input_vectors = np.vstack((input_vectors, input_.T))
            loss_val.append(sess.run(fetches=loss, feed_dict=feed_dict))

        input_sentences = ''
        predict_sentences = ''
        ignore_token = EOS
        # slicing drops the padding rows added by the last partial batch
        for i, (input_vector, predict_vector) in enumerate(
                zip(input_vectors[:len(source_test_datas)],
                    predict_vectors[:len(target_test_datas)])):
            input_sentences += ' '.join([
                source_reverse_dictionary[vector] for vector in input_vector
                if not vector == ignore_token
            ])
            predict_sentences += ' '.join([
                target_reverse_dictionary[vector] for vector in predict_vector
                if not vector == ignore_token
            ])
            if i < len(source_test_datas) - 1:
                input_sentences += '\n'
                predict_sentences += '\n'

        evaluate_input_path = '%s.evaluate_input' % model_path
        evaluate_predict_path = '%s.evaluate_predict' % model_path
        with open(evaluate_input_path, 'w') as f1, \
             open(evaluate_predict_path, 'w') as f2:
            f1.write(input_sentences)
            f2.write(predict_sentences)

        print('input sequences at {}'.format(evaluate_input_path))
        print('predict sequences at {}'.format(evaluate_predict_path))
        print('mean of loss: %f' % np.mean(loss_val))

    print('finish.')