Code Example #1
File: train.py  Project: zcf131016/QA_LSTM_ATTENTION
def main():
    trained_model = "checkpoints/model.ckpt"
    embedding_size = 100  # Word embedding dimension
    epochs = 10
    batch_size = 64  # Batch data size
    rnn_size = 50  # Number of hidden layer neurons
    sequence_length = 300  # Sentence length
    learning_rate = 0.01  # Learning rate
    lr_decay_rate = 0.9  # Per-epoch learning rate decay factor
    margin = 0.1
    attention_matrix_size = 100
    gpu_mem_usage = 0.75
    gpu_device = "/gpu:0"
    cpu_device = "/cpu:0"

    embeddings, word2idx = data_helpers.load_embedding('vectors.nobin')
    voc = data_helpers.load_vocab('D:\\DataMining\\Datasets\\insuranceQA\\V1\\vocabulary')
    all_answers = data_helpers.load_answers('D:\\DataMining\\Datasets\\insuranceQA\\V1\\answers.label.token_idx', voc)
    questions, pos_answers, neg_answers = data_helpers.load_train_data('D:\\DataMining\\Datasets\\insuranceQA\\V1\\question.train.token_idx.label', all_answers, voc, word2idx, sequence_length)
    data_size = len(questions)
    permutation = np.random.permutation(data_size)
    questions = questions[permutation, :]
    pos_answers = pos_answers[permutation, :]
    neg_answers = neg_answers[permutation, :]
    with tf.Graph().as_default(), tf.device(gpu_device):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_mem_usage)
        session_conf = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        model = QALSTM(batch_size, sequence_length, embeddings, embedding_size, rnn_size, margin, attention_matrix_size)
        with tf.Session(config=session_conf).as_default() as sess:
            saver = tf.train.Saver()

            print("Start training")
            sess.run(tf.global_variables_initializer())  # Initialize all variables
            for epoch in range(epochs):
                print("The training of the %s iteration is underway" % (epoch + 1))
                batch_number = 1
                for question, pos_answer, neg_answer in data_helpers.batch_iter(questions, pos_answers, neg_answers, batch_size):
                    start_time = time.time()
                    feed_dict = {
                        model.q: question,
                        model.ap: pos_answer,
                        model.an: neg_answer,
                        model.lr: learning_rate
                    }
                    _, loss, acc = sess.run([model.train_op, model.loss, model.acc], feed_dict)
                    duration = time.time() - start_time
                    print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f\tAcc %2.3f' % (epoch + 1, batch_number * batch_size, data_size, duration, loss, acc))
                    batch_number += 1
                learning_rate *= lr_decay_rate
                saver.save(sess, trained_model)
            print("End of the training")
Code Example #2
def load_train_data():
    x, y, vocabulary, vocabulary_inv_list = data_helpers.load_train_data(
        train_path=train_data_path,
        train_x_col=train_data_x_col,
        train_y_cols=train_data_y_cols,
        save_path=models_dir)
    vocabulary_inv = {
        key: value
        for key, value in enumerate(vocabulary_inv_list)
    }
    y = y.argmax(axis=1)

    # Shuffle data
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x = x[shuffle_indices]
    y = y[shuffle_indices]
    train_len = int(len(x) * train_percent)
    x_train = x[:train_len]
    y_train = y[:train_len]
    x_val = x[train_len:]
    y_val = y[train_len:]

    return x_train, y_train, x_val, y_val, vocabulary_inv
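
The dictionary comprehension over enumerate(vocabulary_inv_list) above builds an id-to-token map; the same mapping can be written more compactly:

# Equivalent, more idiomatic construction of the id -> token mapping.
vocabulary_inv = dict(enumerate(vocabulary_inv_list))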
Code Example #3
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

# Load data
print("Loading data...")
data_helpers = data_helpers.DataHelpers()
x_text, y = data_helpers.load_train_data()

# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in x_text])
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x = np.array(list(vocab_processor.fit_transform(x_text)))

# Randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]

# Split train/test set
# TODO: This is very crude, should use cross-validation
dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
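
The excerpt ends right after dev_sample_index is computed. In this common pattern the negative index is then used to slice the tail of the shuffled data off as a dev set; a sketch of the usual continuation (an assumption, not part of the original excerpt):

# Assumed continuation: split the shuffled arrays into train and dev sets.
x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))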
Code Example #4
def train_step():
    print("loading the dataset...")
    config = Config()
    log_path = os.path.join("logs", config.log_file)
    logger = get_logger(log_path)
    print_config(FLAGS, logger)

    x, xp, y = data_helpers.load_train_sentences("./data/train.csv")
    x_test, xp_test, y_test = data_helpers.load_train_sentences("./data/test.csv")

    # Build vocabulary
    if config.ckpt_path:
        print(os.path.join(config.ckpt_path, "vocab_processor.bin"))
        vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor.restore(
            os.path.join(config.ckpt_path, "vocab_processor.bin"))
    else:
        vocab_processor = learn.preprocessing.VocabularyProcessor(config.num_step, min_frequency=2)
        sentences = x + xp
        vocab_processor.fit_transform(sentences)  # word to id
        if not os.path.exists(config.word2vec_path):
            word2vec_model = data_helpers.dataMatrix(sentences, config.embed_dim, config.iteration)
            word2vec_model.wv.save_word2vec_format(os.path.join(config.word2vec_path), binary=False)

    # word to id
    x_train, xp_train, y = data_helpers.filter_train_list(list(vocab_processor.transform(x)),
                                                          list(vocab_processor.transform(xp)), y)
    x_test, xp_test, y_test = data_helpers.filter_train_list(list(vocab_processor.transform(x_test)),
                                                             list(vocab_processor.transform(xp_test)), y_test)

    vocab_id2w = dict()
    for index in range(len(vocab_processor.vocabulary_)):
        vocab_id2w[index] = vocab_processor.vocabulary_._reverse_mapping[index]
    logger.info("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))

    # test_data
    test_data = data_helpers.load_train_data(x_test, xp_test, y_test)
    # Training
    # ==================================================
    print("begin training")
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=config.allow_soft_placement,
            log_device_placement=config.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        with tf.Session(config=session_conf).as_default() as session:
            if config.ckpt_path:
                out_dir = config.ckpt_path
            else:
                timestamp = str(int(time.time()))
                out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            bi_rnn, valid_bi_rnn, test_bi_rnn = create_model(session, y, vocab_id2w, config, config.ckpt_path, logger)

            # add summary
            dev_summary_op = tf.summary.merge([valid_bi_rnn.loss_summary, valid_bi_rnn.accuracy_summary])
            # dev_summary_op = tf.summary.merge([valid_bi_rnn.loss_summary])

            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph)

            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, session.graph)

            # add checkpoint
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))

            checkpoint_prefix = os.path.join(checkpoint_dir, "bi_rnn")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            # Write vocabulary
            if config.ckpt_path is None:  # no existing checkpoint to resume from
                vocab_processor.save(os.path.join(out_dir, "vocab_processor.bin"))
            begin_time = int(time.time())

            for epoch in range(config.num_epoch):
                logger.info("epoch in:" + str(epoch+1) + "/" + str(config.num_epoch))
                # learning_rate衰减
                # 在遍数小于max_epoch时,lr_decay = 1;> max_epoch时,lr_decay = 0.5^(epoch-max_epoch)
                lr_decay = config.lr_decay ** max(epoch-config.max_decay_epoch, 0.0)
                bi_rnn.assign_new_lr(session, config.lr*lr_decay)

                shuffle_indices = np.random.permutation(np.arange(len(y)))  # randomly shuffle the indices
                x_shuffled = x_train[shuffle_indices]
                xp_shuffled = xp_train[shuffle_indices]
                y_shuffled = y[shuffle_indices]
                # Split train/test set
                # TODO: This is very crude, should use cross-validation
                dev_sample_index = int(config.valid_sample_percentage * float(len(y)))
                x_train1, x_valid1 = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]  # attention circle
                xp_train1, xp_valid1 = xp_shuffled[:dev_sample_index], xp_shuffled[dev_sample_index:]
                y_train1, y_valid1 = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
                logger.info("Train/Valid split: {:d}/{:d}".format(len(y_train1), len(y_valid1)))

                train_data = data_helpers.load_train_data(x_train1, xp_train1, y_train1)
                valid_data = data_helpers.load_train_data(x_valid1, xp_valid1, y_valid1)

                batches = data_helpers.batch_iter(train_data, batch_size=config.batch_size)
                for batch in batches:
                    global_steps, train_cost, train_accuracy = run_epoch(bi_rnn, session, batch, train_summary_writer)
                    if global_steps % config.evaluate_every == 0:
                        valid_cost, valid_accuracy = evaluate(valid_bi_rnn, session, valid_data, global_steps,
                                                              dev_summary_writer,
                                                              dev_summary_op)
                        logger.info("step {:d}, train_cost {:g}, train_accuracy {:g}, valid_cost {:g}, "
                                    "valid_accuracy {:g}".format(global_steps, train_cost, train_accuracy,
                                                                 valid_cost, valid_accuracy))

                    if global_steps % config.checkpoint_every == 0:  # save a checkpoint every checkpoint_every steps
                        path = bi_rnn.saver.save(session, checkpoint_prefix, global_step=global_steps)
                        logger.info("Saved bi_rnn checkpoint to: {}\n".format(path))
                embeddings = session.run(bi_rnn.embeddings)
                pickle.dump(embeddings.tolist(), open(os.path.join(out_dir, "vocab.pkl"), "wb"))
                test_costs, test_accuracy = test_evaluate(test_bi_rnn, session, test_data)
                logger.info("the test data, test_costs{:g}, acc {:g}".format(test_costs, test_accuracy))
            end_time = int(time.time())
            logger.info("training takes %d seconds already\n" % (end_time-begin_time))
            print("program end!")
Code Example #5
File: train.py  Project: DrWereviruswolf/AI
def train():
    np.random.seed(10)
    mask = np.ones(shape=[FLAGS.sequence_length]).nonzero()

    with tf.device('/cpu:0'):
        x_text, x_position, y = data_helpers.load_train_data(
            FLAGS.av_data_path + 'train_av.npy')

    print("Train set size: {}\n".format(len(y)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = AttLSTM(sequence_length=FLAGS.sequence_length,
                            num_classes=2,
                            vocab_size=FLAGS.vocab_size,
                            embedding_size=FLAGS.embedding_dim,
                            hidden_size=FLAGS.hidden_size,
                            l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate,
                                                   FLAGS.decay_rate, 1e-6)
            gvs = optimizer.compute_gradients(model.loss)
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "val")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model_av")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            pretrain_W = np.load("embedding.npy")
            norm = max(abs(pretrain_W.max()), abs(pretrain_W.min()))
            pretrain_W /= 4 * norm
            sess.run(model.W_text.assign(pretrain_W))
            print("Success to load pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(x_text, x_position, y,
                                              FLAGS.batch_size,
                                              FLAGS.final_epochs_av)
            # Training loop. For each batch...
            best_accuracy = 0.0  # Best accuracy so far, used to decide when to checkpoint
            accs = []
            losses = []
            for batch in batches:
                x_text_batch, x_position_batch, y_batch = batch
                # Train
                feed_dict = {
                    model.input_text: x_text_batch,
                    model.input_position: x_position_batch,
                    model.input_y: y_batch,
                    model.mask: mask,
                    model.emb_dropout_keep_prob: FLAGS.emb_dropout_keep_prob,
                    model.rnn_dropout_keep_prob: FLAGS.rnn_dropout_keep_prob,
                    model.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, model.loss,
                    model.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)
                accs.append(accuracy)
                losses.append(loss)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                if step % FLAGS.evaluate_every == 0:
                    acc = np.mean(accs)
                    los = np.mean(losses)
                    accs = []
                    losses = []
                    time_str = datetime.datetime.now().isoformat()
                    print("\nEvaluation: {}: step {}, loss {:g}, acc {:g}".
                          format(time_str, step, los, acc))
                    # Model checkpoint
                    if best_accuracy < acc:
                        best_accuracy = acc
                        path = saver.save(sess,
                                          checkpoint_prefix +
                                          "-{:g}".format(best_accuracy),
                                          global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
            path = saver.save(sess,
                              checkpoint_prefix + "_final",
                              global_step=step)
            print("Saved model checkpoint to {}\n".format(path))

    with tf.device('/cpu:0'):
        x_text, x_position, y = data_helpers.load_train_data(
            FLAGS.ta_data_path + 'train_ta.npy')

    print("Train set size: {}\n".format(len(y)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = AttLSTM(sequence_length=FLAGS.sequence_length,
                            num_classes=2,
                            vocab_size=FLAGS.vocab_size,
                            embedding_size=FLAGS.embedding_dim,
                            hidden_size=FLAGS.hidden_size,
                            l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdadeltaOptimizer(FLAGS.learning_rate,
                                                   FLAGS.decay_rate, 1e-6)
            gvs = optimizer.compute_gradients(model.loss)
            capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs,
                                                 global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Validation summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "val")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model_ta")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            # pretrain_W = np.load("embedding.npy")
            # sess.run(model.W_text.assign(pretrain_W))
            # print("Success to load pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(x_text, x_position, y,
                                              FLAGS.batch_size,
                                              FLAGS.final_epochs_ta)
            # Training loop. For each batch...
            best_accuracy = 0.0  # Best accuracy so far, used to decide when to checkpoint
            accs = []
            losses = []
            for batch in batches:
                x_text_batch, x_position_batch, y_batch = batch
                # Train
                feed_dict = {
                    model.input_text: x_text_batch,
                    model.input_position: x_position_batch,
                    model.input_y: y_batch,
                    model.mask: mask,
                    model.emb_dropout_keep_prob: FLAGS.emb_dropout_keep_prob,
                    model.rnn_dropout_keep_prob: FLAGS.rnn_dropout_keep_prob,
                    model.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, model.loss,
                    model.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)
                accs.append(accuracy)
                losses.append(loss)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                if step % FLAGS.evaluate_every == 0:
                    acc = np.mean(accs)
                    los = np.mean(losses)
                    accs = []
                    losses = []
                    time_str = datetime.datetime.now().isoformat()
                    print("\nEvaluation: {}: step {}, loss {:g}, acc {:g}".
                          format(time_str, step, los, acc))
                    # Model checkpoint
                    if best_accuracy < acc:
                        best_accuracy = acc
                        path = saver.save(sess,
                                          checkpoint_prefix +
                                          "-{:g}".format(best_accuracy),
                                          global_step=step)
                        print("Saved model checkpoint to {}\n".format(path))
            path = saver.save(sess,
                              checkpoint_prefix + "_final",
                              global_step=step)
            print("Saved model checkpoint to {}\n".format(path))
Code Example #6
def train():
    with tf.device('/cpu:0'):
        source_sent, target_sent = data_helpers.load_train_data(
            FLAGS.train_source_dir, FLAGS.train_target_dir,
            FLAGS.source_max_sentence_length, FLAGS.target_max_sentence_length)

    # Build vocabulary
    # Example: x_text[3] = "A misty ridge uprises from the surge."
    # ['a misty ridge uprises from the surge __EOS__ __UNK__ ... __UNK__']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = FLAGS.max_sentence_length
    source_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.source_max_sentence_length)
    x = np.array(
        list(
            source_vocab_processor.fit_transform(["_START_ _EOS_ _PAD_"] +
                                                 source_sent)))
    print("Source Language Vocabulary Size: {:d}".format(
        len(source_vocab_processor.vocabulary_)))

    target_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.target_max_sentence_length)
    y = np.array(
        list(
            target_vocab_processor.fit_transform(["_START_ _EOS_ _PAD_"] +
                                                 target_sent)))
    print("Target Language Vocabulary Size: {:d}".format(
        len(target_vocab_processor.vocabulary_)))

    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/dev set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = Transformer(
                sequence_length=x_train.shape[1],
                source_vocab_size=len(source_vocab_processor.vocabulary_),
                target_vocab_size=len(target_vocab_processor.vocabulary_),
                dim_model=FLAGS.dim_model,
                dim_ff=FLAGS.dim_ff,
                num_stack=FLAGS.num_stack,
                num_head=FLAGS.num_head)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                model.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            source_vocab_processor.save(os.path.join(out_dir, "source_vocab"))
            target_vocab_processor.save(os.path.join(out_dir, "target_vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Generate batches
            batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                              FLAGS.batch_size,
                                              FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)

                # Train
                feed_dict = {
                    model.encoder_x: x_batch,
                    model.decoder_y: y_batch
                }

                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, model.loss,
                    model.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    # Generate batches
                    batches_dev = data_helpers.batch_iter(
                        list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
                    # Evaluation loop. For each batch...
                    loss_dev = 0
                    accuracy_dev = 0
                    cnt = 0
                    for batch_dev in batches_dev:
                        x_batch_dev, y_batch_dev = zip(*batch_dev)

                        feed_dict_dev = {
                            model.encoder_x: x_batch_dev,
                            model.decoder_y: y_batch_dev
                        }

                        summaries_dev, loss, accuracy = sess.run(
                            [dev_summary_op, model.loss, model.accuracy],
                            feed_dict_dev)
                        dev_summary_writer.add_summary(summaries_dev, step)

                        loss_dev += loss
                        accuracy_dev += accuracy
                        cnt += 1

                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss_dev / cnt, accuracy_dev / cnt))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))