Example #1
def initial_setup(data_corpus):
    metadata, idx_q, idx_a = data.load_data(PATH='data/{}/'.format(data_corpus))
    (trainX, trainY), (testX, testY), (validX, validY) = data.split_dataset(idx_q, idx_a)
    trainX = tl.prepro.remove_pad_sequences(trainX.tolist())
    trainY = tl.prepro.remove_pad_sequences(trainY.tolist())
    testX = tl.prepro.remove_pad_sequences(testX.tolist())
    testY = tl.prepro.remove_pad_sequences(testY.tolist())
    validX = tl.prepro.remove_pad_sequences(validX.tolist())
    validY = tl.prepro.remove_pad_sequences(validY.tolist())
    return metadata, trainX, trainY, testX, testY, validX, validY
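A short usage sketch for the helper above, assuming the imports the fuller examples use (from data.twitter import data, import tensorlayer as tl) and a data/twitter/ corpus directory; the printout is illustrative:

# Hypothetical call for the 'twitter' corpus.
metadata, trainX, trainY, testX, testY, validX, validY = initial_setup('twitter')
print(len(trainX), 'training pairs;', len(metadata['idx2w']), 'words in the vocabulary')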
Example #2
    def __init__(self):
        data_corpus = "twitter"
        metadata, idx_q, idx_a = data.load_data(
            PATH='data/{}/'.format(data_corpus))
        src_vocab_size = len(metadata['idx2w'])  # 8002 (0~8001)
        emb_dim = 1024

        word2idx = metadata['w2idx']  # dict  word 2 index
        idx2word = metadata['idx2w']  # list index 2 word

        unk_id = word2idx['unk']  # 1
        pad_id = word2idx['_']  # 0

        start_id = src_vocab_size  # 8002
        end_id = src_vocab_size + 1  # 8003

        word2idx.update({'start_id': start_id})
        word2idx.update({'end_id': end_id})
        idx2word = idx2word + ['start_id', 'end_id']

        src_vocab_size = tgt_vocab_size = src_vocab_size + 2

        #   num_epochs = 5
        vocabulary_size = src_vocab_size

        decoder_seq_length = 20
        self.unk_id = unk_id
        self.pad_id = pad_id
        self.start_id = start_id
        self.end_id = end_id
        self.word2idx = word2idx
        self.idx2word = idx2word
        self.model_ = Seq2seq(
            decoder_seq_length=decoder_seq_length,
            cell_enc=tf.keras.layers.GRUCell,
            cell_dec=tf.keras.layers.GRUCell,
            n_layer=3,
            n_units=256,
            embedding_layer=tl.layers.Embedding(
                vocabulary_size=vocabulary_size, embedding_size=emb_dim),
        )
        load_weights = tl.files.load_npz(name='model.npz')
        tl.files.assign_weights(load_weights, self.model_)
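A hedged sketch of how an instance of this class might answer a prompt, mirroring the decoding call used in Example #7; the class name ChatBot, the prompt, and top_n=3 are assumptions, since the snippet does not show them:

# Illustrative inference; ChatBot stands in for the (unshown) class name above.
bot = ChatBot()
bot.model_.eval()
seed_id = [bot.word2idx.get(w, bot.unk_id) for w in "how are you".split(" ")]
out_ids = bot.model_(inputs=[[seed_id]], seq_length=20, start_token=bot.start_id, top_n=3)
reply = []
for w_id in out_ids[0]:
    if bot.idx2word[w_id] == 'end_id':   # stop at the end-of-sequence marker
        break
    reply.append(bot.idx2word[w_id])
print(" ".join(reply))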
Example #3
def load_data(path):
    metadata, idx_q, idx_a = data.load_data(path)
    (trainX, trainY), (testX,
                       testY), (validX,
                                validY) = data.split_dataset(idx_q, idx_a)

    trainX = trainX.tolist()
    trainY = trainY.tolist()
    testX = testX.tolist()
    testY = testY.tolist()
    validX = validX.tolist()
    validY = validY.tolist()

    trainX = tl.prepro.remove_pad_sequences(trainX)
    trainY = tl.prepro.remove_pad_sequences(trainY)
    testX = tl.prepro.remove_pad_sequences(testX)
    testY = tl.prepro.remove_pad_sequences(testY)
    validX = tl.prepro.remove_pad_sequences(validX)
    validY = tl.prepro.remove_pad_sequences(validY)

    return trainX, trainY, testX, testY, validX, validY, metadata
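A minimal usage sketch, assuming the same data/twitter/ layout as the other examples:

# Hypothetical path; matches the directory used elsewhere in these examples.
trainX, trainY, testX, testY, validX, validY, metadata = load_data('data/twitter/')
print('train/test/valid sizes:', len(trainX), len(testX), len(validX))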
Example #4
#! /usr/bin/python
# -*- coding: utf8 -*-
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *

import numpy as np
import time

###============= prepare data
from data.twitter import data
metadata, idx_q, idx_a = data.load_data('data/twitter/')                   # Twitter

# from data.cornell_corpus import data
# metadata, idx_q, idx_a = data.load_data(PATH='data/cornell_corpus/')          # Cornell Movie
(trainX, trainY), (testX, testY), (validX, validY) = data.split_dataset(idx_q, idx_a)

trainX = trainX.tolist()
trainY = trainY.tolist()
testX = testX.tolist()
testY = testY.tolist()
validX = validX.tolist()
validY = validY.tolist()

trainX = tl.prepro.remove_pad_sequences(trainX)
trainY = tl.prepro.remove_pad_sequences(trainY)
testX = tl.prepro.remove_pad_sequences(testX)
testY = tl.prepro.remove_pad_sequences(testY)
validX = tl.prepro.remove_pad_sequences(validX)
validY = tl.prepro.remove_pad_sequences(validY)
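With the padding stripped, each entry of trainX/trainY is a plain list of word ids; a quick way to eyeball one question/answer pair (the index 0 is arbitrary):

idx2w = metadata['idx2w']
print('Q:', ' '.join(idx2w[w] for w in trainX[0]))
print('A:', ' '.join(idx2w[w] for w in trainY[0]))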
Example #5
References
----------
http://suriyadeepan.github.io/2016-12-31-practical-seq2seq/
"""
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *

import numpy as np
import time

###============= prepare data
from data.twitter import data
metadata, idx_q, idx_a = data.load_data(PATH='data/twitter/')                   # Twitter
# from data.cornell_corpus import data
# metadata, idx_q, idx_a = data.load_data(PATH='data/cornell_corpus/')          # Cornell Movie
(trainX, trainY), (testX, testY), (validX, validY) = data.split_dataset(idx_q, idx_a)

trainX = trainX.tolist()
trainY = trainY.tolist()
testX = testX.tolist()
testY = testY.tolist()
validX = validX.tolist()
validY = validY.tolist()

trainX = tl.prepro.remove_pad_sequences(trainX)
trainY = tl.prepro.remove_pad_sequences(trainY)
testX = tl.prepro.remove_pad_sequences(testX)
testY = tl.prepro.remove_pad_sequences(testY)
Example #6
def main():

    metadata, idx_q, idx_a = data.load_data(PATH='data/twitter/')  # Twitter
    # from data.cornell_corpus import data
    # metadata, idx_q, idx_a = data.load_data(PATH='data/cornell_corpus/')          # Cornell Movie
    (trainX, trainY), (testX,
                       testY), (validX,
                                validY) = data.split_dataset(idx_q, idx_a)

    trainX = trainX.tolist()
    trainY = trainY.tolist()
    testX = testX.tolist()
    testY = testY.tolist()
    validX = validX.tolist()
    validY = validY.tolist()

    trainX = tl.prepro.remove_pad_sequences(trainX)
    trainY = tl.prepro.remove_pad_sequences(trainY)
    testX = tl.prepro.remove_pad_sequences(testX)
    testY = tl.prepro.remove_pad_sequences(testY)
    validX = tl.prepro.remove_pad_sequences(validX)
    validY = tl.prepro.remove_pad_sequences(validY)

    ###============= parameters
    xseq_len = len(trainX)  #.shape[-1]
    yseq_len = len(trainY)  #.shape[-1]
    assert xseq_len == yseq_len
    batch_size = 32
    n_step = int(xseq_len / batch_size)
    xvocab_size = len(metadata['idx2w'])  # 8002 (0~8001)
    emb_dim = 1024
    global w2idx
    global idx2w
    global encode_seqs2
    global decode_seqs2
    global start_id
    global end_id

    w2idx = metadata['w2idx']  # dict  word 2 index
    idx2w = metadata['idx2w']  # list index 2 word

    unk_id = w2idx['unk']  # 1
    pad_id = w2idx['_']  # 0

    start_id = xvocab_size  # 8002
    end_id = xvocab_size + 1  # 8003

    w2idx.update({'start_id': start_id})
    w2idx.update({'end_id': end_id})
    idx2w = idx2w + ['start_id', 'end_id']

    xvocab_size = yvocab_size = xvocab_size + 2
    """ A data for Seq2Seq should look like this:
    input_seqs : ['how', 'are', 'you', '<PAD_ID'>]
    decode_seqs : ['<START_ID>', 'I', 'am', 'fine', '<PAD_ID'>]
    target_seqs : ['I', 'am', 'fine', '<END_ID>', '<PAD_ID'>]
    target_mask : [1, 1, 1, 1, 0]
    """

    print("encode_seqs", [idx2w[id] for id in trainX[10]])
    target_seqs = tl.prepro.sequences_add_end_id([trainY[10]],
                                                 end_id=end_id)[0]
    # target_seqs = tl.prepro.remove_pad_sequences([target_seqs], pad_id=pad_id)[0]
    print("target_seqs", [idx2w[id] for id in target_seqs])
    decode_seqs = tl.prepro.sequences_add_start_id([trainY[10]],
                                                   start_id=start_id,
                                                   remove_last=False)[0]
    # decode_seqs = tl.prepro.remove_pad_sequences([decode_seqs], pad_id=pad_id)[0]
    print("decode_seqs", [idx2w[id] for id in decode_seqs])
    target_mask = tl.prepro.sequences_get_mask([target_seqs])[0]
    print("target_mask", target_mask)
    print(len(target_seqs), len(decode_seqs), len(target_mask))

    ###============= model
    global net_rnn

    def model(encode_seqs, decode_seqs, is_train=True, reuse=False):
        with tf.variable_scope("model", reuse=reuse):
            # for chatbot, you can use the same embedding layer,
            # for translation, you may want to use 2 separate embedding layers
            with tf.variable_scope("embedding") as vs:
                net_encode = EmbeddingInputlayer(inputs=encode_seqs,
                                                 vocabulary_size=xvocab_size,
                                                 embedding_size=emb_dim,
                                                 name='seq_embedding')
                vs.reuse_variables()
                tl.layers.set_name_reuse(True)
                net_decode = EmbeddingInputlayer(inputs=decode_seqs,
                                                 vocabulary_size=xvocab_size,
                                                 embedding_size=emb_dim,
                                                 name='seq_embedding')
            net_rnn = Seq2Seq(
                net_encode,
                net_decode,
                cell_fn=tf.contrib.rnn.BasicLSTMCell,
                n_hidden=emb_dim,
                initializer=tf.random_uniform_initializer(-0.1, 0.1),
                encode_sequence_length=retrieve_seq_length_op2(encode_seqs),
                decode_sequence_length=retrieve_seq_length_op2(decode_seqs),
                initial_state_encode=None,
                dropout=(0.5 if is_train else None),
                n_layer=3,
                return_seq_2d=True,
                name='seq2seq')
            net_out = DenseLayer(net_rnn,
                                 n_units=xvocab_size,
                                 act=tf.identity,
                                 name='output')
        return net_out, net_rnn

    # model for training
    encode_seqs = tf.placeholder(dtype=tf.int64,
                                 shape=[batch_size, None],
                                 name="encode_seqs")
    decode_seqs = tf.placeholder(dtype=tf.int64,
                                 shape=[batch_size, None],
                                 name="decode_seqs")
    target_seqs = tf.placeholder(dtype=tf.int64,
                                 shape=[batch_size, None],
                                 name="target_seqs")
    target_mask = tf.placeholder(
        dtype=tf.int64, shape=[batch_size, None],
        name="target_mask")  # tl.prepro.sequences_get_mask()
    net_out, _ = model(encode_seqs, decode_seqs, is_train=True, reuse=False)

    # model for inferencing
    encode_seqs2 = tf.placeholder(dtype=tf.int64,
                                  shape=[1, None],
                                  name="encode_seqs")
    decode_seqs2 = tf.placeholder(dtype=tf.int64,
                                  shape=[1, None],
                                  name="decode_seqs")
    net, net_rnn = model(encode_seqs2,
                         decode_seqs2,
                         is_train=False,
                         reuse=True)
    global y
    y = tf.nn.softmax(net.outputs)

    # loss for training
    # print(net_out.outputs)    # (?, 8004)
    # print(target_seqs)    # (32, ?)
    # loss_weights = tf.ones_like(target_seqs, dtype=tf.float32)
    # loss = tf.contrib.legacy_seq2seq.sequence_loss(net_out.outputs, target_seqs, loss_weights, yvocab_size)
    loss = tl.cost.cross_entropy_seq_with_mask(logits=net_out.outputs,
                                               target_seqs=target_seqs,
                                               input_mask=target_mask,
                                               return_details=False,
                                               name='cost')

    net_out.print_params(False)

    lr = 0.0001
    train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
    # Truncated Backpropagation for training (option)
    # max_grad_norm = 30
    # grads, _ = tf.clip_by_global_norm(tf.gradients(loss, net_out.all_params),max_grad_norm)
    # optimizer = tf.train.GradientDescentOptimizer(lr)
    # train_op = optimizer.apply_gradients(zip(grads, net_out.all_params))

    # sess = tf.InteractiveSession()
    global sess
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False))
    tl.layers.initialize_global_variables(sess)
    tl.files.load_and_assign_npz(sess=sess, name='n.npz', network=net)

    run(host='0.0.0.0', port=40026)
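The snippet ends by starting a web server with run(), which is defined elsewhere; below is a sketch of how the inference graph above could answer a single query. It follows the greedy/top-k decoding pattern from the TensorLayer chatbot tutorial, so the final_state_encode / initial_state_decode / final_state_decode attributes and tl.nlp.sample_top are assumptions carried over from that code:

def answer(question, top_n=3, max_len=30):
    # Encode the question once, then decode one token at a time.
    seed_id = [w2idx.get(w, w2idx['unk']) for w in question.split(" ")]
    state = sess.run(net_rnn.final_state_encode, {encode_seqs2: [seed_id]})
    w_id, sentence = start_id, []
    for _ in range(max_len):
        o, state = sess.run([y, net_rnn.final_state_decode],
                            {net_rnn.initial_state_decode: state,
                             decode_seqs2: [[w_id]]})
        w_id = tl.nlp.sample_top(o[0], top_k=top_n)   # sample from the top-k most likely words
        if w_id == end_id:
            break
        sentence.append(idx2w[w_id])
    return " ".join(sentence)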
Example #7
class AI:
    metadata, idx_q, idx_a = data.load_data(PATH='data/{}/'.format("twitter"))
    src_vocab_size = len(metadata['idx2w'])
    emb_dim = 1024
    word2idx = metadata['w2idx']
    idx2word = metadata['idx2w']
    unk_id = word2idx['unk']
    pad_id = word2idx['_']
    start_id = src_vocab_size
    end_id = src_vocab_size + 1
    word2idx.update({'start_id': start_id})
    word2idx.update({'end_id': end_id})
    idx2word = idx2word + ['start_id', 'end_id']
    src_vocab_size = src_vocab_size + 2
    vocabulary_size = src_vocab_size
    decoder_seq_length = 20

    # Creates an instance of the AI with its name being passed from a name-generating function
    def __init__(self):
        # preProcessor is only utilised here to avoid duplicating string2array
        self.preProcessor = DataPreProcessor("")
        self.model_ = Seq2seq(
            decoder_seq_length=self.decoder_seq_length,
            cell_enc=tf.keras.layers.GRUCell,
            cell_dec=tf.keras.layers.GRUCell,
            n_layer=3,
            n_units=256,
            embedding_layer=tl.layers.Embedding(
                vocabulary_size=self.vocabulary_size,
                embedding_size=self.emb_dim),
        )
        load_weights = tl.files.load_npz(name="data/model.npz")
        tl.files.assign_weights(load_weights, self.model_)

    # Take a name, create a mood
    def initialise(self, name):
        self.name = name
        feelings = [
            "good", "well", "great", "grand", "excellent", "ecstatic", "happy",
            "sad", "annoyed", "frustrated", "angry", "tired", "okay", "alright"
        ]
        self.feel = feelings[randint(0, len(feelings) - 1)]

    # Handle the creation of a response from the given input
    def respond(self, seed, number):
        simpleStart = self.simpleResponse(seed)
        self.model_.eval()
        seed_id = [self.word2idx.get(w, self.unk_id) for w in seed.split(" ")]
        sentence_id = self.model_(inputs=[[seed_id]],
                                  seq_length=20,
                                  start_token=self.start_id,
                                  top_n=number)
        sentence = []
        for w_id in sentence_id[0]:
            w = self.idx2word[w_id]
            if w == 'end_id':
                break
            sentence = sentence + [w]
        # A catch-all in case there are no responses; we have yet to find an input that triggers this
        if sentence == []:
            sentence = [
                "I'm", "sorry,", "I", "just", "don't", "quite", "understand",
                "what", "you're", "asking..."
            ]
        return simpleStart + sentence

    # Handle simple questions that the AI is less than optimal at answering
    def simpleResponse(self, input):
        sentence = []
        input = self.preProcessor.string2Array(input)
        tally = [0, 0, 0]
        greetings = [
            "hello", "hi", "greetings", "salutations", "hey", "yo", "howdy"
        ]
        names = [["what", "who"], ["is", "are"], ["you", "your"], ["name"]]
        wellbeing = [["how"], ["do", "are"], ["you"],
                     ["doing", "feeling", "feel"]]
        # Tallying key words in the user query to determine if certain questions were being asked
        for x in input:
            for y in range(len(greetings)):
                if x == greetings[y]:
                    tally[0] = 1
                    break
            for y in range(len(names)):
                for z in names[y]:
                    if x == z:
                        tally[1] = tally[1] + 1
                        break
            for y in range(len(wellbeing)):
                for z in wellbeing[y]:
                    if x == z:
                        tally[2] = tally[2] + 1
                        break
        # Handle a return greeting, and maybe ask how the user is
        if tally[0] > 0:
            sentence.append(greetings[randint(0, 6)])
            if randint(0, 1) == 1:
                sentence.append("how")
                sentence.append("are")
                sentence.append("you")
                value = randint(0, 2)
                if value == 0:
                    sentence.append("doing")
                elif value == 1:
                    sentence.append("feeling")
        # Handle questions about its name with a simple answer
        if tally[1] > 2 and len(input) < 5:
            if randint(0, 1) == 1:
                sentence.append("I")
                sentence.append("am")
            else:
                sentence.append("my")
                sentence.append("name")
                sentence.append("is")
            sentence.append(self.name)
        # Handle a 'how are you' type question with a pre-determined emotional state
        if tally[2] > 2 and len(input) < 5:
            sentence.append("I")
            sentence.append("am")
            if randint(0, 1) == 1:
                sentence.append("feeling")
            sentence.append(self.feel)
        return sentence
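A hedged usage sketch for the AI class above; the name string and top-n value are arbitrary, and DataPreProcessor plus data/model.npz are assumed to exist in the surrounding project:

ai = AI()
ai.initialise("Ada")                                   # any name works; normally supplied externally
print(" ".join(ai.respond("hello how are you", 3)))    # top_n=3 chosen arbitrarily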
Example #8
import time
import tensorflow as tf
import tensorlayer as tl
from sklearn.utils import shuffle
from tensorlayer.layers import EmbeddingInputlayer, Seq2Seq, DenseLayer, retrieve_seq_length_op2

from data.twitter import data
metadata, idx_q, idx_a = data.load_data(PATH='data/twitter/')
(trainX, trainY), (testX, testY), (validX,
                                   validY) = data.split_dataset(idx_q, idx_a)

trainX = trainX.tolist()
trainY = trainY.tolist()
testX = testX.tolist()
testY = testY.tolist()
validX = validX.tolist()
validY = validY.tolist()

trainX = tl.prepro.remove_pad_sequences(trainX)
trainY = tl.prepro.remove_pad_sequences(trainY)
testX = tl.prepro.remove_pad_sequences(testX)
testY = tl.prepro.remove_pad_sequences(testY)
validX = tl.prepro.remove_pad_sequences(validX)
validY = tl.prepro.remove_pad_sequences(validY)

# Hyperparameters
batch_size = 32
embedding_dimension = 1024
learning_rate = 0.0001
number_epochs = 1000
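The snippet stops at the hyperparameters; below is a minimal sketch of the per-batch preprocessing the other examples (Examples #6 and #14) apply before feeding a model, assuming start_id and end_id have been appended to the vocabulary in the same way:

# Sketch only: pad each minibatch and build decoder inputs, targets, and mask.
for X, Y in tl.iterate.minibatches(inputs=trainX, targets=trainY,
                                   batch_size=batch_size, shuffle=False):
    X = tl.prepro.pad_sequences(X)
    _target_seqs = tl.prepro.pad_sequences(tl.prepro.sequences_add_end_id(Y, end_id=end_id))
    _decode_seqs = tl.prepro.pad_sequences(
        tl.prepro.sequences_add_start_id(Y, start_id=start_id, remove_last=False))
    _target_mask = tl.prepro.sequences_get_mask(_target_seqs)
    break  # one batch is enough to illustrate the shapes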
Example #9
"""
import time

import tensorflow as tf
import tensorlayer as tl
import numpy as np

from tensorlayer.layers import DenseLayer, EmbeddingInputlayer, Seq2Seq, retrieve_seq_length_op2
from sklearn.utils import shuffle

from data.twitter import data

# Data Preparation
data_corpus = 'twitter'

metadata, idx_q, idx_a = data.load_data(PATH='data/{}/'.format(data_corpus))
(trainX, trainY), (testX, testY), (validX,
                                   validY) = data.split_dataset(idx_q, idx_a)

trainX = trainX.tolist()
trainY = trainY.tolist()
testX = testX.tolist()
testY = testY.tolist()
validX = validX.tolist()
validY = validY.tolist()

trainX = tl.prepro.remove_pad_sequences(trainX)
trainY = tl.prepro.remove_pad_sequences(trainY)
testX = tl.prepro.remove_pad_sequences(testX)
testY = tl.prepro.remove_pad_sequences(testY)
validX = tl.prepro.remove_pad_sequences(validX)
Example #10
References
----------
http://suriyadeepan.github.io/2016-12-31-practical-seq2seq/
"""
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *

import numpy as np
import time

###============= prepare data
from data.twitter import data
metadata, idx_q, idx_a = data.load_data(PATH='seq2seq-chatbot/data/fb_chat/')                   # Twitter
# from data.cornell_corpus import data
# metadata, idx_q, idx_a = data.load_data(PATH='data/cornell_corpus/')          # Cornell Movie
(trainX, trainY), (testX, testY), (validX, validY) = data.split_dataset(idx_q, idx_a)

trainX = trainX.tolist()
trainY = trainY.tolist()
testX = testX.tolist()
testY = testY.tolist()
validX = validX.tolist()
validY = validY.tolist()

trainX = tl.prepro.remove_pad_sequences(trainX)
trainY = tl.prepro.remove_pad_sequences(trainY)
testX = tl.prepro.remove_pad_sequences(testX)
testY = tl.prepro.remove_pad_sequences(testY)
Example #11
def initial_setup():
    metadata, idx_q, idx_a = data.load_data(PATH='data/twitter/')
    return metadata
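A one-line usage sketch; it assumes the data module import used by the other examples, and the printout and the 8002-word figure are carried over from their comments:

metadata = initial_setup()
print(len(metadata['idx2w']), 'vocabulary entries')   # typically 8002 for this corpus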
Example #12
                decode_ans = tl.prepro.pad_sequences(decode_ans)
                mask = tl.prepro.sequences_get_mask(label_ans)

                _, error = session.run(
                    [self.optimized_rnn, self.loss], {
                        encode_q: ques,
                        decode_a: decode_ans,
                        label_a: label_ans,
                        label_mask: mask
                    })
                self.total_error += error
            tl.files.save_npz(test_rnn.all_params, 'ChatbotRNN.npz', session)


if __name__ == "__main__":
    frequentWords, questions, answers = data.load_data(PATH='data/twitter/')
    word2id = frequentWords['w2idx']
    id2word = frequentWords['idx2w']

    word2id.update({'<GO>': len(id2word)})
    word2id.update({'<EOS>': len(id2word) + 1})
    size_of_dict = len(id2word) + 2

    QuesTrain, AnsTrain, QuesTest, AnsTest = data.split_dataset(
        questions, answers)
    removeTrail = removeZeros()
    assert removeTrail == True
    QTrain, ATrain = shuffle(QTrain, ATrain, random_state=0)

    encode_q = tf.placeholder(tf.int64, [cluster_size, None])
    decode_a = tf.placeholder(tf.int64, [cluster_size, None])
Example #13
	def load_data(self):
		metadata, idx_q, idx_a = data.load_data(PATH='data/twitter/')
		return metadata
Example #14
def main():
    metadata, idx_q, idx_a = data.load_data(PATH='data/twitter/')
    trainX, trainY, testX, testY, validX, validY = getDataset(idx_q, idx_a)

    xseq_len = len(trainX)
    yseq_len = len(trainY)
    assert xseq_len == yseq_len
    batch_size = 32
    n_step = int(xseq_len / batch_size)
    xvocab_size = len(metadata['idx2w'])
    emb_dim = 1024

    w2idx = metadata['w2idx']
    idx2w = metadata['idx2w']

    unk_id = w2idx['unk']
    pad_id = w2idx['_']

    start_id = xvocab_size
    end_id = xvocab_size + 1

    w2idx.update({'start_id': start_id})
    w2idx.update({'end_id': end_id})
    idx2w = idx2w + ['start_id', 'end_id']

    xvocab_size = yvocab_size = xvocab_size + 2

    target_seqs = tl.prepro.sequences_add_end_id([trainY[10]],
                                                 end_id=end_id)[0]
    decode_seqs = tl.prepro.sequences_add_start_id([trainY[10]],
                                                   start_id=start_id,
                                                   remove_last=False)[0]
    target_mask = tl.prepro.sequences_get_mask([target_seqs])[0]
    encode_seqs = tf.placeholder(dtype=tf.int64,
                                 shape=[batch_size, None],
                                 name="encode_seqs")
    decode_seqs = tf.placeholder(dtype=tf.int64,
                                 shape=[batch_size, None],
                                 name="decode_seqs")
    target_seqs = tf.placeholder(dtype=tf.int64,
                                 shape=[batch_size, None],
                                 name="target_seqs")
    target_mask = tf.placeholder(dtype=tf.int64,
                                 shape=[batch_size, None],
                                 name="target_mask")
    net_out, _ = model(encode_seqs,
                       decode_seqs,
                       xvocab_size,
                       is_train=True,
                       reuse=False)
    encode_seqs2 = tf.placeholder(dtype=tf.int64,
                                  shape=[1, None],
                                  name="encode_seqs")
    decode_seqs2 = tf.placeholder(dtype=tf.int64,
                                  shape=[1, None],
                                  name="decode_seqs")
    net, net_rnn = model(encode_seqs2,
                         decode_seqs2,
                         xvocab_size,
                         is_train=False,
                         reuse=True)
    y = tf.nn.softmax(net.outputs)
    loss = tl.cost.cross_entropy_seq_with_mask(logits=net_out.outputs,
                                               target_seqs=target_seqs,
                                               input_mask=target_mask,
                                               return_details=False,
                                               name='cost')
    net_out.print_params(False)

    lr = 0.0001
    train_op = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
    gpu_option = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False,
                                            gpu_options=gpu_option))
    tl.layers.initialize_global_variables(sess)
    load_parameter = tl.files.load_and_assign_npz(sess=sess,
                                                  name='twitter.npz',
                                                  network=net)
    if not load_parameter:
        print("Loading npz fail, starting to train.")
        n_epoch = 50
        for epoch in range(n_epoch):
            epoch_time = time.time()
            from sklearn.utils import shuffle
            trainX, trainY = shuffle(trainX, trainY, random_state=0)
            total_err, n_iter = 0, 0
            for X, Y in tl.iterate.minibatches(inputs=trainX,
                                               targets=trainY,
                                               batch_size=batch_size,
                                               shuffle=False):
                step_time = time.time()

                X = tl.prepro.pad_sequences(X)
                _target_seqs = tl.prepro.sequences_add_end_id(Y, end_id=end_id)
                _target_seqs = tl.prepro.pad_sequences(_target_seqs)

                _decode_seqs = tl.prepro.sequences_add_start_id(
                    Y, start_id=start_id, remove_last=False)
                _decode_seqs = tl.prepro.pad_sequences(_decode_seqs)
                _target_mask = tl.prepro.sequences_get_mask(_target_seqs)
                _, err = sess.run(
                    [train_op, loss], {
                        encode_seqs: X,
                        decode_seqs: _decode_seqs,
                        target_seqs: _target_seqs,
                        target_mask: _target_mask
                    })
                if n_iter % 200 == 0:
                    print("Epoch[%d/%d] step:[%d/%d] loss:%f took:%.5fs" %
                          (epoch, n_epoch, n_iter, n_step, err,
                           time.time() - step_time))

                total_err += err
                n_iter += 1
                if n_iter % 1000 == 0:
                    print("Query> happy birthday to you")
                    getReplying(y, w2idx, idx2w, decode_seqs2, encode_seqs2,
                                start_id, end_id, sess, net_rnn,
                                "happy birthday to you")
                    print("Query> help me to do the exam")
                    getReplying(y, w2idx, idx2w, decode_seqs2, encode_seqs2,
                                start_id, end_id, sess, net_rnn,
                                "help me to do the exam")
                    print("Query> ny is so cold now")
                    getReplying(y, w2idx, idx2w, decode_seqs2, encode_seqs2,
                                start_id, end_id, sess, net_rnn,
                                "ny is so cold now")
            print(
                "Epoch[%d/%d] averaged loss:%f took:%.5fs" %
                (epoch, n_epoch, total_err / n_iter, time.time() - epoch_time))

            tl.files.save_npz(net.all_params, name='n.npz', sess=sess)
    while True:
        getReplying(y, w2idx, idx2w, decode_seqs2, encode_seqs2, start_id,
                    end_id, sess, net_rnn, input("You>"))
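Example #14 defines main() but the snippet shows no entry point, and it relies on model(), getDataset() and getReplying() helpers defined elsewhere in the same file; a conventional guard is sketched below:

if __name__ == "__main__":
    main()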