import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import (DenseLayer, EmbeddingInputlayer, Seq2Seq,
                                retrieve_seq_length_op2)
from sklearn.utils import shuffle
from tqdm import tqdm

from vocabulary import Vocabulary  # local helper module; adjust to your project layout


class ChatBot:

    def __init__(self, layers=5, maxlen=10, embedding_size=128,
                 batch_size=32, is_train=True, lr=0.0001):
        self.layers = layers
        self.maxlen = maxlen
        self.embedding_size = embedding_size
        self.batch_size = batch_size
        self.learning_rate = lr
        # .npz is NumPy's archive format; we use it to save the weights
        # of our seq2seq model between runs.
        self.model_path = "model/chatbot/model.npz"

        ## Vocabulary
        self.vocab = Vocabulary(corpus=None, maxlen=maxlen)
        self.vocab_size = self.vocab.vocab_size

        ## Init Session
        sess_config = tf.ConfigProto(allow_soft_placement=True,
                                     log_device_placement=False)
        tf.reset_default_graph()
        self.sess = tf.Session(config=sess_config)

        ## Placeholders
        self.encoder_inputs = tf.placeholder(tf.int32, shape=[None, None])
        self.decoder_inputs = tf.placeholder(tf.int32, shape=[None, None])
        self.decoder_outputs = tf.placeholder(tf.int32, shape=[None, None])
        self.mask = tf.placeholder(tf.int32, shape=[None, None])

        ## Model
        self.net_out, _ = self.create_model(
            self.encoder_inputs, self.decoder_inputs,
            self.vocab_size, self.embedding_size, reuse=False)
        self.net_out.print_params(False)

        ## Loss: masked cross-entropy over the decoder's target sequences
        self.loss = tl.cost.cross_entropy_seq_with_mask(
            logits=self.net_out.outputs,
            target_seqs=self.decoder_outputs,
            input_mask=self.mask,
            return_details=False,
            name='cost')

        ## Optimizer
        self.train_op = tf.train.RMSPropOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)

    def train(self, X, Y, num_epochs=1):
        ## Init Vars
        self.sess.run(tf.global_variables_initializer())

        ## Load Model (resumes from a previous checkpoint if one exists)
        tl.files.load_and_assign_npz(sess=self.sess, name=self.model_path,
                                     network=self.net_out)

        n_step = len(X) // self.batch_size
        for epoch in range(num_epochs):
            X, Y = shuffle(X, Y, random_state=0)
            total_loss, n_iter = 0, 0
            for x, y in tqdm(tl.iterate.minibatches(inputs=X, targets=Y,
                                                    batch_size=self.batch_size,
                                                    shuffle=False),
                             total=n_step,
                             desc='Epoch[{}/{}]'.format(epoch + 1, num_epochs),
                             leave=False):
                x1, x2, y1, W = self.vocab.dataset(x, y)
                feed_data = {
                    self.encoder_inputs: x1,
                    self.decoder_inputs: x2,
                    self.decoder_outputs: y1,
                    self.mask: W,
                }
                _, loss_iter = self.sess.run([self.train_op, self.loss],
                                             feed_dict=feed_data)
                total_loss += loss_iter
                n_iter += 1

            ## print the average loss after every epoch
            print('Epoch [{}/{}]: loss {:.4f}'.format(
                epoch + 1, num_epochs, total_loss / n_iter))

            ## save the model after every epoch
            tl.files.save_npz(self.net_out.all_params, name=self.model_path,
                              sess=self.sess)

        ## session cleanup
        self.sess.close()

    def create_model(self, encoder_inputs, decoder_inputs, vocab_size,
                     emb_dim, is_train=True, reuse=False):
        """Creates the LSTM-based seq2seq model."""
        with tf.variable_scope("model", reuse=reuse):
            # For a chatbot you can share one embedding layer between the
            # encoder and the decoder; for translation you may want two
            # separate embedding layers.
with tf.variable_scope("embedding") as vs: net_encode = EmbeddingInputlayer( inputs = encoder_inputs, vocabulary_size = vocab_size, embedding_size = emb_dim, name = 'seq_embedding') vs.reuse_variables() net_decode = EmbeddingInputlayer( inputs = decoder_inputs, vocabulary_size = vocab_size, embedding_size = emb_dim, name = 'seq_embedding') net_rnn = Seq2Seq(net_encode, net_decode, cell_fn = tf.nn.rnn_cell.LSTMCell, n_hidden = emb_dim, initializer = tf.random_uniform_initializer(-0.1, 0.1), encode_sequence_length = retrieve_seq_length_op2(encoder_inputs), decode_sequence_length = retrieve_seq_length_op2(decoder_inputs), initial_state_encode = None, dropout = (0.5 if is_train else None), n_layer = self.layers, return_seq_2d = True, name = 'seq2seq') net_out = DenseLayer(net_rnn, n_units=vocab_size, act=tf.identity, name='output') return net_out, net_rnn def infer(self, query): unk_id = self.vocab.word_index["<unk>"] pad_id = self.vocab.word_index["<pad>"] start_id = self.vocab.word_index["<start>"] end_id = self.vocab.word_index["<end>"] ## Init Session sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) tf.reset_default_graph() sess = tf.Session(config=sess_config) ## Inference Data Placeholders encode_inputs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="encode_inputs") decode_inputs = tf.placeholder(dtype=tf.int64, shape=[1, None], name="decode_inputs") net, net_rnn = self.create_model( encode_inputs, decode_inputs, self.vocab_size, self.embedding_size, is_train=False, reuse=False) y = tf.nn.softmax(net.outputs) ## Init Vars sess.run(tf.global_variables_initializer()) ## Load Model tl.files.load_and_assign_npz(sess=sess, name=self.model_path, network=net) """ Inference using pre-trained model """ def inference(seed): seed_id = self.vocab.text_to_sequence(seed) ## Encode and get state state = sess.run(net_rnn.final_state_encode, {encode_inputs: [seed_id]}) ## Decode, feed start_id and get first word [https://github.com/zsdonghao/tensorlayer/blob/master/example/tutorial_ptb_lstm_state_is_tuple.py] o, state = sess.run([y, net_rnn.final_state_decode], { net_rnn.initial_state_decode: state, decode_inputs: [[start_id]]}) w_id = tl.nlp.sample_top(o[0], top_k=3) #w = self.vocab.index_word[w_id] ## Decode and feed state iteratively sentence = [w_id] for _ in range(self.maxlen): # max sentence length o, state = sess.run([y, net_rnn.final_state_decode],{ net_rnn.initial_state_decode: state, decode_inputs: [[w_id]]}) w_id = tl.nlp.sample_top(o[0], top_k=2) #w = self.vocab.index_word[w_id] if w_id == end_id: break sentence = sentence + [w_id] return sentence ## infer sentence = inference(query) response = self.vocab.seqs_to_text(sentence) response = " ".join(response.split(" ")) return response