def get_variables(batch, vocab, dec_max_len, use_cuda=True): """ Args: - **batch**: (list, list) each list is a batch of variable-length sequence Outputs: Variables for network """ post_ids = [sentence2id(sent, vocab) for sent in batch[0]] # add GO response_ids = [[GO_ID] + sentence2id(sent, vocab) for sent in batch[1]] reference_ids = [sentence2id(sent, vocab) for sent in batch[1]] posts_var, posts_length = padding_inputs(post_ids, None) responses_var, responses_length = padding_inputs(response_ids, dec_max_len) # add EOS references_var, references_length = padding_inputs(reference_ids, dec_max_len, eos=True) # sort by post length posts_length, perms_idx = posts_length.sort(0, descending=True) posts_var = posts_var[perms_idx] responses_var = responses_var[perms_idx] responses_length = responses_length[perms_idx] references_var = references_var[perms_idx] references_length = references_length[perms_idx] if use_cuda: posts_var = posts_var.cuda() responses_var = responses_var.cuda() references_var = references_var.cuda() return posts_var, posts_length, responses_var, responses_length, references_var, references_length
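# --- Hedged sketch: `padding_inputs` is not shown in this section. From its call sites it is
# assumed to right-pad each id list with PAD (0) to a common length and return a LongTensor
# batch plus a LongTensor of true lengths; the `eos` flag in the real helper likely reserves a
# slot for an EOS symbol. This is only an illustration of that assumed contract.
import torch

def padding_inputs_sketch(id_lists, max_len=None, pad_id=0):
    lengths = torch.LongTensor([len(ids) for ids in id_lists])
    target_len = int(max_len) if max_len is not None else int(lengths.max())
    lengths = lengths.clamp(max=target_len)  # truncated sequences report the padded length
    batch = torch.full((len(id_lists), target_len), pad_id, dtype=torch.long)
    for i, ids in enumerate(id_lists):
        ids = ids[:target_len]
        batch[i, :len(ids)] = torch.LongTensor(ids)
    return batch, lengths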
def get_variables_cls(batch, vocab, dec_max_len, use_cuda=True): """ Args: - **batch**: (list, list) each list is a batch of variable-length sequence Outputs: Variables for network """ post_ids = [sentence2id(sent, vocab) for sent in batch[0]] reply_ids = [[GO_ID] + sentence2id(sent, vocab) for sent in batch[1]] posts_var, posts_length = padding_inputs(post_ids, None) reply_var, reply_length = padding_inputs(reply_ids, dec_max_len) lables = torch.FloatTensor(batch[2]) # sort by post length posts_length, perms_idx = posts_length.sort(0, descending=True) posts_var = posts_var[perms_idx] reply_var = reply_var[perms_idx] reply_length = reply_length[perms_idx] lables = lables[perms_idx] if use_cuda: posts_var = posts_var.cuda() reply_var = reply_var.cuda() lables = lables.cuda() return posts_var, posts_length, reply_var, reply_length, lables
def chat(line):
    """ in test mode, we don't create the backward path """
    global enc_vocab, inv_dec_vocab, model, saver, sess, output_file
    line = line.decode().lower()
    start = time.time()
    if len(line) > 0 and line[-1] == '\n':
        line = line[:-1]
    if line == '':
        return 'Hmm...'
    output_file.write('HUMAN: ' + line + '\n')
    # Get token-ids for the input sentence.
    token_ids = data.sentence2id(enc_vocab, str(line))
    if len(token_ids) > max_length:
        return 'TL;DR'
    # Which bucket does it belong to?
    bucket_id = _find_right_bucket(len(token_ids))
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
        [(token_ids, [])], bucket_id, batch_size=1)
    # Get output logits for the sentence.
    _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)
    response = _construct_response(output_logits, inv_dec_vocab)
    # print(response)
    output_file.write('BOT: ' + response + '\n')
    print(time.time() - start)
    return response
def chat(): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open( os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print( 'Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) # store a line history for 3 lines conversation_history = [] line_history = ['', '', ''] while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] # update the line_history line_history.append(line) line_history.pop(0) # create line from the line history line = ''.join(line_history) if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) print(response) conversation_history.append((line, response)) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
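# --- Hedged sketch: `_get_user_input` is assumed (as in the Stanford CS20-style chatbot code
# these functions resemble) to print a prompt and return one raw line from stdin, trailing
# newline included -- which is why the callers strip a final '\n'.
import sys

def _get_user_input_sketch():
    print('> ', end='', flush=True)
    return sys.stdin.readline()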
def chat(use_attention, ckpt_path="./ckp-dir/checkpoints"): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) if not use_attention: model = BasicChatBotModel(batch_size=1) else: model = AttentionChatBotModel(batch_size=1) model.build() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver, ckpt_path) output_file = open(os.path.join( config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print( 'Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) while True: line = _get_user_input() if len(line) > 0 and line[-1] == b'\n': line = line[:-1] if line == b'': break output_file.write('HUMAN ++++ ' + line.decode('ascii') + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, line) if len(token_ids) > max_length: print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? # bucket_id = _find_right_bucket(len(token_ids)) bucket_id = -1 # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. decoder_lens = np.sum(np.transpose(np.array(decoder_masks), (1, 0)), axis=1) output_logits = sess.run([model.final_outputs], feed_dict={model.encoder_inputs_tensor: encoder_inputs, model.decoder_inputs_tensor: decoder_inputs, model.decoder_length_tensor: decoder_lens, model.bucket_length: config.BUCKETS[bucket_id]}) response = _construct_response(output_logits, inv_dec_vocab) print(response) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
def chat():
    """ in test mode, we don't create the backward path """
    # index2word, word2index
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)  # batch size is one (forward only)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(
            os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]  # the longest input a user may type is the largest bucket length
        print(
            'Welcome to TensorBro. Say something. Enter to exit. Max length is',
            max_length)
        while True:
            line = _get_user_input()  # read a line typed by the user
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':  # break if nothing was typed
                break
            output_file.write('HUMAN ++++ ' + line + '\n')  # log each turn to the output file
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, str(line))  # map the sentence to indices
            if len(token_ids) > max_length:  # if the input exceeds the maximum length, ask again
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            bucket_id = _find_right_bucket(
                len(token_ids))  # pick the smallest bucket id that fits the input length
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                [(token_ids, [])],  # the decoder input is empty, so it goes in fully padded
                bucket_id,
                batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs,
                                           decoder_inputs, decoder_masks,
                                           bucket_id, True)  # forward_only == True
            response = _construct_response(
                output_logits, inv_dec_vocab)  # map ids back to words to build the response
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()
def handle_client(client, enc_vocab, inv_dec_vocab, model, saver, sess, output_file): # Takes client socket as argument. """Handles a single client connection.""" name = client.recv(BUFSIZ).decode("utf8") max_length = config.BUCKETS[-1][0] msg = 'Welcome %s! Max length is %d. If you ever want to quit, type {quit} to exit.' % ( name, max_length) client.send(bytes(msg, "utf8")) msg = "%s has joined the chat!" % name broadcast(bytes(msg, "utf8")) clients[client] = name # Decode from standard input. while True: msg = client.recv(BUFSIZ) #try: #print(u'%s: ' % name + msg.decode("utf8")) #except OSError: #print(u'%s: ' % name) if msg != bytes("{quit}", "utf8"): broadcast(msg, name + ": ") else: print(u"%s has left the chat." % name) #client.send(bytes("{quit}", "utf8")) client.close() del clients[client] broadcast(bytes("%s has left the chat." % name, "utf8")) break output_file.write(u'HUMAN ++++ ' + msg.decode("utf8") + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, msg.decode("utf8")) #print(token_ids) if (len(token_ids) > max_length): broadcast( bytes("Max length I can handle is %d" % max_length, "utf8")) continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) broadcast(bytes(response, "utf8"), "BOT: ") #print(response) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
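# --- Hedged sketch: `broadcast` and the module-level `clients` dict used by handle_client above
# are defined elsewhere in this socket server. The assumed behaviour is simply "send prefix +
# message to every connected client".
def broadcast_sketch(msg, prefix=""):
    """Send `msg` (bytes) to all connected client sockets, prefixed with `prefix` (str)."""
    for sock in clients:
        sock.send(bytes(prefix, "utf8") + msg)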
def predict(demo_sent):
    print('============= start prediction =============')
    demo_id = sentence2id(demo_sent, word2id)
    length = len(demo_id)
    if length > args.max_len:
        print('Input is too long')
    demo_data = [(demo_id, [0] * length)]
    print(demo_sent)
    tags = model.predict_sentence(sess, demo_data)
    print(tags[:length])
    return json.dumps(tags[:length])
def chat():
    """
    in test mode, we don't create the backward path
    :return:
    """
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab_enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab_dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(
            os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input
        max_length = config.BUCKETS[-1][0]
        print('I am ChatBot. Proceed to chat. Enter to exit. Max length is {0}'.
              format(max_length))
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':
                break
            output_file.write('HUMAN ++++ ' + line + '\n')
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, str(line))
            if len(token_ids) > max_length:
                print('Max length I can handle is: {0}'.format(max_length))
                line = _get_user_input()
                continue
            # Which bucket does this go in?
            bucket_id = _find_right_bucket(len(token_ids))
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                [(token_ids, [])], bucket_id, batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs,
                                           decoder_inputs, decoder_masks,
                                           bucket_id, True)
            # output_logits and inv_dec_vocab are separate arguments, not a tuple.
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('===============================================\n')
        output_file.close()
def translate(): """ in test mode, we don't to create the backward path """ model = TranslationModel(True, batch_size=1) model.build_graph() _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.en')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.vi')) saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open( os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print('Type something. Enter to exit. Max length is', max_length) while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('English ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) print(response) output_file.write('Translation (Vietnamese) ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
def generate_answer(self, question): """Combines stackoverflow and chitchat parts using intent recognition.""" # Recognize intent of the question using `intent_recognizer`. # Don't forget to prepare question and calculate features for the question. #prepared_question = #### YOUR CODE HERE #### #features = #### YOUR CODE HERE #### #intent = #### YOUR CODE HERE #### prepared_question = text_prepare(question) #### YOUR CODE HERE #### features = self.tfidf_vectorizer.transform([prepared_question]) #### YOUR CODE HERE #### intent = self.intent_recognizer.predict(features)[0] # Chit-chat part: if intent == 'dialogue': # Pass question to chitchat_bot to generate a response. max_length = config.BUCKETS[-1][0] line = question if len(line) > 0 and line[-1] == '\n': line = line[:-1] token_ids = data.sentence2id(self.enc_vocab, str(line)) if (len(token_ids) > max_length): response = "Max length I can handle is:" + str(max_length) bucket_id = _find_right_bucket(len(token_ids)) encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) _, _, output_logits = run_step(self.sess, self.model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, self.inv_dec_vocab) return response # Goal-oriented part: else: # Pass features to tag_classifier to get predictions. tag = self.tag_classifier.predict(features)[0] # Pass prepared_question to thread_ranker to get predictions. thread_id = self.thread_ranker.get_best_thread( prepared_question, tag) return self.ANSWER_TEMPLATE % (tag, thread_id)
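# --- Hedged sketch: `self.tfidf_vectorizer`, `self.intent_recognizer`, and `self.tag_classifier`
# used in generate_answer above are assumed to be pre-fitted scikit-learn objects. One plausible
# way to train the intent recognizer offline ('dialogue' vs. 'stackoverflow' labels); the
# hyperparameters here are illustrative only.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

def train_intent_recognizer_sketch(texts, labels):
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=50000)
    features = vectorizer.fit_transform(texts)
    clf = LogisticRegression(C=10.0, max_iter=1000)
    clf.fit(features, labels)
    return vectorizer, clf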
def get_predicted_sentence(input_sentence, enc_vocab, inv_dec_vocab, model, sess):
    """ in test mode, we don't create the backward path """
    line = input_sentence
    token_ids = data.sentence2id(enc_vocab, line)
    bucket_id = _find_right_bucket(len(token_ids))
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
        [(token_ids, [])], bucket_id, batch_size=1)
    decoder_inputs[0][0] = 2  # force the first decoder input to id 2 (presumably the GO/start symbol in this vocabulary)
    # Get output logits for the sentence.
    _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)
    response = _construct_response(output_logits, inv_dec_vocab)
    return response
def chat(self, line):
    # Get token-ids for the input sentence.
    token_ids = data.sentence2id(self.__enc_vocab, str(line))
    if len(token_ids) > self.max_length:
        return "Would you mind being more concise? I can't understand that."
    # Which bucket does it belong to?
    bucket_id = _find_right_bucket(len(token_ids))
    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
        [(token_ids, [])], bucket_id, batch_size=1)
    # Get output logits for the sentence.
    _, _, output_logits = run_step(self.__session, self.__model, encoder_inputs,
                                   decoder_inputs, decoder_masks, bucket_id, True)
    return _construct_response(output_logits, self.__inv_dec_vocab)
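# --- Hedged sketch: `_find_right_bucket` is assumed to return the index of the smallest bucket
# whose encoder size can hold the tokenized input, with config.BUCKETS being a list of
# (encoder_size, decoder_size) pairs sorted by size. Illustration of that assumed behaviour:
def _find_right_bucket_sketch(length):
    return min(b for b in range(len(config.BUCKETS))
               if config.BUCKETS[b][0] >= length)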
def predict(test_post_file, max_len, vocab, rev_vocab, word_embeddings, encoder,
            generator, output_file=None):
    # data generator
    test_data_generator = batcher(1, test_post_file, response_file=None)

    if output_file:
        fo = open(output_file, 'wb')

    while True:
        try:
            post_sentence = next(test_data_generator)
        except StopIteration:
            logger.info('---------------------finish-------------------------')
            break
        post_ids = [sentence2id(sent, vocab) for sent in post_sentence]
        posts_var, posts_length = padding_inputs(post_ids, None)
        if USE_CUDA:
            posts_var = posts_var.cuda()

        embedded_post = word_embeddings(posts_var)
        _, dec_init_state = encoder(embedded_post,
                                    input_lengths=posts_length.numpy())
        log_softmax_outputs = generator.inference(
            dec_init_state, word_embeddings)  # [B, T, vocab_size]

        hyps, _ = beam_search(dec_init_state, max_len, word_embeddings,
                              generator, beam=5, penalty=1.0, nbest=1)
        results = []
        for h in hyps:
            results.append(id2sentence(h[0], rev_vocab))

        print('*******************************************************')
        print("post: " + ''.join(post_sentence[0]))
        print("response:\n" + '\n'.join([''.join(r) for r in results]))
        print()
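# --- Hedged sketch: `id2sentence` is assumed to be the inverse of `sentence2id`, mapping ids
# back to tokens through the reverse vocabulary and stopping at padding/EOS. Illustration only;
# the pad/eos ids are assumptions.
def id2sentence_sketch(ids, rev_vocab, pad_id=0, eos_id=None):
    tokens = []
    for i in ids:
        if i == pad_id or (eos_id is not None and i == eos_id):
            break
        tokens.append(rev_vocab[i])
    return tokens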
def chat(input_cmd):
    """ in test mode, we don't create the backward path """
    _, enc_vocab = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(
        os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(
            os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        # input_cmd = _get_user_input()
        if len(input_cmd) > 0 and input_cmd[-1] == '\n':
            input_cmd = input_cmd[:-1]
        output_file.write('Input: ' + input_cmd + '\n')
        # Get token-ids for the input sentence.
        token_ids = data.sentence2id(enc_vocab, str(input_cmd))
        if len(token_ids) > max_length:
            # Truncate over-long inputs so they still fit the largest bucket.
            token_ids = token_ids[:max_length]
        # Which bucket does it belong to?
        bucket_id = _find_right_bucket(len(token_ids))
        # Get a 1-element batch to feed the sentence to the model.
        encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
            [(token_ids, [])], bucket_id, batch_size=1)
        # Get output logits for the sentence.
        _, _, output_logits = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, True)
        response = _construct_response(output_logits, inv_dec_vocab)
        output_file.write('Response: ' + response + '\n')
        output_file.close()
        return response
def chat(): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) print(response) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
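# --- Hedged sketch: `_check_restore_parameters` is assumed to restore the newest checkpoint if
# one exists and otherwise keep the freshly initialized weights; the explicit checkpoint
# directory argument here is an assumption (some variants above take only `sess` and `saver`).
def _check_restore_parameters_sketch(sess, saver, ckpt_dir):
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("Loading parameters from", ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print("Initializing fresh parameters")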
def chat():
    _, enc_vocab = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        max_length = config.BUCKETS[-1][0]
        print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length)
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':
                break
            output_file.write('HUMAN ++++ ' + line + '\n')
            token_ids = data.sentence2id(enc_vocab, str(line))
            if len(token_ids) > max_length:
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            bucket_id = _find_right_bucket(len(token_ids))
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(
                [(token_ids, [])], bucket_id, batch_size=1)
            _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                           decoder_masks, bucket_id, True)
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('===================================\n')
        output_file.close()
def translate(): _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) # Decode from standard input. max_length = config.BUCKETS[-1][0] print('Please enter sentence in English') while True: line = _get_user_input() if len(line) > 0 and line[-1] == u'\n': line = line[:-1] if line == '': break # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, line) if (len(token_ids) > max_length): token_ids = token_ids[:max_length] # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) print(response)
def chat(sess, input_text, enc_vocab, inv_dec_vocab, model): # output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] # print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) # while True: line = input_text # line = _get_user_input() # if len(line) > 0 and line[-1] == '\n': # line = line[:-1] # output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) # if (len(token_ids) > max_length): # print('Max length I can handle is:', max_length) # line = _get_user_input() # continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, inv_dec_vocab) print(response) if (len(response) == 0 or "UNK" in response): response = line + "เหรอ5555" # output_file.write('BOT ++++ ' + response + '\n') # output_file.write('=============================================\n') # output_file.close() return response
def response(self, line): """ return a string response to a string input """ # Decode from standard input. if line[-1] == '\n': line = line[:-1] # Get token-ids for the input sentence. token_ids = data.sentence2id(self.enc_vocab, str(line)) if (len(token_ids) > self.max_length): raise RuntimeError('Max length the bot can handle is:', self.max_length) # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(self.sess, self.model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response(output_logits, self.inv_dec_vocab) return response
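# --- Hedged sketch: `_construct_response` is assumed to greedy-decode the per-step output
# logits, truncate at the first EOS, and join the words from the inverse decoder vocabulary.
# config.EOS_ID is referenced elsewhere in this section; its use as the default here is otherwise
# an assumption.
import numpy as np

def _construct_response_sketch(output_logits, inv_dec_vocab, eos_id=config.EOS_ID):
    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
    if eos_id in outputs:
        outputs = outputs[:outputs.index(eos_id)]
    return ' '.join(inv_dec_vocab[i] for i in outputs)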
def pretrain(): # Parse command line arguments argparser = argparse.ArgumentParser() # train argparser.add_argument('--mode', '-m', choices=('pretrain', 'adversarial', 'inference'), type=str, required=True) argparser.add_argument('--batch_size', '-b', type=int, default=168) argparser.add_argument('--num_epoch', '-e', type=int, default=10) argparser.add_argument('--print_every', type=int, default=100) argparser.add_argument('--use_cuda', default=True) argparser.add_argument('--g_learning_rate', '-glr', type=float, default=0.001) argparser.add_argument('--d_learning_rate', '-dlr', type=float, default=0.001) # resume argparser.add_argument('--resume', action='store_true', dest='resume') argparser.add_argument('--resume_dir', type=str) argparser.add_argument('--resume_epoch', type=int) # save argparser.add_argument('--exp_dir', type=str, required=True) # model argparser.add_argument('--emb_dim', type=int, default=128) argparser.add_argument('--hidden_dim', type=int, default=256) argparser.add_argument('--dropout_rate', '-drop', type=float, default=0.5) argparser.add_argument('--n_layers', type=int, default=1) argparser.add_argument('--response_max_len', type=int, default=15) # data argparser.add_argument('--train_query_file', '-tqf', type=str, required=True) argparser.add_argument('--train_response_file', '-trf', type=str, required=True) argparser.add_argument('--valid_query_file', '-vqf', type=str, required=True) argparser.add_argument('--valid_response_file', '-vrf', type=str, required=True) argparser.add_argument('--vocab_file', '-vf', type=str, default='') argparser.add_argument('--max_vocab_size', '-mv', type=int, default=100000) args = argparser.parse_args() # set up the output directory exp_dirname = os.path.join(args.exp_dir, args.mode, time.strftime("%Y-%m-%d-%H-%M-%S")) os.makedirs(exp_dirname) # set up the logger tqdm_logging.config(logger, os.path.join(exp_dirname, 'train.log'), mode='w', silent=False, debug=True) if not args.vocab_file: logger.info("no vocabulary file") build_vocab(args.train_query_file, args.train_response_file, seperated=True) sys.exit() else: vocab, rev_vocab = load_vocab(args.vocab_file, max_vocab=args.max_vocab_size) vocab_size = len(vocab) word_embeddings = nn.Embedding(vocab_size, args.emb_dim, padding_idx=SYM_PAD) E = EncoderRNN(vocab_size, args.emb_dim, args.hidden_dim, args.n_layers, args.dropout_rate, bidirectional=True, variable_lengths=True) G = Generator(vocab_size, args.response_max_len, args.emb_dim, 2 * args.hidden_dim, args.n_layers, dropout_p=args.dropout_rate) if args.use_cuda: word_embeddings.cuda() E.cuda() G.cuda() loss_func = nn.NLLLoss(size_average=False) params = list(word_embeddings.parameters()) + list(E.parameters()) + list( G.parameters()) opt = torch.optim.Adam(params, lr=args.g_learning_rate) logger.info('----------------------------------') logger.info('Pre-train a neural conversation model') logger.info('----------------------------------') logger.info('Args:') logger.info(str(args)) logger.info('Vocabulary from ' + args.vocab_file) logger.info('vocabulary size: %d' % vocab_size) logger.info('Loading text data from ' + args.train_query_file + ' and ' + args.train_response_file) # resume training from other experiment if args.resume: assert args.resume_epoch >= 0, 'If resume training, please assign resume_epoch' reload_model(args.resume_dir, args.resume_epoch, word_embeddings, E, G) start_epoch = args.resume_epoch + 1 else: start_epoch = 0 # dump args with open(os.path.join(exp_dirname, 'args.pkl'), 'wb') as f: pickle.dump(args, f) 
    for e in range(start_epoch, args.num_epoch):
        logger.info('---------------------training--------------------------')
        train_data_generator = batcher(args.batch_size, args.train_query_file,
                                       args.train_response_file)
        logger.info("Epoch: %d/%d" % (e, args.num_epoch))
        step = 0
        total_loss = 0.0
        total_valid_char = []
        cur_time = time.time()
        while True:
            try:
                post_sentences, response_sentences = train_data_generator.next()
            except StopIteration:
                # save model
                save_model(exp_dirname, e, word_embeddings, E, G)
                # evaluation
                eval(args.valid_query_file, args.valid_response_file,
                     args.batch_size, word_embeddings, E, G, loss_func,
                     args.use_cuda, vocab, args.response_max_len)
                break

            post_ids = [sentence2id(sent, vocab) for sent in post_sentences]
            response_ids = [sentence2id(sent, vocab) for sent in response_sentences]
            posts_var, posts_length = padding_inputs(post_ids, None)
            responses_var, responses_length = padding_inputs(
                response_ids, args.response_max_len)

            # sort by post length
            posts_length, perms_idx = posts_length.sort(0, descending=True)
            posts_var = posts_var[perms_idx]
            responses_var = responses_var[perms_idx]
            responses_length = responses_length[perms_idx]

            # append EOS to the end of each sentence
            references_var = torch.cat([
                responses_var,
                Variable(torch.zeros(responses_var.size(0), 1).long(),
                         requires_grad=False)
            ], dim=1)
            for idx, length in enumerate(responses_length):
                references_var[idx, length] = SYM_EOS

            # show case
            #for p, r, ref in zip(posts_var.data.numpy()[:10], responses_var.data.numpy()[:10], references_var.data.numpy()[:10]):
            #    print ''.join(id2sentence(p, rev_vocab))
            #    print ''.join(id2sentence(r, rev_vocab))
            #    print ''.join(id2sentence(ref, rev_vocab))
            #    print

            if args.use_cuda:
                posts_var = posts_var.cuda()
                responses_var = responses_var.cuda()
                references_var = references_var.cuda()

            embedded_post = word_embeddings(posts_var)
            embedded_response = word_embeddings(responses_var)

            _, dec_init_state = E(embedded_post, input_lengths=posts_length.numpy())
            log_softmax_outputs = G.supervise(
                embedded_response, dec_init_state, word_embeddings)  # [B, T, vocab_size]

            outputs = log_softmax_outputs.view(-1, vocab_size)
            mask_pos = mask(references_var).view(-1).unsqueeze(-1)
            masked_output = outputs * (mask_pos.expand_as(outputs))
            loss = loss_func(masked_output, references_var.view(-1)) / (posts_var.size(0))
            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss * (posts_var.size(0))
            total_valid_char.append(mask_pos)

            if step % args.print_every == 0:
                total_loss_val = total_loss.cpu().data.numpy()[0]
                total_valid_char_val = torch.sum(
                    torch.cat(total_valid_char, dim=1)).cpu().data.numpy()[0]
                logger.info(
                    'Step %5d: (per word) training perplexity %.2f (%.1f iters/sec)'
                    % (step, math.exp(total_loss_val / total_valid_char_val),
                       args.print_every / (time.time() - cur_time)))
                total_loss = 0.0
                total_valid_char = []
                total_case_num = 0
                cur_time = time.time()

            step = step + 1
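# --- Hedged sketch: the supervised loss above multiplies the flattened log-softmax outputs by
# `mask(references_var)`. The assumed mask is 1.0 at real (non-PAD) target positions and 0.0 at
# padding, so padded timesteps contribute nothing to the NLL; pad id 0 is an assumption.
def mask_sketch(references_var, pad_id=0):
    # references_var: tensor/Variable of target ids, shape [B, T]
    return references_var.ne(pad_id).float()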
def chat(): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open( os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print( 'Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) if config.BEAM_SEARCH: while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) if config.ANTI_LM: # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # do beam search and antilm together # Get output logits for the sentence. beams, new_beams, results = [(1, 0, { 'eos': 0, 'dec_inp': decoder_inputs, 'prob': 1, 'prob_ts': 1, 'prob_t': 1 })], [], [ ] # initialize beams as (log_prob, empty_string, eos) dummy_encoder_inputs = [ np.array([config.PAD_ID]) for _ in range(len(encoder_inputs)) ] for dptr in range(len(decoder_inputs) - 1): if dptr > 0: target_weights[dptr] = [1.] beams, new_beams = new_beams[:args.beam_size], [] if config.DEBUG: print("=====[beams]=====", beams) heapq.heapify(beams) # since we will remove something for prob, _, cand in beams: if cand['eos']: results += [(prob, 0, cand)] continue # normal seq2seq if config.DEBUG: print( cand['prob'], " ".join([ dict_lookup(inv_dec_vocab, w[0]) for w in cand['dec_inp'] ])) # all_prob_ts = model_step(encoder_inputs, cand['dec_inp'], dptr, target_weights, bucket_id) _, _, all_prob_ts = run_step( sess, model, encoder_inputs, cand['dec_inp'], target_weights, bucket_id, True) if config.ANTI_LM: # anti-lm # all_prob_t = model_step(dummy_encoder_inputs, cand['dec_inp'], dptr, target_weights, bucket_id) _, _, all_prob_t = run_step( sess, model, dummy_encoder_inputs, cand['dec_inp'], target_weights, bucket_id, True) # adjusted probability all_prob = all_prob_ts - config.LAMBDA * np.array( all_prob_t ) #+ args.n_bonus * dptr + random() * 1e-50 else: all_prob_t = [0] * len(all_prob_ts) all_prob = all_prob_ts # suppress copy-cat (respond the same as input) if dptr < len(token_ids): all_prob[token_ids[dptr]] = all_prob[ token_ids[dptr]] * 0.01 # for debug use # if config.DEBUG: return all_prob, all_prob_ts, all_prob_t # beam search for c in np.argsort( all_prob)[::-1][:args.beam_size]: new_cand = { 'eos': (c == config.EOS_ID), 'dec_inp': [(np.array([c]) if i == (dptr + 1) else k) for i, k in enumerate(cand['dec_inp'])], 'prob_ts': cand['prob_ts'] * all_prob_ts[c], 'prob_t': cand['prob_t'] * all_prob_t[c], 'prob': cand['prob'] * all_prob[c], } new_cand = ( new_cand['prob'], random(), new_cand ) # stuff a random to prevent comparing new_cand try: if (len(new_beams) < config.BEAM_SIZE): heapq.heappush(new_beams, new_cand) elif (new_cand[0] > new_beams[0][0]): heapq.heapreplace(new_beams, 
new_cand) except Exception as e: print("[Error]", e) print("-----[new_beams]-----\n", new_beams) print("-----[new_cand]-----\n", new_cand) results += new_beams # flush last cands # post-process results res_cands = [] for prob, _, cand in sorted(results, reverse=True): cand['dec_inp'] = " ".join([ dict_lookup(inv_dec_vocab, w) for w in cand['dec_inp'] ]) print('response antilm: ', cand['dec_inp']) res_cands.append(cand) return res_cands[:args.beam_size] else: # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # add beam search parameter to run_Step path, symbol, output_logits = run_step( sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) k = output_logits[0] paths = [] for kk in range(config.BEAM_SIZE): paths.append([]) curr = range(config.BEAM_SIZE) num_steps = len(path) for i in range(num_steps - 1, -1, -1): for kk in range(config.BEAM_SIZE): paths[kk].append(symbol[i][curr[kk]]) curr[kk] = path[i][curr[kk]] responses = set() for kk in range(config.BEAM_SIZE): response = _construct_beam_response( paths[kk], inv_dec_vocab) if response not in responses: responses.add(response) print('response: ', response) output_file.write('BOT ++++ ' + response + '\n') else: while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, target_weights = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1)
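# --- Hedged sketch of the ANTI_LM adjustment used in the beam search above (MMI-antiLM
# reranking): each candidate token's score is its score given the source minus LAMBDA times an
# unconditional language-model score, where the unconditional term is approximated by running
# the same seq2seq model on a dummy all-PAD encoder input.
import numpy as np

def antilm_adjust_sketch(prob_given_source, prob_unconditional, lam):
    # Both inputs are per-token score vectors for the current decoder step.
    return np.asarray(prob_given_source) - lam * np.asarray(prob_unconditional)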
## demo elif args.mode == 'demo': ckpt_file = tf.train.latest_checkpoint(model_path) print(ckpt_file) paths['model_path'] = ckpt_file model = BiLSTM_CRF(args, embeddings, dictname2id, word2id, paths, config=config) model.build_graph() saver = tf.train.Saver() with tf.Session(config=config) as sess: print('============= demo =============') saver.restore(sess, ckpt_file) while (1): print('Please input your sentence:') demo_sent = input() if demo_sent == '' or demo_sent.isspace(): print('See you next time!') break else: demo_sent = list(demo_sent.strip()) demo_data = [(sentence2id(demo_sent, word2id), [0] * len(demo_sent))] tag = model.demo_one(sess, demo_data) res = get_entity(tag[0], demo_sent, dictname2id) print(res)
elif args.mode == 'predict':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=configs)
    model.build_graph()
    saver = tf.train.Saver()
    with tf.Session(config=configs) as sess:
        print('============= demo =============')
        saver.restore(sess, ckpt_file)
        while 1:
            print('Please input your sentence:')
            demo_sent = input()
            if demo_sent == '' or demo_sent.isspace():
                print('See you next time!')
                break
            else:
                demo_id = sentence2id(demo_sent, word2id)
                length = len(demo_id)
                if length > args.max_len:
                    print('Input is too long')
                demo_data = [(demo_id, [0] * length)]
                print(demo_id)
                tags = model.predict_sentence(sess, demo_data)
                print(tags[:length])
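# --- Hedged sketch: `get_entity` (used in the demo branch above) is assumed to decode BIO-style
# tag sequences back into (entity_text, entity_type) spans; the exact tag scheme is an
# assumption, this only illustrates the usual decoding.
def get_entity_sketch(tags, chars):
    entities, cur, cur_type = [], [], None
    for ch, tag in zip(chars, tags):
        if tag.startswith('B-'):
            if cur:
                entities.append((''.join(cur), cur_type))
            cur, cur_type = [ch], tag[2:]
        elif tag.startswith('I-') and cur_type == tag[2:]:
            cur.append(ch)
        else:
            if cur:
                entities.append((''.join(cur), cur_type))
            cur, cur_type = [], None
    if cur:
        entities.append((''.join(cur), cur_type))
    return entities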
def adversarial(): # user the root logger logger = logging.getLogger("lan2720") argparser = argparse.ArgumentParser(add_help=False) argparser.add_argument('--load_path', '-p', type=str, required=True) # TODO: load best argparser.add_argument('--load_epoch', '-e', type=int, required=True) argparser.add_argument('--filter_num', type=int, required=True) argparser.add_argument('--filter_sizes', type=str, required=True) argparser.add_argument('--training_ratio', type=int, default=2) argparser.add_argument('--g_learning_rate', '-glr', type=float, default=0.001) argparser.add_argument('--d_learning_rate', '-dlr', type=float, default=0.001) argparser.add_argument('--batch_size', '-b', type=int, default=168) # new arguments used in adversarial new_args = argparser.parse_args() # load default arguments default_arg_file = os.path.join(new_args.load_path, 'args.pkl') if not os.path.exists(default_arg_file): raise RuntimeError('No default argument file in %s' % new_args.load_path) else: with open(default_arg_file, 'rb') as f: args = pickle.load(f) args.mode = 'adversarial' #args.d_learning_rate = 0.0001 args.print_every = 1 args.g_learning_rate = new_args.g_learning_rate args.d_learning_rate = new_args.d_learning_rate args.batch_size = new_args.batch_size # add new arguments args.load_path = new_args.load_path args.load_epoch = new_args.load_epoch args.filter_num = new_args.filter_num args.filter_sizes = new_args.filter_sizes args.training_ratio = new_args.training_ratio # set up the output directory exp_dirname = os.path.join(args.exp_dir, args.mode, time.strftime("%Y-%m-%d-%H-%M-%S")) os.makedirs(exp_dirname) # set up the logger tqdm_logging.config(logger, os.path.join(exp_dirname, 'adversarial.log'), mode='w', silent=False, debug=True) # load vocabulary vocab, rev_vocab = load_vocab(args.vocab_file, max_vocab=args.max_vocab_size) vocab_size = len(vocab) word_embeddings = nn.Embedding(vocab_size, args.emb_dim, padding_idx=SYM_PAD) E = EncoderRNN(vocab_size, args.emb_dim, args.hidden_dim, args.n_layers, args.dropout_rate, bidirectional=True, variable_lengths=True) G = Generator(vocab_size, args.response_max_len, args.emb_dim, 2*args.hidden_dim, args.n_layers, dropout_p=args.dropout_rate) D = Discriminator(args.emb_dim, args.filter_num, eval(args.filter_sizes)) if args.use_cuda: word_embeddings.cuda() E.cuda() G.cuda() D.cuda() # define optimizer opt_G = torch.optim.Adam(G.rnn.parameters(), lr=args.g_learning_rate) opt_D = torch.optim.Adam(D.parameters(), lr=args.d_learning_rate) logger.info('----------------------------------') logger.info('Adversarial a neural conversation model') logger.info('----------------------------------') logger.info('Args:') logger.info(str(args)) logger.info('Vocabulary from ' + args.vocab_file) logger.info('vocabulary size: %d' % vocab_size) logger.info('Loading text data from ' + args.train_query_file + ' and ' + args.train_response_file) reload_model(args.load_path, args.load_epoch, word_embeddings, E, G) # start_epoch = args.resume_epoch + 1 #else: # start_epoch = 0 # dump args with open(os.path.join(exp_dirname, 'args.pkl'), 'wb') as f: pickle.dump(args, f) # TODO: num_epoch is old one for e in range(args.num_epoch): train_data_generator = batcher(args.batch_size, args.train_query_file, args.train_response_file) logger.info("Epoch: %d/%d" % (e, args.num_epoch)) step = 0 cur_time = time.time() while True: try: post_sentences, response_sentences = train_data_generator.next() except StopIteration: # save model save_model(exp_dirname, e, word_embeddings, E, G, D) ## 
evaluation #eval(args.valid_query_file, args.valid_response_file, args.batch_size, # word_embeddings, E, G, loss_func, args.use_cuda, vocab, args.response_max_len) break # prepare data post_ids = [sentence2id(sent, vocab) for sent in post_sentences] response_ids = [sentence2id(sent, vocab) for sent in response_sentences] posts_var, posts_length = padding_inputs(post_ids, None) responses_var, responses_length = padding_inputs(response_ids, args.response_max_len) # sort by post length posts_length, perms_idx = posts_length.sort(0, descending=True) posts_var = posts_var[perms_idx] responses_var = responses_var[perms_idx] responses_length = responses_length[perms_idx] if args.use_cuda: posts_var = posts_var.cuda() responses_var = responses_var.cuda() embedded_post = word_embeddings(posts_var) real_responses = word_embeddings(responses_var) # forward _, dec_init_state = E(embedded_post, input_lengths=posts_length.numpy()) fake_responses = G(dec_init_state, word_embeddings) # [B, T, emb_size] prob_real = D(embedded_post, real_responses) prob_fake = D(embedded_post, fake_responses) # loss D_loss = - torch.mean(torch.log(prob_real) + torch.log(1. - prob_fake)) G_loss = torch.mean(torch.log(1. - prob_fake)) if step % args.training_ratio == 0: opt_D.zero_grad() D_loss.backward(retain_graph=True) opt_D.step() opt_G.zero_grad() G_loss.backward() opt_G.step() if step % args.print_every == 0: logger.info('Step %5d: D accuracy=%.2f (0.5 for D to converge) D score=%.2f (-1.38 for G to converge) (%.1f iters/sec)' % ( step, prob_real.cpu().data.numpy().mean(), -D_loss.cpu().data.numpy()[0], args.print_every/(time.time()-cur_time))) cur_time = time.time() step = step + 1
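# --- Hedged sketch: `save_model` / `reload_model` are the checkpoint helpers shared by
# pretrain() and adversarial(). A plausible implementation stores each module's state_dict per
# epoch with torch.save; the file-naming scheme here is an assumption.
import os
import torch

def save_model_sketch(exp_dir, epoch, *modules):
    for i, m in enumerate(modules):
        torch.save(m.state_dict(),
                   os.path.join(exp_dir, 'module%d_epoch%d.pt' % (i, epoch)))

def reload_model_sketch(load_dir, epoch, *modules):
    for i, m in enumerate(modules):
        m.load_state_dict(
            torch.load(os.path.join(load_dir, 'module%d_epoch%d.pt' % (i, epoch))))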
def test(): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab( os.path.join(config.PROCESSED_PATH, 'vocab.dec')) fh_test_truth = io.open(os.path.join(config.PROCESSED_PATH, 'test.dec'), 'r', encoding='utf-8') fh_test_enc = io.open(os.path.join(config.PROCESSED_PATH, 'test.enc'), 'r', encoding='utf-8') test_truths = fh_test_truth.readlines( ) # 1268 lines of correct translation in target language test_enc = fh_test_enc.readlines( ) # 1268 lines of initial sentence in source language fh_test_truth.close() fh_test_enc.close() bleu_scores = [] model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = io.open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+', encoding='utf-8') # Decode from lines in test files. max_length = config.BUCKETS[-1][0] i = 0 # Index to be used to read ground_truth from test_truths for i in range(len(test_enc)): enc_line = test_enc[i] if len(enc_line) > 0 and enc_line[-1] == u'\n': enc_line = enc_line[:-1] if enc_line == '': break #output_file.write('HUMAN ++++ ' + enc_line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, enc_line) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) output_file.write(u'.\n') bleu_score = 0 bleu_scores.append(bleu_score) i = i + 1 continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch( [(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) response = _construct_response( output_logits, inv_dec_vocab) # response is the translated sentence truth = test_truths[i] #print(type(truth)) #print(type(response)) truth_li = [truth.split()] response_li = response.split() bleu_score = sentence_bleu( truth_li, response_li, smoothing_function=SmoothingFunction().method1) print(response) output_file.write(response + '\n') i = i + 1 #print("BLEU: %.5f" % bleu_score) bleu_scores.append(bleu_score) output_file.write(u'=============================================\n') output_file.write(u"Average BLEU: %.5f" % np.mean(np.array(bleu_scores))) output_file.close() print("Average BLEU: %.5f" % np.mean(np.array(bleu_scores))) return bleu_scores
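# --- Self-contained example of the BLEU call used in test() above: NLTK's sentence_bleu with
# SmoothingFunction().method1, which keeps the score defined when higher-order n-gram counts are
# zero for short hypotheses. The sentences here are illustrative only.
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

references = [['the', 'cat', 'sat', 'on', 'the', 'mat']]
hypothesis = ['the', 'cat', 'is', 'on', 'the', 'mat']
score = sentence_bleu(references, hypothesis,
                      smoothing_function=SmoothingFunction().method1)
print('BLEU: %.5f' % score)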
def chat(): """ in test mode, we don't to create the backward path """ _, enc_vocab = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.enc')) inv_dec_vocab, _ = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.dec')) model = ChatBotModel(True, batch_size=1) model.build_graph() saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) _check_restore_parameters(sess, saver) output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+') # Decode from standard input. max_length = config.BUCKETS[-1][0] print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length) if config.BEAM_SEARCH: while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])], bucket_id, batch_size=1) # add beam search parameter to run_Step path, symbol, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) k = output_logits[0] paths = [] for kk in range(config.BEAM_SIZE): paths.append([]) curr = range(config.BEAM_SIZE) num_steps = len(path) for i in range(num_steps-1, -1, -1): for kk in range(config.BEAM_SIZE): paths[kk].append(symbol[i][curr[kk]]) curr[kk] = path[i][curr[kk]] responses = set() for kk in range(config.BEAM_SIZE): response = _construct_beam_response(paths[kk], inv_dec_vocab) if response not in responses: responses.add(response) print('response: ', response) output_file.write('BOT ++++ ' + response + '\n') else: while True: line = _get_user_input() if len(line) > 0 and line[-1] == '\n': line = line[:-1] if line == '': break output_file.write('HUMAN ++++ ' + line + '\n') # Get token-ids for the input sentence. token_ids = data.sentence2id(enc_vocab, str(line)) if (len(token_ids) > max_length): print('Max length I can handle is:', max_length) line = _get_user_input() continue # Which bucket does it belong to? bucket_id = _find_right_bucket(len(token_ids)) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])], bucket_id, batch_size=1) # Get output logits for the sentence. _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) if config.ANTI_LM: dummy_encoder_inputs = [np.array([config.PAD_ID]) for _ in range(len(encoder_inputs))] _, _, output_logits_t = run_step(sess, model, dummy_encoder_inputs, decoder_inputs, decoder_masks, bucket_id, True) # only apply antilm up to a certain point in the decoder input gamma = int(config.GAMMA*len(decoder_inputs)) antilm_mask = np.array([1*( _ < gamma) for _ in range(len(decoder_inputs))]).reshape((-1,1,1)) output_logits -= config.LAMBDA*(output_logits_t*antilm_mask) response = _construct_response(output_logits, inv_dec_vocab) print('response: ', response) output_file.write('BOT ++++ ' + response + '\n') output_file.write('=============================================\n') output_file.close()
def eval(valid_query_file, valid_response_file, batch_size, word_embeddings, E, G,
         loss_func, use_cuda, vocab, response_max_len):
    logger.info('---------------------validating--------------------------')
    logger.info('Loading valid data from %s and %s' %
                (valid_query_file, valid_response_file))

    valid_data_generator = batcher(batch_size, valid_query_file, valid_response_file)
    sum_loss = 0.0
    valid_char_num = 0
    example_num = 0
    while True:
        try:
            post_sentences, response_sentences = valid_data_generator.next()
        except StopIteration:
            # one epoch finished
            break

        post_ids = [sentence2id(sent, vocab) for sent in post_sentences]
        response_ids = [sentence2id(sent, vocab) for sent in response_sentences]
        posts_var, posts_length = padding_inputs(post_ids, None)
        responses_var, responses_length = padding_inputs(response_ids, response_max_len)

        # sort by post length
        posts_length, perms_idx = posts_length.sort(0, descending=True)
        posts_var = posts_var[perms_idx]
        responses_var = responses_var[perms_idx]
        responses_length = responses_length[perms_idx]

        # append EOS to the end of each sentence
        references_var = torch.cat([
            responses_var,
            Variable(torch.zeros(responses_var.size(0), 1).long(),
                     requires_grad=False)
        ], dim=1)
        for idx, length in enumerate(responses_length):
            references_var[idx, length] = SYM_EOS

        if use_cuda:
            posts_var = posts_var.cuda()
            responses_var = responses_var.cuda()
            references_var = references_var.cuda()

        embedded_post = word_embeddings(posts_var)
        embedded_response = word_embeddings(responses_var)

        _, dec_init_state = E(embedded_post, input_lengths=posts_length.numpy())
        log_softmax_outputs = G.supervise(
            embedded_response, dec_init_state, word_embeddings)  # [B, T, vocab_size]

        outputs = log_softmax_outputs.view(-1, len(vocab))
        mask_pos = mask(references_var).view(-1).unsqueeze(-1)
        masked_output = outputs * (mask_pos.expand_as(outputs))
        loss = loss_func(masked_output, references_var.view(-1))

        sum_loss += loss.cpu().data.numpy()[0]
        example_num += posts_var.size(0)
        valid_char_num += torch.sum(mask_pos).cpu().data.numpy()[0]

    logger.info('Valid Loss (per case): %.2f Valid Perplexity (per word): %.2f' %
                (sum_loss / example_num, math.exp(sum_loss / valid_char_num)))
    logger.info('---------------------finish-------------------------')