def __init__(self, voc_path):
    """Load the vocabulary, build the Hred model and restore its weights.

    Args:
        voc_path: Path passed to ``Dialogue.load_vocab``.

    Raises:
        IOError: If no checkpoint state is found under ``./model``.
    """
    self.dialogue = Dialogue()
    self.dialogue.load_vocab(voc_path)

    self.model = Hred(self.dialogue.voc_size, False)

    # Load the model. get_checkpoint_state returns None when the directory
    # holds no checkpoint, so fail with a clear message instead of an
    # AttributeError on ``None.model_checkpoint_path``.
    ckpt = tf.train.get_checkpoint_state('./model')
    if not ckpt or not ckpt.model_checkpoint_path:
        raise IOError("No checkpoint found in './model'")

    self.sess = tf.Session()
    self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
class chatbot:
    """Console chatbot that answers stdin lines with a restored Seq2Seq model."""

    def __init__(self, voc_path):
        """Build the dialogue helper and model, then restore the checkpoint.

        Args:
            voc_path: Path passed to the ``Dialogue`` constructor.

        Raises:
            IOError: If no checkpoint state is found under ``./model``.
        """
        self.dialogue = Dialogue(voc_path)
        self.model = Seq2Seq(self.dialogue.input_max_len,
                             self.dialogue.output_max_len,
                             self.dialogue.voc_size,
                             self.dialogue.word_embedding_matrix,
                             False)

        # Load the model. get_checkpoint_state returns None when there is
        # no checkpoint, so report that explicitly.
        ckpt = tf.train.get_checkpoint_state('./model')
        if not ckpt or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in './model'")

        self.sess = tf.Session()
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Prompt on stdout and print a reply for every stdin line until EOF."""
        sys.stdout.write("> ")
        sys.stdout.flush()
        line = sys.stdin.readline()

        while line:
            print(self.get_replay(line.strip()))  # response

            sys.stdout.write("\n> ")
            sys.stdout.flush()
            line = sys.stdin.readline()

    def _decode(self, enc_input, dec_input):
        """Pad one encoder/decoder pair and run a single prediction.

        Args:
            enc_input: Sequence of encoder token ids.
            dec_input: List (or ``np.ndarray``) of decoder token ids.

        Returns:
            Whatever ``self.model.predict`` returns.
        """
        if isinstance(dec_input, np.ndarray):
            dec_input = dec_input.tolist()

        # Lengths are measured before transform() pads the sequences.
        enc_len = len(enc_input)
        dec_len = len(dec_input)

        enc_input, dec_input, _ = self.dialogue.transform(
            enc_input, dec_input,
            self.dialogue.input_max_len,
            self.dialogue.output_max_len)

        return self.model.predict(self.sess, [enc_input], [enc_len],
                                  [dec_input], [dec_len + 1])

    # Return the reply to msg.
    def get_replay(self, msg):
        """Return the reply for ``msg``.

        Args:
            msg: Raw user message.

        Returns:
            The decoded reply when the model output is an end-of-sequence
            according to ``Dialogue.is_eos``; otherwise an empty string.
        """
        enc_input = self.dialogue.tokens_to_ids(self.dialogue.tokenizer(msg))
        dec_input = []

        # Default reply: previously ``reply`` was only bound inside the
        # ``if`` below, so any non-EOS output raised UnboundLocalError.
        # NOTE(review): an empty reply is a placeholder; confirm what should
        # be returned when the output is not EOS.
        reply = ''

        outputs = self._decode(enc_input, dec_input)
        if self.dialogue.is_eos(outputs):
            reply = self.dialogue.decode([dec_input], True)

        # The iterative loop that fed the decoder's output back in as its
        # input was commented out in the original and is not restored here.
        return reply
def __init__(self, voc_path):
    """Build the dialogue helper and Seq2Seq model, then restore weights.

    Args:
        voc_path: Path passed to the ``Dialogue`` constructor.

    Raises:
        IOError: If no checkpoint state is found under ``./model``.
    """
    self.dialogue = Dialogue(voc_path)
    self.model = Seq2Seq(self.dialogue.input_max_len,
                         self.dialogue.output_max_len,
                         self.dialogue.voc_size,
                         self.dialogue.word_embedding_matrix,
                         False)

    # Load the model. get_checkpoint_state returns None when the directory
    # holds no checkpoint; raise a readable error rather than failing on
    # ``None.model_checkpoint_path``.
    ckpt = tf.train.get_checkpoint_state('./model')
    if not ckpt or not ckpt.model_checkpoint_path:
        raise IOError("No checkpoint found in './model'")

    self.sess = tf.Session()
    self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
def __init__(self, npc_list, text):
    """Keep the NPC list and build a dialogue with one choice per NPC.

    Args:
        npc_list: Iterable of NPCs; each one is wrapped in ``GoToNpc``.
        text: Passed to ``Dialogue`` as its third argument.
    """
    self.npc_list = npc_list
    # One GoToNpc choice per NPC, in the order the NPCs were given.
    go_to_choices = [GoToNpc(npc) for npc in npc_list]
    self.dialogue = Dialogue(None, go_to_choices, text)
class chatbot:
    """Chatbot that answers the message stored by ``kakao_input``."""

    # Padded length handed to Dialogue.transform for both sequences.
    _MAX_LEN = 25

    def __init__(self, voc_path):
        """Load the vocabulary, build the Hred model and restore weights.

        Args:
            voc_path: Path passed to ``Dialogue.load_vocab``.

        Raises:
            IOError: If no checkpoint state is found under ``./model``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)

        self.model = Hred(self.dialogue.voc_size, False)

        # Most recent message; overwritten by kakao_input().
        self.inputs = ''

        # Load the model. get_checkpoint_state returns None when there is
        # no checkpoint, so report that explicitly.
        ckpt = tf.train.get_checkpoint_state('./model')
        if not ckpt or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in './model'")

        self.sess = tf.Session()
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Return the reply to the message last stored by ``kakao_input``."""
        # The original read an undefined name ``inputs`` here (NameError);
        # the message is kept on the instance by kakao_input().
        sentences = [self.inputs.strip()]
        return self.get_replay(sentences)

    def _decode(self, enc_input, dec_input):
        """Pad every sentence and run one prediction over the whole batch.

        Args:
            enc_input: List of token-id lists, one per sentence.
            dec_input: Accepted for interface compatibility but ignored; the
                encoder sentences are reused as decoder input.

        Returns:
            Whatever ``self.model.predict`` returns.
        """
        max_len = self._MAX_LEN
        enc_len = []
        dec_len = []
        enc_batch = []
        dec_batch = []

        for sentence in enc_input:
            # Limit the length to 25 tokens. The original loop only rebound
            # its loop variable, so nothing was actually truncated. The
            # decoder side keeps one slot free because its length is
            # reported as len + 1 below.
            enc_in = sentence[:max_len]
            dec_in = sentence[:max_len - 1]

            enc, dec, _ = self.dialogue.transform(enc_in, dec_in,
                                                  max_len, max_len)
            enc_batch.append(enc)
            dec_batch.append(dec)
            enc_len.append(len(enc_in))
            dec_len.append(len(dec_in) + 1)

        # dec isn't needed at predict time, but the model declares it as a
        # placeholder, so we pass something (enc) for now.
        # ``b`` and ``context_size`` were undefined in the original.
        context_size = len(enc_input)
        b = max(dec_len) if dec_len else 0
        return self.model.predict(self.sess, [enc_batch], [enc_len],
                                  [dec_batch], [dec_len], [b], context_size)

    # Return the reply to the given sentences.
    def get_replay(self, sentences):
        """Tokenize ``sentences``, run the model and decode the last output.

        Args:
            sentences: List of raw sentence strings.

        Returns:
            The decoded reply after ``Dialogue.cut_eos``.
        """
        enc_input = [
            self.dialogue.tokens_to_ids(self.dialogue.tokenizer(sentence))
            for sentence in sentences
        ]
        dec_input = enc_input

        outputs = self._decode(enc_input, dec_input)
        reply = self.dialogue.decode([outputs[len(enc_input) - 1]], True)
        return self.dialogue.cut_eos(reply)

    def kakao_input(self, inputs):
        """Store ``inputs`` as the message that ``run`` will answer."""
        self.inputs = inputs
def read_format_data(src_root):
    """Read every file under ``src_root`` and build a list of dialogues.

    Each file is split on blank lines into records; the final chunk after
    the last blank line is discarded. Every record is split into lines and
    passed to ``Dialogue`` as ``(line[0], line[2], line[1])``.

    Args:
        src_root: Directory whose direct children are read.

    Returns:
        List of ``Dialogue`` objects in the order they were read.

    Raises:
        IndexError: If a record has fewer than three lines.
    """
    src_files = glob('%s/*' % src_root)
    diag_list = []
    for path in src_files:
        # Single-argument, parenthesised print runs on Python 2 and 3.
        print('Read %s.' % path)
        # Close each file right after reading; the original leaked it.
        with open(path) as handle:
            records = handle.read().split('\n\n')[:-1]
        for record in records:
            fields = record.split('\n')
            diag_list.append(Dialogue(fields[0], fields[2], fields[1]))
    print('Read diag length: %s' % len(diag_list))
    return diag_list
class chatbot:
    """Console chatbot that decodes replies token by token with Seq2Seq."""

    def __init__(self, voc_path):
        """Build the dialogue helper and model, then restore the checkpoint.

        Args:
            voc_path: Path passed to the ``Dialogue`` constructor.

        Raises:
            IOError: If no checkpoint state is found under ``./model``.
        """
        self.dialogue = Dialogue(voc_path)
        self.model = Seq2Seq(self.dialogue.voc_size)

        # Load the model. get_checkpoint_state returns None when there is
        # no checkpoint, so report that explicitly.
        ckpt = tf.train.get_checkpoint_state('./model')
        if not ckpt or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in './model'")

        self.sess = tf.Session()
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Prompt on stdout and print a reply for every stdin line until EOF."""
        sys.stdout.write("> ")
        sys.stdout.flush()
        line = sys.stdin.readline()

        while line:
            print(self.get_replay(line.strip()))  # response

            sys.stdout.write("\n> ")
            sys.stdout.flush()
            line = sys.stdin.readline()

    def _decode(self, enc_input, dec_input):
        """Pad one encoder/decoder pair and run a single prediction.

        Args:
            enc_input: Sequence of encoder token ids.
            dec_input: List (or ``np.ndarray``) of decoder token ids so far.

        Returns:
            Whatever ``self.model.predict`` returns.
        """
        if isinstance(dec_input, np.ndarray):
            dec_input = dec_input.tolist()

        # Use a bucket about 1.5x the input length.
        input_len = int(math.ceil((len(enc_input) + 1) * 1.5))

        enc_input, dec_input, _ = self.dialogue.transform(
            enc_input, dec_input, input_len, self.dialogue.max_output_len)

        return self.model.predict(self.sess, [enc_input], [dec_input])

    # Return the reply to msg.
    def get_replay(self, msg):
        """Generate a reply to ``msg`` one token at a time.

        Args:
            msg: Raw user message.

        Returns:
            The result of ``Dialogue.decode`` on the collected tokens.
        """
        enc_input = self.dialogue.tokens_to_ids(self.dialogue.tokenizer(msg))
        dec_input = []

        # Feed the decoder's output back in as the decoder's input.
        curr_seq = 0
        for _ in range(self.dialogue.max_output_len):
            outputs = self._decode(enc_input, dec_input)
            token = outputs[0][curr_seq]
            if self.dialogue.is_eos(token):
                break
            elif self.dialogue.is_defined(token) is not True:
                # Only tokens that is_defined() does not flag are kept;
                # the position advances only when a token is kept.
                dec_input.append(token)
                curr_seq += 1

        # Return as a string.
        return self.dialogue.decode([dec_input], True)
def main(_):
    """Load the vocabulary and data set, then train for 1000 epochs.

    Args:
        _: Unused positional argument.
    """
    dialog = Dialogue()
    dialog.load_vocab('./data/dictionary.txt')
    dialog.load_data('./data/dict_idx_1by10.npy')

    train(dialog, epoch=1000)  # training
def main(_):
    """Load the chat vocabulary and log, then run the test pass.

    Args:
        _: Unused positional argument.
    """
    dialog = Dialogue()
    dialog.load_vocab('./data/chat.voc')
    dialog.load_data('./data/chat.log')

    # Training (train(dialog, epoch=100)) is switched off; only test runs.
    test(dialog)  # test
class Repository:
    """Static game data: dialogues loaded from JSON plus factory helpers.

    The class body runs once when the class is defined: it reads
    ``resources\\dialogues.json`` and fills ``dialogues`` with one
    ``Dialogue`` per entry, keyed by the entry's ``'key'``.
    """

    # Close the file as soon as it is parsed; the original passed an
    # anonymous open() straight to json.load and never closed it.
    # NOTE(review): the backslash separator makes this path Windows-only;
    # confirm before running on another platform.
    with open(r'resources\dialogues.json', 'r',
              encoding="utf-8") as _dialogues_file:
        data_dialogues = json.load(_dialogues_file)
    del _dialogues_file

    dialogues = {}

    # Every key that exists, so branches to missing dialogues can be spotted.
    dialog_keys = []
    for dialog in data_dialogues:
        dialog_keys.append(dialog['key'])

    for raw_dialog in data_dialogues:
        choices = []
        for choice in raw_dialog['dialogBranches']:
            if choice['nextDialogKey'] in dialog_keys:
                choices.append(
                    SayPhrase(choice['branchName'], choice['nextDialogKey']))
            else:
                # Target dialogue is not in the file: end the dialogue and
                # prefix the branch name with the "(В разработке)" marker.
                choices.append(
                    EndDialogue('(В разработке)' + choice['branchName']))
        dialogues[raw_dialog['key']] = Dialogue(None, choices,
                                                raw_dialog['dialogText'])

    @staticmethod
    def get_dialogue(dialogue_id):
        """Return the dialogue stored under ``dialogue_id``.

        Raises:
            KeyError: If no dialogue with that id was loaded.
        """
        return Repository.dialogues[dialogue_id]

    @staticmethod
    def get_location(id):
        """Return the starting location.

        The ``id`` argument is currently ignored (see TODO); the same
        location is built on every call.
        """
        # TODO AAA
        start = [NPC('start', '1')]
        return Location(start,
                        'История первая: "Прототип движения по диалогам"')

    @staticmethod
    def initial_game_state():
        """Return a ``GameState`` positioned at ``'start_location'``."""
        return GameState(Repository.get_location('start_location'))
def process_dir(src_dir, dest_file):
    """Convert every ``*.tsv`` dialogue in ``src_dir`` into ``dest_file``.

    For each valid dialogue three lines are written (file name, users,
    utterances), followed by a blank line. Progress is shown on stdout
    every 100 files.

    Args:
        src_dir: Directory searched (non-recursively) for ``*.tsv`` files.
        dest_file: Path of the output file; it is overwritten.
    """
    # Single-argument, parenthesised print runs on Python 2 and 3.
    print('%s -> %s' % (src_dir, dest_file))
    diag_files = glob(src_dir + '/*.tsv')
    total = len(diag_files)
    print('Total %d files need to be process.' % total)

    # ``with`` closes the output even if a dialogue fails to parse.
    with open(dest_file, 'w') as out_:
        for idx, diag_file in enumerate(diag_files):
            diag = Dialogue(diag_file)
            diag.clean_string = True
            diag.enable_tags = False
            diag.initialize()
            if diag.isvalid:
                record = '\n'.join(
                    [diag.filename, diag.print_users(), diag.print_utters()])
                # Record, newline, then one empty line as separator.
                out_.write(record + '\n\n')
            if idx % 100 == 0:
                sys.stdout.write('\r%.2f%%' % (100.0 * idx / total))
                sys.stdout.flush()

    # The original printed '\r100.00%%' without %-formatting, so two
    # percent signs appeared on screen.
    print('\r100.00%')
class chatbot:
    """Console chatbot that feeds the whole conversation to an Hred model.

    Sentences are split into characters before being mapped to ids.
    """

    def __init__(self, voc_path):
        """Load the vocabulary, build the Hred model and restore weights.

        Args:
            voc_path: Path passed to ``Dialogue.load_vocab``.

        Raises:
            IOError: If no checkpoint state is found under ``./model``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)

        self.model = Hred(self.dialogue.voc_size, False, 1)

        # Load the model. get_checkpoint_state returns None when there is
        # no checkpoint, so report that explicitly.
        ckpt = tf.train.get_checkpoint_state('./model')
        if not ckpt or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in './model'")

        self.sess = tf.Session()
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Chat on stdin/stdout until EOF, keeping the full history.

        Both user lines and the bot's replies are appended to the history
        that is passed to ``get_replay`` on every turn.
        """
        sys.stdout.write("> ")
        sys.stdout.flush()

        sentences = []
        line = sys.stdin.readline()
        sentences.append(line.strip())

        while line:
            reply = self.get_replay(sentences)
            print(reply)  # response
            sentences.append(reply)

            sys.stdout.write("\n> ")
            sys.stdout.flush()
            line = sys.stdin.readline()
            sentences.append(line.strip())

    def _decode(self, enc_input, dec_input):
        """Truncate, pad and batch the sentences, then run one prediction.

        Args:
            enc_input: List of id sequences, one per sentence.
            dec_input: List of id sequences, at least as long as
                ``enc_input``.

        Returns:
            Whatever ``self.model.predict`` returns.
        """
        enc_len = []
        dec_len = []
        enc_batch = []
        dec_batch = []

        for i, enc_in in enumerate(enc_input):
            if len(enc_in) > 70:
                enc_in = enc_in[0:70]
            dec_in = dec_input[i]
            # One slot is kept free: the decoder length is len + 1.
            if len(dec_in) > 69:
                dec_in = dec_in[0:69]

            enc, dec, _ = self.dialogue.transform(enc_in, dec_in, 70, 70)
            enc_batch.append(enc)
            dec_batch.append(dec)
            enc_len.append(len(enc_in))
            dec_len.append(len(dec_in) + 1)

        # dec isn't needed at predict time, but the model declares it as a
        # placeholder, so we pass something (enc) for now.
        context_size = len(enc_input)
        b = np.max(dec_len, 0)
        return self.model.predict(self.sess, [enc_batch], [enc_len],
                                  [dec_batch], [dec_len], [b], context_size)

    def get_replay(self, sentences):
        """Return the model's reply given the conversation so far.

        Args:
            sentences: List of sentence strings, oldest first.

        Returns:
            The decoded last output after ``Dialogue.cut_eos``.
        """
        enc_input = [
            self.dialogue.tokens_to_ids(list(sentence))
            for sentence in sentences
        ]
        dec_input = enc_input

        outputs = self._decode(enc_input, dec_input)
        reply = self.dialogue.decode([outputs[len(enc_input) - 1]], True)
        return self.dialogue.cut_eos(reply)
class chatbot:
    """Console chatbot that feeds the whole conversation to an Hred model.

    Sentences are tokenized with ``Dialogue.tokenizer`` before being
    mapped to ids.
    """

    def __init__(self, voc_path):
        """Load the vocabulary, build the Hred model and restore weights.

        Args:
            voc_path: Path passed to ``Dialogue.load_vocab``.

        Raises:
            IOError: If no checkpoint state is found under ``./model``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)

        self.model = Hred(self.dialogue.voc_size, False, 1)

        # Load the model. get_checkpoint_state returns None when there is
        # no checkpoint, so report that explicitly.
        ckpt = tf.train.get_checkpoint_state('./model')
        if not ckpt or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in './model'")

        self.sess = tf.Session()
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Chat on stdin/stdout until EOF, keeping the full history.

        Both user lines and the bot's replies are appended to the history
        that is passed to ``get_replay`` on every turn.
        """
        sys.stdout.write("> ")
        sys.stdout.flush()

        sentences = []
        line = sys.stdin.readline()
        sentences.append(line.strip())

        while line:
            reply = self.get_replay(sentences)
            print(reply)  # response
            sentences.append(reply)

            sys.stdout.write("\n> ")
            sys.stdout.flush()
            line = sys.stdin.readline()
            sentences.append(line.strip())

    def _decode(self, enc_inputs, dec_inputs):
        """Truncate, pad and batch the sentences, then run one prediction.

        Args:
            enc_inputs: List of token-id sequences, one per sentence.
            dec_inputs: List of token-id sequences, at least as long as
                ``enc_inputs``.

        Returns:
            Whatever ``self.model.predict`` returns.
        """
        enc_len = []
        dec_len = []
        enc_batch = []
        dec_batch = []

        for i, enc_input in enumerate(enc_inputs):
            if len(enc_input) > 25:
                enc_input = enc_input[0:25]
            dec_input = dec_inputs[i]
            # One slot is kept free: the decoder length is len + 1.
            if len(dec_input) > 24:
                dec_input = dec_input[0:24]

            enc, dec, _ = self.dialogue.transform(enc_input, dec_input,
                                                  25, 25)
            enc_batch.append(enc)
            dec_batch.append(dec)
            enc_len.append(len(enc_input))
            dec_len.append(len(dec_input) + 1)

        context_size = len(enc_inputs)
        b = np.max(dec_len, 0)
        return self.model.predict(self.sess, [enc_batch], [enc_len],
                                  [dec_batch], [dec_len], [b], context_size)

    # Return the reply to the conversation.
    def get_replay(self, sentences):
        """Return the model's reply given the conversation so far.

        Args:
            sentences: List of sentence strings, oldest first.

        Returns:
            The decoded last output after ``Dialogue.cut_eos``.
        """
        enc_input = [
            self.dialogue.tokens_to_ids(self.dialogue.tokenizer(sentence))
            for sentence in sentences
        ]
        dec_input = enc_input

        outputs = self._decode(enc_input, dec_input)
        reply = self.dialogue.decode([outputs[len(enc_input) - 1]], True)
        return self.dialogue.cut_eos(reply)
def test(batch_size=10):
    """Run the restored Hred model over the test set and decode the results.

    Expected and predicted sequences are decoded and collected in two
    lists; within this function they are not used further or returned.

    Args:
        batch_size: Number of items requested from ``next_batch`` per step.

    Raises:
        IOError: If no checkpoint state is found under ``./model``.
    """
    data_path = './data/dict_idx_char_test.npy'
    vocab_path = './data/char_dictionary.txt'

    dialog = Dialogue()
    dialog.load_vocab(vocab_path)
    dialog.load_data(data_path)

    print("\n=== 예측 테스트 ===")

    model = Hred(dialog.voc_size, False, 1)

    with tf.Session() as sess:
        # get_checkpoint_state returns None when there is no checkpoint;
        # check before touching model_checkpoint_path.
        ckpt = tf.train.get_checkpoint_state('./model')
        if not ckpt or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in './model'")
        print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        all_expect = []
        all_predict = []

        total_batch = int(math.ceil(len(dialog.seq_data) / float(batch_size)))

        for _ in range(total_batch):
            enc_input, enc_length, dec_input, dec_length, targets = \
                dialog.next_batch(batch_size)

            # Every fed value is wrapped in one extra list.
            expects, outputs = sess.run(
                [model.targets, model.outputs],
                feed_dict={
                    model.enc_input_idx: [enc_input],
                    model.dec_input_idx: [dec_input],
                    model.enc_length: [enc_length],
                    model.dec_length: [dec_length],
                    model.targets: [targets],
                    model.dec_max_length: [np.max(dec_length, 0)],
                    model.context_size: len(enc_input)
                })

            for i in range(len(outputs)):
                all_expect.append(
                    dialog.cut_eos(dialog.decode([expects[0][i]], True)))
                all_predict.append(
                    dialog.cut_eos(dialog.decode([outputs[i]], True)))
class chatbot:
    """Console chatbot that feeds the whole conversation to an Hred model.

    Sentences are padded to a bucket sized from the longest sentence.
    """

    def __init__(self, voc_path):
        """Load the vocabulary, build the Hred model and restore weights.

        Args:
            voc_path: Path passed to ``Dialogue.load_vocab``.

        Raises:
            IOError: If no checkpoint state is found under ``./model``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)

        self.model = Hred(self.dialogue.voc_size, False)

        # Load the model. get_checkpoint_state returns None when there is
        # no checkpoint, so report that explicitly.
        ckpt = tf.train.get_checkpoint_state('./model')
        if not ckpt or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in './model'")

        self.sess = tf.Session()
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Chat on stdin/stdout until EOF, keeping the full history.

        Both user lines and the bot's replies are appended to the history
        that is passed to ``get_replay`` on every turn.
        """
        sys.stdout.write("> ")
        sys.stdout.flush()

        sentences = []
        line = sys.stdin.readline()
        sentences.append(line.strip())

        while line:
            reply = self.get_replay(sentences)
            print(reply)  # response
            sentences.append(reply)

            sys.stdout.write("\n> ")
            sys.stdout.flush()
            line = sys.stdin.readline()
            sentences.append(line.strip())

    def _decode(self, enc_input, dec_input):
        """Pad and batch the sentences, then run one prediction.

        Args:
            enc_input: List of token-id lists, one per sentence.
            dec_input: List of token-id lists, at least as long as
                ``enc_input``.

        Returns:
            Whatever ``self.model.predict`` returns.
        """
        enc_len = []
        dec_len = []
        enc_batch = []
        dec_batch = []

        # Bucket size: 1.5x (longest sentence + 1). The original used
        # len(enc_input), i.e. the NUMBER of sentences, so any sentence
        # longer than that bucket could not be padded to a common length.
        # NOTE(review): assumes the model accepts variable padded lengths.
        longest = max([len(sentence) for sentence in enc_input] or [0])
        input_len = int(math.ceil((longest + 1) * 1.5))

        for i, enc_in in enumerate(enc_input):
            dec_in = dec_input[i]
            enc, dec, _ = self.dialogue.transform(enc_in, dec_in,
                                                  input_len, input_len)
            enc_batch.append(enc)
            dec_batch.append(dec)
            enc_len.append(len(enc_in))
            dec_len.append(len(dec_in) + 1)

        # dec isn't needed at predict time, but the model declares it as a
        # placeholder, so we pass something (enc) for now.
        return self.model.predict(self.sess, enc_batch, enc_len,
                                  dec_batch, dec_len, len(enc_batch))

    # Return the reply to the conversation.
    def get_replay(self, sentences):
        """Return the model's reply given the conversation so far.

        Args:
            sentences: List of sentence strings, oldest first.

        Returns:
            The decoded last output after ``Dialogue.cut_eos``.
        """
        enc_input = [
            self.dialogue.tokens_to_ids(self.dialogue.tokenizer(sentence))
            for sentence in sentences
        ]
        dec_input = enc_input

        outputs = self._decode(enc_input, dec_input)
        reply = self.dialogue.decode([outputs[len(enc_input) - 1]], True)
        return self.dialogue.cut_eos(reply)
def train(batch_size=10, epoch=500):
    """Train the Hred model, resuming from ``./model`` when possible.

    Each step draws 10 batches from the data set and feeds them together
    in one ``sess.run``. The loss is reported and a checkpoint is written
    every 700 steps.

    Args:
        batch_size: Number of items requested from ``next_batch`` per batch.
        epoch: Number of passes; total steps are ``total_batch * epoch``.
    """
    data_path = './data/dict_idx_all_training.npy'
    vocab_path = './data/words.npy'

    dialog = Dialogue()
    dialog.load_vocab(vocab_path)
    dialog.load_data(data_path)

    model = Hred(dialog.voc_size, dialog.embedding_matrix, True, 10)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state('./model')
        resumable = ckpt and tf.train.checkpoint_exists(
            ckpt.model_checkpoint_path)
        if not resumable:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())
        else:
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)

        # Start training.
        total_batch = int(
            math.ceil(len(dialog.seq_data) / (float(batch_size) * 10)))

        for step in range(total_batch * epoch):
            # Draw 10 consecutive batches and regroup them column-wise.
            drawn = [dialog.next_batch(batch_size) for _ in range(10)]
            enc_inputs, enc_lengths, dec_inputs, dec_lengths, targets = (
                list(column) for column in zip(*drawn))

            max_dec_lengths = np.max(dec_lengths, 1)
            # Sized from the last batch drawn, as in the original loop.
            context_size = len(enc_inputs[-1])

            feed = {
                model.enc_input_idx: enc_inputs,
                model.dec_input_idx: dec_inputs,
                model.enc_length: enc_lengths,
                model.dec_length: dec_lengths,
                model.targets: targets,
                model.dec_max_length: max_dec_lengths,
                model.context_size: context_size,
            }
            _, loss = sess.run([model.train_op, model.cost], feed_dict=feed)

            if (step + 1) % 700 == 0:
                sys.stdout.write(str(loss))
                print('Step:', '%06d' % model.global_step.eval(), 'cost =',
                      '{:.6f}'.format(loss))
                model.saver.save(sess, './model/conversation.ckpt',
                                 global_step=model.global_step)

    print('최적화 완료!')
def main(_):
    """Build the dialogue data from the chat log and run the test pass.

    Args:
        _: Unused positional argument.
    """
    chat_log = './data/chat.log'
    dialog = Dialogue(chat_log)

    # Training (train(dialog, epoch=1000)) is switched off; only test runs.
    test(dialog)  # test
def main(_):
    """Build the dialogue data from the chat log and train for 2000 epochs.

    Args:
        _: Unused positional argument.
    """
    chat_log = './data/chat.log'
    dialog = Dialogue(chat_log)

    train(dialog, epoch=2000)  # training
def testnpc():
    """Run the dialogue for the player's current zone, then show the prompt.

    Afterwards the zone's ``ActiveCase`` entry is set to ``False`` and the
    function pauses for half a second before calling ``prompt``.
    """
    zone_spec = ZoneMap[Player.pos][SPEC]
    Dialogue(zone_spec)
    # Player.pos is read again on purpose, exactly as the original did.
    ActiveCase[Player.pos] = False
    time.sleep(0.5)
    prompt()