Esempio n. 1
0
 def __init__(self, voc_path):
     """Set up the vocabulary, an ``Hred`` model and a session, then restore weights.

     Args:
         voc_path: Path handed to ``Dialogue.load_vocab``.
     """
     self.dialogue = vocabulary = Dialogue()
     vocabulary.load_vocab(voc_path)
     self.model = Hred(vocabulary.voc_size, False)
     self.sess = tf.Session()

     # Load the model from the checkpoint state found under ./model.
     checkpoint_state = tf.train.get_checkpoint_state('./model')
     self.model.saver.restore(self.sess,
                              checkpoint_state.model_checkpoint_path)
Esempio n. 2
0
class chatbot:
    """Interactive console chatbot backed by a ``Seq2Seq`` model."""

    def __init__(self, voc_path):
        """Build the dialogue helper, the model and a session, then restore weights.

        Args:
            voc_path: Passed straight to the ``Dialogue`` constructor.
        """
        self.dialogue = Dialogue(voc_path)
        self.model = Seq2Seq(self.dialogue.input_max_len, self.dialogue.output_max_len, self.dialogue.voc_size, self.dialogue.word_embedding_matrix, False)

        self.sess = tf.Session()

        # Load the model from the checkpoint state found under ./model.
        ckpt = tf.train.get_checkpoint_state('./model')
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Read lines from stdin until EOF and print a reply for each one."""
        sys.stdout.write("> ")
        sys.stdout.flush()
        line = sys.stdin.readline()

        while line:
            print(self.get_replay(line.strip()))  # the response

            sys.stdout.write("\n> ")
            sys.stdout.flush()

            line = sys.stdin.readline()

    def _decode(self, enc_input, dec_input):
        """Run one prediction for a single encoder/decoder input pair.

        Args:
            enc_input: Sequence of token ids for the encoder.
            dec_input: Sequence (list or ``np.ndarray``) of decoder token ids.

        Returns:
            Whatever ``self.model.predict`` returns for this single pair.
        """
        # isinstance also covers ndarray subclasses, unlike an exact type test.
        if isinstance(dec_input, np.ndarray):
            dec_input = dec_input.tolist()

        # Lengths are measured before transform() reshapes the sequences.
        enc_len = len(enc_input)
        dec_len = len(dec_input)

        enc_input, dec_input, _ = self.dialogue.transform(enc_input, dec_input, self.dialogue.input_max_len, self.dialogue.output_max_len)
        return self.model.predict(self.sess, [enc_input], [enc_len], [dec_input], [dec_len+1])

    def get_replay(self, msg):
        """Return the reply to ``msg``.

        Args:
            msg: The raw user message.

        Returns:
            The decoded (empty) decoder input when ``self.dialogue.is_eos``
            accepts the prediction, otherwise an empty string.
        """
        enc_input = self.dialogue.tokenizer(msg)
        enc_input = self.dialogue.tokens_to_ids(enc_input)
        dec_input = []

        outputs = self._decode(enc_input, dec_input)

        # Bind the reply up front: previously a non-EOS prediction left
        # `reply` unassigned and the method died with UnboundLocalError,
        # which also killed the run() loop.
        reply = ''
        if self.dialogue.is_eos(outputs):
            reply = self.dialogue.decode([dec_input], True)

        # NOTE(review): the step-by-step decoding loop below is disabled in
        # the original; it is kept for reference only.
        # Feed the decoder's output back in as the decoder's input.
        # curr_seq = 0
        # for i in range(self.dialogue.max_seq_len):
        #     outputs = self._decode(enc_input, dec_input)
        #     if self.dialogue.is_eos(outputs[0][curr_seq]):
        #         break
        #     elif self.dialogue.is_defined(outputs[0][curr_seq]) is not True:
        #         dec_input.append(outputs[0][curr_seq])
        #         curr_seq += 1
        #
        # # Return as a string.
        # reply = self.dialogue.decode([dec_input], True)

        return reply
Esempio n. 3
0
    def __init__(self, voc_path):
        """Create the dialogue helper, the ``Seq2Seq`` model and a session.

        The model weights are then restored from the checkpoint state found
        under ``./model``.

        Args:
            voc_path: Passed straight to the ``Dialogue`` constructor.
        """
        dialogue = Dialogue(voc_path)
        self.dialogue = dialogue
        self.model = Seq2Seq(
            dialogue.input_max_len,
            dialogue.output_max_len,
            dialogue.voc_size,
            dialogue.word_embedding_matrix,
            False,
        )
        self.sess = tf.Session()

        # Load the model.
        checkpoint_state = tf.train.get_checkpoint_state('./model')
        self.model.saver.restore(self.sess,
                                 checkpoint_state.model_checkpoint_path)
Esempio n. 4
0
    def __init__(self, npc_list, text):
        """Store the NPC list and build a dialogue offering one choice per NPC.

        Args:
            npc_list: Iterable of NPCs; each one is wrapped in ``GoToNpc``.
            text: Passed as the third argument to ``Dialogue``.
        """
        self.npc_list = npc_list
        go_to_choices = [GoToNpc(npc) for npc in npc_list]
        self.dialogue = Dialogue(None, go_to_choices, text)
Esempio n. 5
0
class chatbot:
    """Chatbot wrapper around an ``Hred`` model that answers one stored message."""

    def __init__(self, voc_path):
        """Load the vocabulary, build the model and session, restore weights.

        Args:
            voc_path: Path handed to ``Dialogue.load_vocab``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)
        self.model = Hred(self.dialogue.voc_size, False)
        self.sess = tf.Session()
        # Load the model from the checkpoint state found under ./model.
        ckpt = tf.train.get_checkpoint_state('./model')
        self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)

    def run(self):
        """Return the reply to the message last stored via ``kakao_input``.

        Raises:
            AttributeError: If ``kakao_input`` has not been called yet.
        """
        # The original read an undefined global `inputs`; the message is
        # actually stored on the instance by kakao_input().
        sentences = [self.inputs.strip()]
        return self.get_replay(sentences)

    def _decode(self, enc_input, dec_input):
        """Pad every sentence and run a single prediction over the batch.

        Args:
            enc_input: List of token-id sequences, one per sentence.
            dec_input: Ignored. The decoder side is not needed for
                prediction, but the model declares a placeholder for it, so
                the encoder sentences are fed there as well.

        Returns:
            Whatever ``self.model.predict`` returns for the batch.

        Raises:
            ValueError: If ``enc_input`` is empty.
        """
        enc_len = []
        dec_len = []

        enc_batch = []
        dec_batch = []

        for sentence in enc_input:
            # Limit the encoder side to 25 tokens and the decoder side to 24
            # (its recorded length is len + 1), matching the 25/25 sizes
            # given to transform(). The original loop only rebound its loop
            # variable, so nothing was actually truncated.
            enc_in = sentence[:25]
            dec_in = sentence[:24]

            enc, dec, _ = self.dialogue.transform(enc_in, dec_in, 25, 25)
            enc_batch.append(enc)
            dec_batch.append(dec)
            enc_len.append(len(enc_in))
            dec_len.append(len(dec_in) + 1)

        # These two values were referenced but never defined in the original.
        context_size = len(enc_input)
        b = max(dec_len)

        return self.model.predict(self.sess, [enc_batch], [enc_len],
                                  [dec_batch], [dec_len], [b], context_size)

    def get_replay(self, sentences):
        """Return the reply for the last entry of ``sentences``.

        Args:
            sentences: List of raw message strings.

        Returns:
            The decoded output at the index of the last sentence, passed
            through ``self.dialogue.cut_eos``.
        """
        enc_input = [
            self.dialogue.tokens_to_ids(self.dialogue.tokenizer(sentence))
            for sentence in sentences
        ]
        dec_input = enc_input

        outputs = self._decode(enc_input, dec_input)
        reply = self.dialogue.decode([outputs[len(enc_input) - 1]], True)
        reply = self.dialogue.cut_eos(reply)

        return reply

    def kakao_input(self, inputs):
        """Store ``inputs`` as the message the next ``run()`` call answers."""
        self.inputs = inputs
def read_format_data(src_root):
    """Read every file directly under ``src_root`` into ``Dialogue`` objects.

    Each file is split into records on blank lines and the final chunk after
    the last blank line is discarded. The first three lines of a record are
    passed to ``Dialogue`` as (line 0, line 2, line 1).

    Args:
        src_root: Directory whose entries (``<src_root>/*``) are read.

    Returns:
        A list with one ``Dialogue`` per record, in read order.
    """
    diag_list = []
    for path in glob('%s/*' % src_root):
        # Single-argument print() behaves the same on Python 2 and 3.
        print('Read %s.' % path)
        # Close the handle deterministically instead of leaking it.
        with open(path) as src:
            records = src.read().split('\n\n')[:-1]
        for record in records:
            fields = record.split('\n')
            diag_list.append(Dialogue(fields[0], fields[2], fields[1]))
    print('Read diag length: %d' % len(diag_list))
    return diag_list
Esempio n. 7
0
class chatbot:
    """Console chatbot that builds its reply token by token with ``Seq2Seq``."""

    def __init__(self, voc_path):
        """Create the dialogue helper, model and session, then restore weights.

        Args:
            voc_path: Passed straight to the ``Dialogue`` constructor.
        """
        self.dialogue = Dialogue(voc_path)
        self.model = Seq2Seq(self.dialogue.voc_size)
        self.sess = tf.Session()

        # Load the model.
        checkpoint_state = tf.train.get_checkpoint_state('./model')
        self.model.saver.restore(self.sess,
                                 checkpoint_state.model_checkpoint_path)

    def run(self):
        """Prompt on stdout and answer each stdin line until EOF."""
        sys.stdout.write("> ")
        sys.stdout.flush()

        # readline() returns '' only at EOF, which ends the loop.
        for line in iter(sys.stdin.readline, ''):
            print(self.get_replay(line.strip()))  # the response

            sys.stdout.write("\n> ")
            sys.stdout.flush()

    def _decode(self, enc_input, dec_input):
        """Run one prediction for a single encoder/decoder input pair.

        Args:
            enc_input: Sequence of encoder token ids.
            dec_input: Decoder token ids, as a list or an ``np.ndarray``.

        Returns:
            Whatever ``self.model.predict`` returns for the pair.
        """
        dec_input = (dec_input.tolist()
                     if type(dec_input) is np.ndarray else dec_input)

        # Use a bucket roughly 1.5x the input length (plus one).
        bucket_len = int(math.ceil(1.5 * (len(enc_input) + 1)))

        padded_enc, padded_dec, _ = self.dialogue.transform(
            enc_input, dec_input, bucket_len, self.dialogue.max_output_len)

        return self.model.predict(self.sess, [padded_enc], [padded_dec])

    def get_replay(self, msg):
        """Return the reply to ``msg`` as decoded by ``self.dialogue``.

        Args:
            msg: The raw user message.
        """
        token_ids = self.dialogue.tokens_to_ids(self.dialogue.tokenizer(msg))
        generated = []

        # Feed the decoder's output back in as the decoder's input.
        position = 0
        for _ in range(self.dialogue.max_output_len):
            token = self._decode(token_ids, generated)[0][position]
            if self.dialogue.is_eos(token):
                break
            if self.dialogue.is_defined(token) is not True:
                generated.append(token)
                position += 1

        # Return the collected tokens as a string.
        return self.dialogue.decode([generated], True)
Esempio n. 8
0
def main(_):
    """Load vocabulary and data into a ``Dialogue`` and run ``train`` on it.

    Args:
        _: Unused positional argument.
    """
    dialog = Dialogue()
    dialog.load_vocab('./data/dictionary.txt')
    dialog.load_data('./data/dict_idx_1by10.npy')

    train(dialog, epoch=1000)  # training
Esempio n. 9
0
def main(_):
    """Load vocabulary and chat data into a ``Dialogue`` and run ``test`` on it.

    Args:
        _: Unused positional argument.
    """
    dialog = Dialogue()
    dialog.load_vocab('./data/chat.voc')
    dialog.load_data('./data/chat.log')

    # train(dialog, epoch=100)  # training (currently disabled)
    test(dialog)  # test
Esempio n. 10
0
class Repository:
    """Static registry of dialogues built from the dialogues JSON resource.

    Everything in the class body runs once, when the class is defined: the
    JSON file is parsed and one ``Dialogue`` per record is stored in
    ``dialogues`` under the record's ``'key'``.
    """

    # Parse the raw records; the context manager closes the file handle
    # instead of leaving it to the garbage collector.
    with open(r'resources\dialogues.json', 'r', encoding="utf-8") as _source:
        data_dialogues = json.load(_source)
    del _source  # keep the temporary handle out of the class namespace
    # data_map = json.load(open(r'resources\map.json', 'r', encoding="utf-8"))

    dialogues = {}

    # Keys of every known dialogue, used to validate branch targets below.
    dialog_keys = []
    for dialog in data_dialogues:
        dialog_keys.append(dialog['key'])

    for raw_dialog in data_dialogues:
        choices = []
        for choice in raw_dialog['dialogBranches']:
            if choice['nextDialogKey'] in dialog_keys:
                choices.append(
                    SayPhrase(choice['branchName'], choice['nextDialogKey']))
            else:
                # The branch points at a dialogue that is not in the file:
                # end the dialogue and label the choice with the prefix.
                choices.append(
                    EndDialogue('(В разработке)' + choice['branchName']))
        dialogues[raw_dialog['key']] = Dialogue(None, choices,
                                                raw_dialog['dialogText'])

    @staticmethod
    def get_dialogue(dialogue_id):
        """Return the dialogue stored under ``dialogue_id``.

        Raises:
            KeyError: If no dialogue with that id was loaded.
        """
        return Repository.dialogues[dialogue_id]

    @staticmethod
    def get_location(id):
        """Return the start location; ``id`` is currently ignored."""
        # TODO AAA
        start = [NPC('start', '1')]
        return Location(start,
                        'История первая: "Прототип движения по диалогам"')

    @staticmethod
    def initial_game_state():
        """Return a ``GameState`` positioned at ``'start_location'``."""
        return GameState(Repository.get_location('start_location'))
def process_dir(src_dir, dest_file):
    """Convert every ``*.tsv`` dialogue in ``src_dir`` into one text file.

    For each valid dialogue, the file name, the users line and the utterances
    line are written to ``dest_file``, followed by a blank line. Progress is
    reported on stdout every 100 files.

    Args:
        src_dir: Directory searched for ``*.tsv`` files.
        dest_file: Path of the output file; it is overwritten.
    """
    # Single-argument print() behaves the same on Python 2 and 3.
    print('%s -> %s' % (src_dir, dest_file))
    diag_files = glob(src_dir + '/*.tsv')
    total = len(diag_files)
    print('Total %d files need to be process.' % total)

    # The context manager closes the output even if a dialogue fails to parse.
    with open(dest_file, 'w') as out_:
        for idx, diag_file in enumerate(diag_files):
            diag = Dialogue(diag_file)
            diag.clean_string = True
            diag.enable_tags = False
            diag.initialize()
            if diag.isvalid:
                record = '\n'.join(
                    [diag.filename, diag.print_users(), diag.print_utters()])
                # One newline ends the record, the second is the separator.
                out_.write(record + '\n\n')
            if idx % 100 == 0:
                sys.stdout.write('\r%.2f%%' % (100.0 * idx / total))
                sys.stdout.flush()

    # No % formatting is applied here, so a single '%' is the literal wanted;
    # the original '%%' printed two percent signs.
    print('\r100.00%')
Esempio n. 12
0
class chatbot:
    """Console chatbot that feeds the whole chat history to an ``Hred`` model."""

    def __init__(self, voc_path):
        """Load the vocabulary, build the model and session, restore weights.

        Args:
            voc_path: Path handed to ``Dialogue.load_vocab``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)
        self.model = Hred(self.dialogue.voc_size, False, 1)
        self.sess = tf.Session()

        # Load the model.
        checkpoint_state = tf.train.get_checkpoint_state('./model')
        self.model.saver.restore(self.sess,
                                 checkpoint_state.model_checkpoint_path)

    def run(self):
        """Chat over stdin/stdout until EOF, accumulating both sides' lines."""
        sys.stdout.write("> ")
        sys.stdout.flush()

        history = []
        while True:
            line = sys.stdin.readline()
            # Every line read, including the empty EOF read, is recorded.
            history.append(line.strip())
            if not line:
                break

            reply = self.get_replay(history)
            print(reply)  # the response
            history.append(reply)

            sys.stdout.write("\n> ")
            sys.stdout.flush()

    def _decode(self, enc_input, dec_input):
        """Pad each sentence pair and run one prediction over the batch.

        Args:
            enc_input: List of encoder id sequences, one per sentence.
            dec_input: List of decoder id sequences, indexed like
                ``enc_input``.

        Returns:
            Whatever ``self.model.predict`` returns for the batch.
        """
        enc_batch, dec_batch = [], []
        enc_len, dec_len = [], []

        for idx, enc_in in enumerate(enc_input):
            # Encoder side is capped at 70 ids, decoder side at 69.
            enc_in = enc_in[0:70] if len(enc_in) > 70 else enc_in
            dec_in = dec_input[idx]
            dec_in = dec_in[0:69] if len(dec_in) > 69 else dec_in

            padded_enc, padded_dec, _ = self.dialogue.transform(
                enc_in, dec_in, 70, 70)
            enc_batch.append(padded_enc)
            dec_batch.append(padded_dec)
            enc_len.append(len(enc_in))
            dec_len.append(len(dec_in) + 1)
            # dec is not needed for predict, but the model declares a
            # placeholder for it, so for now anything (enc) is passed.

        longest_dec = np.max(dec_len, 0)
        return self.model.predict(self.sess, [enc_batch], [enc_len],
                                  [dec_batch], [dec_len], [longest_dec],
                                  len(enc_input))

    def get_replay(self, sentences):
        """Return the reply for the last entry of ``sentences``.

        Args:
            sentences: List of strings; each one is split into characters
                before being mapped to ids.
        """
        enc_input = []
        for sentence in sentences:
            enc_input.append(self.dialogue.tokens_to_ids(list(sentence)))

        # The same id sequences are supplied for the decoder side.
        outputs = self._decode(enc_input, enc_input)
        last = outputs[len(enc_input) - 1]
        return self.dialogue.cut_eos(self.dialogue.decode([last], True))
Esempio n. 13
0
class chatbot:
    """Console chatbot that feeds the whole chat history to an ``Hred`` model."""

    def __init__(self, voc_path):
        """Load the vocabulary, build the model and session, restore weights.

        Args:
            voc_path: Path handed to ``Dialogue.load_vocab``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)
        self.model = Hred(self.dialogue.voc_size, False, 1)
        self.sess = tf.Session()

        # Load the model.
        checkpoint_state = tf.train.get_checkpoint_state('./model')
        self.model.saver.restore(self.sess,
                                 checkpoint_state.model_checkpoint_path)

    def run(self):
        """Chat over stdin/stdout until EOF, accumulating both sides' lines."""
        sys.stdout.write("> ")
        sys.stdout.flush()

        history = []
        while True:
            line = sys.stdin.readline()
            # Every line read, including the empty EOF read, is recorded.
            history.append(line.strip())
            if not line:
                break

            reply = self.get_replay(history)
            print(reply)  # the response
            history.append(reply)

            sys.stdout.write("\n> ")
            sys.stdout.flush()

    def _decode(self, enc_inputs, dec_inputs):
        """Pad each sentence pair and run one prediction over the batch.

        Args:
            enc_inputs: List of encoder id sequences, one per sentence.
            dec_inputs: List of decoder id sequences, indexed like
                ``enc_inputs``.

        Returns:
            Whatever ``self.model.predict`` returns for the batch.
        """
        enc_batch, dec_batch = [], []
        enc_len, dec_len = [], []

        for idx, enc_ids in enumerate(enc_inputs):
            # Encoder side is capped at 25 ids, decoder side at 24.
            if len(enc_ids) > 25:
                enc_ids = enc_ids[0:25]
            dec_ids = dec_inputs[idx]
            if len(dec_ids) > 24:
                dec_ids = dec_ids[0:24]

            padded_enc, padded_dec, _ = self.dialogue.transform(
                enc_ids, dec_ids, 25, 25)
            enc_batch.append(padded_enc)
            dec_batch.append(padded_dec)
            enc_len.append(len(enc_ids))
            dec_len.append(len(dec_ids) + 1)

        longest_dec = np.max(dec_len, 0)
        return self.model.predict(self.sess, [enc_batch], [enc_len],
                                  [dec_batch], [dec_len], [longest_dec],
                                  len(enc_inputs))

    def get_replay(self, sentences):
        """Return the reply for the last entry of ``sentences``.

        Args:
            sentences: List of raw message strings.
        """
        to_ids = self.dialogue.tokens_to_ids
        enc_input = []
        for sentence in sentences:
            enc_input.append(to_ids(self.dialogue.tokenizer(sentence)))

        # The same id sequences are supplied for the decoder side.
        outputs = self._decode(enc_input, enc_input)
        last = outputs[len(enc_input) - 1]
        return self.dialogue.cut_eos(self.dialogue.decode([last], True))
Esempio n. 14
0
def test(batch_size=10):
    """Restore the model from ``./model`` and decode predictions for the test data.

    Expected and predicted strings are collected batch by batch into local
    lists; the visible code does nothing further with them.

    Args:
        batch_size: Number of items requested from ``dialog.next_batch``.
    """
    dialog = Dialogue()
    dialog.load_vocab('./data/char_dictionary.txt')
    dialog.load_data('./data/dict_idx_char_test.npy')

    print("\n=== 예측 테스트 ===")
    model = Hred(dialog.voc_size, False, 1)

    with tf.Session() as sess:
        checkpoint_state = tf.train.get_checkpoint_state('./model')
        print("다음 파일에서 모델을 읽는 중 입니다..",
              checkpoint_state.model_checkpoint_path)

        model.saver.restore(sess, checkpoint_state.model_checkpoint_path)

        all_expect, all_predict = [], []

        n_batches = int(math.ceil(len(dialog.seq_data) / float(batch_size)))
        for _ in range(n_batches):
            batch = dialog.next_batch(batch_size)
            enc_input, enc_length, dec_input, dec_length, targets = batch

            # Each batch is wrapped in one extra list level before feeding.
            feed = {
                model.enc_input_idx: [enc_input],
                model.dec_input_idx: [dec_input],
                model.enc_length: [enc_length],
                model.dec_length: [dec_length],
                model.targets: [targets],
                model.dec_max_length: [np.max(dec_length, 0)],
                model.context_size: len(enc_input)
            }
            expects, outputs = sess.run([model.targets, model.outputs],
                                        feed_dict=feed)

            for i, predicted in enumerate(outputs):
                expected_text = dialog.decode([expects[0][i]], True)
                all_expect.append(dialog.cut_eos(expected_text))
                predicted_text = dialog.decode([predicted], True)
                all_predict.append(dialog.cut_eos(predicted_text))
Esempio n. 15
0
class chatbot:
    """Console chatbot that feeds the whole chat history to an ``Hred`` model."""

    def __init__(self, voc_path):
        """Load the vocabulary, build the model and session, restore weights.

        Args:
            voc_path: Path handed to ``Dialogue.load_vocab``.
        """
        self.dialogue = Dialogue()
        self.dialogue.load_vocab(voc_path)
        self.model = Hred(self.dialogue.voc_size, False)
        self.sess = tf.Session()

        # Load the model.
        checkpoint_state = tf.train.get_checkpoint_state('./model')
        self.model.saver.restore(self.sess,
                                 checkpoint_state.model_checkpoint_path)

    def run(self):
        """Chat over stdin/stdout until EOF, accumulating both sides' lines."""
        sys.stdout.write("> ")
        sys.stdout.flush()

        history = []
        while True:
            line = sys.stdin.readline()
            # Every line read, including the empty EOF read, is recorded.
            history.append(line.strip())
            if not line:
                break

            reply = self.get_replay(history)
            print(reply)  # the response
            history.append(reply)

            sys.stdout.write("\n> ")
            sys.stdout.flush()

    def _decode(self, enc_input, dec_input):
        """Pad each sentence pair and run one prediction over the batch.

        Args:
            enc_input: List of encoder id sequences, one per sentence.
            dec_input: List of decoder id sequences, indexed like
                ``enc_input``.

        Returns:
            Whatever ``self.model.predict`` returns for the batch.
        """
        enc_batch, dec_batch = [], []
        enc_len, dec_len = [], []

        # Bucket size. NOTE(review): this is derived from the number of
        # sentences, not from any sentence's token count - confirm intended.
        bucket_len = int(math.ceil(1.5 * (len(enc_input) + 1)))

        for idx, enc_ids in enumerate(enc_input):
            dec_ids = dec_input[idx]
            padded_enc, padded_dec, _ = self.dialogue.transform(
                enc_ids, dec_ids, bucket_len, bucket_len)
            enc_batch.append(padded_enc)
            dec_batch.append(padded_dec)
            enc_len.append(len(enc_ids))
            dec_len.append(len(dec_ids) + 1)
            # dec is not needed for predict, but the model declares a
            # placeholder for it, so for now anything (enc) is passed.

        batch_count = len(enc_batch)
        return self.model.predict(self.sess, enc_batch, enc_len, dec_batch,
                                  dec_len, batch_count)

    def get_replay(self, sentences):
        """Return the reply for the last entry of ``sentences``.

        Args:
            sentences: List of raw message strings.
        """
        to_ids = self.dialogue.tokens_to_ids
        enc_input = []
        for sentence in sentences:
            enc_input.append(to_ids(self.dialogue.tokenizer(sentence)))

        # The same id sequences are supplied for the decoder side.
        outputs = self._decode(enc_input, enc_input)
        last = outputs[len(enc_input) - 1]
        return self.dialogue.cut_eos(self.dialogue.decode([last], True))
Esempio n. 16
0
def train(batch_size=10, epoch=500):
    """Train the ``Hred`` model, resuming from ``./model`` when a checkpoint exists.

    Every step draws 10 batches from the dialogue data and feeds them together.
    On every 700th step the loss is reported and a checkpoint is saved.

    Args:
        batch_size: Number of items requested from each ``next_batch`` call.
        epoch: Multiplier applied to the number of steps per pass.
    """
    dialog = Dialogue()
    dialog.load_vocab('./data/words.npy')
    dialog.load_data('./data/dict_idx_all_training.npy')

    model = Hred(dialog.voc_size, dialog.embedding_matrix, True, 10)
    with tf.Session() as sess:
        checkpoint_state = tf.train.get_checkpoint_state('./model')
        if checkpoint_state and tf.train.checkpoint_exists(
                checkpoint_state.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..",
                  checkpoint_state.model_checkpoint_path)
            model.saver.restore(sess, checkpoint_state.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())

        # Start training.
        steps_per_epoch = int(
            math.ceil(len(dialog.seq_data) / (float(batch_size) * 10)))
        for step in range(steps_per_epoch * epoch):
            # Draw 10 batches, then regroup them field by field.
            batches = [dialog.next_batch(batch_size) for _ in range(10)]
            enc_inputs, enc_lengths, dec_inputs, dec_lengths, targets = [
                list(field) for field in zip(*batches)
            ]

            feed = {
                model.enc_input_idx: enc_inputs,
                model.dec_input_idx: dec_inputs,
                model.enc_length: enc_lengths,
                model.dec_length: dec_lengths,
                model.targets: targets,
                model.dec_max_length: np.max(dec_lengths, 1),
                # Size of the last batch drawn in this step.
                model.context_size: len(enc_inputs[-1])
            }
            _, loss = sess.run([model.train_op, model.cost], feed_dict=feed)

            if (step + 1) % 700 != 0:
                continue

            # Report progress, then save a checkpoint.
            sys.stdout.write(str(loss))
            print('Step:', '%06d' % model.global_step.eval(), 'cost =',
                  '{:.6f}'.format(loss))
            model.saver.save(sess,
                             './model/conversation.ckpt',
                             global_step=model.global_step)

        print('최적화 완료!')
Esempio n. 17
0
def main(_):
    """Build a ``Dialogue`` from ``./data/chat.log`` and run ``test`` on it.

    Args:
        _: Unused positional argument.
    """
    chat_log = Dialogue('./data/chat.log')

    # train(chat_log, epoch=1000)  # training (currently disabled)
    test(chat_log)  # test
Esempio n. 18
0
def main(_):
    """Build a ``Dialogue`` from ``./data/chat.log`` and run ``train`` on it.

    Args:
        _: Unused positional argument.
    """
    chat_log = Dialogue('./data/chat.log')
    train(chat_log, epoch=2000)  # training
Esempio n. 19
0
def testnpc():
    """Start the dialogue for the player's position, then return to the prompt.

    The ``ActiveCase`` entry for the player's position is set to ``False``
    after the ``Dialogue`` call, followed by a half-second pause.
    """
    spec = ZoneMap[Player.pos][SPEC]
    Dialogue(spec)
    # Player.pos is read again here on purpose, as in the original.
    ActiveCase[Player.pos] = False
    time.sleep(0.5)
    prompt()