Example No. 1
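All of these snippets assume a common set of module-level imports. A representative set, inferred from the calls the examples make (the exact set varies per example and is an assumption):

import os
import json
import time
import random
import pickle
from collections import defaultdict

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (tf.ConfigProto, tf.Session, ...)
from tqdm import tqdm
import jieba  # Chinese word segmentation, used by several examples
# some examples additionally use OpenCC, langconv's Converter, and
# matplotlib.pyplot / matplotlib.cm for the attention plots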
def test(params):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _ = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(device_count={
        'CPU': 1,
        'GPU': 0
    },
                            allow_soft_placement=True,
                            log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot.ckpt'

    # Inference section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            user_text = input('Input Chat Sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [list(user_text.lower())]
            # x_test = [word_tokenize(user_text)]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            # x = np.array([
            #     list(reversed(xx))
            #     for xx in x
            # ])
            print(x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(pred)
            # prob = np.exp(prob.transpose())
            print(ws.inverse_transform(x[0]))
            # print(ws.inverse_transform(pred[0]))
            # print(pred.shape, prob.shape)
            for p in pred:
                ans = ws.inverse_transform(p)
                print(ans)
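A hypothetical params dict for calling test(params); the keys are an assumption, mirroring the keyword arguments spelled out explicitly in Examples No. 5, 6 and 8:

params = {
    'bidirectional': True,
    'cell_type': 'lstm',
    'depth': 2,
    'attention_type': 'Bahdanau',
    'use_residual': False,
    'use_dropout': False,
    'time_major': False,
    'hidden_units': 512,
}
test(params)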
Example No. 2
def test(params, infos):

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow

    x_data, _ = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(
        device_count = {'CPU':1, 'GPU':0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=0,
        **params
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            # user_text = input('Please enter your sentence:')
            # if user_text in ('exit', 'quit'):
            #     exit(0)
            x_test = [list(infos.lower())]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)

            print(x, xl)

            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(pred)

            print(ws.inverse_transform(x[0]))

            # return the first decoded answer; this also makes the
            # surrounding while loop run only once
            for p in pred:
                ans = ws.inverse_transform(p)
                print(ans)
                return ans
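Usage sketch, assuming a params dict like the hypothetical one shown after Example No. 1:

ans = test(params, '你好')  # returns the first decoded reply to the sentence passed as `infos`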
Example No. 3
def nlp(question):
    """测试不同参数在生成的假数据上的运行结果"""
    params = json.load(open(os.path.dirname(__file__) + '/params.json'))
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow

    x_data, _ = pickle.load(
        open(os.path.dirname(__file__) + '/data/chatbot.pkl', 'rb'))
    ws = pickle.load(open(os.path.dirname(__file__) + '/data/ws.pkl', 'rb'))

    # for x in x_data[:5]:
    # print(' '.join(x))

    config = tf.ConfigProto(device_count={
        'CPU': 1,
        'GPU': 0
    },
                            allow_soft_placement=True,
                            log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = os.path.dirname(__file__) + '/data/s2ss_chatbot.ckpt'

    # Inference section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            # user_text = input('Input Chat Sentence:')
            question = Converter('zh-hans').convert(question)  # convert Traditional to Simplified Chinese

            if question in ('exit', 'quit'):
                exit(0)
            x_test = [list(question.lower())]
            # x_test = [word_tokenize(user_text)]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)

            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            # print(ws.inverse_transform(x[0]))   -->['</s>', '饭', '吃', '去', '要', '我']

            for p in pred:
                ans = ws.inverse_transform(p)
                str1 = ''.join(ans)  # join the token list into a str
                # NB: str.strip("</s>") strips the characters '<', '/', 's', '>'
                # from both ends, not the literal substring; here it serves to
                # trim the '</s>' tag
                line = str1.strip("</s>")
                line = Converter('zh-hant').convert(line)  # convert Simplified back to Traditional
            return line
Example No. 4
def start():

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow

    global input_list
    global response_dict
    global lock

    x_data, _ = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    config = tf.ConfigProto(device_count={
        'CPU': 1,
        'GPU': 0
    },
                            allow_soft_placement=True,
                            log_device_placement=False)

    save_path = './model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
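    # note: `params` is not defined inside this function; it is assumed to be
    # a module-level dict of model hyperparameters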
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    **params)
    init = tf.global_variables_initializer()

    print("线程-模型>>模型准备就绪")

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            lock.acquire()
            if input_list:
                request = input_list.pop(0)
                ip = request.get('IP')
                infos = request.get('infos')
                print("线程-模型>>成功获取来自", ip, "的请求,内容:", infos)
                x_test = [list(infos.lower())]
                bar = batch_flow([x_test], ws, 1)
                x, xl = next(bar)
                x = np.flip(x, axis=1)

                pred = model_pred.predict(sess, np.array(x), np.array(xl))

                for p in pred:
                    ans = "".join(ws.inverse_transform(p))
                    response_dict[ip] = ans
                    print("线程-模型>>完成处理来自", ip, "的请求,返回内容:", ans)
                    break
            lock.release()
            time.sleep(0.1)
Example No. 5
class chatbot_port(object):
    def __init__(self):
        self.cell_type = 'lstm'
        self.depth = 2
        self.bidirectional = True
        self.attention_type = 'Bahdanau'
        self.use_residual = False
        self.use_dropout = False
        print("!!!!")
        self.time_major = False
        self.hidden_units = 512
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)
        self.x_data, _, self.ws = pickle.load(
            open('chatbot_cut/chatbot.pkl', 'rb'))
        self.config = tf.ConfigProto(device_count={
            'CPU': 1,
            'GPU': 0
        },
                                     allow_soft_placement=True,
                                     log_device_placement=False)
        self.save_path = 'chatbot_cut/s2ss_chatbot_anti.ckpt'
        tf.reset_default_graph()
        self.model_pred = SequenceToSequence(
            input_vocab_size=len(self.ws),
            target_vocab_size=len(self.ws),
            batch_size=1,
            mode='decode',
            beam_width=64,
            bidirectional=self.bidirectional,
            cell_type=self.cell_type,
            depth=self.depth,
            attention_type=self.attention_type,
            use_residual=self.use_residual,
            use_dropout=self.use_dropout,
            parallel_iterations=1,
            time_major=self.time_major,
            hidden_units=self.hidden_units,
            share_embedding=True,
            pretrained_embedding=True)
        self.init = tf.global_variables_initializer()
        #sess.run(self.init)
        print("!!!!")

    def chat(self, user_text):
        with tf.Session(config=self.config) as sess:
            self.model_pred.load(sess, self.save_path)
            x_test = [jieba.lcut(user_text.lower())]
            bar = batch_flow([x_test], self.ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            pred = self.model_pred.predict(sess, np.array(x), np.array(xl))
            return ''.join(self.ws.inverse_transform(pred))
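A minimal usage sketch for this class, assuming the pickled data and checkpoint exist at the hard-coded paths:

bot = chatbot_port()
print(bot.chat('你好'))  # note: chat() opens a fresh tf.Session on every call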
Example No. 6
def chatbot_port(user_text, bidirectional=True, num=0, cell_type='lstm', depth=2,
         attention_type='Bahdanau', use_residual=False, use_dropout=False, time_major=False, hidden_units=512):
    random.seed(0)
    np.random.seed(0)
    tf.set_random_seed(0)
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    x_data, _, ws = pickle.load(open('chatbot_cut/chatbot.pkl', 'rb'))
    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )
    save_path = 'chatbot_cut/s2ss_chatbot_anti.ckpt'
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=64,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        parallel_iterations=1,
        time_major=time_major,
        hidden_units=hidden_units,
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()
    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)
        x_test = [jieba.lcut(user_text.lower())]
        print(user_text)
        bar = batch_flow([x_test], ws, 1)
        x, xl = next(bar)
        print(x, xl)
        x = np.flip(x, axis=1)
        pred = model_pred.predict(
            sess,
            np.array(x),
            np.array(xl)
        )
        print(pred)
        return ''.join(ws.inverse_transform(pred))
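Usage sketch for this functional variant; it rebuilds the graph and reloads the checkpoint on every call, so it is simple but slow:

reply = chatbot_port('你好吗')
print(reply)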
Example No. 7
def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl',
                                      'rb'))  # generated by extract_conv.py
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training
    """
    1. n_epoch is the number of training epochs.
    2. In principle, the more epochs, the higher the training accuracy.
    3. With a very large number of epochs (say 1000) the model may overfit,
       though whether it does also depends on the training data.
    4. The larger n_epoch is, the longer training takes.
    5. On an office machine with a P5000 GPU, 40 epochs took about 3 days and
       2 epochs about an hour and a half; on a CPU, training would be far
       slower, possibly several hours per epoch.
    """
    n_epoch = 2
    batch_size = 128

    # steps per epoch
    steps = int(len(x_data) / batch_size) + 1

    # session configuration
    config = tf.ConfigProto(
        allow_soft_placement=True,  # let TF pick a fallback device (CPU/GPU) automatically
        log_device_placement=False  # don't log device placement
    )

    # where to save the trained model
    save_path = './model/s2ss_chatbot.ckpt'

    # reset the default graph
    tf.reset_default_graph()
    # build the graph
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        # create the session with the config above
        with tf.Session(config=config) as sess:
            # define the model
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                **params  # hyperparameters passed by the caller
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            # data flow: wrap the batch generator in a background thread
            flow = ThreadedGenerator(batch_flow([x_data, y_data],
                                                ws,
                                                batch_size,
                                                add_end=[False, True]),
                                     queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                # progress bar
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                # '_' marks a loop variable whose value is not used
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # reverse each input sequence,
                    # e.g. [[1, 2], [3, 4]] -> [[2, 1], [4, 3]];
                    # a training trick from the original seq2seq encoder-decoder paper
                    x = np.flip(x, axis=1)
                    # get the loss and the learning rate
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    # update the progress-bar description
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                # save the model
                model.save(sess, save_path)

    # Sanity-check the trained model
    tf.reset_default_graph()  # reset the default graph
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    parallel_iterations=1,
                                    **params)

    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            # print input, target and prediction as text
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            # only check three samples to see whether the dialogue is reasonable
            if t >= 3:
                break
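The np.flip(x, axis=1) calls in these examples reverse each (padded) input sequence before it is fed to the encoder, the input-reversal trick popularised by the original seq2seq paper. A toy illustration:

import numpy as np

x = np.array([[1, 2, 3],
              [4, 5, 6]])
print(np.flip(x, axis=1))
# [[3 2 1]
#  [6 5 4]]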
Example No. 8
def test(bidirectional, cell_type, depth, attention_type, use_residual,
         use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws_input, ws_target = pickle.load(
        open('en-zh_cn.pkl', 'rb'))

    # alternatively, generate some fake data:
    # x_data, y_data, ws_input, ws_target = generate(size=10000)

    # Training section
    split = int(len(x_data) * 0.8)
    x_train, x_test, y_train, y_test = (x_data[:split], x_data[split:],
                                        y_data[:split], y_data[split:])
    n_epoch = 2
    batch_size = 256
    steps = int(len(x_train) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_en2zh.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:

            model = SequenceToSequence(input_vocab_size=len(ws_input),
                                       target_vocab_size=len(ws_target),
                                       batch_size=batch_size,
                                       learning_rate=0.001,
                                       bidirectional=bidirectional,
                                       cell_type=cell_type,
                                       depth=depth,
                                       attention_type=attention_type,
                                       use_residual=use_residual,
                                       use_dropout=use_dropout,
                                       parallel_iterations=64,
                                       hidden_units=hidden_units,
                                       optimizer='adam',
                                       time_major=time_major)
            init = tf.global_variables_initializer()
            sess.run(init)

            # print(sess.run(model.input_layer.kernel))
            # exit(1)

            flow = batch_flow([x_train, y_train], [ws_input, ws_target],
                              batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)))

            model.save(sess, save_path)

    # Inference section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws_input),
        target_vocab_size=len(ws_target),
        batch_size=1,
        mode='decode',
        beam_width=12,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1  # for test
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws_input.inverse_transform(x[0]))
            print(ws_target.inverse_transform(y[0]))
            print(ws_target.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws_input),
        target_vocab_size=len(ws_target),
        batch_size=1,
        mode='decode',
        beam_width=1,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1  # for test
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws_input.inverse_transform(x[0]))
            print(ws_target.inverse_transform(y[0]))
            print(ws_target.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
Example No. 9
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from discriminative import Discriminative
    from data_utils import batch_flow
    from word_sequence import WordSequence # pylint: disable=unused-variable

    x_data, y_data, ws = pickle.load(
        open('chatbot.pkl', 'rb'))

    # Training section
    n_epoch = 1
    batch_size = 512
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot_discriminative.ckpt'
    save_path_forward = './s2ss_chatbot_forward.ckpt'

    graph = tf.Graph()
    graph_d = tf.Graph()

    # forward seq2seq model (used to generate replies for the discriminator)
    with graph.as_default():
        model_pred = SequenceToSequence(
            input_vocab_size=len(ws),
            target_vocab_size=len(ws),
            batch_size=batch_size,
            mode='train',
            beam_width=0,
            bidirectional=bidirectional,
            cell_type=cell_type,
            depth=depth,
            attention_type=attention_type,
            use_residual=use_residual,
            use_dropout=use_dropout,
            time_major=time_major,
            hidden_units=hidden_units,
            optimizer='adadelta',
            dropout=0.4,
            share_embedding=True
        )
        init = tf.global_variables_initializer()
        sess = tf.Session(config=config)
        sess.run(init)
        model_pred.load(sess, save_path_forward)

    with graph_d.as_default():
        model_d = Discriminative(
            input_vocab_size=len(ws),
            batch_size=batch_size * 2,
            learning_rate=0.0001,
            bidirectional=bidirectional,
            cell_type=cell_type,
            depth=depth,
            use_residual=use_residual,
            use_dropout=use_dropout,
            parallel_iterations=32,
            time_major=time_major,
            hidden_units=hidden_units,
            optimizer='adadelta',
            dropout=0.4
        )
        init = tf.global_variables_initializer()
        sess_d = tf.Session(config=config)
        sess_d.run(init)
        # model_d.load(sess, save_path_rl)


    # start training
    flow = batch_flow([x_data, y_data], ws, batch_size)

    for epoch in range(1, n_epoch + 1):
        costs = []
        accuracy = []
        bar = tqdm(range(steps), total=steps,
                   desc='epoch {}, loss=0.000000'.format(epoch))
        for _ in bar:

            x, xl, y, yl = next(flow)

            _, a = model_pred.entropy(
                sess, x, xl, y, yl
            )

            # truncate each generated reply at its first END token and record
            # the resulting lengths (clamped to at least 1)
            al = []
            new_a = []
            for aa in a:
                j = 0
                for j, aaj in enumerate(aa):
                    if aaj == WordSequence.END:
                        break
                new_a.append(list(aa[:j]))
                if j <= 0:
                    j = 1
                al.append(j)

            max_len = max((a.shape[1], y.shape[1]))
            if a.shape[1] < max_len:
                a = np.concatenate(
                    (
                        a,
                        np.ones(
                            (batch_size, max_len - a.shape[1])
                        ) * WordSequence.END
                    ), axis=1)
            if y.shape[1] < max_len:
                y = np.concatenate(
                    (
                        y,
                        np.ones(
                            (batch_size, max_len - y.shape[1])
                        ) * WordSequence.END
                    ), axis=1)

            # labels: 0 = generated reply, 1 = ground-truth reply
            targets = np.array(([0] * len(a)) + ([1] * len(a)))

            batch = np.concatenate((a, y), axis=0)
            batchl = np.concatenate((al, yl), axis=0)

            batch = batch.tolist()
            batchl = batchl.tolist()

            # batch, batchl = shuffle(batch, batchl)

            xx = np.concatenate((x, x), axis=0)
            xxl = np.concatenate((xl, xl), axis=0)

            # tmp_batch = list(zip(xx, xxl, batch, batchl))
            # tmp_batch = sorted(tmp_batch, key=lambda x: x[1], reverse=True)
            # xx, xxl, batch, batchl = zip(*tmp_batch)

            batch = np.array(batch).astype(np.int32)
            batchl = np.array(batchl)

            cost, acc = model_d.train(sess_d, xx, xxl, batch, batchl, targets)
            costs.append(cost)
            accuracy.append(acc)

            # print(batch, batchl)

            bar.set_description('epoch {} loss={:.6f} acc={:.6f} {}'.format(
                epoch,
                np.mean(costs),
                np.mean(accuracy),
                len(costs)
            ))

        model_d.save(sess_d, save_path)
Example No. 10
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence # pylint: disable=unused-variable

    x_data, y_data, ws = pickle.load(
        open('chatbot.pkl', 'rb'))

    # Training section
    n_epoch = 2
    batch_size = 512
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './s2ss_chatbot_forward.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:

            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                learning_rate=0.0001,
                bidirectional=bidirectional,
                cell_type=cell_type,
                depth=depth,
                attention_type=attention_type,
                use_residual=use_residual,
                use_dropout=use_dropout,
                hidden_units=hidden_units,
                optimizer='adadelta',
                dropout=0.4,
                time_major=time_major,
                share_embedding=True
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = batch_flow([x_data, y_data], ws, batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch,
                        np.mean(costs)
                    ))

                model.save(sess, save_path)

    # Inference section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=12,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1,
        share_embedding=True
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow(
            [x_data, y_data], ws, 1
        )
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(
                sess,
                x,
                xl
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
Example No. 11
def test(bidirectional, cell_type, depth, attention_type, use_residual,
         use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    _, _, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # for x in x_data[:5]:
    #     print(' '.join(x))

    config = tf.ConfigProto(device_count={
        'CPU': 1,
        'GPU': 0
    },
                            allow_soft_placement=True,
                            log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot.ckpt'
    save_path_rl = './s2ss_chatbot_anti.ckpt'

    graph = tf.Graph()
    graph_rl = tf.Graph()

    with graph_rl.as_default():
        model_rl = SequenceToSequence(input_vocab_size=len(ws),
                                      target_vocab_size=len(ws),
                                      batch_size=1,
                                      mode='decode',
                                      beam_width=12,
                                      bidirectional=bidirectional,
                                      cell_type=cell_type,
                                      depth=depth,
                                      attention_type=attention_type,
                                      use_residual=use_residual,
                                      use_dropout=use_dropout,
                                      parallel_iterations=1,
                                      time_major=time_major,
                                      hidden_units=hidden_units,
                                      share_embedding=True,
                                      pretrained_embedding=True)
        init = tf.global_variables_initializer()
        sess_rl = tf.Session(config=config)
        sess_rl.run(init)
        model_rl.load(sess_rl, save_path_rl)

    # Inference section
    with graph.as_default():
        model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                        target_vocab_size=len(ws),
                                        batch_size=1,
                                        mode='decode',
                                        beam_width=12,
                                        bidirectional=bidirectional,
                                        cell_type=cell_type,
                                        depth=depth,
                                        attention_type=attention_type,
                                        use_residual=use_residual,
                                        use_dropout=use_dropout,
                                        parallel_iterations=1,
                                        time_major=time_major,
                                        hidden_units=hidden_units,
                                        share_embedding=True,
                                        pretrained_embedding=True)
        init = tf.global_variables_initializer()
        sess = tf.Session(config=config)
        sess.run(init)
        model_pred.load(sess, save_path)

    while True:
        user_text = input('Input Chat Sentence:')
        if user_text in ('exit', 'quit'):
            exit(0)
        x_test = [jieba.lcut(user_text.lower())]
        bar = batch_flow([x_test], [ws], 1)
        x, xl = next(bar)
        x = np.flip(x, axis=1)
        print(x, xl)
        pred = model_pred.predict(sess, np.array(x), np.array(xl))
        pred_rl = model_rl.predict(sess_rl, np.array(x), np.array(xl))
        print(ws.inverse_transform(x[0]))
        print('no:', ws.inverse_transform(pred[0]))
        print('rl:', ws.inverse_transform(pred_rl[0]))
        # collect the RL model's reply tokens up to the first END/PAD tag
        # (note: `p` is built but not used further in this snippet)
        p = []
        for pp in ws.inverse_transform(pred_rl[0]):
            if pp == WordSequence.END_TAG:
                break
            if pp == WordSequence.PAD_TAG:
                break
            p.append(pp)
Example No. 12
def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training
    """
    1. n_epoch is the number of training epochs.
    2. In principle, the more epochs, the higher the training accuracy.
    3. With a very large number of epochs (say 1000) the model may overfit,
       though whether it does also depends on the training data.
    4. The larger n_epoch is, the longer training takes.
    5. On an office machine with a P5000 GPU, 40 epochs took about 3 days and
       2 epochs about an hour and a half; on a CPU, training would be far
       slower, possibly several hours per epoch.
    """
    n_epoch = 2
    batch_size = 128

    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # define the model
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(batch_flow([x_data, y_data],
                                                ws,
                                                batch_size,
                                                add_end=[False, True]),
                                     queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # reverse each input sequence,
                    # e.g. [[1, 2], [3, 4]] -> [[2, 1], [4, 3]]
                    x = np.flip(x, axis=1)
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

    # Test
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    parallel_iterations=1,
                                    **params)

    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
Example No. 13
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units, anti):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws = pickle.load(open('./pickle/chatbot.pkl', 'rb'))
    '''
    for x in x_data[:5]:
        print(' '.join(x))
    '''
    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    if anti is False:
        save_path = './pickle/s2ss_chatbot.ckpt'
    else:
        save_path = './pickle/s2ss_chatbot_anti.ckpt'

    # Inference section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=0,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        parallel_iterations=1,
        time_major=time_major,
        hidden_units=hidden_units,
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()

    fp = open('meals.txt', 'r')
    meals = fp.readlines()
    order_table = defaultdict(list)  # record all orders, keyed by user

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            order = []  # orders in the current utterance
            no_order_ans = ['咖啡', '紅茶', '綠茶']  # default items when the user named no meal
            user_text = input()
            tmp = user_text.split()  # split into user id and question
            user_id = tmp[0]
            user_text = tmp[1]
            if user_text in ('exit', 'quit'):
                exit(0)
            elif user_text == '#menu':
                print(str(order_table[user_id]) + ' 零。')
                continue
            elif user_text == '#delete':
                del order_table[user_id]
                continue
            s2tw = OpenCC('s2tw')
            user_text = s2tw.convert(user_text)

            order_tmp, user_text = replace_meal_line(user_text, meals)

            for line in iter(order_tmp):
                order.append(line)
            x_test = [jieba.lcut(user_text.lower())]

            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)

            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            s2tw = OpenCC('s2tw')
            # NB: `ch not in '</s><unk>'` is a substring test: it drops the
            # '</s>' and '<unk>' tokens, but would also drop any single-character
            # token found in that string ('<', '/', 's', '>', 'u', 'n', 'k')
            out = filter(lambda ch: ch not in '</s><unk>',
                         ws.inverse_transform(pred[0]))

            out = list(out)
            for i, _ in enumerate(out):
                if out[i] == 'allkindofmeal':
                    if not order:
                        out[i] = no_order_ans[0]
                        del no_order_ans[0]
                    else:
                        out[i] = order[0]
                        order_table[user_id].append(order[0])
                        del order[0]
            order_table[user_id].extend(order)
            out = ''.join(list(out))
            print(out)
Example No. 14
def test(bidirectional, cell_type, depth, attention_type, use_residual,
         use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws = pickle.load(open('chatbot.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(device_count={
        'CPU': 1,
        'GPU': 0
    },
                            allow_soft_placement=True,
                            log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot.ckpt'

    # Inference section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    parallel_iterations=1,
                                    time_major=time_major,
                                    hidden_units=hidden_units,
                                    share_embedding=True,
                                    pretrained_embedding=True)
    init = tf.global_variables_initializer()

    fp = open('meals.txt', 'r')
    meals = fp.readlines()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            order = []
            no_order_ans = ['咖啡', '紅茶', '綠茶']
            user_text = input()
            if user_text in ('exit', 'quit'):
                exit(0)
            tw2s = OpenCC('tw2s')
            # user_text = tw2s.convert(user_text)
            # print(user_text)
            order_tmp, user_text = replace_meal_line(user_text, meals)
            print(user_text)
            for line in iter(order_tmp):
                order.append(line)
            x_test = [jieba.lcut(user_text.lower())]
            print(x_test)
            # x_test = [word_tokenize(user_text)]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            # x = np.array([
            #     list(reversed(xx))
            #     for xx in x
            # ])
            print(ws.inverse_transform(x[0]))
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            s2tw = OpenCC('s2tw')
            # substring test: drops '</s>' / '<unk>' tokens (see the note in
            # Example No. 13)
            out = filter(lambda ch: ch not in '</s><unk>',
                         ws.inverse_transform(pred[0]))
            # print(ws.inverse_transform(pred[0]))
            '''
            for i in range(len(out)):
                if out[i] == '甲':
                    if len(order) == 0:
                        out[i] = no_order_ans[0]
                        del no_order_ans[0]
                    else:
                        out[i] = order[0]
                        del order[0]
            '''
            # print(type(out))
            # print(list(out))
            out = list(out)
            print(out[0])
            print(out)
            for i, _ in enumerate(out):
                if out[i] == 'allkindofmeal':
                    if not order:
                        print('aaa')
                        out[i] = no_order_ans[0]
                        del no_order_ans[0]
                    else:
                        print('bbb')
                        out[i] = order[0]
                        del order[0]
            out = ''.join(list(out))
            print(out)
Example No. 15
def test(bidirectional,
         cell_type,
         depth,
         attention_type,
         use_residual,
         use_dropout,
         time_major,
         hidden_units,
         preload=True):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from discriminative import Discriminative
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))

    vectorizer = pickle.load(open('tfidf.pkl', 'rb'))

    # Training section
    n_epoch = 2
    batch_size = 512
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_chatbot_ad.ckpt'
    forward_path = './s2ss_chatbot_forward.ckpt'
    discriminative_path = './s2ss_chatbot_discriminative.ckpt'

    graph_d = tf.Graph()
    graph_ad = tf.Graph()

    # load the trained discriminator model
    with graph_d.as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        sess_d = tf.Session(config=config)

        model_d = Discriminative(input_vocab_size=len(ws),
                                 batch_size=batch_size,
                                 learning_rate=0.0001,
                                 bidirectional=bidirectional,
                                 cell_type=cell_type,
                                 depth=depth,
                                 use_residual=use_residual,
                                 use_dropout=use_dropout,
                                 parallel_iterations=32,
                                 time_major=time_major,
                                 hidden_units=hidden_units,
                                 optimizer='adadelta',
                                 dropout=0.4)
        init = tf.global_variables_initializer()
        sess_d.run(init)
        model_d.load(sess_d, discriminative_path)

    # build the model to be trained
    with graph_ad.as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        sess_ad = tf.Session(config=config)

        model_ad = SequenceToSequence(
            input_vocab_size=len(ws),
            target_vocab_size=len(ws),
            batch_size=batch_size,
            # beam_width=12,
            learning_rate=0.0001,
            bidirectional=bidirectional,
            cell_type=cell_type,
            depth=depth,
            attention_type=attention_type,
            use_residual=use_residual,
            use_dropout=use_dropout,
            hidden_units=hidden_units,
            optimizer='adadelta',
            dropout=0.4,
            time_major=time_major,
            share_embedding=True)

        init = tf.global_variables_initializer()
        sess_ad.run(init)
        if preload:
            model_ad.load(sess_ad, forward_path)

    # start training
    flow = batch_flow([x_data, y_data], ws, batch_size, raw=True)

    for epoch in range(1, n_epoch + 1):
        costs = []
        bar = tqdm(range(steps),
                   total=steps,
                   desc='epoch {}, loss=0.000000'.format(epoch))
        for _ in bar:

            x, xl, xraw, y, yl, yraw = next(flow)

            rewards = model_d.predict(sess_d, x, xl, y, yl)
            rewards = rewards[:, 1]

            texts = []
            for i in range(batch_size):
                # text = ws.inverse_transform(y[i])
                # text = ''.join(text)[:yl[i]]
                text = ''.join(yraw[i])
                texts.append(text)
            tfidfs = np.sum(vectorizer.transform(texts), axis=1)
            tfidfs_sum = np.sum(tfidfs)

            def smooth(x):
                return (0.5 + x) * (2.0 / 3)

            for i in range(batch_size):
                text = texts[i]
                rewards[i] = smooth(rewards[i])
                rewards[i] *= smooth(repeat_reward(text))
                rewards[i] *= smooth(chinese_reward(text))
                rewards[i] *= smooth(similarity_reward(''.join(xraw[i]), text))
                rewards[i] *= smooth(tfidfs[i] / tfidfs_sum * batch_size)

            rewards = rewards.reshape(-1, 1)

            # NB: the rewards computed above are not actually passed to
            # train(); the `rewards` argument is commented out
            cost = model_ad.train(sess_ad, x, xl, y, yl)  # , rewards)

            costs.append(cost)
            # lengths.append(np.mean(al))
            des = ('epoch {} '
                   'loss={:.6f} '
                   'rmean={:.4f} '
                   'rmin={:.4f} '
                   'rmax={:.4f} '
                   'rmed={:.4f}')
            bar.set_description(
                des.format(epoch, np.mean(costs), np.mean(rewards),
                           np.min(rewards), np.max(rewards),
                           np.median(rewards)))

        model_ad.save(sess_ad, save_path)
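The smooth() helper above affinely maps a raw reward from [0, 1] into [1/3, 1], so no single factor can zero out the blended product of rewards:

def smooth(r):
    return (0.5 + r) * (2.0 / 3)

print(smooth(0.0))  # ~0.333: a zero reward is lifted to one third
print(smooth(1.0))  # ~1.0:   a perfect reward stays (almost) unchanged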
Example No. 16
def test(params):

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    n_epoch = 2
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = 'model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(batch_flow([x_data, y_data],
                                                ws,
                                                batch_size,
                                                add_end=[False, True]),
                                     queue_maxsize=30)

            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    # anti-language-model term: decoding y from a dummy all-PAD
                    # encoder input approximates an input-independent language-
                    # model loss for y; scaled by -0.5 below and passed as
                    # add_loss, it penalises generic replies
                    add_loss = model.train(sess,
                                           dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y,
                                           yl,
                                           loss_only=True)
                    add_loss *= -0.5

                    cost, lr = model.train(sess,
                                           x,
                                           xl,
                                           y,
                                           yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))
                model.save(sess, save_path)
            flow.close()

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
Example No. 17
def test(bidirectional, cell_type, depth, attention_type, use_residual,
         use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws = pickle.load(open('chatbot.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(device_count={
        'CPU': 1,
        'GPU': 0
    },
                            allow_soft_placement=True,
                            log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot_forward.ckpt'

    # Inference section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    share_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            user_text = input('Input Chat Sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [list(user_text.lower())]
            bar = batch_flow([x_test], [ws], 1)
            x, xl = next(bar)
            # x = np.array([
            #     list(reversed(xx))
            #     for xx in x
            # ])
            print(x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(pred)
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(pred[0]))
Example No. 18
def test(bidirectional, cell_type, depth, attention_type):
    """测试并展示attention图
    """

    from tqdm import tqdm
    from fake_data import generate

    # generate some fake data
    x_data, y_data, ws_input, ws_target = generate(size=10000)

    # Training section

    split = int(len(x_data) * 0.9)
    x_train, x_test, y_train, y_test = (x_data[:split], x_data[split:],
                                        y_data[:split], y_data[split:])
    n_epoch = 2
    batch_size = 32
    steps = int(len(x_train) / batch_size) + 1

    config = tf.ConfigProto(device_count={
        'CPU': 1,
        'GPU': 0
    },
                            allow_soft_placement=True,
                            log_device_placement=False)

    save_path = '/tmp/s2ss_atten.ckpt'

    with tf.Graph().as_default():

        model = SequenceToSequence(input_vocab_size=len(ws_input),
                                   target_vocab_size=len(ws_target),
                                   batch_size=batch_size,
                                   learning_rate=0.001,
                                   bidirectional=bidirectional,
                                   cell_type=cell_type,
                                   depth=depth,
                                   attention_type=attention_type,
                                   parallel_iterations=1)
        init = tf.global_variables_initializer()

        with tf.Session(config=config) as sess:
            sess.run(init)
            for epoch in range(1, n_epoch + 1):
                costs = []
                flow = batch_flow([x_train, y_train], [ws_input, ws_target],
                                  batch_size)
                bar = tqdm(range(steps),
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)))

            model.save(sess, save_path)

    # attention visualization cannot be used together with beam search,
    # so greedy decoding is used here

    with tf.Graph().as_default():
        model_pred = SequenceToSequence(input_vocab_size=len(ws_input),
                                        target_vocab_size=len(ws_target),
                                        batch_size=1,
                                        mode='decode',
                                        beam_width=0,
                                        bidirectional=bidirectional,
                                        cell_type=cell_type,
                                        depth=depth,
                                        attention_type=attention_type,
                                        parallel_iterations=1)
        init = tf.global_variables_initializer()

        with tf.Session(config=config) as sess:
            sess.run(init)
            model_pred.load(sess, save_path)

            pbar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
            t = 0
            for x, xl, y, yl in pbar:
                pred, atten = model_pred.predict(sess,
                                                 np.array(x),
                                                 np.array(xl),
                                                 attention=True)
                ox = ws_input.inverse_transform(x[0])
                oy = ws_target.inverse_transform(y[0])
                op = ws_target.inverse_transform(pred[0])
                print(ox)
                print(oy)
                print(op)

                fig, ax = plt.subplots()
                cax = ax.matshow(atten.reshape(
                    [atten.shape[0], atten.shape[2]]),
                                 cmap=cm.coolwarm)
                ax.set_xticks(np.arange(len(ox)))
                ax.set_yticks(np.arange(len(op)))
                ax.set_xticklabels(ox)
                ax.set_yticklabels(op)
                fig.colorbar(cax)
                plt.show()

                print('-' * 30)

                t += 1
                if t >= 10:
                    break
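
In isolation, the plotting idiom above looks like this; a self-contained sketch with a random matrix standing in for atten (hypothetical tokens and weights, not the model's actual output):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

src = list('hello')                          # source tokens (x axis)
tgt = list('world!')                         # predicted tokens (y axis)
atten = np.random.rand(len(tgt), len(src))   # stand-in for the attention weights

fig, ax = plt.subplots()
cax = ax.matshow(atten, cmap=cm.coolwarm)    # one row per output step
ax.set_xticks(np.arange(len(src)))
ax.set_yticks(np.arange(len(tgt)))
ax.set_xticklabels(src)
ax.set_yticklabels(tgt)
fig.colorbar(cax)
plt.show()
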
Ejemplo n.º 19
0
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws = pickle.load(open('chatbot.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot_anti.ckpt'

    # Test section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=64,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        parallel_iterations=1,
        time_major=time_major,
        hidden_units=hidden_units,
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        with open('test_input.txt', 'r', encoding='utf-8', newline='') as f:
            with open('test_output.txt', 'w', encoding='utf-8') as g:
                for user_text in f:
                    user_text = user_text.rstrip()
                    print(user_text, file=g)
                    x_test = [jieba.lcut(user_text.lower())]
                    bar = batch_flow([x_test], ws, 1)
                    x, xl = next(bar)
                    print(x, xl, file=g)
                    x = np.flip(x, axis=1)
                    pred = model_pred.predict(
                        sess,
                        np.array(x),
                        np.array(xl)
                    )
                    ans = ws.inverse_transform(pred)
                    print(ans, file=g)
        while True:
            user_text = input('Input Chat Sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [jieba.lcut(user_text.lower())]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            print(x, xl)
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(pred)
            # prob = np.exp(prob.transpose())
            print(ws.inverse_transform(x[0]))
            # print(ws.inverse_transform(pred[0]))
            # print(pred.shape, prob.shape)
            ans = ws.inverse_transform(pred)
            print(ans)
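
Note the x = np.flip(x, axis=1) before each predict (and train) call: the source sequence is fed to the encoder reversed, the trick from Sutskever et al. (2014) that shortens the path between the start of the source and the start of the target. Because the flip is applied to the already padded batch, the padding moves to the front of each row. A minimal numpy illustration (toy ids, with 0 standing in for PAD by assumption; the real id comes from WordSequence):

import numpy as np

x = np.array([[3, 4, 5, 0, 0],    # a padded batch of two sequences
              [6, 7, 0, 0, 0]])   # 0 = PAD (assumed)
print(np.flip(x, axis=1))
# [[0 0 5 4 3]
#  [0 0 0 7 6]]
# tokens are reversed and the padding now leads each row
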
Ejemplo n.º 20
0
def test(bidirectional, cell_type, depth, attention_type, use_residual,
         use_dropout, time_major, hidden_units):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable
    from threadedgenerator import ThreadedGenerator
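    # batch_flow_bucket presumably groups pairs of similar length into the
    # same batch to reduce padding; it exposes the same interface as the
    # plain batch_flow used in the other examples.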

    emb = pickle.load(open('emb.pkl', 'rb'))

    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # Training section
    n_epoch = 5
    batch_size = 128
    # x_data, y_data = shuffle(x_data, y_data, random_state=0)
    # x_data = x_data[:100000]
    # y_data = y_data[:100000]
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:

            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       bidirectional=bidirectional,
                                       cell_type=cell_type,
                                       depth=depth,
                                       attention_type=attention_type,
                                       use_residual=use_residual,
                                       use_dropout=use_dropout,
                                       hidden_units=hidden_units,
                                       time_major=time_major,
                                       learning_rate=0.001,
                                       optimizer='adam',
                                       share_embedding=True,
                                       dropout=0.2,
                                       pretrained_embedding=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            # Load the pretrained embedding
            model.feed_embedding(sess, encoder=emb)

            # print(sess.run(model.input_layer.kernel))
            # exit(1)

            flow = ThreadedGenerator(batch_flow([x_data, y_data], ws,
                                                batch_size),
                                     queue_maxsize=30)

            # A batch of "empty" sources: each encoder input is a single PAD
            # token of length 1, used below to score each response on its own
            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_lengths = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess,
                                           dummy_encoder_inputs,
                                           dummy_encoder_inputs_lengths,
                                           y,
                                           yl,
                                           loss_only=True)

                    add_loss *= -0.5
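                    # add_loss is the decoder's loss against the all-PAD
                    # source, i.e. a language-model score of the response
                    # alone; scaled by -0.5 and added to the main loss, it
                    # penalizes replies that are likely regardless of the
                    # input, an anti-LM term in the spirit of MMI-antiLM
                    # (Li et al., 2016). This is our reading, not documented
                    # library behavior.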
                    # print(x, y)
                    cost, lr = model.train(sess,
                                           x,
                                           xl,
                                           y,
                                           yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

            flow.close()

    # Test section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    learning_rate=0.001,
                                    optimizer='adam',
                                    share_embedding=True,
                                    pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    learning_rate=0.001,
                                    optimizer='adam',
                                    share_embedding=True,
                                    pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
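
The training loop above pulls batches through a ThreadedGenerator so that batch preparation overlaps with the session.run calls. A minimal sketch of such a wrapper, using only the standard library (the real threadedgenerator module may differ in detail):

import threading
import queue

class ThreadedGenerator:
    """Produce items from an iterator on a background thread,
    buffering up to queue_maxsize of them."""

    _SENTINEL = object()

    def __init__(self, iterator, queue_maxsize=30):
        self._queue = queue.Queue(maxsize=queue_maxsize)
        self._stop = threading.Event()
        self._thread = threading.Thread(
            target=self._run, args=(iterator,), daemon=True)
        self._thread.start()

    def _run(self, iterator):
        for item in iterator:
            while not self._stop.is_set():
                try:
                    self._queue.put(item, timeout=0.1)
                    break
                except queue.Full:
                    continue
            if self._stop.is_set():
                return
        self._queue.put(self._SENTINEL)

    def __iter__(self):
        return self

    def __next__(self):
        item = self._queue.get()
        if item is self._SENTINEL:
            raise StopIteration
        return item

    def close(self):
        self._stop.set()

# usage with any iterable:
flow = ThreadedGenerator(iter(range(10)), queue_maxsize=4)
print(list(flow))   # [0, 1, ..., 9]
flow.close()
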
Ejemplo n.º 21
0
def test(params):
    """测试不同参数在生成的假数据上的运行结果"""

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training section
    n_epoch = 5
    batch_size = 32
    # x_data, y_data = shuffle(x_data, y_data, random_state=0)
    # x_data = x_data[:10000]
    # y_data = y_data[:10000]
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:

            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                **params
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            # print(sess.run(model.input_layer.kernel))
            # exit(1)

            # add_end=[False, True]: append the end-of-sequence marker to the
            # targets only, not the sources (per the flag's apparent use)
            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)
                    # print(x, y)
                    # print(xl, yl)
                    # exit(1)
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f} lr={:.6f}'.format(
                        epoch,
                        np.mean(costs),
                        lr
                    ))

                model.save(sess, save_path)

            flow.close()

    # Test section
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=12,
        parallel_iterations=1,
        **params
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=1,
        parallel_iterations=1,
        **params
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
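
The last two examples each rebuild the decoder twice, with beam_width=12 and then beam_width=1 (greedy), to compare the two search strategies on the same checkpoint. For intuition, here is a toy, self-contained beam search over a fixed next-token table (purely illustrative, not the library's decoder):

import numpy as np

# Toy next-token probabilities P(next | prev) over a vocab of 4 ids; 2 = EOS.
probs = np.array([
    [0.03, 0.50, 0.02, 0.45],  # after token 0
    [0.30, 0.20, 0.40, 0.10],  # after token 1
    [0.25, 0.25, 0.25, 0.25],  # after EOS (unused)
    [0.05, 0.03, 0.90, 0.02],  # after token 3
])
logp = np.log(probs)
EOS = 2

def beam_search(start, beam_width, max_len=5):
    beams = [([start], 0.0)]                      # (sequence, log-prob)
    for _ in range(max_len):
        candidates = []
        for seq, score in beams:
            if seq[-1] == EOS:                    # finished beams stay as-is
                candidates.append((seq, score))
                continue
            for tok in range(logp.shape[1]):
                candidates.append((seq + [tok], score + logp[seq[-1], tok]))
        # keep only the best beam_width hypotheses
        beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:beam_width]
    return beams[0]

print(beam_search(0, beam_width=1))    # greedy settles on [0, 1, 2], prob 0.20
print(beam_search(0, beam_width=12))   # the wider beam finds [0, 3, 2], prob 0.405

Greedy commits to the locally best first token and misses the higher-probability path; a wider beam keeps more hypotheses alive, which is why these tests decode with both settings.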