Example #1
def test_given_data():
    import pickle

    from sequence2sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from thread_generator import ThreadedGenerator

    data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('WordSequence.pkl', 'rb'))
    n_epoch = 40
    batch_size = 5
    x_data = []
    y_data = []
    for i in data:
        x_data.append(i[0])
        y_data.append(i[1])

    print('done')
    print(len(x_data))
    print(len(y_data))
    steps = int(len(x_data) / batch_size) + 1  # int() truncates, so add 1 to cover the remainder batch
    flow = ThreadedGenerator(
        batch_flow([x_data, y_data], ws, batch_size, add_end=[False, True]),
        queue_maxsize=30
    )

    for i in range(1):
        datas = next(flow)
        print(datas)
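
Note: thread_generator is a local helper module that these examples import but never show. Below is a minimal sketch of what it plausibly provides, assuming it only runs the upstream generator on a daemon thread behind a bounded queue; the class name, constructor arguments, and close() are taken from how the examples use it, while the body is an assumption, not the repository's actual code:

import queue
import threading

class ThreadedGenerator:
    """Run an iterator on a background thread, buffering its items
    in a bounded queue so the consumer never waits on batch prep."""

    def __init__(self, iterator, queue_maxsize=10):
        self._queue = queue.Queue(maxsize=queue_maxsize)
        self._iterator = iterator
        self._sentinel = object()
        self._thread = threading.Thread(target=self._produce, daemon=True)
        self._thread.start()

    def _produce(self):
        for item in self._iterator:
            self._queue.put(item)
        self._queue.put(self._sentinel)  # signal exhaustion

    def __iter__(self):
        return self

    def __next__(self):
        item = self._queue.get()
        if item is self._sentinel:
            raise StopIteration
        return item

    def close(self):
        # The producer is a daemon thread, so simply stop consuming;
        # it will not keep the interpreter alive at exit.
        pass

With such a wrapper, next(flow) just pops a pre-built batch, so batch construction overlaps with the training steps.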
Example #2
def test(params):
    import pickle
    import random

    import numpy as np
    import tensorflow as tf
    from tqdm import tqdm

    from sequence2sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from thread_generator import ThreadedGenerator

    data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('WordSequence.pkl', 'rb'))
    x_data = []
    y_data = []
    for i in data:
        x_data.append(i[0])
        y_data.append(i[1])

    print('done')
    print(len(x_data))
    print(len(y_data))

    # Training
    '''
    1. n_epoch is the number of training epochs.
    2. In principle, more epochs means a better fit to the training data.
    3. With a very large epoch count (e.g. 1000) the model may overfit,
       although whether it actually does also depends on the data.
    4. The larger n_epoch is, the longer training takes.
    5. On an office machine with a P5000 GPU, 40 epochs took roughly
       3 days and 2 epochs about an hour and a half; training on a CPU
       is far slower, possibly several hours per epoch.
    (See the early-stopping sketch after this example.)
    '''
    n_epoch = 2
    batch_size = 128
    steps = int(len(x_data) / batch_size)
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # define the model
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)

            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(batch_flow([x_data, y_data],
                                                ws,
                                                batch_size,
                                                add_end=[False, True]),
                                     queue_maxsize=30)
            print('flow created')
            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # np.flip(x, axis=1) reverses each row:
                    # [[1,2],[3,4]] -> [[2,1],[4,3]]
                    x = np.flip(x, axis=1)
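                    # Reversing the source sequence is a common trick for
                    # encoder-decoder models (Sutskever et al., 2014): it
                    # shortens the path between early source tokens and the
                    # start of the target sequence.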
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)
    # Testing
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    parallel_iterations=1,
                                    **params)

    init = tf.global_variables_initializer()
    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))

            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
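
The epoch-count notes in Example #2 suggest an alternative to hand-picking n_epoch: stop when a held-out loss stops improving. A self-contained sketch of patience-based early stopping follows; the simulated val_loss stands in for a real validation pass, which these examples do not include:

import random

random.seed(0)
best_val = float('inf')
patience, bad_epochs = 3, 0
for epoch in range(1, 1001):
    # stand-in for one training epoch followed by a validation pass
    val_loss = 1.0 / epoch + random.uniform(0.0, 0.05)
    if val_loss < best_val - 1e-4:
        best_val, bad_epochs = val_loss, 0  # improved: save a checkpoint here
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            print('stopping at epoch {}, best val loss {:.4f}'.format(epoch, best_val))
            break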
Example #3
def test(params):
    import pickle
    import random

    import numpy as np
    import tensorflow as tf
    from tqdm import tqdm

    from seq_to_seq import Seq2Seq
    from data_utils import batch_flow_bucket as batch_flow
    from thread_generator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('wx.pkl', 'rb'))

    # Training
    # n_epoch is the number of epochs (larger values overfit more easily)
    n_epoch = 1
    batch_size = 128

    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()

    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # define the model
            model = Seq2Seq(input_vocab_size=len(ws),
                            target_vocab_size=len(ws),
                            batch_size=batch_size,
                            **params)

            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(batch_flow([x_data, y_data],
                                                ws,
                                                batch_size,
                                                add_end=[False, True]),
                                     queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # flip along axis=1, reversing each row:
                    # [[1,2],[3,4]] -> [[2,1],[4,3]]
                    x = np.flip(x, axis=1)
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

            model.save(sess, save_path=save_path)

    # Testing
    tf.reset_default_graph()
    model_pred = Seq2Seq(input_vocab_size=len(ws),
                         target_vocab_size=len(ws),
                         batch_size=1,
                         mode='decode',
                         beam_width=12,
                         parallel_iterations=1,
                         **params)
    init = tf.global_variables_initializer()
    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
Example #4
def test(params):
    import pickle
    import random

    import numpy as np
    import tensorflow as tf
    from tqdm import tqdm

    from sequence2sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from thread_generator import ThreadedGenerator

    data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('WordSequence.pkl', 'rb'))
    n_epoch = 40
    batch_size = 128
    x_data = []
    y_data = []
    for i in data:
        x_data.append(i[0])
        y_data.append(i[1])

    print('done')
    print(len(x_data))
    print(len(y_data))
    steps = int(len(x_data) / batch_size) + 1  # int() truncates, so add 1 to cover the remainder batch
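    # NB: when len(x_data) is an exact multiple of batch_size this formula
    # overcounts by one step; math.ceil(len(x_data) / batch_size) computes
    # the exact number of batches.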
    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False
    )
    '''
    tf.ConfigProto() is passed when creating a Session to configure it.

    tf.ConfigProto(allow_soft_placement=True)
    TF lets you pin operations to a device with "with tf.device('/cpu:0'):".
    If the requested device does not exist or is unavailable, the program
    may hang or raise an error; allow_soft_placement=True lets TF fall
    back to a device that does exist and is available.

    tf.ConfigProto(log_device_placement=True)
    Logs device placement: prints to the terminal which device (which
    CPU or GPU) each operation and Tensor was assigned to.
    (A standalone demonstration follows this example.)
    '''

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                **params
            )

            init = tf.global_variables_initializer()
            sess.run(init)
            # resume from an existing checkpoint
            model.load(sess, 'model/s2ss_chatbot.ckpt-4')
            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size, add_end=[False, True]),
                queue_maxsize=30
            )

            # dummy (all-PAD) encoder inputs, used for the penalty loss below
            dummy_encoder_inputs = np.array([
                np.array([WordSequence.PAD]) for _ in range(batch_size)
            ])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            for epoch in range(5, n_epoch + 1):  # resume at epoch 5 (checkpoint-4 loaded above)
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))

                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5
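                    # The call above computes the decoder's loss given a
                    # dummy (PAD-only) source, i.e. a pure language-model
                    # loss; adding -0.5 * that loss to the training loss
                    # penalizes generic, input-independent replies. This
                    # resembles the anti-LM / MMI idea of Li et al. (2016);
                    # the exact effect depends on the local model.train().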

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True, add_loss=add_loss)
                    costs.append(cost)

                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))
                model.save(sess, save_path='./model/s2ss_chatbot.ckpt', index=epoch)
            flow.close()

    # Testing (at function level: tf.reset_default_graph() must not be
    # called inside an active graph context, so this cannot stay nested
    # in the training session)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=1,
        parallel_iterations=1,
        **params
    )
    init = tf.global_variables_initializer()
    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
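
The ConfigProto notes in Example #4 can be verified in isolation. A minimal TF1 sketch (standalone, not part of the chatbot code): ops are pinned to a GPU that may not exist, allow_soft_placement lets TF fall back to an available device, and log_device_placement prints each op's assigned device:

import tensorflow as tf

config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
with tf.Graph().as_default():
    # Request a device that may be missing; with allow_soft_placement=True
    # TF silently falls back instead of raising.
    with tf.device('/gpu:0'):
        a = tf.constant([1.0, 2.0], name='a')
        b = tf.constant([3.0, 4.0], name='b')
        c = a + b
    with tf.Session(config=config) as sess:
        print(sess.run(c))  # device assignments are logged to the terminal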
Example #5
def train(params):
    import pickle
    import random

    import numpy as np
    import tensorflow as tf
    from tqdm import tqdm

    from seq_to_seq import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from thread_generator import ThreadedGenerator

    # Load the data
    x_data, y_data = pickle.load(open('data/xiaohaungji_chatbot.pkl', 'rb'))
    ws = pickle.load(open('data/xiaohuangji_ws.pkl', 'rb'))

    n_epoch = 200  # number of training epochs
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    # model save path
    # save_path = './model/s2ss_chatbot_anti.ckpt'
    save_path = './xiaohaungji_model/s2ss_chatbot_anti.ckpt'

    # Training
    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data],
                           ws,
                           batch_size,
                           add_end=[False, True]),
                queue_maxsize=30  # max number of batches buffered, not sentence length
            )

            # dummy (all-PAD) encoder inputs for the penalty loss below
            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                # progress bar
                bar = tqdm(range(steps),
                           total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess,
                                           dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y,
                                           yl,
                                           loss_only=True)
                    add_loss *= -0.5
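                    # as in Example #4: an anti-LM-style penalty that
                    # subtracts half the dummy-input (pure LM) loss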

                    cost, lr = model.train(sess,
                                           x,
                                           xl,
                                           y,
                                           yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))
                model.save(sess, save_path)
            flow.close()

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Second test pass: beam_width=1, i.e. greedy decoding
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break