# Shared module-level imports assumed by the training/test scripts below
import pickle
import random
import numpy as np
import tensorflow as tf
from tqdm import tqdm


def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    n_epoch = 2
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = 'model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            # A dummy all-PAD encoder input of length 1: the loss of
            # predicting the target from it approximates the language-model
            # probability of the response (the anti-LM penalty)
            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    # anti-LM term, subtracted from the loss with weight 0.5
                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

        flow.close()

    # Evaluation: beam search decode (beam_width=12)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Evaluation: greedy-like decode (beam_width=1) for comparison
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
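# Usage sketch (not in the original script; the parameter values below are
# hypothetical). The **params dict is expected to carry the
# SequenceToSequence hyperparameters that the explicit-argument variants
# later in this file spell out, e.g.:
#
#     test({
#         'bidirectional': True,
#         'cell_type': 'lstm',
#         'depth': 2,
#         'attention_type': 'Bahdanau',
#         'use_residual': False,
#         'use_dropout': False,
#         'time_major': False,
#         'hidden_units': 512,
#     })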
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units):
    """Test how different hyperparameter combinations perform."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable
    from threadedgenerator import ThreadedGenerator

    emb = pickle.load(open('emb.pkl', 'rb'))
    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # Training
    n_epoch = 5
    batch_size = 128
    # x_data, y_data = shuffle(x_data, y_data, random_state=0)
    # x_data = x_data[:100000]
    # y_data = y_data[:100000]
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       bidirectional=bidirectional,
                                       cell_type=cell_type,
                                       depth=depth,
                                       attention_type=attention_type,
                                       use_residual=use_residual,
                                       use_dropout=use_dropout,
                                       hidden_units=hidden_units,
                                       time_major=time_major,
                                       learning_rate=0.001,
                                       optimizer='adam',
                                       share_embedding=True,
                                       dropout=0.2,
                                       pretrained_embedding=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            # Load the pretrained embedding
            model.feed_embedding(sess, encoder=emb)
            # print(sess.run(model.input_layer.kernel))
            # exit(1)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size),
                queue_maxsize=30)

            # dummy all-PAD encoder input for the anti-LM penalty
            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_lengths = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_lengths,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5
                    # print(x, y)
                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

        flow.close()

    # Evaluation: beam search decode (beam_width=12)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    learning_rate=0.001,
                                    optimizer='adam',
                                    share_embedding=True,
                                    pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Evaluation: greedy-like decode (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    learning_rate=0.001,
                                    optimizer='adam',
                                    share_embedding=True,
                                    pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
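# Sketch (an assumption, not part of the original scripts): one way the
# 'emb.pkl' matrix fed to model.feed_embedding above could be built, with
# row i holding the pretrained vector for token i of the WordSequence
# vocabulary. The word2vec model path and the WordSequence .dict attribute
# (a word -> id mapping) are assumptions here.
def build_embedding_matrix(w2v_path='word2vec.model',
                           ws_path='ws.pkl',
                           out_path='emb.pkl'):
    from gensim.models import Word2Vec

    w2v = Word2Vec.load(w2v_path)                 # hypothetical path
    ws = pickle.load(open(ws_path, 'rb'))
    emb = np.zeros((len(ws), w2v.vector_size), dtype=np.float32)
    for word, idx in ws.dict.items():             # assumed word -> id mapping
        if word in w2v.wv:
            emb[idx] = w2v.wv[word]               # rows for unknown words stay zero
    pickle.dump(emb, open(out_path, 'wb'))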
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units):
    """Test how different hyperparameter combinations perform on the
    English-Chinese translation data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws_input, ws_target = pickle.load(
        open('en-zh_cn.pkl', 'rb'))
    # Or generate some fake data instead:
    # x_data, y_data, ws_input, ws_target = generate(size=10000)

    # Training
    split = int(len(x_data) * 0.8)
    x_train, x_test, y_train, y_test = (x_data[:split], x_data[split:],
                                        y_data[:split], y_data[split:])
    n_epoch = 2
    batch_size = 256
    steps = int(len(x_train) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_en2zh.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws_input),
                                       target_vocab_size=len(ws_target),
                                       batch_size=batch_size,
                                       learning_rate=0.001,
                                       bidirectional=bidirectional,
                                       cell_type=cell_type,
                                       depth=depth,
                                       attention_type=attention_type,
                                       use_residual=use_residual,
                                       use_dropout=use_dropout,
                                       parallel_iterations=64,
                                       hidden_units=hidden_units,
                                       optimizer='adam',
                                       time_major=time_major)
            init = tf.global_variables_initializer()
            sess.run(init)

            # print(sess.run(model.input_layer.kernel))
            # exit(1)

            flow = batch_flow([x_train, y_train],
                              [ws_input, ws_target], batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)))

                model.save(sess, save_path)

    # Evaluation: beam search decode (beam_width=12)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws_input),
        target_vocab_size=len(ws_target),
        batch_size=1,
        mode='decode',
        beam_width=12,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1  # for test
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws_input.inverse_transform(x[0]))
            print(ws_target.inverse_transform(y[0]))
            print(ws_target.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Evaluation: greedy-like decode (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws_input),
        target_vocab_size=len(ws_target),
        batch_size=1,
        mode='decode',
        beam_width=1,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1  # for test
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws_input.inverse_transform(x[0]))
            print(ws_target.inverse_transform(y[0]))
            print(ws_target.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))  # produced by extract_conv.py
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training
    """
    1. n_epoch is the number of training epochs.
    2. In theory, the more epochs, the better the fit to the training data.
    3. With a very large number of epochs (say 1000) the model may overfit,
       though whether it actually overfits also depends on the data.
    4. The larger n_epoch is, the longer training takes.
    5. On an office machine with a P5000 GPU, 40 epochs took about 3 days
       and 2 epochs about an hour and a half; on a CPU, training is far
       slower, possibly several hours per epoch.
    """
    n_epoch = 2
    batch_size = 128
    # steps per epoch
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        allow_soft_placement=True,  # let the system pick CPU or GPU
        log_device_placement=False  # don't log device placement
    )

    # where the trained model is saved
    save_path = './model/s2ss_chatbot.ckpt'

    # reset the default graph
    tf.reset_default_graph()
    # build the graph
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        # create a session with the config above
        with tf.Session(config=config) as sess:
            # build the model
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                **params  # remaining hyperparameters
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            # wrap the batch generator in a background thread
            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                # progress bar
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                # the loop variable is unused, hence '_'
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # Reverse each input sequence,
                    # e.g. [[1, 2], [3, 4]] -> [[2, 1], [4, 3]].
                    # Reversing the source is a training trick from the
                    # original seq2seq encoder-decoder paper.
                    x = np.flip(x, axis=1)
                    # get the loss and the current learning rate
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    # update the progress-bar description
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                # save the model
                model.save(sess, save_path)

    # Sanity-check the trained model
    tf.reset_default_graph()  # reset the default graph
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    parallel_iterations=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            # print input, target and prediction as text
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            # only check three examples to see whether the dialogue
            # looks reasonable
            if t >= 3:
                break
def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training
    """
    1. n_epoch is the number of training epochs.
    2. In theory, the more epochs, the better the fit to the training data.
    3. With a very large number of epochs (say 1000) the model may overfit,
       though whether it actually overfits also depends on the data.
    4. The larger n_epoch is, the longer training takes.
    5. On an office machine with a P5000 GPU, 40 epochs took about 3 days
       and 2 epochs about an hour and a half; on a CPU, training is far
       slower, possibly several hours per epoch.
    """
    n_epoch = 2
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # build the model
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # reverse each input sequence,
                    # e.g. [[1, 2], [3, 4]] -> [[2, 1], [4, 3]]
                    x = np.flip(x, axis=1)
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

    # Evaluation
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    parallel_iterations=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def test(params):
    """Test how different hyperparameter combinations perform."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training
    n_epoch = 5
    batch_size = 32
    # x_data, y_data = shuffle(x_data, y_data, random_state=0)
    # x_data = x_data[:10000]
    # y_data = y_data[:10000]
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                **params
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            # print(sess.run(model.input_layer.kernel))
            # exit(1)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)
                    # print(x, y)
                    # print(xl, yl)
                    # exit(1)
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f} lr={:.6f}'.format(
                        epoch, np.mean(costs), lr
                    ))

                model.save(sess, save_path)

        flow.close()

    # Evaluation: beam search decode (beam_width=12)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=12,
        parallel_iterations=1,
        **params
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Evaluation: greedy-like decode (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=1,
        parallel_iterations=1,
        **params
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
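# Minimal sketch of the ThreadedGenerator pattern used by the training loops
# above (an assumption: the real threadedgenerator module may differ in
# detail). A daemon thread eagerly pulls batches from the wrapped generator
# into a bounded queue, so batch preparation overlaps with GPU compute
# instead of blocking it.
import queue
import threading


class SimpleThreadedGenerator:
    def __init__(self, iterator, queue_maxsize=30):
        self._queue = queue.Queue(maxsize=queue_maxsize)
        self._sentinel = object()
        self._thread = threading.Thread(
            target=self._produce, args=(iterator,), daemon=True)
        self._thread.start()

    def _produce(self, iterator):
        for item in iterator:
            self._queue.put(item)
        self._queue.put(self._sentinel)  # signal exhaustion

    def __iter__(self):
        return self

    def __next__(self):
        item = self._queue.get()
        if item is self._sentinel:
            raise StopIteration
        return item

    def close(self):
        # Best-effort shutdown: drain pending items so a producer blocked
        # on put() can finish and the daemon thread can exit.
        try:
            while True:
                self._queue.get_nowait()
        except queue.Empty:
            pass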
def test(bidirectional, cell_type, depth, attention_type):
    """Train on generated fake data and visualize the attention matrix."""
    from tqdm import tqdm
    from fake_data import generate
    # imports needed by this function (training and the attention plot)
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    import matplotlib.pyplot as plt
    from matplotlib import cm

    # Generate some fake data
    x_data, y_data, ws_input, ws_target = generate(size=10000)

    # Training
    split = int(len(x_data) * 0.9)
    x_train, x_test, y_train, y_test = (x_data[:split], x_data[split:],
                                        y_data[:split], y_data[split:])
    n_epoch = 2
    batch_size = 32
    steps = int(len(x_train) / batch_size) + 1

    config = tf.ConfigProto(device_count={'CPU': 1, 'GPU': 0},
                            allow_soft_placement=True,
                            log_device_placement=False)

    save_path = '/tmp/s2ss_atten.ckpt'

    with tf.Graph().as_default():
        model = SequenceToSequence(input_vocab_size=len(ws_input),
                                   target_vocab_size=len(ws_target),
                                   batch_size=batch_size,
                                   learning_rate=0.001,
                                   bidirectional=bidirectional,
                                   cell_type=cell_type,
                                   depth=depth,
                                   attention_type=attention_type,
                                   parallel_iterations=1)
        init = tf.global_variables_initializer()

        with tf.Session(config=config) as sess:
            sess.run(init)
            for epoch in range(1, n_epoch + 1):
                costs = []
                flow = batch_flow([x_train, y_train],
                                  [ws_input, ws_target], batch_size)
                bar = tqdm(range(steps),
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)))

            model.save(sess, save_path)

    # Attention weights cannot be visualized with beam search,
    # so greedy decoding (beam_width=0) is used here.
    with tf.Graph().as_default():
        model_pred = SequenceToSequence(input_vocab_size=len(ws_input),
                                        target_vocab_size=len(ws_target),
                                        batch_size=1,
                                        mode='decode',
                                        beam_width=0,
                                        bidirectional=bidirectional,
                                        cell_type=cell_type,
                                        depth=depth,
                                        attention_type=attention_type,
                                        parallel_iterations=1)
        init = tf.global_variables_initializer()

        with tf.Session(config=config) as sess:
            sess.run(init)
            model_pred.load(sess, save_path)

            pbar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
            t = 0
            for x, xl, y, yl in pbar:
                pred, atten = model_pred.predict(sess, np.array(x),
                                                 np.array(xl),
                                                 attention=True)
                ox = ws_input.inverse_transform(x[0])
                oy = ws_target.inverse_transform(y[0])
                op = ws_target.inverse_transform(pred[0])
                print(ox)
                print(oy)
                print(op)

                # plot the (decoder steps x encoder steps) alignment matrix
                fig, ax = plt.subplots()
                cax = ax.matshow(
                    atten.reshape([atten.shape[0], atten.shape[2]]),
                    cmap=cm.coolwarm)
                ax.set_xticks(np.arange(len(ox)))
                ax.set_yticks(np.arange(len(op)))
                ax.set_xticklabels(ox)
                ax.set_yticklabels(op)
                fig.colorbar(cax)
                plt.show()

                print('-' * 30)

                t += 1
                if t >= 10:
                    break
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units):
    """Test how different hyperparameter combinations perform."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # Training
    n_epoch = 2
    batch_size = 512
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './s2ss_chatbot_forward.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                learning_rate=0.0001,
                bidirectional=bidirectional,
                cell_type=cell_type,
                depth=depth,
                attention_type=attention_type,
                use_residual=use_residual,
                use_dropout=use_dropout,
                hidden_units=hidden_units,
                optimizer='adadelta',
                dropout=0.4,
                time_major=time_major,
                share_embedding=True
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = batch_flow([x_data, y_data], ws, batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)
                    ))

                model.save(sess, save_path)

    # Evaluation
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=12,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1,
        share_embedding=True
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, x, xl)
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout, time_major, hidden_units,
         preload=True):
    """Adversarial-style fine-tuning: score generator outputs with a trained
    discriminator plus several heuristic rewards."""
    from sequence_to_sequence import SequenceToSequence
    from discriminative import Discriminative
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))
    vectorizer = pickle.load(open('tfidf.pkl', 'rb'))

    # Training
    n_epoch = 2
    batch_size = 512
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_chatbot_ad.ckpt'
    forward_path = './s2ss_chatbot_forward.ckpt'
    discriminative_path = './s2ss_chatbot_discriminative.ckpt'

    graph_d = tf.Graph()
    graph_ad = tf.Graph()

    # Load the trained discriminator
    with graph_d.as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)
        sess_d = tf.Session(config=config)

        model_d = Discriminative(input_vocab_size=len(ws),
                                 batch_size=batch_size,
                                 learning_rate=0.0001,
                                 bidirectional=bidirectional,
                                 cell_type=cell_type,
                                 depth=depth,
                                 use_residual=use_residual,
                                 use_dropout=use_dropout,
                                 parallel_iterations=32,
                                 time_major=time_major,
                                 hidden_units=hidden_units,
                                 optimizer='adadelta',
                                 dropout=0.4)
        init = tf.global_variables_initializer()
        sess_d.run(init)
        model_d.load(sess_d, discriminative_path)

    # Build the model to be trained (the generator)
    with graph_ad.as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)
        sess_ad = tf.Session(config=config)

        model_ad = SequenceToSequence(
            input_vocab_size=len(ws),
            target_vocab_size=len(ws),
            batch_size=batch_size,
            # beam_width=12,
            learning_rate=0.0001,
            bidirectional=bidirectional,
            cell_type=cell_type,
            depth=depth,
            attention_type=attention_type,
            use_residual=use_residual,
            use_dropout=use_dropout,
            hidden_units=hidden_units,
            optimizer='adadelta',
            dropout=0.4,
            time_major=time_major,
            share_embedding=True)
        init = tf.global_variables_initializer()
        sess_ad.run(init)
        if preload:
            # warm-start from the forward model
            model_ad.load(sess_ad, forward_path)

    # Start training
    flow = batch_flow([x_data, y_data], ws, batch_size, raw=True)

    for epoch in range(1, n_epoch + 1):
        costs = []
        bar = tqdm(range(steps), total=steps,
                   desc='epoch {}, loss=0.000000'.format(epoch))
        for _ in bar:
            x, xl, xraw, y, yl, yraw = next(flow)

            # discriminator's probability that (x, y) is a real pair
            rewards = model_d.predict(sess_d, x, xl, y, yl)
            rewards = rewards[:, 1]

            texts = []
            for i in range(batch_size):
                # text = ws.inverse_transform(y[i])
                # text = ''.join(text)[:yl[i]]
                text = ''.join(yraw[i])
                texts.append(text)

            tfidfs = np.sum(vectorizer.transform(texts), axis=1)
            tfidfs_sum = np.sum(tfidfs)

            def smooth(x):
                # rescale a reward from [0, 1] to [1/3, 1] so one bad
                # component cannot zero out the whole product
                return (0.5 + x) * (2.0 / 3)

            for i in range(batch_size):
                text = texts[i]
                rewards[i] = smooth(rewards[i])
                rewards[i] *= smooth(repeat_reward(text))
                rewards[i] *= smooth(chinese_reward(text))
                rewards[i] *= smooth(similarity_reward(''.join(xraw[i]),
                                                       text))
                rewards[i] *= smooth(tfidfs[i] / tfidfs_sum * batch_size)

            rewards = rewards.reshape(-1, 1)

            cost = model_ad.train(sess_ad, x, xl, y, yl)  # , rewards)
            costs.append(cost)
            # lengths.append(np.mean(al))

            des = ('epoch {} '
                   'loss={:.6f} '
                   'rmean={:.4f} '
                   'rmin={:.4f} '
                   'rmax={:.4f} '
                   'rmed={:.4f}')
            bar.set_description(
                des.format(epoch, np.mean(costs), np.mean(rewards),
                           np.min(rewards), np.max(rewards),
                           np.median(rewards)))

        model_ad.save(sess_ad, save_path)
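# Worked example of the reward smoothing above (illustration only, not part
# of the original script). smooth maps [0, 1] linearly onto [1/3, 1], so the
# product of the five smoothed reward components stays above (1/3)^5 and a
# single zero-valued component can never annihilate the total reward.
def smooth(x):
    return (0.5 + x) * (2.0 / 3)


assert abs(smooth(0.0) - 1.0 / 3) < 1e-9  # worst case  -> 1/3
assert abs(smooth(0.5) - 2.0 / 3) < 1e-9  # midpoint    -> 2/3
assert abs(smooth(1.0) - 1.0) < 1e-9      # best case   -> 1.0
print(smooth(0.0) ** 5)                   # lower bound on the product, ~0.0041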