def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout,
         time_major, hidden_units):
    """Test how different hyper-parameters perform on the generated dummy data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable
    from threadedgenerator import ThreadedGenerator

    emb = pickle.load(open('emb.pkl', 'rb'))
    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # Training
    n_epoch = 5
    batch_size = 128
    # x_data, y_data = shuffle(x_data, y_data, random_state=0)
    # x_data = x_data[:100000]
    # y_data = y_data[:100000]
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                bidirectional=bidirectional,
                cell_type=cell_type,
                depth=depth,
                attention_type=attention_type,
                use_residual=use_residual,
                use_dropout=use_dropout,
                hidden_units=hidden_units,
                time_major=time_major,
                learning_rate=0.001,
                optimizer='adam',
                share_embedding=True,
                dropout=0.2,
                pretrained_embedding=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            # Load the pretrained embedding
            model.feed_embedding(sess, encoder=emb)
            # print(sess.run(model.input_layer.kernel))
            # exit(1)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size),
                queue_maxsize=30)

            # PAD-only encoder inputs for the anti-language-model pass
            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_lengths = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_lengths,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5
                    # print(x, y)

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True, add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

            flow.close()

    # Testing
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=12,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1,
        learning_rate=0.001,
        optimizer='adam',
        share_embedding=True,
        pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=1,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1,
        learning_rate=0.001,
        optimizer='adam',
        share_embedding=True,
        pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            # Keep inputs reversed at decode time too, consistent with training
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
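# The dummy PAD-only pass above implements an anti-language-model penalty: the
# decoder loss computed with an input-free (all-PAD) encoder estimates how
# likely a reply is regardless of the question, and subtracting half of it
# pushes the model away from generic, input-independent answers. A minimal
# sketch of the combined objective (the helper name is illustrative, not from
# the repository):
def _anti_lm_objective(conditional_loss, lm_loss, weight=0.5):
    # total loss = loss(y | x) - weight * loss(y | PAD), which is what
    # model.train(..., add_loss=-weight * lm_loss) computes above
    return conditional_loss - weight * lm_loss

# e.g. _anti_lm_objective(3.2, 2.8) == 3.2 - 0.5 * 2.8 == 1.8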
def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    n_epoch = 2
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = 'model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True, add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

            flow.close()

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
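# A hypothetical invocation sketch for the **params variant above. The keys
# mirror the keyword arguments of the explicit test() signature earlier in
# this section, but the values are illustrative assumptions, not settings
# taken from the repository:
if __name__ == '__main__':
    params = {
        'bidirectional': True,
        'cell_type': 'lstm',
        'depth': 2,
        'attention_type': 'Bahdanau',
        'use_residual': False,
        'use_dropout': True,
        'time_major': False,
        'hidden_units': 512,
        'learning_rate': 0.001,
        'optimizer': 'adam',
        'share_embedding': True,
    }
    test(params)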
def test(params): """测试不同参数在生成的假数据上的运行结果""" from sequence_to_sequence import SequenceToSequence from data_utils import batch_flow_bucket as batch_flow from word_sequence import WordSequence # pylint: disable=unused-variable from threadedgenerator import ThreadedGenerator x_data, y_data = pickle.load(open('chatbot.pkl', 'rb')) ws = pickle.load(open('ws.pkl', 'rb')) # 训练部分 n_epoch = 5 batch_size = 32 # x_data, y_data = shuffle(x_data, y_data, random_state=0) # x_data = x_data[:10000] # y_data = y_data[:10000] steps = int(len(x_data) / batch_size) + 1 config = tf.ConfigProto( # device_count={'CPU': 1, 'GPU': 0}, allow_soft_placement=True, log_device_placement=False ) save_path = './s2ss_chatbot.ckpt' tf.reset_default_graph() with tf.Graph().as_default(): random.seed(0) np.random.seed(0) tf.set_random_seed(0) with tf.Session(config=config) as sess: model = SequenceToSequence( input_vocab_size=len(ws), target_vocab_size=len(ws), batch_size=batch_size, **params ) init = tf.global_variables_initializer() sess.run(init) # print(sess.run(model.input_layer.kernel)) # exit(1) flow = ThreadedGenerator( batch_flow([x_data, y_data], ws, batch_size, add_end=[False, True]), queue_maxsize=30) for epoch in range(1, n_epoch + 1): costs = [] bar = tqdm(range(steps), total=steps, desc='epoch {}, loss=0.000000'.format(epoch)) for _ in bar: x, xl, y, yl = next(flow) x = np.flip(x, axis=1) # print(x, y) # print(xl, yl) # exit(1) cost, lr = model.train(sess, x, xl, y, yl, return_lr=True) costs.append(cost) bar.set_description('epoch {} loss={:.6f} lr={:.6f}'.format( epoch, np.mean(costs), lr )) model.save(sess, save_path) flow.close() # 测试部分 tf.reset_default_graph() model_pred = SequenceToSequence( input_vocab_size=len(ws), target_vocab_size=len(ws), batch_size=1, mode='decode', beam_width=12, parallel_iterations=1, **params ) init = tf.global_variables_initializer() with tf.Session(config=config) as sess: sess.run(init) model_pred.load(sess, save_path) bar = batch_flow([x_data, y_data], ws, 1, add_end=False) t = 0 for x, xl, y, yl in bar: x = np.flip(x, axis=1) pred = model_pred.predict( sess, np.array(x), np.array(xl) ) print(ws.inverse_transform(x[0])) print(ws.inverse_transform(y[0])) print(ws.inverse_transform(pred[0])) t += 1 if t >= 3: break tf.reset_default_graph() model_pred = SequenceToSequence( input_vocab_size=len(ws), target_vocab_size=len(ws), batch_size=1, mode='decode', beam_width=1, parallel_iterations=1, **params ) init = tf.global_variables_initializer() with tf.Session(config=config) as sess: sess.run(init) model_pred.load(sess, save_path) bar = batch_flow([x_data, y_data], ws, 1, add_end=False) t = 0 for x, xl, y, yl in bar: pred = model_pred.predict( sess, np.array(x), np.array(xl) ) print(ws.inverse_transform(x[0])) print(ws.inverse_transform(y[0])) print(ws.inverse_transform(pred[0])) t += 1 if t >= 3: break
def test(bidirectional, cell_type, depth,
         attention_type, use_residual, use_dropout,
         time_major, hidden_units, optimizer, embedding_size):
    '''
    Test how different hyper-parameters perform on the generated dummy data.
    :param bidirectional:
    :param cell_type:
    :param depth: model depth
    :param attention_type: type of attention mechanism
    :param use_residual:
    :param use_dropout:
    :param time_major:
    :param hidden_units:
    :return:
    '''
    # Imports added so the snippet is self-contained; module names follow the
    # sibling functions in this section
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket
    from threadedgenerator import ThreadedGenerator
    from sklearn.utils import shuffle

    # Raw strings so the backslashes in the Windows paths are not treated as
    # escape sequences
    emb_path = r'D:\project\Chatbot_CN\Chatbot_Data\Text_generator\emb.pkl'
    chatbot_path = r'D:\project\Chatbot_CN\Chatbot_Data\Text_generator\chatbot.pkl'

    emb = pickle.load(open(emb_path, 'rb'))
    x_data, y_data, ws = pickle.load(open(chatbot_path, 'rb'))

    # Training
    n_epoch = 40
    batch_size = 60

    # Shuffle the data each training round. [the importance of shuffling]
    # Difference between np.random.permutation and shuffle:
    # both reshuffle an array, i.e. randomly permute the order of its elements;
    # shuffle operates on the original array in place, changing its order and
    # returning nothing, while permutation does not touch the original array
    # and instead returns a new, shuffled copy.
    x_data, y_data = shuffle(x_data, y_data, random_state=0)
    # x_data = x_data[:10000]
    # y_data = y_data[:10000]

    steps = int(len(x_data) / batch_size) + 1  # number of steps per epoch

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './chatbots/S2S_Chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                bidirectional=bidirectional,
                cell_type=cell_type,
                depth=depth,
                attention_type=attention_type,
                use_residual=use_residual,
                use_dropout=use_dropout,
                hidden_units=hidden_units,
                time_major=time_major,
                learning_rate=0.001,
                optimizer=optimizer,
                embedding_size=embedding_size,
                share_embedding=True,
                dropout=0.2,
                pretrained_embedding=True
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            # Load the pretrained embedding
            model.feed_embedding(sess, encoder=emb)

            flow = ThreadedGenerator(
                batch_flow_bucket([x_data, y_data], ws, batch_size),
                queue_maxsize=30
            )

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)
                    # print(x, y)
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f} lr={:.6f}'.format(
                        epoch, np.mean(costs), lr
                    ))

                model.save(sess, save_path)

            flow.close()

    # Testing
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=12,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1,
        learning_rate=0.001,
        optimizer='adam',
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow_bucket([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=1,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1,
        learning_rate=0.001,
        optimizer='adam',
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow_bucket([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            # Keep inputs reversed at decode time too, consistent with training
            x = np.flip(x, axis=1)
            pred = model_pred.predict(
                sess,
                np.array(x),
                np.array(xl)
            )
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
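# A hypothetical entry point for the explicit-argument variant above; every
# value below is an illustrative assumption, since the original call site is
# not shown in this section:
if __name__ == '__main__':
    test(bidirectional=True,
         cell_type='lstm',
         depth=2,
         attention_type='Bahdanau',
         use_residual=False,
         use_dropout=True,
         time_major=False,
         hidden_units=512,
         optimizer='adam',
         embedding_size=300)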
def train(params):
    from seq_2_seq import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    n_epoch = 40
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = 'model_anti/s2ss_chatbot_anti.ckpt'
    best_save_path = 'model_anti_best/best_cost.ckpt'

    # Training mode
    # The loss decreases slowly, but without severe gradient vanishing
    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            # Resume training if a checkpoint already exists
            if tf.train.checkpoint_exists(save_path):
                model.load(sess, save_path)
                print('>>>=Having restored model')

            flow = ThreadedGenerator(batch_flow([x_data, y_data], ws,
                                                batch_size,
                                                add_end=[False, True]),
                                     queue_maxsize=30)

            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            temp_loss = 30  # initial threshold for the "best" checkpoint
            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # permutation = np.random.permutation(batch_size)
                    # dummy_encoder_inputs = x[permutation, :]
                    # dummy_encoder_inputs_length = xl[permutation]

                    x = np.flip(x, axis=1)
                    # Flip the dummy inputs the same way (a no-op here, since
                    # each dummy sequence has length 1)
                    dummy_encoder_inputs = np.flip(dummy_encoder_inputs,
                                                   axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y, yl, loss_only=True)
                    # Equivalent to subtracting the loss contributed by the
                    # negative (input-free) samples
                    add_loss *= -0.5

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True, add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)
                mean_loss = np.mean(costs)
                if mean_loss <= temp_loss:
                    model.save(sess, best_save_path)
                    temp_loss = mean_loss
                with open('./model_anti/globalstep.txt', 'a+') as f:
                    f.write('global step is:{}\n'.format(epoch))

            flow.close()

    # Prediction mode (beam_width=200)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=200,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print('Input question (reversed):', ws.inverse_transform(x[0]))
            print('Target answer:', ws.inverse_transform(y[0]))
            print('Predicted answer:', ws.inverse_transform(pred[0][0]))
            t += 1
            if t >= 3:
                break

    # Prediction mode (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print('Input question (reversed):', ws.inverse_transform(x[0]))
            print('Target answer:', ws.inverse_transform(y[0]))
            print('Predicted answer:', ws.inverse_transform(pred[0][0]))
            t += 1
            if t >= 3:
                break
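# The best-checkpoint bookkeeping above (temp_loss / best_save_path) can be
# factored into a small helper; a sketch reusing the repo's model.save()
# (the helper name is hypothetical):
def save_if_best(model, sess, mean_loss, best_loss, best_path):
    # Persist a separate "best" checkpoint whenever the epoch mean loss
    # improves, and return the updated best loss.
    if mean_loss <= best_loss:
        model.save(sess, best_path)
        return mean_loss
    return best_loss

# usage inside the epoch loop:
#   temp_loss = save_if_best(model, sess, np.mean(costs), temp_loss,
#                            best_save_path)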
def train(params):
    from seq_2_seq import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training mode
    n_epoch = 200
    batch_size = 256
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        allow_soft_placement=True,  # let TF pick CPU or GPU automatically
        log_device_placement=False  # whether to log device placement
    )

    save_path = './model/s2ss_chatbot.ckpt'

    # Reset the default graph
    tf.reset_default_graph()
    # Build the graph
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # Define the model
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            if tf.train.checkpoint_exists(save_path):
                model.load(sess, save_path)
                print('>>>=Having restored model')

            flow = ThreadedGenerator(batch_flow([x_data, y_data], ws,
                                                batch_size,
                                                add_end=[False, True]),
                                     queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # Reverse every sequence in the batch
                    x = np.flip(x, 1)
                    cost, lr = model.train(sess, x, xl, y, yl, return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

            flow.close()

    # Test mode
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=200,
                                    parallel_iterations=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            # Only sample three examples here
            if t >= 3:
                break
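# ThreadedGenerator is used throughout to prefetch batches on a background
# thread so training does not stall while batch_flow builds the next batch.
# A minimal sketch of what such a wrapper might look like; this is assumed
# behavior inferred from the name and usage, and the repo's threadedgenerator
# module may differ:
import queue
import threading

class _ThreadedGeneratorSketch:
    def __init__(self, iterator, queue_maxsize=30):
        self._queue = queue.Queue(maxsize=queue_maxsize)
        self._sentinel = object()
        self._thread = threading.Thread(
            target=self._produce, args=(iterator,), daemon=True)
        self._thread.start()

    def _produce(self, iterator):
        # Fill the bounded queue; the sentinel marks exhaustion.
        for item in iterator:
            self._queue.put(item)
        self._queue.put(self._sentinel)

    def __iter__(self):
        return self

    def __next__(self):
        item = self._queue.get()
        if item is self._sentinel:
            raise StopIteration
        return item

    def close(self):
        # A real implementation would also stop and join the producer thread.
        pass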