def test_given_data():
    from sequence2sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from thread_generator import ThreadedGenerator

    data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('WordSequence.pkl', 'rb'))

    n_epoch = 40
    batch_size = 5

    x_data = []
    y_data = []
    for i in data:
        x_data.append(i[0])
        y_data.append(i[1])
    print('done')
    print(len(x_data))
    print(len(y_data))

    # Integer division truncates, so add 1 to cover the last partial batch.
    steps = int(len(x_data) / batch_size) + 1

    flow = ThreadedGenerator(
        batch_flow([x_data, y_data], ws, batch_size, add_end=[False, True]),
        queue_maxsize=30
    )

    for i in range(1):
        datas = next(flow)
        print(datas)
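# The ThreadedGenerator imported above is used as a prefetching wrapper:
# batches are produced on a background thread and buffered in a bounded
# queue so data preparation overlaps with session.run() calls. A minimal
# sketch of that behavior (an assumed implementation, not the project's
# actual thread_generator module) looks like this:

import threading
import queue

class ThreadedGeneratorSketch:
    """Run a generator on a daemon thread, buffering items in a queue."""

    _SENTINEL = object()  # marks exhaustion of the wrapped generator

    def __init__(self, generator, queue_maxsize=30):
        self._queue = queue.Queue(maxsize=queue_maxsize)
        self._thread = threading.Thread(target=self._producer,
                                        args=(generator,), daemon=True)
        self._thread.start()

    def _producer(self, generator):
        for item in generator:
            self._queue.put(item)
        self._queue.put(self._SENTINEL)

    def __iter__(self):
        return self

    def __next__(self):
        item = self._queue.get()
        if item is self._SENTINEL:
            raise StopIteration
        return item

    def close(self):
        # Nothing to join here: the daemon thread exits with the process.
        pass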
def test(bidirectional, cell_type, depth,
         use_residual, use_dropout, time_major, hidden_units,
         output_project_active):
    """Test how different parameter settings behave on the generated fake data."""
    from rnn_crf import RNNCRF
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws_input, ws_target = pickle.load(open('ner.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_crf.ckpt'

    # Inference part
    tf.reset_default_graph()
    model_pred = RNNCRF(input_vocab_size=len(ws_input),
                        target_vocab_size=len(ws_target),
                        max_decode_step=100,
                        batch_size=1,
                        mode='decode',
                        bidirectional=bidirectional,
                        cell_type=cell_type,
                        depth=depth,
                        use_residual=use_residual,
                        use_dropout=use_dropout,
                        parallel_iterations=1,
                        time_major=time_major,
                        hidden_units=hidden_units,
                        output_project_active=output_project_active)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            user_text = input('Input Sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [list(user_text.lower())]
            bar = batch_flow([x_test, x_test], [ws_input, ws_target], 1)
            x, xl, _, _ = next(bar)
            print(x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(pred)
            print(ws_input.inverse_transform(x[0]))
            print(ws_target.inverse_transform(pred[0]))
def test(params): """测试不同参数在生成的假数据上的运行结果""" from sequence_to_sequence import SequenceToSequence from data_utils import batch_flow from word_sequence import WordSequence # pylint: disable=unused-variable x_data, _ = pickle.load(open('chatbot.pkl', 'rb')) ws = pickle.load(open('ws.pkl', 'rb')) for x in x_data[:5]: print(' '.join(x)) config = tf.ConfigProto(device_count={ 'CPU': 1, 'GPU': 0 }, allow_soft_placement=True, log_device_placement=False) # save_path = '/tmp/s2ss_chatbot.ckpt' save_path = './s2ss_chatbot.ckpt' # 测试部分 tf.reset_default_graph() model_pred = SequenceToSequence(input_vocab_size=len(ws), target_vocab_size=len(ws), batch_size=1, mode='decode', beam_width=0, **params) init = tf.global_variables_initializer() with tf.Session(config=config) as sess: sess.run(init) model_pred.load(sess, save_path) while True: user_text = input('Input Chat Sentence:') if user_text in ('exit', 'quit'): exit(0) x_test = [list(user_text.lower())] # x_test = [word_tokenize(user_text)] bar = batch_flow([x_test], ws, 1) x, xl = next(bar) x = np.flip(x, axis=1) # x = np.array([ # list(reversed(xx)) # for xx in x # ]) print(x, xl) pred = model_pred.predict(sess, np.array(x), np.array(xl)) print(pred) # prob = np.exp(prob.transpose()) print(ws.inverse_transform(x[0])) # print(ws.inverse_transform(pred[0])) # print(pred.shape, prob.shape) for p in pred: ans = ws.inverse_transform(p) print(ans)
def test(params, infos):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow

    x_data, _ = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=0,
        **params
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            # user_text = input('Please enter your sentence:')
            # if user_text in ('exit', 'quit'):
            #     exit(0)
            x_test = [list(infos.lower())]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            print(x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(pred)
            print(ws.inverse_transform(x[0]))
            for p in pred:
                ans = ws.inverse_transform(p)
                print(ans)
            return ans
def nlp(question):
    """Answer a single question with the trained chatbot model."""
    params = json.load(open(os.path.dirname(__file__) + '/params.json'))

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow

    x_data, _ = pickle.load(
        open(os.path.dirname(__file__) + '/data/chatbot.pkl', 'rb'))
    ws = pickle.load(open(os.path.dirname(__file__) + '/data/ws.pkl', 'rb'))

    # for x in x_data[:5]:
    #     print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = os.path.dirname(__file__) + '/data/s2ss_chatbot.ckpt'

    # Inference part
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            # user_text = input('Input Chat Sentence:')
            question = Converter('zh-hans').convert(question)  # Traditional -> Simplified
            if question in ('exit', 'quit'):
                exit(0)
            x_test = [list(question.lower())]
            # x_test = [word_tokenize(user_text)]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            # ws.inverse_transform(x[0]) -> ['</s>', '饭', '吃', '去', '要', '我']
            for p in pred:
                ans = ws.inverse_transform(p)
                str1 = ''.join(ans)        # list -> str
                line = str1.strip("</s>")
                line = Converter('zh-hant').convert(line)  # Simplified -> Traditional
                return line
def test(params):
    from seq_2_seq import SequenceToSequence
    from data_utils import batch_flow

    x_data, _ = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Show the first 5 x samples
    # for x in x_data[:5]:
    #     print(' '.join(x))

    config = tf.ConfigProto(
        # Limit the number of devices of each type (CPU only here)
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=200,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            # Local console input:
            user_text = input('Please enter your sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [list(user_text.lower())]
            # Or accept `infos` from a caller:
            # x_test = [list(infos.lower())]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            print('Input sentence and length:', x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print('Predicted ids:', pred)
            # Convert ids back to words
            print(ws.inverse_transform(x[0]))
            # for p in pred:
            #     ans = ws.inverse_transform(p)
            #     print('Predicted words:', ans)
            print('Predicted words:', ws.inverse_transform(pred[0]))
def start():
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow

    global input_list
    global response_dict
    global lock

    x_data, _ = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    **params)
    init = tf.global_variables_initializer()
    print("Model thread >> model ready")

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            lock.acquire()
            if input_list:
                request = input_list.pop(0)
                ip = request.get('IP')
                infos = request.get('infos')
                print("Model thread >> got request from", ip,
                      "content:", infos)
                x_test = [list(infos.lower())]
                bar = batch_flow([x_test], ws, 1)
                x, xl = next(bar)
                x = np.flip(x, axis=1)
                pred = model_pred.predict(sess, np.array(x), np.array(xl))
                for p in pred:
                    ans = "".join(ws.inverse_transform(p))
                    response_dict[ip] = ans
                    print("Model thread >> finished request from", ip,
                          "reply:", ans)
                    break
            lock.release()
            time.sleep(0.1)
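# A hypothetical producer counterpart for the start() loop above, showing
# how another thread could use the same global lock, input_list, and
# response_dict protocol. The names follow start(); the function itself is
# illustrative and not part of the original code:

import time

def submit_and_wait(ip, text, timeout=10.0):
    """Enqueue a request for the model thread and poll for its answer."""
    with lock:
        input_list.append({'IP': ip, 'infos': text})
    deadline = time.time() + timeout
    while time.time() < deadline:
        with lock:
            if ip in response_dict:
                return response_dict.pop(ip)
        time.sleep(0.05)
    return None  # timed out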
def chat(self, user_text):
    with tf.Session(config=self.config) as sess:
        self.model_pred.load(sess, self.save_path)
        x_test = [jieba.lcut(user_text.lower())]
        bar = batch_flow([x_test], self.ws, 1)
        x, xl = next(bar)
        x = np.flip(x, axis=1)
        pred = self.model_pred.predict(sess, np.array(x), np.array(xl))
        return ''.join(self.ws.inverse_transform(pred))
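# chat() above opens a new Session and reloads the checkpoint on every
# call, which is slow for a long-lived bot. A common variant (a sketch,
# assuming the same attributes as the surrounding class) loads once and
# keeps the session for the object's lifetime:

class PersistentChat:
    """Same inference flow as chat(), but the checkpoint loads only once."""

    def __init__(self, model_pred, ws, config, save_path):
        self.model_pred = model_pred
        self.ws = ws
        self.sess = tf.Session(config=config)
        self.model_pred.load(self.sess, save_path)  # load once, reuse

    def chat(self, user_text):
        x_test = [jieba.lcut(user_text.lower())]
        x, xl = next(batch_flow([x_test], self.ws, 1))
        x = np.flip(x, axis=1)
        pred = self.model_pred.predict(self.sess, np.array(x), np.array(xl))
        return ''.join(self.ws.inverse_transform(pred))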
def test(): """单元测试""" from fake_data import generate from data_utils import batch_flow from tqdm import tqdm x_data, y_data, ws_input, ws_target = generate(size=10000) batch_size = 4 n_epoch = 10 steps = int(len(x_data) / batch_size) + 1 config = tf.ConfigProto(device_count={ 'CPU': 1, 'GPU': 0 }, allow_soft_placement=True, log_device_placement=False) tf.reset_default_graph() with tf.Graph().as_default(): random.seed(0) np.random.seed(0) tf.set_random_seed(0) with tf.Session(config=config) as sess: model = Discriminative(len(ws_input), batch_size=batch_size) init = tf.global_variables_initializer() sess.run(init) # print(sess.run(model.input_layer.kernel)) # exit(1) for epoch in range(1, n_epoch + 1): costs = [] flow = batch_flow([x_data, y_data], [ws_input, ws_target], batch_size) bar = tqdm(range(steps), desc='epoch {}, loss=0.000000'.format(epoch)) for _ in bar: x, xl, y, yl = next(flow) targets = np.array([(0, 1) for x in range(len(y))]) cost = model.train(sess, x, xl, y, yl, targets) print(x.shape, xl.shape) print('cost.shape, cost', cost.shape, cost) exit(1) costs.append(cost) bar.set_description('epoch {} loss={:.6f}'.format( epoch, np.mean(costs)))
def chatbot_port(user_text, bidirectional=True, num=0, cell_type='lstm',
                 depth=2, attention_type='Bahdanau', use_residual=False,
                 use_dropout=False, time_major=False, hidden_units=512):
    random.seed(0)
    np.random.seed(0)
    tf.set_random_seed(0)

    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow

    x_data, _, ws = pickle.load(open('chatbot_cut/chatbot.pkl', 'rb'))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = 'chatbot_cut/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=64,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        parallel_iterations=1,
        time_major=time_major,
        hidden_units=hidden_units,
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        x_test = [jieba.lcut(user_text.lower())]
        print(user_text)
        bar = batch_flow([x_test], ws, 1)
        x, xl = next(bar)
        print(x, xl)
        x = np.flip(x, axis=1)
        pred = model_pred.predict(sess, np.array(x), np.array(xl))
        print(pred)
        return ''.join(ws.inverse_transform(pred))
def test(params, infos):
    from seq_to_seq import SequenceToSequence
    from data_utils import batch_flow

    # xiaohuangji corpus
    x_data, _ = pickle.load(open('chatbot_2/richor_chatbot.pkl', 'rb'))
    ws = pickle.load(open('chatbot_2/richor_ws.pkl', 'rb'))
    # x_data, _ = pickle.load(open('data/chatbot.pkl', 'rb'))
    # ws = pickle.load(open('data/ws.pkl', 'rb'))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './richor_model/s2ss_chatbot_anti.ckpt'
    # save_path = './model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            x_test = [list(infos.lower())]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(pred)
            for p in pred:
                ans = ws.inverse_transform(p)
                print(ans)
            return ans
def test(params):
    from seq_to_seq import Seq2Seq
    from data_utils import batch_flow_bucket as batch_flow
    from thread_generator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('wx.pkl', 'rb'))

    # Training
    # n_epoch is the number of epochs (more epochs overfit more easily)
    n_epoch = 1
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # Define the model
            model = Seq2Seq(input_vocab_size=len(ws),
                            target_vocab_size=len(ws),
                            batch_size=batch_size,
                            **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # e.g. [[1, 2], [3, 4]] -> [[2, 1], [4, 3]]
                    # reverse each sequence along axis=1
                    x = np.flip(x, axis=1)
                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

            model.save(sess, save_path=save_path)

    # Testing
    tf.reset_default_graph()
    model_pred = Seq2Seq(input_vocab_size=len(ws),
                         target_vocab_size=len(ws),
                         batch_size=1,
                         mode='decode',
                         beam_width=12,
                         parallel_iterations=1,
                         **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def test(bidirectional, cell_type, depth, attention_type,
         use_residual, use_dropout, time_major, hidden_units):
    """Test how different parameter settings behave on the generated fake data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws_input, ws_target = pickle.load(
        open('en-zh_cn.pkl', 'rb'))

    # Or generate fake data instead:
    # x_data, y_data, ws_input, ws_target = generate(size=10000)

    # Training part
    split = int(len(x_data) * 0.8)
    x_train, x_test, y_train, y_test = (
        x_data[:split], x_data[split:], y_data[:split], y_data[split:])

    n_epoch = 2
    batch_size = 256
    steps = int(len(x_train) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_en2zh.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws_input),
                                       target_vocab_size=len(ws_target),
                                       batch_size=batch_size,
                                       learning_rate=0.001,
                                       bidirectional=bidirectional,
                                       cell_type=cell_type,
                                       depth=depth,
                                       attention_type=attention_type,
                                       use_residual=use_residual,
                                       use_dropout=use_dropout,
                                       parallel_iterations=64,
                                       hidden_units=hidden_units,
                                       optimizer='adam',
                                       time_major=time_major)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = batch_flow([x_train, y_train], [ws_input, ws_target],
                              batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)))

            model.save(sess, save_path)

    # Inference part: beam search (beam_width=12)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws_input),
        target_vocab_size=len(ws_target),
        batch_size=1,
        mode='decode',
        beam_width=12,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1  # for test
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws_input.inverse_transform(x[0]))
            print(ws_target.inverse_transform(y[0]))
            print(ws_target.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Inference part: greedy decoding (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws_input),
        target_vocab_size=len(ws_target),
        batch_size=1,
        mode='decode',
        beam_width=1,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        hidden_units=hidden_units,
        time_major=time_major,
        parallel_iterations=1  # for test
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws_input.inverse_transform(x[0]))
            print(ws_target.inverse_transform(y[0]))
            print(ws_target.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def train(params):
    from seq_2_seq import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training mode
    n_epoch = 200
    batch_size = 256
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        allow_soft_placement=True,   # let TF pick CPU or GPU automatically
        log_device_placement=False   # whether to log device placement
    )

    save_path = './model/s2ss_chatbot.ckpt'

    # Reset the default graph
    tf.reset_default_graph()
    # Define the graph
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # Define the model
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            if tf.train.checkpoint_exists('./model/s2ss_chatbot.ckpt'):
                model.load(sess, save_path)
                print('>>>=Having restored model')

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # Reverse each input sequence
                    x = np.flip(x, 1)
                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)

            flow.close()

    # Test mode
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=200,
                                    parallel_iterations=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            # Only check three samples
            if t >= 3:
                break
def test(bidirectional, cell_type, depth, attention_type,
         use_residual, use_dropout, time_major, hidden_units):
    """Test how different parameter settings behave on the generated fake data."""
    from sequence_to_sequence import SequenceToSequence
    from discriminative import Discriminative
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # Training part
    n_epoch = 1
    batch_size = 512
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot_discriminative.ckpt'
    save_path_forward = './s2ss_chatbot_forward.ckpt'

    graph = tf.Graph()
    graph_d = tf.Graph()

    # Forward (generator) model
    with graph.as_default():
        model_pred = SequenceToSequence(
            input_vocab_size=len(ws),
            target_vocab_size=len(ws),
            batch_size=batch_size,
            mode='train',
            beam_width=0,
            bidirectional=bidirectional,
            cell_type=cell_type,
            depth=depth,
            attention_type=attention_type,
            use_residual=use_residual,
            use_dropout=use_dropout,
            time_major=time_major,
            hidden_units=hidden_units,
            optimizer='adadelta',
            dropout=0.4,
            share_embedding=True
        )
        init = tf.global_variables_initializer()
        sess = tf.Session(config=config)
        sess.run(init)
        model_pred.load(sess, save_path_forward)

    # Discriminator model
    with graph_d.as_default():
        model_d = Discriminative(
            input_vocab_size=len(ws),
            batch_size=batch_size * 2,
            learning_rate=0.0001,
            bidirectional=bidirectional,
            cell_type=cell_type,
            depth=depth,
            use_residual=use_residual,
            use_dropout=use_dropout,
            parallel_iterations=32,
            time_major=time_major,
            hidden_units=hidden_units,
            optimizer='adadelta',
            dropout=0.4
        )
        init = tf.global_variables_initializer()
        sess_d = tf.Session(config=config)
        sess_d.run(init)
        # model_d.load(sess, save_path_rl)

    # Start training
    flow = batch_flow([x_data, y_data], ws, batch_size)

    for epoch in range(1, n_epoch + 1):
        costs = []
        accuracy = []
        bar = tqdm(range(steps), total=steps,
                   desc='epoch {}, loss=0.000000'.format(epoch))
        for _ in bar:
            x, xl, y, yl = next(flow)

            _, a = model_pred.entropy(sess, x, xl, y, yl)

            # Truncate each generated sequence at its first END token
            al = []
            new_a = []
            for aa in a:
                j = 0
                for j, aaj in enumerate(aa):
                    if aaj == WordSequence.END:
                        break
                new_a.append(list(aa[:j]))
                if j <= 0:
                    j = 1
                al.append(j)

            # Pad generated and gold sequences to the same length with END
            max_len = max((a.shape[1], y.shape[1]))
            if a.shape[1] < max_len:
                a = np.concatenate(
                    (a, np.ones((batch_size, max_len - a.shape[1]))
                     * WordSequence.END),
                    axis=1)
            if y.shape[1] < max_len:
                y = np.concatenate(
                    (y, np.ones((batch_size, max_len - y.shape[1]))
                     * WordSequence.END),
                    axis=1)

            # Generated samples are labeled 0, real samples 1
            targets = np.array(([0] * len(a)) + ([1] * len(a)))
            batch = np.concatenate((a, y), axis=0)
            batchl = np.concatenate((al, yl), axis=0)
            batch = batch.tolist()
            batchl = batchl.tolist()
            # batch, batchl = shuffle(batch, batchl)

            xx = np.concatenate((x, x), axis=0)
            xxl = np.concatenate((xl, xl), axis=0)

            batch = np.array(batch).astype(np.int32)
            batchl = np.array(batchl)

            cost, acc = model_d.train(sess_d, xx, xxl, batch, batchl,
                                      targets)
            costs.append(cost)
            accuracy.append(acc)

            bar.set_description('epoch {} loss={:.6f} acc={:.6f} {}'.format(
                epoch, np.mean(costs), np.mean(accuracy), len(costs)))

    model_d.save(sess_d, save_path)
def test(bidirectional, cell_type, depth, attention_type,
         use_residual, use_dropout, time_major, hidden_units):
    """Test how different parameter settings behave on the generated fake data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws = pickle.load(open('chatbot.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot_forward.ckpt'

    # Inference part
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    share_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            user_text = input('Input Chat Sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [list(user_text.lower())]
            bar = batch_flow([x_test], [ws], 1)
            x, xl = next(bar)
            print(x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(pred)
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(pred[0]))
def test(bidirectional, cell_type, depth, attention_type,
         use_residual, use_dropout, time_major, hidden_units):
    """Test how different parameter settings behave on the generated fake data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    _, _, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # for x in x_data[:5]:
    #     print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot.ckpt'
    save_path_rl = './s2ss_chatbot_anti.ckpt'

    graph = tf.Graph()
    graph_rl = tf.Graph()

    # RL (anti-LM) model
    with graph_rl.as_default():
        model_rl = SequenceToSequence(input_vocab_size=len(ws),
                                      target_vocab_size=len(ws),
                                      batch_size=1,
                                      mode='decode',
                                      beam_width=12,
                                      bidirectional=bidirectional,
                                      cell_type=cell_type,
                                      depth=depth,
                                      attention_type=attention_type,
                                      use_residual=use_residual,
                                      use_dropout=use_dropout,
                                      parallel_iterations=1,
                                      time_major=time_major,
                                      hidden_units=hidden_units,
                                      share_embedding=True,
                                      pretrained_embedding=True)
        init = tf.global_variables_initializer()
        sess_rl = tf.Session(config=config)
        sess_rl.run(init)
        model_rl.load(sess_rl, save_path_rl)

    # Baseline model
    with graph.as_default():
        model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                        target_vocab_size=len(ws),
                                        batch_size=1,
                                        mode='decode',
                                        beam_width=12,
                                        bidirectional=bidirectional,
                                        cell_type=cell_type,
                                        depth=depth,
                                        attention_type=attention_type,
                                        use_residual=use_residual,
                                        use_dropout=use_dropout,
                                        parallel_iterations=1,
                                        time_major=time_major,
                                        hidden_units=hidden_units,
                                        share_embedding=True,
                                        pretrained_embedding=True)
        init = tf.global_variables_initializer()
        sess = tf.Session(config=config)
        sess.run(init)
        model_pred.load(sess, save_path)

    while True:
        user_text = input('Input Chat Sentence:')
        if user_text in ('exit', 'quit'):
            exit(0)
        x_test = [jieba.lcut(user_text.lower())]
        bar = batch_flow([x_test], [ws], 1)
        x, xl = next(bar)
        x = np.flip(x, axis=1)
        print(x, xl)

        pred = model_pred.predict(sess, np.array(x), np.array(xl))
        pred_rl = model_rl.predict(sess_rl, np.array(x), np.array(xl))

        print(ws.inverse_transform(x[0]))
        print('no:', ws.inverse_transform(pred[0]))
        print('rl:', ws.inverse_transform(pred_rl[0]))

        # Truncate the RL reply at the first END/PAD tag
        p = []
        for pp in ws.inverse_transform(pred_rl[0]):
            if pp == WordSequence.END_TAG:
                break
            if pp == WordSequence.PAD_TAG:
                break
            p.append(pp)
def train(params):
    from seq_2_seq import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    n_epoch = 40
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = 'model_anti/s2ss_chatbot_anti.ckpt'
    best_save_path = 'model_anti_best/best_cost.ckpt'

    # Training mode
    # The loss decreases slowly, so severe gradient vanishing is unlikely.
    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            # Resume training if a checkpoint already exists
            if tf.train.checkpoint_exists(
                    './model_anti/s2ss_chatbot_anti.ckpt'):
                model.load(sess, save_path)
                print('>>>=Having restored model')

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            temp_loss = 30
            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # permutation = np.random.permutation(batch_size)
                    # dummy_encoder_inputs = x[permutation, :]
                    # dummy_encoder_inputs_length = xl[permutation]
                    x = np.flip(x, axis=1)
                    dummy_encoder_inputs = np.flip(dummy_encoder_inputs,
                                                   axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y, yl, loss_only=True)
                    # Subtract half of the loss contributed by the dummy
                    # (negative) samples
                    add_loss *= -0.5

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path)
                mean_loss = np.mean(costs)
                if mean_loss <= temp_loss:
                    model.save(sess, best_save_path)
                    temp_loss = mean_loss
                with open('./model_anti/globalstep.txt', 'a+') as f:
                    f.write('global step is:{}\n'.format(epoch))

            flow.close()

    # Inference mode (beam_width=200)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=200,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print('Input question (reversed):', ws.inverse_transform(x[0]))
            print('Reference answer:', ws.inverse_transform(y[0]))
            print('Predicted answer:', ws.inverse_transform(pred[0][0]))
            t += 1
            if t >= 3:
                break

    # Inference mode (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print('Input question (reversed):', ws.inverse_transform(x[0]))
            print('Reference answer:', ws.inverse_transform(y[0]))
            print('Predicted answer:', ws.inverse_transform(pred[0][0]))
            t += 1
            if t >= 3:
                break
def test(params):
    from sequence2sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from thread_generator import ThreadedGenerator

    data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('WordSequence.pkl', 'rb'))

    x_data = []
    y_data = []
    for i in data:
        x_data.append(i[0])
        y_data.append(i[1])
    print('done')
    print(len(x_data))
    print(len(y_data))

    # Training
    #
    # 1. n_epoch is the number of training epochs.
    # 2. In theory, more epochs mean higher training accuracy.
    # 3. With a very large number of epochs (say 1000) the model may
    #    overfit, although whether it does also depends on the data.
    # 4. The larger n_epoch is, the longer training takes.
    # 5. On an office machine with a P5000 GPU, 40 epochs took about
    #    3 days and 2 epochs about an hour and a half; on a CPU, training
    #    is far slower, possibly hours per epoch.
    n_epoch = 2
    batch_size = 128
    steps = int(len(x_data) / batch_size)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            # Define the model
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)
            print('flow created')

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # Reverse each input sequence along axis=1
                    x = np.flip(x, axis=1)
                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

            model.save(sess, save_path)

    # Testing
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    parallel_iterations=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def test(bidirectional, cell_type, depth,
         use_residual, use_dropout, time_major, hidden_units,
         output_project_active, crf_loss=True, save_path='./s2ss_crf.ckpt'):
    """Test how different parameter settings behave on the generated fake data."""
    from rnn_crf import RNNCRF
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, y_data, ws_input, ws_target = pickle.load(open('ner.pkl', 'rb'))

    # Training part
    split = int(len(x_data) * 0.8)
    x_train, x_test, y_train, y_test = (
        x_data[:split], x_data[split:], y_data[:split], y_data[split:])

    n_epoch = 10
    batch_size = 128
    steps = int(len(x_train) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = RNNCRF(input_vocab_size=len(ws_input),
                           target_vocab_size=len(ws_target),
                           max_decode_step=100,
                           batch_size=batch_size,
                           learning_rate=0.001,
                           bidirectional=bidirectional,
                           cell_type=cell_type,
                           depth=depth,
                           use_residual=use_residual,
                           use_dropout=use_dropout,
                           parallel_iterations=64,
                           hidden_units=hidden_units,
                           optimizer='adam',
                           time_major=time_major,
                           output_project_active=output_project_active,
                           crf_loss=crf_loss)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = batch_flow([x_train, y_train], [ws_input, ws_target],
                              batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)))

            model.save(sess, save_path)

    # Evaluation part
    tf.reset_default_graph()
    model_pred = RNNCRF(input_vocab_size=len(ws_input),
                        target_vocab_size=len(ws_target),
                        max_decode_step=100,
                        batch_size=batch_size,
                        mode='decode',
                        bidirectional=bidirectional,
                        cell_type=cell_type,
                        depth=depth,
                        use_residual=use_residual,
                        use_dropout=use_dropout,
                        hidden_units=hidden_units,
                        time_major=time_major,
                        parallel_iterations=1,
                        output_project_active=output_project_active,
                        crf_loss=crf_loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        pbar = tqdm(range(100))
        flow = batch_flow([x_test, y_test], [ws_input, ws_target],
                          batch_size)
        acc = []
        prec = []
        rec = []
        for i in pbar:
            x, xl, y, yl = next(flow)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            for j in range(batch_size):
                right = np.asarray(ws_target.inverse_transform(y[j]))
                predict = ws_target.inverse_transform(pred[j])
                if len(predict) < len(right):
                    predict += ['O'] * (len(right) - len(predict))
                predict = np.asarray(predict)

                # Token accuracy over all positions
                pp = predict[:yl[j]]
                rr = right[:yl[j]]
                if len(rr) > 0:
                    acc.append(np.sum(pp == rr) / len(rr))

                # Recall: correctness over gold non-'O' tokens
                pp = predict[:yl[j]]
                rr = right[:yl[j]]
                pp = pp[rr != 'O']
                rr = rr[rr != 'O']
                if len(rr) > 0:
                    rec.append(np.sum(pp == rr) / len(rr))

                # Precision: correctness over predicted non-'O' tokens
                pp = predict[:yl[j]]
                rr = right[:yl[j]]
                rr = rr[pp != 'O']
                pp = pp[pp != 'O']
                if len(rr) > 0:
                    prec.append(np.sum(pp == rr) / len(rr))

            if i >= 3:
                pbar.set_description(
                    'acc: {:.4f} prec: {:.4f} rec: {:.4f}'.format(
                        np.mean(acc), np.mean(prec), np.mean(rec)))
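# The accuracy/precision/recall bookkeeping above can be restated as a
# small standalone helper (an illustrative rewrite, assuming 'O' marks the
# non-entity background tag as in the evaluation loop):

import numpy as np

def token_scores(right, predict):
    """Token accuracy, precision and recall against the 'O' background tag."""
    right = np.asarray(right)
    predict = np.asarray(predict)
    acc = np.mean(predict == right)
    gold = right != 'O'     # recall is measured over gold entity tokens
    hyp = predict != 'O'    # precision is measured over predicted entity tokens
    rec = np.mean(predict[gold] == right[gold]) if gold.any() else None
    prec = np.mean(predict[hyp] == right[hyp]) if hyp.any() else None
    return acc, prec, rec

print(token_scores(['B-PER', 'O', 'O'], ['B-PER', 'B-LOC', 'O']))
# (0.6666666666666666, 0.5, 1.0)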
def test(params):
    from sequence2sequence import SequenceToSequence
    from data_utils import batch_flow

    data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('WordSequence.pkl', 'rb'))

    x_data = []
    y_data = []
    for i in data:
        x_data.append(i[0])
        y_data.append(i[1])
    print('done')
    print(len(x_data))
    print(len(y_data))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './model/s2ss_chatbot.ckpt-26'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=0,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            user_text = input('Please enter your sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)  # the way to quit mid-session
            x_test = [list(user_text.lower())]
            batchs = batch_flow([x_test], ws, 1)
            x, xl = next(batchs)
            print(x, xl)
            # x = np.flip(x, axis=1)
            # print(x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print('Prediction:')
            reply = []
            for p in pred:
                ans = ws.inverse_transform(p)
                reply.extend(ans)
            print(''.join(reply))
def test(bidirectional, cell_type, depth, attention_type,
         use_residual, use_dropout, time_major, hidden_units):
    """Test how different parameter settings behave on the generated fake data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws = pickle.load(open('chatbot.pkl', 'rb'))

    for x in x_data[:5]:
        print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    # save_path = '/tmp/s2ss_chatbot.ckpt'
    save_path = './s2ss_chatbot_anti.ckpt'

    # Inference part
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=64,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        parallel_iterations=1,
        time_major=time_major,
        hidden_units=hidden_units,
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        # Batch mode: answer every line of test_input.txt into test_output.txt
        with open('test_input.txt', 'r', encoding='utf-8', newline='') as f:
            with open('test_output.txt', 'w', encoding='utf-8') as g:
                for user_text in f:
                    user_text = user_text.rstrip()
                    print(user_text, file=g)
                    x_test = [jieba.lcut(user_text.lower())]
                    bar = batch_flow([x_test], ws, 1)
                    x, xl = next(bar)
                    print(x, xl, file=g)
                    x = np.flip(x, axis=1)
                    pred = model_pred.predict(sess, np.array(x),
                                              np.array(xl))
                    ans = ws.inverse_transform(pred)
                    print(ans, file=g)

        # Interactive mode
        while True:
            user_text = input('Input Chat Sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [jieba.lcut(user_text.lower())]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            print(x, xl)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(pred)
            print(ws.inverse_transform(x[0]))
            ans = ws.inverse_transform(pred)
            print(ans)
def test(params): """测试不同参数在生成的假数据上的运行结果""" from sequence_to_sequence import SequenceToSequence from data_utils import batch_flow_bucket as batch_flow from word_sequence import WordSequence # pylint: disable=unused-variable from threadedgenerator import ThreadedGenerator x_data, y_data = pickle.load(open('chatbot.pkl', 'rb')) ws = pickle.load(open('ws.pkl', 'rb')) # 训练部分 n_epoch = 5 batch_size = 32 # x_data, y_data = shuffle(x_data, y_data, random_state=0) # x_data = x_data[:10000] # y_data = y_data[:10000] steps = int(len(x_data) / batch_size) + 1 config = tf.ConfigProto( # device_count={'CPU': 1, 'GPU': 0}, allow_soft_placement=True, log_device_placement=False ) save_path = './s2ss_chatbot.ckpt' tf.reset_default_graph() with tf.Graph().as_default(): random.seed(0) np.random.seed(0) tf.set_random_seed(0) with tf.Session(config=config) as sess: model = SequenceToSequence( input_vocab_size=len(ws), target_vocab_size=len(ws), batch_size=batch_size, **params ) init = tf.global_variables_initializer() sess.run(init) # print(sess.run(model.input_layer.kernel)) # exit(1) flow = ThreadedGenerator( batch_flow([x_data, y_data], ws, batch_size, add_end=[False, True]), queue_maxsize=30) for epoch in range(1, n_epoch + 1): costs = [] bar = tqdm(range(steps), total=steps, desc='epoch {}, loss=0.000000'.format(epoch)) for _ in bar: x, xl, y, yl = next(flow) x = np.flip(x, axis=1) # print(x, y) # print(xl, yl) # exit(1) cost, lr = model.train(sess, x, xl, y, yl, return_lr=True) costs.append(cost) bar.set_description('epoch {} loss={:.6f} lr={:.6f}'.format( epoch, np.mean(costs), lr )) model.save(sess, save_path) flow.close() # 测试部分 tf.reset_default_graph() model_pred = SequenceToSequence( input_vocab_size=len(ws), target_vocab_size=len(ws), batch_size=1, mode='decode', beam_width=12, parallel_iterations=1, **params ) init = tf.global_variables_initializer() with tf.Session(config=config) as sess: sess.run(init) model_pred.load(sess, save_path) bar = batch_flow([x_data, y_data], ws, 1, add_end=False) t = 0 for x, xl, y, yl in bar: x = np.flip(x, axis=1) pred = model_pred.predict( sess, np.array(x), np.array(xl) ) print(ws.inverse_transform(x[0])) print(ws.inverse_transform(y[0])) print(ws.inverse_transform(pred[0])) t += 1 if t >= 3: break tf.reset_default_graph() model_pred = SequenceToSequence( input_vocab_size=len(ws), target_vocab_size=len(ws), batch_size=1, mode='decode', beam_width=1, parallel_iterations=1, **params ) init = tf.global_variables_initializer() with tf.Session(config=config) as sess: sess.run(init) model_pred.load(sess, save_path) bar = batch_flow([x_data, y_data], ws, 1, add_end=False) t = 0 for x, xl, y, yl in bar: pred = model_pred.predict( sess, np.array(x), np.array(xl) ) print(ws.inverse_transform(x[0])) print(ws.inverse_transform(y[0])) print(ws.inverse_transform(pred[0])) t += 1 if t >= 3: break
def test(bidirectional, cell_type, depth, attention_type):
    """Train on fake data and visualize the attention map."""
    from tqdm import tqdm
    from fake_data import generate

    # Generate some fake data
    x_data, y_data, ws_input, ws_target = generate(size=10000)

    # Training part
    split = int(len(x_data) * 0.9)
    x_train, x_test, y_train, y_test = (
        x_data[:split], x_data[split:], y_data[:split], y_data[split:])

    n_epoch = 2
    batch_size = 32
    steps = int(len(x_train) / batch_size) + 1

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = '/tmp/s2ss_atten.ckpt'

    with tf.Graph().as_default():
        model = SequenceToSequence(input_vocab_size=len(ws_input),
                                   target_vocab_size=len(ws_target),
                                   batch_size=batch_size,
                                   learning_rate=0.001,
                                   bidirectional=bidirectional,
                                   cell_type=cell_type,
                                   depth=depth,
                                   attention_type=attention_type,
                                   parallel_iterations=1)
        init = tf.global_variables_initializer()

        with tf.Session(config=config) as sess:
            sess.run(init)
            for epoch in range(1, n_epoch + 1):
                costs = []
                flow = batch_flow([x_train, y_train],
                                  [ws_input, ws_target], batch_size)
                bar = tqdm(range(steps),
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    cost = model.train(sess, x, xl, y, yl)
                    costs.append(cost)
                    bar.set_description('epoch {} loss={:.6f}'.format(
                        epoch, np.mean(costs)))
            model.save(sess, save_path)

    # Attention visualization does not work with beam search,
    # so use greedy decoding (beam_width=0) here.
    with tf.Graph().as_default():
        model_pred = SequenceToSequence(input_vocab_size=len(ws_input),
                                        target_vocab_size=len(ws_target),
                                        batch_size=1,
                                        mode='decode',
                                        beam_width=0,
                                        bidirectional=bidirectional,
                                        cell_type=cell_type,
                                        depth=depth,
                                        attention_type=attention_type,
                                        parallel_iterations=1)
        init = tf.global_variables_initializer()

        with tf.Session(config=config) as sess:
            sess.run(init)
            model_pred.load(sess, save_path)

            pbar = batch_flow([x_test, y_test], [ws_input, ws_target], 1)
            t = 0
            for x, xl, y, yl in pbar:
                pred, atten = model_pred.predict(sess, np.array(x),
                                                 np.array(xl),
                                                 attention=True)
                ox = ws_input.inverse_transform(x[0])
                oy = ws_target.inverse_transform(y[0])
                op = ws_target.inverse_transform(pred[0])
                print(ox)
                print(oy)
                print(op)

                # Plot the attention weights as a heat map
                fig, ax = plt.subplots()
                cax = ax.matshow(
                    atten.reshape([atten.shape[0], atten.shape[2]]),
                    cmap=cm.coolwarm)
                ax.set_xticks(np.arange(len(ox)))
                ax.set_yticks(np.arange(len(op)))
                ax.set_xticklabels(ox)
                ax.set_yticklabels(op)
                fig.colorbar(cax)
                plt.show()

                print('-' * 30)
                t += 1
                if t >= 10:
                    break
def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from threadedgenerator import ThreadedGenerator

    # Generated by extract_conv.py
    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # Training
    #
    # 1. n_epoch is the number of training epochs.
    # 2. In theory, more epochs mean higher training accuracy.
    # 3. With a very large number of epochs (say 1000) the model may
    #    overfit, although whether it does also depends on the data.
    # 4. The larger n_epoch is, the longer training takes.
    # 5. On an office machine with a P5000 GPU, 40 epochs took about
    #    3 days and 2 epochs about an hour and a half; on a CPU, training
    #    is far, far slower, possibly hours per epoch.
    n_epoch = 2
    batch_size = 128
    # Steps per epoch
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        allow_soft_placement=True,   # let TF pick CPU or GPU automatically
        log_device_placement=False   # whether to log device placement
    )

    # Where to save the trained model
    save_path = './model/s2ss_chatbot.ckpt'

    # Reset the default graph
    tf.reset_default_graph()
    # Define the graph
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        # Pass in the config defined above
        with tf.Session(config=config) as sess:
            # Define the model
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                **params
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            # Run the data flow inside a threaded generator
            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            for epoch in range(1, n_epoch + 1):
                costs = []
                # Progress bar
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                # '_' marks a loop variable that is not used afterwards
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    # e.g. [[1, 2], [3, 4]] -> [[2, 1], [4, 3]]
                    # Reversing the inputs is a training trick from the
                    # original encoder-decoder (seq2seq) paper.
                    x = np.flip(x, axis=1)
                    # Get the loss and the learning rate
                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True)
                    costs.append(cost)
                    # Report progress
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

            # Save the model
            model.save(sess, save_path)

    # Test that the model works
    tf.reset_default_graph()  # reset the default graph
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    parallel_iterations=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            # Print the decoded text
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            # Only run three samples to check the model can chat
            if t >= 3:
                break
def test(params):
    from sequence2sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from thread_generator import ThreadedGenerator

    data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('WordSequence.pkl', 'rb'))

    n_epoch = 40
    batch_size = 128

    x_data = []
    y_data = []
    for i in data:
        x_data.append(i[0])
        y_data.append(i[1])
    print('done')
    print(len(x_data))
    print(len(y_data))

    # Integer division truncates, so add 1 to cover the last partial batch.
    steps = int(len(x_data) / batch_size) + 1

    # tf.ConfigProto() configures a Session when it is created.
    # allow_soft_placement=True: with "with tf.device('/cpu:0'):" TF lets
    # you pin operations to a device manually; if that device does not
    # exist or is unavailable the program would wait or fail, so this flag
    # lets TF fall back to an existing, available device automatically.
    # log_device_placement=True: logs which device (which CPU or GPU) each
    # operation and Tensor is assigned to, printed in the terminal.
    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False
    )

    save_path = './model/s2ss_chatbot.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(
                input_vocab_size=len(ws),
                target_vocab_size=len(ws),
                batch_size=batch_size,
                **params
            )
            init = tf.global_variables_initializer()
            sess.run(init)

            # Resume from the epoch-4 checkpoint
            model.load(sess, 'model/s2ss_chatbot.ckpt-4')

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30
            )

            dummy_encoder_inputs = np.array([
                np.array([WordSequence.PAD]) for _ in range(batch_size)
            ])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            for epoch in range(5, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f},lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

                model.save(sess, save_path='./model/s2ss_chatbot.ckpt',
                           index=epoch)

            flow.close()

    # Testing
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=1,
        parallel_iterations=1,
        **params
    )
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
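# A minimal, runnable illustration of the allow_soft_placement behavior
# described in the comment above (TF 1.x API): the op is pinned to a GPU
# that may not exist, and soft placement silently falls back to the CPU
# instead of raising a placement error:

import tensorflow as tf

demo_config = tf.ConfigProto(allow_soft_placement=True,
                             log_device_placement=True)
with tf.Graph().as_default():
    with tf.device('/gpu:0'):  # may not exist on this machine
        a = tf.constant([1.0, 2.0], name='a')
        b = tf.constant([3.0, 4.0], name='b')
        c = a + b
    with tf.Session(config=demo_config) as sess:
        print(sess.run(c))     # device placements are logged to stderr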
def test(params):
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence
    from threadedgenerator import ThreadedGenerator

    x_data, y_data = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    n_epoch = 2
    batch_size = 128
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    save_path = 'model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       **params)
            init = tf.global_variables_initializer()
            sess.run(init)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size,
                           add_end=[False, True]),
                queue_maxsize=30)

            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_length = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_length,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

            model.save(sess, save_path)
            flow.close()

    # Inference: beam search (beam_width=12)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Inference: greedy decoding (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1, add_end=False)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def test(bidirectional, cell_type, depth, attention_type,
         use_residual, use_dropout, time_major, hidden_units, anti):
    """Test how different parameter settings behave on the generated fake data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable

    x_data, _, ws = pickle.load(open('./pickle/chatbot.pkl', 'rb'))

    # for x in x_data[:5]:
    #     print(' '.join(x))

    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False
    )

    if anti is False:
        save_path = './pickle/s2ss_chatbot.ckpt'
    else:
        save_path = './pickle/s2ss_chatbot_anti.ckpt'

    # Inference part
    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        mode='decode',
        beam_width=0,
        bidirectional=bidirectional,
        cell_type=cell_type,
        depth=depth,
        attention_type=attention_type,
        use_residual=use_residual,
        use_dropout=use_dropout,
        parallel_iterations=1,
        time_major=time_major,
        hidden_units=hidden_units,
        share_embedding=True,
        pretrained_embedding=True
    )
    init = tf.global_variables_initializer()

    fp = open('meals.txt', 'r')
    meals = fp.readlines()
    order_table = defaultdict(list)  # all orders, keyed by user id

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            order = []  # orders placed in the current turn
            # Default meals to answer with when the user ordered nothing
            no_order_ans = ['咖啡', '紅茶', '綠茶']

            user_text = input()
            tmp = user_text.split()  # split the user id from the question
            user_id = tmp[0]
            user_text = tmp[1]

            if user_text in ('exit', 'quit'):
                exit(0)
            elif user_text == '#menu':
                print(str(order_table[user_id]) + ' 零。')
                continue
            elif user_text == '#delete':
                del order_table[user_id]
                continue

            s2tw = OpenCC('s2tw')
            user_text = s2tw.convert(user_text)
            order_tmp, user_text = replace_meal_line(user_text, meals)
            for line in iter(order_tmp):
                order.append(line)

            x_test = [jieba.lcut(user_text.lower())]
            bar = batch_flow([x_test], ws, 1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))

            s2tw = OpenCC('s2tw')
            out = filter(lambda ch: ch not in '</s><unk>',
                         ws.inverse_transform(pred[0]))
            out = list(out)
            # Substitute each 'allkindofmeal' placeholder with an ordered
            # meal, falling back to the default answers
            for i, _ in enumerate(out):
                if out[i] == 'allkindofmeal':
                    if not order:
                        out[i] = no_order_ans[0]
                        del no_order_ans[0]
                    else:
                        out[i] = order[0]
                        order_table[user_id].append(order[0])
                        del order[0]
            order_table[user_id].extend(order)
            out = ''.join(list(out))
            print(out)
def test(bidirectional, cell_type, depth, attention_type,
         use_residual, use_dropout, time_major, hidden_units):
    """Test how different parameter settings behave on the generated fake data."""
    from sequence_to_sequence import SequenceToSequence
    from data_utils import batch_flow_bucket as batch_flow
    from word_sequence import WordSequence  # pylint: disable=unused-variable
    from threadedgenerator import ThreadedGenerator

    emb = pickle.load(open('emb.pkl', 'rb'))
    x_data, y_data, ws = pickle.load(open('chatbot.pkl', 'rb'))

    # Training part
    n_epoch = 5
    batch_size = 128
    # x_data, y_data = shuffle(x_data, y_data, random_state=0)
    # x_data = x_data[:100000]
    # y_data = y_data[:100000]
    steps = int(len(x_data) / batch_size) + 1

    config = tf.ConfigProto(
        # device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    save_path = './s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    with tf.Graph().as_default():
        random.seed(0)
        np.random.seed(0)
        tf.set_random_seed(0)

        with tf.Session(config=config) as sess:
            model = SequenceToSequence(input_vocab_size=len(ws),
                                       target_vocab_size=len(ws),
                                       batch_size=batch_size,
                                       bidirectional=bidirectional,
                                       cell_type=cell_type,
                                       depth=depth,
                                       attention_type=attention_type,
                                       use_residual=use_residual,
                                       use_dropout=use_dropout,
                                       hidden_units=hidden_units,
                                       time_major=time_major,
                                       learning_rate=0.001,
                                       optimizer='adam',
                                       share_embedding=True,
                                       dropout=0.2,
                                       pretrained_embedding=True)
            init = tf.global_variables_initializer()
            sess.run(init)

            # Load the pre-trained embedding
            model.feed_embedding(sess, encoder=emb)

            flow = ThreadedGenerator(
                batch_flow([x_data, y_data], ws, batch_size),
                queue_maxsize=30)

            dummy_encoder_inputs = np.array(
                [np.array([WordSequence.PAD]) for _ in range(batch_size)])
            dummy_encoder_inputs_lengths = np.array([1] * batch_size)

            for epoch in range(1, n_epoch + 1):
                costs = []
                bar = tqdm(range(steps), total=steps,
                           desc='epoch {}, loss=0.000000'.format(epoch))
                for _ in bar:
                    x, xl, y, yl = next(flow)
                    x = np.flip(x, axis=1)

                    add_loss = model.train(sess, dummy_encoder_inputs,
                                           dummy_encoder_inputs_lengths,
                                           y, yl, loss_only=True)
                    add_loss *= -0.5

                    cost, lr = model.train(sess, x, xl, y, yl,
                                           return_lr=True,
                                           add_loss=add_loss)
                    costs.append(cost)
                    bar.set_description(
                        'epoch {} loss={:.6f} lr={:.6f}'.format(
                            epoch, np.mean(costs), lr))

            model.save(sess, save_path)
            flow.close()

    # Inference part: beam search (beam_width=12)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=12,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    learning_rate=0.001,
                                    optimizer='adam',
                                    share_embedding=True,
                                    pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            x = np.flip(x, axis=1)
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break

    # Inference part: greedy decoding (beam_width=1)
    tf.reset_default_graph()
    model_pred = SequenceToSequence(input_vocab_size=len(ws),
                                    target_vocab_size=len(ws),
                                    batch_size=1,
                                    mode='decode',
                                    beam_width=1,
                                    bidirectional=bidirectional,
                                    cell_type=cell_type,
                                    depth=depth,
                                    attention_type=attention_type,
                                    use_residual=use_residual,
                                    use_dropout=use_dropout,
                                    hidden_units=hidden_units,
                                    time_major=time_major,
                                    parallel_iterations=1,
                                    learning_rate=0.001,
                                    optimizer='adam',
                                    share_embedding=True,
                                    pretrained_embedding=True)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        bar = batch_flow([x_data, y_data], ws, 1)
        t = 0
        for x, xl, y, yl in bar:
            pred = model_pred.predict(sess, np.array(x), np.array(xl))
            print(ws.inverse_transform(x[0]))
            print(ws.inverse_transform(y[0]))
            print(ws.inverse_transform(pred[0]))
            t += 1
            if t >= 3:
                break
def test(params):
    from seq_to_seq import SequenceToSequence
    from data_utils import batch_flow

    # dgk corpus
    x_data, _ = pickle.load(open('chatbot.pkl', 'rb'))
    ws = pickle.load(open('ws.pkl', 'rb'))

    # xiaohuangji corpus
    # x_data, _ = pickle.load(open('data/xiaohaungji_chatbot.pkl', 'rb'))
    # ws = pickle.load(open('data/xiaohuangji_ws.pkl', 'rb'))

    # Show the first five samples
    # for x in x_data[:5]:
    #     print(' '.join(x))

    # GPU or CPU
    config = tf.ConfigProto(
        device_count={'CPU': 1, 'GPU': 0},
        allow_soft_placement=True,
        log_device_placement=False)

    # Path of the model to load
    save_path = './model/s2ss_chatbot.ckpt'
    # save_path = './xiaohaungji_model/s2ss_chatbot_anti.ckpt'

    tf.reset_default_graph()
    model_pred = SequenceToSequence(
        input_vocab_size=len(ws),
        target_vocab_size=len(ws),
        batch_size=1,
        # batch_size=256,
        mode='decode',
        # beam_width=0,
        **params)
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        sess.run(init)
        model_pred.load(sess, save_path)

        while True:
            user_text = input('Please enter your sentence:')
            if user_text in ('exit', 'quit'):
                exit(0)
            x_test = [list(user_text.lower())]
            bar = batch_flow(data=[x_test], ws=ws, batch_size=1)
            x, xl = next(bar)
            x = np.flip(x, axis=1)
            print(x, xl)
            pred = model_pred.predict(sess,
                                      encoder_inputs=np.array(x),
                                      encoder_inputs_length=np.array(xl))
            print(pred)
            print(ws.inverse_transform(x[0]))
            for p in pred:
                ans = ws.inverse_transform(p)
                print(ans)
            return ans