def train(params):
    assert params["mode"].lower() == "train", "change training mode to 'train'"

    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    print('true vocab is ', vocab)

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    print("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager)
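# For reference, a minimal sketch of how the train() entry point above might be driven.
# Only the key names are taken from the functions in this section; every value below
# (paths, sizes, hyper-parameters) is a placeholder, not the repository's actual config.
if __name__ == "__main__":
    params = {
        "mode": "train",
        "vocab_path": "data/vocab.txt",       # placeholder path
        "vocab_size": 30000,
        "seq2seq_model_dir": "ckpt/seq2seq",  # placeholder path
        "batch_size": 64,
        "epochs": 10,
        "learning_rate": 0.001,
        "max_enc_len": 200,
        "max_dec_len": 40,
        "steps_per_epoch": 500,
        "trained_epoch": 0,
        "debug_mode": False,
    }
    train(params)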
def test(params):
    assert params["mode"].lower() == "test", "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    # path = ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")

    # for batch in b:
    #     yield batch_greedy_decode(model, batch, vocab, params)
    if params['greedy_decode']:
        # params['batch_size'] = 512
        predict_result(model, params, vocab, params['test_save_dir'])
def train(params):
    assert params["mode"].lower() == "train", "change training mode to 'train'"

    # Corresponds to vocab.txt; vocab_size is set to 30000.
    # The Vocab class is defined in the batcher module.
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    # print('true vocab is ', vocab)  # commented out: this prints the object repr, not the size
    print('true vocab is ', vocab.count)  # equals the configured 30000

    print("Creating the batcher ...")
    b = batcher(vocab, params)
    # print(type(b))  # <class 'tensorflow.python.data.ops.dataset_ops.DatasetV1Adapter'>

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint_vocab30000".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    print("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager)
def test(params):
    assert params["mode"].lower() == "test", "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = PGN(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    # NOTE: checkpoint_dir was not defined in the original snippet; the model-dir key below is an assumption.
    checkpoint_dir = "{}/checkpoint".format(params["pgn_model_dir"])
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)

    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    for batch in b:
        print(beam_decode(model, batch, vocab, params))
def predict_result(model, params, vocab, result_save_path):
    if params['greedy_decode']:
        dataset = batcher(vocab, params)
        results = greedy_decode(model, dataset, vocab, params)
    else:
        # Beam search decodes one source sequence at a time: build the dataset with
        # batch_size = 1, then run the model with batch_size = beam_size.
        params1 = params.copy()
        params1["batch_size"] = 1
        dataset = batcher(vocab, params1)
        params["batch_size"] = params["beam_size"]
        results = beam_decode_all(model, dataset, vocab, params)
    results = list(map(lambda x: x.replace(" ", ""), results))
    # Save the results
    save_predict_result(results, params)
    return results
def predict_result(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    # Run greedy decoding to get the predictions
    results = greedy_decode(model, dataset, vocab, params)
    results = list(map(lambda x: x.replace(" ", ""), results))
    # Save the results
    save_predict_result(results, params)
    return results
def predict_result(model, params, vocab):
    dataset = batcher(vocab, params)
    # Decode the predictions; eval() suggests params['greedy_decode'] is stored as a
    # string such as 'True' / 'False' in this variant.
    if eval(params['greedy_decode']):
        results = greedy_decode(model, dataset, vocab, params)
    else:
        results = beam_decode(model, dataset, vocab, params)
    results = list(map(lambda x: x.replace(" ", ""), results))
    # Save the results
    save_predict_result(results, params)
    return results
def predict_result(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    # Decode the predictions
    if params['greedy_decode']:
        results = greedy_decode(model, dataset, vocab, params)
    elif params['beam_search_decode']:
        results = beam_decode(model, dataset, vocab, params)
    else:
        raise ValueError("set either 'greedy_decode' or 'beam_search_decode' in params")
    results = list(map(lambda x: x.replace(" ", ""), results))
    # print("results2", results)  # 64 elements, each a sequence of length dec_max_len
    # Save the results
    save_predict_result(results, params)
    return results
def predict_result_beam(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    batch_size = params["batch_size"]
    results = []
    sample_size = 20000
    # Number of batch steps: round up (the +1 acts like math.ceil),
    # because the last batch may be smaller than batch_size but still has to be decoded.
    steps_epoch = sample_size // batch_size + 1
    # Create the iterator once; calling iter(dataset) inside the loop would restart at the first batch.
    dataset_iter = iter(dataset)
    for _ in tqdm(range(steps_epoch)):
        enc_data = next(dataset_iter)
        results += beam_decode(model, enc_data, vocab, params)
    # Save the results
    save_predict_result(results, params, 'beam_decode')
    return results
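# Each predict_result variant above ends by calling save_predict_result, which is not shown
# in this section. The helper below is only a sketch of what it could look like, assuming the
# 'test_x_dir' / 'test_save_dir' keys seen elsewhere in this section and a plain CSV layout
# with a 'Prediction' column; the real implementation in the repository may differ.
import os

import pandas as pd


def save_predict_result(results, params, decode_mode="greedy_decode"):
    """Hypothetical helper: align predictions with the test set and write them to a CSV."""
    # Load the test set so each prediction can be aligned with its original row.
    test_df = pd.read_csv(params['test_x_dir'])
    # Truncate in case the last (partial) batch produced extra rows.
    test_df = test_df.iloc[:len(results)].copy()
    test_df['Prediction'] = results[:len(test_df)]
    save_path = os.path.join(params['test_save_dir'],
                             'result_{}.csv'.format(decode_mode))
    test_df.to_csv(save_path, index=False, encoding='utf-8')
    print('Saved prediction results to {}'.format(save_path))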
def train_model(model, vocab, params, checkpoint_manager):
    epochs = params['epochs']
    # batch_size = params['batch_size']
    # max_dec_len = params['max_dec_len']
    # max_enc_len = params['max_enc_len']

    optimizer = tf.keras.optimizers.Adagrad(params['learning_rate'],
                                            initial_accumulator_value=params['adagrad_init_acc'],
                                            clipnorm=params['max_grad_norm'])
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

    # Define the loss function
    def loss_function(real, pred, padding_mask):
        loss = 0
        for t in range(real.shape[1]):
            if padding_mask is not None:
                loss_ = loss_object(real[:, t], pred[:, t, :])
                mask = tf.cast(padding_mask[:, t], dtype=loss_.dtype)
                loss_ *= mask
                loss_ = tf.reduce_mean(loss_, axis=0)  # batch-wise
                loss += loss_
            else:
                loss_ = loss_object(real[:, t], pred[:, t, :])
                loss_ = tf.reduce_mean(loss_, axis=0)  # batch-wise
                loss += loss_
        return tf.reduce_mean(loss)

    # One training step
    @tf.function
    def train_step(enc_inp, extended_enc_input, max_oov_len,
                   dec_input, dec_target, cov_loss_wt,
                   enc_pad_mask, padding_mask=None):
        batch_loss = 0
        with tf.GradientTape() as tape:
            enc_output, enc_hidden = model.call_encoder(enc_inp)
            # Initial decoder hidden state
            dec_hidden = enc_hidden
            # Predict the target sequence step by step
            predictions, _, attentions, coverages = model(dec_input,
                                                          dec_hidden,
                                                          enc_output,
                                                          dec_target,
                                                          extended_enc_input,
                                                          max_oov_len,
                                                          enc_pad_mask=enc_pad_mask,
                                                          use_coverage=True,
                                                          prev_coverage=None)
            # print('dec_target is :{}'.format(dec_target))
            # print('predictions is :{}'.format(predictions.shape))
            # print('dec_target is :{}'.format(dec_target.shape))
            # print('padding_mask is :{}'.format(padding_mask.shape))
            # attentions: [max_y_len, batch_size, max_x_len]
            # print('attentions is :{}'.format(attentions))
            # coverages: [max_y_len, batch_size, max_x_len, 1]
            # print('coverages is :{}'.format(coverages))
            # l_loss = loss_function(dec_target, predictions, padding_mask)
            # print('l_loss :{}'.format(l_loss))
            # c_loss = coverage_loss(attentions, coverages, padding_mask)
            # print('c_loss :{}'.format(c_loss))

            # Total loss = cross-entropy loss + weighted coverage loss
            batch_loss = loss_function(dec_target, predictions, padding_mask) + \
                cov_loss_wt * coverage_loss(attentions, coverages, padding_mask)

        variables = model.encoder.trainable_variables + model.decoder.trainable_variables + \
            model.attention.trainable_variables + model.pointer.trainable_variables
        gradients = tape.gradient(batch_loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
        return batch_loss

    for epoch in range(epochs):
        start = time.time()
        dataset = batcher(vocab, params)
        total_loss = 0
        step = 0
        for encoder_batch_data, decoder_batch_data in dataset:
            # print('batch[0]["enc_input"] is ', batch[0]["enc_input"])
            # print('batch[0]["extended_enc_input"] is ', batch[0]["extended_enc_input"])
            # print('batch[1]["dec_input"] is ', batch[1]["dec_input"])
            # print('batch[1]["dec_target"] is ', batch[1]["dec_target"])
            # print('batch[0]["max_oov_len"] is ', batch[0]["max_oov_len"])
            batch_loss = train_step(encoder_batch_data["enc_input"],
                                    encoder_batch_data["extended_enc_input"],
                                    encoder_batch_data["max_oov_len"],
                                    decoder_batch_data["dec_input"],
                                    decoder_batch_data["dec_target"],
                                    cov_loss_wt=0.5,
                                    enc_pad_mask=encoder_batch_data["sample_encoder_pad_mask"],
                                    padding_mask=decoder_batch_data["sample_decoder_pad_mask"])
            total_loss += batch_loss
            step += 1
            if step % 1 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, step, batch_loss.numpy()))

        # Saving (checkpoint) the model every 2 epochs
        if (epoch + 1) % 2 == 0:
            ckpt_save_path = checkpoint_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(epoch + 1, ckpt_save_path))

        print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / step))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

        if step > params['max_train_steps']:
            break
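# train_step above adds cov_loss_wt * coverage_loss(...) to the cross-entropy term, but
# coverage_loss itself is not shown in this section. Below is a minimal sketch of the standard
# pointer-generator coverage penalty (sum over source positions of min(attention, coverage),
# masked and averaged), written against the tensor shapes noted in the comments in train_step;
# the actual signature and shape handling in the repository may differ.
import tensorflow as tf


def coverage_loss(attentions, coverages, padding_mask):
    """Hypothetical coverage penalty (See et al., 2017): sum_t sum_i min(a_t_i, c_t_i).

    Assumed shapes, taken from the commented prints in train_step:
      attentions:   [max_y_len, batch_size, max_x_len]
      coverages:    [max_y_len, batch_size, max_x_len, 1]
      padding_mask: [batch_size, max_y_len]
    """
    coverages = tf.squeeze(coverages, axis=-1)                               # [max_y_len, batch_size, max_x_len]
    # Penalize attention that revisits already-covered source positions.
    cov_losses = tf.reduce_sum(tf.minimum(attentions, coverages), axis=-1)   # [max_y_len, batch_size]
    cov_losses = tf.transpose(cov_losses)                                    # [batch_size, max_y_len]
    mask = tf.cast(padding_mask, dtype=cov_losses.dtype)
    # Average over the real (non-padded) decoder steps, then over the batch.
    loss_per_example = tf.reduce_sum(cov_losses * mask, axis=-1) / tf.maximum(
        tf.reduce_sum(mask, axis=-1), 1.0)
    return tf.reduce_mean(loss_per_example)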
def test(params):
    assert params["mode"].lower() == "test", "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    # print("Creating the batcher ...")
    # b = batcher(vocab, params)  # previously done inside predict_result

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    # path = ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")
    # for batch in b:
    #     yield batch_greedy_decode(model, batch, vocab, params)

    """
    Change: the predict_result function was removed. The shared steps_epoch handling is
    lifted out here, and then the code branches into greedy_decode / beam_decode.
    """
    # Calls batcher -> batch_generator in the batcher module (the example_generator branch
    # for mode == "test" starts around line 207).
    dataset = batcher(vocab, params)
    # Number of test examples
    sample_size = params['sample_size']
    steps_epoch = sample_size // params["batch_size"] + 1
    results = []
    # Create the iterator once; calling iter(dataset) inside the loop would always return the first batch.
    dataset_iter = iter(dataset)
    for i in tqdm(range(steps_epoch)):
        enc_data, _ = next(dataset_iter)
        # Greedy search if True, otherwise beam search
        if params['greedy_decode']:
            # print("----------------- greedy_decode mode -----------------")
            results += batch_greedy_decode(model, enc_data, vocab, params)
        else:
            # print("----------------- beam_decode mode -----------------")
            # print(enc_data["enc_input"][0])
            # print(enc_data["enc_input"][1])
            # Requires beam_size == batch_size: each source sequence is replicated
            # beam_size times and searched one at a time.
            for row in range(params['batch_size']):
                batch = [enc_data["enc_input"][row] for _ in range(params['beam_size'])]
                best_hyp = beam_decode(model, batch, vocab, params)
                results.append(best_hyp.abstract)

    # All batches processed; save the test results (AutoMaster_TestSet.csv)
    results = list(map(lambda x: x.replace(" ", ""), results))
    save_predict_result(results, params)
    print('save beam search result to :{}'.format(params['test_x_dir']))
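# batch_greedy_decode is referenced above but not shown in this section. The sketch below
# illustrates the greedy decoding loop only; it assumes a per-step decoder interface
# model.decoder(dec_input, dec_hidden, enc_output) returning (logits, dec_hidden), plus
# vocab.id2word and vocab.STOP_DECODING attributes. The actual model and Vocab interfaces
# in this repository may differ.
import tensorflow as tf


def batch_greedy_decode(model, enc_data, vocab, params):
    """Hypothetical greedy decoder for one batch of encoder inputs."""
    batch_size = enc_data["enc_input"].shape[0]
    enc_output, dec_hidden = model.encoder(enc_data["enc_input"])
    # Start every sequence with the <START> token.
    dec_input = tf.expand_dims([vocab.word2id[vocab.START_DECODING]] * batch_size, 1)
    predicted = [[] for _ in range(batch_size)]
    for _ in range(params['max_dec_len']):
        logits, dec_hidden = model.decoder(dec_input, dec_hidden, enc_output)
        # Greedy step: take the highest-probability token for each example.
        predicted_ids = tf.argmax(logits, axis=-1).numpy().reshape(-1)
        for i, pid in enumerate(predicted_ids):
            predicted[i].append(vocab.id2word[int(pid)])
        # Feed the predictions back in as the next decoder input.
        dec_input = tf.expand_dims(predicted_ids, 1)
    # Join tokens and cut each sequence at its first <STOP> token, if any.
    results = []
    for tokens in predicted:
        if vocab.STOP_DECODING in tokens:
            tokens = tokens[:tokens.index(vocab.STOP_DECODING)]
        results.append(" ".join(tokens))
    return results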
def train_model(model, vocab, params, checkpoint_manager):
    epochs = params['epochs']
    batch_size = params['batch_size']

    pad_index = vocab.word2id[vocab.PAD_TOKEN]
    start_index = vocab.word2id[vocab.START_DECODING]
    # Compute the vocab size
    # params['vocab_size'] = vocab.count

    optimizer = tf.keras.optimizers.Adam(name='Adam', learning_rate=params["learning_rate"])
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

    # Define the loss function
    def loss_function(real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, pad_index))
        loss_ = loss_object(real, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)
        # loss_, mask: (batch_size, dec_len-1)
        loss_ *= mask
        return tf.reduce_mean(loss_)
        # return tf.reduce_sum(loss_) / tf.reduce_sum(mask)

    # Training step
    # @tf.function(input_signature=(tf.TensorSpec(shape=[params["batch_size"], params["max_enc_len"]], dtype=tf.int64),
    #                               tf.TensorSpec(shape=[params["batch_size"], params["max_dec_len"]], dtype=tf.int64)))
    def train_step(enc_input, dec_target):
        # dec_target e.g. [4980, 939, 41, 27, 4013, 815, 14702]
        with tf.GradientTape() as tape:
            # enc_output (batch_size, enc_len, enc_unit)
            # enc_hidden (batch_size, enc_unit)
            enc_output, enc_hidden = model.encoder(enc_input)
            # First decoder input: the start token
            # dec_input (batch_size, 1)
            dec_input = tf.expand_dims([start_index] * batch_size, 1)
            # Initial decoder hidden state
            # dec_hidden (batch_size, enc_unit)
            dec_hidden = enc_hidden
            # Predict the target sequence step by step
            # predictions (batch_size, dec_len-1, vocab_size)
            predictions, _ = model(dec_input, dec_hidden, enc_output, dec_target)
            _batch_loss = loss_function(dec_target[:, 1:], predictions)

        variables = model.trainable_variables
        gradients = tape.gradient(_batch_loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
        return _batch_loss

    # dataset, steps_per_epoch = train_batch_generator(batch_size)
    dataset = batcher(vocab, params)
    steps_per_epoch = params["steps_per_epoch"]

    for epoch in range(epochs):
        start = time.time()
        total_loss = 0
        # for (batch, (inputs, target)) in enumerate(dataset.take(steps_per_epoch)):
        for (batch, enc_dec_inputs) in enumerate(dataset.take(steps_per_epoch)):
            inputs = enc_dec_inputs["enc_input"]
            target = enc_dec_inputs["target"]
            batch_loss = train_step(inputs, target)
            total_loss += batch_loss
            if (batch + 1) % 1 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(
                    params["trained_epoch"] + epoch + 1, batch + 1, batch_loss.numpy()))
            if params["debug_mode"]:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch, batch_loss.numpy()))
                if batch >= 10:
                    break
        if params["debug_mode"]:
            break

        # Saving (checkpoint) the model every epoch
        if (epoch + 1) % 1 == 0:
            ckpt_save_path = checkpoint_manager.save()
            try:
                record_file = os.path.join(SEQ2SEQ_CKPT, "record.txt")
                with open(record_file, mode="a", encoding="utf-8") as f:
                    f.write('Epoch {} Loss {:.4f}\n'.format(
                        params["trained_epoch"] + epoch + 1, total_loss / steps_per_epoch))
            except Exception:
                pass
            print('Saving checkpoint for epoch {} at {}'.format(epoch + 1, ckpt_save_path))

        # --- Learning-rate decay ---
        lr = params["learning_rate"] * np.power(0.9, epoch + 1)
        # Update the optimizer's learning rate.
        # NOTE: re-creating the optimizer also resets Adam's moment estimates.
        optimizer = tf.keras.optimizers.Adam(name='Adam', learning_rate=lr)
        assert lr == optimizer.get_config()["learning_rate"]
        print("learning_rate=", optimizer.get_config()["learning_rate"])

        save_train_msg(params["trained_epoch"] + epoch + 1)  # record the number of trained epochs

        print('Epoch {} Loss {:.4f}'.format(
            params["trained_epoch"] + epoch + 1, total_loss / steps_per_epoch))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
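# The epoch loop above decays the learning rate by re-creating the Adam optimizer every epoch,
# which also discards Adam's accumulated moment estimates. A sketch of an alternative that keeps
# a single optimizer instance is shown below: a built-in schedule that multiplies the rate by 0.9
# once per epoch. It assumes the same params["learning_rate"] and params["steps_per_epoch"] values
# read inside train_model; whether this matches the author's intent is an assumption.
import tensorflow as tf

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=params["learning_rate"],
    decay_steps=params["steps_per_epoch"],  # decay once per epoch
    decay_rate=0.9,
    staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)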