import tensorflow as tf
from tqdm import tqdm

# Project-local helpers (SequenceToSequence, Seq2Seq, PGN, Vocab, batcher,
# config_gpu, batch_greedy_decode, greedy_decode, beam_decode, predict_result,
# bream_test_batch_generator, save_predict_result) are assumed to be
# importable from the surrounding project.


def test(params):
    assert params["mode"].lower() in ("test", "eval"), \
        "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], \
    #     "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")

    # for batch in b:
    #     yield batch_greedy_decode(model, batch, vocab, params)

    if params['sampling_method'] == 'greedy_decoder':
        # params['batch_size'] = 512
        predict_result(model, params, vocab, params['test_save_dir'])
    elif params['sampling_method'] == 'beam_search':
        beam_decode(model, b, vocab, params)
def test(params):
    assert params["mode"].lower() in ["test", "eval"], \
        "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], \
        "Beam size must be equal to batch_size, change the params"

    # GPU resource configuration
    config_gpu(use_cpu=True)

    print("Building the model ...")
    model = Seq2Seq(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Creating the checkpoint manager")
    # checkpoint_dir was used but never defined in the original snippet; the
    # layout below is an assumption borrowed from the first variant above.
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)

    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['greedy_decode']:
        predict_result(model, params, vocab, params['result_save_path'])
    else:
        b = bream_test_batch_generator(params["beam_size"])
        results = []
        for batch in b:
            best_hyp = beam_decode(model, batch, vocab, params)
            results.append(best_hyp.abstract)
        save_predict_result(results, params['result_save_path'])
        print('save result to :{}'.format(params['result_save_path']))
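# beam_decode above returns a best hypothesis exposing an .abstract attribute.
# A minimal sketch of what such a hypothesis container might look like is
# given below; the class name, fields, and scoring are assumptions modeled on
# common pointer-generator implementations, not this project's actual class.
class Hypothesis:
    def __init__(self, tokens, log_probs):
        self.tokens = tokens        # decoded token ids so far
        self.log_probs = log_probs  # per-step log probabilities
        self.abstract = ""          # detokenized summary, filled in after decoding

    @property
    def avg_log_prob(self):
        # Length-normalized score used to rank finished beams
        return sum(self.log_probs) / len(self.tokens)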
def test(params):
    assert params["mode"].lower() in ("test", "eval"), \
        "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], \
        "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = PGN(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    # checkpoint_dir was used but never defined in the original snippet; it is
    # assumed here to be supplied via the params dict. Note that the checkpoint
    # key ("Seq2Seq") must match the key used when saving during training.
    checkpoint_dir = params["checkpoint_dir"]
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)

    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    for batch in b:
        print(beam_decode(model, batch, vocab, params))
def predict_result(model, params, vocab):
    dataset = batcher(vocab, params)
    # Run prediction. params['greedy_decode'] is stored as a string such as
    # "True"/"False", hence the eval() call.
    if eval(params['greedy_decode']):
        results = greedy_decode(model, dataset, vocab, params)
    else:
        results = beam_decode(model, dataset, vocab, params)
    # Strip the spaces between tokens
    results = list(map(lambda x: x.replace(" ", ""), results))
    # Save the results
    save_predict_result(results, params)
    return results
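# save_predict_result is called throughout these snippets but never defined
# (with slightly different signatures in each variant). A minimal sketch
# matching the (results, params) form is given below; the CSV layout, the
# 'Prediction' column name, and the params['result_save_path'] key are
# assumptions, not the project's actual implementation.
import pandas as pd


def save_predict_result(results, params):
    # One prediction per row, written as a single-column CSV
    df = pd.DataFrame({'Prediction': results})
    df.to_csv(params['result_save_path'], index=False)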
def predict_result_beam(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    # Run prediction
    batch_size = params["batch_size"]
    results = []
    sample_size = 20000
    # Number of batch steps: integer division + 1 rounds up like math.ceil,
    # because the last batch may be smaller than batch_size but still needs
    # to be processed.
    steps_epoch = sample_size // batch_size + 1
    # Create the iterator once; calling iter(dataset) inside the loop would
    # restart it and yield the same first batch on every step.
    ds_iter = iter(dataset)
    for _ in tqdm(range(steps_epoch)):
        enc_data = next(ds_iter)
        results += beam_decode(model, enc_data, vocab, params)
    # Save the results
    save_predict_result(results, params, 'beam_decode')
    return results
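# Sanity check of the steps_epoch arithmetic above, with the hard-coded
# sample_size of 20000 and an assumed batch_size of 512:
#   20000 // 512 + 1 = 39 + 1 = 40 steps,
# i.e. 39 full batches plus one final partial batch of 20000 - 39 * 512 = 32
# examples. Note that when sample_size divides evenly (e.g. batch_size=500,
# 20000 // 500 + 1 = 41 while math.ceil(20000 / 500) = 40) the loop runs one
# extra step, so the batcher is assumed to tolerate reading past the data.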
def test(params):
    assert params["mode"].lower() in ("test", "eval"), \
        "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], \
    #     "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    # print("Creating the batcher ...")
    # b = batcher(vocab, params)  # now done below, where predict_result used to run

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")

    # Change: the predict_result function was removed; the shared steps_epoch
    # handling is extracted here, then the code branches into
    # greedy_decode / beam_decode.

    # Calls batcher -> batch_generator in batcher.py; the example_generator
    # generator runs with mode == "test" (starting at line 207 there).
    dataset = batcher(vocab, params)

    # Size of the test set
    sample_size = params['sample_size']
    steps_epoch = sample_size // params["batch_size"] + 1

    results = []
    # Create the iterator once; calling iter(dataset) inside the loop would
    # restart it and yield the same first batch on every step.
    ds_iter = iter(dataset)
    for i in tqdm(range(steps_epoch)):
        enc_data, _ = next(ds_iter)
        # Greedy search if True, otherwise beam search
        if params['greedy_decode']:
            results += batch_greedy_decode(model, enc_data, vocab, params)
        else:
            # Beam search requires beam_size == batch_size: each input row X
            # is replicated beam_size times and searched one example at a time.
            for row in range(params['batch_size']):
                batch = [enc_data["enc_input"][row]
                         for _ in range(params['beam_size'])]
                best_hyp = beam_decode(model, batch, vocab, params)
                results.append(best_hyp.abstract)

    # All batches processed: strip the spaces between tokens and save the
    # test results for AutoMaster_TestSet.csv
    results = list(map(lambda x: x.replace(" ", ""), results))
    save_predict_result(results, params)
    print('save beam search result to :{}'.format(params['test_x_dir']))
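# Example invocation of the final test() variant above. Every value here is
# illustrative: the paths, sizes, and flags are assumptions and must match
# the project's own configuration.
if __name__ == '__main__':
    params = {
        "mode": "test",
        "vocab_path": "data/vocab.txt",               # assumed path
        "vocab_size": 30000,                          # assumed size
        "seq2seq_model_dir": "ckpt/seq2seq",          # assumed path
        "sample_size": 20000,
        "batch_size": 4,
        "beam_size": 4,          # must equal batch_size for beam search
        "greedy_decode": False,  # False -> beam search branch
        "test_x_dir": "data/AutoMaster_TestSet.csv",  # assumed path
    }
    test(params)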