def test(params):
    assert params["mode"].lower() == "test", "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    # path = ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")
    # for batch in b:
    #     yield batch_greedy_decode(model, batch, vocab, params)
    if params['sampling_method'] == 'greedy_decoder':
        # params['batch_size'] = 512
        predict_result(model, params, vocab, params['test_save_dir'])
    elif params['sampling_method'] == 'beam_search':
        beam_decode(model, b, vocab, params)
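
A minimal usage sketch for this variant, for orientation only: the key names come from the snippet above, while the values are placeholders, and tf plus the project-local SequenceToSequence, Vocab, batcher, predict_result and beam_decode helpers are assumed to be imported from the project's own modules.

# hypothetical params -- key names taken from the snippet, values are placeholders
params = {
    "mode": "test",
    "vocab_path": "data/vocab.txt",        # placeholder path
    "vocab_size": 30000,                   # placeholder size
    "seq2seq_model_dir": "ckpt/seq2seq",   # used to build checkpoint_dir above
    "batch_size": 4,
    "beam_size": 4,
    "sampling_method": "greedy_decoder",   # or "beam_search"
    "test_save_dir": "results/",           # placeholder output directory
}
test(params)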
Example #2
def test(params):
    assert params["mode"].lower() in ["test", "eval"], "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"
    # GPU resource configuration
    config_gpu(use_cpu=True)

    print("Building the model ...")
    model = Seq2Seq(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    # checkpoint_dir is assumed to be defined elsewhere in the module (not shown in this snippet)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['greedy_decode']:
        predict_result(model, params, vocab, params['result_save_path'])
    else:
        b = bream_test_batch_generator(params["beam_size"])
        results = []
        for batch in b:
            best_hyp = beam_decode(model, batch, vocab, params)
            results.append(best_hyp.abstract)
        save_predict_result(results, params['result_save_path'])
        print('save result to: {}'.format(params['result_save_path']))
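
save_predict_result itself is not shown in these snippets; a minimal stand-in with the (results, path) signature used above might look like the following, purely illustrative, since the real project helper may well write a CSV instead.

def save_predict_result(results, result_save_path):
    # illustrative stand-in: write one predicted abstract per line
    with open(result_save_path, "w", encoding="utf-8") as f:
        for line in results:
            f.write(line + "\n")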
Example #3
File: test.py Project: zyffrank/nlp
def test(params):
    assert params["mode"].lower(
    ) == "test", "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params[
        "batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = PGN(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    checkpoint_dir,
                                                    max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    for batch in b:
        print(beam_decode(model, batch, vocab, params))
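
The snippet prints "Model restored" even when no checkpoint exists yet. If stricter verification is wanted, tf.train.Checkpoint.restore returns a status object that can be checked; a small sketch using the same names as above:

status = checkpoint.restore(checkpoint_manager.latest_checkpoint)
if checkpoint_manager.latest_checkpoint:
    # raise if the restored checkpoint does not match the model's variables
    status.assert_existing_objects_matched()
else:
    print("No checkpoint found, decoding with untrained weights")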
Example #4
def predict_result(model, params, vocab):
    dataset = batcher(vocab, params)
    # generate predictions
    if eval(params['greedy_decode']):
        results = greedy_decode(model, dataset, vocab, params)
    else:
        results = beam_decode(model, dataset, vocab, params)
    results = list(map(lambda x: x.replace(" ", ""), results))
    # save the results
    save_predict_result(results, params)

    return results
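
The eval(params['greedy_decode']) call suggests the flag is stored as a string such as "True"; eval works here but executes arbitrary text. A hedged alternative, assuming the flag really is a plain string:

# assumes params['greedy_decode'] holds a string flag such as "True" / "False"
greedy = str(params['greedy_decode']).lower() in ("true", "1", "yes")
if greedy:
    results = greedy_decode(model, dataset, vocab, params)
else:
    results = beam_decode(model, dataset, vocab, params)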
Example #5
def predict_result_beam(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    # generate predictions
    batch_size = params["batch_size"]
    results = []

    sample_size = 20000
    # number of batch steps: rounded up (like math.ceil), i.e. integer division plus 1,
    # because the last batch may be smaller than batch_size but still has to be processed
    steps_epoch = sample_size // batch_size + 1

    for _ in tqdm(range(steps_epoch)):
        # note: if dataset is a tf.data.Dataset, iter(dataset) inside the loop recreates the iterator
        # each step and keeps yielding the first batch; a single iterator built before the loop avoids that
        enc_data = next(iter(dataset))
        results += beam_decode(model, enc_data, vocab, params)
    # save the results
    save_predict_result(results, params, 'beam_decode')

    return results
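
Note that sample_size // batch_size + 1 rounds up, but it adds one extra (empty) step whenever sample_size is an exact multiple of batch_size; math.ceil gives the exact ceiling:

import math

sample_size, batch_size = 20000, 64
steps = math.ceil(sample_size / batch_size)   # 313, same as 20000 // 64 + 1 here
# for an exact multiple the two differ: math.ceil(20000 / 100) == 200, while 20000 // 100 + 1 == 201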
Example #6
def test(params):
    assert params["mode"].lower(
    ) == "test", "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    # print("Creating the batcher ...")
    # b = batcher(vocab, params)  (previously executed inside predict_result)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_dir,
                                              max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    # path = ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")
    # for batch in b:
    #     yield batch_greedy_decode(model, batch, vocab, params)
    """    
    修改:
        去掉了predict_result 函数
        将处理steps_epoch的共用代码提取出来,再进行分支greedy_decode/beam_decode的出来
    """

    # calls the batcher -> batch_generator function in batcher; the example_generator generator runs with mode == "test" (starting at line 207)
    dataset = batcher(vocab, params)
    # number of samples in the test set
    sample_size = params['sample_size']
    steps_epoch = sample_size // params["batch_size"] + 1
    results = []
    for _ in tqdm(range(steps_epoch)):
        # note: if dataset is a tf.data.Dataset, iter(dataset) inside the loop recreates the iterator
        # each step and keeps yielding the first batch; a single iterator built before the loop avoids that
        enc_data, _ = next(iter(dataset))

        # if True, run greedy search; otherwise beam search
        if params['greedy_decode']:
            # print("----------------- greedy_decode mode -----------------")
            results += batch_greedy_decode(model, enc_data, vocab, params)
        else:
            # print("----------------- beam_decode mode -----------------")
            # print(enc_data["enc_input"][0])
            # print(enc_data["enc_input"][1])
            # requires beam_size == batch_size: each input X is replicated beam_size times and searched one row at a time
            for row in range(params['batch_size']):
                batch = [
                    enc_data["enc_input"][row]
                    for _ in range(params['beam_size'])
                ]
                best_hyp = beam_decode(model, batch, vocab, params)
                results.append(best_hyp.abstract)

    # all batches processed; save the test results
    results = list(map(lambda x: x.replace(" ", ""), results))
    # save results (AutoMaster_TestSet.csv)
    save_predict_result(results, params)

    print('save beam search result to: {}'.format(params['test_x_dir']))
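
As with the first example, a hypothetical params sketch for this last variant; the keys that differ (sample_size, greedy_decode, test_x_dir) come from the code above, and all values are placeholders.

params = {
    "mode": "test",
    "vocab_path": "data/vocab.txt",
    "vocab_size": 30000,
    "seq2seq_model_dir": "ckpt/seq2seq",
    "sample_size": 20000,                          # number of test examples
    "batch_size": 4,
    "beam_size": 4,                                # beam branch replicates each input beam_size times
    "greedy_decode": False,                        # False -> beam search branch
    "test_x_dir": "data/AutoMaster_TestSet.csv",   # placeholder path
}
test(params)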