Example #1
def train(params):
    assert params["mode"].lower() == "train", "change training mode to 'train'"

    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    print('true vocab is ', vocab)

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    print("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager)
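For reference, a minimal sketch of how this train entry point might be invoked; the keys mirror the ones read above, but every value and path below is a placeholder, not the project's real configuration.

# Hypothetical invocation sketch; all values and paths are placeholders.
if __name__ == "__main__":
    params = {
        "mode": "train",
        "vocab_path": "data/vocab.txt",       # placeholder path
        "vocab_size": 30000,
        "seq2seq_model_dir": "ckpt/seq2seq",  # placeholder path
        # ...plus whatever batcher / SequenceToSequence expect
        #    (batch_size, max_enc_len, max_dec_len, learning_rate, ...)
    }
    train(params)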
Example #2
def test(params):
    assert params["mode"].lower() == "test", "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    # path = ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")
    # for batch in b:
    #     yield batch_greedy_decode(model, batch, vocab, params)
    if params['greedy_decode']:
        # params['batch_size'] = 512
        predict_result(model, params, vocab, params['test_save_dir'])
Example #3
def train(params):
    assert params["mode"].lower() == "train", "change training mode to 'train'"
    # Uses vocab.txt; the vocab_size parameter is set to 30000
    # The Vocab class is defined in batcher
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    # print('true vocab is ', vocab)  # commented out: this would print the Vocab object itself
    print('true vocab is ', vocab.count)  # equals the configured 30000

    print("Creating the batcher ...")
    b = batcher(vocab, params)
    # print(type(b))
    # <class 'tensorflow.python.data.ops.dataset_ops.DatasetV1Adapter'>

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint_vocab30000".format(
        params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_dir,
                                              max_to_keep=5)

    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    print("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager)
Example #4
File: test.py Project: zyffrank/nlp
def test(params):
    assert params["mode"].lower(
    ) == "test", "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params[
        "batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = PGN(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    checkpoint_dir,
                                                    max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    for batch in b:
        print(beam_decode(model, batch, vocab, params))
Example #5
def predict_result(model, params, vocab, result_save_path):
    if params['greedy_decode']:
        dataset = batcher(vocab, params)
        results = greedy_decode(model, dataset, vocab, params)
    else:

        params1 = params.copy()
        params1["batch_size"] = 1
        dataset = batcher(vocab, params1)
        params["batch_size"] = params["beam_size"]
        results = beam_decode_all(model, dataset, vocab, params)

    results = list(map(lambda x: x.replace(" ", ""), results))
    # save the results
    save_predict_result(results, params)

    return results
Example #6
def predict_result(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    # run prediction
    results = greedy_decode(model, dataset, vocab, params)
    results = list(map(lambda x: x.replace(" ", ""), results))
    # save the results
    save_predict_result(results, params)

    return results
Example #7
def predict_result(model, params, vocab):
    dataset = batcher(vocab, params)
    # run prediction
    if eval(params['greedy_decode']):
        results = greedy_decode(model, dataset, vocab, params)
    else:
        results = beam_decode(model, dataset, vocab, params)
    results = list(map(lambda x: x.replace(" ", ""), results))
    # save the results
    save_predict_result(results, params)

    return results
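Note that this variant stores greedy_decode as a string and converts it with eval. A minimal alternative sketch that avoids eval, assuming the flag really is a string such as "True" or "False" (the helper name below is made up), usable as a drop-in replacement for the if/else above:

# Sketch: parse a string flag without eval; str_to_bool is a hypothetical helper.
def str_to_bool(value):
    return str(value).strip().lower() in ("true", "1", "yes")

if str_to_bool(params['greedy_decode']):
    results = greedy_decode(model, dataset, vocab, params)
else:
    results = beam_decode(model, dataset, vocab, params)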
Example #8
def predict_result(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    # run prediction
    if params['greedy_decode']:
        results = greedy_decode(model, dataset, vocab, params)
    elif params['beam_search_decode']:
        results = beam_decode(model, dataset, vocab, params)
    results = list(map(lambda x: x.replace(" ", ""), results))
    # print("results2", results)  # 64 elements, each a sequence of length dec_max_len
    # save the results
    save_predict_result(results, params)

    return results
Example #9
def predict_result_beam(model, params, vocab, result_save_path):
    dataset = batcher(vocab, params)
    # run prediction
    batch_size = params["batch_size"]
    results = []

    sample_size = 20000
    # number of batch steps: rounded up like math.ceil by adding 1,
    # because the last batch may be smaller than batch_size but still has to be processed
    steps_epoch = sample_size // batch_size + 1
    
    dataset_iter = iter(dataset)  # build the iterator once; iter() inside the loop would restart it every step
    for _ in tqdm(range(steps_epoch)):
        enc_data = next(dataset_iter)
        results += beam_decode(model, enc_data, vocab, params)
    # save the results
    save_predict_result(results, params, 'beam_decode')

    return results
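The steps_epoch formula above is an approximate ceiling division: with sample_size = 20000 and batch_size = 64 it gives 20000 // 64 + 1 = 313 steps, but when sample_size divides batch_size exactly the +1 adds one redundant step. A small sketch with math.ceil handles both cases:

import math

# Exact ceiling division: an extra step only when the last batch is partial.
steps_epoch = math.ceil(sample_size / batch_size)
# e.g. sample_size=20000, batch_size=64  -> 313
#      sample_size=20000, batch_size=100 -> 200 (the "// + 1" version gives 201)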
Example #10
def train_model(model, vocab, params, checkpoint_manager):
    epochs = params['epochs']
    # batch_size = params['batch_size']
    # max_dec_len = params['max_dec_len']
    # max_enc_len = params['max_enc_len']

    optimizer = tf.keras.optimizers.Adagrad(params['learning_rate'],
                                            initial_accumulator_value=params['adagrad_init_acc'],
                                            clipnorm=params['max_grad_norm'])
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

    # define the loss function
    def loss_function(real, pred, padding_mask):
        loss = 0
        for t in range(real.shape[1]):
            if padding_mask is not None:
                loss_ = loss_object(real[:, t], pred[:, t, :])
                mask = tf.cast(padding_mask[:, t], dtype=loss_.dtype)
                loss_ *= mask
                loss_ = tf.reduce_mean(loss_, axis=0)  # batch-wise
                loss += loss_
            else:
                loss_ = loss_object(real[:, t], pred[:, t, :])
                loss_ = tf.reduce_mean(loss_, axis=0)  # batch-wise
                loss += loss_
        return tf.reduce_mean(loss)

    # training
    @tf.function
    def train_step(enc_inp, extended_enc_input, max_oov_len,
                   dec_input, dec_target, cov_loss_wt,
                   enc_pad_mask, padding_mask=None):
        batch_loss = 0
        with tf.GradientTape() as tape:
            enc_output, enc_hidden = model.call_encoder(enc_inp)

            # the initial decoder hidden state is the encoder hidden state
            dec_hidden = enc_hidden
            # predict the target sequence step by step
            predictions, _, attentions, coverages = model(dec_input,
                                                          dec_hidden,
                                                          enc_output,
                                                          dec_target,
                                                          extended_enc_input,
                                                          max_oov_len,
                                                          enc_pad_mask=enc_pad_mask,
                                                          use_coverage=True,
                                                          prev_coverage=None)

            # print('dec_target is :{}'.format(dec_target))
            # print('predictions is :{}'.format(predictions.shape))
            # print('dec_target is :{}'.format(dec_target.shape))
            # print('padding_mask is :{}'.format(padding_mask.shape))
            # # [max_y_len,batch size ,max_x_len]
            # print('attentions is :{}'.format(attentions))
            # # [max_y_len,batch size ,max_x_len,1]
            # print('coverages is :{}'.format(coverages))
            # batch_loss = loss_function(dec_target, predictions, padding_mask)

            # l_loss = loss_function(dec_target, predictions, padding_mask)
            # print('l_loss :{}'.format(l_loss))
            # c_loss = coverage_loss(attentions, coverages, padding_mask)
            # print('c_loss :{}'.format(c_loss))

            batch_loss = loss_function(dec_target, predictions, padding_mask) + \
                         cov_loss_wt * coverage_loss(attentions, coverages, padding_mask)

            variables = model.encoder.trainable_variables + model.decoder.trainable_variables + \
                        model.attention.trainable_variables + model.pointer.trainable_variables

            gradients = tape.gradient(batch_loss, variables)

            optimizer.apply_gradients(zip(gradients, variables))

            return batch_loss

    for epoch in range(epochs):
        start = time.time()
        dataset = batcher(vocab, params)
        total_loss = 0
        step = 0
        for encoder_batch_data, decoder_batch_data in dataset:
            # print('batch[0]["enc_input"] is ', batch[0]["enc_input"])
            # print('batch[0]["extended_enc_input"] is ', batch[0]["extended_enc_input"])
            # print('batch[1]["dec_input"] is ', batch[1]["dec_input"])
            # print('batch[1]["dec_target"] is ', batch[1]["dec_target"])
            # print('batch[0]["max_oov_len"] is ', batch[0]["max_oov_len"])
            batch_loss = train_step(encoder_batch_data["enc_input"],
                                    encoder_batch_data["extended_enc_input"],
                                    encoder_batch_data["max_oov_len"],
                                    decoder_batch_data["dec_input"],
                                    decoder_batch_data["dec_target"],
                                    cov_loss_wt=0.5,
                                    enc_pad_mask=encoder_batch_data["sample_encoder_pad_mask"],
                                    padding_mask=decoder_batch_data["sample_decoder_pad_mask"])

            # batch_loss = train_step(inputs, target)
            total_loss += batch_loss

            step += 1

            if step % 1 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                             step,
                                                             batch_loss.numpy()))
        # saving (checkpoint) the model every 2 epochs
        if (epoch + 1) % 2 == 0:
            ckpt_save_path = checkpoint_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(epoch + 1,
                                                                ckpt_save_path))
            print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                                total_loss / step))
            print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

            if step > params['max_train_steps']:
                break
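To make the per-time-step masking in loss_function above concrete, here is a small self-contained numeric sketch with toy shapes and random logits (not tied to the project's data; 0 is used as the padding id purely for illustration):

import tensorflow as tf

# Toy shapes: batch_size=2, dec_len=3, vocab_size=5.
real = tf.constant([[1, 2, 0],
                    [3, 0, 0]])
pred = tf.random.uniform((2, 3, 5))                  # unnormalised logits
padding_mask = tf.cast(tf.not_equal(real, 0), tf.float32)

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

loss = 0.0
for t in range(real.shape[1]):
    loss_t = loss_object(real[:, t], pred[:, t, :])  # per-example loss at step t
    loss_t *= padding_mask[:, t]                     # zero out padded positions
    loss += tf.reduce_mean(loss_t)                   # batch-wise mean, summed over steps
print(float(loss))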
Example #11
def test(params):
    assert params["mode"].lower(
    ) == "test", "change training mode to 'test' or 'eval'"
    # assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    print("Building the model ...")
    model = SequenceToSequence(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    # print("Creating the batcher ...")
    # b = batcher(vocab, params)  # done in predict_result

    print("Creating the checkpoint manager")
    checkpoint_dir = "{}/checkpoint".format(params["seq2seq_model_dir"])
    ckpt = tf.train.Checkpoint(SequenceToSequence=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_dir,
                                              max_to_keep=5)

    # path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    # path = ckpt_manager.latest_checkpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Model restored")
    # for batch in b:
    #     yield batch_greedy_decode(model, batch, vocab, params)
    """    
    修改:
        去掉了predict_result 函数
        将处理steps_epoch的共用代码提取出来,再进行分支greedy_decode/beam_decode的出来
    """

    # calls batcher -> batch_generator inside batcher; the example_generator generator, mode == "test", starts at line 207
    dataset = batcher(vocab, params)
    # number of samples in the test set
    sample_size = params['sample_size']
    steps_epoch = sample_size // params["batch_size"] + 1
    results = []
    dataset_iter = iter(dataset)  # build the iterator once; iter() inside the loop would restart it every step
    for i in tqdm(range(steps_epoch)):
        enc_data, _ = next(dataset_iter)

        # if True, run greedy search; otherwise beam search
        if params['greedy_decode']:
            # print("----------------- greedy_decode mode -----------------")
            results += batch_greedy_decode(model, enc_data, vocab, params)
        else:
            # print("-----------------beam_decode 模式-----------------")
            # print(enc_data["enc_input"][0])
            # print(enc_data["enc_input"][1])
            # 需要beam sezi=batch size 输入时候相当于遍历一个个X 去进行搜索
            for row in range(params['batch_size']):
                batch = [
                    enc_data["enc_input"][row]
                    for _ in range(params['beam_size'])
                ]
                best_hyp = beam_decode(model, batch, vocab, params)
                results.append(best_hyp.abstract)

    # all batches processed; save the test results
    results = list(map(lambda x: x.replace(" ", ""), results))
    # save the results (AutoMaster_TestSet.csv)
    save_predict_result(results, params)

    # save_predict_result(results, params)
    print('save beam search result to :{}'.format(params['test_x_dir']))
Example #12
def train_model(model, vocab, params, checkpoint_manager):

    epochs = params['epochs']
    batch_size = params['batch_size']

    pad_index = vocab.word2id[vocab.PAD_TOKEN]
    start_index = vocab.word2id[vocab.START_DECODING]

    # compute vocab size
    # params['vocab_size'] = vocab.count

    optimizer = tf.keras.optimizers.Adam(name='Adam',
                                         learning_rate=params["learning_rate"])
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')

    # define the loss function
    def loss_function(real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, pad_index))
        loss_ = loss_object(real, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)

        # loss_,mask (batch_size, dec_len-1)
        loss_ *= mask
        return tf.reduce_mean(loss_)
        # return tf.reduce_sum(loss_) / tf.reduce_sum(mask)

        # training

    # @tf.function(input_signature=(tf.TensorSpec(shape=[params["batch_size"], params["max_enc_len"]], dtype=tf.int64),
    #                               tf.TensorSpec(shape=[params["batch_size"], params["max_dec_len"]], dtype=tf.int64)))
    def train_step(enc_input, dec_target):
        # dec_target [4980, 939, 41, 27, 4013, 815, 14702]

        with tf.GradientTape() as tape:

            # enc_output (batch_size, enc_len, enc_unit)
            # enc_hidden (batch_size, enc_unit)
            enc_output, enc_hidden = model.encoder(enc_input)

            # the first decoder input is the start token
            # dec_input (batch_size, 1)
            dec_input = tf.expand_dims([start_index] * batch_size, 1)

            # the initial decoder hidden state
            # dec_hidden (batch_size, enc_unit)
            dec_hidden = enc_hidden
            # predict the sequence step by step
            # predictions (batch_size, dec_len-1, vocab_size)
            predictions, _ = model(dec_input, dec_hidden, enc_output,
                                   dec_target)

            _batch_loss = loss_function(dec_target[:, 1:], predictions)

        variables = model.trainable_variables
        gradients = tape.gradient(_batch_loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        return _batch_loss

    # dataset, steps_per_epoch = train_batch_generator(batch_size)

    dataset = batcher(vocab, params)
    steps_per_epoch = params["steps_per_epoch"]

    for epoch in range(epochs):
        start = time.time()
        total_loss = 0

        # for (batch, (inputs, target)) in enumerate(dataset.take(steps_per_epoch)):
        for (batch,
             enc_dec_inputs) in enumerate(dataset.take(steps_per_epoch)):
            inputs = enc_dec_inputs["enc_input"]
            target = enc_dec_inputs["target"]

            batch_loss = train_step(inputs, target)

            total_loss += batch_loss
            if (batch + 1) % 1 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(
                    params["trained_epoch"] + epoch + 1, batch + 1,
                    batch_loss.numpy()))

            if params["debug_mode"]:
                print('Epoch {} Batch {} Loss {:.4f}'.format(
                    epoch + 1, batch, batch_loss.numpy()))
                if batch >= 10:
                    break

        if params["debug_mode"]:
            break

        # saving (checkpoint) the model every 2 epochs
        if (epoch + 1) % 1 == 0:
            ckpt_save_path = checkpoint_manager.save()

            try:
                record_file = os.path.join(SEQ2SEQ_CKPT, "record.txt")
                with open(record_file, mode="a", encoding="utf-8") as f:
                    f.write('Epoch {} Loss {:.4f}\n'.format(
                        params["trained_epoch"] + epoch + 1,
                        total_loss / steps_per_epoch))
            except:
                pass

            print('Saving checkpoint for epoch {} at {}'.format(
                epoch + 1, ckpt_save_path))

            # --- learning rate decay ---
            lr = params["learning_rate"] * np.power(0.9, epoch + 1)

            # update the optimizer's learning rate (note: rebuilding Adam also resets its moment estimates)
            optimizer = tf.keras.optimizers.Adam(name='Adam', learning_rate=lr)

            assert lr == optimizer.get_config()["learning_rate"]

            print("learning_rate=", optimizer.get_config()["learning_rate"])
            save_train_msg(params["trained_epoch"] + epoch + 1)  # record how many epochs have been trained

        print('Epoch {} Loss {:.4f}'.format(
            params["trained_epoch"] + epoch + 1, total_loss / steps_per_epoch))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
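The decay block above rebuilds the Adam optimizer every epoch with lr = learning_rate * 0.9^(epoch + 1), which also discards Adam's accumulated moment estimates. A hedged alternative sketch (not the project's code) keeps a single optimizer and lets a Keras schedule apply the same 0.9-per-epoch decay:

import tensorflow as tf

# Sketch: one optimizer instance, exponential decay of 0.9 applied once per epoch.
# params["learning_rate"] and params["steps_per_epoch"] are the same keys used above.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=params["learning_rate"],
    decay_steps=params["steps_per_epoch"],   # decay once per epoch
    decay_rate=0.9,
    staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, name='Adam')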