コード例 #1
0
def train(params):
    # GPU资源配置
    config_gpu()
    # 读取vocab训练
    print("Building the model ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    # 构建模型
    print("Building the model ...")
    # model = Seq2Seq(params)
    model = PGN(params)

    print("Creating the batcher ...")
    # dataset = batcher(params["train_seg_x_dir"], params["train_seg_y_dir"], vocab, params)
    # print('dataset is ', dataset)

    # 获取保存管理者
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    checkpoint_dir,
                                                    max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    # 训练模型
    print("Starting the training ...")
    train_model(model, vocab, params, checkpoint_manager)
コード例 #2
0
def decoder():
    # 配置GPU
    use_gpu = True
    config_gpu(use_gpu=use_gpu)

    # 读取字典 和 词向量矩阵
    vocab = Vocab(config.path_vocab, config.vocab_size)

    wvtool = WVTool(ndim=config.emb_dim)
    embedding_matrix = wvtool.load_embedding_matrix(path_embedding_matrix=config.path_embedding_matrixt)

    # 构建模型
    logger.info('构建Seq2Seq模型 ...')
    model=Seq2Seq(config.beam_size,embedding_matrix=embedding_matrix)


    # 存档点管理
    ckpt = tf.train.Checkpoint(Seq2Seq=model)
    ckpt_manager = tf.train.CheckpointManager(checkpoint=ckpt, directory=config.dir_ckpt, max_to_keep=10)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        logger.info('decoder模型存档点加载自: {}'.format(ckpt_manager.latest_checkpoint))
    else:
        logger.info('无可加载的存档点')

    # 获取训练数据
    batcher = Batcher(config.path_seg_test, vocab, mode='decode',
                      batch_size=config.beam_size, single_pass=True)

    time.sleep(20)
    # 训练模型
    # 输入:训练数据barcher,模型,词表,存档点,词向量矩阵
    batch_decode(batcher, model=model,vocab=vocab)
コード例 #3
0
def train(params):
    # GPU资源配置
    config_gpu(use_cpu=False, gpu_memory=params['gpu_memory'])
    # 读取vocab训练
    print("Building the model ...")
    vocab = Vocab(params["vocab_path"], params["max_vocab_size"])
    params['vocab_size'] = vocab.count

    # 构建模型
    print("Building the model ...")
    # model = Seq2Seq(params)
    model = PGN(params)

    print("Creating the batcher ...")
    dataset = batcher(vocab, params)
    # print('dataset is ', dataset)

    # 获取保存管理者
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    params['checkpoint_dir'],
                                                    max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    # 训练模型
    print("Starting the training ...")
    train_model(model, dataset, params, checkpoint_manager)
コード例 #4
0
def test(params):
    assert params["mode"].lower() in [
        "test", "eval"
    ], "change training mode to 'test' or 'eval'"
    if params['decode_mode'] == 'beam':
        assert params["beam_size"] == params[
            "batch_size"], "Beam size must be equal to batch_size, change the params"
    # GPU资源配置
    config_gpu(use_cpu=True)

    print("Building the model ...")
    model = PGN(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    checkpoint_dir,
                                                    max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    results = predict_result(model, params, vocab, params['result_save_path'])
    print('save result to :{}'.format(params['result_save_path']))
    print('save result :{}'.format(results[:5]))
コード例 #5
0
ファイル: test.py プロジェクト: lijunforNLPtraining/kaikeba
def test(params):
    assert params["mode"].lower() in ["test", "eval"], "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"
    # GPU资源配置
    config_gpu(use_cpu=True)

    print("Building the model ...")
    model = Seq2Seq(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['greedy_decode']:
        predict_result(model, params, vocab, params['result_save_path'])
    else:
        b = bream_test_batch_generator(params["beam_size"])
        results = []
        for batch in b:
            best_hyp = beam_decode(model, batch, vocab, params)
            results.append(best_hyp.abstract)
        save_predict_result(results, params['result_save_path'])
        print('save result to :{}'.format(params['result_save_path']))
コード例 #6
0
def test(params):
    assert params["mode"].lower() in ["test", "eval"], "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"
    # GPU资源配置
    config_gpu(use_cpu=True)

    print("Building the model ...")
    model = PGN(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    if params['mode'] == 'eval' or params['mode'] == 'test':
        for batch in b:
            #print("batch is:\n", batch)

            yield beam_decode(model, batch, vocab, params)

    else:
        for batch in b:
            print(beam_decode(model, batch, vocab, params))
コード例 #7
0
def train(params):
    config_gpu()

    vocab_to_index, _ = load_vocab(word2vec_model_path)

    params['vocab_size'] = len(vocab_to_index)

    seq2seq = Seq2Seq(params)

    checkpoint = tf.train.Checkpoint(model=seq2seq)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    checkpoint_dir,
                                                    max_to_keep=5)

    train_model(seq2seq, params, vocab_to_index, checkpoint_manager)
コード例 #8
0
def train(params):
    config_gpu()

    vocab, _ = get_vocab(model_path_trained)

    params['vocab_size'] = len(vocab)

    print("Building the model")
    model = Seq2Seq(params)

    checkpoint = tf.train.Checkpoint(Seq2Seq=model)

    check_point_manager = tf.train.CheckpointManager(checkpoint,
                                                     checkpoint_dir,
                                                     max_to_keep=5)

    train_model(model, vocab, params, check_point_manager)
コード例 #9
0
ファイル: train.py プロジェクト: lijunforNLPtraining/kaikeba
def train(params):
    # GPU资源配置
    config_gpu()

    # 读取vocab训练
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    params['vocab_size'] = vocab.count

    # 构建模型
    print("Building the model ...")
    model = Seq2Seq(params)

    # 获取保存管理者
    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, params['checkpoint_dir'], max_to_keep=5)

    # 训练模型
    train_model(model, vocab, params, checkpoint_manager)
コード例 #10
0
def train(params):
    # GPU资源配置
    config_gpu()
    # 读取vocab训练
    print("Building the model ...")
    vocab = Vocab(params["vocab_path"], params["max_vocab_size"])
    params['vocab_size'] = vocab.count

    # 构建模型
    print("Building the model ...")
    # model = Seq2Seq(params)
    model = PGN(params)

    print("Creating the batcher ...")
    dataset, params['steps_per_epoch'] = batcher(vocab, params)

    # print('dataset is ', dataset)

    # 获取保存管理者
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    params['checkpoint_dir'],
                                                    max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
        params["trained_epoch"] = int(checkpoint_manager.latest_checkpoint[-1])
    else:
        print("Initializing from scratch.")
        params["trained_epoch"] = 1

    # 学习率衰减
    params["learning_rate"] *= np.power(0.95, params["trained_epoch"])
    print('learning_rate:{}'.format(params["learning_rate"]))
    # 训练模型
    print("Starting the training ...")

    train_model(model, dataset, params, checkpoint_manager)
コード例 #11
0
        # passing the concatenated vector to the GRU
        output, state = self.gru(x)

        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))

        # output shape == (batch_size, vocab)
        prediction = self.fc(output)

        return prediction, state


if __name__ == '__main__':
    # GPU资源配置
    config_gpu()
    # 获得参数
    params = get_params()
    # 读取vocab训练
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    # 计算vocab size
    vocab_size = vocab.count
    # 使用GenSim训练好的embedding matrix
    embedding_matrix = load_word2vec_file(save_wv_model_path)

    input_sequence_len = 250
    BATCH_SIZE = 64
    embedding_dim = 500
    units = 1024

    # 编码器结构