Exemple #1
0
def load():
    """ load model for predict """
    config = model_config()
    config.vocab_size = len(open(config.vocab_path).readlines())
    final_score, final_ids, final_index = knowledge_seq2seq(config)

    final_score.persistable = True
    final_ids.persistable = True
    final_index.persistable = True

    main_program = fluid.default_main_program()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    fluid.io.load_params(executor=exe, dirname=config.model_path, main_program=main_program)

    processors = KnowledgeCorpus(
        data_dir=config.data_dir,
        data_prefix=config.data_prefix,
        vocab_path=config.vocab_path,
        min_len=config.min_len,
        max_len=config.max_len)

    # load dict
    id_dict_array = load_id2str_dict(config.vocab_path)

    model_handle = [exe, place, final_score, final_ids, final_index, processors, id_dict_array]
    return model_handle
Exemple #2
0
def train(config):
    """ model training """
    config.vocab_size = len(open(config.vocab_path).readlines())
    bow_loss, kl_loss, nll_loss, final_loss= knowledge_seq2seq(config)

    bow_loss.persistable = True
    kl_loss.persistable = True
    nll_loss.persistable = True
    final_loss.persistable = True
    
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=config.grad_clip))
    optimizer = fluid.optimizer.Adam(learning_rate=config.lr)

    if config.stage == 0:
        print("stage 0")
        optimizer.minimize(bow_loss)
    else:
        print("stage 1")
        optimizer.minimize(final_loss)

    opt_var_name_list = optimizer.get_opti_var_name_list()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    param_list = main_program.block(0).all_parameters()
    param_name_list = [p.name for p in param_list]

    init_model(config, param_name_list, place)

    processors = KnowledgeCorpus(
                        data_dir=config.data_dir,
                        data_prefix=config.data_prefix,
                        vocab_path=config.vocab_path,
                        min_len=config.min_len,
                        max_len=config.max_len)
    train_generator = processors.data_generator(
                        batch_size=config.batch_size,
                        phase="train",
                        shuffle=True)
    valid_generator = processors.data_generator(
                        batch_size=config.batch_size,
                        phase="dev",
                        shuffle=False)

    model_handle = [exe, place, bow_loss, kl_loss, nll_loss, final_loss]

    train_loop(config,
               train_generator, valid_generator,
               main_program, inference_program,
               model_handle, param_name_list, opt_var_name_list)
def load():
    """ load model for predict """
    config = model_config()
    config.vocab_size = len(open(config.vocab_path).readlines())
    final_score, final_ids, final_index = knowledge_seq2seq(config)
    #基于知识库生成对话的回复
    final_score.persistable = True
    final_ids.persistable = True
    final_index.persistable = True
    #设置是否为永久变量

    main_program = fluid.default_main_program()
Exemple #4
0
def train(config):
    """ model training """
    config.vocab_size = len(open(config.vocab_path).readlines())
    #搭建网络:Bi-GRU Utterance encoder +Bi-GRU KG encoder  +层级GRU decoder
    bow_loss, kl_loss, nll_loss, final_loss= knowledge_seq2seq(config)
    #持久性变量(Persistables)是一种在每次迭代结束后均不会被删除的变量
    bow_loss.persistable = True
    kl_loss.persistable = True
    nll_loss.persistable = True
    final_loss.persistable = True
    #fluid.layers 接口中添加的op和variable会存储在 default main program 中
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)
    #给指定参数做梯度裁剪
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=config.grad_clip))
    optimizer = fluid.optimizer.Adam(learning_rate=config.lr)

    if config.stage == 0:
        print("stage 0")
        optimizer.minimize(bow_loss)
    else:
        print("stage 1")
        optimizer.minimize(final_loss)
    #优化器的训练参数如lr
    opt_var_name_list = optimizer.get_opti_var_name_list()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    #初始化 default_startup_program函数可以获取默认/全局 startup Program (初始化启动程序)。
    exe.run(framework.default_startup_program())
    #block0 表示一段代码的最外层块
    param_list = main_program.block(0).all_parameters()
    param_name_list = [p.name for p in param_list]
    #TODO:init包含
    init_model(config, param_name_list, place)

    processors = KnowledgeCorpus(
                        data_dir=config.data_dir,
                        data_prefix=config.data_prefix,
                        vocab_path=config.vocab_path,
                        min_len=config.min_len,
                        max_len=config.max_len)
    #train_generator为yeild 生成函数
    #进行了如下操作:
    #读取stream record file
    #对读入的文本进行tokennize;根据max min len 进行过滤
    #并根据词汇表把src\tgt\cue文本串(chatpath+knowledge+":"+history\ response\ KG cue)转为数字串
    ##进行padding并返回padding后的串和每个串的原长
    train_generator = processors.train_generator(
                        batch_size=config.batch_size,
                        phase="train",
                        shuffle=True)
    valid_generator = processors.data_generator(
                        batch_size=config.batch_size,
                        phase="dev",
                        shuffle=False)

    model_handle = [exe, place, bow_loss, kl_loss, nll_loss, final_loss]
    #在训练过程中,每config.valid_steps 个batch(步)进行一次valid,并储存一次最好模型
    train_loop(config,
               train_generator, valid_generator,
               main_program, inference_program,
               model_handle, param_name_list, opt_var_name_list)
Exemple #5
0
def test(config):
    """ test """
    batch_size = config.batch_size
    config.vocab_size = len(open(config.vocab_path).readlines())
    final_score, final_ids, final_index = knowledge_seq2seq(config)

    final_score.persistable = True
    final_ids.persistable = True
    final_index.persistable = True

    main_program = fluid.default_main_program()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    fluid.io.load_params(executor=exe, dirname=config.model_path,
                         main_program=main_program)
    print("laod params finsihed")

    # test data generator
    processors = KnowledgeCorpus(
        data_dir=config.data_dir,
        data_prefix=config.data_prefix,
        vocab_path=config.vocab_path,
        min_len=config.min_len,
        max_len=config.max_len)
    test_generator = processors.data_generator(
        batch_size=config.batch_size,
        phase="test",
        shuffle=False)

    # load dict
    id_dict_array = load_id2str_dict(config.vocab_path)

    out_file = config.output
    fout = open(out_file, 'w')
    for batch_id, data in enumerate(test_generator()):
        data_feed, sent_num = build_data_feed(data, place, batch_size=batch_size)

        if data_feed is None:
            break

        out = exe.run(feed=data_feed,
                      fetch_list=[final_score.name, final_ids.name, final_index.name])

        batch_score = out[0]
        batch_ids = out[1]
        batch_pre_index = out[2]

        batch_score_arr = np.split(batch_score, batch_size, axis=1)
        batch_ids_arr = np.split(batch_ids, batch_size, axis=1)
        batch_pre_index_arr = np.split(batch_pre_index, batch_size, axis=1)

        index = 0
        for (score, ids, pre_index) in zip(batch_score_arr, batch_ids_arr, batch_pre_index_arr):
            trace_ids, trace_score = trace_fianl_result(score, ids, pre_index, topk=1, EOS=3)
            fout.write(id_to_text(trace_ids[0][:-1], id_dict_array))
            fout.write('\n')

            index += 1
            if index >= sent_num:
                break

    fout.close()
def train(config):
    """ model training """
    config.vocab_size = len(open(config.vocab_path).readlines())
    #计算行数,即词的个数
    bow_loss, kl_loss, nll_loss, final_loss= knowledge_seq2seq(config)

    bow_loss.persistable = True
    kl_loss.persistable = True
    nll_loss.persistable = True
    final_loss.persistable = True
    
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)
    #Program是Paddle Fluid对于计算图的一种静态描述
    #1. Program.clone() 方法不会克隆例如 DataLoader 这样的数据读取相关的部分,这可能会造成的数据读取部分在克隆后丢失
    #2. 此API当 for_test=True 时将会裁剪部分OP和变量。为防止错误的裁剪,推荐在 append_backward 和执行优化器之前使用 clone(for_test=True) 。
    #当 for_test=True 时创建一个新的、仅包含当前Program前向内容的Program。否则创建一个新的,和当前Program完全相同的Program
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=config.grad_clip))
    optimizer = fluid.optimizer.Adam(learning_rate=config.lr)
    #给指定参数做梯度裁剪。
    # 此API对位置使用的要求较高,其必须位于组建网络之后, minimize 之前,因此在未来版本中可能被删除,故不推荐使用。
    # 推荐在 optimizer 初始化时设置梯度裁剪。 有三种裁剪策略: GradientClipByGlobalNorm 、 GradientClipByNorm 、
    #  GradientClipByValue 。 如果在 optimizer 中设置过梯度裁剪,又使用了 set_gradient_clip ,set_gradient_clip 将不会生效。

    #深度神经网络采用链式法则进行参数求导的方式并不是绝对安全的,有时会出现梯度消失或者梯度爆炸的情况。
    # 其中梯度爆炸问题的常见应对方式为“梯度裁剪”,也就是通过“clip”方式来防止迭代中梯度值过大。

    if config.stage == 0:
        print("stage 0")
        optimizer.minimize(bow_loss)
    else:
        print("stage 1")
        optimizer.minimize(final_loss)
        #指定优化器最小化的loss
    opt_var_name_list = optimizer.get_opti_var_name_list()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    #Gpu or Cpu
    exe = Executor(place)
    exe.run(framework.default_startup_program())
    #执行程序
    param_list = main_program.block(0).all_parameters()
    #Block 是高级语言中变量作用域的概念,在编程语言中,Block是一对大括号,其中包含局部变量定义和一系列指令或
    # 操作符。编程语言中的控制流结构 if-else 和 for 在深度学习中可以被等效为:
    # Fluid 中的 Block 描述了一组以顺序、选择或是循环执行的 Operator 以及 Operator 操作的对象:Tensor。
    param_name_list = [p.name for p in param_list]
    #模型列表
    init_model(config, param_name_list, place)

    processors = KnowledgeCorpus(
                        data_dir=config.data_dir,
                        data_prefix=config.data_prefix,
                        vocab_path=config.vocab_path,
                        min_len=config.min_len,
                        max_len=config.max_len)
                        #知识库对象
    train_generator = processors.data_generator(
                        batch_size=config.batch_size,
                        phase="train",
                        shuffle=True)
    #训练数据生成器
    valid_generator = processors.data_generator(
                        batch_size=config.batch_size,
                        phase="dev",
                        shuffle=False)
    #验证数据生成器
    model_handle = [exe, place, bow_loss, kl_loss, nll_loss, final_loss]

    train_loop(config,
               train_generator, valid_generator,
               main_program, inference_program,
               model_handle, param_name_list, opt_var_name_list)