예제 #1
0
파일: eval.py 프로젝트: zzsnow/PaddleHelix
def main(args):
    """Evaluate a TAPEModel on the test data and print the resulting metric.

    Builds a static-graph test program for the task named in the model
    config, loads parameters from ``args.init_model``, runs every batch the
    test data loader yields, and shows the accumulated metric once at the end.

    Args:
        args: Parsed command-line namespace. Reads ``model_config`` (path to
            a JSON file), ``use_cuda``, ``thread_num``, ``test_data``,
            ``batch_size`` and ``init_model``.

    Raises:
        RuntimeError: If ``args.init_model`` is ``None`` or an empty string.
    """
    paddle.enable_static()

    # Close the config file deterministically instead of leaking the handle.
    with open(args.model_config, 'r') as config_file:
        model_config = json.load(config_file)

    # First argument is the "distributed" switch (see the training script's
    # call); evaluation always runs non-distributed.
    exe_params = default_exe_params(False, args.use_cuda, args.thread_num)
    exe = exe_params['exe']
    trainer_num = exe_params['trainer_num']
    trainer_id = exe_params['trainer_id']
    places = exe_params['places']

    task = model_config['task']

    model = TAPEModel(model_config=model_config, name=task)

    test_program = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            # NOTE(review): the boolean presumably selects test mode -- confirm
            # against TAPEModel.forward.
            model.forward(True)
            model.cal_loss()
            test_data_loader = setup_data_loader(model.input_list,
                                                 model_config, args.test_data,
                                                 trainer_id, trainer_num,
                                                 places, args.batch_size)
            exe.run(test_startup)
    test_metric = get_metric(task)

    # Evaluation is meaningless without trained parameters, so refuse to run
    # on freshly initialised weights.
    if not args.init_model:
        raise RuntimeError('Please set init_model.')
    load_partial_params(exe, args.init_model, test_program)

    # No trailing comma here: the executor must receive the fetch list itself,
    # not a 1-tuple wrapping it.
    test_fetch_list = model.get_fetch_list()
    for data in test_data_loader():
        results = exe.run(program=test_program,
                          feed=data,
                          fetch_list=test_fetch_list,
                          return_numpy=False)
        update_metric(task, test_metric, results)
    test_metric.show()
예제 #2
0
파일: train.py 프로젝트: zzsnow/PaddleHelix
def main(args):
    """Train a TAPEModel, optionally evaluating on test data after each epoch.

    Builds a static-graph training program (and, when test data is given, a
    separate test program), optionally loads initial parameters from
    ``args.init_model``, then runs ``args.max_epoch`` epochs. After every
    epoch the trainer with id 0 saves the parameters to
    ``<args.model_dir>/epoch<N>``.

    Args:
        args: Parsed command-line namespace. Reads ``model_config`` (path to
            a JSON file), ``is_distributed``, ``use_cuda``, ``thread_num``,
            ``lr``, ``warmup_steps``, ``max_grad_norm``, ``train_data``,
            ``test_data``, ``batch_size``, ``init_model``, ``max_epoch`` and
            ``model_dir``.
    """
    paddle.enable_static()

    # Close the config file deterministically instead of leaking the handle.
    with open(args.model_config, 'r') as config_file:
        model_config = json.load(config_file)

    exe_params = default_exe_params(args.is_distributed, args.use_cuda,
                                    args.thread_num)
    exe = exe_params['exe']
    trainer_num = exe_params['trainer_num']
    trainer_id = exe_params['trainer_id']
    dist_strategy = exe_params['dist_strategy']
    places = exe_params['places']

    task = model_config['task']

    model = TAPEModel(model_config=model_config, name=task)

    # One definition of "test data was supplied", used for building,
    # compiling and running the test program alike. Both None and "" mean
    # "no test data".
    has_test_data = args.test_data is not None and args.test_data != ""

    train_program = fluid.Program()
    train_startup = fluid.Program()
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            # NOTE(review): the boolean presumably selects test mode -- confirm
            # against TAPEModel.forward.
            model.forward(False)
            model.cal_loss()

            optimizer = default_optimizer(args.lr, args.warmup_steps,
                                          args.max_grad_norm)
            setup_optimizer(optimizer, model, args.use_cuda,
                            args.is_distributed, dist_strategy)

            optimizer.minimize(model.loss)

            train_data_loader = setup_data_loader(model.input_list,
                                                  model_config,
                                                  args.train_data, trainer_id,
                                                  trainer_num, places,
                                                  args.batch_size)
            exe.run(train_startup)

    train_metric = get_metric(task)
    train_fetch_list = model.get_fetch_list()

    if has_test_data:
        test_program = fluid.Program()
        test_startup = fluid.Program()
        with fluid.program_guard(test_program, test_startup):
            with fluid.unique_name.guard():
                model.forward(True)
                model.cal_loss()
                # Pass the input list, matching how the training loader is
                # built above.
                test_data_loader = setup_data_loader(model.input_list,
                                                     model_config,
                                                     args.test_data,
                                                     trainer_id, trainer_num,
                                                     places, args.batch_size)
                exe.run(test_startup)
        test_metric = get_metric(task)
        test_fetch_list = model.get_fetch_list()

    # Warm-start from a checkpoint. This targets the training program, which
    # always exists; the test program is only built when test data is given.
    # It is done before the program is wrapped for data-parallel execution so
    # the helper receives a plain Program.
    if args.init_model is not None and args.init_model != "":
        load_partial_params(exe, args.init_model, train_program)

    if not args.is_distributed:
        train_program = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(loss_name=model.loss.name)
        if has_test_data:
            test_program = fluid.compiler.CompiledProgram(
                test_program).with_data_parallel(loss_name=model.loss.name)

    for epoch_id in range(args.max_epoch):
        print(time.time(), 'Start epoch %d' % epoch_id)
        print('Train:')
        train_metric.clear()
        for data in train_data_loader():
            results = exe.run(program=train_program,
                              feed=data,
                              fetch_list=train_fetch_list,
                              return_numpy=False)
            update_metric(task, train_metric, results)
            # NOTE(review): shown after every batch (the test loop below
            # shows once per epoch) -- presumably progress output; confirm.
            train_metric.show()
        print()

        if has_test_data:
            print('Test:')
            test_metric.clear()
            for data in test_data_loader():
                results = exe.run(program=test_program,
                                  feed=data,
                                  fetch_list=test_fetch_list,
                                  return_numpy=False)
                update_metric(task, test_metric, results)
            test_metric.show()
            print()

        # Only one trainer writes checkpoints.
        if trainer_id == 0:
            print(time.time(), "Save model epoch%d." % epoch_id)

            os.makedirs(args.model_dir, exist_ok=True)
            fluid.io.save_params(exe,
                                 '%s/epoch%d' % (args.model_dir, epoch_id),
                                 train_program)