def define_args():
    """Build the command-line parser, parse the arguments and print them.

    The arguments are read from the process command line (``parse_args()`` is
    called without an explicit argument list), echoed through
    ``print_arguments`` and returned.

    Returns:
        argparse.Namespace: The parsed arguments, one attribute per option
        declared below.
    """
    # The text is a description of the program, not its name: passing it
    # positionally would set ``prog`` and end up in the usage line.
    parser = argparse.ArgumentParser(description='ERNIE-en model with Paddle')
    parser.add_argument('--debug', type=str2bool, default=False)

    # Model Args
    parser.add_argument('--ernie_config_file',
                        type=str,
                        default='./config/ernie_config.json')
    parser.add_argument('--vocab_file', type=str, default='./config/vocab.txt')
    parser.add_argument('--init_checkpoint', type=str, default="")
    parser.add_argument('--max_seq_len', type=int, default=512)
    parser.add_argument('--preln', type=str2bool, default=False)

    # Data Args
    parser.add_argument('--data_dir', type=str, default='./data')
    parser.add_argument('--eval_data_path', type=str, default='./data')
    parser.add_argument('--output_dir', type=str, default='./output')

    # Training Args
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--global_bsz', type=int, default=256)
    parser.add_argument('--micro_bsz', type=int, default=16)
    parser.add_argument('--do_eval', type=str2bool, default=True)
    parser.add_argument('--eval_batch_size', type=int, default=35)
    parser.add_argument('--num_train_steps', type=int, default=1500000)
    parser.add_argument('--global_steps', type=int, default=0)
    parser.add_argument('--warmup_steps', type=int, default=10000)
    parser.add_argument('--save_steps', type=int, default=10000)
    parser.add_argument('--eval_steps', type=int, default=-1)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--learning_rate', type=float, default=1e-4)
    parser.add_argument('--weight_decay', type=float, default=0.01)
    parser.add_argument('--use_lamb', type=str2bool, default=False)
    parser.add_argument('--seed', type=int, default=2021)
    parser.add_argument('--use_sop', type=str2bool, default=False)

    # Fleet Args
    parser.add_argument('--use_sharding', type=str2bool, default=False)
    parser.add_argument('--use_hybrid_dp', type=str2bool, default=True)
    parser.add_argument('--use_amp', type=str2bool, default=True)
    parser.add_argument('--use_recompute', type=str2bool, default=True)
    parser.add_argument('--use_offload', type=str2bool, default=False)
    parser.add_argument('--grad_merge', type=int, default=0)
    parser.add_argument('--num_mp',
                        type=int,
                        default=1,
                        help="num of model parallel")
    parser.add_argument('--num_pp',
                        type=int,
                        default=1,
                        help="num of pipeline")
    parser.add_argument('--num_sharding',
                        type=int,
                        default=1,
                        help="num of sharding")
    parser.add_argument('--num_dp', type=int, default=1, help="num of dp")
    args = parser.parse_args()

    print_arguments(args)
    return args
Example #2
0
def main(args):
    """Print the configuration when requested, then start the processes.

    ``print_arguments(args)`` is called only if ``args.print_config`` is
    truthy; ``start_procs(args)`` is always called.
    """
    wants_config_dump = args.print_config
    if wants_config_dump:
        print_arguments(args)
    start_procs(args)
Example #3
0
def main(args):
    """Start the processes, substituting a random seed when none is set.

    ``args.training_script_args`` is searched for ``--random_seed``. If the
    flag is absent it is appended with the placeholder value ``'-1'``. When
    the value is ``'-1'`` (appended here or given explicitly), a seed from
    ``random.randint(0, 100000)`` replaces it while ``start_procs(args)``
    runs, and the placeholder is put back afterwards -- also when
    ``start_procs`` raises.

    Args:
        args: Object exposing ``training_script_args`` (a list of
            command-line tokens, modified in place) and ``print_config``.

    Raises:
        IndexError: If ``--random_seed`` is the last token and therefore has
            no value following it.
    """
    seed_flag = "--random_seed"
    if seed_flag not in args.training_script_args:
        args.training_script_args += [seed_flag, '-1']
    # The value sits right after the flag.
    seed_pos = args.training_script_args.index(seed_flag) + 1
    needs_seed = args.training_script_args[seed_pos] == '-1'

    if needs_seed:
        args.training_script_args[seed_pos] = str(random.randint(0, 100000))
    try:
        if args.print_config:
            print_arguments(args)
        start_procs(args)
    finally:
        # Restore the placeholder even on failure so the generated seed does
        # not leak into the caller's argument list.
        if needs_seed:
            args.training_script_args[seed_pos] = '-1'
    # NOTE(review): `exe`, `program`, `place` and `tf_fluid_param_name_map` are
    # not defined in the visible part of the enclosing function; this section
    # looks like the tail of a separate TF-to-Fluid conversion routine whose
    # header is missing -- confirm against the complete source.
    exe.run(program)

    # Header for the per-parameter lines printed in the loop below.
    print(
        '---------------------- Converted Parameters -----------------------')
    print(
        '###### [TF param name] --> [Fluid param name]  [param shape] ######')
    print(
        '-------------------------------------------------------------------')

    # Read each mapped tensor from the TF checkpoint and copy it into the
    # Fluid variable named by the mapping.
    reader = pywrap_tensorflow.NewCheckpointReader(args.init_tf_checkpoint)
    for param in tf_fluid_param_name_map:
        value = reader.get_tensor(param)
        # These three output weights are transposed before being stored
        # (presumably the two frameworks use opposite layouts -- TODO confirm).
        if param == 'cls/seq_relationship/output_weights':
            value = np.transpose(value)
        if param == 'cls/squad/output_weights':
            value = np.transpose(value)
        if param == 'output_weights':
            value = np.transpose(value)
        fluid.global_scope().find_var(
            tf_fluid_param_name_map[param]).get_tensor().set(value, place)
        print(param, ' --> ', tf_fluid_param_name_map[param], '  ',
              value.shape)

    # Save the parameters of `program` under args.fluid_params_dir.
    fluid.io.save_params(exe, args.fluid_params_dir, main_program=program)


# Script entry point: parse the arguments, print them, then run convert().
if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
    convert(args)
Example #5
0
def main(args):
    """Print the configuration when requested, then start the processes.

    ``print_arguments(args)`` is called only if ``args.print_config`` is
    truthy; ``start_procs(args)`` is always called.
    """
    wants_config_dump = args.print_config
    if wants_config_dump:
        print_arguments(args)
    start_procs(args)