Example #1
            "to use distributed and fp16 training.")
    # https://nvidia.github.io/apex/amp.html

    optimizer = FusedAdam(optimizer_grouped_parameters,
                          lr=args.learning_rate,
                          bias_correction=False)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    # if args.loss_scale == 0:
    #     optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True,
    #                                verbose=False)
    # else:
    #     optimizer = FP16_Optimizer(optimizer,
    #                                static_loss_scale=args.loss_scale,
    #                                verbose=False)
else:
    optimizer = Adam(optimizer_grouped_parameters, args.learning_rate)

##########################################################################
# Training!
##########################################################################

if args.local_rank == -1:
    train_logger = open(join(log_dir, 'train_log.txt'), 'a+', buffering=1)
    eval_logger = open(join(log_dir, 'eval_log.txt'), 'a+', buffering=1)
    print(
        'epoch,global_step,step,mean_loss,mean_ppl,n_token_real,'
        'n_token_total,epoch_time',
        file=train_logger)
    print('epoch,global_step,step,eval_loss,eval_ppl', file=eval_logger)

global_step = 0
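
The amp path above only initializes the model and optimizer; during training the
backward pass has to go through amp so the loss is scaled and the gradients are
unscaled before the step. The sketch below shows that step under stated
assumptions: `train_dataloader`, the model call returning the LM loss, and the
1.0 clipping threshold are illustrative placeholders, not taken from the
original script.

# Sketch only. Assumes `torch`, `model`, `optimizer`, `args`, and a
# `train_dataloader` yielding (input_ids, labels) batches are defined
# elsewhere in the script.
for step, batch in enumerate(train_dataloader):
    input_ids, labels = batch
    loss = model(input_ids, labels=labels)  # placeholder forward call
    if args.fp16:
        # amp scales the loss so fp16 gradients do not underflow,
        # then unscales them before the optimizer step
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1.0)
    else:
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()
    optimizer.zero_grad()
    global_step += 1
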
Example #2
# Assumed context: as in Example #1, the excerpt is truncated; the setup below
# sits under `if args.fp16:`, preceded by the apex import guard.
if args.fp16:
    try:
        from apex.optimizers import FP16_Optimizer, FusedAdam
    except ImportError:
        raise ImportError(
            "Please install apex from https://www.github.com/nvidia/apex "
            "to use distributed and fp16 training.")
    optimizer = FusedAdam(optimizer_grouped_parameters,
                          lr=args.learning_rate,
                          bias_correction=False,
                          max_grad_norm=1.0)
    if args.loss_scale == 0:
        optimizer = FP16_Optimizer(optimizer,
                                   dynamic_loss_scale=True,
                                   verbose=False)
    else:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.loss_scale,
                                   verbose=False)
else:
    optimizer = Adam(optimizer_grouped_parameters,
                     args.learning_rate,
                     max_grad_norm=1.0)

##########################################################################
# Training!
##########################################################################

if args.local_rank == -1 or get_rank() == 0:
    train_logger = open(join(log_dir, 'train_log.txt'), 'a+', buffering=1)
    eval_logger = open(join(log_dir, 'eval_log.txt'), 'a+', buffering=1)
    print(
        'epoch,global_step,step,mean_loss,mean_ppl,n_token_real,'
        'n_token_total,epoch_time',
        file=train_logger)
    print('epoch,global_step,step,eval_loss,eval_ppl', file=eval_logger)
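
Example #2 predates the amp API: the FP16_Optimizer wrapper owns the loss
scaling (dynamic or static, per args.loss_scale), so the training loop calls
the wrapper's backward method instead of loss.backward(). A minimal sketch of
that pattern follows; the batch unpacking and model call are illustrative
placeholders. Explicit gradient clipping is omitted because max_grad_norm=1.0
is already passed to FusedAdam/Adam above.

# Sketch only. Assumes `model`, `optimizer`, `args`, and a `train_dataloader`
# yielding (input_ids, labels) batches, as in the sketch after Example #1.
for step, batch in enumerate(train_dataloader):
    input_ids, labels = batch
    loss = model(input_ids, labels=labels)  # placeholder forward call
    if args.fp16:
        # FP16_Optimizer applies the (static or dynamic) loss scale internally
        optimizer.backward(loss)
    else:
        loss.backward()
    optimizer.step()
    optimizer.zero_grad()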