Example #1
File: train.py  Project: guxd/DialoGPT
# Imports assumed by this snippet; in the DialoGPT project,
# DynamicBatchingLoader is defined in the data_loader module.
import logging

import numpy as np
import torch

from data_loader import DynamicBatchingLoader

logger = logging.getLogger(__name__)


def evaluate(model, tokenizer, epoch_id, args):
    # use the same signature as eval_model_generation
    logger.info(
        'computing eval model loss in eval mode; switch the model back to '
        'train mode after calling this function'
    )
    model.eval()

    eval_dataloader = DynamicBatchingLoader(args.eval_input_file, tokenizer,
                                            args.normalize_data,
                                            args.eval_batch_size,
                                            args.max_seq_length)

    tot_loss = []
    tot_sample = []
    with torch.no_grad():
        for step, batch in enumerate(eval_dataloader):
            batch = tuple(t.to(args.device) for t in batch)
            input_ids, position_ids, token_ids, label_ids, src_len, _ = batch
            if args.no_token_id:
                token_ids = None
            n_sample = input_ids.shape[0]
            loss = model(input_ids, position_ids, token_ids, label_ids)
            tot_loss.append(loss.mean().item() * n_sample)
            tot_sample.append(n_sample)
    print(
        f"\n Epoch {epoch_id}: Val loss {np.sum(tot_loss) / np.sum(tot_sample)}  "
    )
    return np.sum(tot_loss) / np.sum(tot_sample)
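
For context, here is a minimal, hypothetical call site for this helper (not part of the original project); train_one_epoch and args.num_epochs are placeholders:

def training_loop(model, tokenizer, args):
    for epoch_id in range(args.num_epochs):    # args.num_epochs is assumed
        train_one_epoch(model, args)           # placeholder training step
        val_loss = evaluate(model, tokenizer, epoch_id, args)
        model.train()  # restore train mode, as the log message requests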
Example #2
def evaluate_models_from(GPT_saved_models_folder, eval_file, enc, args):
    # Prepare eval data
    eval_dataloader_loss = DynamicBatchingLoader(eval_file, enc,
                                                 args.normalize_data,
                                                 args.eval_batch_size,
                                                 args.max_seq_length)

    eval_dataloader_gen = get_eval_list_same_length(eval_file, enc,
                                                    args.eval_batch_size, True)
    # read eval_loss log file
    eval_loss_log_file = os.path.join(GPT_saved_models_folder, "eval_log.txt")
    min_ckpt_old_perplexity = None
    min_ckpt_new_perplexity = None
    min_old_perplexity = 1000000.0
    min_new_perplexity = 1000000.0

    with open(eval_loss_log_file, "r") as reader:
        next(reader)  # skip the header row
        for line in reader:
            line = line.strip()
            epoch, ckpt_no, _, loss, perplexity = line.split(",")
            epoch = int(epoch)
            ckpt_no = int(ckpt_no) - 1
            loss = float(loss)
            perplexity = float(perplexity)
            print(ckpt_no, loss, perplexity, end="")
            if min_old_perplexity > perplexity:
                min_old_perplexity = perplexity
                min_ckpt_old_perplexity = ckpt_no
            # calculate new loss and perplexity for this checkpoint;
            # `config` (a GPT2Config) is assumed to be defined in the
            # surrounding scope, as in Example #3
            model_filename = "GP2-pretrain-step-{}.pkl"
            model = load_model(GPT2LMHeadModel(config),
                               os.path.join(GPT_saved_models_folder,
                                            model_filename.format(ckpt_no)),
                               args,
                               verbose=True)
            eval_loss, eval_ppl = eval_model_loss(model, enc,
                                                  eval_dataloader_loss, epoch,
                                                  args)
            if min_new_perplexity > eval_ppl:
                min_new_perplexity = eval_ppl
                min_ckpt_new_perplexity = ckpt_no
    print("Old best ckpt and perplexity:", min_ckpt_old_perplexity,
          min_old_perplexity)
    print("New best ckpt and perplexity:", min_ckpt_new_perplexity,
          min_new_perplexity)
    return min_ckpt_old_perplexity, min_old_perplexity, min_ckpt_new_perplexity, min_new_perplexity
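
The parsing loop above implies that eval_log.txt is comma-separated with one header row and five columns per line; the third column is unpacked into _ and discarded, so its meaning is not recoverable from this snippet. A sketch of the assumed layout (column names and values are illustrative only):

epoch,ckpt_no,step,loss,perplexity
0,1,2000,3.4012,29.99
0,2,4000,3.2178,24.97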
Example #3
config = GPT2Config.from_json_file(join(args.model_name_or_path,
                                        'config.json'))

if args.local_rank == -1:
    train_dataloader = BucketingDataLoader(args.train_input_file,
                                           args.train_batch_size,
                                           args.max_seq_length)
else:
    # the distributed data loader is disabled in this snippet; the
    # original call is kept below for reference
    pass
    # train_dataloader = DistributedBucketingDataLoader(
    #     get_rank(), get_world_size(),
    #     args.train_input_file, args.train_batch_size,
    #     args.max_seq_length)

eval_dataloader_loss = DynamicBatchingLoader(args.eval_input_file, enc,
                                             args.normalize_data,
                                             args.eval_batch_size,
                                             args.max_seq_length)

eval_dataloader_gen = get_eval_list_same_length(args.eval_input_file, enc,
                                                args.eval_batch_size, True)

##########################################################################
# Prepare Model and Optimizer
##########################################################################
model = load_model(GPT2LMHeadModel(config),
                   args.init_checkpoint,
                   args,
                   verbose=True)
if args.local_rank != -1:
    # when training from scratch, make sure all ranks start from
    # identical initial models
    params = [p.data for p in model.parameters()]
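
The snippet is cut off after collecting params. One common way to guarantee identical initial weights across workers is to broadcast rank 0's parameters; the sketch below is an illustration under the assumption that torch.distributed has already been initialized, not the project's exact code:

import torch.distributed as dist

# Hypothetical synchronization step: overwrite every rank's initial
# parameters with rank 0's so all workers start from the same weights.
for p in params:
    dist.broadcast(p, src=0)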
Example #4
logger.info('Input Argument Information')
args_dict = vars(args)
for name, value in args_dict.items():
    logger.info('%-28s  %s' % (name, value))

##########################################################################
# Prepare Data Set
##########################################################################
print("Prepare Data")
enc = GPT2Tokenizer.from_pretrained(args.model_name_or_path)

config = GPT2Config.from_json_file(
    join(args.model_name_or_path, 'config.json'))

inference_dataloader_loss = DynamicBatchingLoader(
    args.inference_input_file, enc, args.normalize_data,
    args.inference_batch_size, args.max_seq_length, True)

inference_dataloader_gen = get_eval_list_same_length(
    args.inference_input_file, enc, args.inference_batch_size, True)

# eval_dataloader_loss = DynamicBatchingLoader(
#     args.eval_input_file, enc, args.normalize_data,
#     args.eval_batch_size, args.max_seq_length)
#
# eval_dataloader_gen = get_eval_list_same_length(
#     args.eval_input_file, enc, args.eval_batch_size, True)

##########################################################################
# Prepare Model
##########################################################################
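
The example ends at this section header. Judging from Example #3, the step that follows presumably constructs the model and loads a checkpoint; a sketch modeled on that example (args.init_checkpoint is an assumption carried over from Example #3, not shown in this snippet):

model = load_model(GPT2LMHeadModel(config),
                   args.init_checkpoint,
                   args,
                   verbose=True)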