Example #1
def evaluate_models_from(GPT_saved_models_folder, eval_file, enc, args):
    """Re-evaluate every checkpoint listed in eval_log.txt and report both the
    logged ("old") perplexity and a freshly computed ("new") one."""
    # Prepare eval data
    eval_dataloader_loss = DynamicBatchingLoader(eval_file, enc,
                                                 args.normalize_data,
                                                 args.eval_batch_size,
                                                 args.max_seq_length)

    eval_dataloader_gen = get_eval_list_same_length(eval_file, enc,
                                                    args.eval_batch_size, True)
    # read eval_loss log file
    eval_loss_log_file = os.path.join(GPT_saved_models_folder, "eval_log.txt")
    min_ckpt_old_perplexity = None
    min_ckpt_new_perplexity = None
    min_old_perplexity = float("inf")
    min_new_perplexity = float("inf")

    with open(eval_loss_log_file, "r") as reader:
        next(reader)  # skip the header row
        for line in reader:
            line = line.strip()
            epoch, ckpt_no, _, loss, perplexity = line.split(",")
            epoch = int(epoch)
            ckpt_no = int(ckpt_no) - 1
            loss = float(loss)
            perplexity = float(perplexity)
            print(ckpt_no, loss, perplexity)
            if min_old_perplexity > perplexity:
                min_old_perplexity = perplexity
                min_ckpt_old_perplexity = ckpt_no
            # Recompute loss and perplexity for this checkpoint.
            # NOTE: `config` (a GPT2Config) must be defined in the enclosing
            # scope; it is not an argument of this function.
            model_filename = "GP2-pretrain-step-{}.pkl"
            model = load_model(GPT2LMHeadModel(config),
                               os.path.join(GPT_saved_models_folder,
                                            model_filename.format(ckpt_no)),
                               args,
                               verbose=True)
            eval_loss, eval_ppl = eval_model_loss(model, enc,
                                                  eval_dataloader_loss, epoch,
                                                  args)
            if min_new_perplexity > eval_ppl:
                min_new_perplexity = eval_ppl
                min_ckpt_new_perplexity = ckpt_no
    print("Old best ckpt and perplexity:", min_ckpt_old_perplexity,
          min_old_perplexity)
    print("New best ckpt and perplexity:", min_ckpt_new_perplexity,
          min_new_perplexity)
    return min_ckpt_old_perplexity, min_old_perplexity, min_ckpt_new_perplexity, min_new_perplexity
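
The examples on this page all go through a project-level `load_model(model, checkpoint, args, verbose=...)` helper. Its real implementation lives in the surrounding codebase; purely for orientation, here is a minimal sketch of what such a helper typically does (the "module." stripping and the `args.device` / `args.fp16` fields are assumptions, not confirmed details of this project):

import torch

def load_model_sketch(model, checkpoint_path, args, verbose=False):
    # Load weights from disk when a checkpoint path is given; otherwise
    # keep the freshly initialized weights.
    if checkpoint_path is not None:
        state_dict = torch.load(checkpoint_path, map_location="cpu")
        # Strip a leading "module." left behind by (Distributed)DataParallel.
        state_dict = {k[len("module."):] if k.startswith("module.") else k: v
                      for k, v in state_dict.items()}
        model.load_state_dict(state_dict)
        if verbose:
            print("loaded weights from", checkpoint_path)
    model.to(args.device)               # assumed field on args
    if getattr(args, "fp16", False):    # assumed half-precision switch
        model.half()
    return model
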
Example #2
def convert_to_dialogpt(args):
    """Convert a Megatron-LM checkpoint into a DialoGPT-style state dict."""
    config = GPT2Config.from_json_file(args.config_path)
    model = load_model(GPT2LMHeadModel(config), None, args, verbose=True)

    model_state_dict = torch.load(args.megatron_checkpoint_path)

    model_state_dict = fix_state_dict_namespace(model_state_dict['model'])
    model_state_dict = fix_model_shapes(model_state_dict)

    start_model = model
    if (hasattr(model, "transformer")
        and all(not s.startswith('transformer.')
                for s in model_state_dict.keys())):
        logger.info('loading transformer only')
        start_model = model.transformer
    start_model.load_state_dict(model_state_dict)

    torch.save(start_model.state_dict(), args.dialogpt_output_path)
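
`fix_state_dict_namespace` and `fix_model_shapes` come from the surrounding conversion script. As a hedged illustration only, a key-renaming helper of this kind usually looks something like the following sketch (which prefixes it actually handles is an assumption):

def fix_state_dict_namespace_sketch(state_dict):
    # Rename checkpoint keys so they match the target model's namespace,
    # e.g. drop the "module." prefix added by DistributedDataParallel.
    fixed = {}
    for key, value in state_dict.items():
        if key.startswith('module.'):
            key = key[len('module.'):]
        fixed[key] = value
    return fixed
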
Example #3
    #     get_rank(), get_world_size(),
    #     args.train_input_file, args.train_batch_size,
    #     args.max_seq_length)

eval_dataloader_loss = DynamicBatchingLoader(args.eval_input_file, enc,
                                             args.normalize_data,
                                             args.eval_batch_size,
                                             args.max_seq_length)

eval_dataloader_gen = get_eval_list_same_length(args.eval_input_file, enc,
                                                args.eval_batch_size, True)

##########################################################################
# Prepare Model and Optimizer
##########################################################################
model = load_model(GPT2LMHeadModel(config),
                   args.init_checkpoint,
                   args,
                   verbose=True)
if args.local_rank != -1:
    # when training from scratch, make sure the initial weights are
    # identical across all workers
    params = [p.data for p in model.parameters()]
    all_reduce_and_rescale_tensors(params,
                                   float(torch.distributed.get_world_size()))

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
total_params = sum(np.prod(p.size()) for p in model_parameters)
logger.info('Number of parameters = {}'.format(total_params))

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'ln']  # no decay for bias and LayerNorm (ln)
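
Both this example and Example #5 break off right where the optimizer parameter groups are built from `param_optimizer` and `no_decay`. One conventional way the grouping is done, shown as a sketch rather than this project's verbatim code (the 0.01 decay value is an assumption):

optimizer_grouped_parameters = [
    # weights get weight decay ...
    {'params': [p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    # ... biases and LayerNorm parameters do not
    {'params': [p for n, p in param_optimizer
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
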
Example #4
    tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path)

    # load
    config = GPT2Config.from_json_file(
        os.path.join(args.model_name_or_path, 'config.json'))
    config.no_token_id = args.no_token_id
    config.persona_emb_type = args.persona_emb_type
    config.PersonaNum = args.PersonaNum

    config.do_persona_linear = args.do_persona_linear
    config.persona_n_embd = args.persona_n_embd
    args.n_gpu = 1
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    # `model_file` is defined earlier in the original script (the checkpoint
    # to decode with).
    model = load_model(GPT2LMHeadModel(config), model_file, args, verbose=True)

    model.eval()
    model.to(args.device)  # use the device chosen above instead of hard-coding 'cuda'

    output_lines = []
    with codecs.open(decode_file, 'r', encoding='utf-8') as fin:
        print(decode_file)
        lines = fin.readlines()
        assert args.decode_num <= len(lines)
        if args.decode_num == -1:
            decode_size = len(lines)
        else:
Example #5
        get_rank(), get_world_size(),
        args.train_input_file, args.train_batch_size,
        args.max_seq_length)

eval_dataloader_loss = DynamicBatchingLoader(
    args.eval_input_file, enc, args.normalize_data,
    args.eval_batch_size, args.max_seq_length)

eval_dataloader_gen = get_eval_list_same_length(
    args.eval_input_file, enc, args.eval_batch_size, True)


##########################################################################
# Prepare Model and Optimizer
##########################################################################
model = load_model(GPT2LMHeadModel(config), args.init_checkpoint,
                   args, verbose=True)
if args.local_rank != -1:
    # when training from scratch, make sure the initial weights are
    # identical across all workers
    params = [p.data for p in model.parameters()]
    all_reduce_and_rescale_tensors(
        params, float(torch.distributed.get_world_size()))

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
total_params = sum(np.prod(p.size()) for p in model_parameters)
logger.info('Number of parameters = {}'.format(total_params))
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'ln']   # no decay for bias and LayerNorm (ln)
optimizer_grouped_parameters = [