def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, pytorch_dump_folder_path):
    """Convert a GPT-2 checkpoint into PyTorch weight and config files.

    Builds a ``GPT2Model`` from the requested configuration, populates it via
    ``load_tf_weights_in_gpt2`` and writes the model's ``state_dict`` plus the
    configuration JSON into ``pytorch_dump_folder_path``.

    Args:
        gpt2_checkpoint_path: Checkpoint location handed unchanged to
            ``load_tf_weights_in_gpt2``.
        gpt2_config_file: Value passed to ``GPT2Config``. An empty string
            selects the default configuration.
        pytorch_dump_folder_path: Directory that receives ``WEIGHTS_NAME`` and
            ``CONFIG_NAME``. It is created when it does not exist yet; an
            empty string means the current working directory.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import os`` being present in this file.
    import os

    # Construct model
    if gpt2_config_file == "":
        config = GPT2Config()
    else:
        config = GPT2Config(gpt2_config_file)
    model = GPT2Model(config)

    # Load weights from numpy
    load_tf_weights_in_gpt2(model, gpt2_checkpoint_path)

    # Make sure the target folder exists before writing; otherwise both
    # torch.save and open() fail with FileNotFoundError.
    # os.makedirs("") raises, hence the guard for an empty folder argument.
    if pytorch_dump_folder_path:
        os.makedirs(pytorch_dump_folder_path, exist_ok=True)

    # Save pytorch-model
    pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
    pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)
    print("Save PyTorch model to {}".format(pytorch_weights_dump_path))
    torch.save(model.state_dict(), pytorch_weights_dump_path)
    print("Save configuration file to {}".format(pytorch_config_dump_path))
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())
def run_test(model_dir, data_dir, mode, config_path='345M/', beam_width=10):
    """Load a trained GPT-2 LM-head model and prepare the test data loader.

    Args:
        model_dir: Directory containing ``GPT_model.pkl``; ``test_data.log``
            is also placed here.
        data_dir: Passed to ``GPT2DataLoader`` as ``data_path``.
        mode: The test data loader is only built when this equals ``'test'``.
        config_path: Directory holding ``config.json``, ``vocab.json`` and
            ``merges.txt``. The config file is looked up below ``./configs/``.
            A trailing slash is optional.
        beam_width: Not used by the code visible in this function.

    NOTE(review): the vocab and merges paths are resolved relative to the
    working directory, not ``./configs/`` like the config file. This mirrors
    the original string concatenation; confirm where those files live.
    """
    # Derive every file name from the directory argument itself. Rebinding
    # ``config_path`` to the config file first would turn the vocab/merges
    # paths into '<dir>config.jsonvocab.json' and '<dir>config.jsonmerges.txt'.
    config_dir = config_path
    config_file = os.path.join(config_dir, 'config.json')
    vocab_path = os.path.join(config_dir, 'vocab.json')
    merge_path = os.path.join(config_dir, 'merges.txt')
    checkpoint_path = model_dir + '/GPT_model.pkl'
    log_filename = model_dir + '/test_data.log'

    config = GPT2Config.from_json_file(os.path.join('./configs/', config_file))

    create_log(log_filename)
    print("Building model")
    # .cuda() is unconditional, so a CUDA device is required here.
    model = load_model(GPT2LMHeadModel(config), checkpoint_path,
                       test=True).cuda()
    model.eval()
    tokenizer = GPT2Tokenizer(vocab_path, merge_path)
    if mode == 'test':
        print('Loading test dataset...')
        test_data_loader = GPT2DataLoader(data_path=data_dir,
                                          vocab_file=vocab_path,
                                          bpe_merges=merge_path,
                                          bucket=2,
                                          batch_size=1,
                                          max_seq_len=512)
Ejemplo n.º 3
0
from pytorch_pretrained_bert.modeling_gpt2 import GPT2Config
from data_loader import GPT2DataLoader
from train import run
import os
import torch

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_size = 'small'
    if model_size == 'small':
        config_path = '117M/config.json'
    elif model_size == 'middle':
        config_path = '345M/config.json'
    elif model_size == 'big':
        config_path = '762M/config.json'
    config = GPT2Config.from_json_file(os.path.join('./configs/', config_path))
    model = load_model(GPT2LMHeadModel(config), "checkpoints/small_fs.pkl")
    model = model.to(device)

    train_data_loader = GPT2DataLoader(data_path='DailyDialog/train_text.txt',
                                       vocab_file='./vocab_file/encoder.json',
                                       bpe_merges='vocab_file/merges.txt',
                                       bucket=2,
                                       batch_size=5,
                                       max_seq_len=512)

    valid_data_loader = GPT2DataLoader(data_path='DailyDialog/test_text.txt',
                                       vocab_file='./vocab_file/encoder.json',
                                       bpe_merges='vocab_file/merges.txt',
                                       bucket=2,
                                       batch_size=5,