Example #1
def main():
    parser = ArgumentParser()
    parser.add_argument("--model-config",
                        type=str,
                        default="openai-gpt",
                        help="Path, url or short name of the model")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--outlens", type=int, default=30)
    parser.add_argument("--beam", type=int, default=1)
    parser.add_argument("--checkpoints", type=str)
    parser.add_argument("--data", type=str, default="file")

    args = parser.parse_args()
    args.load_model = True

    model = BertModel(None, args)
    state_dict = convert_model(torch.load(args.checkpoints)['sd'])
    model.load_state_dict(state_dict)
    model.to(args.device)
    model.eval()  # disable dropout before generation, as the other examples do
    tokenizer = BertWordPieceTokenizer("bert-base-chinese",
                                       cache_dir="temp_cache_dir")
    generate(model,
             tokenizer,
             args.device,
             args.data,
             sample=True,
             top_k=5,
             beam_size=6,
             outlens=30)
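All of the checkpoints loaded in these examples are expected to be dictionaries whose 'sd' key holds the raw state dict that convert_model() then remaps. A minimal sketch of that assumption (the path below is a placeholder, not from the source):

ckpt = torch.load("checkpoints/model.pt", map_location="cpu")  # placeholder path
print(ckpt.keys())                     # expected to contain at least 'sd'
state_dict = convert_model(ckpt['sd'])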
Example #2
def model_init(app):
    ArgsSet = type('ArgsSet', (object,), {})
    client = ArgsSet()
    parser = ArgumentParser()
    parser.add_argument("--model-config", type=str, default="openai-gpt",
                        help="Path, url or short name of the model")
    parser.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available()
                        else "cpu", help="Device (cuda or cpu)")
    parser.add_argument("--outlens", type=int, default=30)
    parser.add_argument("--beam", type=int, default=1)
    parser.add_argument("--gpt-checkpoints", type=str)
    parser.add_argument("--port", type=int, default=8866)

    args = parser.parse_args()
    args.load_model = True
    args.fp32_embedding = False
    args.fp32_layernorm = False
    args.fp32_tokentypes = False
    args.layernorm_epsilon = 1e-12

    gpt = BertModel(None, args)
    state_dict = convert_model(torch.load(args.gpt_checkpoints)['sd'])
    gpt.load_state_dict(state_dict)
    gpt.to(args.device)
    gpt.eval()
    tokenizer = BertWordPieceTokenizer("bert-base-chinese", cache_dir="temp_cache_dir")
    print(" Load model from {}".format(args.gpt_checkpoints))

    client.tokenizer = tokenizer
    client.gpt = gpt
    client.gpt_beam = SequenceGenerator(gpt, tokenizer, beam_size=args.beam, max_lens=args.outlens)
    client.device = args.device
    client.port = args.port
    client.generator = sample_sequence

    return client
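model_init(app) returns a client object holding the model, tokenizer, beam generator, and port, which suggests it is consumed by a web handler. The sketch below shows one plausible wiring, assuming a Flask-style app and assuming that the tokenizer exposes encode() and SequenceGenerator exposes generate(); none of these APIs is confirmed by the source.

from flask import Flask, request, jsonify

app = Flask(__name__)
client = model_init(app)

@app.route("/generate", methods=["POST"])
def generate_endpoint():
    text = request.json["text"]
    ids = client.tokenizer.encode(text)        # assumed tokenizer API
    output = client.gpt_beam.generate(ids)     # assumed SequenceGenerator API
    return jsonify({"output": output})

app.run(port=client.port)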
Example #3
        old_vocab = train_dataset.vocab
        vocab = torchtext.legacy.vocab.Vocab(
            counter=old_vocab.freqs, specials=['<unk>', '<pad>', '<MASK>'])
        with open(args.save_vocab, 'wb') as f:
            torch.save(vocab, f)
    pad_id = vocab.stoi['<pad>']
    sep_id = vocab.stoi['<sep>']
    cls_id = vocab.stoi['<cls>']
    train_dataset, dev_dataset = SQuAD1(vocab=vocab)
    train_dataset = process_raw_data(train_dataset)
    dev_dataset = process_raw_data(dev_dataset)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    embed_layer = BertEmbedding(len(vocab), args.emsize)
    pretrained_bert = BertModel(len(vocab), args.emsize, args.nhead, args.nhid,
                                args.nlayers, embed_layer, args.dropout)
    pretrained_bert.load_state_dict(torch.load(args.bert_model))
    model = QuestionAnswerTask(pretrained_bert).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
    best_f1 = None
    train_loss_log, val_loss_log = [], []

    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train()
        val_loss, val_exact, val_f1 = evaluate(dev_dataset, vocab)
        val_loss_log.append(val_loss)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'exact {:8.3f}% | '
              'f1 {:8.3f}%'.format(epoch, (time.time() - epoch_start_time),
                                   val_loss, val_exact, val_f1))
Example #4
def main(train_file,
         dev_file,
         target_dir,
         epochs=10,
         batch_size=32,
         lr=2e-05,
         patience=3,
         max_grad_norm=10.0,
         checkpoint=None):
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-chinese',
                                                   do_lower_case=True)
    device = torch.device("cuda")
    print(20 * "=", " Preparing for training ", 20 * "=")
    # Directory where the model will be saved
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_data = DataPrecessForSentence(bert_tokenizer, train_file)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading validation data...")
    dev_data = DataPrecessForSentence(bert_tokenizer, dev_file)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    model = BertModel().to(device)
    # -------------------- Preparation for training  ------------------- #
    # Parameters to be optimized
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    optimizer = AdamW(optimizer_grouped_parameters, lr=lr)
    #optimizer = torch.optim.Adam(optimizer_grouped_parameters, lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)
    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continuing training from a checkpoint if one was given as argument
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}...".
              format(start_epoch))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy, auc = validate(model, dev_loader)
    print(
        "\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%, auc: {:.4f}"
        .format(valid_loss, (valid_accuracy * 100), auc))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training Bert model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, epoch,
                                                       max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%".
              format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, epoch_auc = validate(
            model, dev_loader)
        valid_losses.append(epoch_loss)
        print(
            "-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%, auc: {:.4f}\n"
            .format(epoch_time, epoch_loss, (epoch_accuracy * 100), epoch_auc))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0
            torch.save(
                {
                    "epoch": epoch,
                    "model": model.state_dict(),
                    "best_score": best_score,
                    "epochs_count": epochs_count,
                    "train_losses": train_losses,
                    "valid_losses": valid_losses
                }, os.path.join(target_dir, "best.pth.tar"))
        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
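The two optimizer groups above implement the usual BERT fine-tuning convention: biases and LayerNorm parameters are exempt from weight decay. For reference, the same pattern on a self-contained stand-in module (the module and no_decay names below are illustrative, not from the source):

import torch.nn as nn
from torch.optim import AdamW

layer = nn.TransformerEncoderLayer(d_model=16, nhead=2)  # stand-in for BertModel
no_decay = ['bias', 'norm1.weight', 'norm2.weight']      # LayerNorms of this module
groups = [
    {'params': [p for n, p in layer.named_parameters()
                if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in layer.named_parameters()
                if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
]
optimizer = AdamW(groups, lr=2e-5)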
Example #5
File: main.py  Project: jeanm/text
    # We use only ~1% data to fine-tune the model.
    train, dev = SQuAD1()
    raw_train = list(train)[:1024]
    raw_dev = list(dev)[:128]
    convert_to_arrow(raw_train, "train_arrow")
    convert_to_arrow(raw_dev, "dev_arrow")

    base_url = 'https://pytorch.s3.amazonaws.com/models/text/torchtext_bert_example/'
    vocab_path = download_from_url(base_url + 'bert_vocab.txt')
    data_module = QuestionAnswerDataModule(train_arrow_path='train_arrow',
                                           dev_arrow_path='dev_arrow',
                                           vocab_filepath=vocab_path,
                                           batch_size=BATCH_SIZE)

    # Load pretrained model and generate task
    # default parameters from the pretrained model
    vocab_size, emsize, nhead, nhid, nlayers, dropout = 99230, 768, 12, 3072, 12, 0.2
    pretrained_bert = BertModel(vocab_size, emsize, nhead, nhid, nlayers,
                                dropout)
    pretrained_model_path = download_from_url(base_url + 'ns_bert.pt')
    pretrained_bert.load_state_dict(
        torch.load(pretrained_model_path, map_location='cpu'))
    qa_model = QuestionAnswerModel(pretrained_bert)

    task = QuestionAnswerTask(qa_model, LR)
    trainer = Trainer(gpus=0,
                      max_epochs=EPOCH,
                      progress_bar_refresh_rate=40,
                      fast_dev_run=True)
    trainer.fit(task, data_module)
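Note that fast_dev_run=True makes the Lightning Trainer run only a single batch of training and validation as a smoke test. A usage note, not part of the source: for an actual fine-tuning run you would drop that flag.

trainer = Trainer(gpus=0, max_epochs=EPOCH)  # full run; set gpus=1 if CUDA is available
trainer.fit(task, data_module)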
Example #6
def run_main(args, rank=None):
    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    if args.parallel == 'DDP':
        n = torch.cuda.device_count() // args.world_size
        device = list(range(rank * n, (rank + 1) * n))
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    vocab = torch.load(args.save_vocab)
    cls_id = vocab.stoi['<cls>']
    pad_id = vocab.stoi['<pad>']
    sep_id = vocab.stoi['<sep>']

    if args.dataset == 'WikiText103':
        from torchtext.experimental.datasets import WikiText103
        train_dataset, valid_dataset, test_dataset = WikiText103(vocab=vocab)
    elif args.dataset == 'BookCorpus':
        from data import BookCorpus
        train_dataset, valid_dataset, test_dataset = BookCorpus(vocab, min_sentence_len=60)

    if rank is not None:
        chunk_len = len(train_dataset.data) // args.world_size
        train_dataset.data = train_dataset.data[(rank * chunk_len):((rank + 1) * chunk_len)]

    if args.checkpoint != 'None':
        model = torch.load(args.checkpoint)
    else:
        pretrained_bert = BertModel(len(vocab), args.emsize, args.nhead, args.nhid, args.nlayers, args.dropout)
        pretrained_bert.load_state_dict(torch.load(args.bert_model))
        model = NextSentenceTask(pretrained_bert)

    if args.parallel == 'DDP':
        model = model.to(device[0])
        model = DDP(model, device_ids=device)
    else:
        model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
    best_val_loss = None
    train_loss_log, val_loss_log = [], []

    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train(process_raw_data(train_dataset, args), model, train_loss_log, device, optimizer,
              criterion, epoch, scheduler, cls_id, sep_id, pad_id, args, rank)
        val_loss = evaluate(process_raw_data(valid_dataset, args), model, device, criterion,
                            cls_id, sep_id, pad_id, args)
        val_loss_log.append(val_loss)

        if (rank is None) or (rank == 0):
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s '
                  '| valid loss {:8.5f} | '.format(epoch,
                                                   (time.time() - epoch_start_time),
                                                   val_loss))
            print('-' * 89)
        if not best_val_loss or val_loss < best_val_loss:
            if rank is None:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
            elif rank == 0:
                with open(args.save, 'wb') as f:
                    torch.save(model.state_dict(), f)
            best_val_loss = val_loss
        else:
            scheduler.step()
    if args.parallel == 'DDP':
        rank0_devices = [x - rank * len(device) for x in device]
        device_pairs = zip(rank0_devices, device)
        map_location = {'cuda:%d' % x: 'cuda:%d' % y for x, y in device_pairs}
        model.load_state_dict(torch.load(args.save, map_location=map_location))
        test_loss = evaluate(process_raw_data(test_dataset, args), model, device, criterion,
                             cls_id, sep_id, pad_id, args)
        if rank == 0:
            wrap_up(train_loss_log, val_loss_log, test_loss, args, model.module, 'ns_loss.txt', 'ns_model.pt')
    else:
        with open(args.save, 'rb') as f:
            model = torch.load(f)

        test_loss = evaluate(process_raw_data(test_dataset, args), model, device, criterion,
                             cls_id, sep_id, pad_id, args)
        wrap_up(train_loss_log, val_loss_log, test_loss, args, model, 'ns_loss.txt', 'ns_model.pt')
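run_main() takes an optional rank, and its DDP branch assumes the process group has already been initialized. A minimal sketch of how it might be launched with torch.multiprocessing; the NCCL backend and rendezvous address here are assumptions, not taken from the source:

import torch.distributed as dist
import torch.multiprocessing as mp

def ddp_entry(rank, args):
    # One process per rank; each calls run_main() with its own rank.
    dist.init_process_group("nccl", init_method="tcp://127.0.0.1:23456",
                            rank=rank, world_size=args.world_size)
    run_main(args, rank)

# mp.spawn(ddp_entry, args=(args,), nprocs=args.world_size)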
Example #7
def model_init(app):
    ArgsSet = type('ArgsSet', (object, ), {})
    client = ArgsSet()
    parser = ArgumentParser()
    parser.add_argument("--model-config",
                        type=str,
                        default="openai-gpt",
                        help="Path, url or short name of the model")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--outlens", type=int, default=30)
    parser.add_argument("--beam", type=int, default=1)
    parser.add_argument("--fuse-checkpoints", type=str)
    parser.add_argument("--gpt-checkpoints", type=str)
    parser.add_argument("--qa-style-checkpoints", type=str)
    parser.add_argument("--multi-task", type=str)
    parser.add_argument("--split-sentence-with-task-embedding-checkpoints",
                        type=str)
    parser.add_argument("--special-cls-checkpoints", type=str)

    parser.add_argument("--port", type=int, default=8866)

    args = parser.parse_args()
    args.load_model = True
    args.fp32_embedding = False
    args.fp32_layernorm = False
    args.fp32_tokentypes = False
    args.layernorm_epsilon = 1e-12

    fuse_model = BertModel(None, args)
    state_dict = convert_model(torch.load(args.fuse_checkpoints)['sd'])
    fuse_model.load_state_dict(state_dict)
    fuse_model.to(args.device)
    fuse_model.eval()
    print("| Load model from {}".format(args.fuse_checkpoints))

    gpt = BertModel(None, args)
    state_dict = convert_model(torch.load(args.gpt_checkpoints)['sd'])
    gpt.load_state_dict(state_dict)
    gpt.to(args.device)
    gpt.eval()
    tokenizer = BertWordPieceTokenizer("bert-base-chinese",
                                       cache_dir="temp_cache_dir")
    print(" Load model from {}".format(args.gpt_checkpoints))

    # Load bert checkpoints
    args.load_model = False
    args.fp32_embedding = False
    args.fp32_layernorm = False
    args.fp32_tokentypes = False
    args.layernorm_epsilon = 1e-12
    bert = BertModel(None, args)
    bert.to(args.device)
    bert.eval()

    client.tokenizer = tokenizer
    client.fuse_model = fuse_model
    client.fuse_beam = SequenceGenerator(fuse_model,
                                         tokenizer,
                                         beam_size=args.beam,
                                         max_lens=args.outlens)
    client.gpt = gpt
    client.gpt_beam = SequenceGenerator(gpt,
                                        tokenizer,
                                        beam_size=args.beam,
                                        max_lens=args.outlens)
    client.bert = bert
    client.device = args.device
    client.port = args.port
    client.generator = sample_sequence

    # multi task model

    multi_task = BertModel(None, args)
    state_dict = convert_model(torch.load(args.multi_task)['sd'])
    print("| Load model from {}".format(args.multi_task))
    multi_task.load_state_dict(state_dict)
    multi_task.to(args.device)
    multi_task.eval()
    client.multi_task_model = multi_task
    client.multi_task_beam = SequenceGenerator(multi_task,
                                               tokenizer,
                                               beam_size=args.beam,
                                               max_lens=args.outlens)

    # qa style model
    qa_style = BertModel(None, args)
    state_dict = convert_model(torch.load(args.qa_style_checkpoints)['sd'])
    qa_style.load_state_dict(state_dict)
    qa_style.to(args.device)
    qa_style.eval()
    print(" Load model from {}".format(args.qa_style_checkpoints))
    client.qa_task_model = qa_style

    # special cls tokens
    special_cls_model = BertModel(None, args)
    state_dict = convert_model(torch.load(args.special_cls_checkpoints)['sd'])
    special_cls_model.load_state_dict(state_dict)
    special_cls_model.to(args.device)
    special_cls_model.eval()
    print("| Load model from {}".format(args.special_cls_checkpoints))
    client.special_cls_model = special_cls_model
    client.special_beam = SequenceGenerator(special_cls_model,
                                            tokenizer,
                                            beam_size=args.beam,
                                            max_lens=args.outlens)

    # split sentence model with task embedding
    split_sentence_model = BertModel(None, args)
    state_dict = convert_model(
        torch.load(args.split_sentence_with_task_embedding_checkpoints)['sd'])
    split_sentence_model.load_state_dict(state_dict)
    split_sentence_model.to(args.device)
    split_sentence_model.eval()
    print("| Load model from {}".format(
        args.split_sentence_with_task_embedding_checkpoints))
    client.split_sentence_model = split_sentence_model
    client.split_sentence_beam = SequenceGenerator(split_sentence_model,
                                                   tokenizer,
                                                   beam_size=args.beam,
                                                   max_lens=args.outlens)

    return client
Example #8
import re
import torch
import sentencepiece as spm

from model import BertModel

sp = spm.SentencePieceProcessor()
sp.load('resource/sentencepiece.unigram.35000.model')
vocab_size = sp.get_piece_size()

n_embedding = 512
n_layer = 8

model = BertModel(vocab_size, n_embedding, n_layer)
model.eval()
model.load_state_dict(torch.load('resource/model.{}.{}.th'.format(n_embedding, n_layer),
                                 map_location='cpu'))

# You should enable CUDA if it is available:
# model.cuda()

# If you are using a GPU with tensor cores (NVIDIA Volta or Turing architecture),
# you can enable half-precision inference and training. We recommend NVIDIA's
# official apex package to keep this as clean as possible:
#   from apex import amp
#   [model] = amp.initialize([model], opt_level="O2")
device = model.embedding.weight.data.device


def clean_text(txt):
    # Lowercase and strip all whitespace. A raw string and '+' avoid the
    # invalid-escape warning and the empty-match substitutions of '\s*'.
    txt = txt.lower()
    txt = re.sub(r'\s+', '', txt)
    return txt
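A hypothetical end-to-end use of the pieces above; the forward signature of this project's BertModel is an assumption, since it is project-specific:

text = clean_text("An example sentence to encode.")
ids = sp.encode_as_ids(text)               # SentencePiece: text -> token ids
batch = torch.tensor([ids], device=device)
with torch.no_grad():
    hidden = model(batch)                  # assumed to return hidden states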