Exemple #1
0
        print("Loading Marker Dataset\n")
        train_dataset = MarkerDataset(fname=fn_train, MD=MD)
        test_dataset = MarkerDataset(fname=fn_test, MD=MD)
        epoch = marker_epoch

    # Trainer configuration for this round.
    # The value handed over as final_tokens is the product
    # epochs x dataset length x (MD.vocab_size + 1).
    # NOTE(review): presumably (MD.vocab_size + 1) stands for the number of
    # tokens per sample -- confirm against MarkerDataset.
    schedule_tokens = epoch * len(train_dataset) * (MD.vocab_size + 1)
    tconf = TrainerConfig(
        max_epochs=epoch,
        batch_size=358,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,
        final_tokens=schedule_tokens,
        num_workers=6,
    )

    # Run one training round, then save a checkpoint, passing the experiment
    # folder and the current iteration number (as a string).
    round_label = str(current_it)
    print("Training: ", round_label)
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()
    trainer.save_checkpoint(exp_folder, round_label)

    # On even-numbered iterations, run the examiner over the training file
    # and then the test file. The banner mentions a new dataset; what
    # examiner.exam() actually produces is not visible from here.
    if not current_it % 2:
        print("Exam and new dataset-------------\n")
        exam_plan = (
            ("Training exam \n", fn_train, train_dataset),
            ("Test exam \n", fn_test, test_dataset),
        )
        for banner, exam_file, exam_set in exam_plan:
            print(banner)
            examiner.exam(exam_file, exam_set, trainer)

    # Advance the iteration counter for the next round.
    current_it = current_it + 1
Exemple #2
0
                  train_dataset.block_size,
                  n_layer=8,
                  n_head=8,
                  n_embd=512)
# Build the GPT model from the configuration object mconf.
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# Gather the training hyper-parameters in one place, then build the trainer
# configuration from them.
trainer_options = dict(
    max_epochs=2,
    batch_size=512,
    learning_rate=6e-4,
    lr_decay=True,
    warmup_tokens=512 * 20,
    final_tokens=2 * len(train_dataset) * block_size,
    num_workers=4,
    ckpt_path="./model.bin",
)
tconf = TrainerConfig(**trainer_options)

# Create the trainer and kick off training, then save a checkpoint.
# NOTE(review): None is passed in the slot after the training dataset --
# presumably the test/eval dataset; confirm against Trainer.
trainer = Trainer(model, train_dataset, None, tconf)
trainer.train()
trainer.save_checkpoint()

# alright, let's sample some character-level Shakespeare
from mingpt.utils import sample

# Prompt text used to seed generation.
context = "Today, Amazon announced "

# Map every prompt character to its index via the dataset's stoi table, then
# add a leading dimension of size 1 and move the tensor to the trainer's device.
prompt_ids = [train_dataset.stoi[ch] for ch in context]
x = torch.tensor(prompt_ids, dtype=torch.long).unsqueeze(0).to(trainer.device)

# Call sample() with a step count of 2000 and top_k=10, keeping the first
# entry of what it returns.
sampled = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)
y = sampled[0]

# Map each sampled index back through itos and print the joined text.
completion = ''.join(train_dataset.itos[int(tok)] for tok in y)
print(completion)