Example #1
File: oteann.py  Project: marxav/oteann2
# Imports are not shown in the original snippet; these assume minGPT-style
# module paths, with CharDataset being oteann2's project-local dataset class.
import datetime

import torch
from mingpt.model import GPT, GPTConfig
from mingpt.trainer import Trainer, TrainerConfig


def train(config):

    training_t0 = datetime.datetime.now()

    block_size = config['block_size']

    print("config['train_filename']:", config['train_filename'])
    text = open(config['train_filename'], 'r').read()
    train_dataset = CharDataset(config['chars'], text, block_size,
                                debug=True)  # one line is 63 characters

    # create model
    mconf = GPTConfig(train_dataset.vocab_size,
                      train_dataset.block_size,
                      n_layer=config['n_layer'],
                      n_head=config['n_head'],
                      n_embd=config['n_embd'])

    model = GPT(mconf)

    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print('ANN parameters: %d' % pytorch_total_params)

    # train
    tconf = TrainerConfig(max_epochs=2,
                          batch_size=config['batch_size'],
                          learning_rate=6e-4,
                          lr_decay=True,
                          warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4,
                          tqdm=False)  # not config['do_finetune'])
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()
    training_t1 = datetime.datetime.now()
    training_duration = training_t1 - training_t0
    print('training_duration', training_duration)

    torch.save(model.state_dict(), config['model_filename'])

    return model
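For context, train() above reads eight keys from its config dict. A minimal illustrative dict could look like the following; the keys match what the function reads, but every value is a placeholder, not oteann2's actual configuration:

# Illustrative only: keys are the ones train() reads above,
# values are placeholders, not the project's real settings.
config = {
    'train_filename': 'train.txt',
    'model_filename': 'model.pt',
    'chars': list("abcdefghijklmnopqrstuvwxyz '-"),  # alphabet for CharDataset
    'block_size': 64,
    'batch_size': 256,
    'n_layer': 4,
    'n_head': 4,
    'n_embd': 256,
}
model = train(config)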
Example #2
# Imports are not shown in the original snippet; this assumes minGPT's
# trainer module.
from mingpt.trainer import Trainer, TrainerConfig


def train(model, n_epochs, train_dataset, test_dataset, checkpoint_path):
    tokens_per_epoch = len(train_dataset) * train_dataset.block_size

    # initialize a trainer instance and kick off training
    tconf = TrainerConfig(
        max_epochs=n_epochs,
        batch_size=4,
        learning_rate=3e-3,
        betas=(0.9, 0.95),
        weight_decay=0,
        lr_decay=True,
        warmup_tokens=tokens_per_epoch,
        final_tokens=n_epochs * tokens_per_epoch,
        ckpt_path=checkpoint_path,
        num_workers=1
    )
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()

    return trainer
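Note that with ckpt_path set, the minGPT Trainer these examples appear to target writes a checkpoint after any epoch whose test loss improves on the best seen so far, so checkpoint_path ends up holding the best model rather than the last one. A hypothetical call, with the model and datasets supplied by the caller:

# Hypothetical usage; model, train_ds and test_ds are assumed to exist.
trainer = train(model, 10, train_ds, test_ds, 'best_model.pt')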
Example #3
# Imports are not shown in the original snippet; get_train_dataset,
# get_test_dataset, get_model, BLOCK_SIZE and MODEL_PATH are project-local.
import torch
from mingpt.trainer import Trainer, TrainerConfig


def train():
    train_dataset = get_train_dataset()
    test_dataset = get_test_dataset()
    model = get_model()
    epochs = 20
    tokens_per_epoch = len(train_dataset) * BLOCK_SIZE
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=4,
        learning_rate=3e-5,
        lr_decay=False,
        warmup_tokens=tokens_per_epoch,
        final_tokens=epochs * tokens_per_epoch,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()
    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
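Unlike Example #1, which saves only a state_dict, this example pickles the entire module with torch.save(model, MODEL_PATH). Loading it back therefore requires the model class to be importable under the same module path it had when saved; a minimal sketch:

# Loading the fully pickled module saved above; the GPT class (and its
# module path) must be importable exactly as when the model was saved.
model = torch.load(MODEL_PATH)
model.eval()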
Example #4
# Imports are not shown in the original snippet; ReorderDataset, get_model,
# TOKENS and MODEL_PATH are project-local.
import torch
from mingpt.trainer import Trainer, TrainerConfig


def train():
    train_dataset = ReorderDataset("train", 10000)
    test_dataset = ReorderDataset("test", 1000)
    model = get_model()
    epochs = 100
    # NOTE: I think final_tokens is set wrong
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,
        final_tokens=epochs * len(train_dataset) * len(TOKENS),
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()
    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
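Regarding the NOTE in the snippet above: in the other examples, final_tokens counts the total tokens processed during training, i.e. epochs * len(train_dataset) * block_size. Using len(TOKENS), presumably the vocabulary size, only matches that if the vocabulary happens to equal the block size; otherwise the cosine decay schedule ends too early or too late. A hedged correction, assuming ReorderDataset exposes block_size like the other datasets here:

# Counts positions per sample rather than vocabulary entries
# (assumes ReorderDataset has a block_size attribute).
final_tokens = epochs * len(train_dataset) * train_dataset.block_size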
Example #5
File: test.py  Project: zhonglj2012/minGPT
import torch
from torch.utils.data import Dataset


class CharDataset(Dataset):
    # The original snippet begins mid-class: __init__ (omitted here) builds
    # the stoi/itos character maps and sets self.data, self.block_size and
    # self.vocab_size used below.

    def __getitem__(self, idx):
        chunk = self.data[idx:idx + self.block_size + 1]
        dix = [self.stoi[s] for s in chunk]
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y


block_size = 128
text = open('input.txt', 'r').read()
train_dataset = CharDataset(text, block_size)

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size,
                  train_dataset.block_size,
                  n_layer=8,
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig
from mingpt.utils import sample
while True:  # train one epoch at a time, then print a sample to gauge progress
    tconf = TrainerConfig(max_epochs=1, batch_size=256, learning_rate=6e-4)
    # minGPT's Trainer takes (model, train_dataset, test_dataset, config);
    # the original snippet passed tconf in the test_dataset slot.
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()
    context = "O God, O God!"
    x = torch.tensor([train_dataset.stoi[s] for s in context],
                     dtype=torch.long)[None, ...].to(trainer.device)
    y = sample(model, x, 100, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    print(completion)
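One caveat: the loop above trains and samples indefinitely but never persists the model. If checkpoints are wanted, one minimal addition (an assumption, not in the original snippet) is to save a state_dict at the end of each pass:

    # added inside the while-loop body, after printing the completion:
    torch.save(model.state_dict(), 'mingpt_char_model.pt')  # hypothetical filename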