Example #1
def train(config):

    training_t0 = datetime.datetime.now()

    block_size = config['block_size']

    print("config['train_filename']:", config['train_filename'])
    text = open(config['train_filename'], 'r').read()
    train_dataset = CharDataset(config['chars'], text, block_size,
                                debug=True)  # one line is 63 characters

    # create model
    mconf = GPTConfig(train_dataset.vocab_size,
                      train_dataset.block_size,
                      n_layer=config['n_layer'],
                      n_head=config['n_head'],
                      n_embd=config['n_embd'])

    model = GPT(mconf)

    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print('ANN parameters: %d' % pytorch_total_params)

    # train
    tconf = TrainerConfig(max_epochs=2,
                          batch_size=config['batch_size'],
                          learning_rate=6e-4,
                          lr_decay=True,
                          warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4,
                          tqdm=False)  # not config['do_finetune'])
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()
    training_t1 = datetime.datetime.now()
    training_duration = training_t1 - training_t0
    print('training_duration', training_duration)

    torch.save(model.state_dict(), config['model_filename'])

    return model
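
# Usage sketch (not part of the original snippet): a hypothetical config dict
# covering the keys that train() reads above. Paths and hyperparameter values
# are illustrative assumptions only.
example_config = {
    'train_filename': 'train.txt',                        # hypothetical path
    'chars': sorted(set(open('train.txt', 'r').read())),  # character vocabulary
    'block_size': 128,
    'n_layer': 8,
    'n_head': 8,
    'n_embd': 512,
    'batch_size': 128,
    'model_filename': 'model.pt',                         # hypothetical path
}
model = train(example_config)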
Example #2
def train(model, n_epochs, train_dataset, test_dataset, checkpoint_path):
    tokens_per_epoch = len(train_dataset) * train_dataset.block_size

    # initialize a trainer instance and kick off training
    tconf = TrainerConfig(
        max_epochs=n_epochs,
        batch_size=4,
        learning_rate=3e-3,
        betas=(0.9, 0.95),
        weight_decay=0,
        lr_decay=True,
        warmup_tokens=tokens_per_epoch,
        final_tokens=n_epochs * tokens_per_epoch,
        ckpt_path=checkpoint_path,
        num_workers=1
    )
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()

    return trainer
Example #3
def train():
    train_dataset = get_train_dataset()
    test_dataset = get_test_dataset()
    model = get_model()
    epochs = 20
    tokens_per_epoch = len(train_dataset) * BLOCK_SIZE
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=4,
        learning_rate=3e-5,
        lr_decay=False,
        warmup_tokens=tokens_per_epoch,
        final_tokens=epochs * tokens_per_epoch,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()
    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
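
# Not part of the original snippet: because torch.save() above was given the
# whole module rather than a state_dict, reloading takes a single torch.load
# call (the GPT class definition must still be importable at load time).
loaded_model = torch.load(MODEL_PATH)
loaded_model.eval()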
Example #4
def train():
    train_dataset = ReorderDataset("train", 10000)
    test_dataset = ReorderDataset("test", 1000)
    model = get_model()
    epochs = 100
    # NOTE: I think final_tokens is set wrong
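    # (In minGPT, warmup_tokens and final_tokens drive the learning-rate schedule:
    # linear warmup over warmup_tokens, then cosine decay, both counted in
    # processed training tokens. The other examples in this file compute
    # final_tokens = epochs * len(train_dataset) * block_size, so len(TOKENS)
    # below presumably stands in for the block size of ReorderDataset; if it
    # does not match, that would explain the note above.)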
    conf = TrainerConfig(
        max_epochs=epochs,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,
        final_tokens=epochs * len(train_dataset) * len(TOKENS),
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, test_dataset, conf)
    trainer.train()
    torch.save(model, MODEL_PATH)
    print(f"saved model to {MODEL_PATH}")
    return model
Example #5
from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size,
                  train_dataset.block_size,
                  n_layer=8,
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2,
                      batch_size=512,
                      learning_rate=6e-4,
                      lr_decay=True,
                      warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4,
                      device='cpu',
                      ckpt_path="./prfaqgen.bin")

trainer = Trainer(model, train_dataset, None, tconf)
# trainer.train()
trainer.load_checkpoint()
# trainer.save_checkpoint()

# alright, let's sample some character-level Shakespeare
from mingpt.utils import sample

context = "The launch of Skills Store Arabic was GREEN. The team "
x = torch.tensor([train_dataset.stoi[s] for s in context],
                 dtype=torch.long)[None, ...].to(trainer.device)
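
# The snippet is cut off here; by analogy with the other examples in this file
# (see Example #6), the sampling and decoding step would presumably look like
# this (the step count and sampling parameters below are illustrative):
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)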
Example #6
block_size = 128 # spatial extent of the model for its context

# you can download this file at https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt
text = open('input.txt', 'r').read() # don't worry we won't run out of file handles
train_dataset = CharDataset(text, block_size)  # one line of poem is roughly 50 characters
from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2, batch_size=512, learning_rate=6e-4,
                      # Not sure what final_tokens is about.
                      lr_decay=True, warmup_tokens=512*20, final_tokens=2*len(train_dataset)*block_size,
                      num_workers=4)
trainer = Trainer(model, train_dataset, None, tconf)
trainer.train()

# alright, let's sample some character-level Shakespeare
from mingpt.utils import sample

context = "O God, O God!"
x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None,...].to(trainer.device)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=10)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)


Example #7
    def __getitem__(self, idx):
        # grab a chunk of block_size + 1 characters from the data
        chunk = self.data[idx:idx + self.block_size + 1]
        dix = [self.stoi[s] for s in chunk]
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y


block_size = 128
text = open('input.txt', 'r').read()
train_dataset = CharDataset(text, block_size)

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size,
                  train_dataset.block_size,
                  n_layer=8,
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig
from mingpt.utils import sample
while True:
    tconf = TrainerConfig(max_epochs=1, batch_size=256, learning_rate=6e-4)
    trainer = Trainer(model, train_dataset, None, tconf)  # no test split; Trainer expects (model, train_dataset, test_dataset, config)
    trainer.train()
    context = "O God, O God!"
    x = torch.tensor([train_dataset.stoi[s] for s in context],
                     dtype=torch.long)[None, ...].to(trainer.device)
    y = sample(model, x, 100, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    print(completion)
Example #8
    if current_it % 2 == 0:
        print("Loading Main Dataset\n")
        train_dataset = MathDataset(fname=fn_train, MD=MD)
        test_dataset = MathDataset(fname=fn_test, MD=MD)
        epoch = main_epoch
    else:
        print("Loading Marker Dataset\n")
        train_dataset = MarkerDataset(fname=fn_train, MD=MD)
        test_dataset = MarkerDataset(fname=fn_test, MD=MD)
        epoch = marker_epoch

    # Trainer Config
    tconf = TrainerConfig(max_epochs=epoch,
                          batch_size=358,
                          learning_rate=6e-4,
                          lr_decay=True,
                          warmup_tokens=1024,
                          final_tokens=epoch * len(train_dataset) *
                          (MD.vocab_size + 1),
                          num_workers=6)

    # Create the first training round
    print("Training: ", str(current_it))
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()
    trainer.save_checkpoint(exp_folder, str(current_it))

    # Examine the model and create new dataset
    if current_it % 2 == 0:
        print("Exam and new dataset-------------\n")
        print("Training exam \n")
        examiner.exam(fn_train, train_dataset, trainer)
Example #9
def test(config, task, language, df_results, debug=False):

    model = get_model(config)

    # the following three lines are copied from train()
    block_size = config['block_size']
    text = open(config['train_filename'], 'r').read()
    train_dataset = CharDataset(config['chars'], text, block_size)

    tconf = TrainerConfig(max_epochs=2,
                          batch_size=config['batch_size'],
                          learning_rate=6e-4,
                          lr_decay=True,
                          warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4)
    trainer = Trainer(model, train_dataset, None, tconf)

    # test
    testing_t0 = datetime.datetime.now()

    df_test = pd.read_csv(config['test_filename'],
                          header=None,
                          names=config['features'])

    n = 0
    n_ok = 0
    n_ko = 0
    for index, row in df_test.iterrows():

        if row.Task != task or row.Language != language:
            continue

        n += 1

        # build the context
        context = language + ',' + task + ',' + row.Input + ','

        # get the predicted output string
        prediction_padded = predict_pron(config, train_dataset, trainer, model,
                                         context)

        # identify where the interesting output is in the raw output
        if prediction_padded.startswith(context):

            # remove unwanted prefix
            prediction_padded = prediction_padded[len(context):]

            # remove unwanted postfix (i.e. remove padding)
            eos = prediction_padded.find('\n', 1)
            #eos_p = prediction_padded.find(',', 1)
            #eos_t = row.Output.find(',', 1)
            #if eos_p < 0 or eos_t < 0:
            if eos < 0:
                n_ko += 1
            else:
                #prediction = prediction_padded[:eos_p]
                #target = row.Output[:eos_t]
                prediction = prediction_padded[:eos]
                target = row.Output
                # check if prediction is same as target
                if prediction == target:
                    n_ok += 1
                else:
                    if debug and language != 'eno':
                        print('language:%s, target:%s, prediction:%s,' %
                              (language, target, prediction))
                    n_ko += 1
        else:
            n_ko += 1

    pctg_ok = int(n_ok / n * 100)
    pctg_ko = 100 - pctg_ok
    if config['episodes'] == 1:
        print('%s %5s: n=%d, n_ok=%d, n_ko=%d => %%n_ok=%d%%' %
              (language, task, n, n_ok, n_ko, pctg_ok))
    testing_t1 = datetime.datetime.now()
    test_duration = testing_t1 - testing_t0

    dict_res = {
        'lang': language,
        'task': task,
        'test_accuracy': n_ok / n,
        #'training_duration': training_duration,
        'test_duration': test_duration
    }
    return dict_res
Example #10
If you don't have as many computational resources, you have to bring down
the batch_size until the model fits into your memory, and then you may
also need to adjust the learning rate (e.g. decrease it a bit). Alternatively,
you can use an even smaller model up above, bringing down the number of layers,
number of heads, and the embedding size.
"""


tokens_per_epoch = len(train_dataset) * train_dataset.block_size
train_epochs = 20 # todo run a bigger model and longer, this is tiny


# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=train_epochs, batch_size=16 * 8, learning_rate=3e-3,
                      betas=(0.9, 0.95), weight_decay=0,
                      lr_decay=True, warmup_tokens=tokens_per_epoch,
                      final_tokens=train_epochs * tokens_per_epoch,
                      ckpt_path='cifar10_model.pt',
                      num_workers=8)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()

# load the state of the best model we've seen based on early stopping
checkpoint = torch.load('cifar10_model.pt')
model.load_state_dict(checkpoint)

# to sample we also have to technically "train" a separate model for the first token in the sequence
# we are going to do so below simply by calculating and normalizing the histogram of the first token
counts = torch.ones(ncluster) # start counts as 1 not zero, this is called "smoothing"
rp = torch.randperm(len(train_dataset))
nest = 5000 # how many images to use for the estimation
for i in range(nest):
Example #11
    train_dataset = CharDataset(text, block_size=sample_block_size)

    model_config = GPTConfig(
        train_dataset.vocab_size,
        train_dataset.block_size,
        n_layer=8,
        n_head=8,
        n_embd=512,
    )
    model = GPT(model_config)

    trainer_config = TrainerConfig(
        max_epochs=200,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=512 * 20,
        final_tokens=200 * len(train_dataset) * sample_block_size,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, None, trainer_config)
    trainer.train()

    context = "대한민국의 대통령"
    sample_x = torch.tensor(
        [train_dataset.char2idx[s] for s in context],
        dtype=torch.long,
    )[None, ...].to(trainer.device)
    sample_y = sample(model,
                      sample_x,
                      2000,