Example #1
def train(config):

    training_t0 = datetime.datetime.now()

    block_size = config['block_size']

    print("config['train_filename']:", config['train_filename'])
    with open(config['train_filename'], 'r') as f:
        text = f.read()
    train_dataset = CharDataset(config['chars'], text, block_size,
                                debug=True)  # one line is 63 characters

    # create model
    mconf = GPTConfig(train_dataset.vocab_size,
                      train_dataset.block_size,
                      n_layer=config['n_layer'],
                      n_head=config['n_head'],
                      n_embd=config['n_embd'])

    model = GPT(mconf)

    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print('ANN parameters: %d' % pytorch_total_params)

    # train
    tconf = TrainerConfig(max_epochs=2,
                          batch_size=config['batch_size'],
                          learning_rate=6e-4,
                          lr_decay=True,
                          warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4,
                          tqdm=False)  # was: not config['do_finetune']
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()
    training_t1 = datetime.datetime.now()
    training_duration = training_t1 - training_t0
    print('training_duration', training_duration)

    torch.save(model.state_dict(), config['model_filename'])

    return model
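
For reference, train() above reads these config keys; a minimal sketch of such a dict, where every value is a hypothetical placeholder:

config = {
    'block_size': 128,
    'train_filename': 'input.txt',
    'chars': sorted(set(open('input.txt').read())),  # assumed: character inventory for CharDataset
    'n_layer': 8,
    'n_head': 8,
    'n_embd': 512,
    'batch_size': 256,
    'model_filename': 'model.pt',
}
model = train(config)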
Example #2
def get_model(rebuild=False):
    set_seed(42)
    if not rebuild:
        try:
            model = torch.load(MODEL_PATH)
            print(f"resuming from existing model at {MODEL_PATH}")
            return model
        except FileNotFoundError:
            pass
    print("constructing new model")
    conf = GPTConfig(VOCAB_SIZE, BLOCK_SIZE, n_layer=2, n_head=4, n_embd=128)
    model = GPT(conf)
    return model
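
Note that the resume branch calls torch.load(MODEL_PATH) on a whole module, so the training side has to save the full model object rather than just its weights; a minimal sketch, assuming the same MODEL_PATH:

# save the entire module so torch.load(MODEL_PATH) can reconstruct it directly
# (saving only model.state_dict() would require rebuilding GPT(conf) first)
torch.save(model, MODEL_PATH)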
Example #3
def get_model(config):
    # the following lines mirror the dataset setup in train()
    block_size = config['block_size']
    with open(config['train_filename'], 'r') as f:
        text = f.read()
    train_dataset = CharDataset(config['chars'], text, block_size)

    mconf = GPTConfig(train_dataset.vocab_size,
                      train_dataset.block_size,
                      n_layer=config['n_layer'],
                      n_head=config['n_head'],
                      n_embd=config['n_embd'])
    model = GPT(mconf)
    model.load_state_dict(torch.load(config['model_filename']))
    model.eval()
    return model
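
Once the weights are loaded, the usual next step is sampling, as Example #7 does. A sketch, assuming the CharDataset exposes the usual stoi/itos mappings (it is rebuilt here because get_model() keeps its copy local); the prompt string is a placeholder:

import torch
from mingpt.utils import sample

model = get_model(config)
with open(config['train_filename'], 'r') as f:
    dataset = CharDataset(config['chars'], f.read(), config['block_size'])

context = "hello"  # hypothetical prompt
x = torch.tensor([dataset.stoi[s] for s in context], dtype=torch.long)[None, ...]
y = sample(model, x, 200, temperature=1.0, sample=True, top_k=10)[0]
print(''.join(dataset.itos[int(i)] for i in y))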
Example #4
block_size = 128  # spatial extent of the model for its context

# you can download this file at https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt
text = open('input.txt',
            'r').read()  # don't worry we won't run out of file handles
train_dataset = CharDataset(
    text, block_size)  # one line of poem is roughly 50 characters

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size,
                  train_dataset.block_size,
                  n_layer=8,
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2,
                      batch_size=512,
                      learning_rate=6e-4,
                      lr_decay=True,
                      warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4,
                      device='cpu',
                      ckpt_path="./prfaqgen.bin")

trainer = Trainer(model, train_dataset, None, tconf)
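
The snippet stops right before training; as in Examples #1 and #10, the missing call would be:

trainer.train()  # checkpoints should land in ./prfaqgen.bin via ckpt_path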
Example #5
def get_model(mconf):
    return GPT(mconf)
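
This is just a thin factory around the GPT constructor; usage, with placeholder sizes in the style of the other examples:

mconf = GPTConfig(vocab_size=65, block_size=128, n_layer=8, n_head=8, n_embd=512)  # assumed sizes
model = get_model(mconf)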
Example #6
        os.system(
            "wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
        )

    # you can download this file at https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt
    text = open('input.txt',
                'r').read()  # don't worry we won't run out of file handles
    train_dataset = CharDataset(
        text, args.block_size)  # one line of poem is roughly 50 characters
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers)

    model = GPT(vocab_size=train_dataset.vocab_size,
                block_size=train_dataset.block_size,
                n_layer=args.n_layer,
                n_head=args.n_head,
                n_embd=args.n_embd,
                learning_rate=args.learning_rate)

    lr_decay = LearningRateDecayCallback(learning_rate=6e-4,
                                         warmup_tokens=512 * 20,
                                         final_tokens=2 * len(train_dataset) *
                                         args.block_size)

    trainer = Trainer.from_argparse_args(
        args,
        max_epochs=5,
        tpu_cores=8,
        gradient_clip_val=1.0,
        callbacks=[lr_decay, XLAStatsMonitor()],
    )
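
    # This variant builds on PyTorch Lightning (Trainer.from_argparse_args,
    # callbacks, TPU cores), so training would be launched with fit() rather
    # than a hand-written loop:
    trainer.fit(model, train_loader)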
Example #7
        but during test time we can only go B at a time, T times, with T forward 
        passes.
        """
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y

block_size = 128 # spatial extent of the model for its context
text = open('chat/all.txt', 'r').read() # don't worry we won't run out of file handles
train_dataset = CharDataset(text, block_size) # one line of poem is roughly 50 characters

# Load Model
from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf).cuda()

# Load weight
model.load_state_dict(torch.load("model.pth"))

from mingpt.utils import sample
def run(context):
    x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None,...].cuda()
    y = sample(model, x, 500, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    # print(completion)
    return completion

# Bot
import telebot
from telebot import types
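
The telebot imports suggest run() feeds a Telegram bot. A hypothetical sketch of that wiring with pyTelegramBotAPI; the token and the catch-all handler are assumptions:

bot = telebot.TeleBot("YOUR_BOT_TOKEN")  # placeholder token

@bot.message_handler(func=lambda message: True)
def reply(message):
    # use the incoming message text as the sampling context
    bot.reply_to(message, run(message.text))

bot.infinity_polling()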
Example #8
        # before:
        # ix = ((xpt[:, None, :] - C[None, :, :])**2).sum(-1).argmin(1)  # cluster assignments for each pixel
        a = ((x[:, None, :] - self.clusters[None, :, :])**2).sum(-1).argmin(1)  # cluster assignments
        return a[:-1], a[1:] # always just predict the next one in the sequence

train_dataset = ImageDataset(train_data, C)
test_dataset = ImageDataset(test_data, C)
train_dataset[0][0] # one example image flattened out into integers

from mingpt.model import GPT, GPTConfig, GPT1Config

# we'll do something a bit smaller
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                embd_pdrop=0.0, resid_pdrop=0.0, attn_pdrop=0.0,
                n_layer=12, n_head=8, n_embd=256)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

"""
Note that I am running on an 8-GPU V100 machine so each GPU has 32GB.
If you don't have as many computational resources you have to bring down
the batch_size until the model fits into your memory, and then you may
also need to adjust the learning rate (e.g. decrease it a bit). Alternatively,
you can use an even smaller model up above, bringing down the number of layers,
number of heads, and the embedding size.
"""


tokens_per_epoch = len(train_data) * train_dataset.block_size
train_epochs = 20 # todo run a bigger model and longer, this is tiny
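
The two quantities above typically drive the learning-rate schedule. A sketch of the training kickoff in the style of the other examples; the batch size is an assumption to be tuned per the memory note above, and warming up for one epoch of tokens is likewise an assumption:

tconf = TrainerConfig(max_epochs=train_epochs,
                      batch_size=128,  # assumed; shrink until the model fits in memory
                      learning_rate=6e-4,
                      lr_decay=True,
                      warmup_tokens=tokens_per_epoch,  # assumed: one epoch of warmup
                      final_tokens=train_epochs * tokens_per_epoch,
                      num_workers=4)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()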
Example #9
        return math.ceil(len(self.data) / (self.block_size + 1))


if __name__ == "__main__":
    sample_block_size = 128
    text = open("input.txt", "r", encoding="utf-8").read()
    train_dataset = CharDataset(text, block_size=sample_block_size)

    model_config = GPTConfig(
        train_dataset.vocab_size,
        train_dataset.block_size,
        n_layer=8,
        n_head=8,
        n_embd=512,
    )
    model = GPT(model_config)

    trainer_config = TrainerConfig(
        max_epochs=200,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=512 * 20,
        final_tokens=200 * len(train_dataset) * sample_block_size,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, None, trainer_config)
    trainer.train()

    context = "대한민국의 대통령"
    sample_x = torch.tensor(
        [train_dataset.stoi[s] for s in context], dtype=torch.long
    )[None, ...]
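
    # The encoded context then goes through the sampler, as in Example #7;
    # a sketch, assuming mingpt.utils.sample is importable here:
    from mingpt.utils import sample
    sample_y = sample(model, sample_x, 500, temperature=1.0, sample=True, top_k=10)[0]
    print("".join(train_dataset.itos[int(i)] for i in sample_y))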
Example #10
        return x, y


block_size = 128  # spatial extent of the model for its context

text = open('chat/all.txt', 'r').read()  # Chat text file
train_dataset = CharDataset(
    text, block_size)  # one line of poem is roughly 50 characters

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size,
                  train_dataset.block_size,
                  n_layer=8,
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=4,
                      batch_size=512,
                      learning_rate=6e-4,
                      lr_decay=True,
                      warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4)
trainer = Trainer(model, train_dataset, None, tconf)
trainer.train()

torch.save(model.state_dict(), "model.pth")
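
To reuse the saved weights later, rebuild the model and load the state dict, mirroring Examples #3 and #7; a minimal sketch:

model = GPT(mconf)
model.load_state_dict(torch.load("model.pth"))
model.eval()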
Example #11
args = parser.parse_args()
print(args)

# load the data
train_dataset = torch.load(args.data_cache)

# set up model (TODO: better way to handle the model config)
mconf = GPTConfig(train_dataset.vocab_size,
                  train_dataset.block_size,
                  embd_pdrop=0.0,
                  resid_pdrop=0.0,
                  attn_pdrop=0.0,
                  n_layer=24,
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

# load the model
print("Loading model")
model_ckpt = torch.load(args.model_cache)
model.load_state_dict(model_ckpt['model_state_dict'])

if torch.cuda.is_available():
    model = model.cuda()

if args.condition == 'uncond':
    # generate some samples unconditionally
    print("Generating unconditional samples")
    generate_samples(model, train_dataset, 32)
elif args.condition in ('half', 'chimera'):
    # generate samples conditioned on upper half