def train(config):
    training_t0 = datetime.datetime.now()
    block_size = config['block_size']
    print("config['train_filename']:", config['train_filename'])
    text = open(config['train_filename'], 'r').read()
    train_dataset = CharDataset(config['chars'], text, block_size, debug=True)  # one line is 63 characters

    # create model
    mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                      n_layer=config['n_layer'], n_head=config['n_head'],
                      n_embd=config['n_embd'])
    model = GPT(mconf)
    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('ANN parameters: %d' % pytorch_total_params)

    # train
    tconf = TrainerConfig(max_epochs=2,
                          batch_size=config['batch_size'],
                          learning_rate=6e-4,
                          lr_decay=True,
                          warmup_tokens=512 * 20,
                          final_tokens=2 * len(train_dataset) * block_size,
                          num_workers=4,
                          tqdm=False)  # not config['do_finetune'])
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()

    training_t1 = datetime.datetime.now()
    training_duration = training_t1 - training_t0
    print('training_duration', training_duration)
    torch.save(model.state_dict(), config['model_filename'])
    return model
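# Usage sketch: train() expects a config dict with the keys read above. The
# values and filenames below are hypothetical placeholders, not settings from
# the original project.
example_config = {
    'train_filename': 'train.txt',   # hypothetical path to the training text
    'model_filename': 'model.pt',    # hypothetical path for the saved state_dict
    'chars': sorted(set(open('train.txt', 'r').read())),  # vocabulary handed to CharDataset
    'block_size': 128,
    'batch_size': 64,
    'n_layer': 8,
    'n_head': 8,
    'n_embd': 512,
}
model = train(example_config)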
def get_model(rebuild=False):
    set_seed(42)
    if not rebuild:
        try:
            model = torch.load(MODEL_PATH)
            print(f"resuming from existing model at {MODEL_PATH}")
            return model
        except FileNotFoundError:
            pass
    print("constructing new model")
    conf = GPTConfig(VOCAB_SIZE, BLOCK_SIZE, n_layer=2, n_head=4, n_embd=128)
    model = GPT(conf)
    return model
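# Note that torch.load(MODEL_PATH) above returns a full model object, which
# only works if the checkpoint was written as the whole module rather than a
# state_dict. A minimal matching save, assuming the same MODEL_PATH constant:
torch.save(model, MODEL_PATH)  # pickles the entire module, so torch.load() can rebuild it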
def get_model(config):
    # following two lines are copied from train()
    block_size = config['block_size']
    text = open(config['train_filename'], 'r').read()

    train_dataset = CharDataset(config['chars'], text, block_size)
    mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                      n_layer=config['n_layer'], n_head=config['n_head'],
                      n_embd=config['n_embd'])
    model = GPT(mconf)
    model.load_state_dict(torch.load(config['model_filename']))
    model.eval()
    return model
block_size = 128  # spatial extent of the model for its context

# you can download this file at https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt
text = open('input.txt', 'r').read()  # don't worry we won't run out of file handles
train_dataset = CharDataset(text, block_size)  # one line of poem is roughly 50 characters

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2, batch_size=512, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4, device='cpu', ckpt_path="./prfaqgen.bin")
trainer = Trainer(model, train_dataset, None, tconf)
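# the "kick off training" step itself is not in this excerpt; with minGPT's
# Trainer that would be:
trainer.train()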
def get_model(mconf):
    return GPT(mconf)
os.system(
    "wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
)

# you can download this file at https://github.com/karpathy/char-rnn/blob/master/data/tinyshakespeare/input.txt
text = open('input.txt', 'r').read()  # don't worry we won't run out of file handles
train_dataset = CharDataset(text, args.block_size)  # one line of poem is roughly 50 characters
train_loader = DataLoader(train_dataset,
                          batch_size=args.batch_size,
                          num_workers=args.num_workers)

model = GPT(vocab_size=train_dataset.vocab_size,
            block_size=train_dataset.block_size,
            n_layer=args.n_layer,
            n_head=args.n_head,
            n_embd=args.n_embd,
            learning_rate=args.learning_rate)

lr_decay = LearningRateDecayCallback(learning_rate=6e-4,
                                     warmup_tokens=512 * 20,
                                     final_tokens=2 * len(train_dataset) * args.block_size)

trainer = Trainer.from_argparse_args(
    args,
    max_epochs=5,
    tpu_cores=8,
    gradient_clip_val=1.0,
    callbacks=[lr_decay, XLAStatsMonitor()],
)
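# the configured Lightning trainer is never launched in this excerpt; with
# the standard PyTorch Lightning API the run would start with:
trainer.fit(model, train_loader)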
        but during test time we can only go B at a time, T times, with T forward passes.
        """
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y

block_size = 128  # spatial extent of the model for its context
text = open('chat/all.txt', 'r').read()  # don't worry we won't run out of file handles
train_dataset = CharDataset(text, block_size)  # one line of poem is roughly 50 characters

# load the model
from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf).cuda()

# load the trained weights
model.load_state_dict(torch.load("model.pth"))

from mingpt.utils import sample

def run(context):
    x = torch.tensor([train_dataset.stoi[s] for s in context],
                     dtype=torch.long)[None, ...].cuda()
    y = sample(model, x, 500, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    # print(completion)
    return completion

# bot
import telebot
from telebot import types
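# A minimal sketch of wiring run() into a Telegram bot, assuming the standard
# pyTelegramBotAPI interface; BOT_TOKEN is a placeholder, not a value from the
# original project.
BOT_TOKEN = "..."  # hypothetical: supply your own bot token
bot = telebot.TeleBot(BOT_TOKEN)

@bot.message_handler(func=lambda message: True)
def reply(message):
    # use the incoming text as the sampling context and send back the completion
    bot.send_message(message.chat.id, run(message.text))

bot.polling()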
        # Before
        # ix = ((xpt[:, None, :] - C[None, :, :])**2).sum(-1).argmin(1)  # cluster assignments for each pixel
        a = ((x[:, None, :] - self.clusters[None, :, :])**2).sum(-1).argmin(1)  # cluster assignments
        return a[:-1], a[1:]  # always just predict the next one in the sequence

train_dataset = ImageDataset(train_data, C)
test_dataset = ImageDataset(test_data, C)
train_dataset[0][0]  # one example image flattened out into integers

from mingpt.model import GPT, GPTConfig, GPT1Config

# we'll do something a bit smaller
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  embd_pdrop=0.0, resid_pdrop=0.0, attn_pdrop=0.0,
                  n_layer=12, n_head=8, n_embd=256)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

"""
Note that I am running on an 8-GPU V100 machine so each GPU has 32GB. If you
don't have as many computational resources you have to bring down the
batch_size until the model fits into your memory, and then you may also need
to adjust the learning rate (e.g. decrease it a bit). Alternatively, you can
use an even smaller model up above, bringing down the number of layers,
number of heads, and the embedding size.
"""
tokens_per_epoch = len(train_data) * train_dataset.block_size
train_epochs = 20  # todo run a bigger model and longer, this is tiny
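# A sketch of the trainer setup the note above implies, with final_tokens
# derived from tokens_per_epoch; batch_size and learning_rate here are
# placeholder values to be scaled to your hardware, not the originals.
tconf = TrainerConfig(max_epochs=train_epochs,
                      batch_size=128,       # bring this down until the model fits in memory
                      learning_rate=3e-4,   # and decrease this along with it
                      lr_decay=True,
                      warmup_tokens=tokens_per_epoch,  # assumption: warm up for one epoch
                      final_tokens=train_epochs * tokens_per_epoch,
                      num_workers=4)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()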
        return math.ceil(len(self.data) / (self.block_size + 1))

if __name__ == "__main__":
    sample_block_size = 128
    text = open("input.txt", "r", encoding="utf-8").read()
    train_dataset = CharDataset(text, block_size=sample_block_size)

    model_config = GPTConfig(
        train_dataset.vocab_size,
        train_dataset.block_size,
        n_layer=8,
        n_head=8,
        n_embd=512,
    )
    model = GPT(model_config)

    trainer_config = TrainerConfig(
        max_epochs=200,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=512 * 20,
        final_tokens=200 * len(train_dataset) * sample_block_size,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, None, trainer_config)
    trainer.train()

    context = "대한민국의 대통령"  # "The President of South Korea"
    sample_x = torch.tensor(
        return x, y

block_size = 128  # spatial extent of the model for its context
text = open('chat/all.txt', 'r').read()  # chat text file
train_dataset = CharDataset(text, block_size)  # one line of poem is roughly 50 characters

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=4, batch_size=512, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4)
trainer = Trainer(model, train_dataset, None, tconf)
trainer.train()

torch.save(model.state_dict(), "model.pth")
args = parser.parse_args()
print(args)

# load the data
train_dataset = torch.load(args.data_cache)

# set up model (TODO: better way to handle the model config)
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  embd_pdrop=0.0, resid_pdrop=0.0, attn_pdrop=0.0,
                  n_layer=24, n_head=8, n_embd=512)
model = GPT(mconf)

# load the model
print("Loading model")
model_ckpt = torch.load(args.model_cache)
model.load_state_dict(model_ckpt['model_state_dict'])
if torch.cuda.is_available():
    model = model.cuda()

if args.condition == 'uncond':
    # generate some samples unconditionally
    print("Generating unconditional samples")
    generate_samples(model, train_dataset, 32)
elif args.condition == 'half' or args.condition == 'chimera':
    # generate samples conditioned on upper half