def get_model(rebuild=False):
    """Return a GPT model, resuming from MODEL_PATH when possible.

    Seeds the RNGs first so construction is deterministic. Unless
    ``rebuild`` is True, an existing checkpoint at MODEL_PATH is loaded
    and returned; if none exists (FileNotFoundError), a fresh small GPT
    (2 layers, 4 heads, 128-dim embeddings) is built instead.
    """
    set_seed(42)
    if not rebuild:
        try:
            cached = torch.load(MODEL_PATH)
        except FileNotFoundError:
            # no checkpoint on disk — fall through to a fresh build
            pass
        else:
            print(f"resuming from existing model at {MODEL_PATH}")
            return cached
    print("constructing new model")
    config = GPTConfig(VOCAB_SIZE, BLOCK_SIZE, n_layer=2, n_head=4, n_embd=128)
    return GPT(config)
#!/usr/bin/env python
# set up logging
import logging

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)

# make deterministic
from mingpt.utils import set_seed

set_seed(42)

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
import math
from torch.utils.data import Dataset


class CharDataset(Dataset):
    """Character-level dataset over a text corpus.

    Builds a vocabulary from the unique characters of ``data`` and keeps
    forward (``stoi``) and reverse (``itos``) id mappings for encoding
    characters to integer tokens and back.
    """

    def __init__(self, data, block_size):
        chars = sorted(list(set(data)))
        data_size, vocab_size = len(data), len(chars)
        print('data has %d characters, %d unique.' % (data_size, vocab_size))
        # char -> token id and token id -> char lookup tables
        self.stoi = {ch: i for i, ch in enumerate(chars)}
        self.itos = {i: ch for i, ch in enumerate(chars)}
        self.block_size = block_size
        # Fix: the original computed vocab_size only for the print above and
        # never kept the corpus itself — __len__/__getitem__ (defined outside
        # this view) need both to index into the text. Storing them is
        # backward-compatible (attributes only added, none changed).
        self.vocab_size = vocab_size
        self.data = data
from torch.utils.data import Dataset

from mingpt.utils import set_seed, sample
from mingpt.model import GPT, GPTConfig
from mingpt.trainer import Trainer, TrainerConfig
import os

# NOTE(review): `logging` and `time` are used below but not imported in this
# chunk — presumably imported earlier in the full file; confirm.
logging.basicConfig(
    format='%(asctime)s|%(levelname)s|%(name)s|%(message)s',
    # Fix: was '%Y-%d-%d', which prints the day-of-month twice and the month
    # never (e.g. 2024-05-05 for May 5 regardless of month).
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)

set_seed(42)  # make deterministic

# "Small" GPT hyperparameters: all dropout disabled, 24 transformer layers,
# 8 attention heads, 512-dim embeddings.
GPT_S = dict(
    embd_pdrop=0.0,
    resid_pdrop=0.0,
    attn_pdrop=0.0,
    n_layer=24,
    n_head=8,
    n_embd=512,
)


def now_utc():
    # unix time
    seconds = round(time.time())
    millis = seconds * 1000
    unix = int(millis)
    # NOTE(review): no return statement is visible at this point — the
    # function appears truncated at the chunk boundary. If it ends here in
    # the full file, it silently returns None; it likely should return
    # `unix` (milliseconds). Confirm against the complete source.