Example #1
def sample_some(trainer, model, dataset, X_train, n_samples=40, out_path='./samples.png'):
    prob = model_first_token(dataset, X_train)

    start_pixel = np.random.choice(np.arange(dataset.vocab_size), size=(n_samples, 1), replace=True, p=prob.numpy())
    start_pixel = torch.from_numpy(start_pixel).to(trainer.device)
    flattened_image_size = 32 * 32
    pixels = sample(model, start_pixel, flattened_image_size - 1, temperature=1.0, sample=True, top_k=40)

    # for visualization we have to invert the permutation used to produce the pixels
    iperm = torch.argsort(dataset.perm)

    pixel_size = 32
    plt.close('all')

    n_cols = 8
    n_rows = n_samples // n_cols
    fig, axis = plt.subplots(n_rows, n_cols, figsize=(32, 16))
    for i, ax in enumerate(axis.ravel()):
        pxi = pixels[i][iperm]  # undo the encoding permutation
        pxi = pxi.view(pixel_size, pixel_size).cpu().numpy().astype(np.uint8)  # grayscale -> 2D

        ax.imshow(pxi, cmap='magma')
        # ax.set_aspect(aspect=1.0)

    plt.tight_layout()
    plt.savefig(out_path)
    plt.close('all')
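Example #1 calls a helper model_first_token that is not shown here. A minimal sketch, assuming it estimates the empirical distribution of the first token by counting over the training data (the same idea Example #10 implements inline):

import torch

def model_first_token(dataset, X_train, n_est=5000):
    # Hypothetical helper: estimate p(first token) from a random subset
    # of the training sequences; counts start at 1 for smoothing.
    counts = torch.ones(dataset.vocab_size)
    rp = torch.randperm(len(X_train))[:n_est]
    for i in rp:
        counts[int(X_train[int(i)][0])] += 1
    return counts / counts.sum()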
Example #2
def run_one(model, input_tokens):
    input_ints = list(map(cast_int, input_tokens))
    input_tensor = torch.tensor([input_ints], dtype=torch.long).to(
        torch.cuda.current_device())
    full_tensor = sample(model, input_tensor, EXPRESSION_SIZE)
    full_ints = list(map(int, full_tensor[0]))
    output_ints = full_ints[-EXPRESSION_SIZE:]
    return list(map(cast_token, output_ints))
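The helpers cast_int and cast_token and the constant EXPRESSION_SIZE used above are defined elsewhere in the project. A minimal sketch, under the assumption that they map between string tokens and the integer ids the model was trained on, analogous to the stoi/itos lookups in the character-level examples:

# Hypothetical vocabulary and helpers assumed by run_one(); the real
# definitions live elsewhere in the project.
EXPRESSION_SIZE = 4
TOKENS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '=']
TOKEN_TO_INT = {t: i for i, t in enumerate(TOKENS)}

def cast_int(token):
    return TOKEN_TO_INT[token]  # string token -> integer id

def cast_token(i):
    return TOKENS[i]  # integer id -> string token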
Example #3
def run_one(model, n):
    "Returns a list of tokens produced."
    input_ints = get_pixels(n)
    input_tensor = torch.tensor([input_ints], dtype=torch.long).to(
        torch.cuda.current_device())
    full_tensor = sample(model, input_tensor, OUTPUT_SIZE)
    full_ints = list(map(int, full_tensor[0]))
    output_ints = full_ints[-OUTPUT_SIZE:]
    return [TOKENS[i] for i in output_ints]
Example #4
    def make_prediction(self, x_in):

        # Reset tracker variables
        self.batch, self.predict, self.prev_src_len = 0, 0, 0
        clip_src_mem = self.get_clip_src_mem_len(x_in[0]) + 1
        x_cut = x_in[:, :clip_src_mem]

        pred = x_cut.to(self.trainer.device)
        pred = sample(self.trainer.model, pred, int(self.max_trg + 1))

        return pred, clip_src_mem
    def model_predict(self, x_in):

        # Reset tracker variables
        self.predict, self.batches = 0, 0

        # Prepare input
        cut_index = self.get_input_len(x_in[0])
        x_cut = x_in[:, :cut_index]
        pred = x_cut.to(self.trainer.device)

        # Make prediction
        output = sample(self.trainer.model, pred, int(self.max_trg + 1))
        return output
Example #6
def predict_pron(config, train_dataset, trainer, model, word):
    try:
        x = torch.tensor([train_dataset.stoi[s] for s in word],
                         dtype=torch.long)[None, ...].to(trainer.device)
        y = sample(model,
                   x,
                   config['output_max_len'],
                   temperature=1.0,
                   sample=True,
                   top_k=10)[0]
        completion = ''.join([train_dataset.itos[int(i)] for i in y])
    except Exception as e:
        print('predict_pron(): error %s for word: %s' % (e, word))
        # Typically, this can happen if a tested word contains a char
        # that did not exist in the training data
        completion = 'N/A'
    return completion
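A possible call, assuming config, train_dataset, trainer and model are already set up as in the surrounding examples:

# Sample a pronunciation for one (hypothetical) test word.
print(predict_pron(config, train_dataset, trainer, model, 'tomato'))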
Example #7
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2,
                      batch_size=512,
                      learning_rate=6e-4,
                      lr_decay=True,
                      warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4,
                      device='cpu',
                      ckpt_path="./prfaqgen.bin")

trainer = Trainer(model, train_dataset, None, tconf)
# trainer.train()
trainer.load_checkpoint()
# trainer.save_checkpoint()

# alright, let's sample some character-level text from the trained model
from mingpt.utils import sample

context = "The launch of Skills Store Arabic was GREEN. The team "
x = torch.tensor([train_dataset.stoi[s] for s in context],
                 dtype=torch.long)[None, ...].to(trainer.device)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=40)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)
Example #8
        chunk = self.data[idx:idx + self.block_size + 1]
        dix = [self.stoi[s] for s in chunk]
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y


block_size = 128
text = open('input.txt', 'r').read()
train_dataset = CharDataset(text, block_size)

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size,
                  train_dataset.block_size,
                  n_layer=8,
                  n_head=8,
                  n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig
from mingpt.utils import sample
while True:
    tconf = TrainerConfig(max_epochs=1, batch_size=256, learning_rate=6e-4)
    trainer = Trainer(model, train_dataset, None, tconf)
    trainer.train()
    context = "O God, O God!"
    x = torch.tensor([train_dataset.stoi[s] for s in context],
                     dtype=torch.long)[None, ...].to(trainer.device)
    y = sample(model, x, 100, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    print(completion)
Example #9
def run(context):
    x = torch.tensor([train_dataset.stoi[s] for s in context], dtype=torch.long)[None,...].cuda()
    y = sample(model, x, 500, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    # print(completion)
    return completion
Example #10
counts = torch.ones(C.size(0))  # smoothed counts of the first token over the codebook
rp = torch.randperm(len(train_dataset))
nest = 5000  # how many images to use for the estimation
for i in range(nest):
    a, _ = train_dataset[int(rp[i])]
    t = a[0].item()  # index of first token in the sequence
    counts[t] += 1
prob = counts / counts.sum()

%%time

from mingpt.utils import sample

n_samples = 32
start_pixel = np.random.choice(np.arange(C.size(0)), size=(n_samples, 1), replace=True, p=prob)
start_pixel = torch.from_numpy(start_pixel).to(trainer.device)
pixels = sample(model, start_pixel, 32*32-1, temperature=1.0, sample=True, top_k=100)

# for visualization we have to invert the permutation used to produce the pixels
iperm = torch.argsort(train_dataset.perm)

ncol = 8
nrow = n_samples // ncol
plt.figure(figsize = (16, 8))
for i in range(n_samples):
    pxi = pixels[i][iperm].cpu()  # note: undo the encoding permutation

    plt.subplot(nrow, ncol, i + 1)
    plt.imshow(C[pxi].view(32, 32, 3).numpy().astype(np.uint8))
    plt.axis('off')

# visualize some of the learned positional embeddings, maybe they contain structure
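The closing comment hints at the next step. A minimal sketch, assuming the model stores its learned positional embeddings as model.pos_emb with shape (1, block_size, n_embd), as the minGPT reference implementation does:

# Visualize the learned positional embeddings as a heatmap:
# one row per embedding dimension, one column per sequence position.
pe = model.pos_emb.detach().cpu().numpy()[0]  # (block_size, n_embd)
plt.figure(figsize=(16, 6))
plt.imshow(pe.T, aspect='auto', cmap='magma')
plt.xlabel('position')
plt.ylabel('embedding dimension')
plt.colorbar()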
Example #11
        n_head=8,
        n_embd=512,
    )
    model = GPT(model_config)

    trainer_config = TrainerConfig(
        max_epochs=200,
        batch_size=512,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=512 * 20,
        final_tokens=200 * len(train_dataset) * sample_block_size,
        num_workers=4,
    )
    trainer = Trainer(model, train_dataset, None, trainer_config)
    trainer.train()

    context = "대한민국의 대통령"
    sample_x = torch.tensor(
        [train_dataset.char2idx[s] for s in context],
        dtype=torch.long,
    )[None, ...].to(trainer.device)
    sample_y = sample(model,
                      sample_x,
                      2000,
                      temperature=0.9,
                      sample=True,
                      top_k=5)[0]
    completion = "".join([train_dataset.idx2char[int(i)] for i in sample_y])
    print(completion)