Example #1
import torch

# Reload the weights checkpointed at the end of Example #2; assumes the same
# `dalle` model object has already been constructed.
dalle.load_state_dict(torch.load("dalle-small.pth"))
"""
text = torch.randint(0, NUM_TOKENS, (BATCH_SIZE, TEXTSEQLEN))
images = torch.randn(BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE)
mask = torch.ones_like(text).bool()
"""

# Tokenizer/dataset built from the merged COCO caption file (user-defined helper).
tokenDset = token_dataset('./coco/merged-smallsample.txt')

# do the above for a long time with a lot of data ... then

num_pics = 30


def denorm(img: torch.Tensor):
    """Rescale a tensor to roughly [0, 255]: centre on the mean, divide by the
    value range, then shift by 0.5 and scale. The result can overshoot
    [0, 255], so clamp before converting to uint8."""
    mean = torch.mean(img)
    min_maxrange = torch.max(img) - torch.min(img)
    return ((img - mean) / min_maxrange + 0.5) * 255
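
# Quick illustrative check (not in the original): by construction the output
# mean is exactly 127.5 and the output spans exactly 255 units, but the min
# and max can fall outside [0, 255] for skewed inputs, so clamp before use.
_sample = torch.randn(3, 64, 64)
_out = denorm(_sample)
print(_out.min().item(), _out.mean().item(), _out.max().item())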


test_text = "犬が地面に寝そべっている写真"  # "A photo of a dog lying on the ground"

for i in range(num_pics):

    # Tokenize the prompt, pad to fixed length, and sample a batch of images.
    textToken, mask = fixlen([tokenDset.tokenizeList(test_text)])
    textToken = textToken.cuda()
    mask = mask.cuda()
    images = dalle.generate_images(textToken, mask=mask)
    print(images.size())
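
    # Hypothetical addition (not in the original snippet): write the batch to
    # disk via the denorm helper above; assumes torchvision is installed.
    # denorm can overshoot [0, 255], hence the clamp; save_image expects
    # floats in [0, 1].
    from torchvision.utils import save_image
    for n in range(images.size(0)):
        pixels = denorm(images[n].detach().cpu()).clamp(0, 255) / 255.0
        save_image(pixels, "generated_{:02d}_{:02d}.png".format(i, n))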
Example #2
import numpy as np
import torch

# `dalle`, `cap` (an image/caption dataset), `tokenDset`, `fixlen`,
# `learning_rate`, `EPOCHS` and `DATASET_SIZE` are assumed to be defined above.
optimizerDALLE = torch.optim.Adam(dalle.parameters(), lr=learning_rate)
DALLEloss = []  # per-step training losses, written to CSV below

for epoch in range(EPOCHS):
    for i in range(DATASET_SIZE):
        optimizerDALLE.zero_grad()
        img, _ = cap[i]  # (image, caption strings); tokens come from tokenDset instead
        img = img.unsqueeze(0).cuda()  # add a batch dimension
        if i % 10 == 0:
            print("DALLE step {} / {}".format(i + epoch * DATASET_SIZE,
                                              EPOCHS * DATASET_SIZE))
        try:
            textToken, mask = fixlen([tokenDset.getRand(i)])
        except KeyError:
            # skip samples whose caption cannot be tokenized
            continue
        loss = dalle(textToken.cuda(), img, mask=mask.cuda(), return_loss=True)
        DALLEloss.append(loss.item())
        loss.backward()
        optimizerDALLE.step()

np.savetxt("dalleloss.csv", np.asarray(DALLEloss), delimiter=",")
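
# Optional visualization (an assumption, not in the original): plot the
# per-step loss with matplotlib to sanity-check convergence.
import matplotlib.pyplot as plt

plt.plot(DALLEloss)
plt.xlabel("step")
plt.ylabel("DALL-E loss")
plt.savefig("dalleloss.png")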

# do the above for a long time with a lot of data ... then

# Checkpoint the trained weights; Example #1 reloads this file.
torch.save(dalle.state_dict(), "dalle-small.pth")

test_text = "犬が地面に寝そべっている写真"  # "A photo of a dog lying on the ground"
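
# Hypothetical continuation (mirrors Example #1; not part of the original
# fragment): tokenize the prompt and sample from the freshly trained model.
textToken, mask = fixlen([tokenDset.tokenizeList(test_text)])
images = dalle.generate_images(textToken.cuda(), mask=mask.cuda())
print(images.size())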