# Load the trained DALL-E weights and generate sample images from a fixed
# Japanese caption (roughly: "photo of a dog lying on the ground").
dalle.load_state_dict(torch.load("dalle-small.pth"))

"""
text = torch.randint(0, NUM_TOKENS, (BATCH_SIZE, TEXTSEQLEN))
images = torch.randn(BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE)
mask = torch.ones_like(text).bool()
"""

tokenDset = token_dataset('./coco/merged-smallsample.txt')

# do the above for a long time with a lot of data ... then
num_pics = 30


def denorm(img: torch.Tensor) -> torch.Tensor:
    """Rescale an image tensor into the 0..255 pixel range.

    Centers the tensor on its mean, divides by its value range, then maps
    the result into pixel space.  A constant image (zero value range) maps
    to mid-gray instead of dividing by zero.
    """
    mean = torch.mean(img)
    value_range = torch.max(img) - torch.min(img)
    if value_range == 0:
        # Limit of the formula below as the range shrinks to zero:
        # (0 + 0.5) * 255 == 127.5 everywhere.
        return torch.full_like(img, 127.5)
    return ((img - mean) / value_range + 0.5) * 255


test_text = "犬が地面に寝そべっている写真"
# The caption is loop-invariant: tokenize it and move it to the GPU once
# instead of redoing both on every iteration.
textToken, mask = fixlen([tokenDset.tokenizeList(test_text)])
textToken = textToken.cuda()
mask = mask.cuda()

# Was a hard-coded range(30); honor the num_pics constant defined above.
for _ in range(num_pics):
    images = dalle.generate_images(textToken, mask=mask)
    # print(images[0][0])
    print(images.size())
# Train DALL-E on (image, caption) pairs, logging the per-step loss and
# checkpointing the weights when training finishes.
optimizerDALLE = torch.optim.Adam(dalle.parameters(), lr=learning_rate)
DALLEloss = []  # one scalar loss value per optimizer step

for epoch in range(EPOCHS):
    for i in range(DATASET_SIZE):
        # print(i, ":", tokenDset.getRand(i), img.size())
        if i % 10 == 0:
            # Reports the global step count, not the epoch number,
            # matching the original progress message.
            print(f"DALLE epoch {i + epoch * DATASET_SIZE} / {EPOCHS * DATASET_SIZE}")

        # Tokenize the caption first: if the sample is missing we skip it
        # before paying for the image's host-to-GPU transfer.
        try:
            textToken, mask = fixlen([tokenDset.getRand(i)])
        except KeyError:
            continue

        img, strs = cap[i]
        # print(img.size())
        img = img.unsqueeze(0).cuda()  # add a batch dimension of 1

        optimizerDALLE.zero_grad()
        loss = dalle(textToken.cuda(), img, mask=mask.cuda(), return_loss=True)
        DALLEloss.append(loss.detach().cpu().numpy())
        loss.backward()
        optimizerDALLE.step()

        # Persist the loss curve every step so a crash loses no history.
        # NOTE(review): this rewrites the whole CSV each step — consider
        # moving it after the epoch loop if the dataset is large.
        np.savetxt("dalleloss.csv", np.asarray(DALLEloss), delimiter=",")

# do the above for a long time with a lot of data ... then
torch.save(dalle.state_dict(), "dalle-small.pth")

test_text = "犬が地面に寝そべっている写真"