def __init__(self):
    super().__init__()
    dev = torch.device('cpu')
    self.enc = load_model(OPENAI_VAE_ENCODER_PATH, dev)
    self.dec = load_model(OPENAI_VAE_DECODER_PATH, dev)
    self.num_layers = 3
    self.image_size = 256
    self.num_tokens = 8192
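# A minimal sketch of how a wrapper like this is typically used to tokenize an
# image with the OpenAI dVAE: normalize with `map_pixels`, encode to logits over
# the 8192-entry codebook, take the per-position argmax. The method name and the
# `map_pixels` import are assumptions, not part of the snippet above.
import torch
from dall_e import map_pixels

def get_codebook_indices(self, img):
    # img: float tensor in [0, 1] of shape (B, 3, 256, 256)
    img = map_pixels(img)                 # dVAE expects map_pixels-normalized input
    z_logits = self.enc(img)              # (B, 8192, 32, 32) logits over the codebook
    return torch.argmax(z_logits, dim=1)  # (B, 32, 32) discrete token ids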
for session_index in range(len(self.Session)):
    session_of_story = self.Session[session_index]
    session_name = self.Session_Name[session_index]
    print("Session Name : ", session_name)
    print("-------------------------------->")
    session_story = ""
    for line_of_session in session_of_story:
        session_story += line_of_session
    print(session_story)

if __name__ == "__main__":
    import torch.nn.functional as F

    print("Loading Torch with CPU")
    dev = torch.device('cpu')
    print("Loading encoder model")
    enc = load_model("encoder.pkl", dev)
    # enc = load_model("https://cdn.openai.com/dall-e/encoder.pkl", dev)
    print("Loading decoder model")
    # dec = load_model("https://cdn.openai.com/dall-e/decoder.pkl", dev)
    dec = load_model("decoder.pkl", dev)
    print("Downloading and preprocessing image")
    x = preprocessing(download_img('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'))
    img = T.ToPILImage(mode='RGB')(x[0])
    plt.imshow(img)
    plt.show()

    z_logits = enc(x)
    z = torch.argmax(z_logits, axis=1)
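    # The block above stops after computing the token grid `z`. A minimal sketch
    # of the matching decode step, following the OpenAI DALL-E usage notebook;
    # this continuation is not in the original snippet and assumes `unmap_pixels`
    # is imported from `dall_e`.
    z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()
    x_stats = dec(z).float()
    x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))  # first 3 channels are RGB means
    plt.imshow(T.ToPILImage(mode='RGB')(x_rec[0]))
    plt.show()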
def preprocess(img):
    s = min(img.size)
    if s < target_image_size:
        raise ValueError(f'min dim for image {s} < {target_image_size}')
    r = target_image_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)
    img = TF.center_crop(img, output_size=2 * [target_image_size])
    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return map_pixels(img)

pytorch_enc = load_model("encoder.pkl", torch.device('cpu'))
pytorch_dec = load_model("decoder.pkl", torch.device('cpu'))
jax_enc_fn, jax_enc_params = get_encoder("encoder.pkl")
jax_dec_fn, jax_dec_params = get_decoder("decoder.pkl")

x = preprocess(
    download_image(
        'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'
    ))

z_logits_pytorch = pytorch_enc(x)
z_logits_jax = jax_enc_fn(jax_enc_params, x.detach().numpy())
# the tolerance argument was truncated in the original; atol=1e-4 is an assumption
assert np.allclose(z_logits_jax, z_logits_pytorch.detach().numpy(), atol=1e-4)
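# A sketch extending the parity check to the decoders as well; it assumes
# `jax_dec_fn`/`jax_dec_params` mirror the encoder API used above, and the
# tolerance is again an assumption.
import torch.nn.functional as F

z = torch.argmax(z_logits_pytorch, axis=1)
z = F.one_hot(z, num_classes=pytorch_enc.vocab_size).permute(0, 3, 1, 2).float()
x_stats_pytorch = pytorch_dec(z)
x_stats_jax = jax_dec_fn(jax_dec_params, z.numpy())
assert np.allclose(x_stats_jax, x_stats_pytorch.detach().numpy(), atol=1e-4)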
    crop_list.append(crop)
    img = torch.cat(crop_list, axis=0)
    return img

clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
clip_model.eval()
clip_transform = torchvision.transforms.Compose([
    # clip_preprocess.transforms[2],
    clip_preprocess.transforms[4],
])

dec = load_model("https://cdn.openai.com/dall-e/decoder.pkl", device)
dec.eval()

z_logits = torch.rand((1, 8192, 64, 64)).cuda()
z_logits = torch.nn.Parameter(z_logits, requires_grad=True)

optimizer = torch.optim.Adam(
    params=[z_logits],
    lr=lr,
    betas=(0.9, 0.999),
)

counter = 0
while True:
    # the arguments to gumbel_softmax were truncated in the original; relaxing
    # over the codebook dimension (dim=1) is an assumption
    z = torch.nn.functional.gumbel_softmax(z_logits, tau=1.0, hard=False, dim=1)
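    # A minimal sketch of what a CLIP-guided step typically does next (this
    # continuation is an assumption, not part of the original snippet): decode
    # the relaxed one-hot sample, score it with CLIP against a precomputed text
    # embedding `text_features`, and step the optimizer on the negative similarity.
    x_stats = dec(z)
    x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))  # (1, 3, 512, 512) in [0, 1]
    # CLIP ViT-B/32 expects 224x224 inputs; resize before the Normalize kept in clip_transform
    clip_in = torch.nn.functional.interpolate(x_rec, size=224, mode='bilinear')
    image_features = clip_model.encode_image(clip_transform(clip_in))
    loss = -torch.cosine_similarity(image_features, text_features, dim=-1).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    counter += 1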
    z_logits = encoder(x)
    z = torch.argmax(z_logits, axis=1)
    print(f"DALL-E: latent shape: {z.shape}")
    z = F.one_hot(z, num_classes=encoder.vocab_size).permute(0, 3, 1, 2).float()

    x_stats = decoder(z).float()
    x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))
    x_rec = T.ToPILImage(mode='RGB')(x_rec[0])
    return x_rec

if __name__ == '__main__':
    encoder_dalle = load_model("/opt/project/data/dall-e/encoder.pkl", 'cuda')
    decoder_dalle = load_model("/opt/project/data/dall-e/decoder.pkl", 'cuda')

    folder = '/opt/project/valid/data2'
    filename = 'image-35081.png'
    x = load_image(os.path.join(folder, filename))  # a sketch of this helper follows below
    recon_x = reconstruct_with_dalle(x, encoder_dalle, decoder_dalle, do_preprocess=True)
    recon_x.save(os.path.join(folder, filename.split('.')[0] + '_recon.jpg'))

    # print('encoder:')
    # print(encoder_dalle)
    # print('encoder size', get_model_size(encoder_dalle))
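# `load_image` is referenced above but not shown; a minimal sketch of what it
# plausibly does (the implementation is an assumption): open the file with PIL
# and force RGB so `reconstruct_with_dalle` can preprocess it.
import PIL.Image

def load_image(path):
    return PIL.Image.open(path).convert('RGB')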
def main():
    # Automatically creates 'output' folder
    create_outputfolder()

    # Initialize CLIP
    perceptor, preprocess = clip.load('ViT-B/32')
    perceptor = perceptor.eval()

    # Load the generator model
    if generator == 'biggan':
        model = BigGAN.from_pretrained('biggan-deep-512')
        model = model.cuda().eval()
    elif generator == 'dall-e':
        model = load_model("decoder.pkl", 'cuda')
    elif generator == 'stylegan':
        model = g_synthesis.eval().cuda()

    # Read the text file
    # descs - list of (timestamp, description) pairs; a sketch of this helper follows below
    descs = init_textfile(textfile)

    # list of temporary PT files
    templist = []

    # Loop over the description list
    for d in tqdm(descs):
        timestamp = d[0]
        line = d[1]
        # stamps_descs_list.append((timestamp, line))

        lats = Pars(gen=generator).cuda()  # Init generator's latents

        if generator == 'biggan':
            par = lats.parameters()
            lr = 0.1  # .07
        elif generator == 'stylegan':
            par = [lats.normu]
            lr = .01
        elif generator == 'dall-e':
            par = [lats.normu]
            lr = .1

        # Init optimizer
        optimizer = torch.optim.Adam(par, lr)

        # Tokenize the current description with CLIP and encode the text
        txt = clip.tokenize(line)
        percep = perceptor.encode_text(txt.cuda()).detach().clone()

        # Training loop
        for i in range(epochs):
            zs = train(i, model, lats, sideX, sideY, perceptor, percep,
                       optimizer, line, txt, epochs=epochs, gen=generator)

        # Save each line's last latent to a temporary torch file
        latent_temp = tempfile.NamedTemporaryFile()
        torch.save(zs, latent_temp)  # f'./output/pt_folder/{line}.pt')
        latent_temp.seek(0)
        # Append it to templist so it can be accessed later
        templist.append(latent_temp)

    return templist, descs, model
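# `init_textfile` is referenced but not shown; a minimal sketch under the
# assumption that each row of the text file is "<timestamp>|<description>"
# (the actual file format is not given in the snippet above).
def init_textfile(path):
    descs = []
    with open(path) as f:
        for row in f:
            timestamp, line = row.strip().split('|', 1)
            descs.append((timestamp, line))
    return descs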
    if s < target_image_size:
        raise ValueError(f"min dim for image {s} < {target_image_size}")
    r = target_image_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)
    img = TF.center_crop(img, output_size=2 * [target_image_size])
    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return map_pixels(img)

# This can be changed to a GPU, e.g. 'cuda:0'.
device = torch.device("cpu")

# For faster load times, download these files locally and use the local paths instead.
enc = load_model("models/encoder.pkl", device)
dec = load_model("models/decoder.pkl", device)

def main():
    x = preprocess(
        download_image(
            "https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg"
        ))

    orig_image = T.ToPILImage(mode="RGB")(x[0])
    orig_image.show()
    # orig_image.save("test.jpg")

    z_logits = enc(x)
    z = torch.argmax(z_logits, axis=1)
    z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()
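# `download_image` is not defined in this snippet; a sketch matching the helper
# used in the OpenAI DALL-E usage notebook (fetch the URL, open the bytes with PIL):
import io
import requests
import PIL.Image

def download_image(url):
    resp = requests.get(url)
    resp.raise_for_status()
    return PIL.Image.open(io.BytesIO(resp.content))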
        img = img_list[i]
        x = preprocess_img(img, size)
        x = x.to(DEVICE)
        if model_type == "VQGAN":
            # a sketch of preprocess_vqgan follows below
            x1 = reconstruct_with_vqgan(preprocess_vqgan(x), model)
            frame_out = custom_to_pil(x1[0])
        else:
            frame_out = reconstruct_with_dalle(x, model[0], model[1])
        img_out_list.append(frame_out)
    save_frames(img_out_list, output_folder)
    return

############################## MAIN SCRIPT ######################################

# For faster load times, download these files locally and use the local paths instead.
encoder_dalle = load_model("logs/DALLE/checkpoints/encoder.pkl", DEVICE)
decoder_dalle = load_model("logs/DALLE/checkpoints/decoder.pkl", DEVICE)

config1024 = load_config(
    "logs/vqgan_imagenet_f16_1024/configs/model.yaml", display=False)
config16384 = load_config(
    "logs/vqgan_imagenet_f16_16384/configs/model.yaml", display=False)

model1024 = load_vqgan(
    config1024,
    ckpt_path="logs/vqgan_imagenet_f16_1024/checkpoints/last.ckpt").to(DEVICE)
model16384 = load_vqgan(
    config16384,
    ckpt_path="logs/vqgan_imagenet_f16_16384/checkpoints/last.ckpt").to(DEVICE)

# ## Generate result on a test image using size 384
# img = reconstruction_pipeline(url='https://heibox.uni-heidelberg.de/f/7bb608381aae4539ba7a/?dl=1', size=384)
# ## Generate result on a test image using size 512
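# `preprocess_vqgan` is referenced above but not defined; in the
# taming-transformers reconstruction notebook it simply rescales inputs from
# [0, 1] to the [-1, 1] range VQGAN expects (reproduced here as an assumption).
def preprocess_vqgan(x):
    return 2. * x - 1.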
    if s < target_size:
        raise ValueError(f'min dim for image {s} < {target_size}')
    r = target_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    # img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)
    img = TF.resize(img, s, interpolation=TF.InterpolationMode.LANCZOS)
    img = TF.center_crop(img, output_size=2 * [target_size])
    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return map_pixels(img)

dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("Loading models...")
enc = load_model("./dall-e/encoder.pkl", dev)
dec = load_model("./dall-e/decoder.pkl", dev)

from flask import Flask, request, send_file, send_from_directory, jsonify
import json
from waitress import serve

app = Flask('app')

# Uncomment these two lines to enable CORS headers for all routes:
# from flask_cors import CORS
# CORS(app)

def serve_pil_image(pil_img):
    img_io = io.BytesIO()
    pil_img.save(img_io, 'JPEG', quality=70)
    # the original was truncated here; rewinding the buffer and returning it
    # with send_file is the standard Flask pattern
    img_io.seek(0)
    return send_file(img_io, mimetype='image/jpeg')
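# A minimal sketch of a route that ties the pieces above together; the route
# name, the request field, and the use of `preprocess`/`enc`/`dec` here are
# assumptions, as are the `F`, `PIL.Image`, and `unmap_pixels` imports.
import torch.nn.functional as F

@app.route('/reconstruct', methods=['POST'])
def reconstruct():
    img = PIL.Image.open(request.files['image'].stream).convert('RGB')
    x = preprocess(img).to(dev)
    z = torch.argmax(enc(x), axis=1)
    z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()
    x_rec = unmap_pixels(torch.sigmoid(dec(z).float()[:, :3]))
    return serve_pil_image(T.ToPILImage(mode='RGB')(x_rec[0].cpu()))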