Code example #1
File: vae.py Project: juny116/mlvu-dalle
    def __init__(self):
        super().__init__()

        dev = torch.device('cpu')
        self.enc = load_model(OPENAI_VAE_ENCODER_PATH, dev)
        self.dec = load_model(OPENAI_VAE_DECODER_PATH, dev)

        self.num_layers = 3     # log2 of the 8x downsampling: 256px input -> 32x32 token grid
        self.image_size = 256
        self.num_tokens = 8192  # size of the dVAE's discrete codebook
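
A minimal usage sketch for this wrapper (the enclosing class is not shown, so the class name DalleVAE and the random input are assumptions):

vae = DalleVAE()
x = torch.randn(1, 3, 256, 256)         # stand-in for a preprocessed image batch
z_logits = vae.enc(x)                   # (1, 8192, 32, 32) logits over the codebook
tokens = torch.argmax(z_logits, dim=1)  # (1, 32, 32) discrete token ids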
Code example #2
        for session_index in range(len(self.Session)):
            session_of_story = self.Session[session_index]
            session_name = self.Session_Name[session_index]
            print("Session Name : ", session_name)
            print("-------------------------------->")
            session_story = "".join(session_of_story)
            print(session_story)


if __name__ == "__main__":
    print("Load Torch with CPU")
    dev = torch.device('cpu')
    print("Load Encoder Model")
    enc = load_model("encoder.pkl",dev)
    #enc = load_model("https://cdn.openai.com/dall-e/encoder.pkl", dev)
    print("Decoder Model")
    #dec = load_model("https://cdn.openai.com/dall-e/decoder.pkl", dev)
    dec = load_model("decoder.pkl",dev)
    print("Preprocessing and download image")
    x = preprocessing(download_img('https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'))

    img = T.ToPILImage(mode='RGB')(x[0])
    plt.imshow(img)
    plt.show()
    import torch.nn.functional as F

    z_logits = enc(x)

    z = torch.argmax(z_logits, axis=1)
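
The snippet is cut off here; a sketch of the decode step that typically follows (mirroring code example #5, and assuming unmap_pixels is imported from the dall_e package):

    z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()
    x_stats = dec(z).float()
    x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))
    plt.imshow(T.ToPILImage(mode='RGB')(x_rec[0]))
    plt.show()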
Code example #3
def preprocess(img):
    s = min(img.size)

    if s < target_image_size:
        raise ValueError(f'min dim for image {s} < {target_image_size}')

    r = target_image_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)
    img = TF.center_crop(img, output_size=2 * [target_image_size])
    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return map_pixels(img)


pytorch_enc = load_model("encoder.pkl", torch.device('cpu'))
pytorch_dec = load_model("decoder.pkl", torch.device('cpu'))

jax_enc_fn, jax_enc_params = get_encoder("encoder.pkl")
jax_dec_fn, jax_dec_params = get_decoder("decoder.pkl")

x = preprocess(
    download_image(
        'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg'
    ))

z_logits_pytorch = pytorch_enc(x)
z_logits_jax = jax_enc_fn(jax_enc_params, x.detach().numpy())

assert np.allclose(z_logits_jax,
                   z_logits_pytorch.detach().numpy(),
                   atol=1e-4)  # atol is an assumed tolerance
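
A natural follow-up is the same parity check on the decoder path (a sketch, not part of the original snippet; 8192 is the standard DALL-E codebook size):

z = torch.argmax(z_logits_pytorch, dim=1)
z_onehot = torch.nn.functional.one_hot(z, num_classes=8192).permute(0, 3, 1, 2).float()

x_rec_pytorch = pytorch_dec(z_onehot)
x_rec_jax = jax_dec_fn(jax_dec_params, z_onehot.numpy())

assert np.allclose(x_rec_jax, x_rec_pytorch.detach().numpy(), atol=1e-4)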
Code example #4
        crop_list.append(crop)

    img = torch.cat(crop_list, axis=0)

    return img


clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)
clip_model.eval()

clip_transform = torchvision.transforms.Compose([
    # clip_preprocess.transforms[2],
    clip_preprocess.transforms[4],
])

dec = load_model("https://cdn.openai.com/dall-e/decoder.pkl", device)
dec.eval()

z_logits = torch.rand((1, 8192, 64, 64)).cuda()

z_logits = torch.nn.Parameter(z_logits, requires_grad=True)

optimizer = torch.optim.Adam(
    params=[z_logits],
    lr=lr,
    betas=(0.9, 0.999),
)

counter = 0
while True:
    z = torch.nn.functional.gumbel_softmax(
        z_logits, tau=1.0, hard=False, dim=1)  # tau/hard values are assumptions
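
The loop body is cut off; a sketch of the usual CLIP-guided update (text_features is assumed to be a precomputed CLIP text embedding, and make_crops is a hypothetical name for the crop helper at the top of this example):

    x_stats = dec(z)
    x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))

    crops = clip_transform(make_crops(x_rec))  # make_crops: hypothetical crop helper
    image_features = clip_model.encode_image(crops)
    loss = -torch.cosine_similarity(text_features, image_features, dim=-1).mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    counter += 1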
Code example #5
    z_logits = encoder(x)
    z = torch.argmax(z_logits, axis=1)

    print(f"DALL-E: latent shape: {z.shape}")
    z = F.one_hot(z, num_classes=encoder.vocab_size).permute(0, 3, 1,
                                                             2).float()

    x_stats = decoder(z).float()
    x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))
    x_rec = T.ToPILImage(mode='RGB')(x_rec[0])

    return x_rec


if __name__ == '__main__':
    encoder_dalle = load_model("/opt/project/data/dall-e/encoder.pkl", 'cuda')
    decoder_dalle = load_model("/opt/project/data/dall-e/decoder.pkl", 'cuda')

    folder = '/opt/project/valid/data2'
    filename = 'image-35081.png'
    x = load_image(os.path.join(folder, filename))
    recon_x = reconstruct_with_dalle(x,
                                     encoder_dalle,
                                     decoder_dalle,
                                     do_preprocess=True)

    recon_x.save(os.path.join(folder, filename.split('.')[0] + '_recon.jpg'))

    # print('encoder:')
    # print(encoder_dalle)
    # print('encoder size', get_model_size(encoder_dalle))
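
get_model_size is referenced in the commented-out lines but not defined in the snippet; a plausible implementation (an assumption) sums parameter bytes:

def get_model_size(model):
    # total parameter size in megabytes
    n_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    return f'{n_bytes / 1024 ** 2:.1f} MB'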
Code example #6
File: main.py Project: yk/clip_music_video
def main():

    # Automatically creates 'output' folder
    create_outputfolder()

    # Initialize Clip
    perceptor, preprocess = clip.load('ViT-B/32')
    perceptor = perceptor.eval()

    # Load the model
    if generator == 'biggan':
        model = BigGAN.from_pretrained('biggan-deep-512')
        model = model.cuda().eval()
    elif generator == 'dall-e':
        model = load_model("decoder.pkl", 'cuda')
    elif generator == 'stylegan':
        model = g_synthesis.eval().cuda()

    # Read the text file
    # descs - list of (timestamp, description) pairs
    descs = init_textfile(textfile)

    # list of temporary PTFiles
    templist = []

    # Loop over the description list
    for d in tqdm(descs):

        timestamp = d[0]
        line = d[1]
        # stamps_descs_list.append((timestamp, line))

        lats = Pars(gen=generator).cuda()

        # Init Generator's latents
        if generator == 'biggan':
            par = lats.parameters()
            lr = 0.1  #.07
        elif generator == 'stylegan':
            par = [lats.normu]
            lr = .01
        elif generator == 'dall-e':
            par = [lats.normu]
            lr = .1

        # Init optimizer
        optimizer = torch.optim.Adam(par, lr)

        # tokenize the current description with clip and encode the text
        txt = clip.tokenize(line)
        percep = perceptor.encode_text(txt.cuda()).detach().clone()

        # Training Loop
        for i in range(epochs):
            zs = train(i,
                       model,
                       lats,
                       sideX,
                       sideY,
                       perceptor,
                       percep,
                       optimizer,
                       line,
                       txt,
                       epochs=epochs,
                       gen=generator)

        # save each line's last latent to a torch file temporarily
        latent_temp = tempfile.NamedTemporaryFile()
        torch.save(zs, latent_temp)  #f'./output/pt_folder/{line}.pt')
        latent_temp.seek(0)
        # append it to templist so it can be accessed later
        templist.append(latent_temp)
    return templist, descs, model
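
The caller is not shown; a sketch of reading the returned temp files back (an assumption about the consuming side):

templist, descs, model = main()
for latent_temp, (timestamp, line) in zip(templist, descs):
    latent_temp.seek(0)
    zs = torch.load(latent_temp)  # the latent saved above for this description
    # ...render frames for `line` from `zs` here...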
Code example #7
File: dalle.py Project: alpha2phi/speech-to-image
    if s < target_image_size:
        raise ValueError(f"min dim for image {s} < {target_image_size}")

    r = target_image_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)
    img = TF.center_crop(img, output_size=2 * [target_image_size])
    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return map_pixels(img)


# This can be changed to a GPU, e.g. 'cuda:0'.
device = torch.device("cpu")

# For faster load times, download these files locally and use the local paths instead.
enc = load_model("models/encoder.pkl", device)
dec = load_model("models/decoder.pkl", device)


def main():
    x = preprocess(
        download_image(
            "https://assets.bwbx.io/images/users/iqjWHBFdfxIU/iKIWgaiJUtss/v2/1000x-1.jpg"
        ))
    orig_image = T.ToPILImage(mode="RGB")(x[0])
    orig_image.show()
    # orig_image.save("test.jpg")

    z_logits = enc(x)
    z = torch.argmax(z_logits, axis=1)
    z = F.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()
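
The snippet is cut off after the one-hot step; the decode half of main() would plausibly continue as in code example #5:

    x_stats = dec(z).float()
    x_rec = unmap_pixels(torch.sigmoid(x_stats[:, :3]))
    rec_image = T.ToPILImage(mode="RGB")(x_rec[0])
    rec_image.show()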
Code example #8
        img = img_list[i]
        x = preprocess_img(img, size)
        x = x.to(DEVICE) 
        if model_type == "VQGAN":   
            x1 = reconstruct_with_vqgan(preprocess_vqgan(x), model)
            frame_out = custom_to_pil(x1[0])
        else:
            frame_out = reconstruct_with_dalle(x, model[0], model[1])
        img_out_list.append(frame_out)
    save_frames(img_out_list, output_folder)
    return

############################## MAIN SCRIPT ######################################

# For faster load times, download these files locally and use the local paths instead.
encoder_dalle = load_model("logs/DALLE/checkpoints/encoder.pkl", DEVICE)
decoder_dalle = load_model("logs/DALLE/checkpoints/decoder.pkl", DEVICE)

config1024 = load_config(
    "logs/vqgan_imagenet_f16_1024/configs/model.yaml", display=False)
config16384 = load_config(
    "logs/vqgan_imagenet_f16_16384/configs/model.yaml", display=False)

model1024 = load_vqgan(
    config1024, ckpt_path="logs/vqgan_imagenet_f16_1024/checkpoints/last.ckpt").to(DEVICE)
model16384 = load_vqgan(
    config16384, ckpt_path="logs/vqgan_imagenet_f16_16384/checkpoints/last.ckpt").to(DEVICE)

# ## Generate result on a test image using size 384
# img = reconstruction_pipeline(url='https://heibox.uni-heidelberg.de/f/7bb608381aae4539ba7a/?dl=1', size=384)
# ## Generate result on a test image using size 512
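
A minimal single-image run with the models loaded above (the test image and the exact helper behavior are assumptions):

img = PIL.Image.open('test.png').convert('RGB')
x = preprocess_img(img, 384).to(DEVICE)
out_vqgan = custom_to_pil(reconstruct_with_vqgan(preprocess_vqgan(x), model16384)[0])
out_dalle = reconstruct_with_dalle(x, encoder_dalle, decoder_dalle)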
Code example #9
    if s < target_size:
        raise ValueError(f'min dim for image {s} < {target_size}')

    r = target_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    # img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)
    img = TF.resize(img, s, interpolation=TF.InterpolationMode.LANCZOS)
    img = TF.center_crop(img, output_size=2 * [target_size])
    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return map_pixels(img)

dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("Loading models...")
enc = load_model("./dall-e/encoder.pkl", dev)
dec = load_model("./dall-e/decoder.pkl", dev)

from flask import Flask, request, send_file, send_from_directory, jsonify
import json 
from waitress import serve

app = Flask('app')

# Uncomment these two lines to enable CORS headers for all routes:
# from flask_cors import CORS
# CORS(app)  

def serve_pil_image(pil_img):
    img_io = io.BytesIO()
    pil_img.save(img_io, 'JPEG', quality=70)
    img_io.seek(0)  # rewind so send_file reads from the start
    return send_file(img_io, mimetype='image/jpeg')
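
A sketch of a route that uses this helper to run the dVAE round trip on an uploaded image (the route name, form field, and port are assumptions; PIL and io are assumed to be imported earlier in the file):

@app.route('/reconstruct', methods=['POST'])
def reconstruct():
    img = PIL.Image.open(request.files['image']).convert('RGB')
    x = preprocess(img).to(dev)
    z = torch.argmax(enc(x), dim=1)
    z = torch.nn.functional.one_hot(z, num_classes=enc.vocab_size).permute(0, 3, 1, 2).float()
    x_rec = unmap_pixels(torch.sigmoid(dec(z)[:, :3]))
    return serve_pil_image(T.ToPILImage(mode='RGB')(x_rec[0].cpu()))


if __name__ == '__main__':
    serve(app, host='0.0.0.0', port=8080)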