Code example #1
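Renders a looping cross-fade video from saved FFT parameter snapshots (.pt files): consecutive snapshots are blended with an eased sine weight, frames are written as JPEGs, and ffmpeg assembles the clip. All five snippets below assume the same surroundings; a minimal sketch of the imports they rely on (the helper functions come from the surrounding project and are listed here as assumptions, not verified module paths):

import os, math, shutil
import numpy as np
import torch
from imageio import imsave  # assumption: any imsave(path, uint8_array) would do
# project helpers assumed to be importable from the surrounding codebase:
# get_args, fft_image, to_valid_rgb, slice_imgs, checkout, file_list, img_list,
# basename, txt_clean, load_params, ema, plot_text, cvshow, ProgressBar, read_pt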
def main():
    a = get_args()
    tempdir = os.path.join(a.out_dir, 'a')
    os.makedirs(tempdir, exist_ok=True)

    ptfiles = file_list(a.in_dir, 'pt')

    ptest = torch.load(ptfiles[0])
    if isinstance(ptest, list): ptest = ptest[0]
    # recover the [1, 3, H, W] image shape from a stored rfft spectrum [1, 3, H, W//2+1, ...]
    shape = [*ptest.shape[:3], (ptest.shape[3] - 1) * 2]

    vsteps = int(a.length * 25 /
                 len(ptfiles)) if a.steps is None else a.steps  # 25 fps
    pbar = ProgressBar(vsteps * len(ptfiles))
    for px in range(len(ptfiles)):
        params1 = read_pt(ptfiles[px])
        params2 = read_pt(ptfiles[(px + 1) % len(ptfiles)])

        params, image_f, _ = fft_image(shape, resume=params1)
        image_f = to_valid_rgb(image_f)

        for i in range(vsteps):
            with torch.no_grad():
                # eased blend weight: sin(pi/2 * t)^2 ramps from 0 to 1
                img = image_f(
                    (params2 - params1) *
                    math.sin(math.pi / 2 * i / vsteps)**2)[0].permute(1, 2, 0)
                img = torch.clip(img * 255, 0,
                                 255).cpu().numpy().astype(np.uint8)
            imsave(os.path.join(tempdir, '%05d.jpg' % (px * vsteps + i)), img)
            if a.verbose is True: cvshow(img)
            pbar.upd()

    os.system('ffmpeg -v warning -y -i "%s" "%s-pts.mp4"' %
              (os.path.join(tempdir, '%05d.jpg'), a.in_dir))
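The blend weight sin(pi/2 * i/vsteps)**2 eases from 0 to 1 with zero slope at both endpoints, so every transition starts and ends on a still frame. A quick standalone check:

import math
w = [math.sin(math.pi / 2 * i / 10) ** 2 for i in range(11)]
# w runs monotonically from 0.0 to 1.0, flat at both ends (w[1] ~= 0.024, w[10] == 1.0)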
Code example #2
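The full pipeline: load a CLIP model, optimize one FFT-parameterized image per line of the input text file (with optional diversity, sharpness, anti-text-rendering and subtract-text loss terms), then render all results into one cross-faded video.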
def main():
    a = get_args()

    # Load CLIP models
    model_clip, _ = clip.load(a.model)
    if a.verbose is True: print(' using model', a.model)
    xmem = {'RN50': 0.5, 'RN50x4': 0.16, 'RN101': 0.33}
    if a.model in xmem:  # scale down sampling for the heavier ResNet CLIP models
        a.samples = int(a.samples * xmem[a.model])
    workdir = os.path.join(a.out_dir, basename(a.in_txt))
    workdir += '-%s' % a.model if 'RN' in a.model.upper() else ''
    os.makedirs(workdir, exist_ok=True)

    if a.diverse != 0:
        a.samples = int(a.samples * 0.5)
            
    if a.transform is True:
        trform_f = transforms.transforms_custom  
        a.samples = int(a.samples * 0.95)
    else:
        trform_f = transforms.normalize()

    if a.in_txt0 is not None:
        if a.verbose is True: print(' subtract text:', basename(a.in_txt0))
        if a.translate:
            translator = Translator()
            a.in_txt0 = translator.translate(a.in_txt0, dest='en').text
            if a.verbose is True: print(' translated to:', a.in_txt0) 
        if a.multilang is True:
            model_lang = SentenceTransformer('clip-ViT-B-32-multilingual-v1').cuda()
            txt_enc0 = model_lang.encode([a.in_txt0], convert_to_tensor=True, show_progress_bar=False).detach().clone()
            del model_lang
        else:
            txt_enc0 = model_clip.encode_text(clip.tokenize(a.in_txt0).cuda()).detach().clone()

    # make init
    global params_start, params_ema
    params_shape = [1, 3, a.size[0], a.size[1]//2+1, 2]  # rfft2 spectrum layout, (re, im) pairs
    params_start = torch.randn(*params_shape).cuda() # random init
    params_ema = 0.
    if a.resume is not None and os.path.isfile(a.resume):
        if a.verbose is True: print(' resuming from', a.resume)
        params_start = load_params(a.resume).cuda()
        if a.keep > 0:
            params_ema = params_start[0].detach().clone()
    else:
        a.resume = 'init.pt'

    torch.save(params_start, 'init.pt') # final init
    shutil.copy(a.resume, os.path.join(workdir, '000-%s.pt' % basename(a.resume)))
    
    global prev_enc
    prev_enc = 0  # last image encoding, reused by the 'expand' penalty below
    def process(txt, num):

        sd = 0.01
        if a.keep > 0: sd = a.keep + (1-a.keep) * sd
        params, image_f = fft_image([1, 3, *a.size], resume='init.pt', sd=sd, decay_power=a.decay)
        image_f = to_valid_rgb(image_f, colors = a.colors)

        if a.prog is True:
            lr1 = a.lrate * 2
            lr0 = a.lrate * 0.1
        else:
            lr0 = a.lrate
        optimizer = torch.optim.Adam(params, lr0)
    
        if a.verbose is True: print(' ref text: ', txt)
        if a.translate:
            translator = Translator()
            txt = translator.translate(txt, dest='en').text
            if a.verbose is True: print(' translated to:', txt)
        if a.multilang is True:
            model_lang = SentenceTransformer('clip-ViT-B-32-multilingual-v1').cuda()
            txt_enc = model_lang.encode([txt], convert_to_tensor=True, show_progress_bar=False).detach().clone()
            del model_lang
        else:
            txt_enc = model_clip.encode_text(clip.tokenize(txt).cuda()).detach().clone()
        if a.notext > 0:
            txt_plot = torch.from_numpy(plot_text(txt, a.modsize)/255.).unsqueeze(0).permute(0,3,1,2).cuda()
            txt_plot_enc = model_clip.encode_image(txt_plot).detach().clone()
        else: txt_plot_enc = None

        out_name = '%03d-%s' % (num+1, txt_clean(txt))
        out_name += '-%s' % a.model if 'RN' in a.model.upper() else ''
        tempdir = os.path.join(workdir, out_name)
        os.makedirs(tempdir, exist_ok=True)
        
        pbar = ProgressBar(a.steps // a.fstep)
        for i in range(a.steps):
            loss = 0

            noise = a.noise * torch.randn(1, 1, *params[0].shape[2:4], 1).cuda() if a.noise > 0 else None
            img_out = image_f(noise)
            
            if a.sharp != 0:
                lx = torch.mean(torch.abs(img_out[0,:,:,1:] - img_out[0,:,:,:-1]))
                ly = torch.mean(torch.abs(img_out[0,:,1:,:] - img_out[0,:,:-1,:]))
                loss -= a.sharp * (ly+lx)

            imgs_sliced = slice_imgs([img_out], a.samples, a.modsize, trform_f, a.align, micro=1.)
            out_enc = model_clip.encode_image(imgs_sliced[-1])
            loss -= torch.cosine_similarity(txt_enc, out_enc, dim=-1).mean()
            if a.notext > 0:
                loss += a.notext * torch.cosine_similarity(txt_plot_enc, out_enc, dim=-1).mean()
            if a.diverse != 0:
                imgs_sliced = slice_imgs([image_f(noise)], a.samples, a.modsize, trform_f, a.align, micro=1.)
                out_enc2 = model_clip.encode_image(imgs_sliced[-1])
                loss += a.diverse * torch.cosine_similarity(out_enc, out_enc2, dim=-1).mean()
                del out_enc2; torch.cuda.empty_cache()
            if a.expand > 0:
                global prev_enc
                if i > 0:
                    loss += a.expand * torch.cosine_similarity(out_enc, prev_enc, dim=-1).mean()
                prev_enc = out_enc.detach().clone()
            if a.in_txt0 is not None: # subtract text
                loss += torch.cosine_similarity(txt_enc0, out_enc, dim=-1).mean()
            del img_out, imgs_sliced, out_enc; torch.cuda.empty_cache()

            if a.prog is True:
                lr_cur = lr0 + (i / a.steps) * (lr1 - lr0)
                for g in optimizer.param_groups: 
                    g['lr'] = lr_cur
        
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % a.fstep == 0:
                with torch.no_grad():
                    img = image_f(contrast=a.contrast).cpu().numpy()[0]
                if a.sharp != 0:
                    img = img ** 1.3  # empirical tone mapping
                checkout(img, os.path.join(tempdir, '%04d.jpg' % (i // a.fstep)), verbose=a.verbose)
                pbar.upd()
                del img

        if a.keep > 0:
            global params_start, params_ema
            params_ema = ema(params_ema, params[0].detach().clone(), num+1)
            torch.save((1-a.keep) * params_start + a.keep * params_ema, 'init.pt')
        
        torch.save(params[0], '%s.pt' % os.path.join(workdir, out_name))
        shutil.copy(img_list(tempdir)[-1], os.path.join(workdir, '%s-%d.jpg' % (out_name, a.steps)))
        os.system('ffmpeg -v warning -y -i "%s" "%s.mp4"' % (os.path.join(tempdir, '%04d.jpg'), os.path.join(workdir, out_name)))

    with open(a.in_txt, 'r', encoding="utf-8") as f:
        texts = f.readlines()
        texts = [tt.strip() for tt in texts if len(tt.strip()) > 0 and tt[0] != '#']
    if a.verbose is True: 
        print(' total lines:', len(texts))
        print(' samples:', a.samples)

    for i, txt in enumerate(texts):
        process(txt, i)

    vsteps = int(a.length * 25 / len(texts)) # 25 fps
    tempdir = os.path.join(workdir, '_final')
    os.makedirs(tempdir, exist_ok=True)
    
    def read_pt(file):
        return torch.load(file).cuda()

    if a.verbose is True: print(' rendering complete piece')
    ptfiles = file_list(workdir, 'pt')
    pbar = ProgressBar(vsteps * len(ptfiles))
    for px in range(len(ptfiles)):
        params1 = read_pt(ptfiles[px])
        params2 = read_pt(ptfiles[(px+1) % len(ptfiles)])

        params, image_f = fft_image([1, 3, *a.size], resume=params1, sd=1., decay_power=a.decay)
        image_f = to_valid_rgb(image_f, colors = a.colors)

        for i in range(vsteps):
            with torch.no_grad():
                # eased blend weight: sin(pi/2 * t)^2 ramps from 0 to 1
                img = image_f((params2 - params1) * math.sin(math.pi / 2 * i/vsteps)**2)[0].permute(1,2,0)
                img = torch.clip(img*255, 0, 255).cpu().numpy().astype(np.uint8)
            imsave(os.path.join(tempdir, '%05d.jpg' % (px * vsteps + i)), img)
            if a.verbose is True: cvshow(img)
            pbar.upd()

    os.system('ffmpeg -v warning -y -i "%s" "%s.mp4"' % (os.path.join(tempdir, '%05d.jpg'), os.path.join(a.out_dir, basename(a.in_txt))))
    if a.keep > 0: os.remove('init.pt')
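The core objective in process() subtracts the mean cosine similarity between the text encoding and the encodings of the sampled image cutouts, so gradient descent drives the image toward the prompt; the in_txt0 term enters with a plus sign and therefore pushes away from the subtracted text. A minimal shape sketch, assuming 512-dimensional encodings:

import torch
txt_enc = torch.randn(1, 512)   # one prompt encoding
out_enc = torch.randn(8, 512)   # encodings of 8 image cutouts
sim = torch.cosine_similarity(txt_enc, out_enc, dim=-1)  # broadcasts to shape [8]
loss = -sim.mean()              # minimizing this maximizes prompt similarity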
Code example #3
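The per-text optimization closure shown in isolation; it is the same process() as inside example #2.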
    def process(txt, num):

        sd = 0.01
        if a.keep > 0: sd = a.keep + (1-a.keep) * sd
        params, image_f = fft_image([1, 3, *a.size], resume='init.pt', sd=sd, decay_power=a.decay)
        image_f = to_valid_rgb(image_f, colors = a.colors)

        if a.prog is True:
            lr1 = a.lrate * 2
            lr0 = a.lrate * 0.1
        else:
            lr0 = a.lrate
        optimizer = torch.optim.Adam(params, lr0)
    
        if a.verbose is True: print(' ref text: ', txt)
        if a.translate:
            translator = Translator()
            txt = translator.translate(txt, dest='en').text
            if a.verbose is True: print(' translated to:', txt)
        if a.multilang is True:
            model_lang = SentenceTransformer('clip-ViT-B-32-multilingual-v1').cuda()
            txt_enc = model_lang.encode([txt], convert_to_tensor=True, show_progress_bar=False).detach().clone()
            del model_lang
        else:
            txt_enc = model_clip.encode_text(clip.tokenize(txt).cuda()).detach().clone()
        if a.notext > 0:
            txt_plot = torch.from_numpy(plot_text(txt, a.modsize)/255.).unsqueeze(0).permute(0,3,1,2).cuda()
            txt_plot_enc = model_clip.encode_image(txt_plot).detach().clone()
        else: txt_plot_enc = None

        out_name = '%03d-%s' % (num+1, txt_clean(txt))
        out_name += '-%s' % a.model if 'RN' in a.model.upper() else ''
        tempdir = os.path.join(workdir, out_name)
        os.makedirs(tempdir, exist_ok=True)
        
        pbar = ProgressBar(a.steps // a.fstep)
        for i in range(a.steps):
            loss = 0

            noise = a.noise * torch.randn(1, 1, *params[0].shape[2:4], 1).cuda() if a.noise > 0 else None
            img_out = image_f(noise)
            
            if a.sharp != 0:
                lx = torch.mean(torch.abs(img_out[0,:,:,1:] - img_out[0,:,:,:-1]))
                ly = torch.mean(torch.abs(img_out[0,:,1:,:] - img_out[0,:,:-1,:]))
                loss -= a.sharp * (ly+lx)

            imgs_sliced = slice_imgs([img_out], a.samples, a.modsize, trform_f, a.align, micro=1.)
            out_enc = model_clip.encode_image(imgs_sliced[-1])
            loss -= torch.cosine_similarity(txt_enc, out_enc, dim=-1).mean()
            if a.notext > 0:
                loss += a.notext * torch.cosine_similarity(txt_plot_enc, out_enc, dim=-1).mean()
            if a.diverse != 0:
                imgs_sliced = slice_imgs([image_f(noise)], a.samples, a.modsize, trform_f, a.align, micro=1.)
                out_enc2 = model_clip.encode_image(imgs_sliced[-1])
                loss += a.diverse * torch.cosine_similarity(out_enc, out_enc2, dim=-1).mean()
                del out_enc2; torch.cuda.empty_cache()
            if a.expand > 0:
                global prev_enc
                if i > 0:
                    loss += a.expand * torch.cosine_similarity(out_enc, prev_enc, dim=-1).mean()
                prev_enc = out_enc.detach().clone()
            if a.in_txt0 is not None: # subtract text
                loss += torch.cosine_similarity(txt_enc0, out_enc, dim=-1).mean()
            del img_out, imgs_sliced, out_enc; torch.cuda.empty_cache()

            if a.prog is True:
                lr_cur = lr0 + (i / a.steps) * (lr1 - lr0)
                for g in optimizer.param_groups: 
                    g['lr'] = lr_cur
        
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % a.fstep == 0:
                with torch.no_grad():
                    img = image_f(contrast=a.contrast).cpu().numpy()[0]
                if a.sharp != 0:
                    img = img ** 1.3  # empirical tone mapping
                checkout(img, os.path.join(tempdir, '%04d.jpg' % (i // a.fstep)), verbose=a.verbose)
                pbar.upd()
                del img

        if a.keep > 0:
            global params_start, params_ema
            params_ema = ema(params_ema, params[0].detach().clone(), num+1)
            torch.save((1-a.keep) * params_start + a.keep * params_ema, 'init.pt')
        
        torch.save(params[0], '%s.pt' % os.path.join(workdir, out_name))
        shutil.copy(img_list(tempdir)[-1], os.path.join(workdir, '%s-%d.jpg' % (out_name, a.steps)))
        os.system('ffmpeg -v warning -y -i "%s" "%s.mp4"' % (os.path.join(tempdir, '%04d.jpg'), os.path.join(workdir, out_name)))
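The a.sharp term above is a total-variation-style gradient penalty with the sign flipped: classic TV regularization adds it to the loss to smooth the image, while this code subtracts it to reward high-frequency detail. Sketched on a dummy batch:

import torch
img = torch.rand(1, 3, 64, 64)
lx = (img[0, :, :, 1:] - img[0, :, :, :-1]).abs().mean()  # horizontal differences
ly = (img[0, :, 1:, :] - img[0, :, :-1, :]).abs().mean()  # vertical differences
tv = lx + ly  # added to a loss this smooths; subtracted (as here) it sharpens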
Code example #4
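A leaner process() variant: fixed learning rate, CLIP's own input normalization instead of custom transforms, and string-valued keep modes ('all' and 'last') for carrying results over into the next init.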
    def process(txt, num):

        global params_start
        params, image_f = fft_image([1, 3, *a.size], resume='init.pt')
        image_f = to_valid_rgb(image_f)
        optimizer = torch.optim.Adam(params, a.lrate)

        if a.verbose is True: print(' ref text: ', txt)
        if a.translate:
            translator = Translator()
            txt = translator.translate(txt, dest='en').text
            if a.verbose is True: print(' translated to:', txt)
        tx = clip.tokenize(txt).cuda()
        txt_enc = model_clip.encode_text(tx).detach().clone()

        out_name = '%03d-%s' % (num + 1, txt_clean(txt))
        out_name += '-%s' % a.model if 'RN' in a.model.upper() else ''
        tempdir = os.path.join(workdir, out_name)
        os.makedirs(tempdir, exist_ok=True)

        pbar = ProgressBar(a.steps // a.fstep)
        for i in range(a.steps):
            loss = 0

            noise = a.noise * torch.randn(1, 1, *params[0].shape[2:4],
                                          1).cuda() if a.noise > 0 else None
            img_out = image_f(noise)

            imgs_sliced = slice_imgs([img_out],
                                     a.samples,
                                     a.modsize,
                                     norm_in,
                                     a.overscan,
                                     micro=None)
            out_enc = model_clip.encode_image(imgs_sliced[-1])
            loss -= torch.cosine_similarity(txt_enc, out_enc, dim=-1).mean()
            if a.in_txt0 is not None:  # subtract text
                loss += torch.cosine_similarity(txt_enc0, out_enc,
                                                dim=-1).mean()
            del img_out, imgs_sliced, out_enc
            torch.cuda.empty_cache()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % a.fstep == 0:
                with torch.no_grad():
                    img = image_f(contrast=a.contrast).cpu().numpy()[0]
                checkout(img,
                         os.path.join(tempdir, '%04d.jpg' % (i // a.fstep)),
                         verbose=a.verbose)
                pbar.upd()
                del img

        if a.keep == 'all':
            params_start = ema(params_start, params[0].detach(), num + 1)
            torch.save(params_start, 'init.pt')
        elif a.keep == 'last':
            torch.save((params_start + params[0].detach()) / 2, 'init.pt')

        torch.save(params[0], '%s.pt' % os.path.join(workdir, out_name))
        shutil.copy(
            img_list(tempdir)[-1],
            os.path.join(workdir, '%s-%d.jpg' % (out_name, a.steps)))
        os.system('ffmpeg -v warning -y -i "%s" "%s.mp4"' %
                  (os.path.join(tempdir, '%04d.jpg'), os.path.join(workdir, out_name)))
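Note the asymmetry between the two keep modes: 'all' folds every finished result into init.pt through the project's ema() helper (some form of running average over results), while 'last' simply averages the starting parameters with the latest result, so only the most recent text influences the next init.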
Code example #5
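The full main() surrounding the process() of example #4. The parameter shape [1, 3, H, W//2+1, 2] is the layout of a real-valued 2D FFT spectrum of an H x W RGB image, stored as (re, im) pairs; a minimal reconstruction sketch (assumes torch >= 1.8 for the torch.fft module):

import torch
H, W = 180, 320
spec = torch.randn(1, 3, H, W // 2 + 1, 2)  # same layout as params_shape
img = torch.fft.irfft2(torch.view_as_complex(spec), s=(H, W))
print(img.shape)  # torch.Size([1, 3, 180, 320])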
def main():
    a = get_args()

    # Load CLIP models
    model_clip, _ = clip.load(a.model)
    if a.verbose is True: print(' using model', a.model)
    xmem = {'RN50': 0.5, 'RN50x4': 0.16, 'RN101': 0.33}
    if a.model in xmem:  # scale down sampling for the heavier ResNet CLIP models
        a.samples = int(a.samples * xmem[a.model])
    workdir = os.path.join(a.out_dir, basename(a.in_txt))
    workdir += '-%s' % a.model if 'RN' in a.model.upper() else ''
    os.makedirs(workdir, exist_ok=True)

    norm_in = torchvision.transforms.Normalize(
        (0.48145466, 0.4578275, 0.40821073),
        (0.26862954, 0.26130258, 0.27577711))

    if a.in_txt0 is not None:
        if a.verbose is True: print(' subtract text:', basename(a.in_txt0))
        if a.translate:
            translator = Translator()
            a.in_txt0 = translator.translate(a.in_txt0, dest='en').text
            if a.verbose is True: print(' translated to:', a.in_txt0)
        tx0 = clip.tokenize(a.in_txt0).cuda()
        txt_enc0 = model_clip.encode_text(tx0).detach().clone()

    # make init
    global params_start
    params_shape = [1, 3, a.size[0], a.size[1] // 2 + 1, 2]
    params_start = torch.randn(*params_shape).cuda()  # random init

    if a.resume is not None and os.path.isfile(a.resume):
        if a.verbose is True: print(' resuming from', a.resume)
        params, _ = fft_image([1, 3, *a.size], resume=a.resume)
        params_start = ema(params_start, params[0].detach(), 1)
    else:
        a.resume = 'init.pt'

    torch.save(params_start, 'init.pt')  # final init; saved first so the copy below succeeds on a fresh run
    shutil.copy(a.resume,
                os.path.join(workdir, '000-%s.pt' % basename(a.resume)))

    def process(txt, num):

        global params_start
        params, image_f = fft_image([1, 3, *a.size], resume='init.pt')
        image_f = to_valid_rgb(image_f)
        optimizer = torch.optim.Adam(params, a.lrate)

        if a.verbose is True: print(' ref text: ', txt)
        if a.translate:
            translator = Translator()
            txt = translator.translate(txt, dest='en').text
            if a.verbose is True: print(' translated to:', txt)
        tx = clip.tokenize(txt).cuda()
        txt_enc = model_clip.encode_text(tx).detach().clone()

        out_name = '%03d-%s' % (num + 1, txt_clean(txt))
        out_name += '-%s' % a.model if 'RN' in a.model.upper() else ''
        tempdir = os.path.join(workdir, out_name)
        os.makedirs(tempdir, exist_ok=True)

        pbar = ProgressBar(a.steps // a.fstep)
        for i in range(a.steps):
            loss = 0

            noise = a.noise * torch.randn(1, 1, *params[0].shape[2:4],
                                          1).cuda() if a.noise > 0 else None
            img_out = image_f(noise)

            imgs_sliced = slice_imgs([img_out],
                                     a.samples,
                                     a.modsize,
                                     norm_in,
                                     a.overscan,
                                     micro=None)
            out_enc = model_clip.encode_image(imgs_sliced[-1])
            loss -= torch.cosine_similarity(txt_enc, out_enc, dim=-1).mean()
            if a.in_txt0 is not None:  # subtract text
                loss += torch.cosine_similarity(txt_enc0, out_enc,
                                                dim=-1).mean()
            del img_out, imgs_sliced, out_enc
            torch.cuda.empty_cache()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % a.fstep == 0:
                with torch.no_grad():
                    img = image_f(contrast=a.contrast).cpu().numpy()[0]
                checkout(img,
                         os.path.join(tempdir, '%04d.jpg' % (i // a.fstep)),
                         verbose=a.verbose)
                pbar.upd()
                del img

        if a.keep == 'all':
            params_start = ema(params_start, params[0].detach(), num + 1)
            torch.save(params_start, 'init.pt')
        elif a.keep == 'last':
            torch.save((params_start + params[0].detach()) / 2, 'init.pt')

        torch.save(params[0], '%s.pt' % os.path.join(workdir, out_name))
        shutil.copy(
            img_list(tempdir)[-1],
            os.path.join(workdir, '%s-%d.jpg' % (out_name, a.steps)))
        os.system('ffmpeg -v warning -y -i "%s" "%s.mp4"' %
                  (os.path.join(tempdir, '%04d.jpg'), os.path.join(workdir, out_name)))

    with open(a.in_txt, 'r', encoding="utf-8") as f:
        texts = f.readlines()
        texts = [
            tt.strip() for tt in texts if len(tt.strip()) > 0 and tt[0] != '#'
        ]
    if a.verbose is True:
        print(' total lines:', len(texts))
        print(' samples:', a.samples)

    for i, txt in enumerate(texts):
        process(txt, i)

    vsteps = int(a.length * 25 / len(texts))  # 25 fps
    tempdir = os.path.join(workdir, '_final')
    os.makedirs(tempdir, exist_ok=True)

    def read_pt(file):
        return torch.load(file).cuda()

    if a.verbose is True: print(' rendering complete piece')
    ptfiles = file_list(workdir, 'pt')
    pbar = ProgressBar(vsteps * len(ptfiles))
    for px in range(len(ptfiles)):
        params1 = read_pt(ptfiles[px])
        params2 = read_pt(ptfiles[(px + 1) % len(ptfiles)])

        params, image_f = fft_image([1, 3, *a.size], resume=params1)
        image_f = to_valid_rgb(image_f)

        for i in range(vsteps):
            with torch.no_grad():
                # eased blend weight: sin(pi/2 * t)^2 ramps from 0 to 1
                img = image_f(
                    (params2 - params1) *
                    math.sin(math.pi / 2 * i / vsteps)**2)[0].permute(1, 2, 0)
                img = torch.clip(img * 255, 0,
                                 255).cpu().numpy().astype(np.uint8)
            imsave(os.path.join(tempdir, '%05d.jpg' % (px * vsteps + i)), img)
            if a.verbose is True: cvshow(img)
            pbar.upd()

    os.system('ffmpeg -v warning -y -i "%s" "%s.mp4"' %
              (os.path.join(tempdir, '%05d.jpg'),
               os.path.join(a.out_dir, basename(a.in_txt))))
    if a.keep in ('all', 'last'): os.remove('init.pt')  # a.keep is a string mode here, not a bool
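All five snippets shell out to ffmpeg via os.system, so an ffmpeg binary must be on PATH; the -y flag overwrites existing output files without prompting.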