# Shared imports for the scripts below. Repo-local helpers (get_args, basename,
# file_list, img_list, txt_clean, fft_image, to_valid_rgb, slice_imgs, checkout,
# cvshow, plot_text, load_params, ema, ProgressBar, transforms) come from the
# surrounding repository and are not redefined here.
import os
import math
import shutil

import numpy as np
import torch
import torchvision
from imageio import imsave

import clip
from googletrans import Translator
from sentence_transformers import SentenceTransformer

def read_pt(file):
    return torch.load(file).cuda()

def main():
    a = get_args()
    tempdir = os.path.join(a.out_dir, 'a')
    os.makedirs(tempdir, exist_ok=True)

    ptfiles = file_list(a.in_dir, 'pt')

    # recover the image shape [1, 3, H, W] from the saved rfft2 spectrum
    ptest = torch.load(ptfiles[0])
    if isinstance(ptest, list): ptest = ptest[0]
    shape = [*ptest.shape[:3], (ptest.shape[3] - 1) * 2]

    vsteps = int(a.length * 25 / len(ptfiles)) if a.steps is None else a.steps  # 25 fps
    pbar = ProgressBar(vsteps * len(ptfiles))
    for px in range(len(ptfiles)):
        params1 = read_pt(ptfiles[px])
        params2 = read_pt(ptfiles[(px + 1) % len(ptfiles)])  # wrap around to loop

        params, image_f = fft_image(shape, resume=params1)
        image_f = to_valid_rgb(image_f)

        for i in range(vsteps):
            with torch.no_grad():
                # crossfade in spectral space with a sin^2 easing (1.5708 ~ pi/2)
                img = image_f((params2 - params1) * math.sin(1.5708 * i / vsteps) ** 2)[0].permute(1, 2, 0)
                img = torch.clip(img * 255, 0, 255).cpu().numpy().astype(np.uint8)
            imsave(os.path.join(tempdir, '%05d.jpg' % (px * vsteps + i)), img)
            if a.verbose: cvshow(img)
            pbar.upd()

    os.system('ffmpeg -v warning -y -i %s "%s-pts.mp4"' % (os.path.join(tempdir, '%05d.jpg'), a.in_dir))
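# The 1.5708 above is pi/2: each pair of saved latents is crossfaded with the
# weight sin(pi/2 * t)^2 == (1 - cos(pi*t))/2, a cosine easing whose slope is
# zero at both ends, so each keyframe dwells briefly before morphing into the
# next. A minimal standalone illustration of that curve (not repo code):

def blend_weight(t):
    """Easing weight used by the interpolation loops above/below, t in [0, 1]."""
    return math.sin(math.pi / 2 * t) ** 2

# e.g. [round(blend_weight(i / 10), 3) for i in range(11)] gives
# [0.0, 0.024, 0.095, 0.206, 0.345, 0.5, 0.655, 0.794, 0.905, 0.976, 1.0]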
def main():
    a = get_args()

    # Load CLIP model; RN* models need fewer samples to fit in memory
    model_clip, _ = clip.load(a.model)
    if a.verbose: print(' using model', a.model)
    xmem = {'RN50': 0.5, 'RN50x4': 0.16, 'RN101': 0.33}
    if 'RN' in a.model:
        a.samples = int(a.samples * xmem[a.model])

    workdir = os.path.join(a.out_dir, basename(a.in_txt))
    workdir += '-%s' % a.model if 'RN' in a.model.upper() else ''
    os.makedirs(workdir, exist_ok=True)

    if a.diverse != 0:
        a.samples = int(a.samples * 0.5)

    if a.transform:
        trform_f = transforms.transforms_custom
        a.samples = int(a.samples * 0.95)
    else:
        trform_f = transforms.normalize()

    # encoding of the text to subtract, if any
    if a.in_txt0 is not None:
        if a.verbose: print(' subtract text:', basename(a.in_txt0))
        if a.translate:
            translator = Translator()
            a.in_txt0 = translator.translate(a.in_txt0, dest='en').text
            if a.verbose: print(' translated to:', a.in_txt0)
        if a.multilang:
            model_lang = SentenceTransformer('clip-ViT-B-32-multilingual-v1').cuda()
            txt_enc0 = model_lang.encode([a.in_txt0], convert_to_tensor=True, show_progress_bar=False).detach().clone()
            del model_lang
        else:
            txt_enc0 = model_clip.encode_text(clip.tokenize(a.in_txt0).cuda()).detach().clone()

    # make init: random rfft2 spectrum [1, 3, H, W//2+1, 2], or resumed params
    global params_start, params_ema, prev_enc
    params_shape = [1, 3, a.size[0], a.size[1] // 2 + 1, 2]
    params_start = torch.randn(*params_shape).cuda()  # random init
    params_ema = 0.
    if a.resume is not None and os.path.isfile(a.resume):
        if a.verbose: print(' resuming from', a.resume)
        params_start = load_params(a.resume).cuda()
        if a.keep > 0:
            params_ema = params_start[0].detach().clone()
    else:
        a.resume = 'init.pt'

    torch.save(params_start, 'init.pt')  # final init
    shutil.copy(a.resume, os.path.join(workdir, '000-%s.pt' % basename(a.resume)))

    prev_enc = 0

    def process(txt, num):
        global params_start, params_ema, prev_enc

        sd = 0.01
        if a.keep > 0: sd = a.keep + (1 - a.keep) * sd
        params, image_f = fft_image([1, 3, *a.size], resume='init.pt', sd=sd, decay_power=a.decay)
        image_f = to_valid_rgb(image_f, colors=a.colors)

        if a.prog:  # progressive learning rate, ramping from lr0 to lr1
            lr1 = a.lrate * 2
            lr0 = a.lrate * 0.1
        else:
            lr0 = a.lrate
        optimizer = torch.optim.Adam(params, lr0)

        if a.verbose: print(' ref text: ', txt)
        if a.translate:
            translator = Translator()
            txt = translator.translate(txt, dest='en').text
            if a.verbose: print(' translated to:', txt)
        if a.multilang:
            model_lang = SentenceTransformer('clip-ViT-B-32-multilingual-v1').cuda()
            txt_enc = model_lang.encode([txt], convert_to_tensor=True, show_progress_bar=False).detach().clone()
            del model_lang
        else:
            txt_enc = model_clip.encode_text(clip.tokenize(txt).cuda()).detach().clone()

        # encoding of the rendered text itself, to penalize literal lettering
        if a.notext > 0:
            txt_plot = torch.from_numpy(plot_text(txt, a.modsize) / 255.).unsqueeze(0).permute(0, 3, 1, 2).cuda()
            txt_plot_enc = model_clip.encode_image(txt_plot).detach().clone()
        else:
            txt_plot_enc = None

        out_name = '%03d-%s' % (num + 1, txt_clean(txt))
        out_name += '-%s' % a.model if 'RN' in a.model.upper() else ''
        tempdir = os.path.join(workdir, out_name)
        os.makedirs(tempdir, exist_ok=True)

        pbar = ProgressBar(a.steps // a.fstep)
        for i in range(a.steps):
            loss = 0
            noise = a.noise * torch.randn(1, 1, *params[0].shape[2:4], 1).cuda() if a.noise > 0 else None
            img_out = image_f(noise)

            if a.sharp != 0:  # total-variation term; negative sign rewards detail
                lx = torch.mean(torch.abs(img_out[0, :, :, 1:] - img_out[0, :, :, :-1]))
                ly = torch.mean(torch.abs(img_out[0, :, 1:, :] - img_out[0, :, :-1, :]))
                loss -= a.sharp * (ly + lx)

            imgs_sliced = slice_imgs([img_out], a.samples, a.modsize, trform_f, a.align, micro=1.)
            out_enc = model_clip.encode_image(imgs_sliced[-1])

            loss -= torch.cosine_similarity(txt_enc, out_enc, dim=-1).mean()
            if a.notext > 0:
                loss += a.notext * torch.cosine_similarity(txt_plot_enc, out_enc, dim=-1).mean()
            if a.diverse != 0:  # push two renderings of the same params apart
                imgs_sliced = slice_imgs([image_f(noise)], a.samples, a.modsize, trform_f, a.align, micro=1.)
                out_enc2 = model_clip.encode_image(imgs_sliced[-1])
                loss += a.diverse * torch.cosine_similarity(out_enc, out_enc2, dim=-1).mean()
                del out_enc2; torch.cuda.empty_cache()
            if a.expand > 0:  # push away from the previous iteration's encoding
                if i > 0:
                    loss += a.expand * torch.cosine_similarity(out_enc, prev_enc, dim=-1).mean()
                prev_enc = out_enc.detach().clone()
            if a.in_txt0 is not None:  # subtract text
                loss += torch.cosine_similarity(txt_enc0, out_enc, dim=-1).mean()
            del img_out, imgs_sliced, out_enc; torch.cuda.empty_cache()

            if a.prog:
                lr_cur = lr0 + (i / a.steps) * (lr1 - lr0)
                for g in optimizer.param_groups:
                    g['lr'] = lr_cur

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % a.fstep == 0:
                with torch.no_grad():
                    img = image_f(contrast=a.contrast).cpu().numpy()[0]
                if a.sharp != 0:
                    img = img ** 1.3  # empirical tone mapping
                checkout(img, os.path.join(tempdir, '%04d.jpg' % (i // a.fstep)), verbose=a.verbose)
                pbar.upd()
                del img

        # carry a blend of the accumulated average into the next text's init
        if a.keep > 0:
            params_ema = ema(params_ema, params[0].detach().clone(), num + 1)
            torch.save((1 - a.keep) * params_start + a.keep * params_ema, 'init.pt')

        torch.save(params[0], '%s.pt' % os.path.join(workdir, out_name))
        shutil.copy(img_list(tempdir)[-1], os.path.join(workdir, '%s-%d.jpg' % (out_name, a.steps)))
        os.system('ffmpeg -v warning -y -i %s "%s.mp4"' % (os.path.join(tempdir, '%04d.jpg'), os.path.join(workdir, out_name)))

    with open(a.in_txt, 'r', encoding='utf-8') as f:
        texts = f.readlines()
    texts = [tt.strip() for tt in texts if len(tt.strip()) > 0 and tt[0] != '#']
    if a.verbose:
        print(' total lines:', len(texts))
        print(' samples:', a.samples)

    for i, txt in enumerate(texts):
        process(txt, i)

    # render the final piece: interpolate between consecutive saved latents
    vsteps = int(a.length * 25 / len(texts))  # 25 fps
    tempdir = os.path.join(workdir, '_final')
    os.makedirs(tempdir, exist_ok=True)

    def read_pt(file):
        return torch.load(file).cuda()

    if a.verbose: print(' rendering complete piece')
    ptfiles = file_list(workdir, 'pt')
    pbar = ProgressBar(vsteps * len(ptfiles))
    for px in range(len(ptfiles)):
        params1 = read_pt(ptfiles[px])
        params2 = read_pt(ptfiles[(px + 1) % len(ptfiles)])

        params, image_f = fft_image([1, 3, *a.size], resume=params1, sd=1., decay_power=a.decay)
        image_f = to_valid_rgb(image_f, colors=a.colors)

        for i in range(vsteps):
            with torch.no_grad():
                img = image_f((params2 - params1) * math.sin(1.5708 * i / vsteps) ** 2)[0].permute(1, 2, 0)
                img = torch.clip(img * 255, 0, 255).cpu().numpy().astype(np.uint8)
            imsave(os.path.join(tempdir, '%05d.jpg' % (px * vsteps + i)), img)
            if a.verbose: cvshow(img)
            pbar.upd()

    os.system('ffmpeg -v warning -y -i %s "%s.mp4"' % (os.path.join(tempdir, '%05d.jpg'), os.path.join(a.out_dir, basename(a.in_txt))))
    if a.keep > 0: os.remove('init.pt')
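# `ema` above is a repo helper not shown in this excerpt. A plausible minimal
# stand-in, consistent with its call sites here (params_ema starts at 0., the
# counter num+1 starts at 1, and ema(x, new, 1) must return `new` for the
# resume path in the next script), is a count-based running mean. This is an
# assumption, not the repo's actual implementation:

def ema_sketch(avg, new, n):
    """Running mean over n samples: avg_n = avg_(n-1) + (new - avg_(n-1)) / n."""
    if n <= 1 or not torch.is_tensor(avg):
        return new.clone()  # first sample (or float placeholder init)
    return avg + (new - avg) / n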
def main():
    a = get_args()

    # Load CLIP model; RN* models need fewer samples to fit in memory
    model_clip, _ = clip.load(a.model)
    if a.verbose: print(' using model', a.model)
    xmem = {'RN50': 0.5, 'RN50x4': 0.16, 'RN101': 0.33}
    if 'RN' in a.model:
        a.samples = int(a.samples * xmem[a.model])

    workdir = os.path.join(a.out_dir, basename(a.in_txt))
    workdir += '-%s' % a.model if 'RN' in a.model.upper() else ''
    os.makedirs(workdir, exist_ok=True)

    # CLIP's own image normalization constants
    norm_in = torchvision.transforms.Normalize(
        (0.48145466, 0.4578275, 0.40821073),
        (0.26862954, 0.26130258, 0.27577711))

    if a.in_txt0 is not None:
        if a.verbose: print(' subtract text:', basename(a.in_txt0))
        if a.translate:
            translator = Translator()
            a.in_txt0 = translator.translate(a.in_txt0, dest='en').text
            if a.verbose: print(' translated to:', a.in_txt0)
        tx0 = clip.tokenize(a.in_txt0).cuda()
        txt_enc0 = model_clip.encode_text(tx0).detach().clone()

    # make init: random rfft2 spectrum, replaced by resumed params if given
    global params_start
    params_shape = [1, 3, a.size[0], a.size[1] // 2 + 1, 2]
    params_start = torch.randn(*params_shape).cuda()  # random init
    if a.resume is not None and os.path.isfile(a.resume):
        if a.verbose: print(' resuming from', a.resume)
        params, _ = fft_image([1, 3, *a.size], resume=a.resume)
        params_start = ema(params_start, params[0].detach(), 1)
    else:
        a.resume = 'init.pt'
    torch.save(params_start, 'init.pt')  # final init
    shutil.copy(a.resume, os.path.join(workdir, '000-%s.pt' % basename(a.resume)))

    def process(txt, num):
        global params_start

        params, image_f = fft_image([1, 3, *a.size], resume='init.pt')
        image_f = to_valid_rgb(image_f)
        optimizer = torch.optim.Adam(params, a.lrate)

        if a.verbose: print(' ref text: ', txt)
        if a.translate:
            translator = Translator()
            txt = translator.translate(txt, dest='en').text
            if a.verbose: print(' translated to:', txt)
        tx = clip.tokenize(txt).cuda()
        txt_enc = model_clip.encode_text(tx).detach().clone()

        out_name = '%03d-%s' % (num + 1, txt_clean(txt))
        out_name += '-%s' % a.model if 'RN' in a.model.upper() else ''
        tempdir = os.path.join(workdir, out_name)
        os.makedirs(tempdir, exist_ok=True)

        pbar = ProgressBar(a.steps // a.fstep)
        for i in range(a.steps):
            loss = 0
            noise = a.noise * torch.randn(1, 1, *params[0].shape[2:4], 1).cuda() if a.noise > 0 else None
            img_out = image_f(noise)
            imgs_sliced = slice_imgs([img_out], a.samples, a.modsize, norm_in, a.overscan, micro=None)
            out_enc = model_clip.encode_image(imgs_sliced[-1])
            loss -= torch.cosine_similarity(txt_enc, out_enc, dim=-1).mean()
            if a.in_txt0 is not None:  # subtract text
                loss += torch.cosine_similarity(txt_enc0, out_enc, dim=-1).mean()
            del img_out, imgs_sliced, out_enc
            torch.cuda.empty_cache()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % a.fstep == 0:
                with torch.no_grad():
                    img = image_f(contrast=a.contrast).cpu().numpy()[0]
                checkout(img, os.path.join(tempdir, '%04d.jpg' % (i // a.fstep)), verbose=a.verbose)
                pbar.upd()
                del img

        # carry this text's result into the next init, per the keep mode
        if a.keep == 'all':
            params_start = ema(params_start, params[0].detach(), num + 1)
            torch.save(params_start, 'init.pt')
        elif a.keep == 'last':
            torch.save((params_start + params[0].detach()) / 2, 'init.pt')

        torch.save(params[0], '%s.pt' % os.path.join(workdir, out_name))
        shutil.copy(img_list(tempdir)[-1], os.path.join(workdir, '%s-%d.jpg' % (out_name, a.steps)))
        os.system('ffmpeg -v warning -y -i %s "%s.mp4"' % (os.path.join(tempdir, '%04d.jpg'), os.path.join(workdir, out_name)))

    with open(a.in_txt, 'r', encoding='utf-8') as f:
        texts = f.readlines()
    texts = [tt.strip() for tt in texts if len(tt.strip()) > 0 and tt[0] != '#']
    if a.verbose:
        print(' total lines:', len(texts))
        print(' samples:', a.samples)

    for i, txt in enumerate(texts):
        process(txt, i)

    # render the final piece: interpolate between consecutive saved latents
    vsteps = int(a.length * 25 / len(texts))  # 25 fps
    tempdir = os.path.join(workdir, '_final')
    os.makedirs(tempdir, exist_ok=True)

    def read_pt(file):
        return torch.load(file).cuda()

    if a.verbose: print(' rendering complete piece')
    ptfiles = file_list(workdir, 'pt')
    pbar = ProgressBar(vsteps * len(ptfiles))
    for px in range(len(ptfiles)):
        params1 = read_pt(ptfiles[px])
        params2 = read_pt(ptfiles[(px + 1) % len(ptfiles)])

        params, image_f = fft_image([1, 3, *a.size], resume=params1)
        image_f = to_valid_rgb(image_f)

        for i in range(vsteps):
            with torch.no_grad():
                img = image_f((params2 - params1) * math.sin(1.5708 * i / vsteps) ** 2)[0].permute(1, 2, 0)
                img = torch.clip(img * 255, 0, 255).cpu().numpy().astype(np.uint8)
            imsave(os.path.join(tempdir, '%05d.jpg' % (px * vsteps + i)), img)
            if a.verbose: cvshow(img)
            pbar.upd()

    os.system('ffmpeg -v warning -y -i %s "%s.mp4"' % (os.path.join(tempdir, '%05d.jpg'), os.path.join(a.out_dir, basename(a.in_txt))))
    if a.keep: os.remove('init.pt')  # clean up the shared init when a keep mode was used
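# Why params_shape is [1, 3, H, W//2+1, 2]: the latents are real-FFT spectra
# of an H x W image, with W//2+1 complex columns stored as a trailing dim of
# two reals; the interpolation script recovers W as (shape[3]-1)*2, exact for
# even W. A standalone sanity check of that layout, assuming fft_image follows
# torch.fft.rfft2 conventions (a sketch, not the repo's fft_image itself):

def check_spectrum_shape(H=180, W=320):
    img = torch.randn(1, 3, H, W)
    spec = torch.view_as_real(torch.fft.rfft2(img))   # [1, 3, H, W//2+1, 2]
    assert list(spec.shape) == [1, 3, H, W // 2 + 1, 2]
    assert (spec.shape[3] - 1) * 2 == W               # width recovery, even W
    back = torch.fft.irfft2(torch.view_as_complex(spec), s=(H, W))
    assert torch.allclose(img, back, atol=1e-4)       # lossless round-trip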