def main():
    G = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=1024))
        ]))

    ## ichao : load the pretrained generator's weight
    G.load_state_dict(
        torch.load("weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
                   map_location=device))
    G.eval()
    G.to(device)
    g_mapping, g_synthesis = G[0], G[1]

    # sample a random z, map it to W, and synthesize a sanity-check image
    dlatent = torch.randn((1, 512), device=device)
    dlatent = g_mapping(dlatent)
    #dlatent = dlatent.expand(1, 18, 512)
    synth_img = g_synthesis(dlatent)
    synth_img = (synth_img + 1.0) / 2.0
    save_image(synth_img.clamp(0, 1), "source_image/sample_rand.png")

    # enable gradients on the per-layer noise inputs (two per synthesis block)
    counter = 0
    for i, m in enumerate(g_synthesis.blocks.values()):
        counter += 2
        m.epi1.top_epi[0].noise.requires_grad = True
        m.epi2.top_epi[0].noise.requires_grad = True
        print(counter)
def main():
    parser = argparse.ArgumentParser(
        description=
        'Morph between two encoded latent codes by linear interpolation in W space')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--latent_file1', default="latent_W/0.npy")
    parser.add_argument('--latent_file2', default="latent_W/sample.npy")
    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    latents_0 = np.load(args.latent_file1)
    latents_1 = np.load(args.latent_file2)
    latents_0 = torch.tensor(latents_0).to(device)
    latents_1 = torch.tensor(latents_1).to(device)

    # blend from latents_1 (alpha = 0) toward latents_0 (alpha approaches,
    # but never reaches, 1)
    for i in range(100):
        alpha = (1 / 100) * i
        latents = alpha * latents_0 + (1 - alpha) * latents_1
        synth_img = g_synthesis(latents)
        synth_img = (synth_img + 1.0) / 2.0
        save_image(synth_img.clamp(0, 1),
                   "morph_result/encode1/{}.png".format(i))
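# The blend above, factored into a standalone helper for reference. A minimal
# sketch: the name lerp_dlatents is hypothetical, and torch.lerp(w1, w0, alpha)
# computes the same convex combination alpha * w0 + (1 - alpha) * w1.
def lerp_dlatents(w0, w1, n_frames=100):
    # w0, w1: (1, 18, 512) W+ codes, as saved under latent_W/
    return [torch.lerp(w1, w0, i / n_frames) for i in range(n_frames)]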
def main():
    parser = argparse.ArgumentParser(
        description=
        'Edit an encoded latent code along semantic boundaries in W space')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--latent_file', default="latent_W/0.npy")
    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    boundary_name = [
        "stylegan_ffhq_gender_w_boundary.npy",
        "stylegan_ffhq_age_w_boundary.npy",
        "stylegan_ffhq_pose_w_boundary.npy",
        "stylegan_ffhq_eyeglasses_w_boundary.npy",
        "stylegan_ffhq_smile_w_boundary.npy"
    ]
    semantic = ["gender", "age", "pose", "eye_glass", "smile"]

    # morph the encoded latent along each semantic boundary (see the sketch
    # of make_morph after this function)
    for i in range(5):
        latents_0 = np.load(args.latent_file)
        latents_0 = torch.tensor(latents_0).to(device)  #.unsqueeze(0)
        boundary = np.load("boundaries/" + boundary_name[i])
        make_morph(boundary, i, latents_0, g_synthesis, semantic)
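# make_morph is defined elsewhere in the repo. A minimal sketch of such a
# helper, assuming each boundary file holds an InterFaceGAN-style unit normal
# of shape (1, 512) in W space; the step range and output paths below are
# hypothetical choices for illustration.
def make_morph(boundary, i, latents_0, g_synthesis, semantic):
    boundary = torch.tensor(boundary, dtype=torch.float32).to(device)
    for step, coeff in enumerate(np.linspace(-3.0, 3.0, 10)):
        # the (1, 512) direction broadcasts across all 18 style layers
        latents = latents_0 + coeff * boundary
        synth_img = g_synthesis(latents)
        synth_img = (synth_img + 1.0) / 2.0
        save_image(synth_img.clamp(0, 1),
                   "morph_result/{}_{}.png".format(semantic[i], step))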
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im', default="sample.png")
    parser.add_argument('--src_dir', default="source_image/")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1000, type=int)
    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im)  #(1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # image used for the perceptual loss
    upsample2d = torch.nn.Upsample(
        scale_factor=256 / args.resolution,
        mode='bilinear')  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam([dlatent], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                   perceptual_net, img_p,
                                                   MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()
        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_p = perceptual_loss.detach().cpu().numpy()
        loss_m = mse_loss.detach().cpu().numpy()
        loss_list.append(loss_np)

        if i % 10 == 0:
            print(
                "iter{}: loss -- {}, mse_loss --{}, percep_loss --{}".format(
                    i, loss_np, loss_m, loss_p))
            save_image(synth_img.clamp(0, 1),
                       "save_image/encode1/{}.png".format(i))

    #np.save("loss_list.npy",loss_list)
    np.save("latent_W/{}.npy".format(name), dlatent.detach().cpu().numpy())
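# caluclate_loss is imported from elsewhere in the repo (the misspelling is the
# repo's actual identifier). A sketch consistent with how it is called above,
# assuming perceptual_net returns the selected VGG16 feature maps: pixel-space
# MSE plus an MSE over feature maps of the downsampled images.
def caluclate_loss(synth_img, img, perceptual_net, img_p, MSE_Loss, upsample2d):
    mse_loss = MSE_Loss(synth_img, img)
    synth_p = upsample2d(synth_img)  # match the (256, 256) VGG input
    perceptual_loss = 0
    for synth_feat, ref_feat in zip(perceptual_net(synth_p),
                                    perceptual_net(img_p)):
        perceptual_loss = perceptual_loss + MSE_Loss(synth_feat, ref_feat)
    return mse_loss, perceptual_loss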
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from collections import OrderedDict
import pickle
import numpy as np
import matplotlib.pyplot as plt

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

from stylegan_layers import G_mapping, G_synthesis, D_basic

resolution = 1024
g_all = nn.Sequential(
    OrderedDict([
        ('g_mapping', G_mapping()),
        #('truncation', Truncation(avg_latent)),
        ('g_synthesis', G_synthesis(resolution=resolution))
    ]))
d_basic = D_basic(resolution=resolution)

a = True  # set to True to run the TF -> PyTorch weight conversion below
tensorflow_dir = "../drive/My Drive/stylegan_pretrained_model/"
pytorch_dir = "../drive/My Drive/stylegan_pretrained_model/pytorch/"
weight_name = "karras2019stylegan-ffhq-1024x1024"

if a:
    # this can be run to get the weights, but you need the reference
    # implementation and weights
    import dnnlib, dnnlib.tflib, pickle, torch, collections
    dnnlib.tflib.init_tf()
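    # The file is truncated here. A minimal sketch of the remaining steps,
    # assuming the official StyleGAN pickle layout (_G, _D, Gs); the exact
    # TF-to-PyTorch variable-name remapping is repo-specific and omitted.
    with open(tensorflow_dir + weight_name + ".pkl", "rb") as f:
        _G, _D, Gs = pickle.load(f)
    # pull the raw TF variables into numpy (Gs.vars maps names to tf.Variables;
    # init_tf() above established the default session that .eval() needs)
    tf_params = {name: var.eval() for name, var in Gs.vars.items()}
    # ... copy tf_params into g_all's state_dict under the matching module
    # names, then save the converted checkpoint:
    torch.save(g_all.state_dict(), pytorch_dir + weight_name + ".pt")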
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im1', default="source_image/sample.png")
    parser.add_argument('--src_im2', default="source_image/0.png")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1000, type=int)
    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  #(1,3,1024,1024) -1~1
    img_0 = img_0.to(device)
    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  #(1,3,1024,1024)

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode='bilinear')

    img_p0 = img_0.clone()  # resize for perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  #(1,3,256,256)

    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  #(1,3,256,256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(
        device)  #conv1_1,conv1_2,conv2_2,conv3_3
    dlatent_a = torch.zeros((1, 18, 512), requires_grad=True,
                            device=device)  # appearance latent s1
    dlatent_e = torch.zeros((1, 18, 512), requires_grad=True,
                            device=device)  # expression latent s2
    optimizer = optim.Adam([dlatent_a, dlatent_e],
                           lr=0.01,
                           betas=(0.9, 0.999),
                           eps=1e-8)

    # use style layers 4-7 to change the face
    alpha = torch.zeros((1, 18, 512)).to(device)
    #alpha[:,3:5,:]=1
    alpha[:, 4:8, :] = 1

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img_a = g_synthesis(dlatent_a)
        synth_img_a = (synth_img_a + 1.0) / 2.0
        synth_img_e = g_synthesis(dlatent_e)
        synth_img_e = (synth_img_e + 1.0) / 2.0

        loss_1 = caluclate_contentloss(synth_img_a, perceptual_net, img_p1,
                                       MSE_Loss, upsample2d)
        loss_1.backward()
        # optimizer.step()

        loss_2 = caluclate_styleloss(synth_img_e, img_p0, perceptual_net,
                                     upsample2d)
        loss_2.backward()
        optimizer.step()

        loss_1 = loss_1.detach().cpu().numpy()
        loss_2 = loss_2.detach().cpu().numpy()

        # mix the two latent codes layer-wise
        dlatent1 = dlatent_a * alpha + dlatent_e * (1 - alpha)
        dlatent2 = dlatent_a * (1 - alpha) + dlatent_e * alpha
        synth_img1 = g_synthesis(dlatent1)
        synth_img1 = (synth_img1 + 1.0) / 2.0
        synth_img2 = g_synthesis(dlatent2)
        synth_img2 = (synth_img2 + 1.0) / 2.0

        if i % 10 == 0:
            print("iter{}: loss_1 --{}, loss_2 --{}".format(i, loss_1, loss_2))
            save_image(synth_img_a.clamp(0, 1),
                       "save_image/exchange/a/{}_a.png".format(i))
            save_image(synth_img_e.clamp(0, 1),
                       "save_image/exchange/e/{}_e.png".format(i))
            save_image(
                synth_img1.clamp(0, 1),
                "save_image/exchange/result1/{}_exchange1.png".format(i))
            save_image(
                synth_img2.clamp(0, 1),
                "save_image/exchange/result2/{}_exchange2.png".format(i))

    np.save("latent_W/exchange1.npy", dlatent1.detach().cpu().numpy())
    np.save("latent_W/exchange2.npy", dlatent2.detach().cpu().numpy())
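# caluclate_contentloss and caluclate_styleloss are defined elsewhere in the
# repo. A sketch of plausible implementations, assuming torch.nn.functional is
# imported as F and perceptual_net returns the four VGG16 feature maps selected
# above: content loss compares features directly, style loss compares Gram
# matrices of the features.
def gram_matrix(feat):
    n, c, h, w = feat.shape
    f = feat.view(n, c, h * w)
    return f.bmm(f.transpose(1, 2)) / (c * h * w)

def caluclate_contentloss(synth_img, perceptual_net, img_p, MSE_Loss,
                          upsample2d):
    synth_p = upsample2d(upsample2d(synth_img))  # (1, 3, 256, 256)
    loss = 0
    for synth_feat, ref_feat in zip(perceptual_net(synth_p),
                                    perceptual_net(img_p)):
        loss = loss + MSE_Loss(synth_feat, ref_feat)
    return loss

def caluclate_styleloss(synth_img, img_p, perceptual_net, upsample2d):
    synth_p = upsample2d(upsample2d(synth_img))  # (1, 3, 256, 256)
    loss = 0
    for synth_feat, ref_feat in zip(perceptual_net(synth_p),
                                    perceptual_net(img_p)):
        loss = loss + F.mse_loss(gram_matrix(synth_feat),
                                 gram_matrix(ref_feat))
    return loss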
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im1', default="source_image/joker.png")
    parser.add_argument('--src_im2', default="source_image/0.png")
    parser.add_argument('--mask', default="source_image/Blur_mask.png")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1500, type=int)
    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  #(1,3,1024,1024) -1~1
    img_0 = img_0.to(device)
    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  #(1,3,1024,1024)

    # single-channel blend mask and its complement
    blur_mask0 = image_reader(args.mask).to(device)
    blur_mask0 = blur_mask0[:, 0, :, :].unsqueeze(0)
    blur_mask1 = blur_mask0.clone()
    blur_mask1 = 1 - blur_mask1

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode='bilinear')

    img_p0 = img_0.clone()  # resize for perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  #(1,3,256,256)

    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  #(1,3,256,256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(
        device)  #conv1_1,conv1_2,conv2_2,conv3_3
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam([dlatent], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        loss_wl0 = caluclate_loss(synth_img, img_0, perceptual_net, img_p0,
                                  blur_mask0, MSE_Loss, upsample2d)
        loss_wl1 = caluclate_loss(synth_img, img_1, perceptual_net, img_p1,
                                  blur_mask1, MSE_Loss, upsample2d)
        loss = loss_wl0 + loss_wl1
        loss.backward()
        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_0 = loss_wl0.detach().cpu().numpy()
        loss_1 = loss_wl1.detach().cpu().numpy()
        loss_list.append(loss_np)

        if i % 10 == 0:
            print("iter{}: loss -- {}, loss0 --{}, loss1 --{}".format(
                i, loss_np, loss_0, loss_1))
            save_image(synth_img.clamp(0, 1),
                       "save_image/crossover/{}.png".format(i))

    np.save("latent_W/crossover.npy", dlatent.detach().cpu().numpy())
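# The crossover variant of caluclate_loss takes an extra blur mask. A sketch,
# assuming the mask weights both the pixel loss and the (downsampled) VGG
# inputs so that each reference image only constrains its own region; as in
# the other sketches, perceptual_net is assumed to return the selected VGG16
# feature maps.
def caluclate_loss(synth_img, img, perceptual_net, img_p, blur_mask, MSE_Loss,
                   upsample2d):
    mse_loss = MSE_Loss(synth_img * blur_mask, img * blur_mask)
    mask_p = upsample2d(upsample2d(blur_mask))   # (1, 1, 256, 256)
    synth_p = upsample2d(upsample2d(synth_img))  # (1, 3, 256, 256)
    perceptual_loss = 0
    for synth_feat, ref_feat in zip(perceptual_net(synth_p * mask_p),
                                    perceptual_net(img_p * mask_p)):
        perceptual_loss = perceptual_loss + MSE_Loss(synth_feat, ref_feat)
    return mse_loss + perceptual_loss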
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im', default="sample.png")
    parser.add_argument('--src_dir', default="source_image/")
    parser.add_argument('--save_dir', default="save_image/encode1")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--w_iteration', default=1000, type=int)
    parser.add_argument('--n_iteration', default=1000, type=int)
    parser.add_argument('--loop_time', default=5, type=int)
    args = parser.parse_args()

    ## ichao : this is the generator part, you can replace here using the generator you found
    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    ## ichao : load the pretrained generator's weight
    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]
    ## ichao : end of generator part

    ## ichao : read the input image (size : 3x1024x1024)
    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im)  #(1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  ## ichao : used for perceptual loss

    ## ichao : resize the image to put into VGG
    upsample2d = torch.nn.Upsample(scale_factor=256 / args.resolution,
                                   mode='bilinear')
    img_p = upsample2d(img_p)

    # [4,9,16,23]
    # [2,4,14,21]
    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    mean_w = get_mean_latent(g_mapping, device)

    ## ichao : initialize the latent code we want to optimize
    dlatent = mean_w
    dlatent = dlatent.requires_grad_()
    synth_img = g_synthesis(dlatent)
    #dlatent = -2.0 * torch.randn((1,18,512), device=device) + 1.0
    #dlatent = dlatent.requires_grad_()
    #optimizer = optim.Adam({dlatent}, lr=0.01)

    # Latent code optimization
    loop_iteration = args.w_iteration + args.n_iteration
    print("Start")
    for loop in range(args.loop_time):
        # freeze the noise inputs while the latent code is optimized
        for m in g_synthesis.blocks.values():
            m.epi1.top_epi[0].noise.requires_grad = False
            m.epi2.top_epi[0].noise.requires_grad = False

        print("========Latent Code Optimization=========")
        optimizer = optim.Adam([dlatent],
                               lr=0.01,
                               betas=(0.9, 0.999),
                               eps=1e-8)
        loss_list = []
        for i in range(args.w_iteration):
            optimizer.zero_grad()
            synth_img = g_synthesis(dlatent)
            synth_img = (synth_img + 1.0) / 2.0  # map output from [-1, 1] to [0, 1]
            # (the extra 1, 1 arguments appear to weight the MSE and
            # perceptual terms in this variant of caluclate_loss)
            mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                       perceptual_net, img_p,
                                                       MSE_Loss, 1, 1,
                                                       upsample2d)
            # adjust ratio to control the gradient part.
            # ratio = 0.8
            # loss = (1 - ratio) * mse_loss + ratio * perceptual_loss
            loss = mse_loss + perceptual_loss
            loss.backward()
            optimizer.step()

            loss_np = loss.detach().cpu().numpy()
            loss_p = perceptual_loss.detach().cpu().numpy()
            loss_m = mse_loss.detach().cpu().numpy()
            loss_list.append(loss_np)
            print(
                "iter{}: loss -- {}, mse_loss --{}, percep_loss --{}".format(
                    loop * loop_iteration + i, loss_np, loss_m, loss_p))
            if i % 10 == 0:
                save_image(
                    synth_img.clamp(0, 1),
                    "{dir}/{number}.png".format(
                        dir=args.save_dir, number=loop * loop_iteration + i))
        np.save("latent_W/{}.npy".format(name),
                dlatent.detach().cpu().numpy())

        # Noise optimization
        print("============Noise Optimization============")
        dlatent.requires_grad = False
        noises = []
        for i, m in enumerate(g_synthesis.blocks.values()):
            m.epi1.top_epi[0].noise.requires_grad = True
            noises.append(m.epi1.top_epi[0].noise)
            m.epi2.top_epi[0].noise.requires_grad = True
            noises.append(m.epi2.top_epi[0].noise)
        optimizer = optim.Adam(noises, lr=5, betas=(0.9, 0.999), eps=1e-8)

        for i in range(args.n_iteration):
            optimizer.zero_grad()
            ## ichao : generate an image using the current latent code
            #dlatent_ex= g_mapping(dlatent)
            synth_img = g_synthesis(dlatent)
            synth_img = (synth_img + 1.0) / 2.0  # map output from [-1, 1] to [0, 1]
            mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                       perceptual_net, img_p,
                                                       MSE_Loss, 0, 1,
                                                       upsample2d)
            # adjust ratio to control the gradient part.
            # ratio = 0.8
            # loss = (1 - ratio) * mse_loss + ratio * perceptual_loss
            loss = mse_loss + perceptual_loss
            loss.backward()
            optimizer.step()

            loss_np = loss.detach().cpu().numpy()
            loss_p = perceptual_loss.detach().cpu().numpy()
            loss_m = mse_loss.detach().cpu().numpy()
            loss_list.append(loss_np)
            print(
                "iter{}: loss -- {}, mse_loss --{}, percep_loss --{}".format(
                    loop * loop_iteration + args.w_iteration + i, loss_np,
                    loss_m, loss_p))
            if i % 10 == 0:
                save_image(
                    synth_img.clamp(0, 1),
                    "{dir}/{number}.png".format(
                        dir=args.save_dir,
                        number=loop * loop_iteration + args.w_iteration + i))
        # detach the noise tensors before saving: np.array on CUDA tensors
        # that require grad would raise, and the noise maps have different
        # shapes, so store them as an object array
        np.save("noise/{}.npy".format(name),
                np.array([n.detach().cpu().numpy() for n in noises],
                         dtype=object))
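# get_mean_latent is defined elsewhere in the repo. A minimal sketch, assuming
# g_mapping returns the broadcast (N, 18, 512) W+ code (as its direct use with
# g_synthesis above suggests); the sample count is a hypothetical choice.
def get_mean_latent(g_mapping, device, n_samples=4096):
    with torch.no_grad():
        z = torch.randn(n_samples, 512, device=device)
        w = g_mapping(z)
        return w.mean(dim=0, keepdim=True)  # (1, 18, 512) average latent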