# Common imports for the scripts in this section. `device` and the
# project-local helpers (image_reader, VGG16_for_Perceptual, caluclate_loss,
# caluclate_contentloss, caluclate_styleloss, load_generator,
# parse_resolution, parse_gan_type, forward, sample, to_tensor, progress_bar,
# G_mapping, G_synthesis) are assumed to come from this repo's own modules.
import argparse
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.utils import save_image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def optimize_style(source_image, model, model_name, gan_type, dlatent, iteration):
    resolution = parse_resolution(model_name)
    img = image_reader(source_image, resize=resolution)  # (1, 3, 1024, 1024), values in [-1, 1]
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # copy used for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / resolution,
                                   mode="bilinear")  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    w = to_tensor(dlatent).requires_grad_()
    optimizer = optim.Adam([w], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    for i in progress_bar(range(iteration)):
        optimizer.zero_grad()
        synth_img = forward(model, gan_type, w)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img, perceptual_net,
                                                   img_p, MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()
        optimizer.step()

    return w.detach().cpu().numpy()
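# Hypothetical usage sketch for optimize_style(). The file paths, the
# "stylegan_ffhq1024" model name, and the output file are illustrative
# assumptions; load_generator and parse_gan_type are this repo's helpers.
if __name__ == "__main__":
    model_name = "stylegan_ffhq1024"
    model = load_generator(model_name)
    gan_type = parse_gan_type(model)
    dlatent = np.load("latent_W/sample.npy")  # a previously saved (1, 18, 512) W+ code
    w_opt = optimize_style("source_image/sample.png", model, model_name,
                           gan_type, dlatent, iteration=1000)
    np.save("latent_W/sample_refined.npy", w_opt)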
def main():
    parser = argparse.ArgumentParser(
        description="Find latent representation of reference images using perceptual loss")
    parser.add_argument("--batch_size", default=1,
                        help="Batch size for generator and perceptual model", type=int)
    parser.add_argument("--resolution", default=1024, type=int)
    parser.add_argument("--src_im", default="sample.png")
    parser.add_argument("--src_dir", default="source_image/")
    parser.add_argument("--weight_file",
                        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
                        type=str)
    parser.add_argument("--iteration", default=1000, type=int)
    args = parser.parse_args()

    g_all = nn.Sequential(OrderedDict([
        ("g_mapping", G_mapping()),
        # ("truncation", Truncation(avg_latent)),
        ("g_synthesis", G_synthesis(resolution=args.resolution)),
    ]))
    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im)  # (1, 3, 1024, 1024), values in [-1, 1]
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # copy used for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / args.resolution,
                                   mode="bilinear")  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam([dlatent], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img, perceptual_net,
                                                   img_p, MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()
        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_p = perceptual_loss.detach().cpu().numpy()
        loss_m = mse_loss.detach().cpu().numpy()
        loss_list.append(loss_np)

        if i % 10 == 0:
            print("iter{}: loss -- {}, mse_loss --{}, percep_loss --{}".format(
                i, loss_np, loss_m, loss_p))
            save_image(synth_img.clamp(0, 1), "save_image/encode1/{}.png".format(i))

    # np.save("loss_list.npy", loss_list)
    np.save("latent_W/{}.npy".format(name), dlatent.detach().cpu().numpy())
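# The scripts here all call caluclate_loss (sic), which is defined elsewhere
# in the repo. A minimal sketch of what it plausibly computes, inferred from
# the call sites: pixel-wise MSE at full resolution plus MSE between VGG16
# feature maps of the 256x256 downsampled images. The name
# caluclate_loss_sketch marks it as an assumption, not the repo's definition.
def caluclate_loss_sketch(synth_img, img, perceptual_net, img_p, MSE_Loss, upsample2d):
    # Pixel-space reconstruction term at full resolution.
    mse_loss = MSE_Loss(synth_img, img)
    # Feature-space term: compare VGG16 activations of the downsampled
    # synthetic image against those of the downsampled target.
    synth_p = upsample2d(synth_img)
    synth_feats = perceptual_net(synth_p)
    real_feats = perceptual_net(img_p)
    perceptual_loss = sum(MSE_Loss(s, r) for s, r in zip(synth_feats, real_feats))
    return mse_loss, perceptual_loss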
def main():
    parser = argparse.ArgumentParser(
        description="Find latent representation of reference images using perceptual loss")
    parser.add_argument("--src_im", default="sample.png")
    parser.add_argument("--src_dir", default="source_image/")
    iteration = 1000
    args = parser.parse_args()

    model_name = "stylegan_ffhq1024"
    model = load_generator(model_name)
    resolution = parse_resolution(model_name)
    gan_type = parse_gan_type(model)

    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im,
                       resize=resolution)  # (1, 3, 1024, 1024), values in [-1, 1]
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # copy used for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / resolution,
                                   mode="bilinear")  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    # dlatent = torch.randn(1, model.z_space_dim, requires_grad=True, device=device)
    w = to_tensor(sample(model, gan_type)).requires_grad_()
    optimizer = optim.Adam([w], lr=0.01, betas=(0.9, 0.999), eps=1e-8)
    # optimizer = optim.SGD({dlatent}, lr=1.)  # , momentum=0.9, nesterov=True

    print("Start")
    loss_list = []
    for i in range(iteration):
        optimizer.zero_grad()
        synth_img = forward(model, gan_type, w)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img, perceptual_net,
                                                   img_p, MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()
        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_p = perceptual_loss.detach().cpu().numpy()
        loss_m = mse_loss.detach().cpu().numpy()
        loss_list.append(loss_np)

        if i % 10 == 0:
            print("iter{}: loss -- {}, mse_loss --{}, percep_loss --{}".format(
                i, loss_np, loss_m, loss_p))
            save_image(synth_img.clamp(0, 1), "save_image/encode1/{}.png".format(i))

    # np.save("loss_list.npy", loss_list)
    np.save("latent_W/{}.npy".format(name), w.detach().cpu().numpy())
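# forward() and sample() above are repo helpers defined elsewhere. A hedged
# sketch of what forward() plausibly does for StyleGAN-family models, assuming
# genforce-style generators whose synthesis network takes a
# (1, num_layers, w_space_dim) W+ code and returns a dict with an "image"
# entry; the name forward_sketch and the dict layout are assumptions.
def forward_sketch(model, gan_type, w):
    if gan_type in ("stylegan", "stylegan2"):
        return model.synthesis(w)["image"]
    raise NotImplementedError("unsupported gan_type: {}".format(gan_type))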
def main():
    parser = argparse.ArgumentParser(
        description="Find latent representation of reference images using perceptual loss")
    parser.add_argument("--batch_size", default=1,
                        help="Batch size for generator and perceptual model", type=int)
    parser.add_argument("--resolution", default=1024, type=int)
    parser.add_argument("--src_im1", default="source_image/sample.png")
    parser.add_argument("--src_im2", default="source_image/0.png")
    parser.add_argument("--weight_file",
                        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
                        type=str)
    parser.add_argument("--iteration", default=1000, type=int)
    args = parser.parse_args()

    g_all = nn.Sequential(OrderedDict([
        ("g_mapping", G_mapping()),
        # ("truncation", Truncation(avg_latent)),
        ("g_synthesis", G_synthesis(resolution=args.resolution)),
    ]))
    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  # (1, 3, 1024, 1024), values in [-1, 1]
    img_0 = img_0.to(device)
    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  # (1, 3, 1024, 1024)

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode="bilinear")

    img_p0 = img_0.clone()  # resize for the perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  # (1, 3, 256, 256)
    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  # (1, 3, 256, 256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)  # conv1_1, conv1_2, conv2_2, conv3_3

    dlatent_a = torch.zeros((1, 18, 512), requires_grad=True, device=device)  # appearance latent s1
    dlatent_e = torch.zeros((1, 18, 512), requires_grad=True, device=device)  # expression latent s2
    optimizer = optim.Adam([dlatent_a, dlatent_e], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    alpha = torch.zeros((1, 18, 512)).to(device)
    # use W+ layers 4-7 to change the face
    # alpha[:, 3:5, :] = 1
    alpha[:, 4:8, :] = 1

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img_a = g_synthesis(dlatent_a)
        synth_img_a = (synth_img_a + 1.0) / 2.0
        synth_img_e = g_synthesis(dlatent_e)
        synth_img_e = (synth_img_e + 1.0) / 2.0

        loss_1 = caluclate_contentloss(synth_img_a, perceptual_net, img_p1,
                                       MSE_Loss, upsample2d)
        loss_1.backward()
        # optimizer.step()

        loss_2 = caluclate_styleloss(synth_img_e, img_p0, perceptual_net, upsample2d)
        loss_2.backward()
        optimizer.step()

        loss_1 = loss_1.detach().cpu().numpy()
        loss_2 = loss_2.detach().cpu().numpy()

        dlatent1 = dlatent_a * alpha + dlatent_e * (1 - alpha)  # mix the latent codes
        dlatent2 = dlatent_a * (1 - alpha) + dlatent_e * alpha
        synth_img1 = g_synthesis(dlatent1)
        synth_img1 = (synth_img1 + 1.0) / 2.0
        synth_img2 = g_synthesis(dlatent2)
        synth_img2 = (synth_img2 + 1.0) / 2.0

        if i % 10 == 0:
            print("iter{}: loss0 --{}, loss1 --{}".format(i, loss_1, loss_2))
            save_image(synth_img_a.clamp(0, 1), "save_image/exchange/a/{}_a.png".format(i))
            save_image(synth_img_e.clamp(0, 1), "save_image/exchange/e/{}_e.png".format(i))
            save_image(synth_img1.clamp(0, 1),
                       "save_image/exchange/result1/{}_exchange1.png".format(i))
            save_image(synth_img2.clamp(0, 1),
                       "save_image/exchange/result2/{}_exchange2.png".format(i))

    np.save("latent_W/exchange1.npy", dlatent1.detach().cpu().numpy())
    np.save("latent_W/exchange2.npy", dlatent2.detach().cpu().numpy())
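# Self-contained sketch of the style-mixing step above: a binary mask over the
# 18 W+ layers swaps layers 4-7 between two latent codes, so each mixed code
# takes those mid-level style layers from one image and everything else from
# the other. Runnable with plain torch; the random codes stand in for the
# optimized dlatent_a / dlatent_e.
w_a = torch.randn(1, 18, 512)  # stand-in for the optimized appearance code
w_e = torch.randn(1, 18, 512)  # stand-in for the optimized expression code
mix_mask = torch.zeros(1, 18, 512)
mix_mask[:, 4:8, :] = 1  # select W+ layers 4-7
w_mix1 = w_a * mix_mask + w_e * (1 - mix_mask)  # layers 4-7 from w_a, rest from w_e
w_mix2 = w_a * (1 - mix_mask) + w_e * mix_mask  # the complementary mix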
def main():
    parser = argparse.ArgumentParser(
        description="Find latent representation of reference images using perceptual loss")
    parser.add_argument("--batch_size", default=1,
                        help="Batch size for generator and perceptual model", type=int)
    parser.add_argument("--resolution", default=1024, type=int)
    parser.add_argument("--src_im1", default="source_image/joker.png")
    parser.add_argument("--src_im2", default="source_image/0.png")
    parser.add_argument("--mask", default="source_image/Blur_mask.png")
    parser.add_argument("--weight_file",
                        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
                        type=str)
    parser.add_argument("--iteration", default=1500, type=int)
    args = parser.parse_args()

    g_all = nn.Sequential(OrderedDict([
        ("g_mapping", G_mapping()),
        # ("truncation", Truncation(avg_latent)),
        ("g_synthesis", G_synthesis(resolution=args.resolution)),
    ]))
    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  # (1, 3, 1024, 1024), values in [-1, 1]
    img_0 = img_0.to(device)
    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  # (1, 3, 1024, 1024)

    blur_mask0 = image_reader(args.mask).to(device)
    blur_mask0 = blur_mask0[:, 0, :, :].unsqueeze(0)
    blur_mask1 = blur_mask0.clone()
    blur_mask1 = 1 - blur_mask1

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode="bilinear")

    img_p0 = img_0.clone()  # resize for the perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  # (1, 3, 256, 256)
    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  # (1, 3, 256, 256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)  # conv1_1, conv1_2, conv2_2, conv3_3
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam([dlatent], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        loss_wl0 = caluclate_loss(synth_img, img_0, perceptual_net, img_p0,
                                  blur_mask0, MSE_Loss, upsample2d)
        loss_wl1 = caluclate_loss(synth_img, img_1, perceptual_net, img_p1,
                                  blur_mask1, MSE_Loss, upsample2d)
        loss = loss_wl0 + loss_wl1
        loss.backward()
        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_0 = loss_wl0.detach().cpu().numpy()
        loss_1 = loss_wl1.detach().cpu().numpy()
        loss_list.append(loss_np)

        if i % 10 == 0:
            print("iter{}: loss -- {}, loss0 --{}, loss1 --{}".format(
                i, loss_np, loss_0, loss_1))
            save_image(synth_img.clamp(0, 1), "save_image/crossover/{}.png".format(i))

    np.save("latent_W/crossover.npy", dlatent.detach().cpu().numpy())
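# Sketch of the masked "crossover" objective above. The script optimizes one
# latent so that, under the soft blur mask, the masked region reproduces
# src_im1 while the complementary region reproduces src_im2. This pixel-space
# version is an assumption about the masked caluclate_loss variant (the real
# one presumably adds a matching masked perceptual term). Runnable as-is with
# random stand-in tensors:
synth = torch.rand(1, 3, 1024, 1024)     # stand-in for the generator output
target_0 = torch.rand(1, 3, 1024, 1024)  # stand-in for img_0
target_1 = torch.rand(1, 3, 1024, 1024)  # stand-in for img_1
soft_mask = torch.rand(1, 1, 1024, 1024)  # ~1 where the result should match target_0
mse = nn.MSELoss(reduction="mean")
crossover_loss = (mse(soft_mask * synth, soft_mask * target_0)
                  + mse((1 - soft_mask) * synth, (1 - soft_mask) * target_1))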