Example 1
def optimize_style(source_image, model, model_name, gan_type, dlatent,
                   iteration):
    resolution = parse_resolution(model_name)

    img = image_reader(source_image, resize=resolution)  # (1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # copy of the image for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / resolution,
                                   mode="bilinear")  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    w = to_tensor(dlatent).requires_grad_()
    optimizer = optim.Adam({w}, lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    for i in progress_bar(range(iteration)):
        optimizer.zero_grad()
        synth_img = forward(model, gan_type, w)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                   perceptual_net, img_p,
                                                   MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()
        optimizer.step()

    return w.detach().cpu().numpy()
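
VGG16_for_Perceptual is defined elsewhere in the repository and is not shown in these snippets. The following is a minimal, hypothetical sketch, assuming it slices a pretrained torchvision VGG16 at the given layer indices and returns the intermediate activations (Example 4's comment labels them conv1_1, conv1_2, conv2_2, conv3_3); the actual definition may differ in detail.

import torch.nn as nn
import torchvision.models as models


class VGG16_for_Perceptual(nn.Module):
    # Hypothetical reconstruction of the feature extractor used above: it wraps a
    # pretrained VGG16 and returns the activations at the requested layer boundaries.
    def __init__(self, n_layers=(2, 4, 14, 21), requires_grad=False):
        super().__init__()
        features = models.vgg16(pretrained=True).features
        bounds = [0] + list(n_layers)
        # One slice of consecutive VGG layers per requested boundary.
        self.slices = nn.ModuleList(
            nn.Sequential(*[features[i] for i in range(lo, hi)])
            for lo, hi in zip(bounds[:-1], bounds[1:]))
        if not requires_grad:
            for p in self.parameters():
                p.requires_grad = False

    def forward(self, x):
        outs = []
        for block in self.slices:
            x = block(x)  # run the next slice on the previous activation
            outs.append(x)
        return outs  # one feature map per boundary in n_layers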
Example 2
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im', default="sample.png")
    parser.add_argument('--src_dir', default="source_image/")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1000, type=int)

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)

    g_mapping, g_synthesis = g_all[0], g_all[1]
    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im)  #(1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # copy of the image for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / args.resolution,
                                   mode='bilinear')  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam({dlatent}, lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                   perceptual_net, img_p,
                                                   MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()

        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_p = perceptual_loss.detach().cpu().numpy()
        loss_m = mse_loss.detach().cpu().numpy()

        loss_list.append(loss_np)
        if i % 10 == 0:
            print(
                "iter{}: loss -- {},  mse_loss --{},  percep_loss --{}".format(
                    i, loss_np, loss_m, loss_p))
            save_image(synth_img.clamp(0, 1),
                       "save_image/encode1/{}.png".format(i))
            #np.save("loss_list.npy",loss_list)
            np.save("latent_W/{}.npy".format(name),
                    dlatent.detach().cpu().numpy())
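
Examples 1-3 all call caluclate_loss (sic), which is defined elsewhere in the repository. A plausible sketch is shown below, under the assumption that it sums a pixel-space MSE with an MSE over the VGG16 feature maps of the downsampled (256, 256) images; the actual implementation may weight the two terms differently.

def caluclate_loss(synth_img, img, perceptual_net, img_p, MSE_Loss, upsample2d):
    # Hypothetical reconstruction of the helper used in Examples 1-3.
    # Pixel-space reconstruction loss on the full-resolution images.
    mse_loss = MSE_Loss(synth_img, img)

    # Perceptual loss: compare VGG16 activations of the (256, 256) images.
    synth_p = upsample2d(synth_img)
    real_feats = perceptual_net(img_p)
    synth_feats = perceptual_net(synth_p)

    perceptual_loss = 0
    for real_f, synth_f in zip(real_feats, synth_feats):
        perceptual_loss = perceptual_loss + MSE_Loss(synth_f, real_f)

    return mse_loss, perceptual_loss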
Example 3
def main():
    parser = argparse.ArgumentParser(
        description=
        "Find latent representation of reference images using perceptual loss")

    parser.add_argument("--src_im", default="sample.png")
    parser.add_argument("--src_dir", default="source_image/")

    iteration = 1000
    args = parser.parse_args()

    model_name = 'stylegan_ffhq1024'
    model = load_generator(model_name)
    resolution = parse_resolution(model_name)
    gan_type = parse_gan_type(model)

    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im,
                       resize=resolution)  # (1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # copy of the image for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / resolution,
                                   mode="bilinear")  # resize to (256, 256) for the VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    # dlatent = torch.randn(1, model.z_space_dim, requires_grad=True, device=device)
    w = to_tensor(sample(model, gan_type)).requires_grad_()
    optimizer = optim.Adam({w}, lr=0.01, betas=(0.9, 0.999), eps=1e-8)
    # optimizer = optim.SGD({dlatent}, lr=1.) #, momentum=0.9, nesterov=True)

    print("Start")
    loss_list = []
    for i in range(iteration):
        optimizer.zero_grad()

        synth_img = forward(model, gan_type, w)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                   perceptual_net, img_p,
                                                   MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()

        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_p = perceptual_loss.detach().cpu().numpy()
        loss_m = mse_loss.detach().cpu().numpy()

        loss_list.append(loss_np)
        if i % 10 == 0:
            print(
                "iter{}: loss -- {},  mse_loss --{},  percep_loss --{}".format(
                    i, loss_np, loss_m, loss_p))
            save_image(synth_img.clamp(0, 1),
                       "save_image/encode1/{}.png".format(i))
            # np.save("loss_list.npy",loss_list)
            np.save("latent_W/{}.npy".format(name), w.detach().cpu().numpy())
Example 4
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im1', default="source_image/sample.png")
    parser.add_argument('--src_im2', default="source_image/0.png")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1000, type=int)

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  #(1,3,1024,1024) -1~1
    img_0 = img_0.to(device)

    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  #(1,3,1024,1024)

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode='bilinear')

    img_p0 = img_0.clone()  #resize for perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  #(1,3,256,256)

    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  #(1,3,256,256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(
        device)  #conv1_1,conv1_2,conv2_2,conv3_3
    dlatent_a = torch.zeros((1, 18, 512), requires_grad=True,
                            device=device)  # appearance latent s1
    dlatent_e = torch.zeros((1, 18, 512), requires_grad=True,
                            device=device)  # expression latent s2
    optimizer = optim.Adam({dlatent_a, dlatent_e},
                           lr=0.01,
                           betas=(0.9, 0.999),
                           eps=1e-8)

    alpha = torch.zeros((1, 18, 512)).to(device)
    # use style codes 4-7 to change the face
    #alpha[:,3:5,:]=1
    alpha[:, 4:8, :] = 1

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img_a = g_synthesis(dlatent_a)
        synth_img_a = (synth_img_a + 1.0) / 2.0

        synth_img_e = g_synthesis(dlatent_e)
        synth_img_e = (synth_img_e + 1.0) / 2.0

        loss_1 = caluclate_contentloss(synth_img_a, perceptual_net, img_p1,
                                       MSE_Loss, upsample2d)
        loss_1.backward()

        # optimizer.step()

        loss_2 = caluclate_styleloss(synth_img_e, img_p0, perceptual_net,
                                     upsample2d)
        loss_2.backward()

        optimizer.step()

        loss_1 = loss_1.detach().cpu().numpy()
        loss_2 = loss_2.detach().cpu().numpy()

        dlatent1 = dlatent_a * alpha + dlatent_e * (1 - alpha)  # mix the latent vectors
        dlatent2 = dlatent_a * (1 - alpha) + dlatent_e * alpha

        synth_img1 = g_synthesis(dlatent1)
        synth_img1 = (synth_img1 + 1.0) / 2.0

        synth_img2 = g_synthesis(dlatent2)
        synth_img2 = (synth_img2 + 1.0) / 2.0

        if i % 10 == 0:
            print("iter{}:   loss0 --{},  loss1 --{}".format(
                i, loss_1, loss_2))
            save_image(synth_img_a.clamp(0, 1),
                       "save_image/exchange/a/{}_a.png".format(i))
            save_image(synth_img_e.clamp(0, 1),
                       "save_image/exchange/e/{}_e.png".format(i))
            save_image(
                synth_img1.clamp(0, 1),
                "save_image/exchange/result1/{}_exchange1.png".format(i))
            save_image(
                synth_img2.clamp(0, 1),
                "save_image/exchange/result2/{}_exchange2.png".format(i))

            np.save("latent_W/exchange1.npy", dlatent1.detach().cpu().numpy())
            np.save("latent_W/exchange2.npy", dlatent2.detach().cpu().numpy())
Example 5
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im1', default="source_image/joker.png")
    parser.add_argument('--src_im2', default="source_image/0.png")
    parser.add_argument('--mask', default="source_image/Blur_mask.png")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1500, type=int)

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  #(1,3,1024,1024) -1~1
    img_0 = img_0.to(device)

    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  #(1,3,1024,1024)

    blur_mask0 = image_reader(args.mask).to(device)
    blur_mask0 = blur_mask0[:, 0, :, :].unsqueeze(0)
    blur_mask1 = blur_mask0.clone()
    blur_mask1 = 1 - blur_mask1

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode='bilinear')

    img_p0 = img_0.clone()  #resize for perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  #(1,3,256,256)

    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  #(1,3,256,256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(
        device)  #conv1_1,conv1_2,conv2_2,conv3_3
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam({dlatent}, lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        loss_wl0 = caluclate_loss(synth_img, img_0, perceptual_net, img_p0,
                                  blur_mask0, MSE_Loss, upsample2d)
        loss_wl1 = caluclate_loss(synth_img, img_1, perceptual_net, img_p1,
                                  blur_mask1, MSE_Loss, upsample2d)
        loss = loss_wl0 + loss_wl1
        loss.backward()

        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_0 = loss_wl0.detach().cpu().numpy()
        loss_1 = loss_wl1.detach().cpu().numpy()

        loss_list.append(loss_np)
        if i % 10 == 0:
            print("iter{}: loss -- {},  loss0 --{},  loss1 --{}".format(
                i, loss_np, loss_0, loss_1))
            save_image(synth_img.clamp(0, 1),
                       "save_image/crossover/{}.png".format(i))
            np.save("latent_W/crossover.npy", dlatent.detach().cpu().numpy())