Example #1
def main():
    G = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=1024))
        ]))
    ## ichao : load the pretrained generator's weight
    G.load_state_dict(
        torch.load("weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
                   map_location=device))
    G.eval()
    G.to(device)
    g_mapping, g_synthesis = G[0], G[1]
    dlatent = torch.randn((1, 512), device=device)  # random latent z
    dlatent = g_mapping(dlatent)  # map z into the intermediate W space
    #dlatent = dlatent.expand(1, 18, 512)
    synth_img = g_synthesis(dlatent)
    synth_img = (synth_img + 1.0) / 2.0  # map output from [-1, 1] to [0, 1]

    save_image(synth_img.clamp(0, 1), "source_image/sample_rand.png")
    counter = 0
    for i, m in enumerate(g_synthesis.blocks.values()):
        counter += 2  # each synthesis block carries two per-layer noise inputs
        m.epi1.top_epi[0].noise.requires_grad = True
        m.epi2.top_epi[0].noise.requires_grad = True
        print(counter)  # running count of trainable noise tensors
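
The commented-out 'truncation' stage above is never defined in these examples. A minimal sketch of such a module is below, assuming the standard StyleGAN truncation trick (interpolate W toward a precomputed mean latent for the coarse layers only); avg_latent, max_layer, and psi are illustrative names, not taken from the repo.

import torch
import torch.nn as nn

class Truncation(nn.Module):
    def __init__(self, avg_latent, max_layer=8, psi=0.7):
        super().__init__()
        self.max_layer = max_layer  # truncate only layers [0, max_layer)
        self.psi = psi              # interpolation strength toward the mean
        self.register_buffer('avg_latent', avg_latent)

    def forward(self, x):
        # x: (N, 18, 512) W+ latents; pull coarse layers toward the mean
        interp = torch.lerp(self.avg_latent.expand_as(x), x, self.psi)
        layer_idx = torch.arange(x.size(1), device=x.device).view(1, -1, 1)
        return torch.where(layer_idx < self.max_layer, interp, x)
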
Example #2
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--latent_file1', default="latent_W/0.npy")
    parser.add_argument('--latent_file2', default="latent_W/sample.npy")

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)

    g_mapping, g_synthesis = g_all[0], g_all[1]

    latents_0 = np.load(args.latent_file1)
    latents_1 = np.load(args.latent_file2)

    latents_0 = torch.tensor(latents_0).to(device)
    latents_1 = torch.tensor(latents_1).to(device)

    for i in range(100):
        alpha = (1 / 100) * i  # runs 0.00 .. 0.99, so latents_0 is never fully reached
        latents = alpha * latents_0 + (1 - alpha) * latents_1  # blend from latents_1 toward latents_0

        synth_img = g_synthesis(latents)
        synth_img = (synth_img + 1.0) / 2.0
        save_image(synth_img.clamp(0, 1),
                   "morph_result/encode1/{}.png".format(i))
Example #3
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--latent_file', default="latent_W/0.npy")

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)

    g_mapping, g_synthesis = g_all[0], g_all[1]

    boundary_name = [
        "stylegan_ffhq_gender_w_boundary.npy",
        "stylegan_ffhq_age_w_boundary.npy",
        "stylegan_ffhq_pose_w_boundary.npy",
        "stylegan_ffhq_eyeglasses_w_boundary.npy",
        "stylegan_ffhq_smile_w_boundary.npy"
    ]
    semantic = ["gender", "age", "pose", "eye_glass", "smile"]

    latents_0 = np.load(args.latent_file)
    latents_0 = torch.tensor(latents_0).to(device)  #.unsqueeze(0)
    for i in range(5):
        boundary = np.load("boundaries/" + boundary_name[i])
        make_morph(boundary, i, latents_0, g_synthesis, semantic)
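
make_morph is not defined in these examples. A hypothetical sketch of what it plausibly does, assuming the InterFaceGAN-style boundaries loaded above are (1, 512) direction vectors and that editing means stepping the W latent along that direction; the step size and frame count are invented for illustration:

def make_morph(boundary, idx, latents_0, g_synthesis, semantic, steps=10):
    direction = torch.tensor(boundary, dtype=torch.float32, device=device)
    for step in range(-steps, steps + 1):
        latents = latents_0 + 0.5 * step * direction  # 0.5 is an assumed step size
        synth_img = (g_synthesis(latents) + 1.0) / 2.0
        save_image(synth_img.clamp(0, 1),
                   "morph_result/{}/{}.png".format(semantic[idx], step + steps))
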
Example #4
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im', default="sample.png")
    parser.add_argument('--src_dir', default="source_image/")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1000, type=int)

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)

    g_mapping, g_synthesis = g_all[0], g_all[1]
    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im)  #(1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  # copy of the image used for the perceptual loss
    upsample2d = torch.nn.Upsample(scale_factor=256 / args.resolution,
                                   mode='bilinear')  # resize to (256, 256) for VGG input
    img_p = upsample2d(img_p)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam([dlatent], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                   perceptual_net, img_p,
                                                   MSE_Loss, upsample2d)
        loss = mse_loss + perceptual_loss
        loss.backward()

        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_p = perceptual_loss.detach().cpu().numpy()
        loss_m = mse_loss.detach().cpu().numpy()

        loss_list.append(loss_np)
        if i % 10 == 0:
            print(
                "iter{}: loss -- {},  mse_loss --{},  percep_loss --{}".format(
                    i, loss_np, loss_m, loss_p))
            save_image(synth_img.clamp(0, 1),
                       "save_image/encode1/{}.png".format(i))
            #np.save("loss_list.npy",loss_list)
            np.save("latent_W/{}.npy".format(name),
                    dlatent.detach().cpu().numpy())
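
caluclate_loss (the repo's spelling) is also left undefined here. A hedged sketch consistent with this call site, assuming VGG16_for_Perceptual returns one feature map per requested layer and the perceptual term is a summed feature-space MSE:

def caluclate_loss(synth_img, img, perceptual_net, img_p, MSE_Loss, upsample2d):
    mse_loss = MSE_Loss(synth_img, img)          # pixel-space term
    synth_p = upsample2d(synth_img)              # match the VGG input size
    synth_feats = perceptual_net(synth_p)
    real_feats = perceptual_net(img_p)
    perceptual_loss = 0
    for s, r in zip(synth_feats, real_feats):    # feature-space term per layer
        perceptual_loss = perceptual_loss + MSE_Loss(s, r)
    return mse_loss, perceptual_loss
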
Example #5
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from collections import OrderedDict
import pickle
import numpy as np
import matplotlib.pyplot as plt

from stylegan_layers import G_mapping, G_synthesis, D_basic

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

resolution = 1024

g_all = nn.Sequential(
    OrderedDict([
        ('g_mapping', G_mapping()),
        #('truncation', Truncation(avg_latent)),
        ('g_synthesis', G_synthesis(resolution=resolution))
    ]))

d_basic = D_basic(resolution=resolution)
convert_weights = True

tensorflow_dir = "../drive/My Drive/stylegan_pretrained_model/"
pytorch_dir = "../drive/My Drive/stylegan_pretrained_model/pytorch/"
weight_name = "karras2019stylegan-ffhq-1024x1024"

if convert_weights:
    # this can be run to get the weights, but you need the reference
    # implementation and weights from the official TensorFlow StyleGAN repo
    import dnnlib, dnnlib.tflib
    dnnlib.tflib.init_tf()
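
The conversion branch is truncated here. Once the TensorFlow weights had been copied into g_all (the part the snippet omits), saving would presumably look something like this, reusing the path variables above; treat it as a sketch, not the repo's actual conversion code:

torch.save(g_all.state_dict(), pytorch_dir + weight_name + ".pt")
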
Example #6
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im1', default="source_image/sample.png")
    parser.add_argument('--src_im2', default="source_image/0.png")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1000, type=int)

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  #(1,3,1024,1024) -1~1
    img_0 = img_0.to(device)

    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  #(1,3,1024,1024)

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode='bilinear')

    img_p0 = img_0.clone()  #resize for perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  #(1,3,256,256)

    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  #(1,3,256,256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(
        device)  #conv1_1,conv1_2,conv2_2,conv3_3
    dlatent_a = torch.zeros((1, 18, 512), requires_grad=True,
                            device=device)  # appearance latent (s1)
    dlatent_e = torch.zeros((1, 18, 512), requires_grad=True,
                            device=device)  # expression latent (s2)
    optimizer = optim.Adam([dlatent_a, dlatent_e],
                           lr=0.01,
                           betas=(0.9, 0.999),
                           eps=1e-8)

    alpha = torch.zeros((1, 18, 512)).to(device)
    # use style codes 4-7 to alter the face
    #alpha[:,3:5,:]=1
    alpha[:, 4:8, :] = 1

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img_a = g_synthesis(dlatent_a)
        synth_img_a = (synth_img_a + 1.0) / 2.0

        synth_img_e = g_synthesis(dlatent_e)
        synth_img_e = (synth_img_e + 1.0) / 2.0

        loss_1 = caluclate_contentloss(synth_img_a, perceptual_net, img_p1,
                                       MSE_Loss, upsample2d)
        loss_1.backward()

        # optimizer.step()

        loss_2 = caluclate_styleloss(synth_img_e, img_p0, perceptual_net,
                                     upsample2d)
        loss_2.backward()

        optimizer.step()

        loss_1 = loss_1.detach().cpu().numpy()
        loss_2 = loss_2.detach().cpu().numpy()

        dlatent1 = dlatent_a * alpha + dlatent_e * (1 - alpha)  # mix the latent vectors
        dlatent2 = dlatent_a * (1 - alpha) + dlatent_e * alpha

        synth_img1 = g_synthesis(dlatent1)
        synth_img1 = (synth_img1 + 1.0) / 2.0

        synth_img2 = g_synthesis(dlatent2)
        synth_img2 = (synth_img2 + 1.0) / 2.0

        if i % 10 == 0:
            print("iter{}:   loss0 --{},  loss1 --{}".format(
                i, loss_1, loss_2))
            save_image(synth_img_a.clamp(0, 1),
                       "save_image/exchange/a/{}_a.png".format(i))
            save_image(synth_img_e.clamp(0, 1),
                       "save_image/exchange/e/{}_e.png".format(i))
            save_image(
                synth_img1.clamp(0, 1),
                "save_image/exchange/result1/{}_exchange1.png".format(i))
            save_image(
                synth_img2.clamp(0, 1),
                "save_image/exchange/result2/{}_exchange2.png".format(i))

            np.save("latent_W/exchange1.npy", dlatent1.detach().cpu().numpy())
            np.save("latent_W/exchange2.npy", dlatent2.detach().cpu().numpy())
Example #7
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im1', default="source_image/joker.png")
    parser.add_argument('--src_im2', default="source_image/0.png")
    parser.add_argument('--mask', default="source_image/Blur_mask.png")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--iteration', default=1500, type=int)

    args = parser.parse_args()

    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))

    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]

    img_0 = image_reader(args.src_im1)  #(1,3,1024,1024) -1~1
    img_0 = img_0.to(device)

    img_1 = image_reader(args.src_im2)
    img_1 = img_1.to(device)  #(1,3,1024,1024)

    blur_mask0 = image_reader(args.mask).to(device)
    blur_mask0 = blur_mask0[:, 0, :, :].unsqueeze(0)
    blur_mask1 = blur_mask0.clone()
    blur_mask1 = 1 - blur_mask1  # complementary mask

    MSE_Loss = nn.MSELoss(reduction="mean")
    upsample2d = torch.nn.Upsample(scale_factor=0.5, mode='bilinear')

    img_p0 = img_0.clone()  #resize for perceptual net
    img_p0 = upsample2d(img_p0)
    img_p0 = upsample2d(img_p0)  #(1,3,256,256)

    img_p1 = img_1.clone()
    img_p1 = upsample2d(img_p1)
    img_p1 = upsample2d(img_p1)  #(1,3,256,256)

    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(
        device)  #conv1_1,conv1_2,conv2_2,conv3_3
    dlatent = torch.zeros((1, 18, 512), requires_grad=True, device=device)
    optimizer = optim.Adam([dlatent], lr=0.01, betas=(0.9, 0.999), eps=1e-8)

    print("Start")
    loss_list = []
    for i in range(args.iteration):
        optimizer.zero_grad()
        synth_img = g_synthesis(dlatent)
        synth_img = (synth_img + 1.0) / 2.0
        loss_wl0 = caluclate_loss(synth_img, img_0, perceptual_net, img_p0,
                                  blur_mask0, MSE_Loss, upsample2d)
        loss_wl1 = caluclate_loss(synth_img, img_1, perceptual_net, img_p1,
                                  blur_mask1, MSE_Loss, upsample2d)
        loss = loss_wl0 + loss_wl1
        loss.backward()

        optimizer.step()

        loss_np = loss.detach().cpu().numpy()
        loss_0 = loss_wl0.detach().cpu().numpy()
        loss_1 = loss_wl1.detach().cpu().numpy()

        loss_list.append(loss_np)
        if i % 10 == 0:
            print("iter{}: loss -- {},  loss0 --{},  loss1 --{}".format(
                i, loss_np, loss_0, loss_1))
            save_image(synth_img.clamp(0, 1),
                       "save_image/crossover/{}.png".format(i))
            np.save("latent_W/crossover.npy", dlatent.detach().cpu().numpy())
Example #8
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument('--resolution', default=1024, type=int)
    parser.add_argument('--src_im', default="sample.png")
    parser.add_argument('--src_dir', default="source_image/")
    parser.add_argument('--save_dir', default="save_image/encode1")
    parser.add_argument(
        '--weight_file',
        default="weight_files/pytorch/karras2019stylegan-ffhq-1024x1024.pt",
        type=str)
    parser.add_argument('--w_iteration', default=1000, type=int)
    parser.add_argument('--n_iteration', default=1000, type=int)
    parser.add_argument('--loop_time', default=5, type=int)
    args = parser.parse_args()

    ## ichao : this is the generator part, you can replace here using the generator you found
    g_all = nn.Sequential(
        OrderedDict([
            ('g_mapping', G_mapping()),
            #('truncation', Truncation(avg_latent)),
            ('g_synthesis', G_synthesis(resolution=args.resolution))
        ]))
    ## ichao : load the pretrained generator's weight
    g_all.load_state_dict(torch.load(args.weight_file, map_location=device))
    g_all.eval()
    g_all.to(device)
    g_mapping, g_synthesis = g_all[0], g_all[1]
    ## ichao : end of generator part

    ## ichao : read the input image (size : 3x1024x1024)
    name = args.src_im.split(".")[0]
    img = image_reader(args.src_dir + args.src_im)  #(1,3,1024,1024) -1~1
    img = img.to(device)

    MSE_Loss = nn.MSELoss(reduction="mean")

    img_p = img.clone()  ## ichao : used for perceptual loss
    ## ichao : resize the image to put into VGG
    upsample2d = torch.nn.Upsample(scale_factor=256 / args.resolution,
                                   mode='bilinear')

    img_p = upsample2d(img_p)
    # alternative VGG16 feature layers: [4, 9, 16, 23] or [2, 4, 14, 21]
    perceptual_net = VGG16_for_Perceptual(n_layers=[2, 4, 14, 21]).to(device)

    mean_w = get_mean_latent(g_mapping, device)
    ## ichao : initialize the latent code we want to optimize
    dlatent = mean_w
    dlatent = dlatent.requires_grad_()
    synth_img = g_synthesis(dlatent)
    #dlatent = -2.0 * torch.randn((1,18,512), device=device) + 1.0
    #dlatent = dlatent.requires_grad_()

    #optimizer = optim.Adam({dlatent}, lr=0.01)
    # Latent code optimization
    loop_iteration = args.w_iteration + args.n_iteration
    print("Start")
    for loop in range(args.loop_time):
        for m in g_synthesis.blocks.values():
            m.epi1.top_epi[0].noise.requires_grad = False
            m.epi2.top_epi[0].noise.requires_grad = False

        print("========Latent Code Optimization=========")
        optimizer = optim.Adam([dlatent],
                               lr=0.01,
                               betas=(0.9, 0.999),
                               eps=1e-8)
        loss_list = []
        for i in range(args.w_iteration):
            optimizer.zero_grad()

            synth_img = g_synthesis(dlatent)
            synth_img = (synth_img + 1.0) / 2.0  # map output from [-1, 1] to [0, 1]
            mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                       perceptual_net, img_p,
                                                       MSE_Loss, 1, 1,
                                                       upsample2d)
            # adjust ratio to control the weighting of the two terms, e.g.:
            # ratio = 0.8
            # loss = (1 - ratio) * mse_loss + ratio * perceptual_loss
            loss = mse_loss + perceptual_loss
            loss.backward()

            optimizer.step()

            loss_np = loss.detach().cpu().numpy()
            loss_p = perceptual_loss.detach().cpu().numpy()
            loss_m = mse_loss.detach().cpu().numpy()

            loss_list.append(loss_np)
            print(
                "iter{}: loss -- {},  mse_loss --{},  percep_loss --{}".format(
                    loop * loop_iteration + i, loss_np, loss_m, loss_p))
            if i % 10 == 0:
                save_image(
                    synth_img.clamp(0, 1), "{dir}/{number}.png".format(
                        dir=args.save_dir, number=loop * loop_iteration + i))

                np.save("latent_W/{}.npy".format(name),
                        dlatent.detach().cpu().numpy())
        # Noise optimization
        print("============Noise Optimization============")
        dlatent.requires_grad = False
        noises = []
        for i, m in enumerate(g_synthesis.blocks.values()):
            m.epi1.top_epi[0].noise.requires_grad = True
            noises.append(m.epi1.top_epi[0].noise)
            m.epi2.top_epi[0].noise.requires_grad = True
            noises.append(m.epi2.top_epi[0].noise)

        optimizer = optim.Adam(noises, lr=5, betas=(0.9, 0.999), eps=1e-8)
        for i in range(args.n_iteration):
            optimizer.zero_grad()
            ## ichao : generate an image using the current latent code
            #dlatent_ex= g_mapping(dlatent)
            synth_img = g_synthesis(dlatent)
            synth_img = (synth_img + 1.0) / 2.0  # map output from [-1, 1] to [0, 1]
            mse_loss, perceptual_loss = caluclate_loss(synth_img, img,
                                                       perceptual_net, img_p,
                                                       MSE_Loss, 0, 1,
                                                       upsample2d)
            # adjust ratio to control the weighting of the two terms, e.g.:
            # ratio = 0.8
            # loss = (1 - ratio) * mse_loss + ratio * perceptual_loss
            loss = mse_loss + perceptual_loss
            loss.backward()

            optimizer.step()

            loss_np = loss.detach().cpu().numpy()
            loss_p = perceptual_loss.detach().cpu().numpy()
            loss_m = mse_loss.detach().cpu().numpy()

            loss_list.append(loss_np)
            print(
                "iter{}: loss -- {},  mse_loss --{},  percep_loss --{}".format(
                    loop * loop_iteration + args.w_iteration + i, loss_np,
                    loss_m, loss_p))
            if i % 10 == 0:
                save_image(
                    synth_img.clamp(0, 1), "{dir}/{number}.png".format(
                        dir=args.save_dir,
                        number=loop * loop_iteration + args.w_iteration + i))
                np.save("noise/{}.npy".format(name), np.array(noises))