Example #1
def main2(seed):
    tflib.init_tf()
    #_G, _D, Gs = pickle.load(open("karras2019stylegan-ffhq-1024x1024.pkl", "rb"))
    _G, _D, Gs = pretrained_networks.load_networks("dummy")
    generator = Generator(Gs, batch_size=1, randomize_noise=False)
    Gs.print_layers()
    rnd = np.random.RandomState(None)
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    synthesis_kwargs = dict(output_transform=fmt,
                            truncation_psi=0.7,
                            minibatch_size=8)
    vectors = gen_img_with_18_512(Gs, fmt, rnd, dst_seeds=seed)
    np.save(os.path.join(result_dir, 'test_changed/0-original.npy'), vectors)
    create_order_npy("0-original.npy", vectors)
    # load all directions
    direction_vectors = "D:/Projects/training_datasets/emotions/style2/*.npy"
    dataset = glob.glob(direction_vectors)
    dataset = natural_sort(dataset)
    for npy in dataset:
        print(npy)
        file_name = os.path.basename(npy)
        file_name_no_extension = os.path.splitext(file_name)[0]
        print(file_name_no_extension)
        #vectors = create_full(vectors, npy, file_name_no_extension, Gs, generator)
        create_all(vectors, npy, file_name_no_extension, Gs, generator, cof)
Example #2
def generate_image(latent_vector):
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img.resize((256, 256))
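A minimal way to drive the helper above, assuming Gs_network has already been unpickled (as in the other examples) and an encoder-produced dlatent file exists; the file path is illustrative:

import numpy as np

latent = np.load('latent_representations/person_01.npy')  # hypothetical (18, 512) dlatent
preview = generate_image(latent)
preview.save('person_01_preview.png')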
Example #3
def init_dependencies():
    tfl.init_tf()
    landmarks_model_path = unpack_bz2(
        get_file('shape_predictor_68_face_landmarks.dat.bz2',
                 LANDMARKS_MODEL_URL,
                 cache_subdir='cache'))
    landmarks_detector = LandmarksDetector(landmarks_model_path)
    ff_model = None  # avoid a NameError below if the ResNet model file is missing
    if os.path.exists(args['load_resnet']):
        print("Loading ResNet Model:")
        ff_model = load_model(args['load_resnet'])

    with open(args['model_dir'], 'rb') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args['batch_size'],
                          clipping_threshold=args['clipping_threshold'],
                          tiled_dlatent=args['tile_dlatents'],
                          model_res=args['model_res'],
                          randomize_noise=args['randomize_noise'])

    perceptual_model = PerceptualModel(args,
                                       perc_model=None,
                                       batch_size=args['batch_size'])
    perceptual_model.build_perceptual_model(generator, discriminator_network)
    return landmarks_detector, ff_model, generator, perceptual_model
Example #4
def mix_pic(npy1, npy2, psi=0.5, begin=0, end=8):
    os.makedirs(config.generated_dir, exist_ok=True)
    print("载入图像向量1...")
    img_src1 = np.load(os.path.join(config.src_latents_dir, npy1))
    print("载入图像向量2...")
    img_src2 = np.load(os.path.join(config.src_latents_dir, npy2))
    print("Blending latent vectors...")
    tmp_vec = img_src1 * psi + img_src2 * (1 - psi)
    new_latent_vector = tmp_vec.reshape((1, 18, 512))
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    global generator
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    generator.set_dlatents(new_latent_vector)
    print("生成图像...")
    new_person_image = generator.generate_images()[0]
    canvas = PIL.Image.new('RGB', (1024, 1024), 'white')
    temp_img = PIL.Image.fromarray(new_person_image, 'RGB')

    filename = npy1[:8] + '_' + npy2[4:8] + 'mixed.png'
    print("保存混合图像...")
    canvas.paste(temp_img, ((0, 0)))
    canvas.save(os.path.join(config.generated_dir, filename))
    npy_file = os.path.join(config.src_latents_dir, filename[:-4] + '.npy')
    np.save(npy_file, new_latent_vector)
    print("Done!")
    return resizeImg(temp_img)
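At its core, mix_pic decodes a linear blend of two (18, 512) dlatents; a standalone sketch of just that blending step (file names are illustrative):

import numpy as np

v1 = np.load('latents/a_01.npy')      # hypothetical saved dlatents, shape (18, 512)
v2 = np.load('latents/b_01.npy')
psi = 0.5                             # 1.0 -> pure v1, 0.0 -> pure v2
mixed = psi * v1 + (1 - psi) * v2
mixed = mixed.reshape((1, 18, 512))   # shape expected by Generator.set_dlatents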
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1., help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=1000, help='Number of optimization steps for each batch', type=int)

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=bool)  # note: argparse type=bool treats any non-empty string as True
    args, other_args = parser.parse_known_args()

    ref_image = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_image = list(filter(os.path.isfile, ref_image))

    already_processed = set(map(lambda x: x.split(".")[0], os.listdir(args.generated_images_dir)))

    ref_images = list(filter(lambda x: x.split("/")[-1].split(".")[0] not in already_processed, ref_image))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size, randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names)+' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
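Assuming the main() above is saved as encode_images.py (as in Puzer's stylegan-encoder), its argparse setup implies an invocation with the three positional directories plus optional flags, for example:

python encode_images.py aligned_images/ generated_images/ latent_representations/ --lr=1.0 --iterations=1000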
Example #6
def main():
    # Initialization
    tflib.init_tf()
    # Load the pre-trained model
    Gs_network = load_Gs(Model)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)

    # Load the latent of a real face to be edited; qing_01.npy can be replaced with your own file name
    os.makedirs(config.dlatents_dir, exist_ok=True)
    person = np.load(os.path.join(config.dlatents_dir, 'qing_01.npy'))

    # Load pre-trained latent directions for changing facial attributes/expressions,
    # including age, horizontal head angle, gender, eye openness, glasses, and smile
    age_direction = np.load('ffhq_dataset/latent_directions/age.npy')
    angle_direction = np.load('ffhq_dataset/latent_directions/angle_horizontal.npy')
    gender_direction = np.load('ffhq_dataset/latent_directions/gender.npy')
    eyes_direction = np.load('ffhq_dataset/latent_directions/eyes_open.npy')
    glasses_direction = np.load('ffhq_dataset/latent_directions/glasses.npy')
    smile_direction = np.load('ffhq_dataset/latent_directions/smile.npy')

    # Combine the face latent with each direction vector and show the edited images
    move_and_show(generator, 0, person, age_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 1, person, angle_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 2, person, gender_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 3, person, eyes_direction, [-3, -2, -1, -0.5, 0, 0.5, 1, 2, 3])
    move_and_show(generator, 4, person, glasses_direction, [-6, -4, -3, -2, 0, 2, 3, 4, 6])
    move_and_show(generator, 5, person, smile_direction, [-3, -2, -1, -0.5, 0, 0.5, 1, 2, 3])
def setup():
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
    generator = Generator(Gs_network, 1, randomize_noise=False)
    perceptual_model = PerceptualModel(256, layer=9, batch_size=1)
    perceptual_model.build_perceptual_model(generator.generated_image)
    return perceptual_model, generator
Example #8
def encode_images(src_dir,
                  generated_images_dir,
                  dlatent_dir,
                  Gs_network,
                  batch_size=1,
                  image_size=256,
                  lr=1.,
                  iterations=1000,
                  randomize_noise=False):

    ref_images = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % src_dir)

    os.makedirs(generated_images_dir, exist_ok=True)
    os.makedirs(dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()

    generator = Generator(Gs_network,
                          batch_size,
                          randomize_noise=randomize_noise)
    perceptual_model = PerceptualModel(image_size,
                                       layer=9,
                                       batch_size=batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, batch_size),
                             total=len(ref_images) // batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=iterations,
                                       learning_rate=lr)
        pbar = tqdm(op, leave=False, total=iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(generated_images_dir, f'{img_name}.png'),
                     'PNG')
            np.save(os.path.join(dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
        return generated_dlatents
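A hedged programmatic call of encode_images, assuming Gs_network has already been unpickled as in the other examples; the directory names are illustrative. Note that, as written, the function returns the dlatents of its first batch only:

dlatents = encode_images('aligned_images/',
                         'generated_images/',
                         'latent_representations/',
                         Gs_network,
                         batch_size=1,
                         iterations=500)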
Example #9
    def initUtils(self):
        tflib.init_tf()
        with dnnlib.util.open_url(stylegan_utils.URL_FFHQ,
                                  cache_dir=config.cache_dir) as f:
            generator_network, discriminator_network, Gs = pickle.load(f)

        self.generator = Generator(Gs, batch_size=1, randomize_noise=False)
        self.Gs = Gs
        return self.generator, self.Gs
Example #10
def main():
    # Initialization
    tflib.init_tf()
    # Load the pre-trained model
    Gs_network = load_Gs(Model)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)

    # Load the latent of a real face to be edited; the .npy file name can be replaced with your own
    os.makedirs(config.dlatents_dir, exist_ok=True)
    target = np.load(os.path.join(config.dlatents_dir, '1_01_1.npy'))
Example #11
def ImageFromVec(npy):
    img_src = np.load(os.path.join(config.src_latents_dir, npy))
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    global generator
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    generator.set_dlatents(img_src)
    print("生成图像...")
    new_image = generator.generate_images()[0]
    temp_img = PIL.Image.fromarray(new_image, 'RGB')
    return resizeImg(temp_img)
def setup():
    tflib.init_tf()
    # Load pre-trained network.
    url = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'  # karras2019stylegan-ffhq-1024x1024.pkl
    with dnnlib.util.open_url(url, cache_dir=config.cache_dir) as f:
        _G, _D, Gs = pickle.load(f)
    generator = Generator(
        Gs, batch_size=1,
        randomize_noise=False)  # -- RUNNING >1 TIMES THROWS ERROR
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    return [_G, _D, Gs, generator, fmt]
    def encode(self, src_dir, generated_images_dir, dlatent_dir,
        batch_size=1, image_size=256, lr=1, iterations=1000, randomize_noise=False):
        """
        Find latent representation of reference images using perceptual loss
        Params:
            src_dir: Directory with images for encoding
            generated_images_dir: Directory for storing generated images
            dlatent_dir: Directory for storing dlatent representations
            batch_size: Batch size for generator and perceptual model
            image_size: Size of images for perceptual model
            lr: Learning rate for perceptual model
            iterations: Number of optimization steps for each batch
            randomize_noise: Add noise to dlatents during optimization
        """
        ref_images = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]
        ref_images = list(filter(os.path.isfile, ref_images))

        if len(ref_images) == 0:
            raise Exception('%s is empty' % src_dir)

        os.makedirs(generated_images_dir, exist_ok=True)
        os.makedirs(dlatent_dir, exist_ok=True)

        # Initialize generator and perceptual model
        tflib.init_tf()
        with dnnlib.util.open_url(self.URL_FFHQ, cache_dir=config.cache_dir) as f:
            generator_network, discriminator_network, Gs_network = pickle.load(f)

        generator = Generator(Gs_network, batch_size, randomize_noise=randomize_noise)
        perceptual_model = PerceptualModel(image_size, layer=9, batch_size=batch_size)
        perceptual_model.build_perceptual_model(generator.generated_image)

        # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
        for images_batch in tqdm(self.split_to_batches(ref_images, batch_size), total=len(ref_images)//batch_size):
            names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

            perceptual_model.set_reference_images(images_batch)
            op = perceptual_model.optimize(generator.dlatent_variable, iterations=iterations, learning_rate=lr)
            pbar = tqdm(op, leave=False, total=iterations)
            for loss in pbar:
                pbar.set_description(' '.join(names)+' Loss: %.2f' % loss)
            print(' '.join(names), ' loss:', loss)

            # Generate images from found dlatents and save them
            generated_images = generator.generate_images()
            generated_dlatents = generator.get_dlatents()
            for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
                img = PIL.Image.fromarray(img_array, 'RGB')
                img.save(os.path.join(generated_images_dir, f'{img_name}.png'), 'PNG')
                np.save(os.path.join(dlatent_dir, f'{img_name}.npy'), dlatent)

            generator.reset_dlatents()
Example #14
def generate_image(latent_vector):
    from encoder.generator_model import Generator
    import dnnlib
    import dnnlib.tflib as tflib
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img.resize((512, 512))
Example #15
def setup(opts):
    # Initialize generator and perceptual model
    global perceptual_model
    global generator
    tflib.init_tf()
    model = opts['checkpoint']
    print("open model %s" % model)
    with open(model, 'rb') as file:
        G, D, Gs = pickle.load(file)
    Gs.print_layers()
    generator = Generator(Gs, batch_size=1, randomize_noise=False)
    perceptual_model = PerceptualModel(512, layer=9, batch_size=1)
    perceptual_model.build_perceptual_model(generator.generated_image)
    return generator
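A sketch of how the returned generator and the global perceptual_model might then be used, following the optimization pattern of the earlier encoder examples; the checkpoint path and image path are illustrative:

generator = setup({'checkpoint': 'network-snapshot-010000.pkl'})   # hypothetical pkl
perceptual_model.set_reference_images(['aligned_images/person_01.png'])
for loss in perceptual_model.optimize(generator.dlatent_variable,
                                      iterations=500, learning_rate=1.0):
    pass  # could log the loss here
dlatent = generator.get_dlatents()[0]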
Example #16
def main():
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)

    os.makedirs(config.dlatents_dir, exist_ok=True)
    target = np.load(os.path.join(config.dlatents_dir, '1_01.npy'))

    #move_and_show(generator,target, "age", [-20, -16, -12, -8, 0, 8, 12, 16, 20])
    #move_and_show(generator,target, "race_black", [-40, -32, -24, -16, 0, 16, 24, 32, 40])
    #move_and_show(generator,target, "gender", [-20, -16, -12, -8, 0, 8, 12, 16, 20])
    move_and_show(generator, target, "eyes_open", [
        -12, -11, -10, -9, -8, -7, -6, -5, -4, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24
    ])
Example #17
def setup(opts):
	tflib.init_tf()
	model = opts['checkpoint']
	print("open model %s" % model)
	with open(model, 'rb') as file:
		G, D, Gs = pickle.load(file)
	Gs.print_layers()
	# load latent representation
	p1 = inputs['people_vector']
	global latent_vector_1
	latent_vector_1 = np.load(p1)
	p2 = inputs['people_vector2']
	global latent_vector_2
	latent_vector_2 = np.load(p2)
	global generator
	generator = Generator(Gs, batch_size=1, randomize_noise=False)
	return generator
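Example #17 loads two face dlatents but does not show the blend itself; a hedged sketch of interpolating between them with the returned generator (step count and output paths are illustrative):

import numpy as np
import PIL.Image

for i, w in enumerate(np.linspace(0.0, 1.0, 5)):
    mixed = (1.0 - w) * latent_vector_1 + w * latent_vector_2
    generator.set_dlatents(mixed.reshape((1, 18, 512)))
    frame = generator.generate_images()[0]
    PIL.Image.fromarray(frame, 'RGB').save('mix_%02d.png' % i)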
Example #18
    def load_snapshot(self):
        # Load pre-trained network.
        tflib.init_tf()

        self.rnd = np.random.RandomState()
        #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-final.pkl")
        #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-009247.pkl")
        #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-008044.pkl")
        #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-024287.pkl")
        url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-013458.pkl")
        #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-010450.pkl")
        #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-015263.pkl")
        #url = os.path.abspath("marrow/00021-sgan-dense512-8gpu/network-snapshot-011653.pkl")
        with open(url, 'rb') as f:
            self._G, self._D, self.Gs = pickle.load(f)
            self.generator = Generator(self.Gs, batch_size=1, randomize_noise=False)
        print(self.Gs)
Example #19
def onLoadFile():
    global generator, encoderGenerator, SIZE_LATENT_SPACE, OUTPUT_RESOLUTION, pointsSaved, modelPath

    # modelPath = filedialog.askopenfilename(initialdir = PATH_LOAD_FILE, title = "Select file")
    modelPath = "/media/leandro/stuff/Data/ahegao/network-snapshot-011225.pkl"
    with open(modelPath, 'rb') as file:
        _, _, generator = pickle.load(file)
        SIZE_LATENT_SPACE = int(generator.list_layers()[0][1].shape[1])
        OUTPUT_RESOLUTION = int(generator.list_layers()[-1][1].shape[2])

        root.title('PGAN Generator - %s' % modelPath)

        # if canvas:
        #     canvas.delete("all")
        if pointList:
            pointList.delete(0, tk.END)
        pointsSaved = []

        encoderGenerator = Generator(generator, 1)
Example #20
def generate_image(latent_vector):
    os.chdir('/home/bizon/CBIS-DDSM/other/fuse_face_flask/stylegan-encoder')
    print('dir changed')

    #################################
    # init generator #
    URL_FFHQ = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)
    #################################

    os.chdir('/home/bizon/CBIS-DDSM/other/fuse_face_flask')
    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img
Example #21
def choice(choice, npyfile, filename):
    tflib.init_tf()
    Gs_network = load_Gs(Model)
    global generator, flag, fname
    fname = filename
    flag = choice
    generator = Generator(Gs_network, batch_size=1, randomize_noise=False)

    os.makedirs(config.dlatents_dir, exist_ok=True)
    # person = np.load(os.path.join(config.dlatents_dir, 'Scarlett Johansson01_01.npy'))
    person = np.load(os.path.join(config.src_latents_dir,
                                  npyfile))  #(1,18,512)
    # Loading already learned latent directions
    direction_list = []
    direction_list.append(np.load('ffhq_dataset/latent_directions/age.npy'))
    direction_list.append(
        np.load('ffhq_dataset/latent_directions/angle_horizontal.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/gender.npy'))
    direction_list.append(
        np.load('ffhq_dataset/latent_directions/eyes_open.npy'))
    direction_list.append(
        np.load('ffhq_dataset/latent_directions/glasses.npy'))
    direction_list.append(np.load('ffhq_dataset/latent_directions/smile.npy'))
    direction_list.append(
        np.load('ffhq_dataset/latent_directions/race_white.npy'))
    direction_list.append(
        np.load('ffhq_dataset/latent_directions/race_yellow.npy'))
    direction_list.append(
        np.load('ffhq_dataset/latent_directions/race_black.npy'))

    coeffs_list = []
    coeffs_list.append([-20, -16, -12, -8, 0, 8, 12, 16, 20])
    coeffs_list.append([-40, -32, -24, -16, 0, 16, 24, 32, 40])
    coeffs_list.append([-40, -32, -24, -16, 0, 16, 24, 32, 40])
    coeffs_list.append([-8, -6, -4, -2, 0, 2, 4, 6, 8])
    coeffs_list.append([-16, -12, -8, -4, 0, 4, 8, 12, 16])
    coeffs_list.append([-16, -12, -8, -4, 0, 4, 8, 12, 16])
    coeffs_list.append([-10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10])
    coeffs_list.append([-10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10])
    coeffs_list.append([-10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10])
    move_and_show(person, direction_list[choice], coeffs_list[choice])
Example #22
    def __init__(self):
        root_dir = 'latent_representations/'
        listdir = []
        sort_listdir = []
        num = {}
        for i, f in enumerate(os.listdir(root_dir)):
            listdir.append(f)
            num[i] = int(f.split('_')[0])
        num = sorted(num.items(), key=operator.itemgetter(1))
        for i in range(len(num)):
            sort_listdir.append(listdir[num[i][0]])
        print(sort_listdir)
        self.latent_vectors = [np.load(root_dir + f) for f in sort_listdir]
        self.directions = {'smile': np.load('ffhq_dataset/latent_directions/smile.npy'),
                          'gender': np.load('ffhq_dataset/latent_directions/gender.npy'),
                          'age' : np.load('ffhq_dataset/latent_directions/age.npy')}

        self.new_latent_vector = np.zeros((18,512))
        tflib.init_tf()	
        with open('karras2019stylegan-ffhq-1024x1024.pkl', 'rb') as f:
            generator_network, discriminator_network, Gs_network = pickle.load(f)
        self.generator = Generator(Gs_network, batch_size=1, randomize_noise=True)
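The constructor above only loads the sorted latents and three directions; a hedged sketch of an edit method that could live on the same class (the method name is hypothetical, and PIL.Image is assumed to be imported at module level):

    def edit(self, index, direction_name, coeff):
        # Shift a stored face latent along a learned direction and render it.
        vec = self.latent_vectors[index] + coeff * self.directions[direction_name]
        self.generator.set_dlatents(vec.reshape((1, 18, 512)))
        img_array = self.generator.generate_images()[0]
        return PIL.Image.fromarray(img_array, 'RGB')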
os.makedirs(args.dlabel_dir, exist_ok=True)

# Initialize generator and perceptual model

# load network
network_pkl = misc.locate_network_pkl(args.results_dir)
print('Loading network from "%s"...' % network_pkl)
G, D, Gs = misc.load_network_pkl(args.results_dir, None)

# initiate random input
latents = misc.random_latents(1, Gs, random_state=np.random.RandomState(800))
labels = np.random.rand(1, args.labels_size)

generator = Generator(Gs,
                      labels_size=572,
                      batch_size=1,
                      clipping_threshold=args.clipping_threshold,
                      model_res=args.resolution)

perc_model = None
if (args.use_lpips_loss > 0.00000001):
    with open(args.load_perc_model, "rb") as f:
        perc_model = pickle.load(f)

ff_model = None
beautyrater_model = beautyrater.BeautyRater(args.load_vgg_beauty_rater_model)
facenet_model = facenet.FaceNet(args.load_facenet_model)
perceptual_model = PerceptualModel(args,
                                   perc_model=perc_model,
                                   batch_size=args.batch_size)
perceptual_model.build_perceptual_model(generator)
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual losses', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='', help='Start with embeddings from directory')
    parser.add_argument('--dlatent_avg', default='', help='Use dlatent from file specified here for truncation instead of dlatent_avg from Gs')
    parser.add_argument('--model_url', default='https://drive.google.com/uc?id=1aPjeguDIRE0hs4_PHiRghK1Y2Qh3zOi1', help='Fetch a StyleGAN model to train on from this URL') # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)
    parser.add_argument('--optimizer', default='ggt', help='Optimization algorithm used for optimizing dlatents')

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the Resnet model', type=int)
    parser.add_argument('--lr', default=0.25, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--decay_steps', default=4, help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--early_stopping', default=True, help='Stop early once training stabilizes', type=str2bool, nargs='?', const=True)
    parser.add_argument('--early_stopping_threshold', default=0.5, help='Stop after this threshold has been reached', type=float)
    parser.add_argument('--early_stopping_patience', default=10, help='Number of iterations to wait below threshold', type=int)    
    parser.add_argument('--load_effnet', default='data/finetuned_effnet.h5', help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument('--load_resnet', default='data/finetuned_resnet.h5', help='Model to load for ResNet approximation of dlatents')
    parser.add_argument('--use_preprocess_input', default=True, help='Call process_input() first before using feed forward net', type=str2bool, nargs='?', const=True)
    parser.add_argument('--use_best_loss', default=True, help='Output the lowest loss value found as the solution', type=str2bool, nargs='?', const=True)
    parser.add_argument('--average_best_loss', default=0.25, help='Do a running weighted average with the previous best dlatents found', type=float)
    parser.add_argument('--sharpen_input', default=True, help='Sharpen the input images', type=str2bool, nargs='?', const=True)

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5, help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=200, help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_lpips_loss', default=100, help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_l1_penalty', default=0.5, help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_discriminator_loss', default=0.5, help='Use trained discriminator to evaluate realism.', type=float)
    parser.add_argument('--use_adaptive_loss', default=False, help='Use the adaptive robust loss function from Google Research for pixel and VGG feature loss.', type=str2bool, nargs='?', const=True)

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=str2bool, nargs='?', const=True)
    parser.add_argument('--tile_dlatents', default=False, help='Tile dlatents to use a single vector at each scale', type=str2bool, nargs='?', const=True)
    parser.add_argument('--clipping_threshold', default=2.0, help='Stochastic clipping of gradient values outside of this threshold', type=float)

    # Masking params
    parser.add_argument('--load_mask', default=False, help='Load segmentation masks', type=str2bool, nargs='?', const=True)
    parser.add_argument('--face_mask', default=True, help='Generate a mask for predicting only the face area', type=str2bool, nargs='?', const=True)
    parser.add_argument('--use_grabcut', default=True, help='Use grabcut algorithm on the face mask to better segment the foreground', type=str2bool, nargs='?', const=True)
    parser.add_argument('--scale_mask', default=1.4, help='Look over a wider section of foreground for grabcut', type=float)
    parser.add_argument('--composite_mask', default=True, help='Merge the unmasked area back into the generated image', type=str2bool, nargs='?', const=True)
    parser.add_argument('--composite_blur', default=8, help='Size of blur filter to smoothly composite the images', type=int)

    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False, help='Generate videos of the optimization process', type=bool)
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int)
    parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int)
    parser.add_argument('--video_skip', default=1, help='Only write every n frames (1 = write every frame)', type=int)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations # Calculate steps as a percent of total iterations

    if args.output_video:
      import cv2
      synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=False), minibatch_size=args.batch_size)

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size, clipping_threshold=args.clipping_threshold, tiled_dlatent=args.tile_dlatents, model_res=args.model_res, randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir=config.cache_dir) as f:
            perc_model =  pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        if args.output_video:
          video_out = {}
          for name in names:
            video_out[name] = cv2.VideoWriter(os.path.join(args.video_dir, f'{name}.avi'),cv2.VideoWriter_fourcc(*args.video_codec), args.video_frame_rate, (args.video_size,args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''): # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')),axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents,dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if (ff_model is not None): # predict initial dlatents with ResNet model
                if (args.use_preprocess_input):
                    dlatents = ff_model.predict(preprocess_input(load_images(images_batch,image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(load_images(images_batch,image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping: # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        avg_loss = 0.5 * avg_loss + (prev_loss - loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold: # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience: # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:
                    best_dlatent = 0.25 * best_dlatent + 0.75 * generator.get_dlatents()
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
              batch_frames = generator.generate_images()
              for i, name in enumerate(names):
                video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize((args.video_size,args.video_size),PIL.Image.LANCZOS)
                video_out[name].write(cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(generated_images, generated_dlatents, images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if args.composite_mask and mask_img is not None and os.path.isfile(mask_img):
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize((width, height))
                imask = imask.filter(ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask)/255
                mask = np.expand_dims(mask,axis=-1)
                img_array = mask*np.array(img_array) + (1.0-mask)*np.array(orig_img)
                img_array = img_array.astype(np.uint8)
                #img_array = np.where(mask, np.array(img_array), orig_img)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
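Assuming the script above follows the pbaylies-style encode_images.py layout, its argparse setup implies an invocation such as (script name assumed):

python encode_images.py aligned_images/ generated_images/ latent_representations/ --iterations=100 --face_mask=True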
Example #25
import PIL.Image
import numpy as np
import dnnlib
import dnnlib.tflib as tflib
import config
from encoder.generator_model import Generator

import matplotlib.pyplot as plt

URL_FFHQ = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'

tflib.init_tf()
with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
    generator_network, discriminator_network, Gs_network = pickle.load(f)

generator = Generator(Gs_network, batch_size=1, randomize_noise=False)


def generate_image(latent_vector):
    latent_vector = latent_vector.reshape((1, 18, 512))
    generator.set_dlatents(latent_vector)
    img_array = generator.generate_images()[0]
    img = PIL.Image.fromarray(img_array, 'RGB')
    return img.resize((256, 256))


def move_and_show(latent_vector, direction, coeffs):
    fig, ax = plt.subplots(1, len(coeffs), figsize=(15, 10), dpi=80)
    for i, coeff in enumerate(coeffs):
        new_latent_vector = latent_vector.copy()
        new_latent_vector[:8] = (latent_vector + coeff * direction)[:8]
        # Render each shifted latent and display it in its own panel
        ax[i].imshow(generate_image(new_latent_vector))
        ax[i].set_title('Coeff: %0.1f' % coeff)
    [x.axis('off') for x in ax]
    plt.show()
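A hedged usage of move_and_show, reusing the module-level generator above; the dlatent file and direction file paths follow the conventions of the other examples and are illustrative:

person = np.load('latent_representations/person_01.npy')              # hypothetical dlatent
smile_direction = np.load('ffhq_dataset/latent_directions/smile.npy')
move_and_show(person, smile_direction, [-2, -1, 0, 1, 2])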
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')

    parser.add_argument(
        '--network_pkl',
        default='gdrive:networks/stylegan2-ffhq-config-f.pkl',
        help='Path to local copy of stylegan2-ffhq-config-f.pkl')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--lr',
                        default=1.,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--iterations',
                        default=1000,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument('--layer',
                        default=9,
                        help='Final layer for perceptual model',
                        type=int)
    parser.add_argument('--model',
                        default="vgg16",
                        help='Model for perceptual model')

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=bool)
    args, other_args = parser.parse_known_args()

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(
        args.network_pkl)

    generator = Generator(Gs_network,
                          args.batch_size,
                          randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size,
                                       layer=args.layer,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image,
                                            args.model)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            png_directory = args.generated_images_dir + f'{img_name}/{args.model}/lr{args.lr}'
            npy_directory = args.dlatent_dir + f'{img_name}/{args.model}/lr{args.lr}'
            os.makedirs(png_directory, exist_ok=True)
            os.makedirs(npy_directory, exist_ok=True)
            img.save(os.path.join(png_directory, f'layer{args.layer}.png'),
                     'PNG')
            np.save(os.path.join(npy_directory, f'layer{args.layer}.npy'),
                    dlatent)

        generator.reset_dlatents()
def main():
    src_dir = os.path.join("output", "aligned_images")
    generated_images_dir = os.path.join("output", "generated_images")
    generated_videos_dir = os.path.join("output", "generated_videos")
    dlatent_dir = os.path.join("output", "latent_representations")

    # for now it's unclear if larger batch leads to better performance/quality
    # Also, I may have broken >1 batch sizes, but happily they didn't seem to provide meaningful time savings anyway.
    batch_size = 1

    # Perceptual model params
    image_size = 1024
    iterations = 600
    # Generator params
    randomize_noise = False

    ref_images = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]
    ref_images = list(sorted(filter(os.path.isfile, ref_images)))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % src_dir)

    os.makedirs(generated_images_dir, exist_ok=True)
    os.makedirs(dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # I saved the FFHQ network as a pickle file to my hard drive to avoid relying on the Nvidia Google Drive share.
    local_network_path = "karras2019stylegan-ffhq-1024x1024.pkl"
    if os.path.exists(local_network_path):
        with open(local_network_path, "rb") as f:
            generator_network, discriminator_network, Gs_network = pickle.load(
                f)
    else:
        URL_FFHQ = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ'  # karras2019stylegan-ffhq-1024x1024.pkl
        with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
            generator_network, discriminator_network, Gs_network = pickle.load(
                f)

    # Set tiled_dlatent=False if you want to generate an 18x512 dlatent like in Puzer's original repo.
    # Set tiled_dlatent=True if you want to generate a 1x512 dlatent (subsequently tiled back to 18x512)
    # like the mapping network outputs.
    generator = Generator(Gs_network,
                          batch_size,
                          randomize_noise=randomize_noise,
                          tiled_dlatent=True)
    perceptual_model = PerceptualDiscriminatorModel(image_size,
                                                    batch_size=batch_size)
    perceptual_model.build_perceptual_model(discriminator_network,
                                            generator.generator_output,
                                            generator.generated_image,
                                            generator.dlatent_variable)

    # Optimize (only) dlatents by minimizing perceptual loss
    # between reference and generated images in feature space
    images = []
    video_frames = 100  # Set to >0 to save a video of the training, or to 0 to disable.
    if video_frames > 0:
        steps_per_frame = iterations / video_frames
        steps_until_frame = 0
    for images_batch in split_to_batches(ref_images, batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(iterations=iterations)
        pbar = tqdm(op, leave=False, total=iterations)
        best_loss = None
        best_dlatent = None
        dlatent_frames = []
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if video_frames > 0:
                # If we're recording a video, consider taking a dlatent snapshot for later assembly.
                if steps_until_frame <= 0:
                    dlatent_frames.append(generator.get_dlatents()[0])
                    steps_until_frame += steps_per_frame
                steps_until_frame -= 1.
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        # Generate images from found dlatents and save them.
        generated_images = generator.generate_images(dlatents=best_dlatent)
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            images.append(
                PIL.Image.open(os.path.join(src_dir,
                                            "{}.png".format(img_name))))
            images.append(img)
            img.save(
                os.path.join(generated_images_dir, '{}.png'.format(img_name)),
                'PNG')
            np.save(os.path.join(dlatent_dir, '{}.npy'.format(img_name)),
                    dlatent)
        generator.reset_dlatents()
        bw_utils.save_images_to_grid(os.path.join(generated_images_dir,
                                                  "grid.png"),
                                     images,
                                     len(images),
                                     2, (1024, 1024),
                                     with_numbers=False)

        # Save video of training
        if video_frames > 0:
            os.makedirs(generated_videos_dir, exist_ok=True)
            video_name = os.path.join(generated_videos_dir, " ".join(names))
            # np.save(video_name + ".npy", np.array(dlatent_frames))
            # print("Saved dlatent video frames as {}.".format(video_name + ".npy"))
            image_generator = bw_utils.dlatents_image_generator_fn(
                dlatent_frames, Gs_network)
            bw_utils.save_video(image_generator, video_name)
Example #28
def encode(a, b, c):
    args1 = {
        'src_dir': a,
        'generated_images_dir': b,
        'dlatent_dir': c,
        'batch_size': len(os.listdir(a)),
        'average_best_loss': 0.25,
        'clipping_threshold': 2.0,
        'composite_blur': 8,
        'composite_mask': True,
        'data_dir': 'data',
        'decay_rate': 0.9,
        'decay_steps': 24.0,
        'dlatent_avg': '',
        'early_stopping': True,
        'early_stopping_patience': 10,
        'early_stopping_threshold': 0.5,
        'face_mask': False,
        'image_size': 256,
        'iterations': 600,
        'load_effnet': 'data/finetuned_effnet.h5',
        'load_last': '',
        'load_mask': False,
        'load_resnet': 'data/finetuned_resnet.h5',
        'lr': 0.35,
        'mask_dir': 'masks',
        'model_res': 1024,
        'model_url': 'gdrive:networks/stylegan2-ffhq-config-f.pkl',
        'optimizer': 'ggt',
        'output_video': False,
        'randomize_noise': False,
        'resnet_image_size': 256,
        'scale_mask': 1.4,
        'sharpen_input': True,
        'tile_dlatents': False,
        'use_adaptive_loss': False,
        'use_best_loss': True,
        'use_discriminator_loss': 0.5,
        'use_grabcut': True,
        'use_l1_penalty': 0.5,
        'use_lpips_loss': 100,
        'use_mssim_loss': 200,
        'use_pixel_loss': 1.5,
        'use_preprocess_input': True,
        'use_vgg_layer': 9,
        'use_vgg_loss': 0.4,
        'vgg_url':
        'https://rolux.org/media/stylegan/vgg16_zhang_perceptual.pkl',
        'video_codec': 'MJPG',
        'video_dir': 'videos',
        'video_frame_rate': 24,
        'video_size': 512,
        'video_skip': 1
    }

    class Struct:
        def __init__(self, **entries):
            self.__dict__.update(entries)

    args = Struct(**args1)
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
        if x[0] not in '._'
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    # tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(
        args.model_url)

    generator = Generator(Gs_network,
                          args.batch_size,
                          randomize_noise=args.randomize_noise)
    if len(ref_images) == 0:
        return generator
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(args.vgg_url,
                                  cache_dir='.stylegan2-cache') as f:
            perc_model = pickle.load(f)

    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(
                    os.path.join(args.load_last, f'{name}.npy')),
                                    axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if ff_model is not None:  # predict initial dlatents with the ResNet/EfficientNet model
                if (args.use_preprocess_input):
                    dlatents = ff_model.predict(
                        preprocess_input(
                            load_images(images_batch,
                                        image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping:  # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        avg_loss = 0.5 * avg_loss + (prev_loss -
                                                     loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold:  # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience:  # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
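            # Note on the early-stopping logic above: avg_loss is an exponentially decaying
            # accumulator of the per-step improvement (prev_loss - current loss); once the
            # improvement stays below early_stopping_threshold for more than
            # early_stopping_patience consecutive steps, the loop breaks early.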
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:  # blend the previous best dlatent with the current one
                    best_dlatent = 0.25 * best_dlatent + 0.75 * generator.get_dlatents()
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            vid_count += 1  # advance the frame counter so --video_skip actually skips frames
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(
                generated_images, generated_dlatents, images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if args.composite_mask and mask_img is not None and os.path.isfile(
                    mask_img):
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize(
                    (width, height))
                imask = imask.filter(
                    ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask) / 255
                mask = np.expand_dims(mask, axis=-1)
                img_array = mask * np.array(img_array) + (
                    1.0 - mask) * np.array(orig_img)
                img_array = img_array.astype(np.uint8)
                #img_array = np.where(mask, np.array(img_array), orig_img)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()

    return generator  # return only after all batches have been encoded
Beispiel #29
0
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual losses', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Output directories setting
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('guessed_images_dir', help='Directory for storing initially guessed images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')

    # General params
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)
    parser.add_argument('--use_resnet', default=True, help='Use pretrained ResNet for approximating dlatents', type=lambda x: (str(x).lower() == 'true'))

    # Perceptual model params
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--lr', default=0.02, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--decay_steps', default=10, help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the Resnet model', type=int)

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4, help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5, help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=100, help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_lpips_loss', default=100, help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_l1_penalty', default=1, help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--tile_dlatents', default=False, help='Tile dlatents to use a single vector at each scale', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--clipping_threshold', default=2.0, help='Stochastic clipping of gradient values outside of this threshold', type=float)

    # Masking params
    parser.add_argument('--mask_dir', default='masks/latent_interpolation', help='Directory for storing optional masks')
    parser.add_argument('--face_mask', default=False, help='Generate a mask for predicting only the face area', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--use_grabcut', default=True, help='Use grabcut algorithm on the face mask to better segment the foreground', type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--scale_mask', default=1.5, help='Look over a wider section of foreground for grabcut', type=float)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = sorted(list(filter(os.path.isfile, ref_images)))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    # Create output directories
    os.makedirs('data', exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.guessed_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    if args.face_mask:
        os.makedirs(args.mask_dir, exist_ok=True)

    # Initialize generator
    tflib.init_tf()
    with open_url(url_styleGAN, cache_dir='cache') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(model=Gs_network,
                          batch_size=args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    # Initialize perceptual model
    perc_model = None
    if args.use_lpips_loss > 1e-7:
        with open_url(url_VGG_perceptual, cache_dir='cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    # Initialize ResNet model
    resnet_model = None
    if args.use_resnet:
        print("\nLoading ResNet Model:")
        resnet_model_fn = 'data/finetuned_resnet.h5'
        gdown.download(url_resnet, resnet_model_fn, quiet=True)
        resnet_model = load_model(resnet_model_fn)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        perceptual_model.set_reference_images(images_batch)

        # predict initial dlatents with ResNet model
        if resnet_model is not None:
            dlatents = resnet_model.predict(preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
            generator.set_dlatents(dlatents)
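            # The fine-tuned ResNet is assumed to predict one (18, 512) W+ dlatent per image
            # (the same shape used by the reshape calls elsewhere in these examples), giving
            # the optimizer a better starting point than a default initialization.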

        # Generate and save initially guessed images
        initial_dlatents = generator.get_dlatents()
        initial_images = generator.generate_images()
        for img_array, dlatent, img_name in zip(initial_images, initial_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.guessed_images_dir, f'{img_name}.png'), 'PNG')

        # Optimization process to find best latent vectors
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        progress_bar = tqdm(op, leave=False, total=args.iterations)
        best_loss = None
        best_dlatent = None
        for loss_dict in progress_bar:
            progress_bar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            generator.stochastic_clip_dlatents()
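            # stochastic_clip_dlatents() presumably re-samples or clips dlatent components
            # lying outside the configured --clipping_threshold, keeping the latents in a
            # plausible range between optimization steps.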
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        # Save found dlatents
        generator.set_dlatents(best_dlatent)
        generated_dlatents = generator.get_dlatents()
        for dlatent, img_name in zip(generated_dlatents, names):
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()

    # Concatenate and save dlatent vectors
    list_dlatents = sorted(os.listdir(args.dlatent_dir))
    final_w_vectors = np.array([np.load(os.path.join(args.dlatent_dir, dlatent)) for dlatent in list_dlatents])
    np.save(os.path.join(args.dlatent_dir, 'output_vectors.npy'), final_w_vectors)
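    # output_vectors.npy stacks the individual (18, 512) dlatents into one array; note that
    # it is written into dlatent_dir itself, so a re-run would also pick it up in list_dlatents.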

    # Perform face morphing by interpolating the latent space
    w1, w2 = create_morphing_lists(final_w_vectors)
    ref_images_1, ref_images_2 = create_morphing_lists(ref_images)
    for i in range(len(ref_images_1)):
        avg_w_vector = (0.5 * (w1[i] + w2[i])).reshape((-1, 18, 512))
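        # Morphing is a simple midpoint interpolation in W+ space: the two dlatents are
        # averaged element-wise and the result is decoded back into a single face image.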
        generator.set_dlatents(avg_w_vector)
        img_array = generator.generate_images()[0]
        img = PIL.Image.fromarray(img_array, 'RGB')
        img_name = os.path.splitext(os.path.basename(ref_images_1[i]))[0] + '_vs_' + os.path.splitext(os.path.basename(ref_images_2[i]))[0]
        img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
    generator.reset_dlatents()
Beispiel #30
0
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir',
                        default='data',
                        help='Directory for storing optional models')
    parser.add_argument(
        '--model_url',
        default=
        'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ',
        help='Fetch a StyleGAN model to train on from this URL'
    )  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res',
                        default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--resnet_image_size',
                        default=256,
                        help='Size of images for the Resnet model',
                        type=int)
    parser.add_argument('--lr',
                        default=0.02,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--decay_rate',
                        default=0.9,
                        help='Decay rate for learning rate',
                        type=float)
    parser.add_argument('--iterations',
                        default=100,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--decay_steps',
        default=10,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument(
        '--load_resnet',
        default='data/finetuned_resnet.h5',
        help='Model to load for Resnet approximation of dlatents')

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss',
        default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer',
                        default=9,
                        help='Pick which VGG layer to use.',
                        type=int)
    parser.add_argument(
        '--use_pixel_loss',
        default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss',
        default=100,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss',
        default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty',
        default=1,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--tile_dlatents',
        default=False,
        help='Tile dlatents to use a single vector at each scale',
        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--clipping_threshold',
        default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Video params
    parser.add_argument('--video_dir',
                        default='videos',
                        help='Directory for storing training videos')
    parser.add_argument('--output_video',
                        default=False,
                        help='Generate videos of the optimization process',
                        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--video_codec',
                        default='MJPG',
                        help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate',
                        default=24,
                        help='Video frames per second',
                        type=int)
    parser.add_argument('--video_size',
                        default=512,
                        help='Video size in pixels',
                        type=int)
    parser.add_argument(
        '--video_skip',
        default=1,
        help='Only write every n frames (1 = write every frame)',
        type=int)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    resnet_model = None
    if os.path.exists(args.load_resnet):
        print("Loading ResNet Model:")
        resnet_model = load_model(args.load_resnet)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (resnet_model is not None):
            dlatents = resnet_model.predict(
                preprocess_resnet_input(
                    load_images(images_batch,
                                image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            vid_count += 1  # advance the frame counter so --video_skip actually skips frames
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()