Example 1
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir',
                        default='data',
                        help='Directory for storing optional models')
    parser.add_argument(
        '--model_url',
        default=
        'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ',
        help='Fetch a StyleGAN model to train on from this URL'
    )  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res',
                        default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--resnet_image_size',
                        default=256,
                        help='Size of images for the Resnet model',
                        type=int)
    parser.add_argument('--lr',
                        default=0.02,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--decay_rate',
                        default=0.9,
                        help='Decay rate for learning rate',
                        type=float)
    parser.add_argument('--iterations',
                        default=100,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--decay_steps',
        default=10,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument(
        '--load_resnet',
        default='data/finetuned_resnet.h5',
        help='Model to load for Resnet approximation of dlatents')

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss',
        default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer',
                        default=9,
                        help='Pick which VGG layer to use.',
                        type=int)
    parser.add_argument(
        '--use_pixel_loss',
        default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss',
        default=100,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss',
        default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty',
        default=1,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--tile_dlatents',
        default=False,
        help='Tile dlatents to use a single vector at each scale',
        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--clipping_threshold',
        default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Video params
    parser.add_argument('--video_dir',
                        default='videos',
                        help='Directory for storing training videos')
    parser.add_argument('--output_video',
                        default=False,
                        help='Generate videos of the optimization process',
                        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument('--video_codec',
                        default='MJPG',
                        help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate',
                        default=24,
                        help='Video frames per second',
                        type=int)
    parser.add_argument('--video_size',
                        default=512,
                        help='Video size in pixels',
                        type=int)
    parser.add_argument(
        '--video_skip',
        default=1,
        help='Only write every n frames (1 = write every frame)',
        type=int)
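    # Example invocation (hypothetical script name and paths, using only the
    # arguments defined above):
    #   python encode_images.py aligned_images/ generated_images/ latent_representations/ \
    #       --iterations 200 --output_video true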

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    resnet_model = None
    if os.path.exists(args.load_resnet):
        print("Loading ResNet Model:")
        resnet_model = load_model(args.load_resnet)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (resnet_model is not None):
            dlatents = resnet_model.predict(
                preprocess_resnet_input(
                    load_images(images_batch,
                                image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            vid_count += 1  # advance the frame counter so --video_skip takes effect
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
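A note on split_to_batches: every example in this collection iterates over split_to_batches(ref_images, batch_size), but none of the excerpts define it. A minimal sketch of what such a helper is assumed to do (yield consecutive slices of the input list):

def split_to_batches(items, batch_size):
    # Yield consecutive chunks of at most batch_size items each.
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]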
Example 2
    perceptual_model = PerceptualModel(image_size,
                                       layer=9,
                                       batch_size=batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    face_img_path = sys.argv[2]
    face_img_list = [face_img_path]
    file_path = os.path.splitext(face_img_path)[0]
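    # Note: image_size, batch_size, iterations, lr and generator are assumed to be
    # defined earlier in the original script; this excerpt only shows the encoding loop.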
    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(face_img_list, batch_size),
                             total=len(face_img_list) // batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=iterations,
                                       learning_rate=lr)
        pbar = tqdm(op, leave=False, total=iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = Image.fromarray(img_array, 'RGB')
            img.save(file_path + '_generated.png')
Example 3
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Output directories setting
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('guessed_images_dir',
                        help='Directory for storing initially guessed images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')

    # General params
    parser.add_argument('--model_res',
                        default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument(
        '--use_resnet',
        default=True,
        help='Use pretrained ResNet for approximating dlatents',
        type=lambda x: (str(x).lower() == 'true'))

    # Perceptual model params
    parser.add_argument('--iterations',
                        default=100,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument('--lr',
                        default=0.02,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--decay_rate',
                        default=0.9,
                        help='Decay rate for learning rate',
                        type=float)
    parser.add_argument(
        '--decay_steps',
        default=10,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--resnet_image_size',
                        default=256,
                        help='Size of images for the Resnet model',
                        type=int)

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss',
        default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer',
                        default=9,
                        help='Pick which VGG layer to use.',
                        type=int)
    parser.add_argument(
        '--use_pixel_loss',
        default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss',
        default=100,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss',
        default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty',
        default=1,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--tile_dlatents',
        default=False,
        help='Tile dlatents to use a single vector at each scale',
        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--clipping_threshold',
        default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Masking params
    parser.add_argument('--mask_dir',
                        default='masks/encoding',
                        help='Directory for storing optional masks')
    parser.add_argument(
        '--face_mask',
        default=False,
        help='Generate a mask for predicting only the face area',
        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--use_grabcut',
        default=True,
        help=
        'Use grabcut algorithm on the face mask to better segment the foreground',
        type=lambda x: (str(x).lower() == 'true'))
    parser.add_argument(
        '--scale_mask',
        default=1.5,
        help='Look over a wider section of foreground for grabcut',
        type=float)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = sorted(list(filter(os.path.isfile, ref_images)))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    # Create output directories
    os.makedirs('data', exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.guessed_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    if args.face_mask:
        os.makedirs(args.mask_dir, exist_ok=True)

    # Initialize generator
    tflib.init_tf()
    with open_url(url_styleGAN, cache_dir='cache') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
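    # Note: url_styleGAN, url_VGG_perceptual and url_resnet are assumed to be
    # module-level constants of the original script; they are not defined in this excerpt.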

    generator = Generator(model=Gs_network,
                          batch_size=args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    # Initialize perceptual model
    perc_model = None
    if args.use_lpips_loss > 1e-7:
        with open_url(url_VGG_perceptual, cache_dir='cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    # Initialize ResNet model
    resnet_model = None
    if args.use_resnet:
        print("\nLoading ResNet Model:")
        resnet_model_fn = 'data/finetuned_resnet.h5'
        gdown.download(url_resnet, resnet_model_fn, quiet=True)
        resnet_model = load_model(resnet_model_fn)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        perceptual_model.set_reference_images(images_batch)

        # predict initial dlatents with ResNet model
        if resnet_model is not None:
            dlatents = resnet_model.predict(
                preprocess_input(
                    load_images(images_batch,
                                image_size=args.resnet_image_size)))
            generator.set_dlatents(dlatents)

        # Generate and save initially guessed images
        initial_dlatents = generator.get_dlatents()
        initial_images = generator.generate_images()
        for img_array, dlatent, img_name in zip(initial_images,
                                                initial_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.guessed_images_dir, f'{img_name}.png'),
                     'PNG')

        # Optimization process to find best latent vectors
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        progress_bar = tqdm(op, leave=False, total=args.iterations)
        best_loss = None
        best_dlatent = None
        for loss_dict in progress_bar:
            progress_bar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()

    # Concatenate and save dlatent vectors
    list_dlatents = sorted(os.listdir(args.dlatent_dir))
    final_w_vectors = np.array(
        [np.load(os.path.join(args.dlatent_dir, dlatent)) for dlatent in list_dlatents])
    np.save(os.path.join(args.dlatent_dir, 'output_vectors.npy'),
            final_w_vectors)
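The per-image .npy files written above can be reloaded and fed straight back to the generator. A minimal sketch, reusing generator and args from this example ('some_image.npy' is a hypothetical file name, and restoring a batch dimension of 1 is an assumption about the saved array's shape):

dlatent = np.load(os.path.join(args.dlatent_dir, 'some_image.npy'))
generator.set_dlatents(dlatent[np.newaxis])  # assumed: add back a batch dimension of 1
img = PIL.Image.fromarray(generator.generate_images()[0], 'RGB')
img.save('some_image_regenerated.png', 'PNG')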
Example 4
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('init_dlatent',
                        default=False,
                        help='path to init dlatent or False')
    parser.add_argument('move_to_folder',
                        default=False,
                        help='Folder to move encoded source images into, or False')
    parser.add_argument('--iterations',
                        default=30,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--network_pkl',
        default='gdrive:networks/stylegan2-ffhq-config-f.pkl',
        help='Path to local copy of stylegan2-ffhq-config-f.pkl')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--lr',
                        default=1.,
                        help='Learning rate for perceptual model',
                        type=float)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=lambda x: (str(x).lower() == 'true'))
    args, other_args = parser.parse_known_args()

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(
        args.network_pkl)
    generator = Generator(Gs_network,
                          args.batch_size,
                          randomize_noise=args.randomize_noise)
    #if args.cont != False:
    print("CONTINUING FROM PREVIOUS DLATENT")
    generator.define_dlatents(args.init_dlatent)
    #else:
    #generator.set_dlatents(generator.initial_dlatents)

    perceptual_model = PerceptualModel(args.image_size,
                                       layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)
        shutil.move(
            os.path.join(str(args.src_dir), f'{names[0]}.png'),
            os.path.join(str(args.move_to_folder), f'{names[0]}.png'))

        # Generate images from found dlatents and save them

        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
Example 5
def encode(resnet, learning_rate=0.02, iterations=200):

    args = eden.utils.DictMap()
    args_other = eden.utils.DictMap()
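    # Note: 'stylegan' (the project base directory), 'Gs' (the pretrained generator
    # network) and the eden.utils helpers are assumed to be defined at module level
    # in the original script; they are not shown in this excerpt.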

    args.src_dir = os.path.join(stylegan, 'aligned_images')
    args.generated_images_dir = os.path.join(stylegan, 'generated_images')
    args.dlatent_dir = os.path.join(stylegan, 'latent_representations')

    args.load_last = None
    args.dlatent_avg = None

    args.model_res = 1024
    args.batch_size = 1

    # Perceptual model params
    args.image_size = 256
    args.resnet_image_size = 256
    args.lr = learning_rate
    args.decay_rate = 0.9
    args.iterations = iterations
    args.decay_steps = 10
    args.load_effnet = None
    args.load_resnet = os.path.join(stylegan, resnet)

    # Loss function options
    args.use_vgg_loss = 0.4
    args.use_vgg_layer = 9
    args.use_pixel_loss = 1.5
    args.use_mssim_loss = 100
    args.use_lpips_loss = 100
    args.use_l1_penalty = 1

    # Generator params
    args.randomize_noise = False
    args.tile_dlatents = False
    args.clipping_threshold = 2.0

    # Masking params
    args.load_mask = False
    args.face_mask = False
    args.use_grabcut = True
    args.scale_mask = 1.5

    # Video params
    args.video_dir = os.path.join(stylegan, 'videos')
    args.output_video = True
    args.video_codec = 'MJPG'
    args.video_frame_rate = 30
    args.video_size = 1024
    args.video_skip = 1

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    eden.utils.try_make_folder(args.generated_images_dir)
    eden.utils.try_make_folder(args.dlatent_dir)
    eden.utils.try_make_folder(args.video_dir)

    # Initialize generator and perceptual model
    generator = Generator(Gs,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg is not None):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        cache_dir = os.path.join(stylegan, config.cache_dir)
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))
        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last
                is not None):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(
                    os.path.join(args.load_last, f'{name}.npy')),
                                    axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if (ff_model is None) and (args.load_effnet is not None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if (ff_model
                    is not None):  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size), Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            vid_count += 1  # advance the frame counter so video_skip takes effect
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
Example 6
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')

    # Input and Output directories
    parser.add_argument('--src_dir',
                        default='./images',
                        help='Directory with images for encoding')
    parser.add_argument('--out_dir',
                        default='./outputs',
                        help='Directory for storing generated images')

    # Modes of run
    parser.add_argument(
        '--mode',
        default='dlatents',
        help=
        'Mode depending on the variables we want to optimize (latents/dlatents)'
    )
    parser.add_argument(
        '--ref_dlatents',
        default='./outputs/temp_latents.npy',
        help='Required for the regression of latents given the dlatents')

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--lr',
                        default=0.009,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--iterations',
                        default=10,
                        help='Number of optimization steps for each batch',
                        type=int)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=lambda x: (str(x).lower() == 'true'))
    args, other_args = parser.parse_known_args()

    # Store in a list the path for all the images of the source directory
    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))
    names = [os.path.splitext(os.path.basename(x))[0] for x in ref_images]

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.out_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()

    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)
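    # Note: URL_FFHQ and config.cache_dir are assumed to be defined by the surrounding
    # project (e.g. its config module); they are not shown in this excerpt.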

    perceptual_model = PerceptualModel(args.image_size, layer=9)

    for idx, ref_img in enumerate(ref_images):
        generator = Generator(Gs_network,
                              randomize_noise=args.randomize_noise,
                              mode=args.mode)
        perceptual_model.build_perceptual_model(generator.generated_image,
                                                args.mode, args.ref_dlatents,
                                                ref_img)

        perceptual_model.set_reference_images(ref_img)
        op = perceptual_model.optimize(generator.latent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr,
                                       out_dir=args.out_dir,
                                       img_name=names[idx])
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(names[idx] + ' Loss: %.2f' % loss)
        print(names[idx], ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatent = generator.get_latents()
        if not os.path.exists(args.mode):
            os.mkdir(args.mode)
        np.save(os.path.join(args.mode, names[idx] + '.npy'),
                generated_dlatent)
Example 7
def optimize():
    latents = u64latents_to_latents(request.args.get('u64latents'))
    if latents is None:
        abort(413)
    u64reference_webp = request.args.get('u64reference')
    reference_image = io.BytesIO(base64.urlsafe_b64decode(u64reference_webp)) # todo: restrict input formats
    generator.set_dlatents(latents)
    model_url = "gdrive:networks/stylegan2-ffhq-config-f.pkl"
    vgg_url = "https://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl"
    optional_args = {
        "lr": 0.857,
        "decay_rate": 0.95,
        "iterations": 25,
        "decay_steps": 4,
        "image_size": 256,
        "use_vgg_layer": 9,
        "use_vgg_loss": 100,
        "use_pixel_loss": 1,
        "use_mssim_loss": 100,
        "use_lpips_loss": 0,
        "use_l1_penalty": 1,
        "use_adaptive_loss": False, # requires tf >= 2 😿
        "sharpen_input": False,
        "batch_size": 1,
        "use_discriminator_loss": 0,
        "optimizer": "ggt",
        "average_best_loss": 0.25,
    }
    forced_args = {
        "face_mask": False,
        "use_grabcut": None,
        "scale_mask": None,
        "mask_dir": None,
        "vgg_url": vgg_url, # we do not load random pickles that Internet Users ask us to
        "batch_size": 1,
        "model_url": model_url, # we do not load random pickles that Internet Users ask us to
        "model_res": 1024,
    }
    # print(f"{time.time()} default config done")
    merged_args = {k: type(optional_args.get(k))(request.args.get(k,optional_args.get(k))) for k in optional_args}
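    # Caveat: for the boolean defaults (use_adaptive_loss, sharpen_input) this cast applies
    # bool() to the raw query string, so any non-empty value, including "False", becomes True.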
    for k in forced_args: merged_args[k] = forced_args[k]
    args = dict_as_namedtuple(merged_args, name="args")
    # print(f"{time.time()} config merged")

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(args.vgg_url, cache_dir='.stylegan2-cache') as f:
            perc_model = pickle.load(f)

    # print(f"{time.time()} perc model")
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    # print(f"{time.time()} perceptual model object")
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        perceptual_model.build_perceptual_model(generator, discriminator_network)
    # print(f"{time.time()} reused scope built")

    perceptual_model.set_reference_images([reference_image])
    # print(f"{time.time()} refernce images set")
    op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, use_optimizer=args.optimizer)
    # print(f"{time.time()} optimizer iterable created")
    def generate():
        best_loss = None
        best_dlatent = None
        for loss_dict in op:
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None:
                    best_dlatent = generator.get_dlatents()
                else:
                    best_dlatent = args.average_best_loss * best_dlatent + (1 - args.average_best_loss) * generator.get_dlatents()
                yield base64.urlsafe_b64encode(generator.get_dlatents().tobytes('C'))+b"\n"
                generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            generator.stochastic_clip_dlatents()
        yield base64.urlsafe_b64encode(best_dlatent.tobytes('C'))+b"\n"

            # print(f"best loss: {best_loss} @ {time.time()}")

    return Response(generate())
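u64latents_to_latents is not shown in this example. Given that the response encodes dlatents with base64.urlsafe_b64encode(...tobytes('C')), a plausible inverse is sketched below; the dtype and shape are assumptions, not taken from the source:

import base64
import numpy as np

def u64latents_to_latents(u64latents, dtype=np.float32, shape=(1, 18, 512)):
    # Decode a urlsafe-base64 string back into a dlatent array; return None on bad input.
    if u64latents is None:
        return None
    try:
        buf = base64.urlsafe_b64decode(u64latents)
        return np.frombuffer(buf, dtype=dtype).reshape(shape)
    except (ValueError, TypeError):
        return None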
Example 8
def styleGAN_encoder(args):
    start_ = time.time()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(
                    os.path.join(args.load_last, f'{name}.npy')),
                                    axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if (ff_model
                    is not None):  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            vid_count += 1  # advance the frame counter so args.video_skip takes effect
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
    end_ = time.time()
    logging.info('StyleEncoder encoding time for the images: %.2fs' % (end_ - start_))
Example 9
def styleGAN_encoder(args,path_A, path_B):
    start_ = time.time()
    args.decay_steps *= 0.01 * args.iterations # Calculate steps as a percent of total iterations

    src_dir = args.src_dir
    name_A = src_dir+'/%s.png' %os.path.basename(os.path.splitext(path_A)[0])
    name_B = src_dir+'/%s.png' %os.path.basename(os.path.splitext(path_B)[0])
    ref_images = [name_A,name_B]
    ref_images = list(filter(os.path.isfile, ref_images))


    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size, clipping_threshold=args.clipping_threshold, tiled_dlatent=args.tile_dlatents, model_res=args.model_res, randomize_noise=args.randomize_noise)

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir=config.cache_dir) as f:
            perc_model =  pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''): # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')),axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents,dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if (ff_model is not None): # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(preprocess_input(load_images(images_batch,image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)

        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v)
                    for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()

            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))



        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
    end_ = time.time()
    logging.info('The time it takes for the StyleGAN Encoder: %.2fs' % (end_ - start_))
Example 10
def main():

    args = do_parsing()
    print(args)
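    # Note: do_parsing() and ModelRetriever (used below) are project-specific helpers
    # not shown in this excerpt; do_parsing is assumed to return a namespace with fields
    # similar to the parsers in the other examples (src_dir, model_name, batch_size, ...).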

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    # Load pre-trained network, already on file system
    model_filepath = ModelRetriever().get_model_filepath(args.model_name)

    generated_images_dir = os.path.join(args.generated_images_dir,
                                        args.model_name)
    dlatent_dir = os.path.join(args.dlatent_dir, args.model_name)

    os.makedirs(generated_images_dir, exist_ok=True)
    os.makedirs(dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
    with open(model_filepath, "rb") as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size,
                                       layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(generated_images_dir, f'{img_name}.png'),
                     'PNG')
            np.save(os.path.join(dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
Example 11
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--lr',
                        default=1.,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--iterations',
                        default=1000,
                        help='Number of optimization steps for each batch',
                        type=int)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=lambda x: (str(x).lower() == 'true'))
    args, other_args = parser.parse_known_args()

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          randomize_noise=args.randomize_noise)

    #TODO: load dlatents here to pick up training if interrupted.

    #    latent = np.load('filename.npy')
    #     generator.set_dlatents()

    perceptual_model = PerceptualModel(args.image_size,
                                       layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    counter = 0
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            counter += 1
            checkpointed = False
            if counter % 100 == 0 or counter < 100:
                checkpointed = make_checkpoint(counter, generator, names,
                                               args.generated_images_dir,
                                               args.dlatent_dir)
                print("****************************")
                print(f"*counter: {counter}        *")
                print("****************************")
            pbar.set_description(
                ' '.join(names) +
                f" counter: {counter}, checkpointed: {checkpointed}" +
                ' Last Loss: %.2f' % loss)  # This is the output

        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir,
                             f'{args.iterations}_iters_{img_name}.png'), 'PNG')
            np.save(
                os.path.join(args.dlatent_dir,
                             f'{args.iterations}_{img_name}.npy'), dlatent)

        generator.reset_dlatents()
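Example no. 11 calls a make_checkpoint helper that is not included in the snippet. A hedged sketch of what it might do; the name and signature are taken from the call site, and the body is assumed to mirror the saving code at the end of the batch loop:

def make_checkpoint(counter, generator, names, generated_images_dir, dlatent_dir):
    # Assumed behavior: save the current images and dlatents, tagged with the
    # step counter, and report that a checkpoint was written.
    generated_images = generator.generate_images()
    generated_dlatents = generator.get_dlatents()
    for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
        img = PIL.Image.fromarray(img_array, 'RGB')
        img.save(os.path.join(generated_images_dir, f'{counter}_{img_name}.png'), 'PNG')
        np.save(os.path.join(dlatent_dir, f'{counter}_{img_name}.npy'), dlatent)
    return True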
Example no. 12
0
    def encodeImages(self,
                     src_dir,
                     generated_images_dir,
                     dlatent_dir,
                     batch_size=1,
                     image_size=256,
                     lr=1,
                     iterations=1000,
                     randomize_noise=False):
        """
        Find latent representation of reference images using perceptual loss
        Params:
            src_dir: Directory with images for encoding
            generated_images_dir: Directory for storing generated images
            dlatent_dir: Directory for storing dlatent representations
            batch_size: Batch size for generator and perceptual model
            image_size: Size of images for perceptual model
            lr: Learning rate for perceptual model
            iterations: Number of optimization steps for each batch
            randomize_noise: Add noise to dlatents during optimization
        """
        ref_images = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]
        ref_images = list(filter(os.path.isfile, ref_images))

        if len(ref_images) == 0:
            raise Exception('%s is empty' % src_dir)

        os.makedirs(generated_images_dir, exist_ok=True)
        os.makedirs(dlatent_dir, exist_ok=True)

        # Initialize generator and perceptual model
        tflib.init_tf()

        perceptual_model = PerceptualModel(image_size,
                                           layer=9,
                                           batch_size=batch_size)
        perceptual_model.build_perceptual_model(self.generator.generated_image)

        # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
        for images_batch in tqdm(self._split_to_batches(
                ref_images, batch_size),
                                 total=len(ref_images) // batch_size):
            names = [
                os.path.splitext(os.path.basename(x))[0] for x in images_batch
            ]

            perceptual_model.set_reference_images(images_batch)
            op = perceptual_model.optimize(self.generator.dlatent_variable,
                                           iterations=iterations,
                                           learning_rate=lr)
            pbar = tqdm(op, leave=False, total=iterations)
            for loss in pbar:
                pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
            print(' '.join(names), ' loss:', loss)

            # Generate images from found dlatents and save them
            generated_images = self.generator.generate_images()
            generated_dlatents = self.generator.get_dlatents()
            for img_array, dlatent, img_name in zip(generated_images,
                                                    generated_dlatents, names):
                img = PIL.Image.fromarray(img_array, 'RGB')
                img.save(os.path.join(generated_images_dir, f'{img_name}.png'),
                         'PNG')
                np.save(os.path.join(dlatent_dir, f'{img_name}.npy'), dlatent)

            self.generator.reset_dlatents()
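The encodeImages method in Example no. 12 lives on a wrapper class that already holds a Generator as self.generator. A hedged usage sketch; the class name StyleGANEncoder is hypothetical and only illustrates how the method would be called:

# Hypothetical usage; StyleGANEncoder is an assumed wrapper that builds
# self.generator before encodeImages is called.
encoder = StyleGANEncoder()
encoder.encodeImages('aligned_images/',
                     'generated_images/',
                     'latent_representations/',
                     batch_size=1,
                     image_size=256,
                     lr=1.,
                     iterations=1000)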
Example no. 13
0
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual loss')
    # parser.add_argument('--src_dir', default='./img/', help='Directory with images for encoding')
    parser.add_argument('--src_img', default='001.jpg', help='Source image file to encode')
    parser.add_argument('--src_dir', default='./aligned_images', help='Directory with images for encoding')
    parser.add_argument('--generated_images_dir', default='./generated_images/', help='Directory for storing generated images')
    parser.add_argument('--dlatent_dir', default='./latent/', help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=2, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=.65, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=400, help='Number of optimization steps for each batch', type=int)

    # Generator params
    parser.add_argument('--randomize_noise', default=True, help='Add noise to dlatents during optimization', type=bool)
    args, other_args = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    # ref_image = args.src_dir
    # ref_image = list(filter(os.path.isfile, ref_image))
    # print(ref_images)

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)
    # if len(ref_image) == 0:
    #     raise Exception('%s is empty' % args.src_img)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
    with open(URL_FFHQ, mode='rb') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size, randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    # for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
    # for images_batch in tqdm(split_to_batches(ref_image, args.batch_size), total=len(ref_image)//args.batch_size):

    images_batch = []

    images_batch.append(args.src_img)
    # print(images_batch)
    names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

    # image_batch = args.src_img
    # name = os.path.splittext(os.path.basename(image_batch)[0])

    perceptual_model.set_reference_images(images_batch)
    op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, learning_rate=args.lr)
    pbar = tqdm(op, leave=True, total=args.iterations)
    for loss in pbar:
        pbar.set_description(' '.join(names)+' Loss: %.2f' % loss)
    # print(' '.join(names), ' loss:', loss)

    # Generate images from found dlatents and save them
    generated_images = generator.generate_images()
    generated_dlatents = generator.get_dlatents()
    for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
        img = PIL.Image.fromarray(img_array, 'RGB')
        img.save(os.path.join(args.generated_images_dir, f'{img_name}.jpg'), 'JPEG')
        np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

    generator.reset_dlatents()

    print("Done image generated")
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--lr',
                        default=1.,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--iterations',
                        default=2000,
                        help='Number of optimization steps for each batch',
                        type=int)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=bool)
    args, other_args = parser.parse_known_args()

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size,
                                       layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)
    # nonperceptual_model = NonperceptualModel(args.image_size, batch_size=args.batch_size)
    # nonperceptual_model.build_nonperceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]

        perceptual_model.set_reference_images(images_batch)
        # nonperceptual_model.set_reference_images(images_batch)

        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        # op = nonperceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, learning_rate=args.lr)

        pbar = tqdm(op, leave=False, total=args.iterations)
        min_loss = np.inf
        img = None
        for i, per_loss, reg_loss, loss in pbar:
            # Generate images from found dlatents and save them
            if (loss < min_loss and i > 0.4 * args.iterations):
                min_loss = loss

                generated_images = generator.generate_images()
                generated_dlatents = generator.get_dlatents()
                for img_array, dlatent, img_name in zip(
                        generated_images, generated_dlatents, names):
                    img = PIL.Image.fromarray(img_array, 'RGB')

                print('\n' + ' '.join(names) +
                      ' Per/Reg/Total Loss: [{0:.2f},{1:.2f},{2:.2f}]'.format(
                          per_loss, reg_loss, loss) + '<-- BEST')
            else:
                print('\n' + ' '.join(names) +
                      ' Per/Reg/Total Loss: [{0:.2f},{1:.2f},{2:.2f}]'.format(
                          per_loss, reg_loss, loss))

            if (i % 100 == 0 and img is not None):
                img.save(
                    os.path.join(args.generated_images_dir, f'{img_name}.png'),
                    'PNG')
                np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'),
                        dlatent)
        print(' '.join(names), ' loss:', loss)
        img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'),
                 'PNG')
        np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        # # Generate images from found dlatents and save them
        # generated_images = generator.generate_images()
        # generated_dlatents = generator.get_dlatents()
        # for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
        #     img = PIL.Image.fromarray(img_array, 'RGB')
        #     img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
        #     np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
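Unlike the other snippets, the loop above expects perceptual_model.optimize() to yield (step, perceptual loss, regularization loss, total loss) tuples rather than a single loss value. A minimal sketch of a generator with that shape, assuming an opt_step callable (not part of the original code) that runs one optimization step and returns the two loss terms:

def optimize_with_components(opt_step, iterations):
    # Hypothetical sketch: opt_step() is assumed to run one optimizer step and
    # return (perceptual_loss, regularization_loss).
    for i in range(iterations):
        per_loss, reg_loss = opt_step()
        yield i, per_loss, reg_loss, per_loss + reg_loss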
Example no. 15
0
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir',
                        default='data',
                        help='Directory for storing optional models')
    parser.add_argument('--mask_dir',
                        default='masks',
                        help='Directory for storing optional masks')
    parser.add_argument('--load_last',
                        default='',
                        help='Start with embeddings from directory')
    parser.add_argument(
        '--dlatent_avg',
        default='',
        help=
        'Use dlatent from file specified here for truncation instead of dlatent_avg from Gs'
    )
    parser.add_argument(
        '--model_url',
        default=
        'https://drive.google.com/uc?id=1opTWG1jYlyS9TXAuqVyVR68kQWhOhA99',
        help='Fetch a StyleGAN model to train on from this URL'
    )  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res',
                        default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument(
        '--optimizer',
        default='ggt',
        help='Optimization algorithm used for optimizing dlatents')

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--resnet_image_size',
                        default=256,
                        help='Size of images for the Resnet model',
                        type=int)
    parser.add_argument('--lr',
                        default=0.25,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--decay_rate',
                        default=0.9,
                        help='Decay rate for learning rate',
                        type=float)
    parser.add_argument('--iterations',
                        default=100,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--decay_steps',
        default=4,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument('--early_stopping',
                        default=True,
                        help='Stop early once training stabilizes',
                        type=str2bool,
                        nargs='?',
                        const=True)
    parser.add_argument('--early_stopping_threshold',
                        default=0.5,
                        help='Stop after this threshold has been reached',
                        type=float)
    parser.add_argument('--early_stopping_patience',
                        default=10,
                        help='Number of iterations to wait below threshold',
                        type=int)
    parser.add_argument(
        '--load_effnet',
        default='data/finetuned_effnet.h5',
        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument(
        '--load_resnet',
        default='data/finetuned_resnet.h5',
        help='Model to load for ResNet approximation of dlatents')
    parser.add_argument(
        '--use_preprocess_input',
        default=True,
        help='Call process_input() first before using feed forward net',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--use_best_loss',
        default=True,
        help='Output the lowest loss value found as the solution',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--average_best_loss',
        default=0.25,
        help=
        'Do a running weighted average with the previous best dlatents found',
        type=float)
    parser.add_argument('--sharpen_input',
                        default=True,
                        help='Sharpen the input images',
                        type=str2bool,
                        nargs='?',
                        const=True)

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss',
        default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer',
                        default=9,
                        help='Pick which VGG layer to use.',
                        type=int)
    parser.add_argument(
        '--use_pixel_loss',
        default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss',
        default=200,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss',
        default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty',
        default=0.5,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_discriminator_loss',
                        default=0.5,
                        help='Use trained discriminator to evaluate realism.',
                        type=float)
    parser.add_argument(
        '--use_adaptive_loss',
        default=False,
        help=
        'Use the adaptive robust loss function from Google Research for pixel and VGG feature loss.',
        type=str2bool,
        nargs='?',
        const=True)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=str2bool,
                        nargs='?',
                        const=True)
    parser.add_argument(
        '--tile_dlatents',
        default=False,
        help='Tile dlatents to use a single vector at each scale',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--clipping_threshold',
        default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Masking params
    parser.add_argument('--load_mask',
                        default=False,
                        help='Load segmentation masks',
                        type=str2bool,
                        nargs='?',
                        const=True)
    parser.add_argument(
        '--face_mask',
        default=True,
        help='Generate a mask for predicting only the face area',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--use_grabcut',
        default=True,
        help=
        'Use grabcut algorithm on the face mask to better segment the foreground',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--scale_mask',
        default=1.4,
        help='Look over a wider section of foreground for grabcut',
        type=float)
    parser.add_argument(
        '--composite_mask',
        default=True,
        help='Merge the unmasked area back into the generated image',
        type=str2bool,
        nargs='?',
        const=True)
    parser.add_argument(
        '--composite_blur',
        default=8,
        help='Size of blur filter to smoothly composite the images',
        type=int)

    # Video params
    parser.add_argument('--video_dir',
                        default='videos',
                        help='Directory for storing training videos')
    parser.add_argument('--output_video',
                        default=False,
                        help='Generate videos of the optimization process',
                        type=bool)
    parser.add_argument('--video_codec',
                        default='MJPG',
                        help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate',
                        default=24,
                        help='Video frames per second',
                        type=int)
    parser.add_argument('--video_size',
                        default=512,
                        help='Video size in pixels',
                        type=int)
    parser.add_argument(
        '--video_skip',
        default=1,
        help='Only write every n frames (1 = write every frame)',
        type=int)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    ref_images = [
        os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=15IYd9qY9wNd1SSeI4LxPjRBBJxiOzvhW',
                cache_dir=config.cache_dir) as f:  #vgg16_zhang_perceptual.pkl
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(
                    os.path.join(args.load_last, f'{name}.npy')),
                                    axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if (ff_model
                    is not None):  # predict initial dlatents with ResNet model
                if (args.use_preprocess_input):
                    dlatents = ff_model.predict(
                        preprocess_input(
                            load_images(images_batch,
                                        image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping:  # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        avg_loss = 0.5 * avg_loss + (prev_loss -
                                                     loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold:  # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience:  # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:
                    best_dlatent = 0.25 * best_dlatent + 0.75 * generator.get_dlatents(
                    )
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(
                generated_images, generated_dlatents, images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if args.composite_mask and mask_img is not None and os.path.isfile(
                    mask_img):
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize(
                    (width, height))
                imask = imask.filter(
                    ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask) / 255
                mask = np.expand_dims(mask, axis=-1)
                img_array = mask * np.array(img_array) + (
                    1.0 - mask) * np.array(orig_img)
                img_array = img_array.astype(np.uint8)
                #img_array = np.where(mask, np.array(img_array), orig_img)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
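The boolean options in Example no. 15 are parsed with type=str2bool, a helper that is not shown in the snippet. A common recipe with the assumed behavior (the usual yes/no spellings are accepted, anything else is rejected):

def str2bool(v):
    # Parse textual boolean flags for argparse; raise on unrecognized input.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')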
Example no. 16
0
def styleGAN_encoder(path_A, path_B):
    start_ = time.time()
    decay_steps = 10
    decay_steps *= 0.01 * 100  # Calculate steps as a percent of total iterations

    src_dir = 'aligned_images'
    name_A = src_dir + '/%s.png' % os.path.basename(os.path.splitext(path_A)[0])
    name_B = src_dir + '/%s.png' % os.path.basename(os.path.splitext(path_B)[0])
    ref_images = [name_A,name_B]
    ref_images = list(filter(os.path.isfile, ref_images))


    os.makedirs('data', exist_ok=True)
    os.makedirs('masks', exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url('https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ', cache_dir='cache') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, 1, clipping_threshold=2.0, tiled_dlatent=False, model_res=1024, randomize_noise=False)
    print(generator.model_scale)

    perc_model = None
    if (100 > 0.00000001):  # use_lpips_loss is hard-coded to 100 in this variant
        with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir='cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(perc_model=perc_model, batch_size=1)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, 1), total=len(ref_images)//1):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        dlatents = None


        if (ff_model is None):
            if os.path.exists('data/finetuned_resnet.h5'):
                print("Loading ResNet Model:")
                ff_model = load_model('data/finetuned_resnet.h5')
                from keras.applications.resnet50 import preprocess_input


        if (ff_model is not None): # predict initial dlatents with ResNet model
            dlatents = ff_model.predict(preprocess_input(load_images(images_batch,image_size=256)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=100)
        pbar = tqdm(op, leave=False, total=100)

        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v)
                    for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()

            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        print(best_dlatent)

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        print(generator.initial_dlatents)



        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            np.save(os.path.join('latent_representations', f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()

    end_ = time.time()
    logging.info('The time it takes for the StyleGAN Encoder: %.2fs' % (end_ - start_))
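Both this example and Example no. 15 feed the batch through load_images(...) before predicting initial dlatents with the ResNet/EfficientNet model, but the helper itself never appears. A minimal sketch of the assumed behavior (read each file, resize to a square, stack into a float array):

def load_images(image_paths, image_size=256):
    # Assumed helper: returns an array of shape (N, image_size, image_size, 3).
    loaded = []
    for path in image_paths:
        img = PIL.Image.open(path).convert('RGB')
        img = img.resize((image_size, image_size), PIL.Image.LANCZOS)
        loaded.append(np.array(img))
    return np.stack(loaded).astype(np.float32)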
Example no. 17
0
def main():
    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1., help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=1000, help='Number of optimization steps for each batch', type=int)

    # Generator params
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=bool)
    args, other_args = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    URL_FFHQ = 'https://github.com/parameter-pollution/stylegan_paintings/releases/download/v0.1/network-snapshot-008040.pkl'
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs = pickle.load(f)


    #tflib.init_tf()
    #with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
    #    generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs, args.batch_size, randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names)+' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
Example no. 18
0
def main():
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('name', help='Name of a combined image')
    parser.add_argument('raw_dir',
                        help='Directory with a raw image for encoding')
    parser.add_argument('aligned_dir', help='Directory with an aligned image')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir',
                        default='data',
                        help='Directory for storing optional models')
    parser.add_argument('--mask_dir',
                        default='masks',
                        help='Directory for storing optional masks')
    parser.add_argument('--load_last',
                        default='',
                        help='Start with embeddings from directory')
    parser.add_argument(
        '--dlatent_avg',
        default='',
        help=
        'Use dlatent from file specified here for truncation instead of dlatent_avg from Gs'
    )
    parser.add_argument(
        '--model_url',
        default=
        'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ',
        help='Fetch a StyleGAN model to train on from this URL'
    )  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res',
                        default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size',
                        default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size',
                        default=256,
                        help='Size of images for perceptual model',
                        type=int)
    parser.add_argument('--resnet_image_size',
                        default=256,
                        help='Size of images for the Resnet model',
                        type=int)
    parser.add_argument('--lr',
                        default=0.03,
                        help='Learning rate for perceptual model',
                        type=float)
    parser.add_argument('--decay_rate',
                        default=0.9,
                        help='Decay rate for learning rate',
                        type=float)
    parser.add_argument('--iterations',
                        default=1000,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--decay_steps',
        default=10,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument(
        '--load_effnet',
        default='data/finetuned_effnet.h5',
        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument(
        '--load_resnet',
        default='data/finetuned_resnet.h5',
        help='Model to load for ResNet approximation of dlatents')

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss',
        default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer',
                        default=9,
                        help='Pick which VGG layer to use.',
                        type=int)
    parser.add_argument(
        '--use_pixel_loss',
        default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss',
        default=100,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss',
        default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty',
        default=1,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)

    # Generator params
    parser.add_argument('--randomize_noise',
                        default=False,
                        help='Add noise to dlatents during optimization',
                        type=bool)
    parser.add_argument(
        '--tile_dlatents',
        default=False,
        help='Tile dlatents to use a single vector at each scale',
        type=bool)
    parser.add_argument(
        '--clipping_threshold',
        default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Masking params
    parser.add_argument('--load_mask',
                        default=False,
                        help='Load segmentation masks',
                        type=bool)
    parser.add_argument(
        '--face_mask',
        default=False,
        help='Generate a mask for predicting only the face area',
        type=bool)
    parser.add_argument(
        '--use_grabcut',
        default=True,
        help=
        'Use grabcut algorithm on the face mask to better segment the foreground',
        type=bool)
    parser.add_argument(
        '--scale_mask',
        default=1.5,
        help='Look over a wider section of foreground for grabcut',
        type=float)

    # Video params
    parser.add_argument('--video_dir',
                        default='videos',
                        help='Directory for storing training videos')
    parser.add_argument('--output_video',
                        default=False,
                        help='Generate videos of the optimization process',
                        type=bool)
    parser.add_argument('--video_codec',
                        default='MJPG',
                        help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate',
                        default=24,
                        help='Video frames per second',
                        type=int)
    parser.add_argument('--video_size',
                        default=512,
                        help='Video size in pixels',
                        type=int)
    parser.add_argument(
        '--video_skip',
        default=1,
        help='Only write every n frames (1 = write every frame)',
        type=int)

    args, other_args = parser.parse_known_args()
    args.decay_steps *= 0.01 * args.iterations

    if args.output_video:
        import cv2
        synthesis_kwargs = dict(output_transform=dict(
            func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
                                minibatch_size=args.batch_size)

    # encoder_main
    os.makedirs(args.raw_dir, exist_ok=True)
    src_dir = args.raw_dir + args.name
    img = PIL.Image.open(src_dir)
    wpercent = (256 / float(img.size[0]))
    hsize = int((float(img.size[1]) * float(wpercent)))
    img = img.resize((256, hsize), PIL.Image.LANCZOS)
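    # Note: the resized image above is only held in memory; it is never written
    # back to disk, so the alignment step below still reads the original file.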
    # align_images
    os.makedirs(args.aligned_dir, exist_ok=True)
    align_images(args.raw_dir, args.aligned_dir)
    # encode_images
    ref_images = [
        os.path.join(args.aligned_dir, x) for x in os.listdir(args.aligned_dir)
    ]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.aligned_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    tflib.init_tf()
    #with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
    #generator_network, discriminator_network, Gs_network = pickle.load(f)
    ffhq = '/content/gdrive/My Drive/data/karras2019stylegan-ffhq-1024x1024.pkl'
    with open(ffhq, 'rb') as f:
        _generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if (args.dlatent_avg != ''):
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if (args.use_lpips_loss > 0.00000001):
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args,
                                       perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)  #.generated_image

    ff_model = None

    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [
            os.path.splitext(os.path.basename(x))[0] for x in images_batch
        ]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate, (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if (args.load_last != ''):  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(
                    os.path.join(args.load_last, f'{name}.npy')),
                                    axis=0)
                if (dlatents is None):
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            if (ff_model is None):
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if (ff_model is None):
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if (ff_model
                    is not None):  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)

        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()