def main():
    """Encode every image in ``src_dir`` into StyleGAN dlatents.

    Parses the command line, loads the StyleGAN networks and (optionally) the
    LPIPS perceptual model and a ResNet used to guess initial dlatents, then
    optimizes the dlatents of each batch of reference images. For every image
    the lowest-loss dlatents seen during optimization are used to write
    ``<generated_images_dir>/<name>.png`` and ``<dlatent_dir>/<name>.npy``.
    With ``--output_video`` an ``.avi`` of the optimization is written too.

    Raises:
        Exception: if ``src_dir`` contains no regular files.
    """

    def str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` is wrong for argparse: ``bool('False')`` is ``True``
        because any non-empty string is truthy.
        """
        return str(value).lower() in ('true', '1', 'yes', 'y', 't')

    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    # Default URL points at karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_url',
                        default='https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ',
                        help='Fetch a StyleGAN model to train on from this URL')
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the Resnet model', type=int)
    parser.add_argument('--lr', default=0.02, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--decay_steps', default=10,
                        help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--load_resnet', default='data/finetuned_resnet.h5',
                        help='Model to load for Resnet approximation of dlatents')

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4,
                        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5,
                        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=100,
                        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_lpips_loss', default=100,
                        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_l1_penalty', default=1,
                        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)

    # Generator params
    parser.add_argument('--randomize_noise', default=False,
                        help='Add noise to dlatents during optimization', type=str2bool)
    parser.add_argument('--tile_dlatents', default=False,
                        help='Tile dlatents to use a single vector at each scale', type=str2bool)
    parser.add_argument('--clipping_threshold', default=2.0,
                        help='Stochastic clipping of gradient values outside of this threshold', type=float)

    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False,
                        help='Generate videos of the optimization process', type=str2bool)
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int)
    parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int)
    parser.add_argument('--video_skip', default=1,
                        help='Only write every n frames (1 = write every frame)', type=int)

    args, other_args = parser.parse_known_args()

    # Calculate steps as a percent of total iterations
    args.decay_steps *= 0.01 * args.iterations

    if args.output_video:
        # OpenCV is only needed (and only imported) when a video is requested.
        import cv2

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # NOTE(security): pickle.load executes arbitrary code from the downloaded
    # file; only point --model_url at sources you trust.
    with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    perc_model = None
    if args.use_lpips_loss > 0.00000001:
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    resnet_model = None
    if os.path.exists(args.load_resnet):
        print("Loading ResNet Model:")
        resnet_model = load_model(args.load_resnet)

    # Optimize (only) dlatents by minimizing perceptual loss between reference
    # and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate,
                    (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)

        # Use the ResNet prediction (when a model is available) as the
        # starting point of the optimization.
        dlatents = None
        if resnet_model is not None:
            dlatents = resnet_model.predict(
                preprocess_resnet_input(
                    load_images(images_batch, image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize(
                        (args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            # Count every optimization step so --video_skip actually skips
            # frames (the counter used to stay at 0 forever).
            vid_count += 1
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
# Build the perceptual model on top of the generator's output image.
perceptual_model = PerceptualModel(image_size, layer=9, batch_size=batch_size)
perceptual_model.build_perceptual_model(generator.generated_image)

# The single face image to encode is taken from the command line; the output
# is written next to it as "<path without extension>_generated.png".
face_img_path = sys.argv[2]
face_img_list = [face_img_path]
file_path = os.path.splitext(face_img_path)[0]

# Optimize (only) dlatents by minimizing perceptual loss between reference and
# generated images in feature space
batch_count = len(face_img_list) // batch_size
for images_batch in tqdm(split_to_batches(face_img_list, batch_size), total=batch_count):
    names = []
    for image_path in images_batch:
        base_name = os.path.basename(image_path)
        names.append(os.path.splitext(base_name)[0])
    label = ' '.join(names)

    perceptual_model.set_reference_images(images_batch)
    steps = perceptual_model.optimize(generator.dlatent_variable,
                                      iterations=iterations,
                                      learning_rate=lr)
    progress = tqdm(steps, leave=False, total=iterations)
    for loss in progress:
        progress.set_description(label + ' Loss: %.2f' % loss)
    print(label, ' loss:', loss)

    # Generate images from found dlatents and save them
    generated_images = generator.generate_images()
    generated_dlatents = generator.get_dlatents()
    for img_array, _dlatent, _img_name in zip(generated_images, generated_dlatents, names):
        Image.fromarray(img_array, 'RGB').save(file_path + '_generated.png')
def main():
    """Encode every image in ``src_dir`` into StyleGAN dlatents.

    Loads the StyleGAN networks, optionally the LPIPS model and a pretrained
    ResNet that predicts initial dlatents, and then for each batch:

    1. saves the initially guessed image to ``guessed_images_dir``;
    2. optimizes the dlatents, remembering the lowest-loss ones;
    3. saves the generated image (``.png``) and the dlatents (``.npy``).

    Finally all per-image ``.npy`` files in ``dlatent_dir`` are stacked, in
    sorted filename order, into ``<dlatent_dir>/output_vectors.npy``.

    Raises:
        Exception: if ``src_dir`` contains no regular files.
    """

    def str2bool(value):
        """Parse a command-line boolean ('true', case-insensitive, is True)."""
        return str(value).lower() == 'true'

    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Output directories setting
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('guessed_images_dir', help='Directory for storing initially guessed images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')

    # General params
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)
    parser.add_argument('--use_resnet', default=True,
                        help='Use pretrained ResNet for approximating dlatents', type=str2bool)

    # Perceptual model params
    parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--lr', default=0.02, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--decay_steps', default=10,
                        help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the Resnet model', type=int)

    # Loss function options
    parser.add_argument('--use_vgg_loss', default=0.4,
                        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument('--use_pixel_loss', default=1.5,
                        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_mssim_loss', default=100,
                        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_lpips_loss', default=100,
                        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_l1_penalty', default=1,
                        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)

    # Generator params
    parser.add_argument('--randomize_noise', default=False,
                        help='Add noise to dlatents during optimization', type=str2bool)
    parser.add_argument('--tile_dlatents', default=False,
                        help='Tile dlatents to use a single vector at each scale', type=str2bool)
    parser.add_argument('--clipping_threshold', default=2.0,
                        help='Stochastic clipping of gradient values outside of this threshold', type=float)

    # Masking params
    parser.add_argument('--mask_dir', default='masks/encoding', help='Directory for storing optional masks')
    parser.add_argument('--face_mask', default=False,
                        help='Generate a mask for predicting only the face area', type=str2bool)
    parser.add_argument('--use_grabcut', default=True,
                        help='Use grabcut algorithm on the face mask to better segment the foreground',
                        type=str2bool)
    parser.add_argument('--scale_mask', default=1.5,
                        help='Look over a wider section of foreground for grabcut', type=float)

    args, other_args = parser.parse_known_args()

    # Calculate steps as a percent of total iterations
    args.decay_steps *= 0.01 * args.iterations

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = sorted(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    # Create output directories
    os.makedirs('data', exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.guessed_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    if args.face_mask:
        os.makedirs(args.mask_dir, exist_ok=True)

    # Initialize generator
    tflib.init_tf()
    with open_url(url_styleGAN, cache_dir='cache') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(model=Gs_network,
                          batch_size=args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    # Initialize perceptual model
    perc_model = None
    if args.use_lpips_loss > 1e-7:
        with open_url(url_VGG_perceptual, cache_dir='cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    # Initialize ResNet model
    resnet_model = None
    if args.use_resnet:
        print("\nLoading ResNet Model:")
        resnet_model_fn = 'data/finetuned_resnet.h5'
        gdown.download(url_resnet, resnet_model_fn, quiet=True)
        resnet_model = load_model(resnet_model_fn)

    # Optimize (only) dlatents by minimizing perceptual loss between reference
    # and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        perceptual_model.set_reference_images(images_batch)

        # Predict initial dlatents with ResNet model
        if resnet_model is not None:
            dlatents = resnet_model.predict(
                preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
            generator.set_dlatents(dlatents)

        # Generate and save initially guessed images
        initial_images = generator.generate_images()
        for img_array, img_name in zip(initial_images, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.guessed_images_dir, f'{img_name}.png'), 'PNG')

        # Optimization process to find best latent vectors
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        progress_bar = tqdm(op, leave=False, total=args.iterations)
        best_loss = None
        best_dlatent = None
        for loss_dict in progress_bar:
            progress_bar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()

    # Concatenate and save dlatent vectors.
    # - Paths are built with os.path.join: plain string concatenation only
    #   worked when dlatent_dir happened to end with a path separator.
    # - The stacked output of a previous run (and any non-.npy file) is
    #   skipped so re-running does not try to stack it with the per-image
    #   vectors.
    stacked_name = 'output_vectors.npy'
    dlatent_files = sorted(
        fn for fn in os.listdir(args.dlatent_dir)
        if fn.endswith('.npy') and fn != stacked_name)
    final_w_vectors = np.array(
        [np.load(os.path.join(args.dlatent_dir, fn)) for fn in dlatent_files])
    np.save(os.path.join(args.dlatent_dir, stacked_name), final_w_vectors)
def main():
    """Encode images with StyleGAN2, continuing from a given initial dlatent.

    Positional arguments (all required): ``src_dir``, ``generated_images_dir``,
    ``dlatent_dir``, ``init_dlatent`` and ``move_to_folder``. For each batch
    of images in ``src_dir`` the dlatents are optimized against the perceptual
    loss, the processed source files are moved into ``move_to_folder``, and
    the generated image (``.png``) and dlatents (``.npy``) are saved.

    Raises:
        Exception: if ``src_dir`` contains no regular files.
    """

    def str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` is wrong for argparse: ``bool('False')`` is ``True``.
        """
        return str(value).lower() in ('true', '1', 'yes', 'y', 't')

    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    # argparse ignores ``default`` on required positionals, so none is given.
    parser.add_argument('init_dlatent', help='path to init dlatent or False')
    parser.add_argument('move_to_folder', help='Directory to move processed source images into')
    # '--iterations' used to be registered twice, which makes argparse raise
    # ArgumentError (conflicting option string) before anything runs.
    parser.add_argument('--iterations', default=30, help='Number of optimization steps for each batch', type=int)
    parser.add_argument('--network_pkl', default='gdrive:networks/stylegan2-ffhq-config-f.pkl',
                        help='Path to local copy of stylegan2-ffhq-config-f.pkl')
    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1., help='Learning rate for perceptual model', type=float)

    # Generator params
    parser.add_argument('--randomize_noise', default=False,
                        help='Add noise to dlatents during optimization', type=str2bool)
    args, other_args = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.move_to_folder, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    generator_network, discriminator_network, Gs_network = pretrained_networks.load_networks(
        args.network_pkl)

    generator = Generator(Gs_network, args.batch_size, randomize_noise=args.randomize_noise)
    print("CONTINUING FROM PREVIOUS DLATENT")
    generator.define_dlatents(args.init_dlatent)

    perceptual_model = PerceptualModel(args.image_size, layer=9, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference
    # and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Move every processed source file out of src_dir. The real path and
        # file name are used, so this works on any OS, for any image
        # extension and for batch sizes above one.
        for src_path in images_batch:
            shutil.move(src_path,
                        os.path.join(args.move_to_folder, os.path.basename(src_path)))

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)
def encode(resnet, learning_rate=0.02, iterations=200):
    """Encode the aligned images under the ``stylegan`` directory into dlatents.

    Builds a fixed configuration (the same options the command-line encoder
    exposes), then optimizes the dlatents of every image found in
    ``<stylegan>/aligned_images``. For each image the lowest-loss dlatents
    are used to write a ``.png`` to ``generated_images``, a ``.npy`` to
    ``latent_representations`` and an ``.avi`` of the optimization to
    ``videos``.

    Args:
        resnet: Path, relative to the ``stylegan`` directory, of the ResNet
            model used to predict initial dlatents. If the file does not
            exist, optimization starts from the generator's current dlatents.
        learning_rate: Learning rate for the perceptual model.
        iterations: Number of optimization steps per batch.

    Raises:
        Exception: if the aligned-images directory contains no regular files.
    """
    args = eden.utils.DictMap()
    args.src_dir = os.path.join(stylegan, 'aligned_images')
    args.generated_images_dir = os.path.join(stylegan, 'generated_images')
    args.dlatent_dir = os.path.join(stylegan, 'latent_representations')
    args.load_last = None
    args.dlatent_avg = None
    args.model_res = 1024
    args.batch_size = 1

    # Perceptual model params
    args.image_size = 256
    args.resnet_image_size = 256
    args.lr = learning_rate
    args.decay_rate = 0.9
    args.iterations = iterations
    args.decay_steps = 10
    args.load_effnet = None
    args.load_resnet = os.path.join(stylegan, resnet)

    # Loss function options
    args.use_vgg_loss = 0.4
    args.use_vgg_layer = 9
    args.use_pixel_loss = 1.5
    args.use_mssim_loss = 100
    args.use_lpips_loss = 100
    args.use_l1_penalty = 1

    # Generator params
    args.randomize_noise = False
    args.tile_dlatents = False
    args.clipping_threshold = 2.0

    # Masking params
    args.load_mask = False
    args.face_mask = False
    args.use_grabcut = True
    args.scale_mask = 1.5

    # Video params
    args.video_dir = os.path.join(stylegan, 'videos')
    args.output_video = True
    args.video_codec = 'MJPG'
    args.video_frame_rate = 30
    args.video_size = 1024
    args.video_skip = 1

    # Calculate steps as a percent of total iterations
    args.decay_steps *= 0.01 * args.iterations

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    eden.utils.try_make_folder(args.generated_images_dir)
    eden.utils.try_make_folder(args.dlatent_dir)
    eden.utils.try_make_folder(args.video_dir)

    # Initialize generator and perceptual model
    generator = Generator(Gs, args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if args.dlatent_avg is not None:
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if args.use_lpips_loss > 0.00000001:
        cache_dir = os.path.join(stylegan, config.cache_dir)
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference
    # and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate,
                    (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)

        dlatents = None
        if args.load_last is not None:
            # Load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(
                    np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                if dlatents is None:
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            # Lazily load a feed-forward model (ResNet first, EfficientNet as
            # fallback) the first time it is needed.
            if ff_model is None:
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if ff_model is None:
                # load_effnet may be None; os.path.exists(None) raises
                # TypeError, so check for None explicitly first.
                if args.load_effnet is not None and os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if ff_model is not None:
                # Predict initial dlatents with the feed-forward model
                dlatents = ff_model.predict(
                    preprocess_input(
                        load_images(images_batch, image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = Image.fromarray(batch_frames[i], 'RGB').resize(
                        (args.video_size, args.video_size), Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            # Count every optimization step so video_skip actually skips
            # frames (the counter used to stay at 0 forever).
            vid_count += 1
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
def main():
    """Optimize latents or dlatents for every image in ``--src_dir``.

    For each source image a fresh ``Generator`` is created in the requested
    ``--mode``, the perceptual model is built against it, the optimization is
    run for ``--iterations`` steps, and the resulting latent is saved as
    ``<mode>/<image name>.npy`` (the directory is named after the mode).

    Raises:
        Exception: if ``--src_dir`` contains no regular files.
    """

    def str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` is wrong for argparse: ``bool('False')`` is ``True``.
        """
        return str(value).lower() in ('true', '1', 'yes', 'y', 't')

    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual loss')

    # Input and Output directories
    parser.add_argument('--src_dir', default='./images', help='Directory with images for encoding')
    parser.add_argument('--out_dir', default='./outputs', help='Directory for storing generated images')

    # Modes of run
    parser.add_argument('--mode', default='dlatents',
                        help='Mode depending on the variables we want to optimize (latents/dlatents)')
    parser.add_argument('--ref_dlatents', default='./outputs/temp_latents.npy',
                        help='Required for the regression of latents given the dlatents')

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=0.009, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=10, help='Number of optimization steps for each batch', type=int)

    # Generator params
    parser.add_argument('--randomize_noise', default=False,
                        help='Add noise to dlatents during optimization', type=str2bool)
    args, other_args = parser.parse_known_args()

    # Store in a list the path for all the images of the source directory
    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    names = [os.path.splitext(os.path.basename(x))[0] for x in ref_images]

    if len(ref_images) == 0:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.out_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    perceptual_model = PerceptualModel(args.image_size, layer=9)

    for name, ref_img in zip(names, ref_images):
        generator = Generator(Gs_network, randomize_noise=args.randomize_noise, mode=args.mode)
        perceptual_model.build_perceptual_model(generator.generated_image,
                                                args.mode, args.ref_dlatents, ref_img)
        perceptual_model.set_reference_images(ref_img)
        op = perceptual_model.optimize(generator.latent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr,
                                       out_dir=args.out_dir,
                                       img_name=name)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            pbar.set_description(name + ' Loss: %.2f' % loss)
        print(name, ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatent = generator.get_latents()

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(args.mode, exist_ok=True)
        np.save(os.path.join(args.mode, name + '.npy'), generated_dlatent)
def optimize():
    """HTTP handler: refine the supplied dlatents towards a reference image.

    Query parameters:
        u64latents: URL-safe base64 latents, decoded by
            ``u64latents_to_latents``; the request is aborted with 413 when
            that helper returns ``None``.
        u64reference: URL-safe base64 encoded reference image.
        Any key of ``optional_args`` below may be overridden; each value is
        coerced to the type of its default.

    Returns:
        A streaming ``Response`` of newline-terminated URL-safe base64
        dlatents: one line each time the loss improves, plus a final line
        with the best (averaged) dlatents.

    Aborts with 400 when ``u64reference`` is missing or not valid base64, or
    when an override cannot be converted to the expected type.
    """
    latents = u64latents_to_latents(request.args.get('u64latents'))
    if latents is None:
        abort(413)

    u64reference_webp = request.args.get('u64reference')
    if u64reference_webp is None:
        abort(400)
    try:
        # todo: restrict input formats
        reference_image = io.BytesIO(base64.urlsafe_b64decode(u64reference_webp))
    except (TypeError, ValueError):
        # binascii.Error (bad padding/characters) is a ValueError subclass.
        abort(400)

    generator.set_dlatents(latents)

    model_url = "gdrive:networks/stylegan2-ffhq-config-f.pkl"
    vgg_url = "https://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl"
    optional_args = {
        "lr": 0.857,
        "decay_rate": 0.95,
        "iterations": 25,
        "decay_steps": 4,
        "image_size": 256,
        "use_vgg_layer": 9,
        "use_vgg_loss": 100,
        "use_pixel_loss": 1,
        "use_mssim_loss": 100,
        "use_lpips_loss": 0,
        "use_l1_penalty": 1,
        "use_adaptive_loss": False,  # requires tf >= 2
        "sharpen_input": False,
        "batch_size": 1,
        "use_discriminator_loss": 0,
        "optimizer": "ggt",
        "average_best_loss": 0.25,
    }
    forced_args = {
        "face_mask": False,
        "use_grabcut": None,
        "scale_mask": None,
        "mask_dir": None,
        "vgg_url": vgg_url,  # we do not load random pickles that Internet Users ask us to
        "batch_size": 1,
        "model_url": model_url,  # we do not load random pickles that Internet Users ask us to
        "model_res": 1024,
    }

    def coerce(key, default):
        """Return the request's value for *key*, converted to type(default)."""
        raw = request.args.get(key)
        if raw is None:
            return default
        if isinstance(default, bool):
            # bool("False") is True, so parse booleans explicitly. Anything
            # that is not a recognised "false" spelling stays truthy, as
            # before.
            return raw.strip().lower() not in ('', '0', 'false', 'no', 'off')
        try:
            return type(default)(raw)
        except (TypeError, ValueError):
            abort(400)

    merged_args = {k: coerce(k, default) for k, default in optional_args.items()}
    # Forced values always win over anything supplied by the client.
    merged_args.update(forced_args)
    args = dict_as_namedtuple(merged_args, name="args")

    perc_model = None
    if args.use_lpips_loss > 0.00000001:
        with dnnlib.util.open_url(args.vgg_url, cache_dir='.stylegan2-cache') as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        perceptual_model.build_perceptual_model(generator, discriminator_network)
    perceptual_model.set_reference_images([reference_image])
    op = perceptual_model.optimize(generator.dlatent_variable,
                                   iterations=args.iterations,
                                   use_optimizer=args.optimizer)

    def generate():
        """Run the optimization lazily, yielding dlatents as it improves."""
        best_loss = None
        best_dlatent = None
        for loss_dict in op:
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None:
                    best_dlatent = generator.get_dlatents()
                else:
                    # Blend the previous best with the new, better dlatents.
                    best_dlatent = (args.average_best_loss * best_dlatent
                                    + (1 - args.average_best_loss) * generator.get_dlatents())
                yield base64.urlsafe_b64encode(generator.get_dlatents().tobytes('C')) + b"\n"
                generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            generator.stochastic_clip_dlatents()
        yield base64.urlsafe_b64encode(best_dlatent.tobytes('C')) + b"\n"

    return Response(generate())
def styleGAN_encoder(args):
    """Encode every image in ``args.src_dir`` into StyleGAN dlatents.

    For each batch of reference images the dlatents are initialised (from
    ``args.load_last`` or a feed-forward ResNet/EfficientNet model when
    available), optimized against the perceptual model, and the lowest-loss
    result is written as a PNG to ``args.generated_images_dir`` and as a
    ``.npy`` file to ``args.dlatent_dir``. Optionally records a video of the
    optimization per image.

    Args:
        args: namespace-like settings object (see the attributes read below).
            It is not modified; the function works on a shallow copy.

    Raises:
        Exception: if ``args.src_dir`` contains no regular files.
    """
    import copy

    start_ = time.time()
    # Work on a copy so the percent -> steps conversion below does not
    # compound on the caller's object if this function is called again.
    args = copy.copy(args)
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2

    ref_images = [os.path.join(args.src_dir, x)
                  for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if not ref_images:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # NOTE(review): pickle.load executes arbitrary code; model_url must be trusted.
    with dnnlib.util.open_url(args.model_url,
                              cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if args.dlatent_avg != '':
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if args.use_lpips_loss > 0.00000001:
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0]
                 for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate,
                    (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if args.load_last != '':  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(
                    np.load(os.path.join(args.load_last, f'{name}.npy')),
                    axis=0)
                if dlatents is None:
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            # The feed-forward model is loaded lazily and reused across batches.
            if ff_model is None:
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if ff_model is None:
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if ff_model is not None:  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            # Count every step so that video_skip actually skips frames.
            vid_count += 1
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'),
                    dlatent)
        generator.reset_dlatents()

    end_ = time.time()
    logging.info('图像的StyleEncoder编码耗费时间: %.2fs' % (end_ - start_))
def styleGAN_encoder(args, path_A, path_B):
    """Encode the two aligned images derived from *path_A* and *path_B*.

    The reference images are ``<args.src_dir>/<stem>.png`` for the stem of
    each path; ones that do not exist as files are silently skipped. For each
    batch the lowest-loss dlatents are saved to ``args.dlatent_dir`` as
    ``<stem>.npy``.

    Args:
        args: namespace-like settings object. It is not modified; the
            function works on a shallow copy.
        path_A: path whose base name (without extension) names the first image.
        path_B: path whose base name (without extension) names the second image.
    """
    import copy

    start_ = time.time()
    # Work on a copy so the percent -> steps conversion below does not
    # compound on the caller's object when this function is called again.
    args = copy.copy(args)
    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    src_dir = args.src_dir
    name_A = src_dir + '/%s.png' % os.path.basename(os.path.splitext(path_A)[0])
    name_B = src_dir + '/%s.png' % os.path.basename(os.path.splitext(path_B)[0])
    ref_images = [name_A, name_B]
    ref_images = list(filter(os.path.isfile, ref_images))

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # NOTE(review): pickle.load executes arbitrary code; model_url must be trusted.
    with dnnlib.util.open_url(args.model_url,
                              cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)

    perc_model = None
    if args.use_lpips_loss > 0.00000001:
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0]
                 for x in images_batch]
        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if args.load_last != '':  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(
                    np.load(os.path.join(args.load_last, f'{name}.npy')),
                    axis=0)
                if dlatents is None:
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            # The feed-forward model is loaded lazily and reused across batches.
            if ff_model is None:
                if os.path.exists(args.load_resnet):
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
                    from keras.applications.resnet50 import preprocess_input
            if ff_model is None:
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
                    from efficientnet import preprocess_input
            if ff_model is not None:  # predict initial dlatents with ResNet model
                dlatents = ff_model.predict(
                    preprocess_input(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        # Generate images from found dlatents and save them
        # (only the dlatents are written to disk in this variant).
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'),
                    dlatent)
        generator.reset_dlatents()

    end_ = time.time()
    logging.info('The time it takes for the StyleGAN Encoder: %.2fs' % (end_ - start_))
def main():
    """Encode every file in ``args.src_dir`` with a locally stored model.

    Settings come from ``do_parsing()``. Outputs are written under
    ``<generated_images_dir>/<model_name>`` (PNG images) and
    ``<dlatent_dir>/<model_name>`` (``.npy`` dlatents).
    """
    args = do_parsing()
    print(args)

    candidates = (os.path.join(args.src_dir, entry)
                  for entry in os.listdir(args.src_dir))
    ref_images = [path for path in candidates if os.path.isfile(path)]
    if not ref_images:
        raise Exception('%s is empty' % args.src_dir)

    # Load pre-trained network, already on file system
    model_filepath = ModelRetriever().get_model_filepath(args.model_name)

    generated_images_dir = os.path.join(args.generated_images_dir,
                                        args.model_name)
    dlatent_dir = os.path.join(args.dlatent_dir, args.model_name)
    for out_dir in (generated_images_dir, dlatent_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with open(model_filepath, "rb") as model_file:
        generator_network, discriminator_network, Gs_network = pickle.load(
            model_file)

    generator = Generator(Gs_network, args.batch_size,
                          randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between
    # reference and generated images in feature space
    batches = split_to_batches(ref_images, args.batch_size)
    expected_batches = len(ref_images) // args.batch_size
    for images_batch in tqdm(batches, total=expected_batches):
        names = []
        for image_path in images_batch:
            stem, _ext = os.path.splitext(os.path.basename(image_path))
            names.append(stem)
        label = ' '.join(names)

        perceptual_model.set_reference_images(images_batch)
        steps = perceptual_model.optimize(generator.dlatent_variable,
                                          iterations=args.iterations,
                                          learning_rate=args.lr)
        progress = tqdm(steps, leave=False, total=args.iterations)
        for loss in progress:
            progress.set_description(label + ' Loss: %.2f' % loss)
        print(label, ' loss:', loss)

        # Generate images from found dlatents and save them
        images = generator.generate_images()
        dlatents = generator.get_dlatents()
        for pixels, dlatent, stem in zip(images, dlatents, names):
            picture = PIL.Image.fromarray(pixels, 'RGB')
            picture.save(os.path.join(generated_images_dir, f'{stem}.png'),
                         'PNG')
            np.save(os.path.join(dlatent_dir, f'{stem}.npy'), dlatent)

        generator.reset_dlatents()
def main():
    """Encode every file in ``src_dir``, checkpointing while optimizing.

    Parses the command line, optimizes dlatents for each batch of reference
    images and calls ``make_checkpoint`` on each of the first 99 optimization
    steps and on every 100th step after that (the step counter runs across
    batches). Final images and dlatents are saved with the iteration count in
    the file name.
    """

    def _str2bool(value):
        """Parse a command-line boolean; ``type=bool`` treats "False" as True."""
        lowered = value.strip().lower()
        if lowered in ('yes', 'true', 't', 'y', '1'):
            return True
        if lowered in ('', 'no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256,
                        help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1.,
                        help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=1000,
                        help='Number of optimization steps for each batch',
                        type=int)

    # Generator params
    parser.add_argument('--randomize_noise', default=False,
                        help='Add noise to dlatents during optimization',
                        type=_str2bool)
    args, other_args = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x)
                  for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if not ref_images:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size,
                          randomize_noise=args.randomize_noise)
    # TODO: load dlatents here to pick up training if interrupted.
    # latent = np.load('filename.npy')
    # generator.set_dlatents()
    perceptual_model = PerceptualModel(args.image_size, layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    counter = 0
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0]
                 for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        for loss in pbar:
            counter = counter + 1
            checkpointed = False
            if counter % 100 == 0 or counter < 100:
                checkpointed = make_checkpoint(counter, generator, names,
                                               args.generated_images_dir,
                                               args.dlatent_dir)
                print("****************************")
                print(f"*counter: {counter} *")
                print("****************************")
            pbar.set_description(
                ' '.join(names) +
                f" counter: {counter}, checkpointed: {checkpointed}" +
                ' Last Loss: %.2f' % loss)
        # This is the output
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images,
                                                generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir,
                             f'{args.iterations}_iters_{img_name}.png'),
                'PNG')
            np.save(
                os.path.join(args.dlatent_dir,
                             f'{args.iterations}_{img_name}.npy'), dlatent)

        generator.reset_dlatents()
def encodeImages(self, src_dir, generated_images_dir, dlatent_dir,
                 batch_size=1, image_size=256, lr=1, iterations=1000,
                 randomize_noise=False):
    """
    Find latent representation of reference images using perceptual loss

    Params:
        src_dir: Directory with images for encoding
        generated_images_dir: Directory for storing generated images
        dlatent_dir: Directory for storing dlatent representations
        batch_size: Batch size for generator and perceptual model
        image_size: Size of images for perceptual model
        lr: Learning rate for perceptual model
        iterations: Number of optimization steps for each batch
        randomize_noise: Add noise to dlatents during optimization
            (accepted for interface compatibility; not read in this method)
    """
    candidates = (os.path.join(src_dir, entry)
                  for entry in os.listdir(src_dir))
    ref_images = [path for path in candidates if os.path.isfile(path)]
    if not ref_images:
        raise Exception('%s is empty' % src_dir)

    for out_dir in (generated_images_dir, dlatent_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    perceptual_model = PerceptualModel(image_size, layer=9,
                                       batch_size=batch_size)
    perceptual_model.build_perceptual_model(self.generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between
    # reference and generated images in feature space
    batches = self._split_to_batches(ref_images, batch_size)
    expected_batches = len(ref_images) // batch_size
    for images_batch in tqdm(batches, total=expected_batches):
        names = []
        for image_path in images_batch:
            stem, _ext = os.path.splitext(os.path.basename(image_path))
            names.append(stem)
        label = ' '.join(names)

        perceptual_model.set_reference_images(images_batch)
        steps = perceptual_model.optimize(self.generator.dlatent_variable,
                                          iterations=iterations,
                                          learning_rate=lr)
        progress = tqdm(steps, leave=False, total=iterations)
        for loss in progress:
            progress.set_description(label + ' Loss: %.2f' % loss)
        print(label, ' loss:', loss)

        # Generate images from found dlatents and save them
        images = self.generator.generate_images()
        dlatents = self.generator.get_dlatents()
        for pixels, dlatent, stem in zip(images, dlatents, names):
            picture = PIL.Image.fromarray(pixels, 'RGB')
            picture.save(os.path.join(generated_images_dir, f'{stem}.png'),
                         'PNG')
            np.save(os.path.join(dlatent_dir, f'{stem}.npy'), dlatent)

        self.generator.reset_dlatents()
def main():
    """Encode the single image given by ``--src_img`` into StyleGAN dlatents.

    ``--src_dir`` is only checked for being non-empty; the image that is
    optimized is ``--src_img``. The result is saved as a JPEG in
    ``--generated_images_dir`` and as a ``.npy`` file in ``--dlatent_dir``.
    The model pickle is opened from the local path ``URL_FFHQ``.
    """

    def _str2bool(value):
        """Parse a command-line boolean; ``type=bool`` treats "False" as True."""
        lowered = value.strip().lower()
        if lowered in ('yes', 'true', 't', 'y', '1'):
            return True
        if lowered in ('', 'no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('--src_img', default='001.jpg',
                        help='Path of the image to encode')
    parser.add_argument('--src_dir', default='./aligned_images',
                        help='Directory with images for encoding')
    parser.add_argument('--generated_images_dir',
                        default='./generated_images/',
                        help='Directory for storing generated images')
    parser.add_argument('--dlatent_dir', default='./latent/',
                        help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=2,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256,
                        help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=.65,
                        help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=400,
                        help='Number of optimization steps for each batch',
                        type=int)

    # Generator params
    # With type=bool this default-True flag could never be switched off.
    parser.add_argument('--randomize_noise', default=True,
                        help='Add noise to dlatents during optimization',
                        type=_str2bool)
    args, other_args = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x)
                  for x in os.listdir(args.src_dir)]
    if not ref_images:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with open(URL_FFHQ, mode='rb') as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size,
                          randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    images_batch = [args.src_img]
    names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

    perceptual_model.set_reference_images(images_batch)
    op = perceptual_model.optimize(generator.dlatent_variable,
                                   iterations=args.iterations,
                                   learning_rate=args.lr)
    pbar = tqdm(op, leave=True, total=args.iterations)
    for loss in pbar:
        pbar.set_description(' '.join(names) + ' Loss: %.2f' % loss)

    # Generate images from found dlatents and save them
    generated_images = generator.generate_images()
    generated_dlatents = generator.get_dlatents()
    # zip stops at len(names), so only the requested image is written even
    # when the generator batch is larger.
    for img_array, dlatent, img_name in zip(generated_images,
                                            generated_dlatents, names):
        img = PIL.Image.fromarray(img_array, 'RGB')
        img.save(os.path.join(args.generated_images_dir, f'{img_name}.jpg'),
                 'JPEG')
        np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

    generator.reset_dlatents()
    print("Done image generated")
def main():
    """Encode every file in ``src_dir``, keeping the lowest-loss result.

    For each batch the optimizer yields ``(i, per_loss, reg_loss, loss)``.
    Once ``i`` exceeds 40% of ``--iterations``, each new minimum of ``loss``
    snapshots the generated images and dlatents of the whole batch. The
    snapshot is written to disk when ``i`` is a multiple of 100 and again
    after the last step.
    """

    def _str2bool(value):
        """Parse a command-line boolean; ``type=bool`` treats "False" as True."""
        lowered = value.strip().lower()
        if lowered in ('yes', 'true', 't', 'y', '1'):
            return True
        if lowered in ('', 'no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256,
                        help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1.,
                        help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=2000,
                        help='Number of optimization steps for each batch',
                        type=int)

    # Generator params
    parser.add_argument('--randomize_noise', default=False,
                        help='Add noise to dlatents during optimization',
                        type=_str2bool)
    args, other_args = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x)
                  for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if not ref_images:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size,
                          randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    def snapshot(names):
        """Return (name, image, dlatent) for every image of the current batch."""
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        return [(img_name, PIL.Image.fromarray(img_array, 'RGB'), dlatent)
                for img_array, dlatent, img_name in zip(
                    generated_images, generated_dlatents, names)]

    def save(best):
        """Write every snapshotted image and dlatent to the output directories."""
        for img_name, img, dlatent in best:
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'),
                    dlatent)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0]
                 for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)

        min_loss = np.inf
        loss = None
        # Snapshot of the whole batch at the best loss; previously only the
        # last image of the batch survived the inner loop and was saved.
        best = []
        for i, per_loss, reg_loss, loss in pbar:
            summary = ('\n' + ' '.join(names) +
                       ' Per/Reg/Total Loss: [{0:.2f},{1:.2f},{2:.2f}]'.format(
                           per_loss, reg_loss, loss))
            # Generate images from found dlatents and save them
            if loss < min_loss and i > 0.4 * args.iterations:
                min_loss = loss
                best = snapshot(names)
                print(summary + '<-- BEST')
            else:
                print(summary)

            if i % 100 == 0 and best:
                save(best)

        print(' '.join(names), ' loss:', loss)
        if not best:
            # No step qualified (e.g. very few iterations): fall back to the
            # generator's current state instead of crashing on a None image.
            best = snapshot(names)
        save(best)

        generator.reset_dlatents()
def main():
    """Command-line entry point: encode every file in ``src_dir`` into dlatents.

    For each batch of reference images the dlatents are initialised (from
    ``--load_last`` or a feed-forward ResNet/EfficientNet model when one
    exists), optimized with the perceptual model (optionally with early
    stopping and best-loss averaging), optionally composited with a mask over
    the original image, and saved as PNG plus ``.npy``. Optionally records a
    video of the optimization per image.

    Raises:
        Exception: if ``src_dir`` contains no regular files.
    """
    parser = argparse.ArgumentParser(
        description=
        'Find latent representation of reference images using perceptual losses',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Shared keyword arguments for every boolean switch.
    flag = dict(type=str2bool, nargs='?', const=True)

    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir',
                        help='Directory for storing generated images')
    parser.add_argument('dlatent_dir',
                        help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data',
                        help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks',
                        help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='',
                        help='Start with embeddings from directory')
    parser.add_argument(
        '--dlatent_avg', default='',
        help=
        'Use dlatent from file specified here for truncation instead of dlatent_avg from Gs'
    )
    parser.add_argument(
        '--model_url',
        default=
        'https://drive.google.com/uc?id=1opTWG1jYlyS9TXAuqVyVR68kQWhOhA99',
        help='Fetch a StyleGAN model to train on from this URL'
    )  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024,
                        help='The dimension of images in the StyleGAN model',
                        type=int)
    parser.add_argument('--batch_size', default=1,
                        help='Batch size for generator and perceptual model',
                        type=int)
    parser.add_argument(
        '--optimizer', default='ggt',
        help='Optimization algorithm used for optimizing dlatents')

    # Perceptual model params
    parser.add_argument('--image_size', default=256,
                        help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256,
                        help='Size of images for the Resnet model', type=int)
    parser.add_argument('--lr', default=0.25,
                        help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9,
                        help='Decay rate for learning rate', type=float)
    parser.add_argument('--iterations', default=100,
                        help='Number of optimization steps for each batch',
                        type=int)
    parser.add_argument(
        '--decay_steps', default=4,
        help='Decay steps for learning rate decay (as a percent of iterations)',
        type=float)
    parser.add_argument('--early_stopping', default=True,
                        help='Stop early once training stabilizes', **flag)
    parser.add_argument('--early_stopping_threshold', default=0.5,
                        help='Stop after this threshold has been reached',
                        type=float)
    parser.add_argument('--early_stopping_patience', default=10,
                        help='Number of iterations to wait below threshold',
                        type=int)
    parser.add_argument(
        '--load_effnet', default='data/finetuned_effnet.h5',
        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument(
        '--load_resnet', default='data/finetuned_resnet.h5',
        help='Model to load for ResNet approximation of dlatents')
    parser.add_argument(
        '--use_preprocess_input', default=True,
        help='Call process_input() first before using feed forward net',
        **flag)
    parser.add_argument(
        '--use_best_loss', default=True,
        help='Output the lowest loss value found as the solution', **flag)
    parser.add_argument(
        '--average_best_loss', default=0.25,
        help=
        'Do a running weighted average with the previous best dlatents found',
        type=float)
    parser.add_argument('--sharpen_input', default=True,
                        help='Sharpen the input images', **flag)

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss', default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_vgg_layer', default=9,
                        help='Pick which VGG layer to use.', type=int)
    parser.add_argument(
        '--use_pixel_loss', default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_mssim_loss', default=200,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_lpips_loss', default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument(
        '--use_l1_penalty', default=0.5,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.',
        type=float)
    parser.add_argument('--use_discriminator_loss', default=0.5,
                        help='Use trained discriminator to evaluate realism.',
                        type=float)
    parser.add_argument(
        '--use_adaptive_loss', default=False,
        help=
        'Use the adaptive robust loss function from Google Research for pixel and VGG feature loss.',
        **flag)

    # Generator params
    parser.add_argument('--randomize_noise', default=False,
                        help='Add noise to dlatents during optimization',
                        **flag)
    parser.add_argument(
        '--tile_dlatents', default=False,
        help='Tile dlatents to use a single vector at each scale', **flag)
    parser.add_argument(
        '--clipping_threshold', default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold',
        type=float)

    # Masking params
    parser.add_argument('--load_mask', default=False,
                        help='Load segmentation masks', **flag)
    parser.add_argument(
        '--face_mask', default=True,
        help='Generate a mask for predicting only the face area', **flag)
    parser.add_argument(
        '--use_grabcut', default=True,
        help=
        'Use grabcut algorithm on the face mask to better segment the foreground',
        **flag)
    parser.add_argument(
        '--scale_mask', default=1.4,
        help='Look over a wider section of foreground for grabcut',
        type=float)
    parser.add_argument(
        '--composite_mask', default=True,
        help='Merge the unmasked area back into the generated image', **flag)
    parser.add_argument(
        '--composite_blur', default=8,
        help='Size of blur filter to smoothly composite the images', type=int)

    # Video params
    parser.add_argument('--video_dir', default='videos',
                        help='Directory for storing training videos')
    # Was type=bool, under which "--output_video False" enabled video output.
    parser.add_argument('--output_video', default=False,
                        help='Generate videos of the optimization process',
                        **flag)
    parser.add_argument('--video_codec', default='MJPG',
                        help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24,
                        help='Video frames per second', type=int)
    parser.add_argument('--video_size', default=512,
                        help='Video size in pixels', type=int)
    parser.add_argument(
        '--video_skip', default=1,
        help='Only write every n frames (1 = write every frame)', type=int)

    args, other_args = parser.parse_known_args()

    args.decay_steps *= 0.01 * args.iterations  # Calculate steps as a percent of total iterations

    if args.output_video:
        import cv2

    ref_images = [os.path.join(args.src_dir, x)
                  for x in os.listdir(args.src_dir)]
    ref_images = list(filter(os.path.isfile, ref_images))
    if not ref_images:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # NOTE(review): pickle.load executes arbitrary code; --model_url must be trusted.
    with dnnlib.util.open_url(args.model_url,
                              cache_dir=config.cache_dir) as f:
        generator_network, discriminator_network, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if args.dlatent_avg != '':
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if args.use_lpips_loss > 0.00000001:
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=15IYd9qY9wNd1SSeI4LxPjRBBJxiOzvhW',
                cache_dir=config.cache_dir) as f:  # vgg16_zhang_perceptual.pkl
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model,
                                       batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator, discriminator_network)

    ff_model = None

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0]
                 for x in images_batch]
        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate,
                    (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)
        dlatents = None
        if args.load_last != '':  # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(
                    np.load(os.path.join(args.load_last, f'{name}.npy')),
                    axis=0)
                if dlatents is None:
                    dlatents = dl
                else:
                    dlatents = np.vstack((dlatents, dl))
        else:
            # The feed-forward model is loaded lazily and reused across batches.
            if ff_model is None:
                if os.path.exists(args.load_resnet):
                    from keras.applications.resnet50 import preprocess_input
                    print("Loading ResNet Model:")
                    ff_model = load_model(args.load_resnet)
            if ff_model is None:
                if os.path.exists(args.load_effnet):
                    import efficientnet
                    from efficientnet import preprocess_input
                    print("Loading EfficientNet Model:")
                    ff_model = load_model(args.load_effnet)
            if ff_model is not None:  # predict initial dlatents with ResNet model
                if args.use_preprocess_input:
                    dlatents = ff_model.predict(
                        preprocess_input(
                            load_images(images_batch,
                                        image_size=args.resnet_image_size)))
                else:
                    dlatents = ff_model.predict(
                        load_images(images_batch,
                                    image_size=args.resnet_image_size))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable,
                                       iterations=args.iterations,
                                       use_optimizer=args.optimizer)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        avg_loss_count = 0
        if args.early_stopping:
            avg_loss = prev_loss = None
        for loss_dict in pbar:
            if args.early_stopping:  # early stopping feature
                if prev_loss is not None:
                    if avg_loss is not None:
                        # Decayed running sum of per-step loss improvements.
                        avg_loss = 0.5 * avg_loss + (prev_loss -
                                                     loss_dict["loss"])
                        if avg_loss < args.early_stopping_threshold:  # count while under threshold; else reset
                            avg_loss_count += 1
                        else:
                            avg_loss_count = 0
                        if avg_loss_count > args.early_stopping_patience:  # stop once threshold is reached
                            print("")
                            break
                    else:
                        avg_loss = prev_loss - loss_dict["loss"]
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                if best_dlatent is None or args.average_best_loss <= 0.00000001:
                    best_dlatent = generator.get_dlatents()
                else:
                    # Honour --average_best_loss; the weights used to be
                    # hard-coded to the default 0.25 / 0.75.
                    best_dlatent = (
                        args.average_best_loss * best_dlatent +
                        (1 - args.average_best_loss) *
                        generator.get_dlatents())
                if args.use_best_loss:
                    generator.set_dlatents(best_dlatent)
                best_loss = loss_dict["loss"]
            if args.output_video and (vid_count % args.video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(
                        batch_frames[i], 'RGB').resize(
                            (args.video_size, args.video_size),
                            PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(
                            np.array(video_frame).astype('uint8'),
                            cv2.COLOR_RGB2BGR))
            # Count every step so that --video_skip actually skips frames.
            vid_count += 1
            generator.stochastic_clip_dlatents()
            prev_loss = loss_dict["loss"]
        if not args.use_best_loss:
            best_loss = prev_loss
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        if args.use_best_loss:
            generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_path, img_name in zip(
                generated_images, generated_dlatents, images_batch, names):
            mask_img = None
            if args.composite_mask and (args.load_mask or args.face_mask):
                _, im_name = os.path.split(img_path)
                mask_img = os.path.join(args.mask_dir, f'{im_name}')
            if (args.composite_mask and mask_img is not None
                    and os.path.isfile(mask_img)):
                # Blend: generated pixels where the blurred mask is white,
                # original pixels where it is black.
                orig_img = PIL.Image.open(img_path).convert('RGB')
                width, height = orig_img.size
                imask = PIL.Image.open(mask_img).convert('L').resize(
                    (width, height))
                imask = imask.filter(
                    ImageFilter.GaussianBlur(args.composite_blur))
                mask = np.array(imask) / 255
                mask = np.expand_dims(mask, axis=-1)
                img_array = mask * np.array(img_array) + (
                    1.0 - mask) * np.array(orig_img)
                img_array = img_array.astype(np.uint8)
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(
                os.path.join(args.generated_images_dir, f'{img_name}.png'),
                'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'),
                    dlatent)

        generator.reset_dlatents()
def styleGAN_encoder(path_A, path_B):
    """Encode two aligned face images into StyleGAN dlatents.

    For each of ``path_A`` and ``path_B`` the reference image is
    ``aligned_images/<stem>.png``, where ``<stem>`` is the input's base name
    without its extension. Inputs whose aligned image is not an existing
    file are skipped (a warning is logged). For every remaining image the
    dlatent with the lowest observed loss is written to
    ``latent_representations/<stem>.npy``.

    Args:
        path_A: Path whose base name selects the first aligned image.
        path_B: Path whose base name selects the second aligned image.

    Returns:
        None. Results are written to disk and the elapsed time is logged.
    """
    started = time.time()

    src_dir = 'aligned_images'
    dlatent_dir = 'latent_representations'
    resnet_path = 'data/finetuned_resnet.h5'
    iterations = 100

    candidates = []
    for path in (path_A, path_B):
        stem = os.path.basename(os.path.splitext(path)[0])
        candidates.append(f'{src_dir}/{stem}.png')
    ref_images = []
    for candidate in candidates:
        if os.path.isfile(candidate):
            ref_images.append(candidate)
        else:
            # Previously dropped silently; make the skip visible.
            logging.warning('Aligned image not found, skipping: %s', candidate)

    os.makedirs('data', exist_ok=True)
    os.makedirs('masks', exist_ok=True)
    # np.save below does not create missing directories.
    os.makedirs(dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    tflib.init_tf()
    # NOTE: pickle.load can execute arbitrary code; only use trusted model URLs.
    with dnnlib.util.open_url('https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ', cache_dir='cache') as f:
        _, _, Gs_network = pickle.load(f)

    generator = Generator(Gs_network, 1, clipping_threshold=2.0, tiled_dlatent=False, model_res=1024, randomize_noise=False)
    print(generator.model_scale)

    # The perceptual network is always loaded (the original guard was a
    # constant-true comparison).
    with dnnlib.util.open_url('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir='cache') as f:
        perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(perc_model=perc_model, batch_size=1)
    perceptual_model.build_perceptual_model(generator)

    ff_model = None
    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, 1), total=len(ref_images)):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]
        perceptual_model.set_reference_images(images_batch)

        # Load the optional ResNet predictor once, on the first batch that needs it.
        if ff_model is None and os.path.exists(resnet_path):
            print("Loading ResNet Model:")
            ff_model = load_model(resnet_path)
            from keras.applications.resnet50 import preprocess_input
        if ff_model is not None:
            # predict initial dlatents with ResNet model
            dlatents = ff_model.predict(preprocess_input(load_images(images_batch, image_size=256)))
            if dlatents is not None:
                generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=iterations)
        pbar = tqdm(op, leave=False, total=iterations)
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            generator.stochastic_clip_dlatents()
        print(" ".join(names), " Loss {:.4f}".format(best_loss))
        print(best_dlatent)

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        print(generator.initial_dlatents)
        # generated_images is only used to bound the zip; just the dlatents are saved.
        for _img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            np.save(os.path.join(dlatent_dir, f'{img_name}.npy'), dlatent)
        generator.reset_dlatents()

    logging.info('The time it takes for the StyleGAN Encoder: %.2fs', time.time() - started)
def main():
    """Find dlatents for every image in a directory using a perceptual loss.

    Command-line arguments select the source directory, the output
    directories for generated images and dlatents, and the optimization
    settings. For each batch of reference images the dlatents are optimized,
    then the generated image is saved as ``<name>.png`` and the dlatent as
    ``<name>.npy``.

    Raises:
        Exception: If the source directory contains no regular files.
    """

    def _str2bool(value):
        """Parse a command-line boolean such as 'true'/'false' or '1'/'0'."""
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)

    parser = argparse.ArgumentParser(description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('src_dir', help='Directory with images for encoding')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')

    # for now it's unclear if larger batch leads to better performance/quality
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--lr', default=1., help='Learning rate for perceptual model', type=float)
    parser.add_argument('--iterations', default=1000, help='Number of optimization steps for each batch', type=int)

    # Generator params
    # type=bool would turn any non-empty string (even "False") into True.
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=_str2bool)
    args, _ = parser.parse_known_args()

    ref_images = [os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir)]
    ref_images = [path for path in ref_images if os.path.isfile(path)]
    if not ref_images:
        raise Exception('%s is empty' % args.src_dir)

    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)

    # Initialize generator and perceptual model
    URL_FFHQ = 'https://github.com/parameter-pollution/stylegan_paintings/releases/download/v0.1/network-snapshot-008040.pkl'
    tflib.init_tf()
    # NOTE: pickle.load can execute arbitrary code; only use trusted model URLs.
    with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f:
        _, _, Gs = pickle.load(f)

    generator = Generator(Gs, args.batch_size, randomize_noise=args.randomize_noise)
    perceptual_model = PerceptualModel(args.image_size, layer=9, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator.generated_image)

    # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images)//args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        perceptual_model.set_reference_images(images_batch)
        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations, learning_rate=args.lr)
        pbar = tqdm(op, leave=False, total=args.iterations)
        # Keep the summary print below valid even if the optimizer yields nothing.
        loss = None
        for loss in pbar:
            pbar.set_description(' '.join(names)+' Loss: %.2f' % loss)
        print(' '.join(names), ' loss:', loss)

        # Generate images from found dlatents and save them
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
def main():
    """Align a raw image and find dlatents for the aligned results.

    Steps, all driven by command-line arguments:

    1. Open ``<raw_dir>/<name>``.
    2. Run ``align_images(raw_dir, aligned_dir)``.
    3. For each batch of files in ``aligned_dir``, optimize dlatents against
       the perceptual model (optionally starting from saved dlatents or a
       ResNet/EfficientNet prediction), then save the generated image as
       ``<name>.png`` and the best dlatent as ``<name>.npy``. When
       ``--output_video`` is set, optimization frames are written to
       ``<video_dir>/<name>.avi``.

    Raises:
        Exception: If the aligned directory contains no regular files.
    """

    def _str2bool(value):
        """Parse a command-line boolean such as 'true'/'false' or '1'/'0'."""
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)

    parser = argparse.ArgumentParser(
        description='Find latent representation of reference images using perceptual loss')
    parser.add_argument('name', help='Name of a combined image')
    parser.add_argument('raw_dir', help='Directory with a raw image for encoding')
    parser.add_argument('aligned_dir', help='Directory with a aligned image')
    parser.add_argument('generated_images_dir', help='Directory for storing generated images')
    parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations')
    parser.add_argument('--data_dir', default='data', help='Directory for storing optional models')
    parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks')
    parser.add_argument('--load_last', default='', help='Start with embeddings from directory')
    parser.add_argument(
        '--dlatent_avg', default='',
        help='Use dlatent from file specified here for truncation instead of dlatent_avg from Gs')
    parser.add_argument(
        '--model_url',
        default='https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ',
        help='Fetch a StyleGAN model to train on from this URL')  # karras2019stylegan-ffhq-1024x1024.pkl
    parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int)
    parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int)

    # Perceptual model params
    parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int)
    parser.add_argument('--resnet_image_size', default=256, help='Size of images for the Resnet model', type=int)
    parser.add_argument('--lr', default=0.03, help='Learning rate for perceptual model', type=float)
    parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float)
    parser.add_argument('--iterations', default=1000, help='Number of optimization steps for each batch', type=int)
    parser.add_argument(
        '--decay_steps', default=10,
        help='Decay steps for learning rate decay (as a percent of iterations)', type=float)
    parser.add_argument(
        '--load_effnet', default='data/finetuned_effnet.h5',
        help='Model to load for EfficientNet approximation of dlatents')
    parser.add_argument(
        '--load_resnet', default='data/finetuned_resnet.h5',
        help='Model to load for ResNet approximation of dlatents')

    # Loss function options
    parser.add_argument(
        '--use_vgg_loss', default=0.4,
        help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int)
    parser.add_argument(
        '--use_pixel_loss', default=1.5,
        help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument(
        '--use_mssim_loss', default=100,
        help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument(
        '--use_lpips_loss', default=100,
        help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float)
    parser.add_argument(
        '--use_l1_penalty', default=1,
        help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float)

    # Generator params
    # Boolean flags use _str2bool: type=bool would turn any non-empty string
    # (even "False") into True.
    parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=_str2bool)
    parser.add_argument(
        '--tile_dlatents', default=False,
        help='Tile dlatents to use a single vector at each scale', type=_str2bool)
    parser.add_argument(
        '--clipping_threshold', default=2.0,
        help='Stochastic clipping of gradient values outside of this threshold', type=float)

    # Masking params
    parser.add_argument('--load_mask', default=False, help='Load segmentation masks', type=_str2bool)
    parser.add_argument(
        '--face_mask', default=False,
        help='Generate a mask for predicting only the face area', type=_str2bool)
    parser.add_argument(
        '--use_grabcut', default=True,
        help='Use grabcut algorithm on the face mask to better segment the foreground', type=_str2bool)
    parser.add_argument(
        '--scale_mask', default=1.5,
        help='Look over a wider section of foreground for grabcut', type=float)

    # Video params
    parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos')
    parser.add_argument('--output_video', default=False, help='Generate videos of the optimization process', type=_str2bool)
    parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name')
    parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int)
    parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int)
    parser.add_argument(
        '--video_skip', default=1,
        help='Only write every n frames (1 = write every frame)', type=int)

    args, _ = parser.parse_known_args()

    # Calculate steps as a percent of total iterations
    args.decay_steps *= 0.01 * args.iterations

    if args.output_video:
        import cv2

    # encoder_main
    os.makedirs(args.raw_dir, exist_ok=True)
    # Join with a separator so raw_dir works with or without a trailing slash.
    src_path = os.path.join(args.raw_dir, args.name)
    with PIL.Image.open(src_path) as img:
        wpercent = 256 / float(img.size[0])
        hsize = int(float(img.size[1]) * wpercent)
        # NOTE(review): the resized copy is not stored or passed on anywhere
        # in this function; the call is kept so the input is still opened and
        # decoded before alignment -- confirm the intent.
        img.resize((256, hsize), PIL.Image.LANCZOS)

    # align_images
    os.makedirs(args.aligned_dir, exist_ok=True)
    align_images(args.raw_dir, args.aligned_dir)

    # encode_images
    ref_images = [os.path.join(args.aligned_dir, x) for x in os.listdir(args.aligned_dir)]
    ref_images = [path for path in ref_images if os.path.isfile(path)]
    if not ref_images:
        raise Exception('%s is empty' % args.aligned_dir)

    os.makedirs(args.data_dir, exist_ok=True)
    os.makedirs(args.mask_dir, exist_ok=True)
    os.makedirs(args.generated_images_dir, exist_ok=True)
    os.makedirs(args.dlatent_dir, exist_ok=True)
    os.makedirs(args.video_dir, exist_ok=True)

    tflib.init_tf()
    # NOTE(review): args.model_url is not referenced in this function; the
    # generator is read from this fixed local path instead.
    ffhq = '/content/gdrive/My Drive/data/karras2019stylegan-ffhq-1024x1024.pkl'
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(ffhq, 'rb') as f:
        _, _, Gs_network = pickle.load(f)

    generator = Generator(Gs_network,
                          args.batch_size,
                          clipping_threshold=args.clipping_threshold,
                          tiled_dlatent=args.tile_dlatents,
                          model_res=args.model_res,
                          randomize_noise=args.randomize_noise)
    if args.dlatent_avg != '':
        generator.set_dlatent_avg(np.load(args.dlatent_avg))

    perc_model = None
    if args.use_lpips_loss > 0.00000001:
        with dnnlib.util.open_url(
                'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2',
                cache_dir=config.cache_dir) as f:
            perc_model = pickle.load(f)
    perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size)
    perceptual_model.build_perceptual_model(generator)

    # A non-positive skip would make the modulo below fail or misbehave.
    video_skip = max(1, args.video_skip)

    ff_model = None
    for images_batch in tqdm(split_to_batches(ref_images, args.batch_size),
                             total=len(ref_images) // args.batch_size):
        names = [os.path.splitext(os.path.basename(x))[0] for x in images_batch]

        if args.output_video:
            video_out = {}
            for name in names:
                video_out[name] = cv2.VideoWriter(
                    os.path.join(args.video_dir, f'{name}.avi'),
                    cv2.VideoWriter_fourcc(*args.video_codec),
                    args.video_frame_rate,
                    (args.video_size, args.video_size))

        perceptual_model.set_reference_images(images_batch)

        dlatents = None
        if args.load_last != '':
            # load previous dlatents for initialization
            for name in names:
                dl = np.expand_dims(np.load(os.path.join(args.load_last, f'{name}.npy')), axis=0)
                dlatents = dl if dlatents is None else np.vstack((dlatents, dl))
        else:
            # Load a feed-forward predictor once: ResNet first, EfficientNet as fallback.
            if ff_model is None and os.path.exists(args.load_resnet):
                print("Loading ResNet Model:")
                ff_model = load_model(args.load_resnet)
                from keras.applications.resnet50 import preprocess_input
            if ff_model is None and os.path.exists(args.load_effnet):
                import efficientnet
                print("Loading EfficientNet Model:")
                ff_model = load_model(args.load_effnet)
                from efficientnet import preprocess_input
            if ff_model is not None:
                # predict initial dlatents with the loaded model
                dlatents = ff_model.predict(
                    preprocess_input(load_images(images_batch, image_size=args.resnet_image_size)))
        if dlatents is not None:
            generator.set_dlatents(dlatents)

        op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations)
        pbar = tqdm(op, leave=False, total=args.iterations)
        vid_count = 0
        best_loss = None
        best_dlatent = None
        for loss_dict in pbar:
            pbar.set_description(" ".join(names) + ": " + "; ".join(
                ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()]))
            if best_loss is None or loss_dict["loss"] < best_loss:
                best_loss = loss_dict["loss"]
                best_dlatent = generator.get_dlatents()
            if args.output_video and (vid_count % video_skip == 0):
                batch_frames = generator.generate_images()
                for i, name in enumerate(names):
                    video_frame = PIL.Image.fromarray(batch_frames[i], 'RGB').resize(
                        (args.video_size, args.video_size), PIL.Image.LANCZOS)
                    video_out[name].write(
                        cv2.cvtColor(np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR))
            generator.stochastic_clip_dlatents()
            # Advance the step counter; without this --video_skip had no effect.
            vid_count += 1
        print(" ".join(names), " Loss {:.4f}".format(best_loss))

        if args.output_video:
            for name in names:
                video_out[name].release()

        # Generate images from found dlatents and save them
        generator.set_dlatents(best_dlatent)
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names):
            img = PIL.Image.fromarray(img_array, 'RGB')
            img.save(os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG')
            np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()