def extract(tfrecord_dir, output_dir): print('Loading dataset "%s"' % tfrecord_dir) tflib.init_tf({'gpu_options.allow_growth': True}) dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0) tflib.init_uninitialized_vars() print('Extracting images to "%s"' % output_dir) if not os.path.isdir(output_dir): os.makedirs(output_dir) idx = 0 while True: if idx % 10 == 0: print('%d\r' % idx, end='', flush=True) try: images, _labels = dset.get_minibatch_np(1) except tf.errors.OutOfRangeError: break if images.shape[1] == 1: img = PIL.Image.fromarray(images[0][0], 'L') else: img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') img.save(os.path.join(output_dir, 'img%08d.png' % idx)) idx += 1 print('Extracted %d images.' % idx)
def display(tfrecord_dir): print('Loading dataset "%s"' % tfrecord_dir) tflib.init_tf({'gpu_options.allow_growth': True}) dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0) tflib.init_uninitialized_vars() import cv2 # pip install opencv-python idx = 0 while True: try: images, labels = dset.get_minibatch_np(1) except tf.errors.OutOfRangeError: break if idx == 0: print('Displaying images') cv2.namedWindow('dataset_tool') print('Press SPACE or ENTER to advance, ESC to exit') print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist())) cv2.imshow('dataset_tool', images[0].transpose( 1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR idx += 1 if cv2.waitKey() == 27: break print('\nDisplayed %d images.' % idx)
def run_pickle(submit_config, metric_args, network_pkl, dataset_args, mirror_augment): ctx = dnnlib.RunContext(submit_config) tflib.init_tf() print('Evaluating %s metric on network_pkl "%s"...' % (metric_args.name, network_pkl)) metric = dnnlib.util.call_func_by_name(**metric_args) print() metric.run(network_pkl, dataset_args=dataset_args, mirror_augment=mirror_augment, num_gpus=submit_config.num_gpus) print() ctx.close()
def run_snapshot(submit_config, metric_args, run_id, snapshot): ctx = dnnlib.RunContext(submit_config) tflib.init_tf() print('Evaluating %s metric on run_id %s, snapshot %s...' % (metric_args.name, run_id, snapshot)) run_dir = misc.locate_run_dir(run_id) network_pkl = misc.locate_network_pkl(run_dir, snapshot) metric = dnnlib.util.call_func_by_name(**metric_args) print() metric.run(network_pkl, run_dir=run_dir, num_gpus=submit_config.num_gpus) print() ctx.close()
def run_all_snapshots(submit_config, metric_args, run_id): ctx = dnnlib.RunContext(submit_config) tflib.init_tf() print('Evaluating %s metric on all snapshots of run_id %s...' % (metric_args.name, run_id)) run_dir = misc.locate_run_dir(run_id) network_pkls = misc.list_network_pkls(run_dir) metric = dnnlib.util.call_func_by_name(**metric_args) print() for idx, network_pkl in enumerate(network_pkls): ctx.update('', idx, len(network_pkls)) metric.run(network_pkl, run_dir=run_dir, num_gpus=submit_config.num_gpus) print() ctx.close()
def get_generator(batch_size=1): tiled_dlatent, randomize_noise = False, False clipping_threshold = 2 dlatent_avg = '' tflib.init_tf() with dnnlib.util.open_url(URL_FFHQ, cache_dir=config.cache_dir) as f: generator_network, discriminator_network, Gs_network = pickle.load(f) del discriminator_network, generator_network generator = Generator(Gs_network, batch_size=batch_size, clipping_threshold=clipping_threshold, tiled_dlatent=tiled_dlatent, randomize_noise=randomize_noise) if (dlatent_avg != ''): generator.set_dlatent_avg(np.load(dlatent_avg)) return generator, Gs_network
def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): max_label_size = 0 if ignore_labels else 'full' print('Loading dataset "%s"' % tfrecord_dir_a) tflib.init_tf({'gpu_options.allow_growth': True}) dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0) print('Loading dataset "%s"' % tfrecord_dir_b) dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0) tflib.init_uninitialized_vars() print('Comparing datasets') idx = 0 identical_images = 0 identical_labels = 0 while True: if idx % 100 == 0: print('%d\r' % idx, end='', flush=True) try: images_a, labels_a = dset_a.get_minibatch_np(1) except tf.errors.OutOfRangeError: images_a, labels_a = None, None try: images_b, labels_b = dset_b.get_minibatch_np(1) except tf.errors.OutOfRangeError: images_b, labels_b = None, None if images_a is None or images_b is None: if images_a is not None or images_b is not None: print('Datasets contain different number of images') break if images_a.shape == images_b.shape and np.all(images_a == images_b): identical_images += 1 else: print('Image %d is different' % idx) if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): identical_labels += 1 else: print('Label %d is different' % idx) idx += 1 print('Identical images: %d / %d' % (identical_images, idx)) if not ignore_labels: print('Identical labels: %d / %d' % (identical_labels, idx))
default=-1, help='Run this many iterations (-1 for infinite, halt with CTRL-C)', type=int) args, other_args = parser.parse_known_args() os.makedirs(args.data_dir, exist_ok=True) if args.seed == -1: args.seed = None if args.use_fp16: K.set_floatx('float16') K.set_epsilon(1e-4) tflib.init_tf() model = get_resnet_model(args.model_path, model_res=args.model_res, depth=args.model_depth, size=args.model_size, activation=args.activation, optimizer=args.optimizer, loss=args.loss) with dnnlib.util.open_url(args.model_url, cache_dir=config.cache_dir) as f: generator_network, discriminator_network, Gs_network = pickle.load(f) def load_Gs(): return Gs_network
def main(): parser = argparse.ArgumentParser( description= 'Find latent representation of reference images using perceptual losses', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('src_dir', help='Directory with images for encoding') parser.add_argument('generated_images_dir', help='Directory for storing generated images') parser.add_argument('dlatent_dir', help='Directory for storing dlatent representations') parser.add_argument('--data_dir', default='data', help='Directory for storing optional models') parser.add_argument('--mask_dir', default='masks', help='Directory for storing optional masks') parser.add_argument('--load_last', default='', help='Start with embeddings from directory') parser.add_argument( '--dlatent_avg', default='', help= 'Use dlatent from file specified here for truncation instead of dlatent_avg from Gs' ) parser.add_argument( '--model_url', default= 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ', help='Fetch a StyleGAN model to train on from this URL' ) # karras2019stylegan-ffhq-1024x1024.pkl parser.add_argument('--model_res', default=1024, help='The dimension of images in the StyleGAN model', type=int) parser.add_argument('--batch_size', default=1, help='Batch size for generator and perceptual model', type=int) # Perceptual model params parser.add_argument('--image_size', default=256, help='Size of images for perceptual model', type=int) parser.add_argument('--resnet_image_size', default=256, help='Size of images for the Resnet model', type=int) parser.add_argument('--lr', default=0.02, help='Learning rate for perceptual model', type=float) parser.add_argument('--decay_rate', default=0.9, help='Decay rate for learning rate', type=float) parser.add_argument('--iterations', default=100, help='Number of optimization steps for each batch', type=int) parser.add_argument( '--decay_steps', default=10, help='Decay steps for learning rate decay (as a percent of iterations)', type=float) parser.add_argument( '--load_effnet', default='data/finetuned_effnet.h5', help='Model to load for EfficientNet approximation of dlatents') parser.add_argument( '--load_resnet', default='data/finetuned_resnet.h5', help='Model to load for ResNet approximation of dlatents') # Loss function options parser.add_argument( '--use_vgg_loss', default=0.4, help='Use VGG perceptual loss; 0 to disable, > 0 to scale.', type=float) parser.add_argument('--use_vgg_layer', default=9, help='Pick which VGG layer to use.', type=int) parser.add_argument( '--use_pixel_loss', default=1.5, help='Use logcosh image pixel loss; 0 to disable, > 0 to scale.', type=float) parser.add_argument( '--use_mssim_loss', default=100, help='Use MS-SIM perceptual loss; 0 to disable, > 0 to scale.', type=float) parser.add_argument( '--use_lpips_loss', default=100, help='Use LPIPS perceptual loss; 0 to disable, > 0 to scale.', type=float) parser.add_argument( '--use_l1_penalty', default=1, help='Use L1 penalty on latents; 0 to disable, > 0 to scale.', type=float) # Generator params parser.add_argument('--randomize_noise', default=False, help='Add noise to dlatents during optimization', type=bool) parser.add_argument( '--tile_dlatents', default=False, help='Tile dlatents to use a single vector at each scale', type=bool) parser.add_argument( '--clipping_threshold', default=2.0, help='Stochastic clipping of gradient values outside of this threshold', type=float) # Masking params parser.add_argument('--load_mask', default=False, help='Load segmentation masks', type=bool) parser.add_argument( '--face_mask', default=False, help='Generate a mask for predicting only the face area', type=bool) parser.add_argument( '--use_grabcut', default=True, help= 'Use grabcut algorithm on the face mask to better segment the foreground', type=bool) parser.add_argument( '--scale_mask', default=1.5, help='Look over a wider section of foreground for grabcut', type=float) # Video params parser.add_argument('--video_dir', default='videos', help='Directory for storing training videos') parser.add_argument('--output_video', default=False, help='Generate videos of the optimization process', type=bool) parser.add_argument('--video_codec', default='MJPG', help='FOURCC-supported video codec name') parser.add_argument('--video_frame_rate', default=24, help='Video frames per second', type=int) parser.add_argument('--video_size', default=512, help='Video size in pixels', type=int) parser.add_argument( '--video_skip', default=1, help='Only write every n frames (1 = write every frame)', type=int) args, other_args = parser.parse_known_args() args.decay_steps *= 0.01 * args.iterations # Calculate steps as a percent of total iterations if args.output_video: import cv2 synthesis_kwargs = dict(output_transform=dict( func=tflib.convert_images_to_uint8, nchw_to_nhwc=False), minibatch_size=args.batch_size) ref_images = [ os.path.join(args.src_dir, x) for x in os.listdir(args.src_dir) ] ref_images = list(filter(os.path.isfile, ref_images)) if len(ref_images) == 0: raise Exception('%s is empty' % args.src_dir) os.makedirs(args.data_dir, exist_ok=True) os.makedirs(args.mask_dir, exist_ok=True) os.makedirs(args.generated_images_dir, exist_ok=True) os.makedirs(args.dlatent_dir, exist_ok=True) os.makedirs(args.video_dir, exist_ok=True) # Initialize generator and perceptual model tflib.init_tf() with familyGan.stylegan_encoder.dnnlib.util.open_url( args.model_url, cache_dir=familyGan.stylegan_encoder.config.cache_dir) as f: generator_network, discriminator_network, Gs_network = pickle.load(f) generator = Generator(Gs_network, args.batch_size, clipping_threshold=args.clipping_threshold, tiled_dlatent=args.tile_dlatents, model_res=args.model_res, randomize_noise=args.randomize_noise) if (args.dlatent_avg != ''): generator.set_dlatent_avg(np.load(args.dlatent_avg)) perc_model = None if (args.use_lpips_loss > 0.00000001): with familyGan.stylegan_encoder.dnnlib.util.open_url( 'https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2', cache_dir=familyGan.stylegan_encoder.config.cache_dir) as f: perc_model = pickle.load(f) perceptual_model = PerceptualModel(args, perc_model=perc_model, batch_size=args.batch_size) perceptual_model.build_perceptual_model(generator) ff_model = None # Optimize (only) dlatents by minimizing perceptual loss between reference and generated images in feature space for images_batch in tqdm(split_to_batches(ref_images, args.batch_size), total=len(ref_images) // args.batch_size): names = [ os.path.splitext(os.path.basename(x))[0] for x in images_batch ] if args.output_video: video_out = {} for name in names: video_out[name] = cv2.VideoWriter( os.path.join(args.video_dir, f'{name}.avi'), cv2.VideoWriter_fourcc(*args.video_codec), args.video_frame_rate, (args.video_size, args.video_size)) perceptual_model.set_reference_images(images_batch) dlatents = None if (args.load_last != ''): # load previous dlatents for initialization for name in names: dl = np.expand_dims(np.load( os.path.join(args.load_last, f'{name}.npy')), axis=0) if (dlatents is None): dlatents = dl else: dlatents = np.vstack((dlatents, dl)) else: if (ff_model is None): if os.path.exists(args.load_resnet): print("Loading ResNet Model:") ff_model = load_model(args.load_resnet) from keras.applications.resnet50 import preprocess_input if (ff_model is None): if os.path.exists(args.load_effnet): import efficientnet print("Loading EfficientNet Model:") ff_model = load_model(args.load_effnet) from efficientnet import preprocess_input if (ff_model is not None): # predict initial dlatents with ResNet model dlatents = ff_model.predict( preprocess_input( load_images(images_batch, image_size=args.resnet_image_size))) if dlatents is not None: generator.set_dlatents(dlatents) op = perceptual_model.optimize(generator.dlatent_variable, iterations=args.iterations) pbar = tqdm(op, leave=False, total=args.iterations) vid_count = 0 best_loss = None best_dlatent = None for loss_dict in pbar: pbar.set_description(" ".join(names) + ": " + "; ".join( ["{} {:.4f}".format(k, v) for k, v in loss_dict.items()])) if best_loss is None or loss_dict["loss"] < best_loss: best_loss = loss_dict["loss"] best_dlatent = generator.get_dlatents() if args.output_video and (vid_count % args.video_skip == 0): batch_frames = generator.generate_images() for i, name in enumerate(names): video_frame = PIL.Image.fromarray( batch_frames[i], 'RGB').resize( (args.video_size, args.video_size), PIL.Image.LANCZOS) video_out[name].write( cv2.cvtColor( np.array(video_frame).astype('uint8'), cv2.COLOR_RGB2BGR)) generator.stochastic_clip_dlatents() print(" ".join(names), " Loss {:.4f}".format(best_loss)) if args.output_video: for name in names: video_out[name].release() # Generate images from found dlatents and save them generator.set_dlatents(best_dlatent) generated_images = generator.generate_images() generated_dlatents = generator.get_dlatents() for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, names): img = PIL.Image.fromarray(img_array, 'RGB') img.save( os.path.join(args.generated_images_dir, f'{img_name}.png'), 'PNG') np.save(os.path.join(args.dlatent_dir, f'{img_name}.npy'), dlatent) generator.reset_dlatents()