def generate_imgs(network_pkl, seeds, truncation_psi):
    """Generate one image per seed with a pre-trained generator and save each as a PNG.

    Args:
        network_pkl: Network pickle location, handed to
            ``pretrained_networks.load_networks``.
        seeds: Sequence of integer seeds. Each seed drives both the latent vector
            and the values written into the synthesis noise variables.
        truncation_psi: Truncation value forwarded to ``Gs.run``. When ``None`` the
            argument is not passed at all and the file name omits the ``_t`` part.
    """
    print("Loading networks from %s..." % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)[:3]
    noise_vars = [
        var for name, var in Gs.components.synthesis.vars.items()
        if name.startswith("noise")
    ]

    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.output_transform = dict(func=tflib.convert_imgs_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs.randomize_noise = False
    if truncation_psi is not None:
        Gs_kwargs.truncation_psi = truncation_psi

    for seed_idx, seed in enumerate(seeds):
        print("Generating image for seed %d (%d/%d)..." % (seed, seed_idx, len(seeds)))
        rnd = np.random.RandomState(seed)
        z = rnd.randn(1, *Gs.input_shape[1:])  # [minibatch, component]
        tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars})  # [height, width]
        imgs = Gs.run(z, None, **Gs_kwargs)  # [minibatch, height, width, channel]

        # "%d" cannot format None, so the previous unconditional format string
        # raised TypeError after the image had already been generated.
        if truncation_psi is None:
            filename = "seed%04d.png" % seed
        else:
            filename = "seed%04d_t%04d.png" % (seed, truncation_psi)
        misc.to_pil(imgs[0]).save(dnnlib.make_run_dir_path(filename))
def save_interpolation(imgs, name):
    """Save each component's interpolation sequence as a GIF plus its final frame as a PNG.

    Relies on ``components_num``, ``i`` (index of the current sample) and
    ``drange_net`` being available from the surrounding scope.

    Args:
        imgs: Array of generated frames; it is split into ``components_num`` equal
            groups along axis 0.
        name: Latent-space tag used in the output directory name (e.g. "z" or "w").
    """
    groups = np.split(imgs, components_num, axis = 0)
    for c in range(components_num):
        # The output directories are created as "interpolations-<name>" (dash),
        # so the file path has to use the same spelling.
        filename = "eval/interpolations-%s/%06d/%02d" % (name, i, c)
        frames = [misc.to_pil(img, drange = drange_net) for img in groups[c]]
        frames[-1].save(dnnlib.make_run_dir_path("{}.png".format(filename)))
        misc.save_gif(frames, dnnlib.make_run_dir_path("{}.gif".format(filename)))
def generate_noisevar_imgs(network_pkl, seeds, num_samples, num_variants):
    """Visualize how much the output varies for a fixed latent.

    For every seed the same latent is repeated ``num_samples`` times and run
    through the generator. The frames are saved as a GIF, and a strip made of
    the first ``num_variants + 1`` frames followed by a per-pixel
    standard-deviation map is saved as a PNG.

    Args:
        network_pkl: Network pickle location.
        seeds: Sequence of integer seeds, one output pair per seed.
        num_samples: Number of times the latent is repeated in the batch.
        num_variants: The strip shows ``num_variants + 1`` generated frames.
    """
    print("Loading networks from %s..." % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)[:3]

    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.truncation_psi = 1
    Gs_kwargs.output_transform = dict(func=tflib.convert_imgs_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs.minibatch_size = 4
    _, _, H, W = Gs.output_shape

    for seed_idx, seed in enumerate(seeds):
        print("Generating image for seed %d (%d/%d)..." % (seed, seed_idx, len(seeds)))

        # One latent, repeated so that every row of the batch is identical
        latent = np.random.RandomState(seed).randn(Gs.input_shape[1])
        z = np.stack([latent] * num_samples)  # [minibatch, component]
        raw = Gs.run(z, None, **Gs_kwargs)  # [minibatch, height, width, channel]

        frames = [misc.to_pil(frame) for frame in raw]
        save_gif(frames, dnnlib.make_run_dir_path("noisevar%04d.gif" % seed))

        # Strip layout: num_variants + 1 frames, then the deviation map in the last cell
        canvas = PIL.Image.new("RGB", (W * (num_variants + 2), H), "white")
        for col in range(num_variants + 1):
            canvas.paste(frames[col], (col * W, 0))

        deviation = np.std(np.mean(raw, axis=3), axis=0) * 4
        deviation = np.clip(deviation + 0.5, 0, 255).astype(np.uint8)
        canvas.paste(PIL.Image.fromarray(deviation, "L"), (W * (num_variants + 1), 0))
        canvas.save(dnnlib.make_run_dir_path("noisevar%04d.png" % seed))
def save_interpolation(imgs, name):
    """Write the interpolation frames of every component as a GIF and a final-frame PNG.

    Uses ``k``, ``i`` (index of the current sample), ``run_dir`` and
    ``drange_net`` from the surrounding scope.

    Args:
        imgs: Array of generated frames, split into ``k - 1`` equal groups along axis 0.
        name: Latent-space tag that selects the output directory (e.g. "z" or "w").
    """
    per_component = np.split(imgs, k - 1, axis = 0)
    for c, group in enumerate(per_component):
        filename = f"{run_dir}/visuals/interpolations-{name}/{i:06d}/{c:02d}"
        frames = []
        for img in group:
            frames.append(misc.to_pil(img, drange = drange_net))
        # The last frame doubles as the still image for this component
        frames[-1].save(f"{filename}.png")
        misc.save_gif(frames, f"{filename}.gif")
def generate_noisecomp_imgs(network_pkl, seeds, noise_ranges):
    """Show the effect of enabling only a subset of the noise inputs.

    For each seed one image is generated per entry of ``noise_ranges``. In each
    run the noise variables whose index is in the entry keep their stored
    value and all others are multiplied by zero. The images are pasted side by
    side and saved as one PNG per seed.

    Args:
        network_pkl: Network pickle location.
        seeds: Sequence of integer seeds, one output strip per seed.
        noise_ranges: Sequence of containers of noise-variable indices to keep active.
    """
    print("Loading networks from %s..." % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)[:3]

    # The noise variables are overwritten on a clone, not on the loaded network
    Gsc = Gs.clone()
    noise_vars = []
    for name, var in Gsc.components.synthesis.vars.items():
        if name.startswith("noise"):
            noise_vars.append(var)
    noise_pairs = list(zip(noise_vars, tflib.run(noise_vars)))  # [(var, val), ...]

    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.output_transform = dict(func = tflib.convert_imgs_to_uint8, nchw_to_nhwc = True)
    Gs_kwargs.randomize_noise = False
    Gs_kwargs.truncation_psi = 1
    _, _, H, W = Gs.output_shape

    for seed_idx, seed in enumerate(seeds):
        print("Generating images for seed %d (%d/%d)..." % (seed, seed_idx, len(seeds)))
        canvas = PIL.Image.new("RGB", (W * len(noise_ranges), H), "white")
        z = np.random.RandomState(seed).randn(1, *Gsc.input_shape[1:])  # [minibatch, component]

        for col, noise_range in enumerate(noise_ranges):
            # Keep the stored value for selected indices, zero out the rest
            masked = {}
            for vi, (var, val) in enumerate(noise_pairs):
                masked[var] = val * (1 if vi in noise_range else 0)
            tflib.set_vars(masked)

            imgs = Gsc.run(z, None, **Gs_kwargs)  # [minibatch, height, width, channel]
            canvas.paste(misc.to_pil(imgs[0]), (col * W, 0))

        canvas.save(dnnlib.make_run_dir_path("noisecomp%04d.png" % seed))
def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, minibatch_size = 4):
    """Generate a style-mixing grid and the individual images it is made of.

    Every seed is mapped to a W vector and truncated towards ``dlatent_avg``.
    For each (row, column) pair the row's W is copied and the layers listed in
    ``col_styles`` are replaced by the column's. All images are saved
    individually as ``<row>-<col>.png`` and assembled into ``grid.png``, whose
    first row and first column show the unmixed source images.

    Args:
        network_pkl: Network pickle location.
        row_seeds: List of seeds for the grid rows.
        col_seeds: List of seeds for the grid columns.
        truncation_psi: Interpolation factor between ``dlatent_avg`` and each W.
        col_styles: Layer indices taken from the column's W.
        minibatch_size: Minibatch size passed to the synthesis network.
    """
    print("Loading networks from %s..." % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)[:3]
    w_avg = Gs.get_var("dlatent_avg")  # [component]

    Gs_syn_kwargs = dnnlib.EasyDict()
    Gs_syn_kwargs.output_transform = dict(func = tflib.convert_imgs_to_uint8, nchw_to_nhwc = True)
    Gs_syn_kwargs.randomize_noise = False
    Gs_syn_kwargs.minibatch_size = minibatch_size

    print("Generating W vectors...")
    all_seeds = list(set(row_seeds + col_seeds))
    latent_list = []
    for seed in all_seeds:
        latent_list.append(np.random.RandomState(seed).randn(*Gs.input_shape[1:]))
    all_z = np.stack(latent_list)  # [minibatch, component]
    all_w = Gs.components.mapping.run(all_z, None)  # [minibatch, layer, component]
    all_w = w_avg + (all_w - w_avg) * truncation_psi  # [minibatch, layer, component]
    w_dict = dict(zip(all_seeds, list(all_w)))  # seed -> [layer, component]

    print("Generating images...")
    all_imgs = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs)  # [minibatch, height, width, channel]
    img_dict = {}
    for seed, img in zip(all_seeds, list(all_imgs)):
        img_dict[(seed, seed)] = img

    print("Generating style-mixed images...")
    for row_seed in row_seeds:
        for col_seed in col_seeds:
            mixed_w = w_dict[row_seed].copy()
            mixed_w[col_styles] = w_dict[col_seed][col_styles]
            mixed = Gs.components.synthesis.run(mixed_w[np.newaxis], **Gs_syn_kwargs)
            img_dict[(row_seed, col_seed)] = mixed[0]

    print("Saving images...")
    for (row_seed, col_seed), img in img_dict.items():
        out_path = dnnlib.make_run_dir_path("%d-%d.png" % (row_seed, col_seed))
        misc.to_pil(img).save(out_path)

    print("Saving image grid...")
    _N, _C, H, W = Gs.output_shape
    grid_size = (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1))
    canvas = PIL.Image.new("RGB", grid_size, "black")
    for row_idx, row_seed in enumerate([None] + row_seeds):
        for col_idx, col_seed in enumerate([None] + col_seeds):
            # Top-left corner stays empty
            if row_seed is None and col_seed is None:
                continue
            if col_seed is None:
                key = (row_seed, row_seed)  # header column: unmixed row source
            elif row_seed is None:
                key = (col_seed, col_seed)  # header row: unmixed column source
            else:
                key = (row_seed, col_seed)
            canvas.paste(misc.to_pil(img_dict[key]), (W * col_idx, H * row_idx))
    canvas.save(dnnlib.make_run_dir_path("grid.png"))
def run(model, gpus, output_dir, images_num, truncation_psi, batch_size):
    """Sample images from a pre-trained network and write them to ``output_dir``.

    Args:
        model: Model identifier passed to ``load_networks``.
        gpus: Value assigned to the ``CUDA_VISIBLE_DEVICES`` environment variable.
        output_dir: Directory for the PNG files; created if missing.
        images_num: Number of latent vectors (and images) to sample.
        truncation_psi: Truncation value forwarded to ``Gs.run``.
        batch_size: Minibatch size forwarded to ``Gs.run``.
    """
    print("Loading networks...")
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus  # Restrict the visible GPUs
    tflib.init_tf()                            # Initialize TensorFlow
    _G, _D, Gs = load_networks(model)          # Load the pre-trained network
    Gs.print_layers()                          # Print network details

    print("Generate images...")
    latent_shape = (images_num, *Gs.input_shape[1:])
    latents = np.random.randn(*latent_shape)   # Sample latent vectors
    outputs = Gs.run(
        latents,
        truncation_psi=truncation_psi,
        minibatch_size=batch_size,
        verbose=True)
    images = outputs[0]

    print("Saving images...")
    os.makedirs(output_dir, exist_ok=True)
    pattern = "{}/Sample_{{:06d}}.png".format(output_dir)
    # Materialized so that tqdm knows the total number of items
    indexed_images = list(enumerate(images))
    for i, image in tqdm(indexed_images):
        target = pattern.format(i)
        misc.to_pil(image).save(target)
def run(model, gpus, output_dir, images_num, truncation_psi, ratio):
    """Sample images one at a time from a pre-trained generator and save them.

    Args:
        model: Model identifier passed to ``loader.load_network``.
        gpus: Value assigned to the ``CUDA_VISIBLE_DEVICES`` environment variable.
        output_dir: Directory for the PNG files; created if missing.
        images_num: Number of images to generate.
        truncation_psi: Truncation value forwarded to the generator call.
        ratio: Second argument of ``crop``, applied to each image before saving.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = gpus  # Set GPUs
    device = torch.device("cuda")

    print("Loading networks...")
    G = loader.load_network(model, eval = True)["Gs"].to(device)  # Load pre-trained network

    print("Generate and save images...")
    os.makedirs(output_dir, exist_ok = True)  # Make output directory
    # The file name pattern does not depend on the loop index, so build it once
    pattern = "{}/sample_{{:06d}}.png".format(output_dir)

    # Pure inference: no autograd graph is needed, and `.numpy()` requires a
    # tensor that does not track gradients.
    with torch.no_grad():
        for i in trange(images_num):
            z = torch.randn([1, *G.input_shape[1:]], device = device)  # Sample latent vector
            imgs = G(z, truncation_psi = truncation_psi)[0].cpu().numpy()  # Generate an image
            crop(misc.to_pil(imgs[0]), ratio).save(pattern.format(i))  # Save the image
def eval(
        G,
        dataset,                    # The dataset object for accessing the data
        batch_size,                 # Visualization batch size
        training=False,             # Training mode
        latents=None,               # Source latents to generate images from
        labels=None,                # Source labels to generate images from (0 if no labels are used)
        # Model settings
        components_num=1,           # Number of components the model has
        drange_net=[-1, 1],         # Model image output range
        attention=False,            # Whether the model produces attention maps (for visualization)
        # Visualization settings
        vis_types=None,             # Visualization types to be created
        num=100,                    # Number of produced samples
        rich_num=5,                 # Number of samples for which richer visualizations will be created
                                    # (requires more memory and disk space, and therefore rich_num < num)
        grid=None,                  # Whether to save the samples in one large grid file
                                    # or in separate files, one per sample
        grid_size=None,             # Grid proportions (w, h)
        step=None,                  # Step number to be used in visualization filenames
        verbose=None,               # Verbose print progress messages
        # Visualization-specific settings
        alpha=0.3,                  # Proportion for generated images and attention maps blends
        intrp_density=8,            # Number of samples in between two end points of an interpolation
        intrp_per_component=False,  # Whether to perform interpolation along particular latent components (True)
                                    # or all of them at once (False)
        noise_samples_num=100,      # Number of samples used to compute noise variation visualization
        section_size=100):          # Visualization section size (section_size <= num) for reducing memory footprint
    """Run the generator and write visualizations under the run directory's ``eval/`` folder.

    Depending on ``vis_types`` this saves image samples ("imgs"), latents
    ("ltnts"), attention maps and blends ("maps", "layer_maps"; only when
    ``attention`` is set), latent interpolations ("interpolations"), a noise
    variance map ("noise_var") and style/component mixing tables ("style_mix").
    During training only "imgs" and "maps" are produced and ``latents`` must be
    provided, since their count defines ``num``.

    Defaults resolved here: ``grid`` defaults to ``training`` and ``verbose``
    defaults to ``not training``.
    """

    def step_prefix(step):
        return "" if step is None else "{}_".format(step)

    def pattern_of(dirname, step, suffix):
        return "eval/{}/{}%06d.{}".format(dirname, step_prefix(step), suffix)

    # For time efficiency, during training save only image and map samples
    # rather than richer visualizations
    vis = vis_types
    if training:
        vis = {"imgs", "maps"}
        section_size = num = len(latents)
    elif vis is None:
        vis = {"imgs", "maps", "ltnts", "interpolations", "noise_var"}

    # Set default options
    # Save image samples in one grid file during training
    if grid is None:
        grid = training
    # Disable verbose during training. The check must be against None:
    # testing truthiness left the default (None) unresolved and silently
    # overrode an explicit verbose=True.
    if verbose is None:
        verbose = not training
    # If grid size is provided, set number of visualized images accordingly
    if grid_size is not None:
        num = np.prod(grid_size)

    # Build image saving functions
    save_images = misc.save_images_builder(drange_net, grid_size, grid, verbose)
    save_blends = misc.save_blends_builder(drange_net, grid_size, grid, verbose, alpha)

    # Fix the synthesis noise inputs to one random draw
    noise_vars = [
        var for name, var in G.subnets.synthesis.vars.items()
        if name.startswith("noise")
    ]
    noise_var_vals = {var: np.random.randn(*var.shape.as_list()) for var in noise_vars}
    tflib.set_vars(noise_var_vals)

    # Create directories
    dirs = []
    if "imgs" in vis:
        dirs += ["images"]
    if "ltnts" in vis:
        dirs += ["latents-z", "latents-w"]
    if "maps" in vis:
        dirs += ["maps", "softmaps", "blends", "softblends"]
    if "layer_maps" in vis:
        dirs += ["layer_maps"]
    if "interpolations" in vis:
        # Both names must match the per-sample sub-directories created below
        dirs += ["interpolations-z", "interpolations-w"]
    for dirname in dirs:
        misc.mkdir(dnnlib.make_run_dir_path("eval/{}".format(dirname)))

    # Produce visualizations
    for idx in range(0, num, section_size):
        curr_size = curr_batch_size(num, idx, section_size)
        if verbose and num > curr_size:
            print("--- Batch {}/{}".format(idx + 1, num))

        # Compute source latents images will be produced from
        # NOTE(review): latents/labels are only drawn when still None and are never
        # sliced per section, so every section after the first reuses the same
        # inputs -- confirm whether multi-section runs are expected here.
        if latents is None:
            latents = np.random.randn(curr_size, *G.input_shape[1:])
        if labels is None:
            labels = dataset.get_minibatch_np(curr_size)

        # Run network over latents and produce images and attention maps
        if verbose:
            print("Running network...")
        images, attmaps_all_layers, wlatents_all_layers = G.run(
            latents, labels, randomize_noise=False,
            minibatch_size=batch_size, return_dlatents=True)

        # For memory efficiency, save full information only for a small amount of images.
        # The last-layer maps are taken before the cut so that they line up with `images`.
        soft_maps = attmaps_all_layers[:, :, -1, 0] if attention else None
        attmaps_all_layers = attmaps_all_layers[:rich_num]
        wlatents = wlatents_all_layers[:, :, 0]

        # Save image samples
        if "imgs" in vis:
            if verbose:
                print("Saving image samples...")
            save_images(images, pattern_of("images", step, "png"), idx)

        # Save latent vectors
        if "ltnts" in vis:
            if verbose:
                print("Saving latents...")
            misc.save_npys(latents, pattern_of("latents-z", step, "npy"), idx)
            misc.save_npys(wlatents, pattern_of("latents-w", step, "npy"), idx)

        # For the GANsformer model, save attention maps
        if attention:
            # The color palette is shared by the "maps" and "layer_maps" branches
            if "maps" in vis or "layer_maps" in vis:
                pallete = np.expand_dims(misc.get_colors(components_num), axis=[2, 3])

            if "maps" in vis:
                # Hard assignment: one-hot over the component axis
                maps = (soft_maps == np.amax(soft_maps, axis=1, keepdims=True)).astype(float)
                soft_maps = np.sum(pallete * np.expand_dims(soft_maps, axis=2), axis=1)
                maps = np.sum(pallete * np.expand_dims(maps, axis=2), axis=1)

                if verbose:
                    print("Saving maps...")
                save_images(soft_maps, pattern_of("softmaps", step, "png"), idx)
                save_images(maps, pattern_of("maps", step, "png"), idx)
                # Soft maps go to "softblends", hard maps to "blends"
                save_blends(soft_maps, images, pattern_of("softblends", step, "png"), idx)
                save_blends(maps, images, pattern_of("blends", step, "png"), idx)

            # Save maps from all attention heads and layers
            # (for efficiency, only for a small number of images)
            if "layer_maps" in vis:
                all_maps = []
                maps_fakes = np.split(attmaps_all_layers, attmaps_all_layers.shape[2], axis=2)
                for layer, lmaps in enumerate(maps_fakes):
                    # Axis 3 becomes axis 2 once the singleton layer axis is squeezed
                    heads_num = lmaps.shape[3]
                    lmaps = np.split(np.squeeze(lmaps, axis=2), heads_num, axis=2)
                    for head, hmap in enumerate(lmaps):
                        hmap = (hmap == np.amax(hmap, axis=1, keepdims=True)).astype(float)
                        hmap = np.sum(pallete * hmap, axis=1)
                        all_maps.append((hmap, "l{}_h{}".format(layer, head)))

                if verbose:
                    print("Saving layer maps...")
                for i in trange(rich_num):
                    misc.mkdir(dnnlib.make_run_dir_path("eval/layer_maps/%06d" % i))
                for maps, name in tqdm(all_maps):
                    # One file per (layer, head) inside each sample's directory
                    pattern = "eval/layer_maps/%06d/{}{}.png".format(step_prefix(step), name)
                    save_images(maps, pattern, idx)

    # Produce interpolations between pairs of source latents
    # In the GANsformer case, varying one component at a time
    if "interpolations" in vis:
        ts = np.array(np.linspace(0.0, 1.0, num=intrp_density, endpoint=True))

        def update(t, fn, ts, dim):
            if dim == 3:
                ts = ts[:, np.newaxis]
            if not intrp_per_component:
                return fn(t[0], t[1], ts)
            t_ups = []
            for c in range(components_num):
                # copy over all the components except component c that will get interpolated
                t_up = np.tile(np.copy(t[0])[None], [intrp_density] + [1] * dim)
                # interpolate component c
                t_up[:, c] = fn(t[0, c], t[1, c], ts)
                t_ups.append(t_up)
            return np.concatenate(t_ups, axis=0)

        if verbose:
            print("Generating interpolations...")
        for i in trange(rich_num):
            misc.mkdir(dnnlib.make_run_dir_path("eval/interpolations-z/%06d" % i))
            misc.mkdir(dnnlib.make_run_dir_path("eval/interpolations-w/%06d" % i))

            # Interpolate from source latent i towards a fresh random latent
            z = np.random.randn(2, *G.input_shape[1:])
            z[0] = latents[i:i + 1]
            w = G.run(z, labels, randomize_noise=False, return_dlatents=True,
                      minibatch_size=batch_size)[-1]

            z_up = update(z, slerp, ts, 2)
            w_up = update(w, lerp, ts, 3)

            imgs1 = G.run(z_up, labels, randomize_noise=False,
                          minibatch_size=batch_size)[0]
            # NOTE(review): keyword aligned with the `take_dlatents` used for the
            # style-mixing run below -- confirm against the network's signature.
            imgs2 = G.run(w_up, labels, randomize_noise=False,
                          minibatch_size=batch_size, take_dlatents=True)[0]

            def save_interpolation(imgs, name):
                imgs = np.split(imgs, components_num, axis=0)
                for c in range(components_num):
                    # Must match the "interpolations-<name>" directories created above
                    filename = "eval/interpolations-%s/%06d/%02d" % (name, i, c)
                    frames = [misc.to_pil(img, drange=drange_net) for img in imgs[c]]
                    frames[-1].save(dnnlib.make_run_dir_path("{}.png".format(filename)))
                    misc.save_gif(frames, dnnlib.make_run_dir_path("{}.gif".format(filename)))

            save_interpolation(imgs1, "z")
            save_interpolation(imgs2, "w")

    # Compute noise variance map
    # Shows what areas vary the most given fixed source
    # latents due to the use of stochastic local noise
    if "noise_var" in vis:
        if verbose:
            print("Generating noise variance...")
        z = np.tile(np.random.randn(1, *G.input_shape[1:]), [noise_samples_num, 1, 1])
        imgs = G.run(z, labels, minibatch_size=batch_size)[0]
        imgs = np.stack([misc.to_pil(img, drange=drange_net) for img in imgs], axis=0)
        diff = np.std(np.mean(imgs, axis=3), axis=0) * 4
        diff = np.clip(diff + 0.5, 0, 255).astype(np.uint8)
        PIL.Image.fromarray(diff, "L").save(dnnlib.make_run_dir_path("eval/noise_variance.png"))

    # Compute style mixing table, using latent A in some of the layers and latent B in the rest.
    # For the GANsformer, also produce component mixes (using latents from A in some of the
    # components, and latents from B in the rest).
    if "style_mix" in vis:
        if verbose:
            print("Generating style mixes...")
        cols, rows = 4, 2
        row_lens = np.array([2, 5, 8, 11])

        # Create latent mixes
        mixes = {
            "layer": (np.arange(wlatents_all_layers.shape[2]) < row_lens[:, None]
                      ).astype(np.float32)[:, None, None, None, :, None],
            "component": (np.arange(wlatents_all_layers.shape[1]) < row_lens[:, None]
                          ).astype(np.float32)[:, None, None, :, None, None],
        }
        orig_imgs = images[:cols + rows]
        col_ltnts = wlatents_all_layers[:cols][None, None]
        row_ltnts = wlatents_all_layers[cols:cols + rows][None, :, None]

        for name, mix in mixes.items():
            # Produce image mixes
            mix_ltnts = mix * row_ltnts + (1 - mix) * col_ltnts
            mix_ltnts = np.reshape(mix_ltnts, [-1, *wlatents_all_layers.shape[1:]])
            mix_imgs = G.run(mix_ltnts, labels, randomize_noise=False,
                             take_dlatents=True, minibatch_size=batch_size)[0]
            mix_imgs = np.reshape(mix_imgs, [len(row_lens) * rows, cols, *mix_imgs.shape[1:]])

            # Create image table canvas
            H, W = mix_imgs.shape[-2:]
            canvas = PIL.Image.new("RGB", (W * (cols + 1), H * (len(row_lens) * rows + 1)), "black")

            # Place image mixes respectively at each position (row_idx, col_idx)
            for row_idx, row_elem in enumerate([None] + list(range(len(row_lens) * rows))):
                for col_idx, col_elem in enumerate([None] + list(range(cols))):
                    if (row_elem, col_elem) == (None, None):
                        continue
                    if row_elem is None:
                        img = orig_imgs[col_elem]
                    elif col_elem is None:
                        img = orig_imgs[cols + (row_elem % rows)]
                    else:
                        img = mix_imgs[row_elem, col_elem]
                    canvas.paste(misc.to_pil(img, drange=drange_net), (W * col_idx, H * row_idx))

            canvas.save(dnnlib.make_run_dir_path("eval/{}_mixing.png".format(name)))

    if verbose:
        print(misc.bcolored("Visualizations Completed!", "blue"))
def interpolate(network_pkl, seeds, dltnt, img_dir, samples_num, loss, lr):
    """Render latent-space interpolations as GIFs.

    Without ``img_dir``, every seed yields two random latents that are
    interpolated in z space (slerp) or, if ``dltnt`` is set, in w space (lerp).
    With ``img_dir``, the sorted files of that directory are projected to the
    latent space: the first one is the common start point and every further
    file is an end point, always interpolating in w space. In that mode the
    ``seeds`` argument is replaced by the indices of the end-point files.

    Args:
        network_pkl: Network pickle location.
        seeds: Sequence of integer seeds (ignored when ``img_dir`` is given).
        dltnt: Interpolate in w space instead of z space.
        img_dir: Directory of input images to project, or ``None``.
        samples_num: The interpolation uses ``samples_num + 1`` evenly spaced steps.
        loss: Assigned to the projector's ``lossType``.
        lr: Assigned to the projector's ``initial_learning_rate``.
    """
    print("Loading networks from %s..." % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)[:3]
    noise_vars = [
        var for name, var in Gs.components.synthesis.vars.items()
        if name.startswith("noise")
    ]

    ts = np.linspace(0.0, 1.0, num=samples_num + 1, endpoint=True)

    proj = img_dir is not None
    mod = "w" if dltnt else "z"
    if proj:
        # normpath drops a trailing separator, which would otherwise leave the
        # directory name empty and the output files tagged with a bare "p".
        mod = "p{}".format(os.path.basename(os.path.normpath(img_dir)))
        dltnt = True

    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.output_transform = dict(func=tflib.convert_imgs_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs.randomize_noise = False
    Gs_kwargs.truncation_psi = 1
    Gs_kwargs.minibatch_size = 4

    # Start interpolation from input images that will be first projected to the latent space
    if proj:
        tflib.set_vars({var: np.random.randn(*var.shape.as_list()) for var in noise_vars})  # [height, width]
        projc = projector.Projector()
        projc.num_steps = 7000
        projc.lossType = loss
        projc.initial_learning_rate = lr
        projc.set_network(Gs)

        print("Loading images from %s" % img_dir)
        img_fns = sorted(glob.glob(os.path.join(img_dir, "*")))
        # Validate before the file list is used for anything else
        if not img_fns:
            print("Error: No input images found")
            sys.exit(1)
        # One interpolation per image after the first
        seeds = range(len(img_fns[1:]))

        imgControl = img_fns[0]
        w0 = projectImage(Gs, projc, imgControl)

    for seed_idx, seed in enumerate(seeds):
        print("Generating image for seed %d (%d/%d)..." % (seed, seed_idx, len(seeds)))
        rnd = np.random.RandomState(seed)
        if not proj:
            tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars})  # [height, width]
        z = rnd.randn(2, *Gs.input_shape[1:])  # [minibatch, component]

        # Interpolate either in w space (if dltnt) or z space
        if dltnt:
            if proj:
                w = [w0, projectImage(Gs, projc, img_fns[seed_idx + 1])]
            else:
                w = Gs.components.mapping.run(z, None)[:, 0]
            w = lerp(w[0], w[1], ts)
            imgs = Gs.components.synthesis.run(broadcastLtnt(Gs, w), **Gs_kwargs)
        else:
            z = slerp(z[0], z[1], ts)
            imgs = Gs.run(z, None, **Gs_kwargs)  # [minibatch, height, width, channel]

        imgs = [misc.to_pil(img) for img in imgs]
        save_gif(imgs, dnnlib.make_run_dir_path("interpolation_%s_%04d.gif" % (mod, seed)))
def generate_ltntprtrb_imgs(network_pkl, seeds, num_samples, noise_range, dltnt, group_size):
    """Visualize the effect of perturbing one latent dimension at a time.

    For every seed a base latent is tiled once per (dimension, offset) pair and
    dimension ``j`` of the matching row is shifted by ``r * stds[j]`` for each
    ``r`` in ``noise_range``. The results are saved as one GIF per dimension
    and as PNG sheets holding ``group_size`` dimensions each (rows: dimensions,
    columns: offsets).

    Args:
        network_pkl: Network pickle location.
        seeds: Sequence of integer seeds.
        num_samples: Number of random latents used to estimate the
            per-dimension standard deviation in w space (only with ``dltnt``).
        noise_range: Sequence of offsets, in units of standard deviations.
        dltnt: Perturb in w space instead of z space. In w space the first
            seed starts from ``dlatent_avg`` instead of a sampled latent.
        group_size: Number of latent dimensions per PNG sheet.
    """
    print("Loading networks from %s..." % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)[:3]
    _, _, H, W = Gs.output_shape

    noise_vars = [
        var for name, var in Gs.components.synthesis.vars.items()
        if name.startswith("noise")
    ]

    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.output_transform = dict(func=tflib.convert_imgs_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs.randomize_noise = False
    Gs_kwargs.truncation_psi = 1
    Gs_kwargs.minibatch_size = 4

    ltnt_size = Gs.components.synthesis.input_shape[2] if dltnt else Gs.input_shape[1]
    space_tag = "w" if dltnt else "z"

    # Perturbation scale per latent dimension: 1 in z space, the empirical
    # standard deviation over sampled latents in w space.
    stds = np.ones(ltnt_size)
    if dltnt:
        samples = np.random.randn(num_samples, Gs.input_shape[1])  # [minibatch, component]
        dlanents = Gs.components.mapping.run(samples, None, minibatch_size=32)[:, 0]
        # Reduce over the sample axis: stds is indexed by latent dimension below,
        # so it needs one value per component, not one per sample.
        stds = np.std(dlanents, axis=0)

    for seed_idx, seed in enumerate(seeds):
        print("Generating image for seed %d (%d/%d)..." % (seed, seed_idx, len(seeds)))
        rnd = np.random.RandomState(seed)

        if seed_idx == 0 and dltnt:
            ltnt = Gs.get_var("dlatent_avg")[np.newaxis]
        else:
            ltnt = rnd.randn(1, Gs.input_shape[1])
            if dltnt:
                ltnt = Gs.components.mapping.run(ltnt, None)[:, 0]
        ltnt = np.tile(ltnt, (ltnt_size * len(noise_range), 1))

        tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars})  # [height, width]

        # Row layout: all offsets for dimension 0, then all offsets for dimension 1, ...
        idx = 0
        for j in range(ltnt_size):
            for r in noise_range:
                ltnt[idx, j] += r * stds[j]
                idx += 1

        if dltnt:
            imgs = Gs.components.synthesis.run(broadcastLtnt(Gs, ltnt), **Gs_kwargs)
        else:
            imgs = Gs.run(ltnt, None, **Gs_kwargs)  # [minibatch, height, width, channel]

        # Dimensions that do not fill a complete sheet are not rendered
        num_crops = ltnt_size // group_size
        frameIdx, imgIdx = 0, 0
        for m in range(num_crops):
            canvas = PIL.Image.new("RGB", (W * len(noise_range), H * group_size), "white")
            for i in range(group_size):
                imgStart = frameIdx
                for j in range(len(noise_range)):
                    canvas.paste(misc.to_pil(imgs[frameIdx]), (j * W, i * H))
                    frameIdx += 1
                imgsOut = [misc.to_pil(img) for img in imgs[imgStart:frameIdx]]
                save_gif(imgsOut, dnnlib.make_run_dir_path(
                    "latent_perturbation_%s_%04d_%04d.gif" % (space_tag, seed, imgIdx)))
                imgIdx += 1
            canvas.save(dnnlib.make_run_dir_path(
                "latent_perturbation_%s_%04d_%04d.png" % (space_tag, seed, m)))
def vis(G,
    dataset,                          # The dataset object for accessing the data
    device,                           # Device to run visualization on
    batch_size,                       # Visualization batch size
    run_dir             = ".",        # Output directory
    training            = False,      # Training mode
    latents             = None,       # Source latents to generate images from
    labels              = None,       # Source labels to generate images from (0 if no labels are used)
    ratio               = 1.0,        # Image height/width ratio in the dataset
    truncation_psi      = 0.7,        # Style strength multiplier for the truncation trick (used for visualizations only)
    # Model settings
    k                   = 1,          # Number of components the model has
    drange_net          = [-1,1],     # Model image output range
    attention           = False,      # Whether the model produces attention maps (for visualization)
    num_heads           = 1,          # Number of attention heads
    # Visualization settings
    vis_types           = None,       # Visualization types to be created
    num                 = 100,        # Number of produced samples
    rich_num            = 5,          # Number of samples for which richer visualizations will be created
                                      # (requires more memory and disk space, and therefore rich_num <= num)
    grid                = None,       # Whether to save the samples in one large grid file
                                      # or in separate files, one per sample
    grid_size           = None,       # Grid proportions (w, h)
    step                = None,       # Step number to be used in visualization filenames
    verbose             = None,       # Verbose print progress messages
    keep_samples        = True,       # Keep all prior samples during training
    # Visualization-specific settings
    alpha               = 0.3,        # Proportion for generated images and attention maps blends
    intrp_density       = 8,          # Number of samples in between two end points of an interpolation
    intrp_per_component = False,      # Whether to perform interpolation along particular latent components (True)
                                      # or all of them at once (False)
    noise_samples_num   = 100,        # Number of samples used to compute noise variation visualization
    section_size        = 100):       # Visualization section size (section_size <= num) for reducing memory footprint
    """Run the generator and write visualizations under ``{run_dir}/visuals``.

    Depending on ``vis_types`` this saves image samples ("imgs"), latents
    ("ltnts"), attention maps and blends ("maps", "layer_maps"; only when
    ``attention`` is set), latent interpolations ("interpolations"), a noise
    variance map ("noise_var") and style/component mixing tables ("style_mix").
    During training only "imgs" is produced.

    Defaults resolved here: ``verbose`` defaults to ``not training``, ``grid``
    defaults to ``training``, and a given ``grid_size`` overrides ``num``,
    ``rich_num`` and ``section_size`` with its product.

    Raises:
        AssertionError: If provided ``latents``/``labels`` do not have ``num``
            rows, or if ``rich_num`` exceeds ``section_size``.
    """
    def prefix(step):
        return "" if step is None else f"{step:06d}_"

    def pattern_of(dir, step, suffix):
        return f"{run_dir}/visuals/{dir}/{prefix(step)}%06d.{suffix}"

    # Set default options
    if verbose is None:
        verbose = not training  # Disable verbose during training
    if grid is None:
        grid = training  # Save image samples in one grid file during training
    if grid_size is not None:
        # If grid size is provided, set images number accordingly
        section_size = rich_num = num = np.prod(grid_size)
    _labels, _latents = labels, latents
    if _latents is not None:
        assert num == _latents.shape[0]
    if _labels is not None:
        assert num == _labels.shape[0]
    assert rich_num <= section_size

    vis = vis_types
    # For time efficiency, during training save only image samples rather than richer visualizations
    if training:
        vis = {"imgs"}
    else:
        vis = vis or {"imgs", "maps", "ltnts", "interpolations", "noise_var"}

    # Build utility functions
    save_images = misc.save_images_builder(drange_net, ratio, grid_size, grid, verbose)
    save_blends = misc.save_blends_builder(drange_net, ratio, grid_size, grid, verbose, alpha)

    crange = trange if verbose else range
    # `start` is an element offset (the section loop below steps by section_size),
    # so it must not be multiplied by the section size again.
    section_of = lambda a, start, n: a[start: start + n]

    get_rnd_latents = lambda n: torch.randn([n, *G.input_shape[1:]], device = device)
    get_rnd_labels = lambda n: torch.from_numpy(dataset.get_random_labels(n)).to(device)

    # Create directories
    dirs = []
    if "imgs" in vis:
        dirs += ["images"]
    if "ltnts" in vis:
        dirs += ["latents-z", "latents-w"]
    if "maps" in vis:
        dirs += ["maps", "softmaps", "blends", "softblends"]
    if "layer_maps" in vis:
        dirs += ["layer_maps"]
    if "interpolations" in vis:
        # Both names must match the per-sample sub-directories created below
        dirs += ["interpolations-z", "interpolations-w"]

    visuals_dir = f"{run_dir}/visuals"
    # On the first run there is nothing to delete yet
    if not keep_samples and os.path.isdir(visuals_dir):
        shutil.rmtree(visuals_dir)
    # The noise-variance and mixing images are written directly into this directory
    os.makedirs(visuals_dir, exist_ok = True)
    for dirname in dirs:
        os.makedirs(f"{run_dir}/visuals/{dirname}", exist_ok = True)

    if verbose:
        print("Running network and saving samples...")

    # Produce visualizations
    for idx in crange(0, num, section_size):
        curr_size = curr_section_size(num, idx, section_size)

        # Compute source latents/labels that images will be produced from
        latents = get_rnd_latents(curr_size) if _latents is None else section_of(_latents, idx, section_size)
        labels = get_rnd_labels(curr_size) if _labels is None else section_of(_labels, idx, section_size)
        if idx == 0:
            # The first section's inputs are the sources for the interpolations below
            latents0, labels0 = latents, labels

        # Run network over latents and produce images and attention maps
        ret = run(G, latents, labels, batch_size, truncation_psi, noise_mode = "const",
            return_att = True, return_ws = True)
        # For memory efficiency, save full information only for a small amount of images
        images, attmaps_all_layers, wlatents_all_layers = ret
        soft_maps = attmaps_all_layers[:,:,-1,0] if attention else None
        attmaps_all_layers = attmaps_all_layers[:rich_num]
        wlatents = wlatents_all_layers[:,:,0]

        # Save image samples
        if "imgs" in vis:
            save_images(images, pattern_of("images", step, "png"), idx)

        # Save latent vectors
        if "ltnts" in vis:
            misc.save_npys(latents, pattern_of("latents-z", step, "npy"), verbose, idx)
            misc.save_npys(wlatents, pattern_of("latents-w", step, "npy"), verbose, idx)

        # For the GANformer model, save attention maps
        if attention:
            # The color palette is shared by the "maps" and "layer_maps" branches
            if "maps" in vis or "layer_maps" in vis:
                pallete = np.expand_dims(misc.get_colors(k - 1), axis = [2, 3])

            if "maps" in vis:
                # Hard assignment: one-hot over the component axis
                maps = (soft_maps == np.amax(soft_maps, axis = 1, keepdims = True)).astype(float)

                soft_maps = np.sum(pallete * np.expand_dims(soft_maps, axis = 2), axis = 1)
                maps = np.sum(pallete * np.expand_dims(maps, axis = 2), axis = 1)

                save_images(soft_maps, pattern_of("softmaps", step, "png"), idx)
                save_images(maps, pattern_of("maps", step, "png"), idx)

                save_blends(soft_maps, images, pattern_of("softblends", step, "png"), idx)
                save_blends(maps, images, pattern_of("blends", step, "png"), idx)

            # Save maps from all attention heads and layers
            # (for efficiency, only for a small number of images)
            if "layer_maps" in vis:
                all_maps = []
                maps_fakes = np.split(attmaps_all_layers, attmaps_all_layers.shape[2], axis = 2)
                for layer, lmaps in enumerate(maps_fakes):
                    # Axis 2 is the singleton layer axis left by the split; the head
                    # axis is axis 3 and becomes axis 2 once the layer axis is squeezed.
                    heads = lmaps.shape[3]
                    lmaps = np.split(np.squeeze(lmaps, axis = 2), heads, axis = 2)
                    for head, hmap in enumerate(lmaps):
                        hmap = (hmap == np.amax(hmap, axis = 1, keepdims = True)).astype(float)
                        hmap = np.sum(pallete * hmap, axis = 1)
                        all_maps.append((hmap, f"l{layer}_h{head}"))

                # Computed outside the directory loop so it exists even when rich_num is 0
                stepdir = "" if step is None else (f"/{step:06d}")
                if not grid:
                    for i in range(rich_num):
                        os.makedirs(f"{run_dir}/visuals/layer_maps/%06d" % i + stepdir, exist_ok = True)
                for maps, name in all_maps:
                    if grid:
                        pattern = f"{run_dir}/visuals/layer_maps/{prefix(step)}%06d-{name}.png"
                    else:
                        pattern = f"{run_dir}/visuals/layer_maps/%06d/{stepdir}/{name}.png"
                    save_images(maps, pattern, idx)

    # Produce interpolations between pairs of source latents
    # In the GANformer case, varying one component at a time
    if "interpolations" in vis:
        ts = torch.linspace(0.0, 1.0, steps = intrp_density)

        if verbose:
            print("Generating interpolations...")
        for i in crange(rich_num):
            os.makedirs(f"{run_dir}/visuals/interpolations-z/%06d" % i, exist_ok = True)
            os.makedirs(f"{run_dir}/visuals/interpolations-w/%06d" % i, exist_ok = True)

            # Interpolate from source latent i towards a fresh random latent
            z = get_rnd_latents(2)
            z[0] = latents0[i]
            c = labels0[i:i+1]

            w = run(G, z, c, batch_size, truncation_psi, noise_mode = "const", return_ws = True)[-1]

            def update(t, fn, ts, dim):
                if dim == 3:
                    ts = ts[:, None]
                t_ups = []

                if intrp_per_component:
                    for c in range(k - 1):
                        # copy over all the components except component c that will get interpolated
                        t_up = torch.clone(t[0]).unsqueeze(0).repeat((intrp_density, ) + tuple([1] * dim))
                        # interpolate component c
                        t_up[:,c] = fn(t[0, c], t[1, c], ts)
                        t_ups.append(t_up)

                    t_up = torch.cat(t_ups, dim = 0)
                else:
                    t_up = fn(t[0], t[1], ts.unsqueeze(1))

                return t_up

            z_up = update(z, slerp, ts, 2)
            w_up = update(w, lerp, ts, 3)

            imgs1 = run(G, z_up, c, batch_size, truncation_psi, noise_mode = "const")[0]
            imgs2 = run(G, w_up, c, batch_size, truncation_psi, noise_mode = "const", take_w = True)[0]

            def save_interpolation(imgs, name):
                imgs = np.split(imgs, k - 1, axis = 0)
                for c in range(k - 1):
                    filename = f"{run_dir}/visuals/interpolations-{name}/{i:06d}/{c:02d}"
                    imgs[c] = [misc.to_pil(img, drange = drange_net) for img in imgs[c]]
                    imgs[c][-1].save(f"{filename}.png")
                    misc.save_gif(imgs[c], f"{filename}.gif")

            save_interpolation(imgs1, "z")
            save_interpolation(imgs2, "w")

    # Compute noise variance map
    # Shows what areas vary the most given fixed source
    # latents due to the use of stochastic local noise
    if "noise_var" in vis:
        if verbose:
            print("Generating noise variance...")

        z = get_rnd_latents(1).repeat(noise_samples_num, 1, 1)
        c = get_rnd_labels(1)
        imgs = run(G, z, c, batch_size, truncation_psi)[0]
        imgs = np.stack([misc.to_pil(img, drange = drange_net) for img in imgs], axis = 0)
        diff = np.std(np.mean(imgs, axis = 3), axis = 0) * 4
        diff = np.clip(diff + 0.5, 0, 255).astype(np.uint8)
        PIL.Image.fromarray(diff, "L").save(f"{run_dir}/visuals/noise-variance.png")

    # Compute style mixing table, using latent A in some of the layers and latent B in the rest.
    # For the GANformer, also produce component mixes (using latents from A in some of the
    # components, and latents from B in the rest).
    if "style_mix" in vis:
        if verbose:
            print("Generating style mixes...")
        cols, rows = 4, 2
        row_lens = np.array([2, 5, 8, 11])
        c = get_rnd_labels(1)

        # Create latent mixes
        mixes = {
            "layer": (np.arange(wlatents_all_layers.shape[2]) < row_lens[:,None]).astype(np.float32)[:,None,None,None,:,None],
            "component": (np.arange(wlatents_all_layers.shape[1]) < row_lens[:,None]).astype(np.float32)[:,None,None,:,None,None]
        }
        orig_imgs = images[:cols+rows]
        col_z = wlatents_all_layers[:cols][None, None]
        row_z = wlatents_all_layers[cols:cols+rows][None,:,None]

        for name, mix in mixes.items():
            # Produce image mixes
            mix_z = mix * row_z + (1 - mix) * col_z
            mix_z = torch.from_numpy(np.reshape(mix_z, [-1, *wlatents_all_layers.shape[1:]])).to(device)
            mix_imgs = run(G, mix_z, c, batch_size, truncation_psi, noise_mode = "const", take_w = True)[0]
            mix_imgs = np.reshape(mix_imgs, [len(row_lens) * rows, cols, *mix_imgs.shape[1:]])

            # Create image table canvas
            H, W = mix_imgs.shape[-2:]
            canvas = PIL.Image.new("RGB", (W * (cols + 1), H * (len(row_lens) * rows + 1)), "black")

            # Place image mixes respectively at each position (row_idx, col_idx)
            for row_idx, row_elem in enumerate([None] + list(range(len(row_lens) * rows))):
                for col_idx, col_elem in enumerate([None] + list(range(cols))):
                    if (row_elem, col_elem) == (None, None):
                        continue
                    if row_elem is None:
                        img = orig_imgs[col_elem]
                    elif col_elem is None:
                        img = orig_imgs[cols + (row_elem % rows)]
                    else:
                        img = mix_imgs[row_elem, col_elem]

                    canvas.paste(misc.to_pil(img, drange = drange_net), (W * col_idx, H * row_idx))

            canvas.save(f"{run_dir}/visuals/{name}-mixing.png")

    if verbose:
        misc.log("Visualizations Completed!", "blue")