def create_from_image_folders(datadir, shuffle=True, size=None):
    """Export a labelled TFRecord dataset from a tree of image folders.

    Every subdirectory met while walking ``datadir`` is scanned with
    ``img_list``; its images get the subdirectory's index within its parent
    listing as an integer class id, and the ids are one-hot encoded.

    Args:
        datadir: Root directory holding the class subfolders.
        shuffle: When true, images are written in the order returned by
            ``TFRecordExporter.choose_shuffled_order()``; otherwise in
            discovery order.
        size: Forwarded unchanged to ``TFRecordExporter.add_image``.

    Returns:
        Tuple ``(tfr_file, image_count)``.

    Raises:
        AssertionError: If ``datadir`` is not a directory, no images were
            found, or the first image has an unexpected channel count.
    """
    assert os.path.isdir(datadir)
    imgs = []
    labels = []
    for root, subdirs, _files in os.walk(datadir):
        # NOTE(review): ids restart at 0 on every directory level, so nested
        # class folders would share ids with top-level ones -- confirm that
        # the expected layout is flat.
        for i, subdir in enumerate(subdirs):
            tmp_list = img_list(os.path.join(root, subdir))
            imgs.extend(tmp_list)
            labels.extend([i] * len(tmp_list))

    # Must be checked before np.max(): reducing an empty array raises an
    # unrelated ValueError and hides the real problem.
    assert len(imgs) > 0, ' No input images found!'

    labels = np.array(labels)
    num_labels = np.max(labels) + 1
    onehot = np.zeros((labels.size, num_labels), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.

    # The first image decides the channel count for the whole set.
    sample_img = np.asarray(PIL.Image.open(imgs[0]))
    sample_shape = sample_img.shape
    channels = sample_shape[2] if sample_img.ndim == 3 else 1
    assert channels in [1, 3, 4], ' Weird color dim: %d' % channels
    print(' Making dataset ..', datadir, sample_shape,
          '%d labels' % num_labels)
    jpg = channels < 4
    if jpg is True:
        print(' Loading JPG as is!')

    with TFRecordExporter(datadir, len(imgs)) as tfr:
        order = tfr.choose_shuffled_order() if shuffle else np.arange(
            len(imgs))
        pbar = ProgressBar(order.size)
        for idx in range(order.size):
            img_path = imgs[order[idx]]
            tfr.add_image(img_path, jpg=jpg, size=size)
            pbar.upd()
        # Labels are stored in the same (possibly shuffled) order as images.
        tfr.add_labels(onehot[order])
    return tfr.tfr_file, len(imgs)
def copy_and_crop_or_pad_trainables(src_net, tgt_net) -> None:
    """Copy trainables from ``src_net`` to ``tgt_net``, resizing mismatches.

    Source and target trainables are paired positionally (``zip`` of the two
    key sequences). For each pair whose arrays differ in shape:

    * if the source name contains ``'Dense'``, the array is center-cropped
      along axis 0 when the source is larger there, otherwise padded with
      ``pad_symm_np`` to the full target shape;
    * otherwise the array is center-cropped along axes 2 and 3 wherever the
      source is larger, then padded with ``pad_symm_np`` to
      ``target_shape[2:]`` if either of those axes is smaller.

    Resized arrays are written with ``tgt_net.set_var``; all remaining pairs
    are copied in one ``tfutil.set_vars`` call.

    Args:
        src_net: Network providing ``trainables``, ``vars`` and ``get_var``.
        tgt_net: Network providing ``trainables``, ``vars``, ``get_var`` and
            ``set_var``.

    Raises:
        SystemExit: With status 1 when a non-Dense pair cannot be indexed on
            axes 2/3 (reported as a wrong variable pairing).
    """
    names = list(zip(src_net.trainables.keys(), tgt_net.trainables.keys()))
    skip = set()  # source names already written via set_var
    pbar = ProgressBar(len(names))
    for pair in names:
        source_name, target_name = pair
        x = src_net.get_var(source_name)
        y = tgt_net.get_var(target_name)
        source_shape = x.shape
        target_shape = y.shape
        if source_shape != target_shape:
            update = x
            if 'Dense' in source_name:
                if source_shape[0] > target_shape[0]:
                    start = (source_shape[0] - target_shape[0]) // 2
                    end = start + target_shape[0]
                    update = update[start:end, :]
                else:
                    update = pad_symm_np(update, target_shape)
            else:
                try:
                    if source_shape[2] > target_shape[2]:
                        start = (source_shape[2] - target_shape[2]) // 2
                        end = start + target_shape[2]
                        update = update[:, :, start:end, :]
                    if source_shape[3] > target_shape[3]:
                        start = (source_shape[3] - target_shape[3]) // 2
                        end = start + target_shape[3]
                        update = update[:, :, :, start:end]
                except IndexError:
                    # Fewer than four axes: the positional pairing matched
                    # two variables of different kinds.
                    print(' Wrong var pair?', source_name, source_shape,
                          target_name, target_shape)
                    exit(1)
                if (source_shape[2] < target_shape[2]
                        or source_shape[3] < target_shape[3]):
                    update = pad_symm_np(update, target_shape[2:])
            tgt_net.set_var(target_name, update)
            skip.add(source_name)
        pbar.upd(pair)

    # Everything that did not need resizing is copied variable-to-variable.
    weights_to_copy = {
        tgt_net.vars[pair[1]]: src_net.vars[pair[0]]
        for pair in names if pair[0] not in skip
    }
    tfutil.set_vars(tfutil.run(weights_to_copy))
def project_image(proj, targets, work_dir, resolution, num_snapshots):
    """Run a projector on ``targets`` and record video, snapshots and dlatents.

    Output files share the prefix ``<work_dir>/<basename(work_dir)>``:
    ``.avi`` (MJPG, 25 fps), ``.jpg`` (the targets), ``-NNNN.jpg`` snapshots
    and a final ``-NNNN.npy`` with the dlatents.

    Args:
        proj: Projector exposing ``num_steps``, ``start``, ``step``,
            ``get_cur_step``, ``get_images`` and ``get_dlatents``.
        targets: Target images, passed to ``proj.start`` and saved as a grid.
        work_dir: Output directory.
        resolution: Frame size handed to ``cv2.VideoWriter``.
        num_snapshots: Number of evenly spaced snapshot steps.
    """
    filename = osp.join(work_dir, basename(work_dir))
    video_out = cv2.VideoWriter(filename + '.avi',
                                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                25, resolution)
    # try/finally so the video file is finalized even if a step fails.
    try:
        # Steps counted back from the last one, so the final step is always
        # among the snapshots.
        snapshot_steps = set(proj.num_steps - np.linspace(
            0, proj.num_steps, num_snapshots, endpoint=False, dtype=int))
        misc.save_image_grid(targets, filename + '.jpg', drange=[-1, 1])
        proj.start(targets)
        pbar = ProgressBar(proj.num_steps)
        while proj.get_cur_step() < proj.num_steps:
            proj.step()
            write_video_frame(proj, video_out)
            if proj.get_cur_step() in snapshot_steps:
                misc.save_image_grid(
                    proj.get_images(),
                    filename + '-%04d.jpg' % proj.get_cur_step(),
                    drange=[-1, 1])
            pbar.upd()

        dlats = proj.get_dlatents()
        np.save(filename + '-%04d.npy' % proj.get_cur_step(), dlats)
    finally:
        video_out.release()
def copy_and_fill_trainables(src_net, tgt_net) -> None:  # model => conditional
    """Copy same-named trainables, cropping or tiling where shapes differ.

    Only names present in both networks are handled. When a source array and
    its target differ in shape (same rank required):

    * if any target dimension is smaller, the source is cut to the target
      size along the first two axes (first axis only for 1-D arrays);
    * otherwise the source is repeated with ``np.tile`` using the per-axis
      integer ratio ``target // source``.

    Adjusted arrays are written with ``tgt_net.set_var``; all other shared
    variables are copied in one ``tfutil.set_vars`` call.

    Args:
        src_net: Network providing ``trainables``, ``vars`` and ``get_var``.
        tgt_net: Network providing ``trainables``, ``vars``, ``get_var`` and
            ``set_var``.

    Raises:
        AssertionError: If a shared variable has a different rank in the two
            networks.
    """
    target_names = tgt_net.trainables.keys()
    train_vars = [
        name for name in src_net.trainables.keys() if name in target_names
    ]
    skip = set()  # names already written via set_var
    pbar = ProgressBar(len(train_vars))
    for name in train_vars:
        x = src_net.get_var(name)
        y = tgt_net.get_var(name)
        src_shape = x.shape
        tgt_shape = y.shape
        if src_shape != tgt_shape:
            assert len(src_shape) == len(tgt_shape), \
                "Different shapes: %s %s" % (str(src_shape), str(tgt_shape))
            if np.less(tgt_shape, src_shape).any():
                # kill labels: [1024,512] => [512,512]
                try:
                    # !!! corrects only first two dims
                    update = x[:tgt_shape[0], :tgt_shape[1], ...]
                except IndexError:
                    # 1-D variable: there is no second axis to cut.
                    update = x[:tgt_shape[0]]
            else:
                # add labels: [512,512] => [1024,512]
                # Shapes are unequal, of equal rank, and no target dim is
                # smaller, so at least one target dim is larger.
                tile_count = [t // s for t, s in zip(tgt_shape, src_shape)]
                if a.verbose is True:
                    # G_mapping/Dense0, D/Output
                    print(name, tile_count, src_shape, '=>', tgt_shape,
                          '\n\n')
                update = np.tile(x, tile_count)
            tgt_net.set_var(name, update)
            skip.add(name)
        pbar.upd(name)

    weights_to_copy = {
        tgt_net.vars[name]: src_net.vars[name]
        for name in train_vars if name not in skip
    }
    tfutil.set_vars(tfutil.run(weights_to_copy))
def create_from_images(datadir, shuffle=True, size=None):
    """Export an unlabelled TFRecord dataset from the images under ``datadir``.

    Image paths come from ``img_list(datadir, subdir=True)`` and are sorted.
    The first image is opened to determine the channel count; images with
    fewer than four channels are flagged ``jpg=True`` for the exporter.

    Args:
        datadir: Directory holding the source images.
        shuffle: When true, use ``tfr.choose_shuffled_order()``; otherwise
            keep the sorted order.
        size: Forwarded unchanged to ``TFRecordExporter.add_image``.

    Returns:
        Tuple ``(tfr_file, image_count)``.
    """
    assert os.path.isdir(datadir)
    image_paths = sorted(img_list(datadir, subdir=True))
    assert len(image_paths) > 0, ' No input images found!'

    first = np.asarray(PIL.Image.open(image_paths[0]))
    first_shape = first.shape
    if first.ndim == 3:
        channels = first_shape[2]
    else:
        channels = 1
    assert channels in [1, 3, 4], ' Weird color dim: %d' % channels
    print(' Making dataset ..', datadir, first_shape)

    jpg = channels < 4
    if jpg is True:
        print(' Loading JPG as is!')

    total = len(image_paths)
    with TFRecordExporter(datadir, total) as tfr:
        if shuffle:
            order = tfr.choose_shuffled_order()
        else:
            order = np.arange(total)
        pbar = ProgressBar(order.size)
        for pos in range(order.size):
            tfr.add_image(image_paths[order[pos]], jpg=jpg, size=size)
            pbar.upd()
    return tfr.tfr_file, total
def generate():
    """Render an animated latent walk to numbered image files (PyTorch).

    Reads all settings from the module-level ``a`` namespace: loads the
    generator, builds ``n_mult`` latent timelines with ``latent_anima``
    (blended by an external mask or by splitting the frame), optionally
    perturbs the initial const layer (``a.digress``), picks class labels for
    conditional models, then writes one image per frame into ``a.out_dir``.
    With ``a.save_lat`` the latents are mapped to dlatents and saved as
    ``.npy`` next to ``a.out_dir``.
    """
    os.makedirs(a.out_dir, exist_ok=True)
    np.random.seed(seed=696)
    device = torch.device('cuda')

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type

    # mask/blend latents with external latmask or by splitting the frame
    if a.latmask is None:
        nHW = [int(s) for s in a.nXY.split('-')][::-1]
        assert len(nHW) == 2, ' Wrong count nXY: %d (must be 2)' % len(nHW)
        n_mult = nHW[0] * nHW[1]
        if a.verbose is True and n_mult > 1:
            print(' Latent blending w/split frame %d x %d' % (nHW[1], nHW[0]))
        lmask = np.tile(np.asarray([[[[1]]]]), (1, n_mult, 1, 1))
        Gs_kwargs.countHW = nHW
        Gs_kwargs.splitfine = a.splitfine
    else:
        if a.verbose is True:
            print(' Latent blending with mask', a.latmask)
        n_mult = 2
        if os.path.isfile(a.latmask):  # single file
            lmask = np.asarray([[img_read(a.latmask)[:, :, 0] / 255.]
                                ])  # [1,1,h,w]
        elif os.path.isdir(a.latmask):  # directory with frame sequence
            # One entry per frame -> [frm,1,h,w]; the frame axis has to come
            # first so the concatenation below gives [frm,2,h,w] and the
            # per-frame lookup `lmask[i % len(lmask)]` picks one mask pair.
            lmask = np.asarray([[img_read(f)[:, :, 0] / 255.]
                                for f in img_list(a.latmask)])
        else:
            print(' !! Blending mask not found:', a.latmask)
            exit(1)
        lmask = np.concatenate((lmask, 1 - lmask), 1)  # [frm,2,h,w]
    lmask = torch.from_numpy(lmask).to(device)

    # load base or custom network
    pkl_name = osp.splitext(a.model)[0]
    if '.pkl' in a.model.lower():
        custom = False
        print(' .. Gs from pkl ..', basename(a.model))
    else:
        custom = True
        print(' .. Gs custom ..', basename(a.model))
    with dnnlib.util.open_url(pkl_name + '.pkl') as f:
        Gs = legacy.load_network_pkl(
            f, custom=custom, **Gs_kwargs)['G_ema'].to(device)  # type: ignore

    if a.verbose is True:
        print(' out shape', Gs.output_shape[1:])

    if a.verbose is True:
        print(' making timeline..')
    lats = []  # list of [frm,1,512]
    for i in range(n_mult):
        lat_tmp = latent_anima((1, Gs.z_dim), a.frames, a.fstep,
                               cubic=a.cubic, gauss=a.gauss,
                               verbose=False)  # [frm,1,512]
        lats.append(lat_tmp)
    latents = np.concatenate(lats, 1)  # [frm,X,512]
    print(' latents', latents.shape)
    latents = torch.from_numpy(latents).to(device)
    frame_count = latents.shape[0]

    # distort image by tweaking initial const layer
    if a.digress > 0:
        # default initial layer size when the network does not expose one
        init_res = getattr(Gs, 'init_res', (4, 4))
        dconst = []
        for i in range(n_mult):
            dc_tmp = a.digress * latent_anima([1, Gs.z_dim, *init_res],
                                              a.frames, a.fstep,
                                              cubic=True, verbose=False)
            dconst.append(dc_tmp)
        dconst = np.concatenate(dconst, 1)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])
    dconst = torch.from_numpy(dconst).to(device)

    # labels / conditions
    label_size = Gs.c_dim
    if label_size > 0:
        labels = torch.zeros((frame_count, n_mult, label_size),
                             device=device)  # [frm,X,lbl]
        if a.labels is None:
            label_ids = []
            for i in range(n_mult):
                label_ids.append(random.randint(0, label_size - 1))
        else:
            label_ids = [int(x) for x in a.labels.split('-')]
            # never use more labels than there are latents
            label_ids = label_ids[:n_mult]
        for i, l in enumerate(label_ids):
            labels[:, i, l] = 1
    else:
        labels = [None]

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for i in range(frame_count):
        latent = latents[i]  # [X,512]
        label = labels[i % len(labels)]
        latmask = lmask[i % len(lmask)] if lmask is not None else [
            None
        ]  # [X,h,w]
        dc = dconst[i % len(dconst)]  # [X,512,4,4]

        # generate multi-latent result
        if custom:
            output = Gs(latent, label, latmask, dc,
                        truncation_psi=a.trunc, noise_mode='const')
        else:
            output = Gs(latent, label,
                        truncation_psi=a.trunc, noise_mode='const')
        output = (output.permute(0, 2, 3, 1) * 127.5 + 128).clamp(
            0, 255).to(torch.uint8).cpu().numpy()

        # save image
        ext = 'png' if output.shape[3] == 4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (i, ext))
        imsave(filename, output[0])
        pbar.upd()

    # convert latents to dlatents, save them
    if a.save_lat is True:
        latents = latents.squeeze(1)  # [frm,512]
        # NOTE(review): `label` is whatever the last frame used above.
        dlatents = Gs.mapping(latents, label)  # [frm,18,512]
        if a.size is None:
            a.size = [''] * 2
        filename = '{}-{}-{}.npy'.format(basename(a.model), a.size[1],
                                         a.size[0])
        filename = osp.join(osp.dirname(a.out_dir), filename)
        dlatents = dlatents.cpu().numpy()
        np.save(filename, dlatents)
        print('saved dlatents', dlatents.shape, 'to', filename)
def main():
    """Render an animation between saved key dlatents (PyTorch).

    Settings come from the module-level ``a`` namespace. Key dlatents are read
    from one ``.npy`` file or a directory of them, optionally restyled on
    layers 5 and above with ``a.style_dlat``, interpolated by
    ``latent_anima``, and synthesized frame by frame into ``a.out_dir``.
    """
    os.makedirs(a.out_dir, exist_ok=True)
    device = torch.device('cuda')

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type

    # load base or custom network
    pkl_name = osp.splitext(a.model)[0]
    custom = '.pkl' not in a.model.lower()
    if custom:
        print(' .. Gs custom ..', basename(a.model))
    else:
        print(' .. Gs from pkl ..', basename(a.model))
    with dnnlib.util.open_url(pkl_name + '.pkl') as f:
        loaded = legacy.load_network_pkl(f, custom=custom, **Gs_kwargs)
        Gs = loaded['G_ema'].to(device)  # type: ignore

    dlat_shape = (1, Gs.num_ws, Gs.w_dim)  # [1,18,512]

    def _load_batched(path):
        # Load one latent file; 2-D arrays get a leading batch axis.
        latent = load_latents(path)
        if len(latent.shape) == 2:
            latent = np.expand_dims(latent, 0)
        return latent

    # read saved latents
    source = a.dlatents
    has_source = source is not None
    if has_source and osp.isfile(source):
        key_dlatents = _load_batched(source)
    elif has_source and osp.isdir(source):
        key_dlatents = np.concatenate(
            [_load_batched(npy) for npy in file_list(source, 'npy')]
        )  # [frm,18,512]
    else:
        print(' No input dlatents found')
        exit()
    key_dlatents = key_dlatents[:, np.newaxis]  # [frm,1,18,512]
    print(' key dlatents', key_dlatents.shape)

    # replace higher layers with single (style) latent
    if a.style_dlat is not None:
        print(' styling with dlatent', a.style_dlat)
        style_dlatent = load_latents(a.style_dlat)
        while len(style_dlatent.shape) < 4:
            style_dlatent = np.expand_dims(style_dlatent, 0)
        # try replacing 5 by other value, less than Gs.num_ws
        upper_layers = range(5, Gs.num_ws)
        key_dlatents[:, :, upper_layers, :] = \
            style_dlatent[:, :, range(5, Gs.num_ws), :]

    frames = key_dlatents.shape[0] * a.fstep

    dlatents = latent_anima(dlat_shape, frames, a.fstep,
                            key_latents=key_dlatents, cubic=a.cubic,
                            verbose=True)  # [frm,1,512]
    print(' dlatents', dlatents.shape)
    frame_count = dlatents.shape[0]
    dlatents = torch.from_numpy(dlatents).to(device)

    # distort image by tweaking initial const layer
    if a.digress > 0:
        try:
            init_res = Gs.init_res
        except Exception:
            init_res = (4, 4)  # default initial layer size
        const_shape = [1, Gs.z_dim, *init_res]
        dconst = a.digress * latent_anima(const_shape, frame_count, a.fstep,
                                          cubic=True, verbose=False)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])
    dconst = torch.from_numpy(dconst).to(device)

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for frame in range(frame_count):
        # generate multi-latent result
        if custom:
            output = Gs.synthesis(dlatents[frame], None, dconst[frame],
                                  noise_mode='const')
        else:
            output = Gs.synthesis(dlatents[frame], noise_mode='const')
        scaled = output.permute(0, 2, 3, 1) * 127.5 + 128
        output = scaled.clamp(0, 255).to(torch.uint8).cpu().numpy()

        ext = 'png' if output.shape[3] == 4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (frame, ext))
        imsave(filename, output[0])
        pbar.upd()
def update_G(src_dict, tgt_net, size, n_mlp):
    """Transfer generator weights from ``src_dict`` into ``tgt_net``.

    Walks the mapping layers, the constant input, every ToRGB layer, every
    modulated convolution and every noise buffer, converting each with the
    matching ``convert_*`` / ``update`` helper and advancing a progress bar.

    Args:
        src_dict: Mapping of source weight names (``style.N``, ``to_rgbs.N``,
            ``convs.N``, ``noises.noise_N``, ...) to their values.
        tgt_net: Network receiving the weights under ``G_mapping/...`` and
            ``G_synthesis/...`` names.
        size: Output resolution; its base-2 logarithm sets the layer count.
        n_mlp: Number of mapping (dense) layers.
    """
    log_size = int(math.log(size, 2))
    n_up = log_size - 2  # resolution levels above 4x4
    n_noise = n_up * 2 + 1
    pbar = ProgressBar(n_mlp + n_up + n_up + n_noise + 2)

    # mapping network
    for layer in range(n_mlp):
        convert_dense(tgt_net, src_dict, f"G_mapping/Dense{layer}",
                      f"style.{layer+1}")
        pbar.upd()

    # constant input and 4x4 ToRGB
    update(tgt_net, "G_synthesis/4x4/Const/const", src_dict["input.input"])
    convert_torgb(tgt_net, src_dict, "G_synthesis/4x4/ToRGB", "to_rgb1")
    pbar.upd()

    resolutions = [4 * 2**(level + 1) for level in range(n_up)]

    # ToRGB at every higher resolution
    for level, reso in enumerate(resolutions):
        convert_torgb(tgt_net, src_dict, f"G_synthesis/{reso}x{reso}/ToRGB",
                      f"to_rgbs.{level}")
        pbar.upd()

    # modulated convolutions: 4x4 first, then an (up, plain) pair per level
    convert_modconv(tgt_net, src_dict, "G_synthesis/4x4/Conv", "conv1")
    pbar.upd()
    for level, reso in enumerate(resolutions):
        conv_i = 2 * level
        convert_modconv(tgt_net, src_dict,
                        f"G_synthesis/{reso}x{reso}/Conv0_up",
                        f"convs.{conv_i}", flip=True)
        convert_modconv(tgt_net, src_dict,
                        f"G_synthesis/{reso}x{reso}/Conv1",
                        f"convs.{conv_i + 1}")
        pbar.upd()

    # noise buffers
    for idx in range(n_noise):
        update(tgt_net, f"G_synthesis/noise{idx}",
               src_dict[f"noises.noise_{idx}"])
        pbar.upd()
def main():
    """Render an animated latent walk to numbered image files (TensorFlow).

    Reads all settings from the module-level ``a`` namespace: loads the
    pickled network (rebuilding it as a custom ``tflib.Network`` unless the
    model path names a ``.pkl``), builds ``n_mult`` latent timelines with
    ``latent_anima``, optionally perturbs the initial const layer
    (``a.digress``), picks class labels for conditional models, then writes
    one image per frame into ``a.out_dir``. With ``a.save_lat`` the latents
    are mapped to dlatents and saved as ``.npy`` next to ``a.out_dir``.
    """
    os.makedirs(a.out_dir, exist_ok=True)
    np.random.seed(seed=696)

    # setup generator
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.func_name = 'training.stylegan2_multi.G_main'
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type
    Gs_kwargs.impl = a.ops

    # mask/blend latents with external latmask or by splitting the frame
    if a.latmask is None:
        nHW = [int(s) for s in a.nXY.split('-')][::-1]
        assert len(nHW) == 2, ' Wrong count nXY: %d (must be 2)' % len(nHW)
        n_mult = nHW[0] * nHW[1]
        if a.verbose is True and n_mult > 1:
            print(' Latent blending w/split frame %d x %d' % (nHW[1], nHW[0]))
        lmask = np.tile(np.asarray([[[[None]]]]), (1, n_mult, 1, 1))
        Gs_kwargs.countHW = nHW
        Gs_kwargs.splitfine = a.splitfine
    else:
        if a.verbose is True:
            print(' Latent blending with mask', a.latmask)
        n_mult = 2
        if os.path.isfile(a.latmask):  # single file
            lmask = np.asarray([[img_read(a.latmask)[:, :, 0] / 255.]
                                ])  # [1,1,h,w]
        elif os.path.isdir(a.latmask):  # directory with frame sequence
            # One entry per frame -> [frm,1,h,w]; the frame axis has to come
            # first so the concatenation below gives [frm,2,h,w] and the
            # per-frame lookup `lmask[i % len(lmask)]` picks one mask pair.
            lmask = np.asarray([[img_read(f)[:, :, 0] / 255.]
                                for f in img_list(a.latmask)])
        else:
            print(' !! Blending mask not found:', a.latmask)
            exit(1)
        lmask = np.concatenate((lmask, 1 - lmask), 1)  # [frm,2,h,w]
        Gs_kwargs.latmask_res = lmask.shape[2:]

    # load model with arguments
    sess = tflib.init_tf({'allow_soft_placement': True})
    pkl_name = osp.splitext(a.model)[0]
    # SECURITY: pickle executes arbitrary code on load -- only open model
    # files from a trusted source.
    with open(pkl_name + '.pkl', 'rb') as file:
        network = pickle.load(file, encoding='latin1')
    try:
        # A full training snapshot holds three networks; keep the last one.
        _, _, network = network
    except (TypeError, ValueError):
        pass  # already a single network
    for k, v in network.static_kwargs.items():
        Gs_kwargs[k] = v

    # reload custom network, if needed
    if '.pkl' in a.model.lower():
        print(' .. Gs from pkl ..', basename(a.model))
        Gs = network
    else:  # reconstruct network
        print(' .. Gs custom ..', basename(a.model))
        Gs = tflib.Network('Gs', **Gs_kwargs)
        Gs.copy_vars_from(network)
    if a.verbose is True:
        print('kwargs:', [
            '%s: %s' % (kv[0], kv[1])
            for kv in sorted(Gs.static_kwargs.items())
        ])

    if a.verbose is True:
        print(' out shape', Gs.output_shape[1:])
    if a.size is None:
        a.size = Gs.output_shape[2:]

    if a.verbose is True:
        print(' making timeline..')
    lats = []  # list of [frm,1,512]
    for i in range(n_mult):
        lat_tmp = latent_anima((1, Gs.input_shape[1]), a.frames, a.fstep,
                               cubic=a.cubic, gauss=a.gauss,
                               verbose=False)  # [frm,1,512]
        lats.append(lat_tmp)
    latents = np.concatenate(lats, 1)  # [frm,X,512]
    print(' latents', latents.shape)
    frame_count = latents.shape[0]

    # distort image by tweaking initial const layer
    if a.digress > 0:
        # fall back to the default latent size / initial layer size
        latent_size = Gs.static_kwargs.get('latent_size', 512)
        init_res = Gs.static_kwargs.get('init_res', (4, 4))
        dconst = []
        for i in range(n_mult):
            dc_tmp = a.digress * latent_anima([1, latent_size, *init_res],
                                              a.frames, a.fstep,
                                              cubic=True, verbose=False)
            dconst.append(dc_tmp)
        dconst = np.concatenate(dconst, 1)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])

    # labels / conditions
    label_size = Gs_kwargs.get('label_size', 0)
    if label_size > 0:
        labels = np.zeros((frame_count, n_mult, label_size))  # [frm,X,lbl]
        if a.labels is None:
            label_ids = []
            for i in range(n_mult):
                label_ids.append(random.randint(0, label_size - 1))
        else:
            label_ids = [int(x) for x in a.labels.split('-')]
            # never use more labels than there are latents
            label_ids = label_ids[:n_mult]
        for i, l in enumerate(label_ids):
            labels[:, i, l] = 1
    else:
        labels = [None]

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for i in range(frame_count):
        latent = latents[i]  # [X,512]
        label = labels[i % len(labels)]
        latmask = lmask[i % len(lmask)] if lmask is not None else [
            None
        ]  # [X,h,w]
        dc = dconst[i % len(dconst)]  # [X,512,4,4]

        # generate multi-latent result
        if Gs.num_inputs == 2:
            output = Gs.run(latent, label, truncation_psi=a.trunc,
                            randomize_noise=False, output_transform=fmt)
        else:
            output = Gs.run(latent, label, latmask, dc,
                            truncation_psi=a.trunc, randomize_noise=False,
                            output_transform=fmt)

        # save image
        ext = 'png' if output.shape[3] == 4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (i, ext))
        imsave(filename, output[0])
        pbar.upd()

    # convert latents to dlatents, save them
    if a.save_lat is True:
        latents = latents.squeeze(1)  # [frm,512]
        # NOTE(review): `label` is whatever the last frame used above.
        dlatents = Gs.components.mapping.run(
            latents, label, dtype='float16')  # [frm,18,512]
        filename = '{}-{}-{}.npy'.format(basename(a.model), a.size[1],
                                         a.size[0])
        filename = osp.join(osp.dirname(a.out_dir), filename)
        np.save(filename, dlatents)
        print('saved dlatents', dlatents.shape, 'to', filename)
def main():
    """Render looping walks along saved latent directions (PyTorch).

    Settings come from the module-level ``a`` namespace. Every ``.npy`` file
    in ``a.vector_dir`` is loaded as a direction; for each one ``make_loop``
    is called with the base latent (loaded from ``a.base_lat`` or drawn at
    random), the direction, the range ``[-0.5, 0.5]`` and a frame offset.

    Sets the module globals ``Gs``, ``use_d`` and ``custom``.
    """
    if a.vector_dir is not None:
        if a.vector_dir.endswith(('/', '\\')):
            a.vector_dir = a.vector_dir[:-1]
    os.makedirs(a.out_dir, exist_ok=True)
    device = torch.device('cuda')

    global Gs, use_d, custom

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type

    # load base or custom network
    pkl_name = osp.splitext(a.model)[0]
    if '.pkl' in a.model.lower():
        custom = False
        print(' .. Gs from pkl ..', basename(a.model))
    else:
        custom = True
        print(' .. Gs custom ..', basename(a.model))
    with dnnlib.util.open_url(pkl_name + '.pkl') as f:
        Gs = legacy.load_network_pkl(
            f, custom=custom, **Gs_kwargs)['G_ema'].to(device)  # type: ignore

    # load directions
    directions = []
    if a.vector_dir is not None:
        vector_list = file_list(a.vector_dir, 'npy')
        for v in vector_list:
            direction = load_latents(v)
            if len(direction.shape) == 2:
                direction = np.expand_dims(direction, 0)
            directions.append(direction)
    if not directions:
        # No directory given, or the directory holds no .npy files.
        print(' No vectors found')
        exit()
    directions = np.concatenate(directions)[:, np.newaxis]  # [frm,1,18,512]

    # Always assign the flag: it is a module global, so a conditional
    # assignment would leave it undefined or stale from a previous run.
    # The check looks at the last direction that was loaded.
    use_d = len(direction[0].shape) > 1 and direction[0].shape[0] > 1
    print(' directions', directions.shape,
          'using d' if use_d else 'using w')
    directions = torch.from_numpy(directions).to(device)

    # latent direction range
    lrange = [-0.5, 0.5]

    # load saved latents
    if a.base_lat is not None:
        base_latent = load_latents(a.base_lat)
        base_latent = torch.from_numpy(base_latent).to(device)
    else:
        print(' No NPY input given, making random')
        # Tensor on the same device as the loaded-latent path, so the
        # network and make_loop get the same kind of input either way.
        base_latent = torch.from_numpy(
            np.random.randn(1, Gs.z_dim)).to(device)
        if use_d:
            base_latent = Gs.mapping(base_latent, None)  # [frm,18,512]

    pbar = ProgressBar(len(directions))
    for i, direction in enumerate(directions):
        make_loop(base_latent, direction, lrange, a.fstep * 2,
                  a.fstep * 2 * i)
        pbar.upd()
def main():
    """Render looping walks along saved latent directions (TensorFlow).

    Settings come from the module-level ``a`` namespace. Every ``.npy`` file
    in ``a.vector_dir`` is loaded as a direction; for each one ``make_loop``
    is called with the base latent (loaded from ``a.base_lat`` or drawn at
    random), the direction, the range ``[-0.5, 0.5]`` and a frame offset.

    Sets the module globals ``Gs`` and ``use_d``.
    """
    if a.vector_dir is not None:
        if a.vector_dir.endswith(('/', '\\')):
            a.vector_dir = a.vector_dir[:-1]
    os.makedirs(a.out_dir, exist_ok=True)

    global Gs, use_d

    # setup generator
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.func_name = 'training.stylegan2_multi.G_main'
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type
    Gs_kwargs.impl = a.ops

    # load model with arguments
    sess = tflib.init_tf({'allow_soft_placement': True})
    pkl_name = osp.splitext(a.model)[0]
    # SECURITY: pickle executes arbitrary code on load -- only open model
    # files from a trusted source.
    with open(pkl_name + '.pkl', 'rb') as file:
        network = pickle.load(file, encoding='latin1')
    try:
        # A full training snapshot holds three networks; keep the last one.
        _, _, network = network
    except (TypeError, ValueError):
        pass  # already a single network
    for k, v in network.static_kwargs.items():
        Gs_kwargs[k] = v

    # reload custom network, if needed
    if '.pkl' in a.model.lower():
        print(' .. Gs from pkl ..', basename(a.model))
        Gs = network
    else:  # reconstruct network
        print(' .. Gs custom ..', basename(a.model))
        Gs = tflib.Network('Gs', **Gs_kwargs)
        Gs.copy_vars_from(network)

    # load directions
    directions = []
    if a.vector_dir is not None:
        vector_list = file_list(a.vector_dir, 'npy')
        for v in vector_list:
            direction = load_latents(v)
            if len(direction.shape) == 2:
                direction = np.expand_dims(direction, 0)
            directions.append(direction)
    if not directions:
        # No directory given, or the directory holds no .npy files.
        print(' No vectors found')
        exit()
    directions = np.concatenate(directions)[:, np.newaxis]  # [frm,1,18,512]

    # Always assign the flag: it is a module global, so a conditional
    # assignment would leave it undefined or stale from a previous run.
    # The check looks at the last direction that was loaded.
    use_d = len(direction[0].shape) > 1 and direction[0].shape[0] > 1
    print(' directions', directions.shape,
          'using d' if use_d else 'using w')

    # latent direction range
    lrange = [-0.5, 0.5]

    # load saved latents
    if a.base_lat is not None:
        base_latent = load_latents(a.base_lat)
    else:
        print(' No NPY input given, making random')
        z_dim = Gs.input_shape[1]
        base_latent = np.random.randn(1, z_dim)
        if use_d:
            base_latent = Gs.components.mapping.run(
                base_latent, None)  # [frm,18,512]

    pbar = ProgressBar(len(directions))
    for i, direction in enumerate(directions):
        make_loop(base_latent, direction, lrange, a.fstep * 2,
                  a.fstep * 2 * i)
        pbar.upd()
def project(
        G,
        target: torch.Tensor,  # [C,H,W] and dynamic range [0,255], W & H must match G output resolution
        *,
        num_steps=1000,
        w_avg_samples=10000,
        initial_learning_rate=0.1,
        initial_noise_factor=0.05,
        lr_rampdown_length=0.25,
        lr_rampup_length=0.05,
        noise_ramp_length=0.75,
        regularize_noise_weight=1e5,
        verbose=False,
        device: torch.device):
    """Optimize a W latent (and the noise buffers) so ``G`` reproduces ``target``.

    Starts from the mean W of ``w_avg_samples`` random mappings and runs
    ``num_steps`` Adam steps on the squared distance between VGG16 features
    of the target and of the synthesized image, plus a noise-autocorrelation
    penalty weighted by ``regularize_noise_weight``.

    Args:
        G: Generator; a deep copy is put in eval mode with gradients
            disabled and moved to ``device``.
        target: Image tensor whose shape must equal
            ``(G.img_channels, G.img_resolution, G.img_resolution)``.
        num_steps: Number of optimization steps.
        w_avg_samples: Number of random z samples for the W statistics.
        initial_learning_rate: Peak learning rate of the schedule.
        initial_noise_factor: Scale of the W noise relative to the W std.
        lr_rampdown_length: Fraction of the run used for the cosine ramp-down.
        lr_rampup_length: Fraction of the run used for the linear ramp-up.
        noise_ramp_length: Fraction of the run over which W noise decays.
        regularize_noise_weight: Weight of the noise regularization term.
        verbose: Unused in this function.
        device: Device on which all tensors are created.

    Returns:
        Tensor holding the W found at each step, repeated
        ``G.mapping.num_ws`` times along axis 1.
    """
    expected_shape = (G.img_channels, G.img_resolution, G.img_resolution)
    assert target.shape == expected_shape

    G = copy.deepcopy(G).eval().requires_grad_(False).to(
        device)  # type: ignore

    # Compute w stats.
    z_samples = np.random.RandomState(123).randn(w_avg_samples, G.z_dim)
    w_samples = G.mapping(torch.from_numpy(z_samples).to(device),
                          None)  # [N, L, C]
    w_samples = w_samples[:, :1, :].cpu().numpy().astype(
        np.float32)  # [N, 1, C]
    w_avg = np.mean(w_samples, axis=0, keepdims=True)  # [1, 1, C]
    w_std = (np.sum((w_samples - w_avg)**2) / w_avg_samples)**0.5

    # Setup noise inputs.
    noise_bufs = {}
    for name, buf in G.synthesis.named_buffers():
        if 'noise_const' in name:
            noise_bufs[name] = buf

    # Load VGG16 feature detector: local copy if it is present with the
    # expected byte size, otherwise the published URL.
    vgg_file = 'models/vgg/vgg16.pt'
    if os.path.isfile(vgg_file) and os.stat(vgg_file).st_size == 553469545:
        vgg_source = vgg_file
    else:
        vgg_source = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt'
    with dnnlib.util.open_url(vgg_source) as file:
        vgg16 = torch.jit.load(file).eval().to(device)

    # Features for target image.
    target_images = target.unsqueeze(0).to(device).to(torch.float32)
    if target_images.shape[2] > 256:
        target_images = F.interpolate(target_images, size=(256, 256),
                                      mode='area')
    target_features = vgg16(target_images, resize_images=False,
                            return_lpips=True)

    w_opt = torch.tensor(w_avg, dtype=torch.float32, device=device,
                         requires_grad=True)  # pylint: disable=not-callable
    w_out = torch.zeros([num_steps] + list(w_opt.shape[1:]),
                        dtype=torch.float32, device=device)
    optimizer = torch.optim.Adam([w_opt] + list(noise_bufs.values()),
                                 betas=(0.9, 0.999),
                                 lr=initial_learning_rate)

    # Init noise.
    for buf in noise_bufs.values():
        buf[:] = torch.randn_like(buf)
        buf.requires_grad = True

    pbar = ProgressBar(num_steps)
    for step in range(num_steps):
        # Learning rate schedule.
        t = step / num_steps
        w_noise_scale = w_std * initial_noise_factor * max(
            0.0, 1.0 - t / noise_ramp_length)**2
        lr_ramp = min(1.0, (1.0 - t) / lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / lr_rampup_length)
        lr = initial_learning_rate * lr_ramp
        for group in optimizer.param_groups:
            group['lr'] = lr

        # Synth images from opt_w.
        w_noise = torch.randn_like(w_opt) * w_noise_scale
        ws = (w_opt + w_noise).repeat([1, G.mapping.num_ws, 1])
        synth_images = G.synthesis(ws, noise_mode='const')

        # Downsample image to 256x256 if it's larger than that. VGG was
        # built for 224x224 images.
        synth_images = (synth_images + 1) * (255 / 2)
        if synth_images.shape[2] > 256:
            synth_images = F.interpolate(synth_images, size=(256, 256),
                                         mode='area')

        # Features for synth images.
        synth_features = vgg16(synth_images, resize_images=False,
                               return_lpips=True)
        dist = (target_features - synth_features).square().sum()

        # Noise regularization.
        reg_loss = 0.0
        for buf in noise_bufs.values():
            noise = buf[None, None, :, :]  # must be [1,1,H,W] for F.avg_pool2d()
            while True:
                shifted_w = torch.roll(noise, shifts=1, dims=3)
                reg_loss += (noise * shifted_w).mean()**2
                shifted_h = torch.roll(noise, shifts=1, dims=2)
                reg_loss += (noise * shifted_h).mean()**2
                if noise.shape[2] <= 8:
                    break
                noise = F.avg_pool2d(noise, kernel_size=2)
        loss = dist + reg_loss * regularize_noise_weight

        # Step
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        # Save projected W for each optimization step.
        w_out[step] = w_opt.detach()[0]

        # Normalize noise.
        with torch.no_grad():
            for buf in noise_bufs.values():
                buf -= buf.mean()
                buf *= buf.square().mean().rsqrt()
        pbar.upd()

    return w_out.repeat([1, G.mapping.num_ws, 1])
def main():
    """Render an animation between saved key dlatents (TensorFlow).

    Settings come from the module-level ``a`` namespace. Key dlatents are read
    from one ``.npy`` file or a directory of them, optionally restyled on
    layers 5 and above with ``a.style_dlat``, interpolated by
    ``latent_anima``, truncated towards ``dlatent_avg`` on the first eight
    layers, and synthesized frame by frame into ``a.out_dir``.
    """
    os.makedirs(a.out_dir, exist_ok=True)

    # setup generator
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.func_name = 'training.stylegan2_multi.G_main'
    Gs_kwargs.verbose = a.verbose
    Gs_kwargs.size = a.size
    Gs_kwargs.scale_type = a.scale_type
    Gs_kwargs.impl = a.ops

    # load model with arguments
    sess = tflib.init_tf({'allow_soft_placement': True})
    pkl_name = osp.splitext(a.model)[0]
    # SECURITY: pickle executes arbitrary code on load -- only open model
    # files from a trusted source.
    with open(pkl_name + '.pkl', 'rb') as file:
        network = pickle.load(file, encoding='latin1')
    try:
        # A full training snapshot holds three networks; keep the last one.
        _, _, network = network
    except (TypeError, ValueError):
        pass  # already a single network
    for k, v in network.static_kwargs.items():
        Gs_kwargs[k] = v

    # reload custom network, if needed
    if '.pkl' in a.model.lower():
        print(' .. Gs from pkl ..', basename(a.model))
        Gs = network
    else:  # reconstruct network
        print(' .. Gs custom ..', basename(a.model))
        Gs = tflib.Network('Gs', **Gs_kwargs)
        Gs.copy_vars_from(network)

    dz_dim = 512  # dlatent_size
    try:
        dl_dim = 2 * (int(np.floor(np.log2(Gs_kwargs.resolution))) - 1)
    except (AttributeError, KeyError, TypeError):
        print(' Resave model, no resolution kwarg found!')
        exit(1)
    dlat_shape = (1, dl_dim, dz_dim)  # [1,18,512]

    # read saved latents
    if a.dlatents is not None and osp.isfile(a.dlatents):
        key_dlatents = load_latents(a.dlatents)
        if len(key_dlatents.shape) == 2:
            key_dlatents = np.expand_dims(key_dlatents, 0)
    elif a.dlatents is not None and osp.isdir(a.dlatents):
        key_dlatents = []
        npy_list = file_list(a.dlatents, 'npy')
        for npy in npy_list:
            key_dlatent = load_latents(npy)
            if len(key_dlatent.shape) == 2:
                key_dlatent = np.expand_dims(key_dlatent, 0)
            key_dlatents.append(key_dlatent)
        key_dlatents = np.concatenate(key_dlatents)  # [frm,18,512]
    else:
        print(' No input dlatents found')
        exit()
    key_dlatents = key_dlatents[:, np.newaxis]  # [frm,1,18,512]
    print(' key dlatents', key_dlatents.shape)

    # replace higher layers with single (style) latent
    if a.style_dlat is not None:
        print(' styling with dlatent', a.style_dlat)
        style_dlatent = load_latents(a.style_dlat)
        while len(style_dlatent.shape) < 4:
            style_dlatent = np.expand_dims(style_dlatent, 0)
        # try replacing 5 by other value, less than dl_dim
        key_dlatents[:, :, range(5, dl_dim), :] = \
            style_dlatent[:, :, range(5, dl_dim), :]

    frames = key_dlatents.shape[0] * a.fstep

    dlatents = latent_anima(dlat_shape, frames, a.fstep,
                            key_latents=key_dlatents, cubic=a.cubic,
                            verbose=True)  # [frm,1,512]
    print(' dlatents', dlatents.shape)
    frame_count = dlatents.shape[0]

    # truncation trick
    dlatent_avg = Gs.get_var('dlatent_avg')  # (512,)
    tr_range = range(0, 8)
    dlatents[:, :, tr_range, :] = dlatent_avg + (
        dlatents[:, :, tr_range, :] - dlatent_avg) * a.trunc

    # distort image by tweaking initial const layer
    if a.digress > 0:
        # fall back to the default latent size / initial layer size
        latent_size = Gs.static_kwargs.get('latent_size', 512)
        init_res = Gs.static_kwargs.get('init_res', (4, 4))
        dconst = a.digress * latent_anima([1, latent_size, *init_res],
                                          frames, a.fstep,
                                          cubic=True, verbose=False)
    else:
        dconst = np.zeros([frame_count, 1, 1, 1, 1])

    # generate images from latent timeline
    pbar = ProgressBar(frame_count)
    for i in range(frame_count):
        # generate multi-latent result
        if Gs.num_inputs == 2:
            output = Gs.components.synthesis.run(dlatents[i],
                                                 randomize_noise=False,
                                                 output_transform=fmt,
                                                 minibatch_size=1)
        else:
            output = Gs.components.synthesis.run(dlatents[i], [None],
                                                 dconst[i],
                                                 randomize_noise=False,
                                                 output_transform=fmt,
                                                 minibatch_size=1)

        ext = 'png' if output.shape[3] == 4 else 'jpg'
        filename = osp.join(a.out_dir, "%06d.%s" % (i, ext))
        imsave(filename, output[0])
        pbar.upd()