def debug_image(nets, args, inputs, step):
    x_src, y_src = inputs.x_src, inputs.y_src
    x_ref, y_ref = inputs.x_ref, inputs.y_ref
    x_ref.stop_gradient = True
    y_ref.stop_gradient = True
    x_src.stop_gradient = True
    y_src.stop_gradient = True

    N = inputs.x_src.size(0)

    # translate and reconstruct (reference-guided)
    filename = ospj(args.sample_dir, '%06d_cycle_consistency.jpg' % (step))
    translate_and_reconstruct(nets, args, x_src, y_src, x_ref, y_ref, filename)

    # latent-guided image synthesis
    y_trg_list = [porch.tensor(y).repeat(N)
                  for y in range(min(args.num_domains, 5))]
    z_trg_list = porch.randn(args.num_outs_per_domain, 1, args.latent_dim).repeat(1, N, 1)
    for psi in [0.5, 0.7, 1.0]:
        filename = ospj(args.sample_dir, '%06d_latent_psi_%.1f.jpg' % (step, psi))
        translate_using_latent(nets, args, x_src, y_trg_list, z_trg_list, psi, filename)

    # reference-guided image synthesis
    filename = ospj(args.sample_dir, '%06d_reference.jpg' % (step))
    translate_using_reference(nets, args, x_src, x_ref, y_ref, filename)
def translate_using_latent(nets, args, x_src, y_trg_list, z_trg_list, psi, filename):
    n_images = 100
    x_src.stop_gradient = True
    N, C, H, W = x_src.shape
    latent_dim = z_trg_list[0].shape[1]
    x_concat = [x_src]
    masks = nets.fan.get_heatmap(x_src) if args.w_hpf > 0 else None

    for i, y_trg in enumerate(y_trg_list):
        # estimate the average style code of the target domain from n_images samples
        z_many = porch.randn(n_images, latent_dim)
        # y_many = porch.LongTensor(10000).fill_(y_trg[0])
        y_many = np.empty([n_images])  # label vector filled with the target domain index
        y_many.fill(y_trg[0].numpy()[0])
        y_many = to_variable(y_many)
        s_many = nets.mapping_network(z_many, y_many)
        s_avg = porch.mean(s_many, dim=0, keepdim=True)
        s_avg = s_avg.repeat(N, 1)

        for z_trg in z_trg_list:
            s_trg = nets.mapping_network(z_trg, y_trg)
            # truncation: interpolate between the average style and the sampled style
            s_trg = porch.lerp(s_avg, s_trg, psi)
            x_fake = nets.generator(x_src, s_trg, masks=masks)
            x_concat += [x_fake]

    x_concat = porch.cat(x_concat, dim=0)
    save_image(x_concat, N, filename)
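# A minimal, standalone sketch of the truncation trick used above: the style code
# fed to the generator is a linear interpolation between the per-domain average
# style s_avg and a freshly sampled style s_trg, controlled by psi (psi=0 keeps
# only the average, psi=1 keeps the raw sample, so larger psi gives more diversity).
# This uses plain torch and a stand-in linear "mapping network" purely for
# illustration; it is an assumption, not the repo's porch-based implementation.
import torch

latent_dim, style_dim, n_images = 16, 64, 100
mapping = torch.nn.Linear(latent_dim, style_dim)   # stand-in for nets.mapping_network

z_many = torch.randn(n_images, latent_dim)
s_avg = mapping(z_many).mean(dim=0, keepdim=True)  # average style code of the domain

z_trg = torch.randn(1, latent_dim)
s_trg = mapping(z_trg)                             # style code of one sampled latent
for psi in [0.5, 0.7, 1.0]:
    s_psi = torch.lerp(s_avg, s_trg, psi)          # truncated style code
    print(psi, float((s_psi - s_avg).norm()))      # distance from the average grows with psi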
def video_latent(nets, args, x_src, y_list, z_list, psi, fname):
    x_src.stop_gradient = True
    latent_dim = z_list[0].size(1)
    s_list = []
    for i, y_trg in enumerate(y_list):
        z_many = porch.randn(10000, latent_dim)
        y_many = porch.LongTensor(10000).fill_(y_trg[0])
        s_many = nets.mapping_network(z_many, y_many)
        s_avg = porch.mean(s_many, dim=0, keepdim=True)
        s_avg = s_avg.repeat(x_src.size(0), 1)

        for z_trg in z_list:
            s_trg = nets.mapping_network(z_trg, y_trg)
            s_trg = porch.lerp(s_avg, s_trg, psi)
            s_list.append(s_trg)

    s_prev = None
    video = []
    # fetch reference images
    for idx_ref, s_next in enumerate(tqdm(s_list, 'video_latent', len(s_list))):
        if s_prev is None:
            s_prev = s_next
            continue
        if idx_ref % len(z_list) == 0:
            s_prev = s_next
            continue
        frames = interpolate(nets, args, x_src, s_prev, s_next).cpu()
        video.append(frames)
        s_prev = s_next

    for _ in range(10):
        video.append(frames[-1:])

    video = tensor2ndarray255(porch.cat(video))
    save_video(fname, video)
def __next__(self):
    x, y = self._fetch_inputs()
    if self.mode == 'train':
        x_ref, x_ref2, y_ref = self._fetch_refs()
        z_trg = porch.randn(x.shape[0], self.latent_dim)
        z_trg2 = porch.randn(x.shape[0], self.latent_dim)
        inputs = Munch(x_src=x, y_src=y, y_ref=y_ref,
                       x_ref=x_ref, x_ref2=x_ref2,
                       z_trg=z_trg, z_trg2=z_trg2)
    elif self.mode == 'val':
        x_ref, y_ref = self._fetch_inputs()
        inputs = Munch(x_src=x, y_src=y,
                       x_ref=x_ref, y_ref=y_ref)
    elif self.mode == 'test':
        inputs = Munch(x=x, y=y)
    else:
        raise NotImplementedError

    return Munch({k: v for k, v in inputs.items()})
def __init__(self, num_svs, num_itrs, num_outputs, transpose=False, eps=1e-12):
    # Number of power iterations per step
    self.num_itrs = num_itrs
    # Number of singular values
    self.num_svs = num_svs
    # Transposed?
    self.transpose = transpose
    # Epsilon value for avoiding divide-by-0
    self.eps = eps
    self.register_buffer = dict()
    self.name = "%d_%d_%d" % (num_svs, num_itrs, num_outputs)
    # Register a singular vector for each sv
    for i in range(self.num_svs):
        self.__setattr__('u%d' % i, torch.nn.Parameter(torch.randn(1, num_outputs)))
        self.__setattr__('sv%d' % i, torch.nn.Parameter(torch.ones(1)))
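# A standalone sketch of the power-iteration step these buffers support: each
# stored vector u approximates a left singular vector of the (flattened) weight
# matrix, one iteration refines it and yields an estimate of the largest singular
# value, and a spectrally normalized layer divides its weight by that estimate.
# Plain torch, written for illustration only; the actual update lives in the
# layer's weight getter, which is not shown in this excerpt.
import torch
import torch.nn.functional as F

def power_iteration_step(W, u, eps=1e-12):
    # W: (num_outputs, fan_in) weight matrix; u: (1, num_outputs) current estimate
    v = F.normalize(torch.matmul(u, W), eps=eps)      # ~ top right singular vector
    u = F.normalize(torch.matmul(v, W.t()), eps=eps)  # ~ top left singular vector
    sv = torch.sum(torch.matmul(torch.matmul(v, W.t()), u.t()))  # ~ largest singular value
    return u, v, sv

W = torch.randn(8, 32)
u = torch.randn(1, 8)
for _ in range(5):
    u, v, sv = power_iteration_step(W, u)
W_sn = W / sv  # spectrally normalized weight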
def calculate_metrics(nets, args, step, mode):
    print('Calculating evaluation metrics...')
    assert mode in ['latent', 'reference']
    device = porch.device('cuda' if porch.cuda.is_available() else 'cpu')

    for name in nets:
        nets[name].eval()

    domains = os.listdir(args.val_img_dir)
    domains.sort()
    num_domains = len(domains)
    print('Number of domains: %d' % num_domains)

    enable_lpips = True  # set to False to skip LPIPS and only report FID (saves time)
    if enable_lpips:
        lpips_dict = OrderedDict()
        for trg_idx, trg_domain in enumerate(domains):
            src_domains = [x for x in domains if x != trg_domain]

            if mode == 'reference':
                path_ref = os.path.join(args.val_img_dir, trg_domain)
                loader_ref = get_eval_loader(root=path_ref,
                                             img_size=args.img_size,
                                             batch_size=args.val_batch_size,
                                             imagenet_normalize=False,
                                             drop_last=True)

            for src_idx, src_domain in enumerate(src_domains):
                path_src = os.path.join(args.val_img_dir, src_domain)
                loader_src = get_eval_loader(root=path_src,
                                             img_size=args.img_size,
                                             batch_size=args.val_batch_size,
                                             imagenet_normalize=False)

                task = '%s2%s' % (src_domain, trg_domain)
                path_fake = os.path.join(args.eval_dir, task)
                shutil.rmtree(path_fake, ignore_errors=True)
                os.makedirs(path_fake)

                lpips_values = []
                print('Generating images and calculating LPIPS for %s...' % task)
                for i, x_src in enumerate(tqdm(loader_src, total=len(loader_src))):
                    x_src = porch.varbase_to_tensor(x_src[0])
                    N = x_src.size(0)
                    y_trg = porch.tensor([trg_idx] * N)
                    masks = nets.fan.get_heatmap(x_src) if args.w_hpf > 0 else None

                    # generate multiple outputs from the same input
                    group_of_images = []
                    for j in range(args.num_outs_per_domain):
                        if mode == 'latent':
                            z_trg = porch.randn(N, args.latent_dim)
                            s_trg = nets.mapping_network(z_trg, y_trg)
                        else:
                            try:
                                x_ref = next(iter_ref)
                            except (NameError, StopIteration):
                                # iter_ref does not exist yet or is exhausted
                                iter_ref = iter(loader_ref)
                                x_ref = next(iter_ref)
                            x_ref = porch.varbase_to_tensor(x_ref[0])
                            if x_ref.size(0) > N:
                                x_ref = x_ref[:N]
                            s_trg = nets.style_encoder(x_ref, y_trg)

                        x_fake = nets.generator(x_src, s_trg, masks=masks)
                        group_of_images.append(x_fake)

                        # save generated images to calculate FID later
                        for k in range(N):
                            filename = os.path.join(
                                path_fake,
                                '%.4i_%.2i.png' % (i*args.val_batch_size+(k+1), j+1))
                            utils.save_image(x_fake[k], ncol=1, filename=filename)

                    lpips_value = calculate_lpips_given_images(group_of_images)
                    lpips_values.append(lpips_value)

                # calculate LPIPS for each task (e.g. cat2dog, dog2cat)
                lpips_mean = np.array(lpips_values).mean().astype(float)
                lpips_dict['LPIPS_%s/%s' % (mode, task)] = lpips_mean

            # delete dataloaders
            del loader_src
            if mode == 'reference':
                del loader_ref
                del iter_ref

        # calculate the average LPIPS for all tasks
        lpips_mean = 0
        for _, value in lpips_dict.items():
            lpips_mean += value / len(lpips_dict)
        lpips_dict['LPIPS_%s/mean' % mode] = lpips_mean

        # report LPIPS values
        filename = os.path.join(args.eval_dir, 'LPIPS_%.5i_%s.json' % (step, mode))
        utils.save_json(lpips_dict, filename)

    # calculate and report fid values, then restore train mode
    fid_result = calculate_fid_for_all_tasks(args, domains, step=step, mode=mode)
    for name in nets:
        nets[name].train()
    return fid_result
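# A standalone sketch of how the per-input diversity score above is typically
# aggregated: calculate_lpips_given_images averages a perceptual distance over
# all unordered pairs of the num_outs_per_domain outputs generated from the same
# source image. The pairwise structure below is the relevant part; the dummy L1
# distance stands in for the actual LPIPS network and is only an assumption for
# illustration, not the repo's implementation.
import itertools
import torch

def mean_pairwise_distance(group_of_images, dist_fn):
    # group_of_images: list of (N, C, H, W) tensors, one per sampled output
    values = []
    for x_a, x_b in itertools.combinations(group_of_images, 2):
        values.append(dist_fn(x_a, x_b).mean())
    return torch.stack(values).mean()

dummy_dist = lambda a, b: (a - b).abs().mean(dim=[1, 2, 3])  # stand-in for LPIPS
group = [torch.rand(4, 3, 32, 32) for _ in range(10)]        # 10 outputs per input
print(float(mean_pairwise_distance(group, dummy_dist)))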