def normalize(x, eps=1e-6):
    """Min-max normalize every (sample, channel) plane of a 4-D input.

    The input is unpacked as ``N, C, H, W`` and flattened to ``(N * C, -1)``
    so that the minimum and maximum are taken per row, i.e. per
    sample/channel pair.

    Args:
        x: input whose ``shape`` unpacks into four dimensions.
        eps: small constant added to the value range to avoid dividing by
            zero when a plane is constant.

    Returns:
        The rescaled values, viewed back to ``(N, C, H, W)``.
    """
    x = torch.varbase_to_tensor(x)
    batch, channels, height, width = x.shape
    flat = x.view(batch * channels, -1)

    # [0] picks the values out of the (values, indices) result.
    lowest = torch.min(flat, dim=1, keepdim=True)[0]
    highest = torch.max(flat, dim=1, keepdim=True)[0]

    scaled = torch.varbase_to_tensor((flat - lowest) / (highest - lowest + eps))
    return scaled.view(batch, channels, height, width)
def video_ref(nets, args, x_src, x_ref, y_ref, fname):
    """Render a video that carries ``x_src`` through consecutive reference styles.

    A style code is computed for every reference with ``nets.style_encoder``.
    For each pair of consecutive references whose labels compare equal, the
    result of ``slide`` on the two reference images is concatenated along
    dim 3 with the result of ``interpolate`` between their style codes. The
    final frame is repeated ten times, and everything is converted with
    ``tensor2ndarray255`` and written with ``save_video``.

    Args:
        nets: container exposing ``style_encoder`` (also forwarded to
            ``interpolate``).
        args: options forwarded to ``interpolate``.
        x_src: source images forwarded to ``interpolate``.
        x_ref: reference images, iterated along the first dimension.
        y_ref: reference labels, iterated in lockstep with ``x_ref``.
        fname: output path handed to ``save_video``.

    Raises:
        ValueError: if no two consecutive references share a label, so no
            frame could be produced. (Previously this surfaced as an
            ``UnboundLocalError`` on ``frames``.)
    """
    x_ref.stop_gradient = True
    y_ref.stop_gradient = True
    x_src.stop_gradient = True

    video = []
    s_ref = nets.style_encoder(x_ref, y_ref)
    s_prev = None
    for data_next in tqdm(zip(x_ref, y_ref, s_ref), 'video_ref', len(x_ref)):
        x_next, y_next, s_next = [
            porch.varbase_to_tensor(d).unsqueeze(0) for d in data_next
        ]

        # The very first reference, or a change of label, only (re)starts the
        # chain: there is nothing to interpolate from. `or` short-circuits, so
        # `y_prev` is never read before it has been assigned.
        if s_prev is None or y_prev != y_next:
            x_prev, y_prev, s_prev = x_next, y_next, s_next
            continue

        interpolated = interpolate(nets, args, x_src, s_prev, s_next)
        slided = slide([x_prev, x_next])  # (T, C, 256*2, 256)
        # (T, C, 256*2, 256*(batch+1))
        frames = porch.cat([slided, interpolated], dim=3).cpu()
        video.append(frames)
        x_prev, y_prev, s_prev = x_next, y_next, s_next

    if not video:
        raise ValueError(
            'video_ref needs at least two consecutive references with the '
            'same label to produce any frame')

    # Hold the last frame for ten extra entries at the end of the video.
    last_frame = video[-1][-1:]
    for _ in range(10):
        video.append(last_frame)

    video = tensor2ndarray255(porch.cat(video))
    save_video(fname, video)
def __init__(self, height=64, width=64, with_r=False, with_boundary=False):
    """Precompute coordinate grids scaled to the range [-1, 1].

    Args:
        height: number of rows of the grid.
        width: number of columns of the grid.
        with_r: when true, a third channel holding the distance from the
            grid centre (divided by its maximum) is appended to ``coords``.
        with_boundary: stored on the instance; not used in this constructor.
    """
    super(AddCoordsTh, self).__init__()
    self.with_r = with_r
    self.with_boundary = with_boundary
    # NOTE(review): `device` is computed but never used in this constructor.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    with torch.no_grad():
        # Row index broadcast along columns, column index along rows.
        row_grid = torch.arange(height).unsqueeze(1)
        row_grid = row_grid.expand(height, width).float()
        col_grid = torch.arange(width).unsqueeze(0)
        col_grid = col_grid.expand(height, width).float()

        # Map [0, size - 1] onto [-1, 1].
        row_grid = (row_grid / (height - 1)) * 2 - 1
        col_grid = (col_grid / (width - 1)) * 2 - 1

        grid = torch.stack([row_grid, col_grid], dim=0)  # (2, height, width)

        if self.with_r:
            squared = torch.pow(row_grid, 2) + torch.pow(col_grid, 2)
            radius = torch.sqrt(squared)  # (height, width)
            radius = torch.varbase_to_tensor(radius / torch.max(radius))
            grid = torch.cat([grid, radius.unsqueeze(0)], dim=0)

        self.coords = grid.unsqueeze(0)  # (1, 2 or 3, height, width)
        self.x_coords = row_grid
        self.y_coords = col_grid
def forward(self, x):
    """Apply ``block1`` .. ``block4`` in order and flatten the result.

    Args:
        x: input passed to ``self.block1``.

    Returns:
        The output of ``self.block4`` viewed as ``(x.size(0), -1)``.
    """
    for stage in (self.block1, self.block2, self.block3, self.block4):
        x = stage(x)

    features = torch.varbase_to_tensor(x)
    leading = features.size(0)
    return features.view(leading, -1)
def forward(self, x, y):
    """Compute one output per unshared head and pick entries by ``y``.

    Args:
        x: input passed to ``self.shared``.
        y: labels; ``y.numpy()`` is cast to ``int`` and paired with the
            running index ``0 .. y.shape[0] - 1``.

    Returns:
        The result of ``porch.take`` on the stacked head outputs using the
        ``(index, label)`` pairs. Presumably this selects, for every sample,
        the output of the head matching its label; confirm against
        ``porch.take``.
    """
    shared_out = porch.varbase_to_tensor(self.shared(x))
    flat = shared_out.view(shared_out.size(0), -1)

    per_head = [head(flat) for head in self.unshared]
    stacked = porch.stack(per_head, dim=1)  # (batch, num_domains, style_dim)

    labels = y.numpy().astype(int).tolist()
    index_pairs = list(zip(range(y.shape[0]), labels))
    return porch.take(stacked, index_pairs)
def forward(self, x, s, masks=None):
    """Encode ``x``, decode it conditioned on ``s``, and convert to RGB.

    When ``masks`` is given, encoder inputs whose ``shape[2]`` is 32, 64 or
    128 are cached; during decoding, the cached tensor of matching size is
    multiplied by a resized mask, passed through ``self.hpf`` and added to
    the decoder output.

    Args:
        x: input passed to ``self.from_rgb``.
        s: second argument handed to every decode block.
        masks: optional indexable pair; ``masks[0]`` is used at size 32 and
            ``masks[1]`` at sizes 64 and 128.

    Returns:
        The output of ``self.to_rgb`` wrapped by ``porch.varbase_to_tensor``.
    """
    skip_sizes = (32, 64, 128)
    use_masks = masks is not None

    x = self.from_rgb(x)

    skips = {}
    for enc_block in self.encode:
        if use_masks and x.shape[2] in skip_sizes:
            skips[x.shape[2]] = x
        x = enc_block(x)

    for dec_block in self.decode:
        x = dec_block(x, s)
        if use_masks and x.shape[2] in skip_sizes:
            size = x.shape[2]
            if size == 32:
                mask = masks[0]
            else:
                mask = masks[1]
            mask = F.interpolate(mask, size=size, mode='bilinear')
            x = x + self.hpf(mask * skips[size])

    rgb = self.to_rgb(x)
    return porch.varbase_to_tensor(rgb)
def calculate_fid_given_paths(paths, img_size=256, batch_size=50):
    """Compute the Frechet distance between activations of two image folders.

    For each path an evaluation loader is built, every batch is fed through
    ``InceptionV3``, and the mean and covariance of the activations are
    passed to ``frechet_distance``.

    Args:
        paths: indexable with at least two entries; ``paths[0]`` and
            ``paths[1]`` are compared.
        img_size: forwarded to ``get_eval_loader``.
        batch_size: forwarded to ``get_eval_loader``.

    Returns:
        The value returned by ``frechet_distance`` after ``astype(float)``.
    """
    print('Calculating FID given paths %s and %s...' % (paths[0], paths[1]))
    # NOTE(review): `device` is computed but not used below.
    device = porch.device('cuda' if porch.cuda.is_available() else 'cpu')

    inception = InceptionV3("./metrics/inception_v3_pretrained.pdparams")
    inception.eval()

    loaders = [get_eval_loader(path, img_size, batch_size) for path in paths]

    means = []
    covariances = []
    for loader in loaders:
        # Only the first element of every batch is fed to the network.
        batch_activations = [
            inception(porch.varbase_to_tensor(batch[0]))
            for batch in tqdm(loader, total=len(loader))
        ]
        activations = porch.cat(batch_activations, dim=0).numpy()
        means.append(np.mean(activations, axis=0))
        covariances.append(np.cov(activations, rowvar=False))

    distance = frechet_distance(
        means[0], covariances[0], means[1], covariances[1])
    return distance.astype(float)
def calculate_metrics(nets, args, step, mode):
    """Generate translated images for every domain pair and report LPIPS/FID.

    Every network in ``nets`` is switched to eval mode. For each ordered
    (source, target) pair of sub-directories of ``args.val_img_dir``,
    ``args.num_outs_per_domain`` outputs are generated per source batch, saved
    under ``args.eval_dir/<src>2<trg>`` and scored with
    ``calculate_lpips_given_images``. The per-task and mean LPIPS values are
    written as JSON, then FID is computed by ``calculate_fid_for_all_tasks``.

    Args:
        nets: container of networks; iterated by name and also accessed as
            ``nets.fan``, ``nets.mapping_network``, ``nets.style_encoder`` and
            ``nets.generator``.
        args: options; reads ``val_img_dir``, ``eval_dir``, ``img_size``,
            ``val_batch_size``, ``w_hpf``, ``num_outs_per_domain`` and
            ``latent_dim``.
        step: step number used in the name of the LPIPS JSON file and passed
            on to the FID computation.
        mode: ``'latent'`` (styles from random codes) or ``'reference'``
            (styles from images of the target domain).

    Returns:
        Whatever ``calculate_fid_for_all_tasks`` returns.
    """
    print('Calculating evaluation metrics...')
    assert mode in ['latent', 'reference']
    # NOTE(review): `device` is computed but not used below.
    device = porch.device('cuda' if porch.cuda.is_available() else 'cpu')
    for name in nets:
        nets[name].eval()

    domains = os.listdir(args.val_img_dir)
    domains.sort()
    num_domains = len(domains)
    print('Number of domains: %d' % num_domains)

    enable_lpips = True  # save time to check FID result
    if enable_lpips:
        lpips_dict = OrderedDict()
        for trg_idx, trg_domain in enumerate(domains):
            src_domains = [x for x in domains if x != trg_domain]

            if mode == 'reference':
                path_ref = os.path.join(args.val_img_dir, trg_domain)
                loader_ref = get_eval_loader(root=path_ref,
                                             img_size=args.img_size,
                                             batch_size=args.val_batch_size,
                                             imagenet_normalize=False,
                                             drop_last=True)
                # Created lazily on first use and re-created whenever the
                # reference loader is exhausted.
                iter_ref = None

            for src_domain in src_domains:
                path_src = os.path.join(args.val_img_dir, src_domain)
                loader_src = get_eval_loader(root=path_src,
                                             img_size=args.img_size,
                                             batch_size=args.val_batch_size,
                                             imagenet_normalize=False)

                task = '%s2%s' % (src_domain, trg_domain)
                path_fake = os.path.join(args.eval_dir, task)
                # Start from an empty output directory for this task.
                shutil.rmtree(path_fake, ignore_errors=True)
                os.makedirs(path_fake)

                lpips_values = []
                print('Generating images and calculating LPIPS for %s...'
                      % task)
                for i, x_src in enumerate(
                        tqdm(loader_src, total=len(loader_src))):
                    x_src = porch.varbase_to_tensor(x_src[0])
                    N = x_src.size(0)
                    y_trg = porch.tensor([trg_idx] * N)
                    masks = (nets.fan.get_heatmap(x_src)
                             if args.w_hpf > 0 else None)

                    # Generate several outputs from the same input.
                    group_of_images = []
                    for j in range(args.num_outs_per_domain):
                        if mode == 'latent':
                            z_trg = porch.randn(N, args.latent_dim)
                            s_trg = nets.mapping_network(z_trg, y_trg)
                        else:
                            if iter_ref is None:
                                iter_ref = iter(loader_ref)
                            try:
                                x_ref = next(iter_ref)
                            except StopIteration:
                                # Reference loader exhausted: start over.
                                iter_ref = iter(loader_ref)
                                x_ref = next(iter_ref)
                            x_ref = porch.varbase_to_tensor(x_ref[0])
                            # The last source batch may be smaller than the
                            # reference batch.
                            if x_ref.size(0) > N:
                                x_ref = x_ref[:N]
                            s_trg = nets.style_encoder(x_ref, y_trg)

                        x_fake = nets.generator(x_src, s_trg, masks=masks)
                        group_of_images.append(x_fake)

                        # Save generated images to calculate FID later.
                        for k in range(N):
                            filename = os.path.join(
                                path_fake,
                                '%.4i_%.2i.png' % (
                                    i * args.val_batch_size + (k + 1), j + 1))
                            utils.save_image(x_fake[k], ncol=1,
                                             filename=filename)

                    lpips_value = calculate_lpips_given_images(
                        group_of_images)
                    lpips_values.append(lpips_value)

                # LPIPS for this task (e.g. cat2dog, dog2cat).
                lpips_mean = np.array(lpips_values).mean().astype(float)
                lpips_dict['LPIPS_%s/%s' % (mode, task)] = lpips_mean

            # Drop the dataloaders before moving on to the next target domain.
            del loader_src
            if mode == 'reference':
                del loader_ref
                del iter_ref

        # Average LPIPS over all tasks (stays 0 when there are no tasks).
        lpips_mean = 0
        for value in lpips_dict.values():
            lpips_mean += value / len(lpips_dict)
        lpips_dict['LPIPS_%s/mean' % mode] = lpips_mean

        # Report LPIPS values.
        filename = os.path.join(args.eval_dir,
                                'LPIPS_%.5i_%s.json' % (step, mode))
        utils.save_json(lpips_dict, filename)

    # NOTE(review): the original placed a loop calling `nets[name].train()`
    # after this return, where it could never run, so the networks are left in
    # eval mode. The dead loop is dropped to keep runtime behaviour unchanged;
    # confirm whether callers expect train mode to be restored.
    return calculate_fid_for_all_tasks(args, domains, step=step, mode=mode)
def denormalize(x):
    """Map values from [-1, 1] to [0, 1] and clamp to that range.

    Args:
        x: input supporting ``(x + 1) / 2``.

    Returns:
        The rescaled tensor, clamped in place to ``[0, 1]``.
    """
    rescaled = porch.varbase_to_tensor((x + 1) / 2)
    return rescaled.clamp_(0, 1)
def forward(self, x, s):
    """Scale and shift the normalized input with parameters derived from ``s``.

    ``self.fc(s)`` is reshaped to ``(size(0), size(1), 1, 1)`` and split into
    two halves along dim 1, ``gamma`` and ``beta``.

    Args:
        x: input passed to ``self.norm``.
        s: input passed to ``self.fc``.

    Returns:
        ``(1 + gamma) * self.norm(x) + beta``.
    """
    params = porch.varbase_to_tensor(self.fc(s))
    params = params.view(params.size(0), params.size(1), 1, 1)
    gamma, beta = porch.chunk(params, chunks=2, dim=1)

    scale = 1 + gamma
    return scale * self.norm(x) + beta
def __init__(self, w_hpf, device):
    """Store a 3x3 kernel (centre 8, neighbours -1) divided by ``w_hpf``.

    Args:
        w_hpf: divisor applied to the kernel.
        device: target passed to ``.to(device)`` for the kernel tensor.
    """
    super(HighPass, self).__init__()
    kernel = porch.tensor([[-1, -1, -1],
                           [-1, 8., -1],
                           [-1, -1, -1]]).to(device)
    self.filter = porch.varbase_to_tensor(kernel / w_hpf)