def _get(img_dir, clf_ckpt_p):
    """
    Predict a Q for every image in `img_dir` with the classifier loaded from
    `clf_ckpt_p`, and write the result as CSV to `img_dir`/OPTIMAL_QS_TXT.

    The CSV has a header row `fn,q`, followed by one row per image, sorted by
    file name (base name without extension).

    :param img_dir: directory containing the images.
    :param clf_ckpt_p: checkpoint path handed to `load_classifier`.
    """
    csv_p = os.path.join(img_dir, OPTIMAL_QS_TXT)
    classifier = load_classifier(clf_ckpt_p)
    stopwatch = timer.TimeAccumulator()
    q_by_name = {}

    img_ps = cached_listdir_imgs(img_dir, discard_shitty=False).ps
    for idx, img_p in enumerate(img_ps):
        with stopwatch.execute():
            pixels = torch.from_numpy(np.array(Image.open(img_p)))
            # move the last axis to the front; it must hold exactly 3 channels
            chw = pixels.to(pe.DEVICE, non_blocking=True).permute(2, 0, 1)
            assert chw.shape[0] == 3
            batch = chw.unsqueeze(0)
            x = SymbolTensor(batch.long(), L=256).to_norm().get()
            predicted_q = classifier.get_q(x)
            name, _ = os.path.splitext(os.path.basename(img_p))
            q_by_name[name] = predicted_q
        # progress report every 10th image
        if idx > 0 and idx % 10 == 0:
            print(idx, stopwatch.mean_time_spent())

    with open(csv_p, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['fn', 'q'])
        writer.writerows(sorted(q_by_name.items()))
    print('Created', csv_p)
def __init__(self,
             network_out: prob_clf.NetworkOutput,
             x_r: NormalizedTensor,
             x_l: NormalizedTensor):
    """
    :param network_out: network output, stored unchanged.
    :param x_r: normalized tensor; stored in symbol form as `self.x_r`.
    :param x_l: normalized tensor; its symbols are subtracted from those
        of `x_r` to form the residual.
    """
    self.network_out = network_out
    # filled lazily elsewhere; None means "not computed yet"
    self._mean_img = None

    self.x_r = x_r.to_sym()
    # residual in symbol space, wrapped as a centered SymbolTensor with L=511
    diff = self.x_r.t - x_l.to_sym().t
    self.res_sym = SymbolTensor(diff, L=511, centered=True)
    self.res = self.res_sym.to_norm()
def get_psnr(x: SymbolTensor, y: SymbolTensor):
    """
    Mean PSNR between two batches of symbols.

    The MSE is taken per batch element over dims (1, 2, 3), and the PSNR is
    then averaged over the batch. That's how tf.image.psnr does the mean,
    too. Tested to be same as tf.image.psnr.

    :param x: symbols; `x.get()` must be 4-dimensional.
    :param y: symbols with the same `L` as `x` and a compatible shape.
    :return: 0-dim tensor, PSNR averaged over the batch. Identical inputs
        give a MSE of 0 and therefore `inf`.
    """
    assert x.L == y.L
    max_val = x.L - 1
    # Cast to float *before* subtracting and squaring: in an integer dtype
    # the square overflows (e.g. 255**2 does not fit into int16) and unsigned
    # subtraction wraps around, which silently corrupts the MSE.
    diff = x.get().float() - y.get().float()
    mse = diff.pow(2).mean((1, 2, 3))
    assert len(mse.shape) == 1, mse.shape
    return 10. * torch.log10((max_val ** 2) / mse).mean()
def unroll_unpack(self, img):
    """
    Main function. Given an image `img`, unrolls _unpack over all needed Qs:

    - if the dataset is not meta: just yield _unpack(img) with q=None.
    - otherwise:
        - if run_clf: set self.clf_q from the classifier
          (with QStrategy.FIXED, a fixed value is used instead).
        - if not CLF_ONLY: do the parabola search, i.e., keep yielding the
          entry for `self.next_q` while it is truthy and contained in `img`.
        - finally, yield the entry for `self.clf_q` if it has no loss yet.

    In the meta case, `img` is indexed by q.

    Always yields a tuple (unpacked images, q used to obtain them).

    :raises ValueError: if `self.clf_q` is set, not yet in `self.losses`,
        and not a key of `img`.
    """
    if not self.is_meta:
        yield self._unpack(img), None
        return

    # --- first Q guess
    if self.run_clf:
        assert self.clf
        first_q = next(iter(img))
        raw, _, _ = self._unpack(img[first_q])
        x_r = SymbolTensor(raw.long(), L=256).to_norm()
        torch.cuda.empty_cache()
        self.clf_q = self.clf.get_q(x_r.get())
        torch.cuda.empty_cache()
    elif self.qstrategy == QStrategy.FIXED:
        # note: clf_q is also used for the fixed Q, confusing naming scheme!
        self.clf_q = 14  # optimal on training set

    # --- optimum search
    # NOTE(review): presumably `self.next_q` changes between yields as the
    # consumer reports losses -- confirm against the caller.
    if self.qstrategy != QStrategy.CLF_ONLY:
        while self.next_q and self.next_q in img:
            yield self._unpack(img[self.next_q]), self.next_q

    # --- make sure the guess was also evaluated
    if not self.clf_q or self.clf_q in self.losses:
        return
    if self.clf_q not in img:
        raise ValueError('**** CLF returned invalid q', self.clf_q)
    yield self._unpack(img[self.clf_q]), self.clf_q
def unpack(self, img_batch, fixed_first=None):
    """
    Move a batch to pe.DEVICE and convert it for the Q-classifier.

    :param img_batch: mapping with entries 'raw' (uint8 or int16 images) and
        'q' (flattened to a 1d tensor here).
    :param fixed_first: optional mapping with 'raw' and 'q'; if given, it
        overwrites the first element of the batch.
    :return: tuple (normalized raw images, q - config_clf.first_class)
    """
    device = pe.DEVICE
    raw = img_batch['raw'].to(device, non_blocking=True)
    q = img_batch['q'].to(device).view(-1)

    if fixed_first is not None:
        raw[0, ...] = fixed_first['raw']
        q[0, ...] = fixed_first['q']

    # shift Qs so that they are relative to the first class
    q = q - self.config_clf.first_class

    if self.padding_fac:
        raw = self.pad(raw)

    return SymbolTensor(raw.long(), L=256).to_norm(), q
def unpack_batch_pad(self, img_or_imgbatch):
    """
    Move a single image or a batch to pe.DEVICE, add a batch dimension if
    needed, pad, and convert for the Q-classifier.

    :param img_or_imgbatch: mapping with entries 'raw' (uint8 or int16, with
        3 or 4 dimensions) and 'q' (flattened to a 1d tensor here).
    :return: tuple (normalized 4-dimensional raw images,
        q - config_clf.first_class)
    """
    raw = img_or_imgbatch['raw'].to(pe.DEVICE, non_blocking=True)
    q = img_or_imgbatch['q'].to(pe.DEVICE).view(-1)

    if len(raw.shape) == 3:
        # Out-of-place on purpose: `.to()` may hand back the caller's own
        # tensor when it already lives on pe.DEVICE, and an in-place
        # unsqueeze_ would then change the shape of the caller's data.
        raw = raw.unsqueeze(0)
    if self.padding_fac:
        raw = self.pad(raw)

    # shift Qs so that they are relative to the first class
    q = q - self.config_clf.first_class

    assert len(raw.shape) == 4
    raw = SymbolTensor(raw.long(), L=256)
    return raw.to_norm(), q
def decoder(_, C_cur):
    """
    Decode the next chunk of `fin`: read the number of encoded bytes, read
    that many bytes, and range-decode them using the CDF `C_cur`.

    :param _: ignored.
    :param C_cur: CDF passed on to `r.range_decode`.
    :return: tuple (bn_c, None). Always returns the bottleneck and
        extra_info, which is None here.
    """
    payload = fin.read(read_num_bytes_encoded(fin))
    decoded = r.range_decode(payload, cdf=C_cur, time_logger=self.times)
    # NOTE: here it's int16
    sym_c = decoded.reshape(1, H, W).to(pe.DEVICE, non_blocking=True)
    # undo the truncation offset before wrapping as centered symbols
    shifted = sym_c + x_range[0]
    bn_c = SymbolTensor(shifted, L=511, centered=True).to_norm().t
    return bn_c, None
def new_bottleneck_summary(s: SymbolTensor):
    """
    Grayscale bottleneck representation: Expects the actual bottleneck symbols.

    The symbols are divided by `s.L`, arranged channel-wise via
    `vis.grid.prep_for_grid`, and merged into one image with 5 tiles per row.

    :param s: NCHW
    :return: [0, 1] image
    """
    s_raw, L = s.get(), s.L
    assert s_raw.dim() == 4, s_raw.shape
    s_raw = s_raw.detach().float().div(L)
    grid = vis.grid.prep_for_grid(s_raw, channelwise=True)
    assert len(grid) == s_raw.shape[1], (len(grid), s_raw.shape)
    # `all(...)` is required here: asserting on the bare list is always true
    # for a non-empty list, i.e., the range would never actually be checked.
    assert all(g.max() <= 1 for g in grid), [g.max() for g in grid]
    # report the dtype of the element that was checked
    assert grid[0].dtype == torch.float32, grid[0].dtype
    return torchvision.utils.make_grid(grid, nrow=5)
class EnhancementOut(object):
    """
    Bundles a network output with the symbol-space residual between `x_r`
    and `x_l`, and caches the mean image derived from them.
    """

    def __init__(self,
                 network_out: prob_clf.NetworkOutput,
                 x_r: NormalizedTensor,
                 x_l: NormalizedTensor):
        """
        :param network_out: network output, stored unchanged.
        :param x_r: normalized tensor; stored in symbol form as `self.x_r`.
        :param x_l: normalized tensor; its symbols are subtracted from those
            of `x_r` to form the residual.
        """
        self.network_out = network_out
        # cache for get_mean_img; None means "not computed yet"
        self._mean_img = None

        self.x_r = x_r.to_sym()
        # residual in symbol space, wrapped as centered symbols with L=511
        diff = self.x_r.t - x_l.to_sym().t
        self.res_sym = SymbolTensor(diff, L=511, centered=True)
        self.res = self.res_sym.to_norm()

    def get_mean_img(self, loss: DiscretizedMixLogisticLoss):
        """
        Return the mean image, computing it on the first call only.

        :param loss: loss passed to `extract_mean_image_corrected`. Only the
            `loss` of the first call is used; later calls return the cache.
        """
        if self._mean_img is not None:
            return self._mean_img
        self._mean_img = extract_mean_image_corrected(
            self.res, self.network_out, loss)
        return self._mean_img
def unpack(self, img_batch, fixed_first=None) -> InputTensors:
    """
    Move a batch to pe.DEVICE, optionally pad it, and pack it into an
    InputTensors instance.

    :param img_batch: mapping with entries 'raw' and 'compressed' (uint8 or
        int16 images) and 'bpp'.
    :param fixed_first: optional mapping with 'raw', 'compressed' and 'bpp';
        if given, it overwrites the first element of the batch.
    :return: InputTensors((x_r, x_c), bpps, num_subpixels_before_pad), where
        the count is the number of elements of `raw` before padding.
    """
    device = pe.DEVICE
    raw = img_batch['raw'].to(device, non_blocking=True)
    compressed = img_batch['compressed'].to(device, non_blocking=True)
    bpps = img_batch['bpp'].to(device)

    if fixed_first is not None:
        raw[0, ...] = fixed_first['raw']
        compressed[0, ...] = fixed_first['compressed']
        bpps[0] = fixed_first['bpp']

    # must be taken before padding changes the shape
    num_subpixels_before_pad = np.prod(raw.shape)

    if self.padding_fac:
        raw = self.pad(raw)
        compressed = self.pad(compressed)

    x_c = SymbolTensor(compressed.long(), L=256).to_norm()
    x_r = SymbolTensor(raw.long(), L=256).to_norm()
    return InputTensors((x_r, x_c), bpps, num_subpixels_before_pad)
def pad_pack(self, raw, compressed, bpps) -> InputTensors:
    """
    Pad images and pack them into an InputTensors instance.

    :param raw: Batch of raw input images.
    :param compressed: Output of compressing images in `raw` with BPG. Must
        have the same shape as `raw`.
    :param bpps: The bitrates of the images, 1-dimensional.
    :return: InputTensors((x_r, x_c), bpps, num_subpixels_before_pad), where
        the count is the number of elements of `raw` before padding.
    """
    assert raw.shape == compressed.shape
    # must be taken before padding changes the shape
    num_subpixels_before_pad = np.prod(raw.shape)

    if self.padding_fac:
        raw, compressed = self.pad(raw), self.pad(compressed)

    assert len(raw.shape) == 4
    assert len(bpps.shape) == 1, (bpps.shape, raw.shape)

    x_c = SymbolTensor(compressed.long(), L=256).to_norm()
    x_r = SymbolTensor(raw.long(), L=256).to_norm()
    return InputTensors((x_r, x_c), bpps, num_subpixels_before_pad)
def _encode(self, pin, pout) -> EncodeOut:
    """
    Encode the image at `pin` into two files: a BPG file at
    `self._path_for_bpg(pout)` and the entropy-coded residual at `pout`.

    Steps, each wrapped in a `self.times` timer:
    1. pick Q with `self.blueprint.clf` (12 if there is no classifier),
    2. encode `pin` with BPG at that Q and decode it again to get x_l,
    3. run `self.blueprint.forward_lossy` on x_l and the BPG bitrate,
    4. write the encoded RGB residual followed by _MAGIC_VALUE_SEP to `pout`.

    :param pin: path of the input image; must have 3 channels.
    :param pout: path of the output file; must not exist yet (asserted).
    :return: EncodeOut(img, actual_bpsp, None), where img is int64 1CHW and
        actual_bpsp is the size of both output files in bits, divided by
        the number of elements of img.
    :raises NotImplementedError: if `self.compare_with_theory` is set. Note
        that this is only raised after both output files were written.
    """
    assert not os.path.isfile(pout)
    img = self.pil_to_1CHW_long(
        Image.open(pin))  # int64 1CHW pe.DEVICE tensor
    assert len(
        img.shape
    ) == 4 and img.shape[0] == 1 and img.shape[1] == 3, img.shape

    # gt
    x_r = SymbolTensor(img, L=256).to_norm()

    if self.blueprint.clf is not None:
        with self.times.run('Q-Classifier'):
            q = self.blueprint.clf.get_q(x_r.get())
    else:
        q = 12  # TODO

    with self.times.run(f'BPG'):
        # img = img.float()
        # Encode BPG
        pout_bpg = self._path_for_bpg(pout)
        # NOTE(review): presumably _encode_bpg writes pout_bpg, its size is
        # read below -- confirm.
        bpp_bpg = self._encode_bpg(pin, pout_bpg, q)
        # 1. sym -> norm (for l)
        x_l: NormalizedTensor = self._decode_bpg(pout_bpg)

    with self.times.run('[-] encode forwardpass'):
        # 1. sym -> norm (for r)
        network_out: prob_clf.NetworkOutput = self.blueprint.forward_lossy(
            x_l, torch.tensor([bpp_bpg], device=pe.DEVICE))
        # in here:
        # 2. norm -> sym (for l and r)
        out = EnhancementOut(network_out, x_r, x_l)

    if self.compare_with_theory:
        with self.times.run('[-] get loss'):
            num_subpixels_before_pad = np.prod(img.shape)
            # only needed by the disabled comparison at the end
            loss_out = self.blueprint.losses(
                out, num_subpixels_before_pad=num_subpixels_before_pad, base_bpp=bpp_bpg)

    entropy_coding_bytes = []  # bytes used by different scales
    dmll = self.blueprint.losses.loss_dmol_rgb
    with open(pout, 'wb') as fout:
        with self.times.prefix_scope(f'RGB'):
            entropy_coding_bytes.append(self.encode_rgb(dmll, out, fout))
            fout.write(_MAGIC_VALUE_SEP)

    # Actual bitrate: measured from the files on disk, after `fout` is closed.
    num_subpixels = np.prod(img.shape)
    actual_num_bytes = os.path.getsize(pout) + os.path.getsize(pout_bpg)
    actual_bpsp = actual_num_bytes * 8 / num_subpixels
    if self.compare_with_theory:
        # TODO
        raise NotImplementedError
        # Disabled comparison of theoretical vs. actual bitrates:
        # assumed_bpsps = [b * 8 / num_subpixels for b in entropy_coding_bytes]
        # tostr = lambda l: ' | '.join(map('{:.3f}'.format, l)) + f' => {sum(l):.3f}'
        # overhead = (sum(assumed_bpsps) / sum(loss_out.nonrecursive_bpsps) - 1) * 100
        # return f'Bitrates:\n' \
        #     f'theory: {tostr(loss_out.nonrecursive_bpsps)}\n' \
        #     f'assumed: {tostr(list(reversed(assumed_bpsps)))} [{overhead:.2f}%]\n' \
        #     f'actual: => {actual_bpsp:.3f} [{actual_num_bytes} bytes]'
    else:
        return EncodeOut(img, actual_bpsp, None)
def _decode_bpg(self, p) -> NormalizedTensor:
    """
    Decode the BPG file at `p` via a temporary image file next to it.

    :param p: path of the BPG file.
    :return: the decoded image as a normalized tensor (built from int64 1CHW
        symbols with L=256).
    """
    pout_png = p + '_topng.ppm'  # ppm should be faster
    decode_bpg_to_png(p, pout_png)
    try:
        # Close the file handle before the file is removed, and remove the
        # temporary file even if the conversion fails.
        with Image.open(pout_png) as pil_img:
            img = self.pil_to_1CHW_long(pil_img)  # int64, 1CHW
    finally:
        os.remove(pout_png)
    return SymbolTensor(img, L=256).to_norm()