Beispiel #1
0
def _get(img_dir, clf_ckpt_p):
    """Run the Q-classifier on every image in `img_dir` and store the result as CSV.

    The output file is `OPTIMAL_QS_TXT` inside `img_dir`. It has a header row
    ``fn,q`` followed by one row per image, sorted by file name, where ``fn``
    is the image's base name without extension.

    :param img_dir: Directory listed via `cached_listdir_imgs`; also receives
        the output file.
    :param clf_ckpt_p: Checkpoint handed to `load_classifier`.
    """
    out_file = os.path.join(img_dir, OPTIMAL_QS_TXT)

    clf = load_classifier(clf_ckpt_p)

    t = timer.TimeAccumulator()
    opt = {}
    for i, p in enumerate(
            cached_listdir_imgs(img_dir, discard_shitty=False).ps):
        with t.execute():
            # Read the pixels inside a `with` block so the file handle is
            # released right away instead of leaking one handle per image.
            with Image.open(p) as pil_img:
                pixels = np.array(pil_img)
            # Move the last axis to the front; it must be of size 3 (asserted).
            img = torch.from_numpy(pixels).to(
                pe.DEVICE, non_blocking=True).permute(2, 0, 1)
            assert img.shape[0] == 3
            img = img.unsqueeze(0)
            img = SymbolTensor(img.long(), L=256).to_norm().get()
            q = clf.get_q(img)
            opt[os.path.splitext(os.path.basename(p))[0]] = q
        # Progress output: index and mean time per image, every 10 images.
        if i > 0 and i % 10 == 0:
            print(i, t.mean_time_spent())
    with open(out_file, 'w', newline='') as csvfile:
        w = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
        w.writerow(['fn', 'q'])
        for filename, q in sorted(opt.items()):
            w.writerow([filename, q])
    print('Created', out_file)
 def __init__(self, network_out: prob_clf.NetworkOutput,
              x_r: NormalizedTensor, x_l: NormalizedTensor):
     """Store the network output together with the residual of `x_r` and `x_l`.

     :param network_out: Kept as-is on `self.network_out`.
     :param x_r: Normalized tensor; its symbol form is kept as `self.x_r`.
     :param x_l: Normalized tensor; its symbols are subtracted from those of
         `x_r` to form the residual.
     """
     self.network_out = network_out
     self.x_r = x_r.to_sym()

     # Residual on symbol level, wrapped as centered symbols with L=511.
     symbols_r = self.x_r.t
     symbols_l = x_l.to_sym().t
     self.res_sym = SymbolTensor(symbols_r - symbols_l, L=511, centered=True)
     self.res = self.res_sym.to_norm()

     # Filled on demand.
     self._mean_img = None
Beispiel #3
0
def get_psnr(x: SymbolTensor, y: SymbolTensor):
    """
    Return the PSNR between `x` and `y`, averaged over the batch.

    The symbols are converted to float *before* the difference is taken and
    squared. Doing the arithmetic in a narrow or unsigned integer dtype
    (e.g. uint8 or int16) would wrap around and silently corrupt the MSE.

    Notes: Tested to be same as tf.image.psnr (MSE over the non-batch
    dimensions, PSNR averaged over the batch).

    :param x: Symbols whose `.get()` yields a 4D tensor.
    :param y: Symbols with the same `L` as `x` (asserted).
    :return: Scalar tensor with the mean PSNR; the peak value is `L - 1`.
    """
    assert x.L == y.L
    max_val = x.L - 1
    # Cast first, then subtract: keeps the arithmetic free of integer overflow.
    diff = x.get().float() - y.get().float()
    # NOTE: thats how tf.image.psnr does the mean, too: MSE over spatial, PSNR over batch
    mse = diff.pow(2).mean((1, 2, 3))
    assert len(mse.shape) == 1, mse.shape
    return 10. * torch.log10((max_val**2) / mse).mean()
    def unroll_unpack(self, img):
        """
        Generator that drives `_unpack` over every Q that has to be evaluated.

        Non-meta dataset:
            yields the single pair `(self._unpack(img), None)`.

        Meta dataset (`img` is indexed by Q):
            1. A first guess is stored in `self.clf_q`: taken from the
               classifier if `self.run_clf`, or set to 14 if the strategy is
               `QStrategy.FIXED`.
            2. Unless the strategy is `QStrategy.CLF_ONLY`, `self.next_q` is
               followed for as long as it is truthy and contained in `img`.
            3. If a guess is set and has no entry in `self.losses`, it is
               yielded as well.

        Always yields a set of images, and the q used to obtain it.

        :raises ValueError: if the guess in `self.clf_q` is not contained in `img`.
        """
        if not self.is_meta:
            yield self._unpack(img), None
            return

        # Step 1: first Q guess.
        if self.run_clf:
            assert self.clf
            first_entry = img[next(iter(img))]
            raw, _, _ = self._unpack(first_entry)
            x_r = SymbolTensor(raw.long(), L=256).to_norm()

            torch.cuda.empty_cache()
            self.clf_q = self.clf.get_q(x_r.get())
            torch.cuda.empty_cache()
        elif self.qstrategy == QStrategy.FIXED:
            # note: clf_q doubles as the storage for the fixed Q, despite its name.
            self.clf_q = 14  # optimal on training set

        # Step 2: search for the optimum.
        search_enabled = self.qstrategy != QStrategy.CLF_ONLY
        if search_enabled:
            while self.next_q and self.next_q in img:
                # img[q] is a dict with raw, compressed, bpps
                yield self._unpack(img[self.next_q]), self.next_q

        # Step 3: make sure the guess itself was evaluated, too.
        if not self.clf_q or self.clf_q in self.losses:
            return
        if self.clf_q not in img:
            raise ValueError('**** CLF returned invalid q', self.clf_q)
        yield self._unpack(img[self.clf_q]), self.clf_q
Beispiel #5
0
    def unpack(self, img_batch, fixed_first=None):
        """Move a batch to `pe.DEVICE` and turn it into (normalized raw, shifted q).

        :param img_batch: Mapping with entries 'raw' (uint8 or int16 images) and
            'q' (flattened here to a 1d tensor).
        :param fixed_first: Optional mapping with 'raw' and 'q'. If given, it
            overwrites element 0 of the batch.
        :return: Tuple of the raw images as normalized tensor (padded if
            `self.padding_fac` is set) and `q - self.config_clf.first_class`.
        """
        device = pe.DEVICE
        raw = img_batch['raw'].to(device, non_blocking=True)
        q = img_batch['q'].to(device).view(-1)

        if fixed_first is not None:
            # Pin the first batch element to the supplied sample.
            raw[0, ...] = fixed_first['raw']
            q[0, ...] = fixed_first['q']

        # Shift q by the first class of the classifier config.
        q_shifted = q - self.config_clf.first_class

        if self.padding_fac:
            raw = self.pad(raw)

        symbols = SymbolTensor(raw.long(), L=256)
        return symbols.to_norm(), q_shifted
Beispiel #6
0
    def unpack_batch_pad(self, img_or_imgbatch):
        """Unpack a single image or a batch into (normalized raw, shifted q).

        :param img_or_imgbatch: Mapping with entries 'raw' (uint8 or int16;
            3D for a single image, 4D for a batch) and 'q' (flattened here to
            a 1d tensor).
        :return: Tuple of the raw images as 4D normalized tensor (padded if
            `self.padding_fac` is set) and `q - self.config_clf.first_class`.
        """
        device = pe.DEVICE
        raw = img_or_imgbatch['raw'].to(device, non_blocking=True)
        q = img_or_imgbatch['q'].to(device).view(-1)

        # A single image gets a leading batch dimension (in place).
        if raw.dim() == 3:
            raw.unsqueeze_(0)

        if self.padding_fac:
            raw = self.pad(raw)

        q_shifted = q - self.config_clf.first_class

        assert raw.dim() == 4

        symbols = SymbolTensor(raw.long(), L=256)
        return symbols.to_norm(), q_shifted
 def decoder(_, C_cur):
     """Decode the next chunk from `fin` into one normalized residual channel.

     The byte count is obtained with `read_num_bytes_encoded`, that many bytes
     are read and range-decoded with `C_cur` as CDF, and the result is reshaped
     to (1, H, W). `x_range[0]` is added before the symbols are normalized.

     :param _: Unused.
     :param C_cur: CDF handed to the range decoder.
     :return: Tuple `(bn_c, None)`; the second entry is the (absent) extra info.
     """
     payload_size = read_num_bytes_encoded(fin)
     payload = fin.read(payload_size)
     decoded = r.range_decode(payload, cdf=C_cur, time_logger=self.times)
     decoded = decoded.reshape(1, H, W).to(pe.DEVICE, non_blocking=True)
     # NOTE: here it's int16
     shifted = decoded + x_range[0]
     bn_c = SymbolTensor(shifted, L=511, centered=True).to_norm().t
     # yielding always bottleneck and extra_info (=None here)
     return bn_c, None
Beispiel #8
0
def new_bottleneck_summary(s: SymbolTensor):
    """
    Grayscale bottleneck representation: Expects the actual bottleneck symbols.

    The symbols are divided by `s.L`, split per channel with
    `vis.grid.prep_for_grid`, and arranged with `torchvision.utils.make_grid`
    (5 tiles per row).

    :param s: NCHW
    :return: [0, 1] image
    """
    s_raw, L = s.get(), s.L
    assert s_raw.dim() == 4, s_raw.shape
    s_raw = s_raw.detach().float().div(L)
    grid = vis.grid.prep_for_grid(s_raw, channelwise=True)
    assert len(grid) == s_raw.shape[1], (len(grid), s_raw.shape)
    # `all(...)` is required here: asserting on the bare list is always true
    # for a non-empty list, so the range check would never trigger.
    assert all(g.max() <= 1 for g in grid), [g.max() for g in grid]
    # Report the dtype of the checked element; `grid` is indexed like a
    # sequence and need not have a `.dtype` of its own.
    assert grid[0].dtype == torch.float32, grid[0].dtype
    return torchvision.utils.make_grid(grid, nrow=5)
class EnhancementOut(object):
    """Bundles a network output with the residual between `x_r` and `x_l`.

    Attributes set by the constructor:
      network_out -- the network output, stored unchanged
      x_r         -- symbol form of the `x_r` argument
      res_sym     -- `x_r` symbols minus `x_l` symbols (L=511, centered)
      res         -- normalized form of `res_sym`
    """

    def __init__(self, network_out: prob_clf.NetworkOutput,
                 x_r: NormalizedTensor, x_l: NormalizedTensor):
        self.network_out = network_out
        self.x_r = x_r.to_sym()

        # Residual on symbol level.
        symbols_r = self.x_r.t
        symbols_l = x_l.to_sym().t
        self.res_sym = SymbolTensor(symbols_r - symbols_l,
                                    L=511, centered=True)
        self.res = self.res_sym.to_norm()

        # Cache for get_mean_img().
        self._mean_img = None

    def get_mean_img(self, loss: DiscretizedMixLogisticLoss):
        """Return the mean image, computing it on the first call only.

        :param loss: Forwarded to `extract_mean_image_corrected` when the
            cache is still empty; ignored afterwards.
        """
        if self._mean_img is not None:
            return self._mean_img
        self._mean_img = extract_mean_image_corrected(
            self.res, self.network_out, loss)
        return self._mean_img
    def unpack(self, img_batch, fixed_first=None) -> InputTensors:
        """Move a batch to `pe.DEVICE` and pack it into an InputTensors instance.

        :param img_batch: Mapping with entries 'raw' and 'compressed'
            (uint8 or int16 images) and 'bpp' (1d tensor of floats).
        :param fixed_first: Optional mapping with 'raw', 'compressed' and
            'bpp'. If given, it overwrites element 0 of the batch.
        :return: InputTensors holding the normalized (raw, compressed) pair,
            the bpps, and the number of raw elements counted before padding.
        """
        device = pe.DEVICE
        raw = img_batch['raw'].to(device, non_blocking=True)
        compressed = img_batch['compressed'].to(device, non_blocking=True)
        bpps = img_batch['bpp'].to(device)

        if fixed_first is not None:
            # Pin the first batch element to the supplied sample.
            raw[0, ...] = fixed_first['raw']
            compressed[0, ...] = fixed_first['compressed']
            bpps[0] = fixed_first['bpp']

        # Must be taken before padding changes the shape.
        num_subpixels_before_pad = np.prod(raw.shape)

        if self.padding_fac:
            raw = self.pad(raw)
            compressed = self.pad(compressed)

        x_c = SymbolTensor(compressed.long(), L=256).to_norm()
        x_r = SymbolTensor(raw.long(), L=256).to_norm()

        return InputTensors((x_r, x_c), bpps, num_subpixels_before_pad)
    def pad_pack(self, raw, compressed, bpps) -> InputTensors:
        """ Pad images and pack them into an InputTensors instance.

        :param raw: Batch of raw input images
        :param compressed: Output of compressing images in `raw` with BPG.
            Must have the same shape as `raw`.
        :param bpps: The bitrates of the images (1d).
        :return: InputTensors holding the normalized (raw, compressed) pair,
            `bpps`, and the number of raw elements counted before padding.
        """
        assert raw.shape == compressed.shape

        # Must be taken before padding changes the shape.
        num_subpixels_before_pad = np.prod(raw.shape)

        if self.padding_fac:
            raw = self.pad(raw)
            compressed = self.pad(compressed)

        assert len(raw.shape) == 4
        assert len(bpps.shape) == 1, (bpps.shape, raw.shape)

        x_c = SymbolTensor(compressed.long(), L=256).to_norm()
        x_r = SymbolTensor(raw.long(), L=256).to_norm()
        return InputTensors((x_r, x_c), bpps, num_subpixels_before_pad)
    def _encode(self, pin, pout) -> EncodeOut:
        """
        Encode the image at `pin` into `pout` plus a BPG side file.

        Steps, in the order performed below:
          1. Load the image and normalize it (`x_r`).
          2. Pick Q: from `self.blueprint.clf` if one is set, otherwise the
             hard-coded fallback 12.
          3. BPG-encode `pin` with that Q to `self._path_for_bpg(pout)` and
             decode it again to obtain `x_l`.
          4. Run `self.blueprint.forward_lossy` on `x_l` and wrap the result,
             together with `x_r` and `x_l`, in an EnhancementOut.
          5. Write the output of `self.encode_rgb` to `pout`, followed by
             `_MAGIC_VALUE_SEP`.

        :param pin: Path of the input image (opened with PIL).
        :param pout: Path of the output file; asserted not to exist yet.
        :return: EncodeOut(img, actual_bpsp, None), where img is int64 1CHW and
            actual_bpsp is the size of `pout` plus the BPG file, in bits,
            divided by the number of elements of img.
        :raises NotImplementedError: if `self.compare_with_theory` is set.
            This is raised only after both output files have been written.
        """
        assert not os.path.isfile(pout)
        img = self.pil_to_1CHW_long(
            Image.open(pin))  # int64 1CHW pe.DEVICE tensor
        assert len(
            img.shape
        ) == 4 and img.shape[0] == 1 and img.shape[1] == 3, img.shape

        # gt
        x_r = SymbolTensor(img, L=256).to_norm()

        # Choose the Q passed to the BPG encoder.
        if self.blueprint.clf is not None:
            with self.times.run('Q-Classifier'):
                q = self.blueprint.clf.get_q(x_r.get())
        else:
            q = 12  # TODO

        with self.times.run(f'BPG'):
            # img = img.float()
            # Encode BPG
            pout_bpg = self._path_for_bpg(pout)
            bpp_bpg = self._encode_bpg(pin, pout_bpg, q)
            # 1. sym -> norm (for l)
            x_l: NormalizedTensor = self._decode_bpg(pout_bpg)

        with self.times.run('[-] encode forwardpass'):
            # 1. sym -> norm (for r)
            network_out: prob_clf.NetworkOutput = self.blueprint.forward_lossy(
                x_l, torch.tensor([bpp_bpg], device=pe.DEVICE))
            # in here:
            # 2. norm -> sym (for l and r)
            out = EnhancementOut(network_out, x_r, x_l)

        if self.compare_with_theory:
            with self.times.run('[-] get loss'):
                num_subpixels_before_pad = np.prod(img.shape)
                # NOTE(review): loss_out is only referenced by the commented-out
                # report at the end of this function.
                loss_out = self.blueprint.losses(
                    out,
                    num_subpixels_before_pad=num_subpixels_before_pad,
                    base_bpp=bpp_bpg)

        entropy_coding_bytes = []  # bytes used by different scales

        dmll = self.blueprint.losses.loss_dmol_rgb

        with open(pout, 'wb') as fout:
            with self.times.prefix_scope(f'RGB'):
                entropy_coding_bytes.append(self.encode_rgb(dmll, out, fout))
                fout.write(_MAGIC_VALUE_SEP)

        # The actual rate counts both files: our bitstream and the BPG file.
        num_subpixels = np.prod(img.shape)
        actual_num_bytes = os.path.getsize(pout) + os.path.getsize(pout_bpg)
        actual_bpsp = actual_num_bytes * 8 / num_subpixels

        if self.compare_with_theory:
            # TODO
            # The theory-vs-actual report below is disabled, so this mode
            # currently always fails here.
            raise NotImplementedError
            # assumed_bpsps = [b * 8 / num_subpixels for b in entropy_coding_bytes]
            # tostr = lambda l: ' | '.join(map('{:.3f}'.format, l)) + f' => {sum(l):.3f}'
            # overhead = (sum(assumed_bpsps) / sum(loss_out.nonrecursive_bpsps) - 1) * 100
            # return f'Bitrates:\n' \
            #     f'theory:  {tostr(loss_out.nonrecursive_bpsps)}\n' \
            #     f'assumed: {tostr(list(reversed(assumed_bpsps)))} [{overhead:.2f}%]\n' \
            #     f'actual:                                => {actual_bpsp:.3f} [{actual_num_bytes} bytes]'
        else:
            return EncodeOut(img, actual_bpsp, None)
 def _decode_bpg(self, p) -> NormalizedTensor:
     """Decode the BPG file at `p` and return it as a normalized tensor.

     The file is decoded to a temporary PPM next to `p`, loaded, and the
     temporary file is removed again, even if loading fails.

     :param p: Path of the BPG file.
     :return: The decoded image (symbols with L=256), normalized.
     """
     pout_png = p + '_topng.ppm'  # ppm Should be faster
     decode_bpg_to_png(p, pout_png)
     try:
         # Close the handle before the file is deleted.
         with Image.open(pout_png) as pil_img:
             img = self.pil_to_1CHW_long(pil_img)  # int64, 1CHW
     finally:
         # Never leave the temporary file behind.
         os.remove(pout_png)
     return SymbolTensor(img, L=256).to_norm()