Exemple #1
0
def ssim_im2col(y, t, window_size, stride):
    """Return an SSIM map computed over sliding windows of ``y`` and ``t``.

    Both inputs are unfolded with ``F.im2col`` and every statistic (means,
    second moments, cross moment) is obtained by averaging the unfolded
    array along axis 1.

    Args:
        y: First input; its shape must unpack into four values.
        t: Second input, unfolded with the same window and stride as ``y``.
        window_size: Window size forwarded to ``F.im2col``.
        stride: Stride forwarded to ``F.im2col``.

    Returns:
        The SSIM map built from the windowed statistics, using the
        stabilising constants ``0.01 ** 2`` and ``0.03 ** 2``.
    """
    # The values are not used below, but the unpacking rejects inputs whose
    # shape does not have exactly four entries.
    _n, _c, _w, _h = y.shape

    def window_mean(values):
        # Average along axis 1 of the unfolded array.
        return F.mean(values, 1)

    patches_y = F.im2col(y, window_size, stride)
    patches_t = F.im2col(t, window_size, stride)

    # First moments.
    mean_y = window_mean(patches_y)
    mean_t = window_mean(patches_t)
    # Second moments and cross moment.
    mean_yy = window_mean(patches_y * patches_y)
    mean_tt = window_mean(patches_t * patches_t)
    mean_ty = window_mean(patches_y * patches_t)

    prod_means = mean_y * mean_t
    mean_y_squared = mean_y * mean_y
    mean_t_squared = mean_t * mean_t

    # Variances / covariance as E[ab] - E[a]E[b].
    var_y = mean_yy - mean_y_squared
    var_t = mean_tt - mean_t_squared
    cov_yt = mean_ty - prod_means

    c1 = 0.01**2
    c2 = 0.03**2

    numerator = (2 * prod_means + c1) * (2 * cov_yt + c2)
    denominator = ((mean_y_squared + mean_t_squared + c1) *
                   (var_y + var_t + c2))
    return numerator / denominator
Exemple #2
0
    def process_convolution2d(self, func, in_data):
        """Record how many products of a 2-D convolution collapse to zero.

        The input is unfolded with ``F.im2col`` into one row per patch and the
        weights are flattened into one row per filter.  For every filter, the
        element-wise products with all patches are formed and the entries that
        equal zero although both operands are non-zero are counted.  The
        count is appended to ``self.results`` together with the total number
        of products.

        Args:
            func: Function being inspected; passed to ``self.inc_counter`` and
                its ``label`` is stored in the result row.
            in_data: Pair ``(X, W)`` holding the input and the weights.
        """
        assert len(in_data) == 2

        func_id = self.inc_counter(func)
        inputs, weights = in_data
        xp = backend.get_array_module(inputs)

        kernel = weights.shape[2]
        step = 1
        # Only a kernel size of 3 gets padding.
        padding = 1 if kernel == 3 else 0

        # The third axis is sized from the input's own last two dimensions.
        unfolded = F.im2col(inputs, kernel, stride=step, pad=padding)
        unfolded = unfolded.reshape(
            [inputs.shape[0], -1, inputs.shape[2] * inputs.shape[3]])
        patch_len = unfolded.shape[1]
        # One row per patch, one column per patch element.
        patches = F.transpose(unfolded, axes=(0, 2, 1))
        patches = patches.reshape([-1, patch_len]).array

        # One row per filter.
        filters = weights.reshape([weights.shape[0], -1])

        filters_nonzero = (filters != 0).astype('bool')
        patches_nonzero = (patches != 0).astype('bool')

        n_zm = 0
        for row in range(filters.shape[0]):
            products = xp.multiply(patches, filters[row, :])
            both_nonzero = xp.logical_and(filters_nonzero[row, :],
                                          patches_nonzero)
            # Zero result although neither operand is zero.
            zero_products = xp.logical_and(products == 0, both_nonzero)
            n_zm += zero_products.sum()

        total_products = filters.shape[0] * patches.shape[0] * patches.shape[1]
        self.results.append([
            self.current_epoch, self.current_iteration, func_id, func.label,
            n_zm.item(), total_products
        ])
Exemple #3
0
    def check_forward(self, x, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        """Check the forward output of ``functions.im2col`` element by element.

        A copy of ``x`` is unfolded, the output shape is compared with the one
        predicted by ``get_conv_outsize``, and every kernel tap of every
        output position is compared either with the matching pixel of
        ``self.x`` or, when the tap falls outside the image, with zeros.

        Args:
            x: Input array; its shape must unpack into ``(n, c, h, w)``.
            kh, kw: Kernel height and width.
            sy, sx: Vertical and horizontal stride.
            ph, pw: Vertical and horizontal padding.
            dy, dx: Vertical and horizontal dilation.
            gpu: Not read by this method.
        """
        x = x.copy()
        n, c, h, w = x.shape
        col = functions.im2col(x, (kh, kw), (sy, sx), (ph, pw),
                               dilate=(dy, dx)).data
        col_h = get_conv_outsize(h, kh, sy, ph, d=dy)
        col_w = get_conv_outsize(w, kw, sx, pw, d=dx)

        self.assertEqual(col.shape, (n, c * kh * kw, col_h, col_w))
        col = col.reshape(n, c, kh, kw, col_h, col_w)
        col = cuda.to_cpu(col)

        # Reference for taps that land in the padding.  It is sized from the
        # actual input instead of a hard-coded (2, 3), and built once rather
        # than on every iteration.
        padding_ref = numpy.zeros((n, c), numpy.float32)

        # Loop names are distinct from the ``x`` parameter to avoid shadowing.
        for out_y in moves.range(col_h):
            for out_x in moves.range(col_w):
                for ky in moves.range(kh):
                    for kx in moves.range(kw):
                        # Input coordinates addressed by this kernel tap.
                        oy = out_y * sy - ph + ky * dy
                        ox = out_x * sx - pw + kx * dx
                        actual = col[:, :, ky, kx, out_y, out_x]
                        if 0 <= oy < h and 0 <= ox < w:
                            testing.assert_allclose(actual,
                                                    self.x[:, :, oy, ox])
                        else:
                            testing.assert_allclose(actual, padding_ref)
Exemple #4
0
 def f(x):
     """Apply ``functions.im2col`` to ``x`` with the enclosing scope's settings."""
     options = {
         'stride': stride,
         'pad': pad,
         'cover_all': cover_all,
         'dilate': dilate,
     }
     return functions.im2col(x, ksize, **options)
Exemple #5
0
    def check_forward(self, x, kh, kw, sy, sx, ph, pw, dy, dx, gpu):
        """Check the forward output of ``functions.im2col`` element by element.

        A copy of ``x`` is unfolded, the output shape is compared with the one
        predicted by ``get_conv_outsize``, and every kernel tap of every
        output position is compared either with the matching pixel of
        ``self.x`` or, when the tap falls outside the image, with zeros of
        ``self.dtype``.

        Args:
            x: Input array; its shape must unpack into ``(n, c, h, w)``.
            kh, kw: Kernel height and width.
            sy, sx: Vertical and horizontal stride.
            ph, pw: Vertical and horizontal padding.
            dy, dx: Vertical and horizontal dilation.
            gpu: Not read by this method.
        """
        x = x.copy()
        n, c, h, w = x.shape
        col = functions.im2col(
            x, (kh, kw), (sy, sx), (ph, pw), dilate=(dy, dx)).data
        col_h = get_conv_outsize(h, kh, sy, ph, d=dy)
        col_w = get_conv_outsize(w, kw, sx, pw, d=dx)

        self.assertEqual(col.shape, (n, c * kh * kw, col_h, col_w))
        col = col.reshape(n, c, kh, kw, col_h, col_w)
        col = cuda.to_cpu(col)

        # Reference for taps that land in the padding.  It is sized from the
        # actual input instead of a hard-coded (2, 3), and built once rather
        # than on every iteration.
        padding_ref = numpy.zeros((n, c), self.dtype)

        # Loop names are distinct from the ``x`` parameter to avoid shadowing.
        for out_y in moves.range(col_h):
            for out_x in moves.range(col_w):
                for ky in moves.range(kh):
                    for kx in moves.range(kw):
                        # Input coordinates addressed by this kernel tap.
                        oy = out_y * sy - ph + ky * dy
                        ox = out_x * sx - pw + kx * dx
                        actual = col[:, :, ky, kx, out_y, out_x]
                        if 0 <= oy < h and 0 <= ox < w:
                            testing.assert_allclose(
                                actual, self.x[:, :, oy, ox])
                        else:
                            testing.assert_allclose(actual, padding_ref)
Exemple #6
0
 def acts_expand_convolution_2d(self):
     """Unfold ``self.in_acts`` into a 2-D matrix with one row per patch.

     The activations are expanded with ``im2col`` using the
     ``(ksize, stride, pad)`` triple stored in ``self.conv2d_args``; the
     patch axis is then moved last and the leading axes are flattened.

     Returns:
         Array of shape ``(n * ho * wo, c * ksize * ksize)``.
     """
     activations = self.in_acts
     ksize, stride, pad = self.conv2d_args
     # im2col yields n x c*ksize*ksize x ho x wo.
     patches = im2col(activations, ksize, stride, pad).data
     batch, _, out_h, out_w = patches.shape
     # Move the patch axis last, then merge batch and spatial axes.
     channels_last = patches.transpose(0, 2, 3, 1)
     return channels_last.reshape(batch * out_h * out_w, -1)
    def compute_A(self, in_data):
        """Return the scaled Gram matrix of the unfolded layer input.

        The first entry of ``in_data`` is expanded with ``im2col`` using the
        link's kernel size, stride and padding, and flattened to one row per
        patch.  A column of ones is appended when the link has a bias.  The
        result is ``rows.T.dot(rows)`` multiplied by ``1 / n`` where ``n`` is
        the leading dimension of the unfolded input.

        Args:
            in_data: Sequence whose first element is the layer input.

        Returns:
            The scaled matrix product described above.
        """
        x = in_data[0]
        link = self._link
        ksize, stride, pad = link.ksize, link.stride[0], link.pad[0]
        xp = cuda.get_array_module(x)

        # Unfold, then move the patch axis last: NCHW -> NHWC.
        patches = im2col(x, ksize, stride, pad).data.transpose(0, 2, 3, 1)

        n, ho, wo, _ = patches.shape
        rows = patches.reshape(n * ho * wo, -1)
        if link.b is not None:
            # Extra all-ones column for the bias term.
            bias_column = xp.ones(rows.shape[0], dtype=rows.dtype)
            rows = xp.column_stack((rows, bias_column))

        scale = 1 / n
        # Half-precision input is promoted before the matrix product.
        if rows.dtype == xp.float16:
            rows = cast(rows, xp.float32).data

        return rows.T.dot(rows) * scale
Exemple #8
0
 def f(x):
     """Apply ``functions.im2col`` to ``x`` with the enclosing scope's settings."""
     unfolded = functions.im2col(
         x,
         ksize,
         stride=stride,
         pad=pad,
         cover_all=cover_all,
         dilate=dilate,
     )
     return unfolded
Exemple #9
0
def contextual_attention(f,
                         b,
                         mask=None,
                         ksize=3,
                         stride=1,
                         rate=1,
                         fuse_k=3,
                         softmax_scale=10.,
                         training=True,
                         fuse=True,
                         return_flow=False):
    """Contextual attention layer implementation.

    Contextual attention was first introduced in the publication:
        Generative Image Inpainting with Contextual Attention, Yu et al.

    Patches extracted from ``b`` are normalised and used as convolution
    filters over ``f`` to obtain matching scores.  The scores are optionally
    fused, masked, passed through a scaled softmax, and finally fed to a
    deconvolution whose filters are patches taken from the original
    (non-subsampled) ``b``.

    Args:
        f: Input feature to match (foreground).
        b: Input feature for match (background).
        mask: Input mask for ``b``, indicating patches not available.  When
            ``None``, an all-zero mask is created.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from ``b``.
        rate: Dilation for matching; ``f``, ``b`` and ``mask`` are subsampled
            with this step along their last two axes.
        fuse_k: Size of the identity kernel used when fusing scores.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference.
            Not read by the function body.
        fuse: If true, the matching scores are convolved with the identity
            kernel before the softmax.
        return_flow: If true, a flow visualisation is computed and returned
            as the second element of the result.

    Returns:
        ``(y, flow)`` when ``return_flow`` is true, otherwise ``(y, None)``.
        ``y`` is reshaped to the shape ``f`` had on entry.
    """
    xp = cuda.get_array_module(f.data)
    # get shapes
    # NOTE(review): raw_fs and raw_int_fs hold the same value (likewise
    # fs/int_fs and bs/int_bs below).
    raw_fs = f.shape
    raw_int_fs = f.shape
    raw_int_bs = b.shape
    # extract patches from background with stride and rate
    kernel = 2 * rate
    pad = (kernel - rate * stride + 1) // 2
    raw_w = F.im2col(b, kernel, rate * stride, pad=pad).transpose(0, 2, 3, 1)
    raw_w = raw_w.reshape(raw_int_bs[0], -1, raw_int_bs[1], kernel, kernel)
    # raw_w = raw_w.transpose(0, 1, 4, 2, 3)  # transpose to b*hw*c*k*k
    # downscaling foreground option: downscaling both foreground and
    # background for matching and use original background for reconstruction.
    f = f[:, :, ::rate, ::rate]
    b = b[:, :, ::rate, ::rate]
    if mask is not None:
        mask = mask[:, :, ::rate, ::rate]
    fs = f.shape
    int_fs = f.shape
    # one group per entry along axis 0 of f
    f_groups = F.split_axis(f, int_fs[0], axis=0)
    # from t(H*W*C) to w(b*k*k*c*h*w)
    bs = b.shape
    int_bs = b.shape
    pad = (ksize - stride + 1) // 2
    w = F.im2col(b, ksize, stride, pad=pad).transpose(0, 2, 3, 1)
    # NOTE(review): the background patches are reshaped with the foreground
    # shape (int_fs); presumably f and b are expected to agree on their first
    # two dimensions -- confirm.
    w = w.reshape(int_fs[0], -1, int_fs[1], ksize, ksize)
    # w = w.transpose(0, 1, 4, 2, 3)  # transpose to b*hw*c*k*k
    # process mask
    if mask is None:
        mask = xp.zeros([1, 1, bs[2], bs[3]])
    m = F.im2col(mask, ksize, stride, pad=pad).transpose(0, 2, 3, 1).data
    m = m.reshape(
        1,
        -1,
        1,
        ksize,
        ksize,
    )
    # m = m.transpose(0, 1, 4, 2, 3)  # transpose to b*hw*c*k*k
    # m = m[0]
    # 1.0 where the mask patch averages to exactly zero, 0.0 elsewhere.
    # NOTE(review): the reshape uses bs[0] as leading dimension although m
    # was reshaped with a leading 1 just above -- confirm this holds when
    # bs[0] > 1.
    m = (m.mean(axis=(2, 3,
                      4)) == 0.).astype("float32").reshape(bs[0], 1, -1, 1, 1)
    w_groups = F.split_axis(w, int_bs[0], axis=0)
    raw_w_groups = F.split_axis(raw_w, int_bs[0], axis=0)
    y = []
    offsets = []
    k = fuse_k
    scale = softmax_scale
    # identity kernel used to fuse neighbouring scores
    fuse_weight = xp.eye(k).reshape(1, 1, k, k)
    for i, (xi, wi, raw_wi) in enumerate(zip(f_groups, w_groups,
                                             raw_w_groups)):
        # conv for compare
        wi = wi[0]
        mm = m[i]
        # L2 norm of every patch (plus a small constant), used to normalise
        # the filters before matching
        norm = F.sqrt(F.sum(F.square(wi), axis=(1, 2, 3),
                            keepdims=True)) + 1e-4
        wi_normed = wi / (F.tile(norm, (1, *wi.shape[1:])))
        pad = (ksize) // 2
        yi = F.convolution_2d(xi, wi_normed, pad=pad)

        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = yi.reshape(1, 1, fs[2] * fs[3], bs[2] * bs[3])
            pad = (fuse_k) // 2
            yi = F.convolution_2d(yi, fuse_weight, pad=pad)
            yi = yi.reshape(1, fs[2], fs[3], bs[2], bs[3])
            # swap the two axes of each pair and fuse a second time
            yi = yi.transpose(0, 2, 1, 4, 3)
            yi = yi.reshape(1, 1, fs[2] * fs[3], bs[2] * bs[3])
            yi = F.convolution_2d(yi, fuse_weight, pad=pad)
            yi = yi.reshape(1, fs[3], fs[2], bs[3], bs[2])
            yi = yi.transpose(0, 4, 3, 2, 1)
        yi = yi.reshape(1, bs[2] * bs[3], fs[2], fs[3])

        # softmax to match
        yi *= mm  # mask
        yi = F.softmax(yi * scale, 1)
        yi *= mm  # mask
        # deconv for patch pasting
        # 3.1 paste center
        wi_center = raw_wi[0]
        pad = (kernel + rate * (yi.shape[2] - 1) - raw_fs[2]) // 2
        yi = F.deconvolution_2d(
            yi, wi_center, outsize=raw_fs[2:], stride=rate, pad=pad) / 4.
        y.append(yi)
        if return_flow:
            # NOTE(review): at this point yi is the deconvolution output, not
            # the softmax scores -- confirm the argmax is meant to run on it.
            offset = xp.argmax(yi.data, axis=1)
            offset = xp.concatenate([offset // fs[2], offset % fs[2]], axis=0)
            offsets.append(offset)
    y = F.concat(y, axis=0).reshape(*raw_int_fs)
    if return_flow:
        offsets = xp.concatenate(offsets,
                                 axis=0).reshape(int_bs[0], 2, int_bs[2],
                                                 int_bs[3])
        # case1: visualize optical flow: minus current position
        # NOTE(review): h_add/w_add are built from bs[1] and bs[2] and joined
        # along axis 3, whereas offsets was reshaped to
        # (bs[0], 2, bs[2], bs[3]) -- verify the axes line up.
        h_add = xp.tile(xp.reshape(xp.arange(bs[1]), [1, bs[1], 1, 1]),
                        [bs[0], 1, bs[2], 1])
        w_add = xp.tile(xp.reshape(xp.arange(bs[2]), [1, 1, bs[2], 1]),
                        [bs[0], bs[1], 1, 1])
        offsets = offsets - xp.concatenate([h_add, w_add], axis=3)
        # to flow image
        flow = flow_to_image_chainer(offsets)
        # # case2: visualize which pixels are attended
        if rate != 1:
            # bring the flow image back to the pre-subsampling resolution
            flow = F.unpooling_2d(flow, rate)
        return y, flow
    return y, None