Ejemplo n.º 1
 def test_col2im_consistency(self):
     """Check that the CPU and GPU ``col2im`` routines agree.

     Columns are extracted once on the CPU from ``self.x`` and then folded
     back by both implementations with identical arguments.
     """
     columns = conv.im2col_cpu(self.x, 3, 3, 2, 2, 2, 2, dy=2, dx=2)
     height, width = self.x.shape[2:]
     expected = conv.col2im_cpu(
         columns, 2, 2, 2, 2, height, width, dy=2, dx=2)
     device_columns = cuda.to_gpu(columns)
     actual = conv.col2im_gpu(
         device_columns, 2, 2, 2, 2, height, width, dy=2, dx=2)
     # The GPU result is copied back to the host before comparing.
     testing.assert_allclose(expected, actual.get())
 def backward_cpu(self, x, gy):
     """Spread ``gy[0]`` over every ``kh x kw`` window and fold it back.

     Args:
         x: Sequence whose first element supplies the input height/width.
         gy: Sequence whose first element is the output gradient.

     Returns:
         One-element tuple with the folded gradient divided by ``kh * kw``.
     """
     in_h, in_w = x[0].shape[2:]
     grad_out = gy[0]
     repeats = (1, 1, self.kh, self.kw, 1, 1)
     tiled = numpy.tile(grad_out[:, :, None, None], repeats)
     grad_in = conv.col2im_cpu(
         tiled, self.sy, self.sx, self.ph, self.pw, in_h, in_w)
     grad_in /= self.kh * self.kw
     return (grad_in,)
Ejemplo n.º 3
    def forward(self, x):
        """Tile the input over a ``kh x kw`` window and fold it with col2im.

        ``self.outh`` / ``self.outw`` are computed lazily on first use and
        cached on the instance.

        NOTE(review): the arguments of the two ``get_deconv_outsize`` calls
        were cut off in this copy; they are restored to match the otherwise
        identical implementation elsewhere in this file -- confirm that
        ``self.cover_all`` exists on this class.

        Args:
            x: Sequence whose first element is the input array.

        Returns:
            One-element tuple with the folded array.
        """
        h, w = x[0].shape[2:]
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
        xp = cuda.get_array_module(*x)
        col = xp.tile(x[0][:, :, None, None], (1, 1, self.kh, self.kw, 1, 1))
        # Exactly one backend must run; previously the GPU call always
        # overwrote the CPU result.
        if xp is numpy:
            y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        else:
            y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
        return y,
Ejemplo n.º 4
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients w.r.t. input, filter and optional bias on CPU.

        Uses the MKLDNN kernel when ``mkld.enable_convF(inputs)`` is true and
        falls back to a NumPy ``tensordot`` / ``col2im`` path otherwise.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        n, c, h, w = x.shape
        out_c, input_c, kh, kw = W.shape
        # Kept for its implicit check that gy is 4-dimensional.
        gn, gout_c, gout_h, gout_w = gy.shape

        # For MKLDNN backward, only support float32
        if mkld.enable_convF(inputs):
            # Output buffers are filled in place by do_backward.
            gW = numpy.empty(shape=(out_c, input_c, kh, kw), dtype=W.dtype)
            gx = numpy.empty(shape=(n, c, h, w), dtype=W.dtype)
            if b is None:
                mkldnn.Convolution2D_F32.do_backward(x, W, gy, gW, gx, kh, kw, self.sy, self.sx, self.ph, self.pw, self.pd, self.pr, self.mkldnn_opt)
                return gx, gW
            else:
                gb = numpy.empty(shape=b.shape, dtype=W.dtype)
                mkldnn.Convolution2D_F32.do_backward(x, W, b, gy, gW, gx, gb, kh, kw, self.sy, self.sx, self.ph, self.pw, self.pd, self.pr, self.mkldnn_opt)
                return gx, gW, gb
        else:
            gW = numpy.tensordot(
                gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
            gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
            gcol = numpy.rollaxis(gcol, 3)
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
            if b is None:
                return gx, gW
            else:
                gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Ejemplo n.º 5
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients w.r.t. input, filter and optional bias on CPU.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one. ``gx`` is
            ``None`` when ``self.requires_x_grad`` is false.

        Raises:
            ValueError: If ``type_check.same_types`` rejects ``inputs``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        # Skip the input gradient entirely when it is not requested.
        if not self.requires_x_grad:
            gx = None
        else:
            gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
            gcol = numpy.rollaxis(gcol, 3)
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                 h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Ejemplo n.º 6
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients from a rescaled, quantized output gradient.

        ``gy`` is scaled by its per-sample maximum, shifted by a single
        random offset, passed through ``quantize`` and scaled back before
        the usual ``tensordot`` / ``col2im`` gradient computation. The filter
        is clipped and quantized before it is used for the input gradient.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]
        # One scalar noise sample shared by every element of gy.
        N = numpy.random.uniform(-0.5, 0.5)
        # Maximum over every axis except the first.
        coef = numpy.max(gy, axis=tuple(range(1, gy.ndim))).astype(numpy.float32)
        coef = _as_mat(coef)
        gy = _as_mat(gy)
        coef_invert = 0.5*coef**(-1)
        gy = gy*coef_invert+0.5+N/E_g
        gy = quantize(gy, E_g)
        gy = 2*coef*(gy-0.5)
        gy = gy.reshape(grad_outputs[0].shape)
        gW = numpy.tensordot(gy, self.col,
                             ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                            copy=False)

        # NOTE(review): the offset is 5, not 0.5; with a [0, 1] clip this
        # saturates for most values of W -- confirm it is intended.
        W = numpy.clip(W * 0.5 + 5, 0, 1)
        Wq = 2 * quantize(W, E_w) - 1

        gcol = numpy.tensordot(Wq, gy, (0, 1)).astype(x.dtype, copy=False)

        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Ejemplo n.º 7
    def forward_cpu(self, x):
        """Scatter each input value to the window slot given by ``self.indexes``.

        A zero image of the output size is expanded into columns, every input
        element is written to the kernel position encoded in
        ``self.indexes`` (``ky * kw + kx``), and the columns are folded back.

        NOTE(review): the tail of the ``im2col_cpu`` call was cut off in this
        copy; ``cover_all=self.cover_all`` is restored to match the
        ``get_deconv_outsize`` calls above -- confirm.

        Args:
            x: Sequence whose first element is the 4-D input array.

        Returns:
            One-element tuple with the upsampled array.
        """
        self._in_dtype = x[0].dtype

        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)

        up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
        up_y = conv.im2col_cpu(
            up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        # Axis 3 of the column array is the kernel width.
        kernel_w = up_y.shape[3]
        for ni in six.moves.range(up_y.shape[0]):
            for ci in six.moves.range(up_y.shape[1]):
                for oy in six.moves.range(up_y.shape[4]):
                    for ox in six.moves.range(up_y.shape[5]):
                        flat = self.indexes[ni, ci, oy, ox]
                        ky = flat // kernel_w
                        kx = flat % kernel_w
                        up_y[ni, ci, ky, kx, oy, ox] = x[0][ni, ci, oy, ox]
        up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph,
                               self.pw, self.outh, self.outw)
        return up_y,
Ejemplo n.º 8
    def backward_cpu(self, x, gy):
        """Route ``gy[0]`` back to the window positions in ``self.indexes``.

        Uses the MKLDNN kernel when ``mkld.enable_max_poolingF`` allows it and
        a NumPy scatter followed by ``col2im_cpu`` otherwise.

        Args:
            x: Sequence whose first element is the forward input.
            gy: Sequence whose first element is the output gradient.

        Returns:
            One-element tuple with the input gradient.
        """
        if mkld.enable_max_poolingF((x, gy)):
            n, c, h, w = x[0].shape
            # Filled in place by do_backward.
            gx = numpy.empty((n, c, h, w), dtype=x[0].dtype)

            mkldnn.MaxPooling_F32.do_backward(gy[0], x[0], gx, self.indexes,
                                              self.sy, self.sx, self.ph,
                                              self.pd, self.pw, self.pr,
                                              self.kh, self.kw)
            return gx,
        else:
            n, c, out_h, out_w = gy[0].shape
            h, w = x[0].shape[2:]
            kh, kw = self.kh, self.kw

            gcol = numpy.zeros((n * c * out_h * out_w * kh * kw),
                               dtype=x[0].dtype)

            # flatten() copies, so the in-place offset below does not touch
            # self.indexes.
            indexes = self.indexes.flatten()
            # Turn per-window offsets into offsets into the flat buffer.
            indexes += numpy.arange(0, indexes.size * kh * kw, kh * kw)

            gcol[indexes] = gy[0].ravel()
            gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
            gcol = numpy.swapaxes(gcol, 2, 4)
            gcol = numpy.swapaxes(gcol, 3, 5)

            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)
            return gx,
Ejemplo n.º 9
 def test_col2im_consistency(self):
     """Fold the same columns with ``col2im_cpu`` and ``col2im_gpu`` and compare."""
     col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 2, 2, dy=2, dx=2)
     h, w = self.x.shape[2:]
     # Positional and keyword arguments shared by both backends.
     fold_args = (2, 2, 2, 2, h, w)
     dilation = {'dy': 2, 'dx': 2}
     cpu_image = conv.col2im_cpu(col, *fold_args, **dilation)
     gpu_image = conv.col2im_gpu(cuda.to_gpu(col), *fold_args, **dilation)
     testing.assert_allclose(cpu_image, gpu_image.get())
Ejemplo n.º 10
    def forward_cpu(self, inputs):
        """Contract ``W`` with ``x`` and fold the columns into the output.

        ``self.outh`` / ``self.outw`` are computed on first use and cached.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``; all must be NumPy arrays.

        Returns:
            One-element tuple with the output array (bias added if given).

        Raises:
            ValueError: If any input is not a ``numpy.ndarray``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not all(isinstance(i, numpy.ndarray) for i in inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        _, _, h, w = x.shape
        gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # - k, m, n: shape of out_channel
        # - b: number of inputs
        # - h, w: height and width of kernels
        # k, m, n, b, h, w -> b, k, m, n, h, w
        gcol = numpy.rollaxis(gcol, 3)
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        y = conv.col2im_cpu(
            gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
        # b, k, h, w
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
        return y,
    def forward_cpu(self, x):
        """Scatter each input value to the window slot given by ``self.indexes``.

        A zero image of the output size is expanded into columns with the
        kernel axes moved last, the input is written at the flat kernel
        offsets stored in ``self.indexes``, and the columns are folded back.

        NOTE(review): the arguments of ``get_deconv_outsize`` and
        ``im2col_cpu`` were cut off in this copy. They are reconstructed from
        the loop-based variant elsewhere in this file and from the inverse
        transpose applied before ``col2im_cpu`` -- confirm against upstream.

        Args:
            x: Sequence whose first element is the 4-D input array.

        Returns:
            One-element tuple with the upsampled array.
        """
        self._in_dtype = x[0].dtype

        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)

        up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=self._in_dtype)
        # Move the kernel axes last so each row of the 2-D view is one window.
        up_y = conv.im2col_cpu(
            up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all).transpose(0, 1, 4, 5, 2, 3)
        colh, colw = up_y.shape[2:4]
        up_y = up_y.reshape(-1, self.kh * self.kw)
        indexes = self.indexes.ravel()
        up_y[numpy.arange(len(indexes)), indexes] = x[0].ravel()
        up_y = up_y.reshape(n, c, colh, colw, self.kh, self.kw)
        # Undo the transpose before folding.
        up_y = conv.col2im_cpu(up_y.transpose(0, 1, 4, 5, 2, 3), self.sy,
                               self.sx, self.ph, self.pw, self.outh, self.outw)
        return up_y,
Ejemplo n.º 12
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients w.r.t. input, filter and optional bias on CPU.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one.

        Raises:
            ValueError: If ``type_check.same_types`` rejects ``inputs``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col,
                             ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                            copy=False)
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Ejemplo n.º 13
def template(col_shape=(2, 5, 3, 3, 3, 4),
             ksize=(3, 3),
             padding=(1, 1),
             stride=(1, 1),
             description: str = ""):
    """Build a ``Col2Im`` graph plus random input and its expected output.

    NOTE(review): this copy is incomplete -- the ``astype(`` call below has
    lost its argument and the call that receives the trailing keyword
    arguments is missing. ``col_order``, ``im_order`` and
    ``col_chainer_order`` are not defined in this block; presumably they are
    module-level names or dropped parameters -- confirm against the original.
    """
    # Graph under test: a column variable fed through Col2Im.
    col = Variable(col_shape, col_order)
    op = Col2Im(None, ksize, stride, padding)
    im, = op(col)
    im = im.change_order(im_order)

    # Random column data laid out in col_chainer_order's axis order.
    vcol = np.random.rand(*(col.shape_dict[a]
                            for a in col_chainer_order.axes)).astype(
    h1 = get_deconv_outsize(col.shape_dict[Axis.H], op.KH, op.SH, op.PH)
    w1 = get_deconv_outsize(col.shape_dict[Axis.W], op.KW, op.SW, op.PW)
    # Reference result computed with col2im_cpu.
    vim = col2im_cpu(vcol, op.SH, op.SW, op.PH, op.PW, h1, w1)

    # Reorder both arrays into the axis orders used by the graph variables.
    vcol = vcol.transpose(
        [col_chainer_order.axes_dict[a] for a in col_order.axes])
    vim = vim.transpose([OrderNCHW.axes_dict[a] for a in im_order.axes])

    # NOTE(review): the opening of the call taking these keyword arguments is
    # missing from this copy.
        description=f"Col2Im {description}",
        backend=["webgpu", "webgl", "webassembly"],
        graph=Graph([col], [im]),
        inputs={col: vcol},
        expected={im: vim},
Ejemplo n.º 14
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients w.r.t. input, filter and optional bias on CPU.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one. ``gx`` is
            ``None`` when ``self.requires_x_grad`` is false.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col,
                             ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                            copy=False)
        # Skip the input gradient entirely when it is not requested.
        if not self.requires_x_grad:
            gx = None
        else:
            gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
            gcol = numpy.rollaxis(gcol, 3)
            gx = conv.col2im_cpu(gcol,
                                 self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Ejemplo n.º 15
    def forward_cpu(self, x):
        """Scatter each input value to the window slot given by ``self.indexes``.

        NOTE(review): the arguments of ``get_deconv_outsize`` and
        ``im2col_cpu`` were cut off in this copy and are reconstructed from
        the otherwise identical loop-based implementation elsewhere in this
        file -- confirm that ``self.cover_all`` exists on this class.

        Args:
            x: Sequence whose first element is the 4-D input array.

        Returns:
            One-element tuple with the upsampled ``float32`` array.
        """
        n, c, h, w = x[0].shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)

        up_y = numpy.zeros((n, c, self.outh, self.outw), dtype=numpy.float32)
        up_y = conv.im2col_cpu(
            up_y, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        # Axis 3 of the column array is the kernel width.
        kernel_w = up_y.shape[3]
        for ni in six.moves.range(up_y.shape[0]):
            for ci in six.moves.range(up_y.shape[1]):
                for oy in six.moves.range(up_y.shape[4]):
                    for ox in six.moves.range(up_y.shape[5]):
                        flat = self.indexes[ni, ci, oy, ox]
                        ky = flat // kernel_w
                        kx = flat % kernel_w
                        up_y[ni, ci, ky, kx, oy, ox] = x[0][ni, ci, oy, ox]
        up_y = conv.col2im_cpu(up_y, self.sy, self.sx, self.ph, self.pw,
                               self.outh, self.outw)
        return up_y,
Ejemplo n.º 16
 def backward_cpu(self, x, gy):
     """Spread ``gy[0]`` over every ``kh x kw`` window and fold it back.

     The target height/width come from ``self._in_shape``; ``x`` is unused.

     Returns:
         One-element tuple with the folded gradient divided by ``kh * kw``.
     """
     in_h, in_w = self._in_shape[2:]
     window_reps = (1, 1, self.kh, self.kw, 1, 1)
     expanded = gy[0][:, :, None, None]
     grad_in = conv.col2im_cpu(
         numpy.tile(expanded, window_reps),
         self.sy, self.sx, self.ph, self.pw, in_h, in_w)
     grad_in /= self.kh * self.kw
     return (grad_in,)
Ejemplo n.º 17
 def backward_cpu(self, x, gy):
     """Fold a window-tiled copy of ``gy[0]`` to the spatial size of ``x[0]``.

     Returns:
         One-element tuple with the folded gradient divided by ``kh * kw``.
     """
     height, width = x[0].shape[2:]
     # Insert two singleton axes that numpy.tile expands to kh x kw.
     with_window_axes = gy[0][:, :, None, None]
     columns = numpy.tile(
         with_window_axes, (1, 1, self.kh, self.kw, 1, 1))
     result = conv.col2im_cpu(
         columns, self.sy, self.sx, self.ph, self.pw, height, width)
     result /= self.kh * self.kw
     return (result,)
Ejemplo n.º 18
    def forward_cpu(self, inputs):
        """Contract ``W`` with ``x`` and fold the columns into the output.

        ``self.outh`` / ``self.outw`` are computed on first use and cached.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.

        Returns:
            One-element tuple with the output array (bias added if given).

        Raises:
            ValueError: If ``type_check.same_types`` rejects ``inputs``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        kh, kw = W.shape[2:]
        _, _, h, w = x.shape
        gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # - k, m, n: shape of out_channel
        # - b: number of inputs
        # - h, w: height and width of kernels
        # k, m, n, b, h, w -> b, k, m, n, h, w
        gcol = numpy.rollaxis(gcol, 3)
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
        # b, k, h, w
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
        return y,
Ejemplo n.º 19
 def backward_cpu(self, x, gy):
     """Tile ``gy[0]`` over the kernel window and fold to ``self._in_shape``.

     Returns:
         One-element tuple with the folded gradient divided by ``kh * kw``.
     """
     target_h, target_w = self._in_shape[2:]
     kernel_h, kernel_w = self.kh, self.kw
     tiled = numpy.tile(
         gy[0][:, :, None, None], (1, 1, kernel_h, kernel_w, 1, 1))
     folded = conv.col2im_cpu(
         tiled, self.sy, self.sx, self.ph, self.pw, target_h, target_w)
     folded /= kernel_h * kernel_w
     return (folded,)
Ejemplo n.º 20
    def backward_cpu(self, inputs, grad_outputs):
        """Compute masked gradients for input, filter and optional bias.

        Builds ``self.mW`` / ``self.mb`` from ``self.bcoeffs`` /
        ``self.ocoeffs`` when those are set, then multiplies the filter and
        bias gradients by them.

        NOTE(review): this copy is damaged -- several ``else:`` lines appear
        to be missing (statements following ``raise`` / ``return`` are
        unreachable as written) and the ``broadcast_to`` call near the end
        has lost its first argument.
        """
        x, W = inputs[:2]
        if self.bcoeffs is not None:
            olen, ilen, hlen, wlen = W.shape
            if self.coeffs is None:
                self.coeffs = numpy.ones(ilen)
            # Expand bcoeffs with three singleton axes, then broadcast to
            # the filter's shape.
            coeffs = numpy.copy(self.bcoeffs)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 0)        
            coeffs = numpy.broadcast_to(coeffs, W.shape)
            self.mW = numpy.asarray(coeffs,numpy.float32).reshape(W.shape)
        if self.ocoeffs is not None:
            coeffs = numpy.copy(self.ocoeffs)
            self.mb = numpy.asarray(coeffs,numpy.float32)    
        # NOTE(review): self.M is not assigned in this method; presumably it
        # is set elsewhere on the instance -- confirm.
        W = self.M*W
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
                # NOTE(review): unreachable as written; an ``else:`` looks
                # to be missing before this raise.
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        if not self.requires_x_grad:
            gx = None
            # NOTE(review): as written gx is always recomputed below; an
            # ``else:`` looks to be missing here.
            gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
            gcol = numpy.rollaxis(gcol, 3)
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                 h, w)
        # Apply the filter mask only if it was ever built on this instance.
        if hasattr(self,'mW'):
            gW = self.mW * gW        
            if hasattr(self,'mb'):
                xp = cuda.get_array_module(*x)
                # NOTE(review): the first argument of broadcast_to is
                # missing in this copy.
                gW = xp.broadcast_to(
                    ,gW.shape) * gW
        if b is None:
            return gx, gW
            # NOTE(review): unreachable as written; an ``else:`` looks to be
            # missing before the bias branch.
            gb = gy.sum(axis=(0, 2, 3))
            if hasattr(self,'mb'):
                gb = self.mb * gb
            return gx, gW, gb
Ejemplo n.º 21
    def backward_cpu(self, x, gy):
        """Accumulate parameter gradients and return the input gradient.

        ``self.gb`` (when present) and ``self.gW`` are updated in place.

        Returns:
            One-element tuple with the gradient folded to the size of ``x[0]``.
        """
        grad_out = gy[0]
        if self.gb is not None:
            self.gb += grad_out.sum(axis=(0, 2, 3))
        self.gW += numpy.tensordot(grad_out, self.col, ([0, 2, 3], [0, 4, 5]))

        # Contract over the first axis of W, then bring axis 3 to the front.
        grad_col = numpy.rollaxis(numpy.tensordot(self.W, grad_out, (0, 1)), 3)
        in_h, in_w = x[0].shape[2:]
        grad_in = conv.col2im_cpu(
            grad_col, self.sy, self.sx, self.ph, self.pw, in_h, in_w)
        return (grad_in,)
Ejemplo n.º 22
    def backward_cpu(self, x, gy):
        """Add this step's gradients to ``self.gb`` / ``self.gW`` and return ``gx``.

        Returns:
            One-element tuple with the gradient folded to the size of ``x[0]``.
        """
        has_bias_grad = self.gb is not None
        if has_bias_grad:
            self.gb += gy[0].sum(axis=(0, 2, 3))

        filter_grad = numpy.tensordot(
            gy[0], self.col, ([0, 2, 3], [0, 4, 5]))
        self.gW += filter_grad

        columns = numpy.tensordot(self.W, gy[0], (0, 1))
        columns = numpy.rollaxis(columns, 3)

        height, width = x[0].shape[2:]
        folded = conv.col2im_cpu(
            columns, self.sy, self.sx, self.ph, self.pw, height, width)
        return (folded,)
Ejemplo n.º 23
 def _forward_cpu_core(self, x, W, b):
     """Contract ``W`` with ``x``, fold with ``col2im_cpu`` and add the bias.

     Args:
         x: Input array; its dtype is used for the column buffer.
         W: Filter array.
         b: Bias array or ``None``.

     Returns:
         One-element tuple with the output array.
     """
     contracted = numpy.tensordot(W, x, (0, 1))
     columns = numpy.rollaxis(contracted.astype(x.dtype, copy=False), 3)
     out = conv.col2im_cpu(
         columns, self.sy, self.sx, self.ph, self.pw,
         self.outh, self.outw, dy=self.dy, dx=self.dx)
     if b is None:
         return (out,)
     # Broadcast the bias along axis 1 of the output.
     out += b.reshape(1, b.size, 1, 1)
     return (out,)
Ejemplo n.º 24
 def _forward_cpu_core(self, x, W, b):
     """Run the CPU forward pass: ``tensordot``, ``col2im_cpu``, optional bias.

     Returns:
         One-element tuple with the output array.
     """
     col = numpy.tensordot(W, x, (0, 1))
     col = col.astype(x.dtype, copy=False)
     col = numpy.rollaxis(col, 3)

     fold_kwargs = {'dy': self.dy, 'dx': self.dx}
     result = conv.col2im_cpu(
         col, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw,
         **fold_kwargs)

     has_bias = b is not None
     if has_bias:
         bias = b.reshape(1, b.size, 1, 1)
         result += bias
     return (result,)
Ejemplo n.º 25
    def forward_cpu(self, gy):
        """Tile ``gy[0]`` over the kernel window and fold to ``self._in_shape``.

        Delegates to ``self._forward_ideep`` when iDeep is enabled and the
        inputs are ready for it.

        Returns:
            One-element tuple with the folded array divided by ``kh * kw``.
        """
        use_ideep = (intel64.should_use_ideep('>=auto')
                     and intel64.inputs_all_ready(gy))
        if use_ideep:
            return self._forward_ideep(gy)

        in_h, in_w = self._in_shape[2:]
        repeats = (1, 1, self.kh, self.kw, 1, 1)
        tiled = numpy.tile(gy[0][:, :, None, None], repeats)
        grad_in = conv.col2im_cpu(
            tiled, self.sy, self.sx, self.ph, self.pw, in_h, in_w)
        grad_in /= self.kh * self.kw
        return (grad_in,)
Ejemplo n.º 26
    def forward_cpu(self, gy):
        """Fold a window-tiled copy of ``gy[0]``, or defer to the iDeep path.

        Returns:
            One-element tuple with the folded array divided by ``kh * kw``.
        """
        if intel64.should_use_ideep('>=auto'):
            if intel64.inputs_all_ready(gy):
                return self._forward_ideep(gy)

        height, width = self._in_shape[2:]
        kernel_h, kernel_w = self.kh, self.kw
        columns = numpy.tile(
            gy[0][:, :, None, None], (1, 1, kernel_h, kernel_w, 1, 1))
        folded = conv.col2im_cpu(
            columns, self.sy, self.sx, self.ph, self.pw, height, width)
        folded /= kernel_h * kernel_w
        return (folded,)
Ejemplo n.º 27
    def backward_cpu(self, x, gy):
        """Scatter ``gy[0]`` to the window slots in ``self.indexes`` and fold.

        Returns:
            One-element tuple with the ``float32`` gradient folded to the
            spatial size of ``x[0]``.
        """
        grad_out = gy[0]
        n, c, out_h, out_w = grad_out.shape
        in_h, in_w = x[0].shape[2:]
        col_shape = (n, c, self.kh, self.kw, out_h, out_w)
        gcol = numpy.zeros(col_shape, dtype=numpy.float32)

        # TODO(beam2d): Make it fast
        # View of gcol with the flattened kernel axis first; writes through
        # this view land in gcol.
        by_slot = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2)
        for pos in numpy.ndindex(n, c, out_h, out_w):
            slot = by_slot[self.indexes[pos]]
            slot[pos] = grad_out[pos]

        grad_in = conv.col2im_cpu(
            gcol, self.sy, self.sx, self.ph, self.pw, in_h, in_w)
        return (grad_in,)
Ejemplo n.º 28
 def forward(self, x):
     """Tile the input over a ``kh x kw`` window and fold it with col2im.

     ``self.outh`` / ``self.outw`` are computed on first use and cached.

     Args:
         x: Sequence whose first element is the input array.

     Returns:
         One-element tuple with the folded array.
     """
     h, w = x[0].shape[2:]
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     xp = cuda.get_array_module(*x)
     col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis], (1, 1, self.kh, self.kw, 1, 1))
     # Exactly one backend must run; previously the CPU call always
     # overwrote the GPU result.
     if isinstance(x[0], cuda.ndarray):
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     else:
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     return (y,)
Ejemplo n.º 29
    def backward_cpu(self, x, gy):
        """Scatter ``gy[0]`` to the window slots in ``self.indexes`` and fold.

        NOTE(review): the ``dtype`` argument of ``numpy.zeros`` was cut off
        in this copy; ``numpy.float32`` is restored to match the otherwise
        identical implementation elsewhere in this file -- confirm.

        Returns:
            One-element tuple with the gradient folded to the spatial size
            of ``x[0]``.
        """
        n, c, out_h, out_w = gy[0].shape
        h, w = x[0].shape[2:]
        gcol = numpy.zeros((n, c, self.kh, self.kw, out_h, out_w),
                           dtype=numpy.float32)

        # TODO(beam2d): Make it fast
        # View of gcol with the flattened kernel axis first; writes through
        # this view land in gcol.
        gcol_r = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2)
        for i in numpy.ndindex(n, c, out_h, out_w):
            gcol_r[self.indexes[i]][i] = gy[0][i]

        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        return gx,
Ejemplo n.º 30
    def forward(self, x):
        """Fold a window-tiled input, selecting slots via the indexes in ``x[1]``.

        NOTE(review): this copy is damaged -- the ``get_deconv_outsize``
        calls are cut off, the assignment inside the innermost loop is
        garbled, and an ``else:`` appears to be missing before the final
        ``col2im_cpu`` call.
        """
        h, w = x[0].shape[2:]
        n = x[0].shape[0]
        c = x[0].shape[1]
        indexes = x[1]

        # Output size is computed lazily and cached on the instance.
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h,
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w,
        xp = cuda.get_array_module(*x)

        col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                      (1, 1, self.kh, self.kw, 1, 1))

        # NOTE(hvy): Take indexes(Switches) into account
        # TODO(hvy): Remove the loops and make it efficient
        y = xp.zeros_like(col)
        # Indexes are copied to host memory so the Python loops can read them.
        if isinstance(x[0], cuda.ndarray):
            indexes = cuda.cupy.asnumpy(indexes)

        for n_i in range(n):
            for c_i in range(c):
                for r in range(h):
                    # NOTE(review): this loop variable rebinds ``c`` (the
                    # channel count read above).
                    for c in range(w):
                        index = indexes[n_i][c_i][r][c]
                        if index < self.kw:
                            y[n_i][c_i].T[c][r][index][0] = col[n_i][c_i].T[c][
                                index %
                                self.kw][1] = col[n_i][c_i].T[c][r][index %

        if isinstance(x[0], cuda.ndarray):
            y = conv.col2im_gpu(y, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)
            # NOTE(review): as written the CPU call below also runs on the
            # GPU path; an ``else:`` looks to be missing.
            y = conv.col2im_cpu(y, self.sy, self.sx, self.ph, self.pw,
                                self.outh, self.outw)

        return y,
Ejemplo n.º 31
 def forward_cpu(self, x):
     """Contract ``self.W`` with ``x[0]`` and fold the columns into the output.

     Args:
         x: Sequence whose first element is the 4-D input array.

     Returns:
         One-element tuple with the output array (``self.b`` added if set).
     """
     # Only the spatial size is needed; the unpack still requires 4 axes.
     _, _, h, w = x[0].shape
     gcol = numpy.tensordot(self.W, x[0], (0, 1))
     # k, m, n, b, h, w
     gcol = numpy.rollaxis(gcol, 3)
     # b, k, m, n, h, w
     h_ = get_deconv_outsize(h, self.kh, self.sy, self.ph)
     w_ = get_deconv_outsize(w, self.kw, self.sx, self.pw)
     y = conv.col2im_cpu(
         gcol, self.sy, self.sx, self.ph, self.pw, h_, w_)
     # b, k, h, w
     if self.b is not None:
         y += self.b.reshape(1, self.b.size, 1, 1)
     return y,
Ejemplo n.º 32
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients w.r.t. input, filter and optional bias on CPU.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW, gb)`` when three inputs were given, else ``(gx, gW)``.
        """
        x, W = inputs[:2]
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5)))
        gcol = numpy.tensordot(W, gy, (0, 1))
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        # Without the else branch the no-bias case returned None.
        if len(inputs) == 3:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
        else:
            return gx, gW
Ejemplo n.º 33
    def backward(self, inputs, grad_outputs):
        """Fold the output gradient back to the input's spatial size.

        Args:
            inputs: One-element sequence holding ``x``.
            grad_outputs: One-element sequence holding ``gy``.

        Returns:
            One-element tuple with the input gradient.
        """
        x, = inputs
        xp = cuda.get_array_module(x)
        gy, = grad_outputs

        n, _, out_h, out_w = gy.shape
        _, c, h, w = x.shape
        # Split axis 1 of gy into (c, kh, kw).
        gy = gy.reshape(n, c, self.kh, self.kw, out_h, out_w)
        # Exactly one backend must run; previously the GPU call always
        # overwrote the CPU result.
        if xp is numpy:
            gx = col2im_cpu(
                gy, self.sy, self.sx, self.ph, self.pw, h, w, self.dy, self.dx)
        else:
            gx = col2im_gpu(
                gy, self.sy, self.sx, self.ph, self.pw, h, w, self.dy, self.dx)
        return gx,
Ejemplo n.º 34
    def backward(self, inputs, grad_outputs):
        """Fold the output gradient back to the input's spatial size.

        Args:
            inputs: One-element sequence holding ``x``.
            grad_outputs: One-element sequence holding ``gy``.

        Returns:
            One-element tuple with the input gradient.
        """
        x, = inputs
        xp = cuda.get_array_module(x)
        gy, = grad_outputs

        n, _, out_h, out_w = gy.shape
        _, c, h, w = x.shape
        # Split axis 1 of gy into (c, kh, kw).
        gy = gy.reshape(n, c, self.kh, self.kw, out_h, out_w)
        # Exactly one backend must run; previously the GPU call always
        # overwrote the CPU result.
        if xp is numpy:
            gx = col2im_cpu(gy, self.sy, self.sx, self.ph, self.pw, h, w,
                            self.dy, self.dx)
        else:
            gx = col2im_gpu(gy, self.sy, self.sx, self.ph, self.pw, h, w,
                            self.dy, self.dx)
        return gx,
Ejemplo n.º 35
    def forward_cpu(self, x):
        """Copy ``x[0]`` into every kernel slot and fold with ``col2im_cpu``.

        NOTE(review): the ``dtype`` of the column buffer and the final
        argument of ``col2im_cpu`` were cut off in this copy.
        ``numpy.float32`` and ``self.w`` are restored by analogy with the
        sibling implementations in this file -- confirm.

        Args:
            x: Sequence whose first element is the 4-D input array.

        Returns:
            One-element tuple with the folded array.
        """
        n, c, out_h, out_w = x[0].shape
        gcol = numpy.zeros((n, c, self.kh, self.kw, out_h, out_w),
                           dtype=numpy.float32)

        # TODO(beam2d): Make it fast
        # View of gcol with the flattened kernel axis first; writes through
        # this view land in gcol.
        gcol_r = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2)
        for i in numpy.ndindex(n, c, out_h, out_w):
            #            gcol_r[self.indexes[i]][i] = x[0][i]
            for j in range(gcol_r.shape[0]):
                gcol_r[j][i] = x[0][i]

        y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, self.h,
                            self.w)
        return y,
Ejemplo n.º 36
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients w.r.t. input, filter and optional bias on CPU.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW, gb)`` when three inputs were given, else ``(gx, gW)``.
        """
        x, W = inputs[:2]
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5)))
        gcol = numpy.tensordot(W, gy, (0, 1))
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        # Without the else branch the no-bias case returned None.
        if len(inputs) == 3:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
        else:
            return gx, gW
Ejemplo n.º 37
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients using a sign-binarized copy of the filter.

        ``gx`` is computed from ``Wb`` (``+1`` where ``W >= 0``, else ``-1``).

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one.
        """
        x, W = inputs[:2]
        Wb = numpy.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5)))
        gcol = numpy.tensordot(Wb, gy, (0, 1))
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Ejemplo n.º 38
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients w.r.t. input, filter and optional bias on CPU.

        ``self.dy`` / ``self.dx`` are forwarded to ``col2im_cpu``.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w, dy=self.dy, dx=self.dx)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
    def backward_cpu(self, inputs, grad_outputs):
        """Compute gradients using a sign-binarized copy of the filter.

        ``gx`` is computed from ``Wb`` (``+1`` where ``W >= 0``, else ``-1``).

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one.
        """
        x, W = inputs[:2]
        Wb = numpy.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col, ((0, 2, 3), (0, 4, 5)))
        gcol = numpy.tensordot(Wb, gy, (0, 1))
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Ejemplo n.º 40
    def backward_cpu(self, x, gy):
        """Route ``gy[0]`` back to the window positions in ``self.indexes``.

        Args:
            x: Sequence whose first element is the forward input.
            gy: Sequence whose first element is the output gradient.

        Returns:
            One-element tuple with the gradient folded to the spatial size
            of ``x[0]``.
        """
        n, c, out_h, out_w = gy[0].shape
        h, w = x[0].shape[2:]
        kh, kw = self.kh, self.kw

        gcol = numpy.zeros((n * c * out_h * out_w * kh * kw), dtype=x[0].dtype)

        # flatten() always copies. ravel() may return a view, in which case
        # the in-place offset below would corrupt self.indexes for any
        # later call.
        indexes = self.indexes.flatten()
        # Turn per-window offsets into offsets into the flat buffer.
        indexes += numpy.arange(0, indexes.size * kh * kw, kh * kw)

        gcol[indexes] = gy[0].ravel()
        gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
        gcol = numpy.swapaxes(gcol, 2, 4)
        gcol = numpy.swapaxes(gcol, 3, 5)

        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        return gx,
    def backward(self, inputs, grad_outputs):
        """Compute per-channel gradients using a binarized, masked filter.

        The filter is replaced by its sign (``+1`` / ``-1``) multiplied by
        ``self.M`` before the gradients are computed with batched matrix
        products.

        NOTE(review): the tails of the two ``col2im`` calls were cut off in
        this copy; ``w)`` is restored to match every other call in this
        file.

        Args:
            inputs: ``(x, W)`` or ``(x, W, b)``.
            grad_outputs: Sequence whose first element is ``gy``.

        Returns:
            ``(gx, gW)`` without a bias, ``(gx, gW, gb)`` with one.
        """
        x, W = inputs[:2]

        xp = cuda.get_array_module(*x)
        W = xp.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)

        W = self.M * W
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]

        # (B, C*D, IY, IX) -> (C, D, B*IY*IX)
        gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
            .reshape((C, D, B * IY * IX))
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
        gW_ = _matmul(gy_, c_, xp)
        gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
        gW = gW.astype(W.dtype, copy=False)

        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
        gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
        gcol = gcol.astype(x.dtype, copy=False)
        gcol = xp.rollaxis(gcol, 3)

        # Exactly one backend must run.
        if xp is numpy:
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)
        else:
            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

        if b is None:
            return gx, gW
        else:
            gy = xp.rollaxis(gy, 1, 4)
            gb = gy.sum(axis=(0, 1, 2))
            return gx, gW, gb
Ejemplo n.º 42
    def backward_cpu(self, x, gy):
        """Route ``gy[0]`` back to the window positions in ``self.indexes``.

        The target size comes from ``self._in_shape`` and the buffer dtype
        from ``self._in_dtype``; ``x`` is unused.

        Returns:
            One-element tuple with the folded input gradient.
        """
        grad_out = gy[0]
        n, c, out_h, out_w = grad_out.shape
        in_h, in_w = self._in_shape[2:]
        kh, kw = self.kh, self.kw
        window = kh * kw

        flat = numpy.zeros(
            (n * c * out_h * out_w * kh * kw), dtype=self._in_dtype)

        # flatten() copies, so self.indexes is left untouched.
        offsets = self.indexes.flatten()
        offsets += numpy.arange(0, offsets.size * kh * kw, window)
        flat[offsets] = grad_out.ravel()

        # (n, c, out_h, out_w, kh, kw) -> (n, c, kh, kw, out_h, out_w)
        columns = flat.reshape(n, c, out_h, out_w, kh, kw)
        columns = numpy.swapaxes(numpy.swapaxes(columns, 2, 4), 3, 5)

        grad_in = conv.col2im_cpu(
            columns, self.sy, self.sx, self.ph, self.pw, in_h, in_w)
        return (grad_in,)
Ejemplo n.º 43
    def backward_cpu(self, x, gy):
        """Spread ``gy[0]`` evenly over each ``kh x kw`` window.

        Uses the MKLDNN kernel when ``mkld.enable_avg_poolingF`` allows it
        and a NumPy tile followed by ``col2im_cpu`` otherwise.

        NOTE(review): the last argument of ``do_backward`` and of
        ``col2im_cpu`` was cut off in this copy; ``self.kw`` and ``w`` are
        restored by analogy with the sibling calls in this file -- confirm.

        Args:
            x: Sequence whose first element is the forward input.
            gy: Sequence whose first element is the output gradient.

        Returns:
            One-element tuple with the input gradient.
        """
        if mkld.enable_avg_poolingF((x, gy)):
            n, c, h, w = x[0].shape
            # Filled in place by do_backward.
            gx = numpy.empty((n, c, h, w), dtype=x[0].dtype)

            mkldnn.AvgPooling_F32.do_backward(gy[0], x[0], gx, self.sy,
                                              self.sx, self.ph, self.pd,
                                              self.pw, self.pr, self.kh,
                                              self.kw)
            return gx,
        else:
            h, w = x[0].shape[2:]
            gcol = numpy.tile(gy[0][:, :, None, None],
                              (1, 1, self.kh, self.kw, 1, 1))
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)
            gx /= self.kh * self.kw
        return gx,
Ejemplo n.º 44
 def forward(self, x):
     """Expand ``x[0]`` to the output size by tiling and folding.

     The output height/width are computed with
     ``conv.get_deconv_outsize`` the first time they are needed and
     cached on ``self.outh`` / ``self.outw``.  Each input value is
     repeated over a ``kh x kw`` window and the resulting columns are
     folded into an image by the CPU or GPU ``col2im`` routine, chosen
     from the array module of the input.

     Returns a one-element tuple holding the output array.
     """
     h, w = x[0].shape[2:]
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(
             h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(
             w, self.kw, self.sx, self.pw, cover_all=self.cover_all)
     xp = cuda.get_array_module(*x)
     col = xp.tile(x[0][:, :, None, None],
                   (1, 1, self.kh, self.kw, 1, 1))
     # The two folds are alternatives.  Without the `else`, the CPU path
     # would also call the GPU kernel on a NumPy array, and the GPU path
     # would never assign `y`.
     if xp is numpy:
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     else:
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     return y,
Ejemplo n.º 45
 def forward_cpu(self, inputs):
     """Run the forward pass with NumPy.

     ``inputs`` carries ``x`` and ``W`` and, when it has exactly three
     entries, a bias ``b``.  The output size is taken from
     ``self.outh`` / ``self.outw`` or, when unset, computed with
     ``conv.get_deconv_outsize`` and cached.

     Returns a one-element tuple holding the output array.
     """
     x, W = inputs[:2]
     if len(inputs) == 3:
         b = inputs[2]
     else:
         b = None
     kh, kw = W.shape[2:]
     _, _, h, w = x.shape

     # Contract axis 0 of W with axis 1 of x.  The product keeps W's
     # remaining three axes followed by x's remaining three, so x's
     # leading axis sits at position 3; move it to the front.
     product = numpy.tensordot(W, x, (0, 1))
     gcol = product.transpose(3, 0, 1, 2, 4, 5)

     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)

     y = conv.col2im_cpu(
         gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     if b is not None:
         # Broadcast the bias along axis 1 of the output.
         y += b.reshape(1, b.size, 1, 1)
     return y,
Ejemplo n.º 46
 def forward_cpu(self, inputs):
     """Compute the forward output on CPU from ``x``, ``W`` and optional ``b``.

     A third entry in ``inputs`` is used as the bias only when
     ``inputs`` has exactly three elements.  Missing output sizes are
     derived via ``conv.get_deconv_outsize`` and stored on ``self``.

     Returns ``(y,)``.
     """
     x, W = inputs[:2]
     b = None
     if len(inputs) == 3:
         b = inputs[2]
     kh, kw = W.shape[2:]
     _, _, h, w = x.shape

     # tensordot pairs W's axis 0 with x's axis 1; rollaxis then brings
     # axis 3 of the product (x's leading axis) to position 0.
     gcol = numpy.rollaxis(numpy.tensordot(W, x, (0, 1)), 3)

     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)

     out_h, out_w = self.outh, self.outw
     y = conv.col2im_cpu(
         gcol, self.sy, self.sx, self.ph, self.pw, out_h, out_w)
     if b is not None:
         # One bias value per index along axis 1 of y.
         y += b.reshape(1, b.size, 1, 1)
     return y,
Ejemplo n.º 47
    def backward(self, inputs, grad_outputs):
        """Compute gradients with respect to ``x``, ``W`` and optionally ``b``.

        ``inputs`` holds ``x`` and ``W`` and, when it has exactly three
        entries, a bias ``b``.  ``grad_outputs[0]`` is the gradient of the
        output.  The column buffer saved on ``self.col`` (shape
        ``(B, C, KY, KX, IY, IX)``) is reused for the weight gradient.

        Returns ``(gx, gW)`` when there is no bias, otherwise
        ``(gx, gW, gb)``.
        """
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None
        gy = grad_outputs[0]
        h, w = x.shape[2:]

        # Passing the array itself is enough to pick the module; unpacking
        # it with `*x` would hand one slice per leading index to the helper.
        xp = cuda.get_array_module(x)

        B, C, KY, KX, IY, IX = self.col.shape
        D = W.shape[0]

        # (B, C*D, IY, IX) -> (C, D, B*IY*IX)
        gy_ = gy.reshape((B, C, D, IY * IX)).transpose(1, 2, 0, 3) \
            .reshape((C, D, B * IY * IX))
        # (B, C, KY, KX, IY, IX) -> (C, B*IY*IX, KY*KX)
        c_ = self.col.transpose(1, 0, 4, 5, 2, 3) \
            .reshape((C, B * IY * IX, KY * KX))
        # (C, D, B*IY*IX), (C, B*IY*IX, KY*KX) -> (C, D, KY*KX)
        gW_ = _matmul(gy_, c_, xp)
        gW = gW_.reshape((C, D, KY, KX)).transpose(1, 0, 2, 3)
        gW = gW.astype(W.dtype, copy=False)

        w_ = W.transpose(1, 2, 3, 0).reshape((C, KY * KX, D))
        # (C, KY*KX, D), (C, D, B*IY*IX) -> (C, KY*KX, B*IY*IX)
        gcol = _matmul(w_, gy_, xp).reshape((C, KY, KX, B, IY, IX))
        gcol = gcol.astype(x.dtype, copy=False)
        # (C, KY, KX, B, IY, IX) -> (B, C, KY, KX, IY, IX)
        gcol = xp.rollaxis(gcol, 3)

        # CPU and GPU folds are alternatives; exactly one must run.
        if xp is numpy:
            gx = conv.col2im_cpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)
        else:
            gx = conv.col2im_gpu(gcol, self.sy, self.sx,
                                 self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            # (B, C*D, IY, IX) -> (B, IY, IX, C*D), then sum everything
            # except the last axis.
            gy = xp.rollaxis(gy, 1, 4)
            gb = gy.sum(axis=(0, 1, 2))
            return gx, gW, gb
Ejemplo n.º 48
 def forward_cpu(self, inputs):
     """Compute the forward output on CPU, casting to the dtype of ``x``.

     ``inputs`` provides ``x`` and ``W`` and, when it has exactly three
     entries, a bias ``b``.  When ``self.outh`` / ``self.outw`` are
     unset they are computed with ``conv.get_deconv_outsize`` and
     asserted to be positive.

     Returns a one-element tuple holding the output array.
     """
     x, W = inputs[:2]
     b = None
     if len(inputs) == 3:
         b = inputs[2]
     kh, kw = W.shape[2:]
     _, _, h, w = x.shape

     # Contract W's axis 0 with x's axis 1 and keep the dtype of x.
     product = numpy.tensordot(W, x, (0, 1))
     product = product.astype(x.dtype, copy=False)
     # Axis 3 of the product is x's leading axis; bring it to the front.
     gcol = numpy.rollaxis(product, 3)

     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
         assert self.outh > 0, "Height in the output should be positive."
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
         assert self.outw > 0, "Width in the output should be positive."

     y = conv.col2im_cpu(
         gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
     if b is not None:
         # Add one bias value per index along axis 1 of y.
         y += b.reshape(1, b.size, 1, 1)
     return y,
Ejemplo n.º 49
    def forward_cpu(self, gy):
        """Compute the input gradient on CPU from the output gradient.

        Delegates to ``self._forward_ideep`` when
        ``intel64.should_use_ideep('>=auto')`` and
        ``intel64.inputs_all_ready(gy)`` both hold.  Otherwise each value
        of ``gy[0]`` is scattered into a zero-filled column buffer at the
        slot selected by ``self.indexes`` and the buffer is folded with
        ``conv.col2im_cpu``.

        Returns a one-element tuple holding the input gradient.
        """
        if intel64.should_use_ideep('>=auto'):
            if intel64.inputs_all_ready(gy):
                return self._forward_ideep(gy)

        grad_out = gy[0]
        n, c, out_h, out_w = grad_out.shape
        h, w = self._in_shape[2:]
        window = self.kh * self.kw

        flat_col = numpy.zeros(
            n * c * out_h * out_w * window, dtype=self._in_dtype)

        # flatten() copies, so self.indexes is left untouched by the
        # in-place shift to each window's base offset.
        slots = self.indexes.flatten()
        slots += numpy.arange(0, slots.size * window, window)
        flat_col[slots] = grad_out.ravel()

        # (n, c, out_h, out_w, kh, kw) -> (n, c, kh, kw, out_h, out_w)
        gcol = flat_col.reshape(n, c, out_h, out_w, self.kh, self.kw)
        gcol = gcol.transpose(0, 1, 4, 5, 2, 3)

        return conv.col2im_cpu(
            gcol, self.sy, self.sx, self.ph, self.pw, h, w),
Ejemplo n.º 50
    def forward_cpu(self, gy):
        """Compute the input gradient on CPU from the output gradient.

        Delegates to ``self._forward_ideep`` when iDeep is enabled and the
        inputs are ready for it.  Otherwise each value of ``gy[0]`` is
        written into a zero-filled flat column buffer at the slot selected
        by ``self.indexes``, and the buffer is folded back to an
        ``(h, w)`` image with ``conv.col2im_cpu``.

        Returns a one-element tuple holding the input gradient.
        """
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(gy)):
            return self._forward_ideep(gy)

        n, c, out_h, out_w = gy[0].shape
        h, w = self._in_shape[2:]
        kh, kw = self.kh, self.kw

        # The allocation was cut off mid-call in the listing; the buffer
        # uses the input dtype recorded on the instance.
        gcol = numpy.zeros((n * c * out_h * out_w * kh * kw),
                           dtype=self._in_dtype)

        # Shift every in-window index to the start of its own window in
        # the flat buffer (a new array; self.indexes is not modified).
        indexes = self.indexes.ravel() + numpy.arange(
            0, self.indexes.size * kh * kw, kh * kw)

        gcol[indexes] = gy[0].ravel()
        gcol = gcol.reshape(n, c, out_h, out_w, kh, kw)
        # (n, c, out_h, out_w, kh, kw) -> (n, c, kh, kw, out_h, out_w)
        gcol = numpy.swapaxes(gcol, 2, 4)
        gcol = numpy.swapaxes(gcol, 3, 5)

        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        return gx,
Ejemplo n.º 51
 def forward(self, x):
     # Tiles x[0] over a kh x kw window and folds the columns into an
     # image of size (self.outh, self.outw) with col2im_gpu or col2im_cpu.
     # self.outh / self.outw are filled in via conv.get_deconv_outsize
     # when they are still None.
     #
     # NOTE(review): this listing looks truncated.  Both
     # conv.get_deconv_outsize(...) calls are cut off after their first
     # argument, and the col2im_cpu call appears to have lost the `else:`
     # that should separate it from the GPU branch.  Restore the missing
     # text from the upstream file; as written the block does not parse.
     h, w = x[0].shape[2:]
     if self.outh is None:
         self.outh = conv.get_deconv_outsize(h,
     if self.outw is None:
         self.outw = conv.get_deconv_outsize(w,
     xp = cuda.get_array_module(*x)
     col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis],
                   (1, 1, self.kh, self.kw, 1, 1))
     if isinstance(x[0], cuda.ndarray):
         y = conv.col2im_gpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
         y = conv.col2im_cpu(col, self.sy, self.sx, self.ph, self.pw,
                             self.outh, self.outw)
     return y,
Ejemplo n.º 52
    def forward_cpu(self, inputs):
        # CPU forward pass: contracts W with x via tensordot, moves axis 3
        # of the product to the front, folds the result with
        # conv.col2im_cpu and adds the bias when one is given.
        #
        # NOTE(review): this listing looks truncated.  The three-input
        # unpacking appears to have lost its `else:` line, and the
        # conv.col2im_cpu(gcol, ...) call is cut off after its first
        # argument.  Restore the missing text from the upstream file; as
        # written the block does not parse.
        self.retain_inputs((0, 1))  # only retain x and W
        if len(inputs) == 2:
            (x, W), b = inputs, None
            x, W, b = inputs

        self._calc_out_size(x, W)

        # The product is cast to the dtype of x without copying when the
        # dtype already matches.
        gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        y = conv.col2im_cpu(gcol,
        # b, k, h, w
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
        return y,
Ejemplo n.º 53
    def forward_cpu(self, x):
        """Place each input value at its indexed slot of the enlarged output.

        Records the input dtype on ``self._in_dtype``, derives
        ``self.outh`` / ``self.outw`` with ``conv.get_deconv_outsize``
        when they are unset, writes every value of ``x[0]`` into the
        window slot given by ``self.indexes``, and folds the windows back
        into an image with ``conv.col2im_cpu``.

        Returns a one-element tuple holding the output array.
        """
        src = x[0]
        self._in_dtype = src.dtype

        n, c, h, w = src.shape
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(
                h, self.kh, self.sy, self.ph, cover_all=self.cover_all)
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(
                w, self.kw, self.sx, self.pw, cover_all=self.cover_all)

        # Unfold an all-zero output-sized image to obtain the column
        # layout, with the two window axes moved to the end.
        blank = numpy.zeros(
            (n, c, self.outh, self.outw), dtype=self._in_dtype)
        cols = conv.im2col_cpu(
            blank, self.kh, self.kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        cols = cols.transpose(0, 1, 4, 5, 2, 3)
        colh, colw = cols.shape[2:4]

        # One row per window; write each input value into the column
        # selected by its index.
        rows = cols.reshape(-1, self.kh * self.kw)
        picks = self.indexes.ravel()
        rows[numpy.arange(len(picks)), picks] = src.ravel()

        # Restore the (n, c, kh, kw, colh, colw) layout and fold.
        windows = rows.reshape(n, c, colh, colw, self.kh, self.kw)
        up_y = conv.col2im_cpu(
            windows.transpose(0, 1, 4, 5, 2, 3), self.sy, self.sx, self.ph,
            self.pw, self.outh, self.outw)
        return up_y,
Ejemplo n.º 54
 def test_col2im_consistency(self):
     """Check that ``col2im_cpu`` and ``col2im_gpu`` give matching images."""
     col = conv.im2col_cpu(self.x, 3, 3, 2, 2, 1, 1)
     h, w = self.x.shape[2:]
     # Both folds receive the same arguments after the column array.
     fold_args = (2, 2, 1, 1, h, w)
     im_cpu = conv.col2im_cpu(col, *fold_args)
     im_gpu = conv.col2im_gpu(cuda.to_gpu(col), *fold_args)
     gradient_check.assert_allclose(im_cpu, im_gpu.get())
Ejemplo n.º 55
def _col2im(x, *args, **kwargs):
    """Fold columns with the CPU or GPU routine depending on the type of ``x``.

    A ``numpy.ndarray`` goes to ``col2im_cpu``; anything else goes to
    ``col2im_gpu``.  All remaining arguments are forwarded unchanged.
    """
    fold = col2im_cpu if isinstance(x, numpy.ndarray) else col2im_gpu
    return fold(x, *args, **kwargs)