Example 1
    def _forward_xp_core(self, x, gy, xp):
        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0,) + tuple(moves.range(2, self.ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0,) + tuple(moves.range(self.ndim + 2, self.ndim * 2 + 2))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (xp is numpy and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        if xp is numpy:
            col = conv_nd.im2col_nd_cpu(
                x, self.ksize, self.stride, self.pad,
                cover_all=self.cover_all, dilate=self.dilate)
        else:
            col = conv_nd.im2col_nd_gpu(
                x, self.ksize, self.stride, self.pad,
                cover_all=self.cover_all, dilate=self.dilate)
        gW = xp.tensordot(gy, col, (out_axes, col_axes)).astype(
            self.W_dtype, copy=False)
        return gW,
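
For orientation, here is a minimal sketch with toy 2-D shapes (an added illustration, not part of the snippet; it uses the CPU variant so it runs without CUDA, and assumes conv_nd is importable from chainer.utils): im2col returns an array of shape (n, c_I, k_1, ..., k_N, out_1, ..., out_N), so contracting gy over the batch and output axes against the trailing output axes of the patch array leaves a weight gradient of shape (c_O, c_I, k_1, ..., k_N).

    import numpy
    from chainer.utils import conv_nd

    n, c_in, c_out, ndim = 2, 3, 4, 2                         # toy 2-D convolution
    x = numpy.random.rand(n, c_in, 8, 8).astype(numpy.float32)
    gy = numpy.random.rand(n, c_out, 7, 7).astype(numpy.float32)

    col = conv_nd.im2col_nd_cpu(x, (2, 2), (1, 1), (0, 0))
    # col.shape == (n, c_in, k_1, k_2, out_1, out_2) == (2, 3, 2, 2, 7, 7)

    out_axes = (0,) + tuple(range(2, ndim + 2))               # (0, 2, 3): n, out_1, out_2 of gy
    col_axes = (0,) + tuple(range(ndim + 2, ndim * 2 + 2))    # (0, 4, 5): n, out_1, out_2 of col
    gW = numpy.tensordot(gy, col, (out_axes, col_axes))
    # gW.shape == (c_out, c_in, k_1, k_2) == (4, 3, 2, 2)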
Example 2
    def _forward_xp(self, x, W, b, xp):
        ndim = self.ndim
        ksize = W.shape[2:]
        stride = self.stride
        pad = self.pad

        # Make patch array.
        if xp is numpy:
            self.col = conv_nd.im2col_nd_cpu(x,
                                             ksize,
                                             stride,
                                             pad,
                                             cover_all=self.cover_all)
        else:
            self.col = conv_nd.im2col_nd_gpu(x,
                                             ksize,
                                             stride,
                                             pad,
                                             cover_all=self.cover_all)

        # Compute correlation.
        axes = tuple(moves.range(1, ndim + 2))  # (1, 2, ..., N+1)
        y = xp.tensordot(self.col, W, (axes, axes)).astype(x.dtype)

        # Apply bias if given.
        if b is not None:
            y += b

        # Move c_O to the second axis: (n, y_1, ..., y_N, c_O) -> (n, c_O, y_1, ..., y_N).
        return xp.rollaxis(y, ndim + 1, 1),
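
A matching toy sketch of the forward correlation (assumed shapes, illustration only): contracting the patch array with W over the input-channel and kernel axes gives (n, out_1, ..., out_N, c_O), and rollaxis then moves c_O next to the batch axis.

    import numpy
    from chainer.utils import conv_nd

    ndim = 2
    x = numpy.random.rand(2, 3, 8, 8).astype(numpy.float32)   # (n, c_I, d_1, d_2)
    W = numpy.random.rand(4, 3, 2, 2).astype(numpy.float32)   # (c_O, c_I, k_1, k_2)

    col = conv_nd.im2col_nd_cpu(x, (2, 2), (1, 1), (0, 0))    # (n, c_I, k_1, k_2, out_1, out_2)
    axes = tuple(range(1, ndim + 2))                          # (1, 2, 3): c_I, k_1, k_2
    y = numpy.tensordot(col, W, (axes, axes))                 # (n, out_1, out_2, c_O)
    y = numpy.rollaxis(y, ndim + 1, 1)                        # (n, c_O, out_1, out_2) == (2, 4, 7, 7)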
Example 3
    def _backward_xp(self, x, W, b, gy, xp):
        ndim = self.ndim
        ksize = W.shape[2:]
        stride = self.stride
        pad = self.pad
        if xp is numpy:
            col = conv_nd.im2col_nd_cpu(gy, ksize, stride, pad)
        else:
            col = conv_nd.im2col_nd_gpu(gy, ksize, stride, pad)

        # x  : n, C_I, d_1, d_2, ..., d_N
        # col: n, C_O, k_1, k_2, ..., k_N, d_1, d_2, ..., d_N
        x_axes = (0,) + tuple(six.moves.range(2, ndim + 2))
        col_axes = (0,) + tuple(six.moves.range(ndim + 2, ndim * 2 + 2))
        gW = xp.tensordot(x, col, (x_axes, col_axes)).astype(
            W.dtype, copy=False)

        # col: n, C_O, k_1, k_2, ..., k_N, d_1, d_2, ..., d_N
        # W  : C_I, C_O, k_1, k_2, ..., k_N
        axes = (1,) + tuple(six.moves.range(2, ndim + 2))
        gx = xp.tensordot(col, W, (axes, axes)).astype(x.dtype, copy=False)
        gx = xp.rollaxis(gx, ndim + 1, 1)

        if b is None:
            return gx, gW
        else:
            sum_axis = (0,) + tuple(six.moves.range(2, ndim + 2))
            gb = gy.sum(axis=sum_axis)
            return gx, gW, gb
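
A 1-D toy sketch of the two contractions above (shapes assumed from the comments, i.e. W laid out as (C_I, C_O, k); illustration only): applying im2col to gy brings its spatial size back to d, so gW comes out with W's shape and gx with x's shape after the rollaxis.

    import numpy
    from chainer.utils import conv_nd

    n, c_i, c_o, d = 2, 3, 4, 5
    k, s, p = 3, 2, 1
    out = s * (d - 1) + k - 2 * p                        # 9: spatial size of gy
    x = numpy.random.rand(n, c_i, d).astype(numpy.float32)
    W = numpy.random.rand(c_i, c_o, k).astype(numpy.float32)
    gy = numpy.random.rand(n, c_o, out).astype(numpy.float32)

    col = conv_nd.im2col_nd_cpu(gy, (k,), (s,), (p,))    # (n, c_o, k, d) == (2, 4, 3, 5)
    gW = numpy.tensordot(x, col, ((0, 2), (0, 3)))       # (c_i, c_o, k) == W.shape
    gx = numpy.tensordot(col, W, ((1, 2), (1, 2)))       # (n, d, c_i)
    gx = numpy.rollaxis(gx, 2, 1)                        # (n, c_i, d) == x.shape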
Example 4
    def _backward_xp(self, x, W, b, gy, xp):
        ndim = self.ndim
        ksize = W.shape[2:]
        stride = self.stride
        pad = self.pad
        if xp is numpy:
            col = conv_nd.im2col_nd_cpu(gy, ksize, stride, pad)
        else:
            col = conv_nd.im2col_nd_gpu(gy, ksize, stride, pad)

        # x  : n, C_I, d_1, d_2, ..., d_N
        # col: n, C_O, k_1, k_2, ..., k_N, d_1, d_2, ..., d_N
        x_axes = (0, ) + tuple(six.moves.range(2, ndim + 2))
        col_axes = (0, ) + tuple(six.moves.range(ndim + 2, ndim * 2 + 2))
        gW = xp.tensordot(x, col, (x_axes, col_axes)).astype(W.dtype,
                                                             copy=False)

        # col: n, C_O, k_1, k_2, ..., k_N, d_1, d_2, ..., d_N
        # W  : C_I, C_O, k_1, k_2, ..., k_N
        axes = (1, ) + tuple(six.moves.range(2, ndim + 2))
        gx = xp.tensordot(col, W, (axes, axes)).astype(x.dtype, copy=False)
        gx = xp.rollaxis(gx, ndim + 1, 1)

        if b is None:
            return gx, gW
        else:
            sum_axis = (0, ) + tuple(six.moves.range(2, ndim + 2))
            gb = gy.sum(axis=sum_axis)
            return gx, gW, gb
Example 5
    def _forward_xp_core(self, x, W, b, xp):
        ndim = self.ndim
        ksize = W.shape[2:]
        stride = self.stride
        pad = self.pad
        dilate = self.dilate

        # Make patch array.
        if xp is numpy:
            col = conv_nd.im2col_nd_cpu(
                x, ksize, stride, pad, cover_all=self.cover_all, dilate=dilate)
        else:
            col = conv_nd.im2col_nd_gpu(
                x, ksize, stride, pad, cover_all=self.cover_all, dilate=dilate)

        # Compute correlation.
        axes = tuple(moves.range(1, ndim + 2))  # (1, 2, ..., N+1)
        y = xp.tensordot(col, W, (axes, axes)).astype(x.dtype, copy=False)

        # Apply bias if given.
        if b is not None:
            y += b

        # Move c_O to the second axis: (n, y_1, ..., y_N, c_O) -> (n, c_O, y_1, ..., y_N).
        return xp.rollaxis(y, ndim + 1, 1),
Example 6
    def _forward_xp_core(self, x, gy, xp):
        # Compute filter weight gradient.
        # (n, _, out_1, out_2, ..., out_N)
        out_axes = (0, ) + tuple(moves.range(2, self.ndim + 2))
        # (n, _, _, ..., _, out_1, out_2, ..., out_N)
        col_axes = (0, ) + tuple(moves.range(self.ndim + 2, self.ndim * 2 + 2))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (xp is numpy
                and not (gy.flags.c_contiguous or gy.flags.f_contiguous)
                and 1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        if xp is numpy:
            col = conv_nd.im2col_nd_cpu(x,
                                        self.ksize,
                                        self.stride,
                                        self.pad,
                                        cover_all=self.cover_all,
                                        dilate=self.dilate)
        else:
            col = conv_nd.im2col_nd_gpu(x,
                                        self.ksize,
                                        self.stride,
                                        self.pad,
                                        cover_all=self.cover_all,
                                        dilate=self.dilate)
        gW = xp.tensordot(gy, col, (out_axes, col_axes)).astype(self.W_dtype,
                                                                copy=False)
        return gW,
Example 7
    def test_im2col_nd_gpu_parameter_ranks(self):
        img_gpu = cuda.to_gpu(self.img)

        # Invalid ksize length.
        with self.assertRaises(AssertionError):
            conv_nd.im2col_nd_gpu(img_gpu, (2,), self.stride, self.pad)

        # Invalid stride length.
        with self.assertRaises(AssertionError):
            conv_nd.im2col_nd_gpu(img_gpu, self.ksize, (1,), self.pad)

        # Invalid pad length.
        with self.assertRaises(AssertionError):
            conv_nd.im2col_nd_gpu(img_gpu, self.ksize, self.stride, (0,))
Example 8
    def forward_gpu(self, inputs):
        X, W, B, initial_ct = _as_contiguous(inputs[:4])
        dtype = X.dtype

        xp = cuda.get_array_module(W)
        batchsize, feature_dimension, seq_length = X.shape

        mask_x = inputs[4] if len(inputs) == 5 else None

        if mask_x is not None:
            X *= mask_x[..., None]

        self.col = conv_nd.im2col_nd_gpu(X, (1, ), (1, ), (0, ),
                                         cover_all=False)
        self.U = _as_contiguous(
            xp.tensordot(self.col, W[..., None],
                         ((1, 2), (1, 2))).astype(X.dtype,
                                                  copy=False).transpose(
                                                      (0, 2, 1)))
        # U = xp.matmul(W, X)

        total_columns = feature_dimension * batchsize
        thread_per_block = min(512, total_columns)
        num_block = math.ceil(total_columns / thread_per_block)
        assert thread_per_block * num_block >= total_columns

        H = xp.empty((batchsize, feature_dimension, seq_length), dtype=dtype)
        self.C = xp.empty((batchsize, feature_dimension, seq_length),
                          dtype=dtype)

        self._cuda_elementwise("forward",
                               args=[
                                   X.data.ptr, self.U.data.ptr, B.data.ptr,
                                   initial_ct.data.ptr, self.C.data.ptr,
                                   H.data.ptr, batchsize, feature_dimension,
                                   seq_length, self.use_tanh
                               ],
                               block=(thread_per_block, 1, 1),
                               grid=(num_block, 1, 1))

        return H, self.C, self.C[..., -1]
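
The commented-out U = xp.matmul(W, X) line hints at what the im2col/tensordot pair does here: with a length-1 kernel, unit stride, and no padding, the patch array is just X with an extra singleton axis, so U reduces to a batched matrix product. A small NumPy check (toy shapes; the (3 * feature_dimension, feature_dimension) weight layout is an assumption inferred from the grad_w reshape in the backward pass shown later):

    import numpy
    from chainer.utils import conv_nd

    batchsize, d, seq_length = 2, 3, 5
    X = numpy.random.rand(batchsize, d, seq_length).astype(numpy.float32)
    W = numpy.random.rand(3 * d, d).astype(numpy.float32)    # assumed (3 * d, d) layout

    col = conv_nd.im2col_nd_cpu(X, (1,), (1,), (0,), cover_all=False)
    # col.shape == (batchsize, d, 1, seq_length)
    U = numpy.tensordot(col, W[..., None], ((1, 2), (1, 2))).transpose((0, 2, 1))
    assert numpy.allclose(U, numpy.matmul(W, X), atol=1e-5)  # (batchsize, 3 * d, seq_length)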
Example 9
    def test_im2col_consistency(self):
        col_cpu = conv_nd.im2col_nd_cpu(self.x, self.ksize, self.stride,
                                        self.pad)
        col_gpu = conv_nd.im2col_nd_gpu(cuda.to_gpu(self.x), self.ksize,
                                        self.stride, self.pad)
        testing.assert_allclose(col_cpu, col_gpu.get(), atol=0, rtol=0)
Example 10
    def test_im2col_consistency(self):
        col_cpu = conv_nd.im2col_nd_cpu(
            self.x, self.ksize, self.stride, self.pad)
        col_gpu = conv_nd.im2col_nd_gpu(
            cuda.to_gpu(self.x), self.ksize, self.stride, self.pad)
        testing.assert_allclose(col_cpu, col_gpu.get(), atol=0, rtol=0)
Example 11
    def backward_gpu(self, inputs, grad_outputs):
        X, W, B, initial_ct = _as_contiguous(inputs[:4])

        dtype = X.dtype
        xp = cuda.get_array_module(W)
        batchsize, feature_dimension, seq_length = X.shape

        mask_x = inputs[4] if len(inputs) == 5 else None

        if mask_x is not None:
            X *= mask_x[..., None]

        total_columns = feature_dimension * batchsize
        thread_per_block = min(512, total_columns)
        num_block = total_columns // thread_per_block + 1

        grad_x = xp.zeros_like(X)
        grad_highway_x = xp.zeros_like(X)
        grad_b = xp.zeros((batchsize, feature_dimension * 2, seq_length),
                          dtype=dtype)
        grad_w = xp.zeros((batchsize, ) + W.shape, dtype=dtype)
        grad_u = xp.zeros((batchsize, feature_dimension * 3, seq_length),
                          dtype=dtype)
        grad_initial_ct = xp.zeros_like(initial_ct)

        # initialize with zero
        incoming_grad_ct = xp.zeros_like(
            initial_ct) if grad_outputs[2] is None else _as_contiguous(
                grad_outputs[2])
        incoming_grad_h = xp.zeros_like(
            X) if grad_outputs[0] is None else _as_contiguous(grad_outputs[0])

        self._cuda_elementwise(
            "backward",
            args=[
                X.data.ptr, self.U.data.ptr, B.data.ptr, self.C.data.ptr,
                initial_ct.data.ptr, incoming_grad_h.data.ptr,
                incoming_grad_ct.data.ptr, W.data.ptr, grad_highway_x.data.ptr,
                grad_u.data.ptr, grad_b.data.ptr, grad_initial_ct.data.ptr,
                batchsize, feature_dimension, seq_length, self.use_tanh
            ],
            block=(thread_per_block, 1, 1),
            grid=(num_block, 1, 1))

        col = conv_nd.im2col_nd_gpu(grad_u, (1, ), (1, ), (0, ),
                                    cover_all=False)
        grad_x = xp.tensordot(col, W.T[..., None],
                              ((1, 2),
                               (1, 2))).astype(dtype, copy=False).transpose(
                                   (0, 2, 1)) + grad_highway_x

        if mask_x is not None:
            grad_x *= mask_x[..., None]

        grad_b = xp.sum(grad_b, axis=(0, 2))

        grad_w = xp.tensordot(grad_u, self.col,
                              ((0, 2),
                               (0, 3))).astype(dtype, copy=False).reshape(
                                   (feature_dimension * 3, feature_dimension))

        if len(inputs) == 5:
            return grad_x, grad_w, grad_b, grad_initial_ct, None
        return grad_x, grad_w, grad_b, grad_initial_ct
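
A toy check of the final weight-gradient contraction (shapes assumed, illustration only): contracting grad_u with the length-1-kernel patch array over the batch and time axes, then dropping the singleton kernel axis with reshape, is the same as accumulating the outer products of grad_u[n, :, t] and X[n, :, t].

    import numpy
    from chainer.utils import conv_nd

    batchsize, d, seq_length = 2, 3, 5
    X = numpy.random.rand(batchsize, d, seq_length).astype(numpy.float32)
    grad_u = numpy.random.rand(batchsize, 3 * d, seq_length).astype(numpy.float32)

    col = conv_nd.im2col_nd_cpu(X, (1,), (1,), (0,))          # (batchsize, d, 1, seq_length)
    grad_w = numpy.tensordot(grad_u, col, ((0, 2), (0, 3))).reshape((3 * d, d))
    ref = numpy.einsum('nit,njt->ij', grad_u, X)              # sum of per-step outer products
    assert numpy.allclose(grad_w, ref, atol=1e-4)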