Example 1
    def forward(self, inputs):
        x = _as_mat(inputs[0])
        W = inputs[1]

        xp = cuda.get_array_module(*x)

        olen, ilen = W.shape
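        # Build a per-input-channel coefficient mask M with W's shape and scale W by it.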
        if self.coeffs is None:
            self.coeffs = numpy.ones(ilen)
        coeffs = numpy.copy(self.coeffs)
        coeffs = numpy.expand_dims(coeffs, 0)
        coeffs = numpy.broadcast_to(coeffs, W.shape)
        M = xp.asarray(coeffs, numpy.float32).reshape(W.shape)
        self.M = M
        W = self.M * W
        
        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

        y = x.dot(W.T).astype(x.dtype, copy=False)
        if len(inputs) == 3:
            b = inputs[2]
            y += b
        return y,
Example 2
    def test_all_numpy_subclasses(self):
        x = numpy.array([0])
        y = numpy.array([[1], [2]])
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            z = numpy.matrix('3,4; 5,6')
        self.assertTrue(T.same_types(x, y, z))
Example 3
    def forward_cpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        kh, kw = W.shape[2:]
        self.col = conv.im2col_cpu(x,
                                   kh,
                                   kw,
                                   self.sy,
                                   self.sx,
                                   self.ph,
                                   self.pw,
                                   cover_all=self.cover_all,
                                   dy=self.dy,
                                   dx=self.dx)
        y = numpy.tensordot(self.col, W,
                            ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        if b is not None:
            y += b
        return numpy.rollaxis(y, 3, 1),
Example 4
    def forward_cpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        _, _, h, w = x.shape
        gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # - k: out_channels; m, n: kernel height and width
        # - b: batch size
        # - h, w: height and width of the input
        # k, m, n, b, h, w -> b, k, m, n, h, w
        gcol = numpy.rollaxis(gcol, 3)
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        y = conv.col2im_cpu(
            gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
        # b, k, h, w
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
        return y,
Example 5
    def forward_cpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        olen, ilen, hlen, wlen = W.shape
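        # Build a per-input-channel mask M matching the 4-d filter shape and scale W by it.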
        if self.coeffs is None:
            self.coeffs = numpy.ones(ilen)
        coeffs = numpy.copy(self.coeffs)
        coeffs = numpy.expand_dims(coeffs, 1)
        coeffs = numpy.expand_dims(coeffs, 1)
        coeffs = numpy.expand_dims(coeffs, 0)
        coeffs = numpy.broadcast_to(coeffs, W.shape)
        M = numpy.asarray(coeffs, numpy.float32).reshape(W.shape)
        self.M = M
        W = self.M * W
        
        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        y = numpy.tensordot(
            self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        if b is not None:
            y += b
        return numpy.rollaxis(y, 3, 1),
Example 6
    def forward(self, inputs):
        self.retain_inputs((0, ))
        x, W = inputs
        self._w_shape = W.shape

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))

        xp = cuda.get_array_module(*inputs)
        if chainer.is_debug():
            valid_x = xp.logical_and(0 <= x, x < len(W))
            if self.ignore_label is not None:
                valid_x = xp.logical_or(valid_x, x == self.ignore_label)
            if not valid_x.all():
                raise ValueError('Each non-ignored `x` value needs to satisfy '
                                 '`0 <= x < len(W)`')

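        # For ignored ids, look up row 0 as a placeholder and zero out those output rows.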
        if self.ignore_label is not None:
            mask = (x == self.ignore_label)
            return xp.where(mask[..., None], 0,
                            W.take(xp.where(mask, 0, x), axis=0)),

        return W.take(x, axis=0),
Example 7
    def forward(self, inputs):
        e1 = array.as_mat(inputs[0])
        e2 = array.as_mat(inputs[1])
        W = inputs[2]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(e1): {1}, type(e2): {2}'
                             .format(type(W), type(e1), type(e2)))

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
        else:
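            # Emulate the contraction 'ij,ik,jkl->il' with explicit broadcasting and a single matmul.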
            i_len, j_len = e1.shape
            k_len = e2.shape[1]
            # 'ij,ik->ijk'
            e1e2 = e1[:, :, None] * e2[:, None, :]
            # ijk->i[jk]
            e1e2 = e1e2.reshape(i_len, j_len * k_len)
            # jkl->[jk]l
            W_mat = W.reshape(-1, W.shape[2])
            # 'i[jk],[jk]l->il'
            y = e1e2.dot(W_mat)

        if len(inputs) == 6:
            V1, V2, b = inputs[3:]
            y += e1.dot(V1)
            y += e2.dot(V2)
            y += b
        return y,
Example 8
    def backward_cpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

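        # For deconvolution, gW correlates x with im2col(gy), and gx is a forward convolution of gy with W.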
        gy = grad_outputs[0]
        kh, kw = W.shape[2:]
        col = conv.im2col_cpu(gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
        gW = numpy.tensordot(x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype,
                                                                    copy=False)
        gx = numpy.tensordot(col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype,
                                                                    copy=False)
        gx = numpy.rollaxis(gx, 3, 1)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Example 9
    def forward_cpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        kh, kw = W.shape[2:]
        _, _, h, w = x.shape
        gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
        # - k: out_channels; m, n: kernel height and width
        # - b: batch size
        # - h, w: height and width of the input
        # k, m, n, b, h, w -> b, k, m, n, h, w
        gcol = numpy.rollaxis(gcol, 3)
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                            self.outh, self.outw)
        # b, k, h, w
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
        return y,
Example 10
    def backward_cpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(gy, self.col,
                             ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                            copy=False)
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Example 11
    def forward(self, inputs):
        x = inputs[0]
        W = inputs[1]
        if (ia.all_ready(inputs)):
            return self.forward_ia(inputs)

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray)
                and not (x.flags.c_contiguous or x.flags.f_contiguous)
                and 1 in x.shape):
            x = numpy.ascontiguousarray(x)

        y = x.dot(W.T).astype(x.dtype, copy=False)
        if len(inputs) == 3:
            b = inputs[2]
            y += b
        self.retain_inputs((0, 1))  # b is not retained
        return y,
Example 12
    def forward(self, inputs):
        self.retain_inputs((0, 1))  # only retain x and W
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

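        # Infer the output size of every spatial dimension when it was not given explicitly.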
        if self.outs is None:
            dims = x.shape[2:]
            ksize = W.shape[2:]
            self.outs = tuple(
                conv.get_deconv_outsize(d, k, s, p)
                for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
            assert all(out > 0 for out in self.outs), \
                'Output sizes should be positive.'
        self._set_cover_all(x, W)

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            return self._forward_xp(x, W, b, numpy)
        elif self._use_cudnn(x, W):
            return self._forward_cudnn(x, W, b)
        else:
            return self._forward_xp(x, W, b, cuda.cupy)
Example 13
    def test_all_numpy_subclasses(self):
        x = numpy.array([0])
        y = numpy.array([[1], [2]])
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            z = numpy.matrix("3,4; 5,6")
        self.assertTrue(T.same_types(x, y, z))
Example 14
    def backward_cpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        kh, kw = W.shape[2:]
        col = conv.im2col_cpu(
            gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
        gW = numpy.tensordot(
            x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
        gx = numpy.tensordot(
            col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
        gx = numpy.rollaxis(gx, 3, 1)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Example 15
    def backward(self, inputs, grad_outputs):
        x = _as_mat(inputs[0])
        r = inputs[1]
        gy = grad_outputs[0]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(r): {0}, type(x): {1}'.format(
                                 type(r), type(x)))

        R = np.array([[np.cos(r), -np.sin(r)], [np.sin(r),
                                                np.cos(r)]]).reshape(2, 2)
        gx = gy.dot(R.T).astype(x.dtype, copy=False).reshape(inputs[0].shape)

        dR = np.array([[-np.sin(r), -np.cos(r)], [np.cos(r),
                                                  -np.sin(r)]]).reshape(2, 2)

        xdR = x.dot(dR.T).astype(x.dtype, copy=False)
        gr = (xdR * gy).sum().reshape(r.shape)

        if len(inputs) == 3:
            gb = gy.sum(0)
            return gx, gr, gb
        else:
            return gx, gr
Example 16
    def forward(self, inputs):
        e1 = array.as_mat(inputs[0])
        e2 = array.as_mat(inputs[1])
        W = inputs[2]

        if not type_check.same_types(*inputs):
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                    type(W), type(e1), type(e2)))

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
        else:
            i_len, j_len = e1.shape
            k_len = e2.shape[1]
            # 'ij,ik->ijk'
            e1e2 = e1[:, :, None] * e2[:, None, :]
            # ijk->i[jk]
            e1e2 = e1e2.reshape(i_len, j_len * k_len)
            # jkl->[jk]l
            W_mat = W.reshape(-1, W.shape[2])
            # 'i[jk],[jk]l->il'
            y = e1e2.dot(W_mat)

        if len(inputs) == 6:
            V1, V2, b = inputs[3:]
            y += e1.dot(V1)
            y += e2.dot(V2)
            y += b
        return y,
Example 17
    def backward_cpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        if not self.requires_x_grad:
            gx = None
        else:
            gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
            gcol = numpy.rollaxis(gcol, 3)
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                 h, w)

        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            return gx, gW, gb
Example 18
    def backward_cpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        
        if self.bcoeffs is not None:
            olen, ilen, hlen, wlen = W.shape
            if self.coeffs is None:
                self.coeffs = numpy.ones(ilen)
            coeffs = numpy.copy(self.bcoeffs)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 1)
            coeffs = numpy.expand_dims(coeffs, 0)
            coeffs = numpy.broadcast_to(coeffs, W.shape)
            self.mW = numpy.asarray(coeffs, numpy.float32).reshape(W.shape)

        if self.ocoeffs is not None:
            coeffs = numpy.copy(self.ocoeffs)
            self.mb = numpy.asarray(coeffs, numpy.float32)

        W = self.M * W
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        h, w = x.shape[2:]

        gW = numpy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)

        if not self.requires_x_grad:
            gx = None
        else:
            gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
            gcol = numpy.rollaxis(gcol, 3)
            gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                                 h, w)
        
        if hasattr(self, 'mW'):
            gW = self.mW * gW
            if hasattr(self, 'mb'):
                xp = cuda.get_array_module(*x)
                gW = xp.broadcast_to(
                    xp.expand_dims(
                        xp.expand_dims(xp.expand_dims(self.mb, 1), 1), 1),
                    gW.shape) * gW
        if b is None:
            return gx, gW
        else:
            gb = gy.sum(axis=(0, 2, 3))
            if hasattr(self, 'mb'):
                gb = self.mb * gb
            return gx, gW, gb
Example 19
    def forward(self, inputs):
        x = inputs[0]
        r = inputs[1]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(r): {0}, type(x): {1}'.format(
                                 type(r), type(x)))

        xp = cuda.get_array_module(*inputs)
        rmat = self.rodrigues(r, xp)
        y = x.dot(rmat.T).astype(x.dtype, copy=False)
        return y,
Example 20
    def forward(self, inputs):
        x = _as_mat(inputs[0])
        W = inputs[1]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

        y = x.dot(W.T).astype(x.dtype, copy=False)
        if len(inputs) == 3:
            b = inputs[2]
            y += b
        return y,
Example 21
    def forward(self, inputs):
        x = _as_mat(inputs[0])
        W = inputs[1]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))

        y = x.dot(W.T).astype(x.dtype, copy=False)
        if len(inputs) == 3:
            b = inputs[2]
            y += b
        return y,
Example 22
    def backward(self, inputs, grad_outputs):
        x = _as_mat(inputs[0])
        W = inputs[1]
        gy = grad_outputs[0]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

        gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
        gW = gy.T.dot(x).astype(W.dtype, copy=False)
        if len(inputs) == 3:
            gb = gy.sum(0)
            return gx, gW, gb
        else:
            return gx, gW
Example 23
    def forward(self, inputs):
        self.retain_inputs((0, 1))
        if ((ia.all_ready(inputs)) and self.W_dtype == numpy.dtype('float32')):
            return self.forward_ia(inputs)
        x, gy = inputs
        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(x): {0}, type(gy): {1}'.format(
                                 type(x), type(gy)))
        if (isinstance(gy, numpy.ndarray)
                and not (gy.flags.c_contiguous or gy.flags.f_contiguous)
                and 1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gW = gy.T.dot(x).astype(self.W_dtype, copy=False)
        self.retain_inputs((0, 1))
        return gW,
Example 24
    def backward(self, inputs, grad_outputs):
        x = _as_mat(inputs[0])
        W = inputs[1]
        gy = grad_outputs[0]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))

        gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
        gW = gy.T.dot(x).astype(W.dtype, copy=False)
        if len(inputs) == 3:
            gb = gy.sum(0)
            return gx, gW, gb
        else:
            return gx, gW
Example 25
    def forward(self, inputs):
        x = _as_mat(inputs[0])
        r = inputs[1]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(r): {0}, type(x): {1}'.format(
                                 type(r), type(x)))

        R = np.array([[np.cos(r), -np.sin(r)], [np.sin(r),
                                                np.cos(r)]]).reshape(2, 2)

        y = x.dot(R.T).astype(x.dtype, copy=False)
        if len(inputs) == 3:
            b = inputs[2]
            y += b
        return y,
Example 26
    def backward(self, inputs, grad_outputs):
        e1 = array.as_mat(inputs[0])
        e2 = array.as_mat(inputs[1])
        W = inputs[2]

        if not type_check.same_types(*inputs):
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                    type(W), type(e1), type(e2)))

        gy = grad_outputs[0]

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
            ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
            ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
        else:
            kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                               'in0 * in1 * in2', 'a + b', 'out = a', 0,
                               'bilinear_product')

            e1_b = e1[:, :, None, None]  # ij
            e2_b = e2[:, None, :, None]  # ik
            gy_b = gy[:, None, None, :]  # il
            W_b = W[None, :, :, :]  # jkl

            gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
            ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
            ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

        ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
        if len(inputs) == 6:
            V1, V2, b = inputs[3:]
            gV1 = e1.T.dot(gy)
            gV2 = e2.T.dot(gy)
            gb = gy.sum(0)
            ge1 += gy.dot(V1.T)
            ge2 += gy.dot(V2.T)
            ret += gV1, gV2, gb
        return ret
Example 27
    def backward(self, inputs, grad_outputs):
        e1 = array.as_mat(inputs[0])
        e2 = array.as_mat(inputs[1])
        W = inputs[2]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(e1): {1}, type(e2): {2}'
                             .format(type(W), type(e1), type(e2)))

        gy = grad_outputs[0]

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
            ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
            ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
        else:
            kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                               'in0 * in1 * in2', 'a + b', 'out = a', 0,
                               'bilinear_product')

            e1_b = e1[:, :, None, None]  # ij
            e2_b = e2[:, None, :, None]  # ik
            gy_b = gy[:, None, None, :]  # il
            W_b = W[None, :, :, :]  # jkl

            gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
            ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
            ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

        ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
        if len(inputs) == 6:
            V1, V2, b = inputs[3:]
            gV1 = e1.T.dot(gy)
            gV2 = e2.T.dot(gy)
            gb = gy.sum(0)
            ge1 += gy.dot(V1.T)
            ge2 += gy.dot(V2.T)
            ret += gV1, gV2, gb
        return ret
Example 28
    def forward(self, inputs):
        self.retain_inputs(tuple(range(len(inputs))))

        e1 = array.as_mat(inputs[0])
        e2 = array.as_mat(inputs[1])
        W = inputs[2]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(e1): {1}, type(e2): {2}'
                             .format(type(W), type(e1), type(e2)))

        xp = cuda.get_array_module(*inputs)
        y = xp.einsum('ij,ik,jkl->il', e1, e2, W)

        if len(inputs) == 6:
            V1, V2, b = inputs[3:]
            y += e1.dot(V1)
            y += e2.dot(V2)
            y += b
        return y,
Example 29
    def forward(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

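        # Dispatch on the array backend: NumPy, plain CuPy, or cuDNN.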
        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            return self._forward_xp(x, W, b, numpy)
        elif not self._use_cudnn(x, W):
            return self._forward_xp(x, W, b, cuda.cupy)
        else:
            return self._forward_cudnn(x, W, b)
Example 30
    def forward(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            return self._forward_xp(x, W, b, numpy)
        elif self._use_cudnn(x, W):
            return self._forward_cudnn(x, W, b)
        else:
            return self._forward_xp(x, W, b, cuda.cupy)
Example 31
    def forward(self, inputs):
        self.retain_inputs(tuple(range(len(inputs))))

        e1 = array.as_mat(inputs[0])
        e2 = array.as_mat(inputs[1])
        W = inputs[2]

        if not type_check.same_types(*inputs):
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                    type(W), type(e1), type(e2)))

        xp = cuda.get_array_module(*inputs)
        y = xp.einsum('ij,ik,jkl->il', e1, e2, W)

        if len(inputs) == 6:
            V1, V2, b = inputs[3:]
            y += e1.dot(V1)
            y += e2.dot(V2)
            y += b
        return y,
Example 32
    def backward(self, inputs, grad_outputs):
        x = _as_mat(inputs[0])
        W = inputs[1]
        
        if self.bcoeffs is not None:
            xp = cuda.get_array_module(*x)
            coeffs = numpy.copy(self.bcoeffs)
            coeffs = numpy.expand_dims(coeffs, 0)
            coeffs = numpy.broadcast_to(coeffs, W.shape)
            self.mW = xp.asarray(coeffs, numpy.float32).reshape(W.shape)
        if self.ocoeffs is not None:
            xp = cuda.get_array_module(*x)
            coeffs = numpy.copy(self.ocoeffs)
            self.mb = xp.asarray(coeffs, numpy.float32)

        W = self.M * W
        gy = grad_outputs[0]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

        gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
        gW = gy.T.dot(x).astype(W.dtype, copy=False)
        
        if hasattr(self, 'mW'):
            gW = self.mW * gW
            if hasattr(self, 'mb'):
                xp = cuda.get_array_module(*x)
                gW = xp.broadcast_to(xp.expand_dims(self.mb, 1), gW.shape) * gW
        # print('gW', gW.sum(0).sum(0))
        if len(inputs) == 3:
            gb = gy.sum(0)
            if hasattr(self, 'mb'):
                gb = self.mb * gb
            return gx, gW, gb
        else:
            return gx, gW
Example 33
    def backward(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]    # (n, c_O, out_1, out_2, ..., out_N)

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            return self._backward_xp(x, W, b, gy, numpy)
        elif not self._use_cudnn(x, W):
            return self._backward_xp(x, W, b, gy, cuda.cupy)
        else:
            return self._backward_cudnn(x, W, b, gy)
Example 34
    def forward_cpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        self.col = conv.im2col_cpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
        y = numpy.tensordot(
            self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
        if b is not None:
            y += b
        return numpy.rollaxis(y, 3, 1),
Example 35
    def forward(self, inputs):
        x = inputs[0]
        W = inputs[1]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray) and
                not (x.flags.c_contiguous or x.flags.f_contiguous) and
                1 in x.shape):
            x = numpy.ascontiguousarray(x)

        y = x.dot(W.T).astype(x.dtype, copy=False)
        if len(inputs) == 3:
            b = inputs[2]
            y += b
        self.retain_inputs((0, 1))  # b is not retained
        return y,
Example 36
    def backward(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]    # (n, c_O, out_1, out_2, ..., out_N)

        xp = cuda.get_array_module(*inputs)
        if xp is numpy:
            return self._backward_xp(x, W, b, gy, numpy)
        elif not self._use_cudnn(x, W):
            return self._backward_xp(x, W, b, gy, cuda.cupy)
        else:
            return self._backward_cudnn(x, W, b, gy)
Example 37
    def forward(self, inputs):
        a_h, b_h, k_h, cs, k_prev = inputs

        if not type_check.same_types(*inputs):
            raise ValueError(
                "numpy and cupy must not be used together\n"
                "type(a_h): {0}, type(b_h): {1}, type(k_h): {2}, type(cs): {3}, type(k_prev): {4}"
                .format(type(a_h), type(b_h), type(k_h), type(cs),
                        type(k_prev)))

        batch_size, W, u = cs.shape
        K = a_h.shape[1]
        xp = cuda.get_array_module(*inputs)

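        # The window parameters are exponentiated to keep them positive; k_h accumulates on top of k_prev.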
        a_h = xp.exp(a_h).reshape((batch_size, K, 1))
        b_h = xp.exp(b_h).reshape((batch_size, K, 1))
        k_h = k_prev + xp.exp(k_h)
        k_h = xp.reshape(k_h, (batch_size, K, 1))

        self.a_h = a_h
        self.b_h = b_h
        self.k_h = k_h

        # Compute phi's parameters
        #us = xp.linspace(0, u-1, u)
        us = xp.arange(u, dtype=xp.float32).reshape((1, 1, u))
        phi = a_h * xp.exp(-b_h * xp.square(k_h - us))

        self.phi = phi

        phi = xp.sum(phi, axis=1)

        # Finalize the soft window computation
        w = xp.matmul(cs, phi.reshape((batch_size, u, 1)))

        return w.reshape((batch_size, W)), k_h.reshape((batch_size, K)), phi
Example 38
    def forward(self, inputs):
        self.retain_inputs((0,))
        x, W = inputs
        self._w_shape = W.shape

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

        xp = cuda.get_array_module(*inputs)
        if chainer.is_debug():
            valid_x = xp.logical_and(0 <= x, x < len(W))
            if self.ignore_label is not None:
                valid_x = xp.logical_or(valid_x, x == self.ignore_label)
            if not valid_x.all():
                raise ValueError('Each non-ignored `x` value needs to satisfy '
                                 '`0 <= x < len(W)`')

        if self.ignore_label is not None:
            mask = (x == self.ignore_label)
            return xp.where(mask[..., None], 0, W[xp.where(mask, 0, x)]),

        return W[x],
Example 39
    def backward(self, inputs, grad_outputs):
        x = _as_mat(inputs[0])
        W = inputs[1]
        
        xp = cuda.get_array_module(*x)
        
        # deterministic binarization: quantize W to +1 / -1
        W = xp.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
        
        W = self.M * W
        gy = grad_outputs[0]

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

        gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
        gW = gy.T.dot(x).astype(W.dtype, copy=False)
        if len(inputs) == 3:
            gb = gy.sum(0)
            return gx, gW, gb
        else:
            return gx, gW
Example 40
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape
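        # Effective kernel extent after dilation.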
        dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all, d=self.dy)
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all, d=self.dx)

        y = cuda.cupy.zeros((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):

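            # Emulate dilated convolution by accumulating kh * kw shifted 1x1 cuDNN convolutions into y.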
            pad_x = cuda.cupy.zeros((n, c, h + 2 * self.ph, w + 2 * self.pw),
                                    dtype=x.dtype)
            pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

            out_h_s1 = h + 2 * self.ph - dkh + 1
            out_w_s1 = w + 2 * self.pw - dkw + 1

            for j in moves.range(kh):
                for i in moves.range(kw):
                    xji = cuda.cupy.ascontiguousarray(
                        pad_x[:, :,
                              j * self.dy:j * self.dy + out_h_s1,
                              i * self.dx:i * self.dx + out_w_s1])
                    Wji = cuda.cupy.ascontiguousarray(
                        W[:, :, j:j + 1, i:i + 1])

                    if i == 0 and j == 0:
                        handle = cudnn.get_handle()
                        xji_desc = cudnn.create_tensor_descriptor(xji)
                        y_desc = cudnn.create_tensor_descriptor(y)
                        self.filter_desc = cudnn.create_filter_descriptor(Wji)
                        self.conv_desc = cudnn.create_convolution_descriptor(
                            (0, 0), (self.sy, self.sx), xji.dtype)

                        workspace_size = cuda.get_max_workspace_size()
                        workspace = cuda.cupy.empty(
                            (workspace_size,), dtype='b')
                        algo = libcudnn.getConvolutionForwardAlgorithm(
                            handle, xji_desc.value, self.filter_desc.value,
                            self.conv_desc.value, y_desc.value, _fwd_pref,
                            workspace_size)

                        oz_dtype = 'd' if x.dtype == 'd' else 'f'
                        one = numpy.array(1, dtype=oz_dtype).ctypes

                    libcudnn.convolutionForward(
                        handle, one.data, xji_desc.value, xji.data.ptr,
                        self.filter_desc.value, Wji.data.ptr,
                        self.conv_desc.value, algo, workspace.data.ptr,
                        workspace_size, one.data, y_desc.value, y.data.ptr)

            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw,
                cover_all=self.cover_all, dy=self.dy, dx=self.dx)
            y = cuda.cupy.tensordot(
                self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                            copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,
Example 41
    def backward(self, inputs, grads):
        (hx, cx), inputs = _split(inputs, 2)
        ws, inputs = _split(inputs, self.n_layers * 8)
        bs, inputs = _split(inputs, self.n_layers * 8)
        x_list = inputs

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together')

        hx = cuda.cupy.ascontiguousarray(hx)
        cx = cuda.cupy.ascontiguousarray(cx)

        dhy, dcy = grads[:2]
        dy_list = list(grads[2:])
        if dhy is None:
            dhy = cuda.cupy.zeros_like(hx)
        if dcy is None:
            dcy = cuda.cupy.zeros_like(cx)
        for i in six.moves.range(len(dy_list)):
            if dy_list[i] is None:
                dy_list[i] = cuda.cupy.zeros_like(x_list[i])

        xs = cuda.cupy.concatenate(x_list, axis=0)
        length = len(x_list)

        dhx = cuda.cupy.empty_like(hx)
        dcx = cuda.cupy.empty_like(cx)

        hx_desc = cudnn.create_tensor_nd_descriptor(hx)
        cx_desc = cudnn.create_tensor_nd_descriptor(cx)
        dhy_desc = cudnn.create_tensor_nd_descriptor(dhy)
        dcy_desc = cudnn.create_tensor_nd_descriptor(dcy)

        c_dy_descs = _make_tensor_descriptor_array(dy_list)
        dys = cuda.cupy.concatenate(dy_list, axis=0)

        rnn_desc = self.rnn_desc
        handle = self.handle
        work_size = libcudnn.getRNNWorkspaceSize(
            handle, rnn_desc.value, length, self.c_x_descs.data)
        workspace = cuda.cupy.empty((work_size,), dtype='b')

        dhx_desc = cudnn.create_tensor_nd_descriptor(dhx)
        dcx_desc = cudnn.create_tensor_nd_descriptor(dcx)

        dxs = cuda.cupy.empty_like(xs)
        sections = numpy.cumsum([len(x) for x in x_list[:-1]])
        dx_list = cuda.cupy.split(dxs, sections, 0)
        c_dx_descs = _make_tensor_descriptor_array(dx_list)

        libcudnn.RNNBackwardData(
            handle, rnn_desc.value, length,
            self.c_y_descs.data, self.ys.data.ptr,
            c_dy_descs.data, dys.data.ptr, dhy_desc.value, dhy.data.ptr,
            dcy_desc.value, dcy.data.ptr, self.w_desc.value, self.w.data.ptr,
            hx_desc.value, hx.data.ptr, cx_desc.value, cx.data.ptr,
            c_dx_descs.data, dxs.data.ptr, dhx_desc.value, dhx.data.ptr,
            dcx_desc.value, dcx.data.ptr, workspace.data.ptr, work_size,
            self.reserve_space.data.ptr, self.reserve_space.size)

        dw = cuda.cupy.zeros_like(self.w)
        dw_desc = cudnn.create_filter_descriptor(dw)
        libcudnn.RNNBackwardWeights(
            handle, rnn_desc.value, length,
            self.c_x_descs.data, xs.data.ptr,
            hx_desc.value, hx.data.ptr, self.c_y_descs.data, self.ys.data.ptr,
            workspace.data.ptr, work_size, dw_desc.value, dw.data.ptr,
            self.reserve_space.data.ptr, self.reserve_space.size)

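        # Unpack per-layer weight and bias gradients from cuDNN's packed parameter buffer.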
        dx = dx_list[0]
        dx = dx.reshape(dx.shape + (1,))
        dx_desc = cudnn.create_tensor_nd_descriptor(dx)
        dws = []
        dbs = []
        for layer in six.moves.range(self.n_layers):
            for lin_layer_id in six.moves.range(8):
                mat = cudnn.get_rnn_lin_layer_matrix_params(
                    handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                    lin_layer_id)
                dws.append(mat.reshape(ws[layer * 8 + lin_layer_id].shape))
                bias = cudnn.get_rnn_lin_layer_bias_params(
                    handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                    lin_layer_id)
                dbs.append(bias.reshape(bs[layer * 8 + lin_layer_id].shape))

        return tuple([dhx, dcx] + dws + dbs + dx_list)
Example 42
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        gx = None

        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')

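                # Use a fixed algorithm in deterministic mode; otherwise let cuDNN choose one.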
                if configuration.config.cudnn_deterministic:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA
                else:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, x_desc.value, gy_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)

                if self.requires_x_grad:
                    if configuration.config.cudnn_deterministic:
                        algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
                    else:
                        algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                            handle, self.filter_desc.value, gy_desc.value,
                            self.conv_desc.value, x_desc.value, _bwd_data_pref,
                            workspace_size)

                    gx = cuda.cupy.empty_like(x)
                    libcudnn.convolutionBackwardData_v3(
                        handle, one.data, self.filter_desc.value, W.data.ptr,
                        gy_desc.value, gy.data.ptr, self.conv_desc.value,
                        algo, workspace.data.ptr, workspace_size,
                        zero.data, x_desc.value, gx.data.ptr)
            else:
                if configuration.config.cudnn_deterministic:
                    raise ValueError(
                        "`cudnn_deterministic` option must be False "
                        "if the backpropagation of "
                        "chainer.functions.Convolution2D "
                        "uses cuDNN and cuDNN versions < v3. "
                        "Turn off cudnn_deterministic option with "
                        "`chainer.using_config('cudnn_deterministic', False)` "
                        "context.")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
                if self.requires_x_grad:
                    gx = cuda.cupy.empty_like(x)
                    libcudnn.convolutionBackwardData_v2(
                        handle, one.data, self.filter_desc.value, W.data.ptr,
                        gy_desc.value, gy.data.ptr, self.conv_desc.value,
                        zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
        else:
            gW = cuda.cupy.tensordot(
                gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                             copy=False)
            if self.requires_x_grad:
                gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                                 copy=False)
                gcol = cuda.cupy.rollaxis(gcol, 3)
                gx = conv.col2im_gpu(
                    gcol, self.sy, self.sx, self.ph, self.pw, h, w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
Example 43
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                      cover_all=self.cover_all)
        assert out_h > 0, 'Height in the output should be positive.'
        out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                      cover_all=self.cover_all)
        assert out_w > 0, 'Width in the output should be positive.'

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref,
                workspace_size)

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            libcudnn.convolutionForward(
                handle, one.data, x_desc.value, x.data.ptr,
                self.filter_desc.value, W.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size, zero.data,
                y_desc.value, y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(
                x, kh, kw, self.sy, self.sx, self.ph, self.pw,
                cover_all=self.cover_all)
            y = cuda.cupy.tensordot(
                self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                            copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,
Example 44
    def test_all_numpy_subclasses(self):
        x = numpy.array([0])
        y = numpy.array([[1], [2]])
        z = numpy.matrix('3,4; 5,6')
        self.assertTrue(T.same_types(x, y, z))
Example 45
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        gy = grad_outputs[0]
        n, in_c, in_h, in_w = x.shape
        _, out_channels, kh, kw = W.shape
        c, h, w = gy.shape[1:]
        gx = cuda.cupy.empty((n, in_c, in_h, in_w), dtype=x.dtype)

        if (cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            gy_desc = cudnn.create_tensor_descriptor(gy)
            gx_desc = cudnn.create_tensor_descriptor(gx)

            # chance to choose implicit-precomp-gemm algorithm
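            # The input gradient of a deconvolution is an ordinary forward
            # convolution of gy with the same filters, hence the use of
            # convolutionForward below.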
            workspace_size = cuda.get_max_workspace_size()
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, gy_desc.value, self.filter_desc.value,
                self.conv_desc.value, gx_desc.value, _fwd_pref,
                workspace_size)
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes

            libcudnn.convolutionForward(
                handle, one.data, gy_desc.value, gy.data.ptr,
                self.filter_desc.value, W.data.ptr,
                self.conv_desc.value, algo, workspace.data.ptr, workspace_size,
                zero.data, gx_desc.value, gx.data.ptr)
            # bias backward
            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    zero.data, self.bias_desc.value, gb.data.ptr)
            gW = cuda.cupy.empty_like(W)
            # filter backward
            if _cudnn_version >= 3000:
                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, gy_desc.value, gx_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    gx_desc.value, x.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, self.filter_desc.value, gW.data.ptr)
            else:
                if self.deterministic:
                    raise ValueError("'deterministic' option not available "
                                     "for cuDNN versions < v3")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, gy_desc.value, gy.data.ptr,
                    gx_desc.value, x.data.ptr, self.conv_desc.value,
                    zero.data, self.filter_desc.value, gW.data.ptr)
        else:
            # Implementation using im2col
            col = conv.im2col_gpu(
                gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
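            # The unfolded gy gives both gradients: contract it with x for gW
            # and with W (then rollaxis) for gx.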

            gW = cuda.cupy.tensordot(
                x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
            gx = cuda.cupy.tensordot(
                col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
            gx = cuda.cupy.rollaxis(gx, 3, 1)

            # bias backward
            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
Example 46
    def forward(self, inputs):
        (hx, cx), inputs = _split(inputs, 2)
        ws, inputs = _split(inputs, self.n_layers * 8)
        bs, inputs = _split(inputs, self.n_layers * 8)
        x_list = inputs

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together')

        hx = cuda.cupy.ascontiguousarray(hx)
        cx = cuda.cupy.ascontiguousarray(cx)

        x_desc = cudnn.create_tensor_nd_descriptor(x_list[0][..., None])

        length = len(x_list)
        n_units = hx.shape[2]

        xs = cuda.cupy.concatenate(x_list, axis=0)
        ys = cuda.cupy.empty((len(xs), n_units), dtype=xs.dtype)

        handle = cudnn.get_handle()
        self.handle = handle

        rnn_desc = cudnn.create_rnn_descriptor(n_units, self.n_layers,
                                               self.states.desc,
                                               libcudnn.CUDNN_LINEAR_INPUT,
                                               libcudnn.CUDNN_UNIDIRECTIONAL,
                                               libcudnn.CUDNN_LSTM,
                                               libcudnn.CUDNN_DATA_FLOAT)
        self.rnn_desc = rnn_desc

        c_x_descs = _make_tensor_descriptor_array(x_list)
        hx_desc = cudnn.create_tensor_nd_descriptor(hx)
        cx_desc = cudnn.create_tensor_nd_descriptor(cx)

        weights_size = libcudnn.getRNNParamsSize(handle, rnn_desc.value,
                                                 x_desc.value,
                                                 libcudnn.CUDNN_DATA_FLOAT)
        w = cuda.cupy.empty((weights_size // 4, 1, 1), dtype=numpy.float32)
        w_desc = cudnn.create_filter_descriptor(w)

        for layer in six.moves.range(self.n_layers):
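            # cuDNN packs all LSTM parameters into one flat buffer: each
            # layer owns 8 linear layers (input-to-hidden and hidden-to-hidden
            # weights for the four gates), so the given ws/bs are copied in
            # one by one.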
            for lin_layer_id in six.moves.range(8):
                mat = cudnn.get_rnn_lin_layer_matrix_params(
                    handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id)
                m = mat.reshape(mat.size)
                m[...] = ws[layer * 8 + lin_layer_id].ravel()
                bias = cudnn.get_rnn_lin_layer_bias_params(
                    handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id)
                b = bias.reshape(bias.size)
                b[...] = bs[layer * 8 + lin_layer_id]
        self.w = w
        self.w_desc = w_desc

        sections = numpy.cumsum([len(x) for x in x_list[:-1]])
        y_list = cuda.cupy.split(ys, sections)

        c_y_descs = _make_tensor_descriptor_array(y_list)
        hy = cuda.cupy.empty_like(hx)
        cy = cuda.cupy.empty_like(cx)
        hy_desc = cudnn.create_tensor_nd_descriptor(hy)
        cy_desc = cudnn.create_tensor_nd_descriptor(cy)

        work_size = libcudnn.getRNNWorkspaceSize(handle, rnn_desc.value,
                                                 length, c_x_descs.data)
        workspace = cuda.cupy.empty((work_size, ), dtype='b')

        if not self.train:
            libcudnn.RNNForwardInference(
                handle, rnn_desc.value, length, c_x_descs.data, xs.data.ptr,
                hx_desc.value, hx.data.ptr, cx_desc.value, cx.data.ptr,
                w_desc.value, w.data.ptr, c_y_descs.data, ys.data.ptr,
                hy_desc.value, hy.data.ptr, cy_desc.value, cy.data.ptr,
                workspace.data.ptr, work_size)

        else:
            reserve_size = libcudnn.getRNNTrainingReserveSize(
                handle, rnn_desc.value, length, c_x_descs.data)
            self.reserve_space = cuda.cupy.empty((reserve_size, ), dtype='b')
            libcudnn.RNNForwardTraining(
                handle, rnn_desc.value, length, c_x_descs.data, xs.data.ptr,
                hx_desc.value, hx.data.ptr, cx_desc.value, cx.data.ptr,
                w_desc.value, w.data.ptr, c_y_descs.data, ys.data.ptr,
                hy_desc.value, hy.data.ptr, cy_desc.value, cy.data.ptr,
                workspace.data.ptr, work_size, self.reserve_space.data.ptr,
                reserve_size)

        self.c_y_descs = c_y_descs
        self.ys = ys
        self.c_x_descs = c_x_descs

        return tuple([hy, cy] + y_list)
Example 47
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}, type(b): {2}'
                                 .format(type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'
                                 .format(type(W), type(x)))

        kh, kw = W.shape[2:]
        n, in_c, in_h, in_w = x.shape
        c = W.shape[1]  # out_c
        if self.outh is None:
            self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
            assert self.outh > 0, 'Height in the output should be positive.'
        if self.outw is None:
            self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
            assert self.outw > 0, 'Width in the output should be positive.'
        if (cuda.cudnn_enabled and self.use_cudnn and
                _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y = cuda.cupy.empty((n, c, self.outh, self.outw),
                                dtype=x.dtype)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(
                    b[None, :, None, None])

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
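            # The deconvolution itself is computed with cuDNN's backward-data
            # routine of an ordinary convolution, treating x as if it were an
            # output gradient of that convolution.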

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size,), dtype='b')
                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, x_desc.value,
                        self.conv_desc.value, y_desc.value, _bwd_data_pref,
                        workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, y_desc.value, y.data.ptr)
            else:
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    x_desc.value, x.data.ptr, self.conv_desc.value,
                    zero.data, y_desc.value, y.data.ptr)

            if b is not None:
                cudnn.add_tensor(
                    handle, one.data, self.bias_desc.value, b.data.ptr,
                    one.data, y_desc.value, y.data.ptr)
        else:
            gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                            copy=False)
            # - k, m, n: shape of out_channel
            # - b: number of inputs
            # - h, w: height and width of kernels
            # k, m, n, b, h, w -> b, k, m, n, h, w
            gcol = cuda.cupy.rollaxis(gcol, 3)
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
            if b is not None:
                y += b.reshape(1, b.size, 1, 1)
        return y,
Example 48
    def forward(self, inputs):
        (hx, cx), inputs = _split(inputs, 2)
        ws, inputs = _split(inputs, self.n_layers * 8)
        bs, inputs = _split(inputs, self.n_layers * 8)
        x_list = inputs

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together')

        hx = cuda.cupy.ascontiguousarray(hx)
        cx = cuda.cupy.ascontiguousarray(cx)

        x_desc = cudnn.create_tensor_nd_descriptor(x_list[0][..., None])

        length = len(x_list)
        n_units = hx.shape[2]

        xs = cuda.cupy.concatenate(x_list, axis=0)
        ys = cuda.cupy.empty((len(xs), n_units), dtype=xs.dtype)

        handle = cudnn.get_handle()
        self.handle = handle

        rnn_desc = cudnn.create_rnn_descriptor(
            n_units, self.n_layers, self.states.desc,
            libcudnn.CUDNN_LINEAR_INPUT, libcudnn.CUDNN_UNIDIRECTIONAL,
            libcudnn.CUDNN_LSTM, libcudnn.CUDNN_DATA_FLOAT)
        self.rnn_desc = rnn_desc

        c_x_descs = _make_tensor_descriptor_array(x_list)
        hx_desc = cudnn.create_tensor_nd_descriptor(hx)
        cx_desc = cudnn.create_tensor_nd_descriptor(cx)

        weights_size = libcudnn.getRNNParamsSize(
            handle, rnn_desc.value, x_desc.value, libcudnn.CUDNN_DATA_FLOAT)
        w = cuda.cupy.empty((weights_size // 4, 1, 1), dtype=numpy.float32)
        w_desc = cudnn.create_filter_descriptor(w)

        for layer in six.moves.range(self.n_layers):
            for lin_layer_id in six.moves.range(8):
                mat = cudnn.get_rnn_lin_layer_matrix_params(
                    handle, rnn_desc, layer, x_desc, w_desc, w,
                    lin_layer_id)
                m = mat.reshape(mat.size)
                m[...] = ws[layer * 8 + lin_layer_id].ravel()
                bias = cudnn.get_rnn_lin_layer_bias_params(
                    handle, rnn_desc, layer, x_desc, w_desc, w,
                    lin_layer_id)
                b = bias.reshape(bias.size)
                b[...] = bs[layer * 8 + lin_layer_id]
        self.w = w
        self.w_desc = w_desc

        sections = numpy.cumsum([len(x) for x in x_list[:-1]])
        y_list = cuda.cupy.split(ys, sections)

        c_y_descs = _make_tensor_descriptor_array(y_list)
        hy = cuda.cupy.empty_like(hx)
        cy = cuda.cupy.empty_like(cx)
        hy_desc = cudnn.create_tensor_nd_descriptor(hy)
        cy_desc = cudnn.create_tensor_nd_descriptor(cy)

        work_size = libcudnn.getRNNWorkspaceSize(
            handle, rnn_desc.value, length, c_x_descs.data)
        workspace = cuda.cupy.empty((work_size,), dtype='b')

        if not self.train:
            libcudnn.RNNForwardInference(
                handle, rnn_desc.value, length,
                c_x_descs.data, xs.data.ptr, hx_desc.value, hx.data.ptr,
                cx_desc.value, cx.data.ptr, w_desc.value, w.data.ptr,
                c_y_descs.data, ys.data.ptr, hy_desc.value, hy.data.ptr,
                cy_desc.value, cy.data.ptr, workspace.data.ptr, work_size)

        else:
            reserve_size = libcudnn.getRNNTrainingReserveSize(
                handle, rnn_desc.value, length, c_x_descs.data)
            self.reserve_space = cuda.cupy.empty((reserve_size,), dtype='b')
            libcudnn.RNNForwardTraining(
                handle, rnn_desc.value, length,
                c_x_descs.data, xs.data.ptr, hx_desc.value, hx.data.ptr,
                cx_desc.value, cx.data.ptr, w_desc.value, w.data.ptr,
                c_y_descs.data, ys.data.ptr, hy_desc.value, hy.data.ptr,
                cy_desc.value, cy.data.ptr,
                workspace.data.ptr, work_size,
                self.reserve_space.data.ptr, reserve_size)

        self.c_y_descs = c_y_descs
        self.ys = ys
        self.c_x_descs = c_x_descs

        return tuple([hy, cy] + y_list)
Example 49
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape
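        # Effective kernel extent once dilation is taken into account:
        # dkh x dkw is the span each filter covers on the padded input.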
        dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

        out_h = conv.get_conv_outsize(h,
                                      kh,
                                      self.sy,
                                      self.ph,
                                      cover_all=self.cover_all,
                                      d=self.dy)
        out_w = conv.get_conv_outsize(w,
                                      kw,
                                      self.sx,
                                      self.pw,
                                      cover_all=self.cover_all,
                                      d=self.dx)

        y = cuda.cupy.zeros((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and chainer.should_use_cudnn('>=auto')
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):

            pad_x = cuda.cupy.zeros((n, c, h + 2 * self.ph, w + 2 * self.pw),
                                    dtype=x.dtype)
            pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

            out_h_s1 = h + 2 * self.ph - dkh + 1
            out_w_s1 = w + 2 * self.pw - dkw + 1
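            # This path does not rely on cuDNN dilation support: the dilated
            # convolution is accumulated one kernel tap (j, i) at a time, each
            # tap being a strided convolution of a shifted slice of the padded
            # input with the matching 1x1 piece of W (beta = 1 adds into y).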

            for j in moves.range(kh):
                for i in moves.range(kw):
                    xji = cuda.cupy.ascontiguousarray(
                        pad_x[:, :, j * self.dy:j * self.dy + out_h_s1,
                              i * self.dx:i * self.dx + out_w_s1])
                    Wji = cuda.cupy.ascontiguousarray(W[:, :, j:j + 1,
                                                        i:i + 1])

                    if i == 0 and j == 0:
                        handle = cudnn.get_handle()
                        xji_desc = cudnn.create_tensor_descriptor(xji)
                        y_desc = cudnn.create_tensor_descriptor(y)
                        self.filter_desc = cudnn.create_filter_descriptor(Wji)
                        self.conv_desc = cudnn.create_convolution_descriptor(
                            (0, 0), (self.sy, self.sx), xji.dtype)

                        workspace_size = cuda.get_max_workspace_size()
                        workspace = cuda.cupy.empty((workspace_size, ),
                                                    dtype='b')
                        algo = libcudnn.getConvolutionForwardAlgorithm(
                            handle, xji_desc.value, self.filter_desc.value,
                            self.conv_desc.value, y_desc.value, _fwd_pref,
                            workspace_size)

                        oz_dtype = 'd' if x.dtype == 'd' else 'f'
                        one = numpy.array(1, dtype=oz_dtype).ctypes

                    libcudnn.convolutionForward(
                        handle, one.data, xji_desc.value, xji.data.ptr,
                        self.filter_desc.value, Wji.data.ptr,
                        self.conv_desc.value, algo, workspace.data.ptr,
                        workspace_size, one.data, y_desc.value, y.data.ptr)

            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)
                self.bias_desc = cudnn.create_tensor_descriptor(b[None, :,
                                                                  None, None])
                cudnn.add_tensor(handle, one.data, self.bias_desc.value,
                                 b.data.ptr, one.data, y_desc.value,
                                 y.data.ptr)
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(x,
                                       kh,
                                       kw,
                                       self.sy,
                                       self.sx,
                                       self.ph,
                                       self.pw,
                                       cover_all=self.cover_all,
                                       dy=self.dy,
                                       dx=self.dx)
            y = cuda.cupy.tensordot(self.col, W,
                                    ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                                   copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,
Example 50
    def test_all_cupy_array(self):
        x = cuda.cupy.array([0])
        y = cuda.cupy.array([1])
        z = cuda.cupy.array([2])
        self.assertTrue(T.same_types(x, y, z))
Example 51
    def backward(self, inputs, grads):
        (hx, cx), inputs = _split(inputs, 2)
        ws, inputs = _split(inputs, self.n_layers * 8)
        bs, inputs = _split(inputs, self.n_layers * 8)
        x_list = inputs

        if not type_check.same_types(*inputs):
            raise ValueError('numpy and cupy must not be used together')

        hx = cuda.cupy.ascontiguousarray(hx)
        cx = cuda.cupy.ascontiguousarray(cx)

        dhy, dcy = grads[:2]
        dy_list = list(grads[2:])
        if dhy is None:
            dhy = cuda.cupy.zeros_like(hx)
        if dcy is None:
            dcy = cuda.cupy.zeros_like(cx)
        for i in six.moves.range(len(dy_list)):
            if dy_list[i] is None:
                dy_list[i] = cuda.cupy.zeros_like(x_list[i])

        xs = cuda.cupy.concatenate(x_list, axis=0)
        length = len(x_list)

        dhx = cuda.cupy.empty_like(hx)
        dcx = cuda.cupy.empty_like(cx)

        hx_desc = cudnn.create_tensor_nd_descriptor(hx)
        cx_desc = cudnn.create_tensor_nd_descriptor(cx)
        dhy_desc = cudnn.create_tensor_nd_descriptor(dhy)
        dcy_desc = cudnn.create_tensor_nd_descriptor(dcy)

        c_dy_descs = _make_tensor_descriptor_array(dy_list)
        dys = cuda.cupy.concatenate(dy_list, axis=0)

        rnn_desc = self.rnn_desc
        handle = self.handle
        work_size = libcudnn.getRNNWorkspaceSize(handle, rnn_desc.value,
                                                 length, self.c_x_descs.data)
        workspace = cuda.cupy.empty((work_size, ), dtype='b')

        dhx_desc = cudnn.create_tensor_nd_descriptor(dhx)
        dcx_desc = cudnn.create_tensor_nd_descriptor(dcx)

        dxs = cuda.cupy.empty_like(xs)
        sections = numpy.cumsum([len(x) for x in x_list[:-1]])
        dx_list = cuda.cupy.split(dxs, sections, 0)
        c_dx_descs = _make_tensor_descriptor_array(dx_list)
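        # RNNBackwardData propagates the output gradients back through time
        # and layers; RNNBackwardWeights below then accumulates the parameter
        # gradients using the same workspace and the reserve space saved
        # during the forward pass.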

        libcudnn.RNNBackwardData(
            handle, rnn_desc.value, length, self.c_y_descs.data,
            self.ys.data.ptr, c_dy_descs.data, dys.data.ptr, dhy_desc.value,
            dhy.data.ptr, dcy_desc.value, dcy.data.ptr, self.w_desc.value,
            self.w.data.ptr, hx_desc.value, hx.data.ptr, cx_desc.value,
            cx.data.ptr, c_dx_descs.data, dxs.data.ptr, dhx_desc.value,
            dhx.data.ptr, dcx_desc.value, dcx.data.ptr, workspace.data.ptr,
            work_size, self.reserve_space.data.ptr, self.reserve_space.size)

        dw = cuda.cupy.zeros_like(self.w)
        dw_desc = cudnn.create_filter_descriptor(dw)
        libcudnn.RNNBackwardWeights(
            handle, rnn_desc.value, length, self.c_x_descs.data, xs.data.ptr,
            hx_desc.value, hx.data.ptr, self.c_y_descs.data, self.ys.data.ptr,
            workspace.data.ptr, work_size, dw_desc.value, dw.data.ptr,
            self.reserve_space.data.ptr, self.reserve_space.size)

        dx = dx_list[0]
        dx = dx.reshape(dx.shape + (1, ))
        dx_desc = cudnn.create_tensor_nd_descriptor(dx)
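        # Slice the flat cuDNN weight-gradient buffer back into per-layer,
        # per-gate matrices and biases matching the shapes of ws and bs.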
        dws = []
        dbs = []
        for layer in six.moves.range(self.n_layers):
            for lin_layer_id in six.moves.range(8):
                mat = cudnn.get_rnn_lin_layer_matrix_params(
                    handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                    lin_layer_id)
                dws.append(mat.reshape(ws[layer * 8 + lin_layer_id].shape))
                bias = cudnn.get_rnn_lin_layer_bias_params(
                    handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                    lin_layer_id)
                dbs.append(bias.reshape(bs[layer * 8 + lin_layer_id].shape))

        return tuple([dhx, dcx] + dws + dbs + dx_list)
Example 52
    def forward_gpu(self, inputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        out_c, _, kh, kw = W.shape
        n, c, h, w = x.shape

        out_h = conv.get_conv_outsize(h,
                                      kh,
                                      self.sy,
                                      self.ph,
                                      cover_all=self.cover_all)
        assert out_h > 0, 'Height in the output should be positive.'
        out_w = conv.get_conv_outsize(w,
                                      kw,
                                      self.sx,
                                      self.pw,
                                      cover_all=self.cover_all)
        assert out_w > 0, 'Width in the output should be positive.'

        y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            if b is not None:
                b = cuda.cupy.ascontiguousarray(b)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            y_desc = cudnn.create_tensor_descriptor(y)

            self.filter_desc = cudnn.create_filter_descriptor(W)
            self.conv_desc = cudnn.create_convolution_descriptor(
                (self.ph, self.pw), (self.sy, self.sx), x.dtype)
            if b is not None:
                self.bias_desc = cudnn.create_tensor_descriptor(b[None, :,
                                                                  None, None])

            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size, ), dtype='b')
            algo = libcudnn.getConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref, workspace_size)

            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            libcudnn.convolutionForward(handle, one.data, x_desc.value,
                                        x.data.ptr, self.filter_desc.value,
                                        W.data.ptr, self.conv_desc.value, algo,
                                        workspace.data.ptr, workspace_size,
                                        zero.data, y_desc.value, y.data.ptr)

            # TODO(beam2d): Support unshared bias
            if b is not None:
                cudnn.add_tensor(handle, one.data, self.bias_desc.value,
                                 b.data.ptr, one.data, y_desc.value,
                                 y.data.ptr)
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(x,
                                       kh,
                                       kw,
                                       self.sy,
                                       self.sx,
                                       self.ph,
                                       self.pw,
                                       cover_all=self.cover_all)
            y = cuda.cupy.tensordot(self.col, W,
                                    ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                                   copy=False)
            # TODO(beam2d): Support unshared bias
            if b is not None:
                y += b
            y = cuda.cupy.rollaxis(y, 3, 1)

        return y,
Example 53
    def test_numpy_cupy_mixed_2(self):
        x = cuda.cupy.array([0])
        y = numpy.array([1])
        z = cuda.cupy.array([2])
        self.assertFalse(T.same_types(x, y, z))
Example 54
    def backward_gpu(self, inputs, grad_outputs):
        x, W = inputs[:2]
        b = inputs[2] if len(inputs) == 3 else None

        if not type_check.same_types(*inputs):
            if b is not None:
                raise ValueError(
                    'numpy and cupy must not be used together\n'
                    'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                        type(W), type(x), type(b)))
            else:
                raise ValueError('numpy and cupy must not be used together\n'
                                 'type(W): {0}, type(x): {1}'.format(
                                     type(W), type(x)))

        gy = grad_outputs[0]
        _, out_c, out_h, out_w = gy.shape
        n, c, h, w = x.shape
        kh, kw = W.shape[2:]

        gW = cuda.cupy.empty_like(W)
        if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn
                and _check_cudnn_acceptable_type(x.dtype, W.dtype)):
            x = cuda.cupy.ascontiguousarray(x)
            W = cuda.cupy.ascontiguousarray(W)
            gy = cuda.cupy.ascontiguousarray(gy)

            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x)
            gy_desc = cudnn.create_tensor_descriptor(gy)
            oz_dtype = 'd' if x.dtype == 'd' else 'f'
            one = numpy.array(1, dtype=oz_dtype).ctypes
            zero = numpy.array(0, dtype=oz_dtype).ctypes
            gx = cuda.cupy.empty_like(x)
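            # cuDNN v3+ selects explicit algorithms for the filter and data
            # gradients (deterministic mode forces the ALGO_1 variants);
            # older versions fall back to the v2 entry points below.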

            if _cudnn_version >= 3000:
                workspace_size = cuda.get_max_workspace_size()
                workspace = cuda.cupy.empty((workspace_size, ), dtype='b')

                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                        handle, x_desc.value, gy_desc.value,
                        self.conv_desc.value, self.filter_desc.value,
                        _bwd_filter_pref, workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA

                libcudnn.convolutionBackwardFilter_v3(
                    handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                    gy.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    self.filter_desc.value, gW.data.ptr)

                if not self.deterministic:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, gy_desc.value,
                        self.conv_desc.value, x_desc.value, _bwd_data_pref,
                        workspace_size)
                else:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA

                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value, algo,
                    workspace.data.ptr, workspace_size, zero.data,
                    x_desc.value, gx.data.ptr)
            else:
                if self.deterministic:
                    raise ValueError("'deterministic' option not available "
                                     "for cuDNN versions < v3")
                libcudnn.convolutionBackwardFilter_v2(
                    handle, one.data, x_desc.value, x.data.ptr, gy_desc.value,
                    gy.data.ptr, self.conv_desc.value, zero.data,
                    self.filter_desc.value, gW.data.ptr)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

            if b is not None:
                gb = cuda.cupy.empty_like(b)
                libcudnn.convolutionBackwardBias(handle, one.data,
                                                 gy_desc.value, gy.data.ptr,
                                                 zero.data,
                                                 self.bias_desc.value,
                                                 gb.data.ptr)
        else:
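            # im2col fallback: self.col was cached by forward_gpu, so the
            # filter and input gradients reduce to two tensordot contractions
            # and a col2im.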
            gW = cuda.cupy.tensordot(gy, self.col,
                                     ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                                    copy=False)
            gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                             copy=False)
            gcol = cuda.cupy.rollaxis(gcol, 3)

            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

            if b is not None:
                gb = gy.sum(axis=(0, 2, 3))

        if b is None:
            return gx, gW
        else:
            return gx, gW, gb
Example 55
    def test_all_numpy_array(self):
        x = numpy.array([0])
        y = numpy.array([1])
        z = numpy.array([2])
        self.assertTrue(T.same_types(x, y, z))
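A minimal, self-contained sketch of the check these tests exercise, assuming only NumPy and chainer.utils.type_check are available (no GPU needed):

import numpy

from chainer.utils import type_check

# same_types is True only when every argument comes from the same array
# module (all NumPy here, or all CuPy); the forward/backward implementations
# above turn a False result into the 'numpy and cupy must not be used
# together' ValueError.
a = numpy.zeros((2, 3), dtype=numpy.float32)
b = numpy.ones((2, 3), dtype=numpy.float32)
print(type_check.same_types(a, b))  # -> True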