def forward(self, inputs):
    x = _as_mat(inputs[0])
    W = inputs[1]
    xp = cuda.get_array_module(*x)
    olen, ilen = W.shape
    if self.coeffs is None:
        self.coeffs = numpy.ones(ilen)
    coeffs = numpy.copy(self.coeffs)
    coeffs = numpy.expand_dims(coeffs, 0)
    coeffs = numpy.broadcast_to(coeffs, W.shape)
    M = xp.asarray(coeffs, numpy.float32).reshape(W.shape)
    self.M = M
    W = self.M * W
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'
                         .format(type(W), type(x)))
    y = x.dot(W.T).astype(x.dtype, copy=False)
    if len(inputs) == 3:
        b = inputs[2]
        y += b
    return y,
def test_all_numpy_subclasses(self):
    x = numpy.array([0])
    y = numpy.array([[1], [2]])
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        z = numpy.matrix('3,4; 5,6')
    self.assertTrue(T.same_types(x, y, z))
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'
    y = conv.col2im_cpu(
        gcol, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,
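# Illustrative note (an addition, not part of the library code above): for
# stride s, kernel size k and padding p, the transposed convolution needs an
# output size of s * (size - 1) + k - 2 * p, which is what
# conv.get_deconv_outsize is assumed to compute when cover_all is False.
# `deconv_outsize_sketch` is a hypothetical stand-in for that helper.
def deconv_outsize_sketch(size, k, s, p):
    return s * (size - 1) + k - 2 * p


# A 7-pixel map with k=4, s=2, p=1 upsamples to 14 pixels; the forward
# convolution of a 14-pixel map with the same k, s, p gives back 7.
assert deconv_outsize_sketch(7, 4, 2, 1) == 14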
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    olen, ilen, hlen, wlen = W.shape
    if self.coeffs is None:
        self.coeffs = numpy.ones(ilen)
    coeffs = numpy.copy(self.coeffs)
    coeffs = numpy.expand_dims(coeffs, 1)
    coeffs = numpy.expand_dims(coeffs, 1)
    coeffs = numpy.expand_dims(coeffs, 0)
    coeffs = numpy.broadcast_to(coeffs, W.shape)
    M = numpy.asarray(coeffs, numpy.float32).reshape(W.shape)
    self.M = M
    W = self.M * W
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all)
    y = numpy.tensordot(
        self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward(self, inputs):
    self.retain_inputs((0,))
    x, W = inputs
    self._w_shape = W.shape
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'.format(
                             type(W), type(x)))
    xp = cuda.get_array_module(*inputs)
    if chainer.is_debug():
        valid_x = xp.logical_and(0 <= x, x < len(W))
        if self.ignore_label is not None:
            valid_x = xp.logical_or(valid_x, x == self.ignore_label)
        if not valid_x.all():
            raise ValueError('Each not-ignored `x` value needs to satisfy '
                             '`0 <= x < len(W)`')
    if self.ignore_label is not None:
        mask = (x == self.ignore_label)
        return xp.where(mask[..., None], 0,
                        W.take(xp.where(mask, 0, x), axis=0)),
    return W.take(x, axis=0),
def forward(self, inputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(e1): {1}, type(e2): {2}'
                         .format(type(W), type(e1), type(e2)))

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
    else:
        i_len, j_len = e1.shape
        k_len = e2.shape[1]
        # 'ij,ik->ijk'
        e1e2 = e1[:, :, None] * e2[:, None, :]
        # ijk->i[jk]
        e1e2 = e1e2.reshape(i_len, j_len * k_len)
        # jkl->[jk]l
        W_mat = W.reshape(-1, W.shape[2])
        # 'i[jk],[jk]l->il'
        y = e1e2.dot(W_mat)

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    return y,
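# A quick numpy check (an illustrative addition, not library code) that the
# reshape/dot factorization used on the GPU branch above matches the einsum
# 'ij,ik,jkl->il' used on the CPU branch; the shapes are arbitrary examples.
import numpy

e1 = numpy.random.rand(5, 3)
e2 = numpy.random.rand(5, 4)
W = numpy.random.rand(3, 4, 2)
y_einsum = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
# Outer product over the batch, flattened to i[jk], then one matmul with
# W flattened to [jk]l -- the same contraction over j and k.
e1e2 = (e1[:, :, None] * e2[:, None, :]).reshape(5, 3 * 4)
y_dot = e1e2.dot(W.reshape(-1, 2))
assert numpy.allclose(y_einsum, y_dot)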
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))
    gy = grad_outputs[0]
    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(
        gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
    gW = numpy.tensordot(
        x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
    gx = numpy.tensordot(
        col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
    gx = numpy.rollaxis(gx, 3, 1)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))
    kh, kw = W.shape[2:]
    _, _, h, w = x.shape
    gcol = numpy.tensordot(W, x, (0, 1)).astype(x.dtype, copy=False)
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    gcol = numpy.rollaxis(gcol, 3)
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'
    y = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                        self.outh, self.outw)
    # b, k, h, w
    if b is not None:
        y += b.reshape(1, b.size, 1, 1)
    return y,
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
    gcol = numpy.rollaxis(gcol, 3)
    gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
def forward(self, inputs):
    x = inputs[0]
    W = inputs[1]
    if ia.all_ready(inputs):
        return self.forward_ia(inputs)

    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'.format(
                             type(W), type(x)))

    # NumPy raises an error when the array is not contiguous.
    # See: https://github.com/chainer/chainer/issues/2744
    # TODO(niboshi): Remove this code when NumPy is fixed.
    if (isinstance(x, numpy.ndarray) and
            not (x.flags.c_contiguous or x.flags.f_contiguous) and
            1 in x.shape):
        x = numpy.ascontiguousarray(x)

    y = x.dot(W.T).astype(x.dtype, copy=False)
    if len(inputs) == 3:
        b = inputs[2]
        y += b
    self.retain_inputs((0, 1))  # b is not retained
    return y,
def forward(self, inputs):
    self.retain_inputs((0, 1))  # only retain x and W
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))
    if self.outs is None:
        dims = x.shape[2:]
        ksize = W.shape[2:]
        self.outs = tuple(
            conv.get_deconv_outsize(d, k, s, p)
            for d, k, s, p in zip(dims, ksize, self.stride, self.pad))
        assert all(out > 0 for out in self.outs), \
            'Output sizes should be positive.'
    self._set_cover_all(x, W)

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        return self._forward_xp(x, W, b, numpy)
    elif self._use_cudnn(x, W):
        return self._forward_cudnn(x, W, b)
    else:
        return self._forward_xp(x, W, b, cuda.cupy)
def test_all_numpy_subclasses(self):
    x = numpy.array([0])
    y = numpy.array([[1], [2]])
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        z = numpy.matrix("3,4; 5,6")
    self.assertTrue(T.same_types(x, y, z))
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]
    kh, kw = W.shape[2:]
    col = conv.im2col_cpu(
        gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
    gW = numpy.tensordot(
        x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
    gx = numpy.tensordot(
        col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
    gx = numpy.rollaxis(gx, 3, 1)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
def backward(self, inputs, grad_outputs):
    x = _as_mat(inputs[0])
    r = inputs[1]
    gy = grad_outputs[0]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(r): {0}, type(x): {1}'.format(
                             type(r), type(x)))
    R = np.array([[np.cos(r), -np.sin(r)],
                  [np.sin(r), np.cos(r)]]).reshape(2, 2)
    gx = gy.dot(R.T).astype(x.dtype, copy=False).reshape(inputs[0].shape)
    dR = np.array([[-np.sin(r), -np.cos(r)],
                   [np.cos(r), -np.sin(r)]]).reshape(2, 2)
    xdR = x.dot(dR.T).astype(x.dtype, copy=False)
    gr = (xdR * gy).sum().reshape(r.shape)
    if len(inputs) == 3:
        gb = gy.sum(0)
        return gx, gr, gb
    else:
        return gx, gr
def forward(self, inputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError(
            'numpy and cupy must not be used together\n'
            'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                type(W), type(e1), type(e2)))

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
    else:
        i_len, j_len = e1.shape
        k_len = e2.shape[1]
        # 'ij,ik->ijk'
        e1e2 = e1[:, :, None] * e2[:, None, :]
        # ijk->i[jk]
        e1e2 = e1e2.reshape(i_len, j_len * k_len)
        # jkl->[jk]l
        W_mat = W.reshape(-1, W.shape[2])
        # 'i[jk],[jk]l->il'
        y = e1e2.dot(W_mat)

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    return y,
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    if not self.requires_x_grad:
        gx = None
    else:
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                             h, w)
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        return gx, gW, gb
def backward_cpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    if self.bcoeffs is not None:
        olen, ilen, hlen, wlen = W.shape
        if self.coeffs is None:
            self.coeffs = numpy.ones(ilen)
        coeffs = numpy.copy(self.bcoeffs)
        coeffs = numpy.expand_dims(coeffs, 1)
        coeffs = numpy.expand_dims(coeffs, 1)
        coeffs = numpy.expand_dims(coeffs, 0)
        coeffs = numpy.broadcast_to(coeffs, W.shape)
        self.mW = numpy.asarray(coeffs, numpy.float32).reshape(W.shape)
    if self.ocoeffs is not None:
        coeffs = numpy.copy(self.ocoeffs)
        self.mb = numpy.asarray(coeffs, numpy.float32)
    W = self.M * W
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]
    h, w = x.shape[2:]
    gW = numpy.tensordot(
        gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype, copy=False)
    if not self.requires_x_grad:
        gx = None
    else:
        gcol = numpy.tensordot(W, gy, (0, 1)).astype(x.dtype, copy=False)
        gcol = numpy.rollaxis(gcol, 3)
        gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw,
                             h, w)
    if hasattr(self, 'mW'):
        gW = self.mW * gW
    if hasattr(self, 'mb'):
        xp = cuda.get_array_module(*x)
        gW = xp.broadcast_to(
            xp.expand_dims(
                xp.expand_dims(xp.expand_dims(self.mb, 1), 1), 1),
            gW.shape) * gW
    if b is None:
        return gx, gW
    else:
        gb = gy.sum(axis=(0, 2, 3))
        if hasattr(self, 'mb'):
            gb = self.mb * gb
        return gx, gW, gb
def forward(self, inputs):
    x = inputs[0]
    r = inputs[1]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(r): {0}, type(x): {1}'.format(
                             type(r), type(x)))
    xp = cuda.get_array_module(*inputs)
    rmat = self.rodrigues(r, xp)
    y = x.dot(rmat.T).astype(x.dtype, copy=False)
    return y,
def forward(self, inputs):
    x = _as_mat(inputs[0])
    W = inputs[1]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'
                         .format(type(W), type(x)))
    y = x.dot(W.T).astype(x.dtype, copy=False)
    if len(inputs) == 3:
        b = inputs[2]
        y += b
    return y,
def forward(self, inputs):
    x = _as_mat(inputs[0])
    W = inputs[1]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'.format(
                             type(W), type(x)))
    y = x.dot(W.T).astype(x.dtype, copy=False)
    if len(inputs) == 3:
        b = inputs[2]
        y += b
    return y,
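# The numpy/cupy guard above is repeated almost verbatim in every function in
# this collection. A minimal sketch of how it could be factored out;
# `_check_same_types` is a hypothetical helper, not an existing chainer API.
from chainer.utils import type_check


def _check_same_types(inputs, names):
    # Raise the same error the inlined guards raise, listing each input type.
    if not type_check.same_types(*inputs):
        detail = ', '.join(
            'type({0}): {1}'.format(name, type(a))
            for name, a in zip(names, inputs))
        raise ValueError(
            'numpy and cupy must not be used together\n' + detail)

# Example use inside a forward(): _check_same_types(inputs, ('x', 'W', 'b'))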
def backward(self, inputs, grad_outputs):
    x = _as_mat(inputs[0])
    W = inputs[1]
    gy = grad_outputs[0]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'
                         .format(type(W), type(x)))
    gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
    gW = gy.T.dot(x).astype(W.dtype, copy=False)
    if len(inputs) == 3:
        gb = gy.sum(0)
        return gx, gW, gb
    else:
        return gx, gW
def forward(self, inputs):
    self.retain_inputs((0, 1))
    if (ia.all_ready(inputs) and
            self.W_dtype == numpy.dtype('float32')):
        return self.forward_ia(inputs)

    x, gy = inputs
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(x): {0}, type(gy): {1}'.format(
                             type(x), type(gy)))
    if (isinstance(gy, numpy.ndarray) and
            not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
            1 in gy.shape):
        gy = numpy.ascontiguousarray(gy)
    gW = gy.T.dot(x).astype(self.W_dtype, copy=False)
    return gW,
def backward(self, inputs, grad_outputs):
    x = _as_mat(inputs[0])
    W = inputs[1]
    gy = grad_outputs[0]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'.format(
                             type(W), type(x)))
    gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
    gW = gy.T.dot(x).astype(W.dtype, copy=False)
    if len(inputs) == 3:
        gb = gy.sum(0)
        return gx, gW, gb
    else:
        return gx, gW
def forward(self, inputs):
    x = _as_mat(inputs[0])
    r = inputs[1]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(r): {0}, type(x): {1}'.format(
                             type(r), type(x)))
    R = np.array([[np.cos(r), -np.sin(r)],
                  [np.sin(r), np.cos(r)]]).reshape(2, 2)
    y = x.dot(R.T).astype(x.dtype, copy=False)
    if len(inputs) == 3:
        b = inputs[2]
        y += b
    return y,
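# Worked example (an illustrative addition, not library code): with
# r = pi / 2 the rotation matrix R built above sends the row vector (1, 0)
# to (0, 1), since y = x.dot(R.T) rotates each row of x by angle r.
import numpy as np

r = np.pi / 2
R = np.array([[np.cos(r), -np.sin(r)],
              [np.sin(r), np.cos(r)]])
y = np.array([[1.0, 0.0]]).dot(R.T)
assert np.allclose(y, [[0.0, 1.0]])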
def backward(self, inputs, grad_outputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError(
            'numpy and cupy must not be used together\n'
            'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                type(W), type(e1), type(e2)))

    gy = grad_outputs[0]

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb

    return ret
def backward(self, inputs, grad_outputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(e1): {1}, type(e2): {2}'
                         .format(type(W), type(e1), type(e2)))

    gy = grad_outputs[0]

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')

        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl

        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'

    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb

    return ret
def forward(self, inputs):
    self.retain_inputs(tuple(range(len(inputs))))

    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(e1): {1}, type(e2): {2}'
                         .format(type(W), type(e1), type(e2)))

    xp = cuda.get_array_module(*inputs)
    y = xp.einsum('ij,ik,jkl->il', e1, e2, W)

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    return y,
def forward(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        return self._forward_xp(x, W, b, numpy)
    elif not self._use_cudnn(x, W):
        return self._forward_xp(x, W, b, cuda.cupy)
    else:
        return self._forward_cudnn(x, W, b)
def forward(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        return self._forward_xp(x, W, b, numpy)
    elif self._use_cudnn(x, W):
        return self._forward_cudnn(x, W, b)
    else:
        return self._forward_xp(x, W, b, cuda.cupy)
def forward(self, inputs):
    self.retain_inputs(tuple(range(len(inputs))))

    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    if not type_check.same_types(*inputs):
        raise ValueError(
            'numpy and cupy must not be used together\n'
            'type(W): {0}, type(e1): {1}, type(e2): {2}'.format(
                type(W), type(e1), type(e2)))

    xp = cuda.get_array_module(*inputs)
    y = xp.einsum('ij,ik,jkl->il', e1, e2, W)

    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    return y,
def backward(self, inputs, grad_outputs):
    x = _as_mat(inputs[0])
    W = inputs[1]
    if self.bcoeffs is not None:
        xp = cuda.get_array_module(*x)
        coeffs = numpy.copy(self.bcoeffs)
        coeffs = numpy.expand_dims(coeffs, 0)
        coeffs = numpy.broadcast_to(coeffs, W.shape)
        self.mW = xp.asarray(coeffs, numpy.float32).reshape(W.shape)
    if self.ocoeffs is not None:
        xp = cuda.get_array_module(*x)
        coeffs = numpy.copy(self.ocoeffs)
        self.mb = xp.asarray(coeffs, numpy.float32)
    W = self.M * W
    gy = grad_outputs[0]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'
                         .format(type(W), type(x)))
    gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
    gW = gy.T.dot(x).astype(W.dtype, copy=False)
    if hasattr(self, 'mW'):
        gW = self.mW * gW
    if hasattr(self, 'mb'):
        xp = cuda.get_array_module(*x)
        gW = xp.broadcast_to(xp.expand_dims(self.mb, 1), gW.shape) * gW
    if len(inputs) == 3:
        gb = gy.sum(0)
        if hasattr(self, 'mb'):
            gb = self.mb * gb
        return gx, gW, gb
    else:
        return gx, gW
def backward(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]  # (n, c_O, out_1, out_2, ..., out_N)

    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        return self._backward_xp(x, W, b, gy, numpy)
    elif not self._use_cudnn(x, W):
        return self._backward_xp(x, W, b, gy, cuda.cupy)
    else:
        return self._backward_cudnn(x, W, b, gy)
def forward_cpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    kh, kw = W.shape[2:]
    self.col = conv.im2col_cpu(
        x, kh, kw, self.sy, self.sx, self.ph, self.pw,
        cover_all=self.cover_all, dy=self.dy, dx=self.dx)
    y = numpy.tensordot(
        self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype, copy=False)
    if b is not None:
        y += b
    return numpy.rollaxis(y, 3, 1),
def forward(self, inputs):
    x = inputs[0]
    W = inputs[1]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'
                         .format(type(W), type(x)))

    # NumPy raises an error when the array is not contiguous.
    # See: https://github.com/chainer/chainer/issues/2744
    # TODO(niboshi): Remove this code when NumPy is fixed.
    if (isinstance(x, numpy.ndarray) and
            not (x.flags.c_contiguous or x.flags.f_contiguous) and
            1 in x.shape):
        x = numpy.ascontiguousarray(x)

    y = x.dot(W.T).astype(x.dtype, copy=False)
    if len(inputs) == 3:
        b = inputs[2]
        y += b
    self.retain_inputs((0, 1))  # b is not retained
    return y,
def forward(self, inputs):
    a_h, b_h, k_h, cs, k_prev = inputs
    if not type_check.same_types(*inputs):
        raise ValueError(
            "numpy and cupy must not be used together\n"
            "type(a_h): {0}, type(b_h): {1}, type(k_h): {2}, "
            "type(cs): {3}, type(k_prev): {4}"
            .format(type(a_h), type(b_h), type(k_h), type(cs),
                    type(k_prev)))

    batch_size, W, u = cs.shape
    K = a_h.shape[1]
    xp = cuda.get_array_module(*inputs)

    a_h = xp.exp(a_h).reshape((batch_size, K, 1))
    b_h = xp.exp(b_h).reshape((batch_size, K, 1))
    k_h = k_prev + xp.exp(k_h)
    k_h = xp.reshape(k_h, (batch_size, K, 1))
    self.a_h = a_h
    self.b_h = b_h
    self.k_h = k_h

    # Compute phi's parameters
    us = xp.arange(u, dtype=xp.float32).reshape((1, 1, u))
    phi = a_h * xp.exp(-b_h * xp.square(k_h - us))
    self.phi = phi
    phi = xp.sum(phi, axis=1)

    # Finalize the soft window computation
    w = xp.matmul(cs, phi.reshape((batch_size, u, 1)))
    return w.reshape((batch_size, W)), k_h.reshape((batch_size, K)), phi
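# Tiny numpy illustration (an addition for clarity, not library code) of the
# mixture-of-Gaussians window weights computed above:
# phi[i, u] = sum_k a[i, k] * exp(-b[i, k] * (k_pos[i, k] - u)^2).
# All shapes and values below are made-up examples.
import numpy

a = numpy.ones((1, 2, 1))          # (batch, K, 1) mixture amplitudes
b = numpy.full((1, 2, 1), 0.5)     # (batch, K, 1) widths
k = numpy.array([[[1.0], [3.0]]])  # (batch, K, 1) window positions
us = numpy.arange(5.0).reshape(1, 1, 5)
phi = (a * numpy.exp(-b * numpy.square(k - us))).sum(axis=1)  # (batch, u)
# phi peaks near positions 1 and 3 of the length-5 character sequence, so the
# matmul with the one-hot matrix cs attends to those characters.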
def forward(self, inputs):
    self.retain_inputs((0,))
    x, W = inputs
    self._w_shape = W.shape
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'
                         .format(type(W), type(x)))
    xp = cuda.get_array_module(*inputs)
    if chainer.is_debug():
        valid_x = xp.logical_and(0 <= x, x < len(W))
        if self.ignore_label is not None:
            valid_x = xp.logical_or(valid_x, x == self.ignore_label)
        if not valid_x.all():
            raise ValueError('Each not-ignored `x` value needs to satisfy '
                             '`0 <= x < len(W)`')
    if self.ignore_label is not None:
        mask = (x == self.ignore_label)
        return xp.where(mask[..., None], 0, W[xp.where(mask, 0, x)]),
    return W[x],
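# Illustrative numpy sketch (not library code) of the ignore_label trick used
# above: masked indices are first redirected to row 0 so the lookup is always
# in bounds, and the corresponding output rows are then zeroed with `where`.
# The -1 label and the shapes are made-up examples.
import numpy

W = numpy.arange(8.0).reshape(4, 2)
x = numpy.array([2, -1, 3])        # -1 plays the role of ignore_label
mask = (x == -1)
out = numpy.where(mask[..., None], 0, W[numpy.where(mask, 0, x)])
# out rows: W[2], zeros, W[3]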
def backward(self, inputs, grad_outputs):
    x = _as_mat(inputs[0])
    W = inputs[1]
    xp = cuda.get_array_module(*x)
    # deterministic
    W = xp.where(W >= 0, 1, -1).astype(numpy.float32, copy=False)
    W = self.M * W
    gy = grad_outputs[0]
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together\n'
                         'type(W): {0}, type(x): {1}'
                         .format(type(W), type(x)))
    gx = gy.dot(W).astype(x.dtype, copy=False).reshape(inputs[0].shape)
    gW = gy.T.dot(x).astype(W.dtype, copy=False)
    if len(inputs) == 3:
        gb = gy.sum(0)
        return gx, gW, gb
    else:
        return gx, gW
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    out_c, _, kh, kw = W.shape
    n, c, h, w = x.shape
    dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

    out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                  cover_all=self.cover_all, d=self.dy)
    out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                  cover_all=self.cover_all, d=self.dx)

    y = cuda.cupy.zeros((n, out_c, out_h, out_w), dtype=x.dtype)
    if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        pad_x = cuda.cupy.zeros(
            (n, c, h + 2 * self.ph, w + 2 * self.pw), dtype=x.dtype)
        pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

        out_h_s1 = h + 2 * self.ph - dkh + 1
        out_w_s1 = w + 2 * self.pw - dkw + 1

        for j in moves.range(kh):
            for i in moves.range(kw):
                xji = cuda.cupy.ascontiguousarray(
                    pad_x[:, :, j * self.dy:j * self.dy + out_h_s1,
                          i * self.dx:i * self.dx + out_w_s1])
                Wji = cuda.cupy.ascontiguousarray(
                    W[:, :, j:j + 1, i:i + 1])

                if i == 0 and j == 0:
                    handle = cudnn.get_handle()
                    xji_desc = cudnn.create_tensor_descriptor(xji)
                    y_desc = cudnn.create_tensor_descriptor(y)
                    self.filter_desc = cudnn.create_filter_descriptor(Wji)
                    self.conv_desc = cudnn.create_convolution_descriptor(
                        (0, 0), (self.sy, self.sx), xji.dtype)

                    workspace_size = cuda.get_max_workspace_size()
                    workspace = cuda.cupy.empty(
                        (workspace_size,), dtype='b')
                    algo = libcudnn.getConvolutionForwardAlgorithm(
                        handle, xji_desc.value, self.filter_desc.value,
                        self.conv_desc.value, y_desc.value, _fwd_pref,
                        workspace_size)

                    oz_dtype = 'd' if x.dtype == 'd' else 'f'
                    one = numpy.array(1, dtype=oz_dtype).ctypes

                libcudnn.convolutionForward(
                    handle, one.data, xji_desc.value, xji.data.ptr,
                    self.filter_desc.value, Wji.data.ptr,
                    self.conv_desc.value, algo, workspace.data.ptr,
                    workspace_size, one.data, y_desc.value, y.data.ptr)

        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)
            self.bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])
            cudnn.add_tensor(
                handle, one.data, self.bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)
    else:
        # Implementation using im2col
        self.col = conv.im2col_gpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
        y = cuda.cupy.tensordot(
            self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                        copy=False)
        # TODO(beam2d): Support unshared bias
        if b is not None:
            y += b
        y = cuda.cupy.rollaxis(y, 3, 1)

    return y,
def backward(self, inputs, grads):
    (hx, cx), inputs = _split(inputs, 2)
    ws, inputs = _split(inputs, self.n_layers * 8)
    bs, inputs = _split(inputs, self.n_layers * 8)
    x_list = inputs
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together')

    hx = cuda.cupy.ascontiguousarray(hx)
    cx = cuda.cupy.ascontiguousarray(cx)

    dhy, dcy = grads[:2]
    dy_list = list(grads[2:])
    if dhy is None:
        dhy = cuda.cupy.zeros_like(hx)
    if dcy is None:
        dcy = cuda.cupy.zeros_like(cx)
    for i in six.moves.range(len(dy_list)):
        if dy_list[i] is None:
            dy_list[i] = cuda.cupy.zeros_like(x_list[i])

    xs = cuda.cupy.concatenate(x_list, axis=0)
    length = len(x_list)

    dhx = cuda.cupy.empty_like(hx)
    dcx = cuda.cupy.empty_like(cx)

    hx_desc = cudnn.create_tensor_nd_descriptor(hx)
    cx_desc = cudnn.create_tensor_nd_descriptor(cx)
    dhy_desc = cudnn.create_tensor_nd_descriptor(dhy)
    dcy_desc = cudnn.create_tensor_nd_descriptor(dcy)

    c_dy_descs = _make_tensor_descriptor_array(dy_list)
    dys = cuda.cupy.concatenate(dy_list, axis=0)

    rnn_desc = self.rnn_desc
    handle = self.handle
    work_size = libcudnn.getRNNWorkspaceSize(
        handle, rnn_desc.value, length, self.c_x_descs.data)
    workspace = cuda.cupy.empty((work_size,), dtype='b')

    dhx_desc = cudnn.create_tensor_nd_descriptor(dhx)
    dcx_desc = cudnn.create_tensor_nd_descriptor(dcx)

    dxs = cuda.cupy.empty_like(xs)
    sections = numpy.cumsum([len(x) for x in x_list[:-1]])
    dx_list = cuda.cupy.split(dxs, sections, 0)
    c_dx_descs = _make_tensor_descriptor_array(dx_list)

    libcudnn.RNNBackwardData(
        handle, rnn_desc.value, length,
        self.c_y_descs.data, self.ys.data.ptr,
        c_dy_descs.data, dys.data.ptr, dhy_desc.value, dhy.data.ptr,
        dcy_desc.value, dcy.data.ptr, self.w_desc.value, self.w.data.ptr,
        hx_desc.value, hx.data.ptr, cx_desc.value, cx.data.ptr,
        c_dx_descs.data, dxs.data.ptr, dhx_desc.value, dhx.data.ptr,
        dcx_desc.value, dcx.data.ptr, workspace.data.ptr, work_size,
        self.reserve_space.data.ptr, self.reserve_space.size)

    dw = cuda.cupy.zeros_like(self.w)
    dw_desc = cudnn.create_filter_descriptor(dw)
    libcudnn.RNNBackwardWeights(
        handle, rnn_desc.value, length,
        self.c_x_descs.data, xs.data.ptr,
        hx_desc.value, hx.data.ptr, self.c_y_descs.data, self.ys.data.ptr,
        workspace.data.ptr, work_size, dw_desc.value, dw.data.ptr,
        self.reserve_space.data.ptr, self.reserve_space.size)

    dx = dx_list[0]
    dx = dx.reshape(dx.shape + (1,))
    dx_desc = cudnn.create_tensor_nd_descriptor(dx)
    dws = []
    dbs = []
    for layer in six.moves.range(self.n_layers):
        for lin_layer_id in six.moves.range(8):
            mat = cudnn.get_rnn_lin_layer_matrix_params(
                handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                lin_layer_id)
            dws.append(mat.reshape(ws[layer * 8 + lin_layer_id].shape))
            bias = cudnn.get_rnn_lin_layer_bias_params(
                handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                lin_layer_id)
            dbs.append(bias.reshape(bs[layer * 8 + lin_layer_id].shape))

    return tuple([dhx, dcx] + dws + dbs + dx_list)
def backward_gpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]
    _, out_c, out_h, out_w = gy.shape
    n, c, h, w = x.shape
    kh, kw = W.shape[2:]

    gW = cuda.cupy.empty_like(W)
    gx = None

    if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        gy = cuda.cupy.ascontiguousarray(gy)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        if _cudnn_version >= 3000:
            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')

            if configuration.config.cudnn_deterministic:
                algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA
            else:
                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, x_desc.value, gy_desc.value,
                    self.conv_desc.value, self.filter_desc.value,
                    _bwd_filter_pref, workspace_size)

            libcudnn.convolutionBackwardFilter_v3(
                handle, one.data, x_desc.value, x.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, self.filter_desc.value, gW.data.ptr)

            if self.requires_x_grad:
                if configuration.config.cudnn_deterministic:
                    algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
                else:
                    algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                        handle, self.filter_desc.value, gy_desc.value,
                        self.conv_desc.value, x_desc.value,
                        _bwd_data_pref, workspace_size)

                gx = cuda.cupy.empty_like(x)
                libcudnn.convolutionBackwardData_v3(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    algo, workspace.data.ptr, workspace_size,
                    zero.data, x_desc.value, gx.data.ptr)
        else:
            if configuration.config.cudnn_deterministic:
                raise ValueError(
                    "`cudnn_deterministic` option must be False "
                    "if the backpropagation of "
                    "chainer.functions.Convolution2D "
                    "uses cuDNN and cuDNN versions < v3. "
                    "Turn off cudnn_deterministic option with "
                    "`chainer.using_config('cudnn_deterministic', False)` "
                    "context.")
            libcudnn.convolutionBackwardFilter_v2(
                handle, one.data, x_desc.value, x.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                zero.data, self.filter_desc.value, gW.data.ptr)
            if self.requires_x_grad:
                gx = cuda.cupy.empty_like(x)
                libcudnn.convolutionBackwardData_v2(
                    handle, one.data, self.filter_desc.value, W.data.ptr,
                    gy_desc.value, gy.data.ptr, self.conv_desc.value,
                    zero.data, x_desc.value, gx.data.ptr)

        if b is not None:
            gb = cuda.cupy.empty_like(b)
            libcudnn.convolutionBackwardBias(
                handle, one.data, gy_desc.value, gy.data.ptr,
                zero.data, self.bias_desc.value, gb.data.ptr)
    else:
        gW = cuda.cupy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                         copy=False)
        if self.requires_x_grad:
            gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(
                x.dtype, copy=False)
            gcol = cuda.cupy.rollaxis(gcol, 3)
            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)
        if b is not None:
            gb = gy.sum(axis=(0, 2, 3))

    if b is None:
        return gx, gW
    else:
        return gx, gW, gb
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))

    out_c, _, kh, kw = W.shape
    n, c, h, w = x.shape

    out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                  cover_all=self.cover_all)
    assert out_h > 0, 'Height in the output should be positive.'
    out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                  cover_all=self.cover_all)
    assert out_w > 0, 'Width in the output should be positive.'

    y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
    if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_desc = cudnn.create_convolution_descriptor(
            (self.ph, self.pw), (self.sy, self.sx), x.dtype)
        if b is not None:
            self.bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')
        algo = libcudnn.getConvolutionForwardAlgorithm(
            handle, x_desc.value, self.filter_desc.value,
            self.conv_desc.value, y_desc.value, _fwd_pref,
            workspace_size)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.convolutionForward(
            handle, one.data, x_desc.value, x.data.ptr,
            self.filter_desc.value, W.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size, zero.data,
            y_desc.value, y.data.ptr)

        # TODO(beam2d): Support unshared bias
        if b is not None:
            cudnn.add_tensor(
                handle, one.data, self.bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)
    else:
        # Implementation using im2col
        self.col = conv.im2col_gpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        y = cuda.cupy.tensordot(
            self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                        copy=False)
        # TODO(beam2d): Support unshared bias
        if b is not None:
            y += b
        y = cuda.cupy.rollaxis(y, 3, 1)

    return y,
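# Minimal numpy sketch (an illustrative addition, not library code) of the
# im2col fallback used above for the simplest case: stride 1, no padding, no
# dilation. Patches are laid out as (n, c, kh, kw, out_h, out_w), matching
# what conv.im2col is assumed to produce, and the same tensordot contraction
# over (c, kh, kw) yields the convolution output.
import numpy
from numpy.lib.stride_tricks import sliding_window_view

x = numpy.random.rand(1, 3, 5, 5)   # n, c, h, w
W = numpy.random.rand(2, 3, 3, 3)   # out_c, c, kh, kw
col = sliding_window_view(x, (3, 3), axis=(2, 3))    # n, c, oh, ow, kh, kw
col = col.transpose(0, 1, 4, 5, 2, 3)                # n, c, kh, kw, oh, ow
y = numpy.tensordot(col, W, ((1, 2, 3), (1, 2, 3)))  # n, oh, ow, out_c
y = numpy.rollaxis(y, 3, 1)                          # n, out_c, oh, ow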
def test_all_numpy_subclasses(self):
    x = numpy.array([0])
    y = numpy.array([[1], [2]])
    z = numpy.matrix("3,4; 5,6")
    self.assertTrue(T.same_types(x, y, z))
def backward_gpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    gy = grad_outputs[0]
    n, in_c, in_h, in_w = x.shape
    _, out_channels, kh, kw = W.shape
    c, h, w = gy.shape[1:]
    gx = cuda.cupy.empty((n, in_c, in_h, in_w), dtype=x.dtype)

    if (cuda.cudnn_enabled and self.use_cudnn and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        gy = cuda.cupy.ascontiguousarray(gy)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        handle = cudnn.get_handle()
        gy_desc = cudnn.create_tensor_descriptor(gy)
        gx_desc = cudnn.create_tensor_descriptor(gx)

        # chance to choose implicit-precomp-gemm algorithm
        workspace_size = cuda.get_max_workspace_size()
        algo = libcudnn.getConvolutionForwardAlgorithm(
            handle, gy_desc.value, self.filter_desc.value,
            self.conv_desc.value, gx_desc.value, _fwd_pref,
            workspace_size)
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        libcudnn.convolutionForward(
            handle, one.data, gy_desc.value, gy.data.ptr,
            self.filter_desc.value, W.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size, zero.data,
            gx_desc.value, gx.data.ptr)
        # bias backward
        if b is not None:
            gb = cuda.cupy.empty_like(b)
            libcudnn.convolutionBackwardBias(
                handle, one.data, gy_desc.value, gy.data.ptr,
                zero.data, self.bias_desc.value, gb.data.ptr)

        gW = cuda.cupy.empty_like(W)
        # filter backward
        if _cudnn_version >= 3000:
            if not self.deterministic:
                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, gy_desc.value, gx_desc.value,
                    self.conv_desc.value, self.filter_desc.value,
                    _bwd_filter_pref, workspace_size)
            else:
                algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA
            libcudnn.convolutionBackwardFilter_v3(
                handle, one.data, gy_desc.value, gy.data.ptr,
                gx_desc.value, x.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, self.filter_desc.value, gW.data.ptr)
        else:
            if self.deterministic:
                raise ValueError("'deterministic' option not available "
                                 "for cuDNN versions < v3")
            libcudnn.convolutionBackwardFilter_v2(
                handle, one.data, gy_desc.value, gy.data.ptr,
                gx_desc.value, x.data.ptr, self.conv_desc.value,
                zero.data, self.filter_desc.value, gW.data.ptr)
    else:
        # Implementation using im2col
        col = conv.im2col_gpu(
            gy, kh, kw, self.sy, self.sx, self.ph, self.pw)
        gW = cuda.cupy.tensordot(
            x, col, ([0, 2, 3], [0, 4, 5])).astype(W.dtype, copy=False)
        gx = cuda.cupy.tensordot(
            col, W, ([1, 2, 3], [1, 2, 3])).astype(x.dtype, copy=False)
        gx = cuda.cupy.rollaxis(gx, 3, 1)
        # bias backward
        if b is not None:
            gb = gy.sum(axis=(0, 2, 3))

    if b is None:
        return gx, gW
    else:
        return gx, gW, gb
def forward(self, inputs):
    (hx, cx), inputs = _split(inputs, 2)
    ws, inputs = _split(inputs, self.n_layers * 8)
    bs, inputs = _split(inputs, self.n_layers * 8)
    x_list = inputs
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together')

    hx = cuda.cupy.ascontiguousarray(hx)
    cx = cuda.cupy.ascontiguousarray(cx)

    x_desc = cudnn.create_tensor_nd_descriptor(x_list[0][..., None])

    length = len(x_list)
    n_units = hx.shape[2]

    xs = cuda.cupy.concatenate(x_list, axis=0)
    ys = cuda.cupy.empty((len(xs), n_units), dtype=xs.dtype)

    handle = cudnn.get_handle()
    self.handle = handle

    rnn_desc = cudnn.create_rnn_descriptor(
        n_units, self.n_layers, self.states.desc,
        libcudnn.CUDNN_LINEAR_INPUT, libcudnn.CUDNN_UNIDIRECTIONAL,
        libcudnn.CUDNN_LSTM, libcudnn.CUDNN_DATA_FLOAT)
    self.rnn_desc = rnn_desc

    c_x_descs = _make_tensor_descriptor_array(x_list)
    hx_desc = cudnn.create_tensor_nd_descriptor(hx)
    cx_desc = cudnn.create_tensor_nd_descriptor(cx)

    weights_size = libcudnn.getRNNParamsSize(
        handle, rnn_desc.value, x_desc.value, libcudnn.CUDNN_DATA_FLOAT)
    w = cuda.cupy.empty((weights_size // 4, 1, 1), dtype=numpy.float32)
    w_desc = cudnn.create_filter_descriptor(w)

    for layer in six.moves.range(self.n_layers):
        for lin_layer_id in six.moves.range(8):
            mat = cudnn.get_rnn_lin_layer_matrix_params(
                handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id)
            m = mat.reshape(mat.size)
            m[...] = ws[layer * 8 + lin_layer_id].ravel()
            bias = cudnn.get_rnn_lin_layer_bias_params(
                handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id)
            b = bias.reshape(bias.size)
            b[...] = bs[layer * 8 + lin_layer_id]
    self.w = w
    self.w_desc = w_desc

    sections = numpy.cumsum([len(x) for x in x_list[:-1]])
    y_list = cuda.cupy.split(ys, sections)
    c_y_descs = _make_tensor_descriptor_array(y_list)
    hy = cuda.cupy.empty_like(hx)
    cy = cuda.cupy.empty_like(cx)
    hy_desc = cudnn.create_tensor_nd_descriptor(hy)
    cy_desc = cudnn.create_tensor_nd_descriptor(cy)

    work_size = libcudnn.getRNNWorkspaceSize(
        handle, rnn_desc.value, length, c_x_descs.data)
    workspace = cuda.cupy.empty((work_size,), dtype='b')

    if not self.train:
        libcudnn.RNNForwardInference(
            handle, rnn_desc.value, length,
            c_x_descs.data, xs.data.ptr, hx_desc.value, hx.data.ptr,
            cx_desc.value, cx.data.ptr, w_desc.value, w.data.ptr,
            c_y_descs.data, ys.data.ptr, hy_desc.value, hy.data.ptr,
            cy_desc.value, cy.data.ptr, workspace.data.ptr, work_size)
    else:
        reserve_size = libcudnn.getRNNTrainingReserveSize(
            handle, rnn_desc.value, length, c_x_descs.data)
        self.reserve_space = cuda.cupy.empty((reserve_size,), dtype='b')
        libcudnn.RNNForwardTraining(
            handle, rnn_desc.value, length,
            c_x_descs.data, xs.data.ptr, hx_desc.value, hx.data.ptr,
            cx_desc.value, cx.data.ptr, w_desc.value, w.data.ptr,
            c_y_descs.data, ys.data.ptr, hy_desc.value, hy.data.ptr,
            cy_desc.value, cy.data.ptr, workspace.data.ptr, work_size,
            self.reserve_space.data.ptr, reserve_size)

    self.c_y_descs = c_y_descs
    self.ys = ys
    self.c_x_descs = c_x_descs
    return tuple([hy, cy] + y_list)
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}, type(b): {2}'
                             .format(type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'
                             .format(type(W), type(x)))
    kh, kw = W.shape[2:]
    n, in_c, in_h, in_w = x.shape
    c = W.shape[1]  # out_c
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(in_h, kh, self.sy, self.ph)
        assert self.outh > 0, 'Height in the output should be positive.'
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(in_w, kw, self.sx, self.pw)
        assert self.outw > 0, 'Width in the output should be positive.'

    if (cuda.cudnn_enabled and self.use_cudnn and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y = cuda.cupy.empty((n, c, self.outh, self.outw), dtype=x.dtype)
        y_desc = cudnn.create_tensor_descriptor(y)

        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_desc = cudnn.create_convolution_descriptor(
            (self.ph, self.pw), (self.sy, self.sx), x.dtype)
        if b is not None:
            self.bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes

        if _cudnn_version >= 3000:
            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')
            if not self.deterministic:
                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, x_desc.value,
                    self.conv_desc.value, y_desc.value, _bwd_data_pref,
                    workspace_size)
            else:
                algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
            libcudnn.convolutionBackwardData_v3(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, y_desc.value, y.data.ptr)
        else:
            libcudnn.convolutionBackwardData_v2(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                x_desc.value, x.data.ptr, self.conv_desc.value,
                zero.data, y_desc.value, y.data.ptr)

        if b is not None:
            cudnn.add_tensor(
                handle, one.data, self.bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)
    else:
        gcol = cuda.cupy.tensordot(W, x, (0, 1)).astype(x.dtype,
                                                        copy=False)
        # - k, m, n: shape of out_channel
        # - b: number of inputs
        # - h, w: height and width of kernels
        # k, m, n, b, h, w -> b, k, m, n, h, w
        gcol = cuda.cupy.rollaxis(gcol, 3)
        y = conv.col2im_gpu(
            gcol, self.sy, self.sx, self.ph, self.pw, self.outh,
            self.outw)
        if b is not None:
            y += b.reshape(1, b.size, 1, 1)
    return y,
def forward(self, inputs):
    (hx, cx), inputs = _split(inputs, 2)
    ws, inputs = _split(inputs, self.n_layers * 8)
    bs, inputs = _split(inputs, self.n_layers * 8)
    x_list = inputs
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together')

    hx = cuda.cupy.ascontiguousarray(hx)
    cx = cuda.cupy.ascontiguousarray(cx)

    x_desc = cudnn.create_tensor_nd_descriptor(x_list[0][..., None])

    length = len(x_list)
    n_units = hx.shape[2]

    xs = cuda.cupy.concatenate(x_list, axis=0)
    ys = cuda.cupy.empty((len(xs), n_units), dtype=xs.dtype)

    handle = cudnn.get_handle()
    self.handle = handle

    rnn_desc = cudnn.create_rnn_descriptor(
        n_units, self.n_layers, self.states.desc,
        libcudnn.CUDNN_LINEAR_INPUT, libcudnn.CUDNN_UNIDIRECTIONAL,
        libcudnn.CUDNN_LSTM, libcudnn.CUDNN_DATA_FLOAT)
    self.rnn_desc = rnn_desc

    c_x_descs = _make_tensor_descriptor_array(x_list)
    hx_desc = cudnn.create_tensor_nd_descriptor(hx)
    cx_desc = cudnn.create_tensor_nd_descriptor(cx)

    weights_size = libcudnn.getRNNParamsSize(
        handle, rnn_desc.value, x_desc.value, libcudnn.CUDNN_DATA_FLOAT)
    w = cuda.cupy.empty((weights_size // 4, 1, 1), dtype=numpy.float32)
    w_desc = cudnn.create_filter_descriptor(w)

    for layer in six.moves.range(self.n_layers):
        for lin_layer_id in six.moves.range(8):
            mat = cudnn.get_rnn_lin_layer_matrix_params(
                handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id)
            m = mat.reshape(mat.size)
            m[...] = ws[layer * 8 + lin_layer_id].ravel()
            bias = cudnn.get_rnn_lin_layer_bias_params(
                handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id)
            b = bias.reshape(bias.size)
            b[...] = bs[layer * 8 + lin_layer_id]
    self.w = w
    self.w_desc = w_desc

    sections = numpy.cumsum([len(x) for x in x_list[:-1]])
    y_list = cuda.cupy.split(ys, sections)
    c_y_descs = _make_tensor_descriptor_array(y_list)
    hy = cuda.cupy.empty_like(hx)
    cy = cuda.cupy.empty_like(cx)
    hy_desc = cudnn.create_tensor_nd_descriptor(hy)
    cy_desc = cudnn.create_tensor_nd_descriptor(cy)

    work_size = libcudnn.getRNNWorkspaceSize(
        handle, rnn_desc.value, length, c_x_descs.data)
    workspace = cuda.cupy.empty((work_size,), dtype='b')

    if not self.train:
        libcudnn.RNNForwardInference(
            handle, rnn_desc.value, length,
            c_x_descs.data, xs.data.ptr, hx_desc.value, hx.data.ptr,
            cx_desc.value, cx.data.ptr, w_desc.value, w.data.ptr,
            c_y_descs.data, ys.data.ptr, hy_desc.value, hy.data.ptr,
            cy_desc.value, cy.data.ptr, workspace.data.ptr, work_size)
    else:
        reserve_size = libcudnn.getRNNTrainingReserveSize(
            handle, rnn_desc.value, length, c_x_descs.data)
        self.reserve_space = cuda.cupy.empty((reserve_size,), dtype='b')
        libcudnn.RNNForwardTraining(
            handle, rnn_desc.value, length,
            c_x_descs.data, xs.data.ptr, hx_desc.value, hx.data.ptr,
            cx_desc.value, cx.data.ptr, w_desc.value, w.data.ptr,
            c_y_descs.data, ys.data.ptr, hy_desc.value, hy.data.ptr,
            cy_desc.value, cy.data.ptr, workspace.data.ptr, work_size,
            self.reserve_space.data.ptr, reserve_size)

    self.c_y_descs = c_y_descs
    self.ys = ys
    self.c_x_descs = c_x_descs
    return tuple([hy, cy] + y_list)
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))
    out_c, _, kh, kw = W.shape
    n, c, h, w = x.shape
    dkh, dkw = kh + (kh - 1) * (self.dy - 1), kw + (kw - 1) * (self.dx - 1)

    out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                  cover_all=self.cover_all, d=self.dy)
    out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                  cover_all=self.cover_all, d=self.dx)

    y = cuda.cupy.zeros((n, out_c, out_h, out_w), dtype=x.dtype)
    if (not self.cover_all and chainer.should_use_cudnn('>=auto') and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        pad_x = cuda.cupy.zeros(
            (n, c, h + 2 * self.ph, w + 2 * self.pw), dtype=x.dtype)
        pad_x[:, :, self.ph:self.ph + h, self.pw:self.pw + w] = x

        out_h_s1 = h + 2 * self.ph - dkh + 1
        out_w_s1 = w + 2 * self.pw - dkw + 1

        for j in moves.range(kh):
            for i in moves.range(kw):
                xji = cuda.cupy.ascontiguousarray(
                    pad_x[:, :, j * self.dy:j * self.dy + out_h_s1,
                          i * self.dx:i * self.dx + out_w_s1])
                Wji = cuda.cupy.ascontiguousarray(
                    W[:, :, j:j + 1, i:i + 1])

                if i == 0 and j == 0:
                    handle = cudnn.get_handle()
                    xji_desc = cudnn.create_tensor_descriptor(xji)
                    y_desc = cudnn.create_tensor_descriptor(y)
                    self.filter_desc = cudnn.create_filter_descriptor(Wji)
                    self.conv_desc = cudnn.create_convolution_descriptor(
                        (0, 0), (self.sy, self.sx), xji.dtype)

                    workspace_size = cuda.get_max_workspace_size()
                    workspace = cuda.cupy.empty(
                        (workspace_size,), dtype='b')
                    algo = libcudnn.getConvolutionForwardAlgorithm(
                        handle, xji_desc.value, self.filter_desc.value,
                        self.conv_desc.value, y_desc.value, _fwd_pref,
                        workspace_size)

                    oz_dtype = 'd' if x.dtype == 'd' else 'f'
                    one = numpy.array(1, dtype=oz_dtype).ctypes

                libcudnn.convolutionForward(
                    handle, one.data, xji_desc.value, xji.data.ptr,
                    self.filter_desc.value, Wji.data.ptr,
                    self.conv_desc.value, algo, workspace.data.ptr,
                    workspace_size, one.data, y_desc.value, y.data.ptr)

        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)
            self.bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])
            cudnn.add_tensor(
                handle, one.data, self.bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)
    else:
        # Implementation using im2col
        self.col = conv.im2col_gpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all, dy=self.dy, dx=self.dx)
        y = cuda.cupy.tensordot(
            self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                        copy=False)
        # TODO(beam2d): Support unshared bias
        if b is not None:
            y += b
        y = cuda.cupy.rollaxis(y, 3, 1)

    return y,
def test_all_cupy_array(self):
    x = cuda.cupy.array([0])
    y = cuda.cupy.array([1])
    z = cuda.cupy.array([2])
    self.assertTrue(T.same_types(x, y, z))
def backward(self, inputs, grads):
    (hx, cx), inputs = _split(inputs, 2)
    ws, inputs = _split(inputs, self.n_layers * 8)
    bs, inputs = _split(inputs, self.n_layers * 8)
    x_list = inputs
    if not type_check.same_types(*inputs):
        raise ValueError('numpy and cupy must not be used together')

    hx = cuda.cupy.ascontiguousarray(hx)
    cx = cuda.cupy.ascontiguousarray(cx)

    dhy, dcy = grads[:2]
    dy_list = list(grads[2:])
    if dhy is None:
        dhy = cuda.cupy.zeros_like(hx)
    if dcy is None:
        dcy = cuda.cupy.zeros_like(cx)
    for i in six.moves.range(len(dy_list)):
        if dy_list[i] is None:
            dy_list[i] = cuda.cupy.zeros_like(x_list[i])

    xs = cuda.cupy.concatenate(x_list, axis=0)
    length = len(x_list)

    dhx = cuda.cupy.empty_like(hx)
    dcx = cuda.cupy.empty_like(cx)

    hx_desc = cudnn.create_tensor_nd_descriptor(hx)
    cx_desc = cudnn.create_tensor_nd_descriptor(cx)
    dhy_desc = cudnn.create_tensor_nd_descriptor(dhy)
    dcy_desc = cudnn.create_tensor_nd_descriptor(dcy)

    c_dy_descs = _make_tensor_descriptor_array(dy_list)
    dys = cuda.cupy.concatenate(dy_list, axis=0)

    rnn_desc = self.rnn_desc
    handle = self.handle
    work_size = libcudnn.getRNNWorkspaceSize(
        handle, rnn_desc.value, length, self.c_x_descs.data)
    workspace = cuda.cupy.empty((work_size,), dtype='b')

    dhx_desc = cudnn.create_tensor_nd_descriptor(dhx)
    dcx_desc = cudnn.create_tensor_nd_descriptor(dcx)

    dxs = cuda.cupy.empty_like(xs)
    sections = numpy.cumsum([len(x) for x in x_list[:-1]])
    dx_list = cuda.cupy.split(dxs, sections, 0)
    c_dx_descs = _make_tensor_descriptor_array(dx_list)

    libcudnn.RNNBackwardData(
        handle, rnn_desc.value, length,
        self.c_y_descs.data, self.ys.data.ptr,
        c_dy_descs.data, dys.data.ptr, dhy_desc.value, dhy.data.ptr,
        dcy_desc.value, dcy.data.ptr, self.w_desc.value, self.w.data.ptr,
        hx_desc.value, hx.data.ptr, cx_desc.value, cx.data.ptr,
        c_dx_descs.data, dxs.data.ptr, dhx_desc.value, dhx.data.ptr,
        dcx_desc.value, dcx.data.ptr, workspace.data.ptr, work_size,
        self.reserve_space.data.ptr, self.reserve_space.size)

    dw = cuda.cupy.zeros_like(self.w)
    dw_desc = cudnn.create_filter_descriptor(dw)
    libcudnn.RNNBackwardWeights(
        handle, rnn_desc.value, length,
        self.c_x_descs.data, xs.data.ptr,
        hx_desc.value, hx.data.ptr, self.c_y_descs.data, self.ys.data.ptr,
        workspace.data.ptr, work_size, dw_desc.value, dw.data.ptr,
        self.reserve_space.data.ptr, self.reserve_space.size)

    dx = dx_list[0]
    dx = dx.reshape(dx.shape + (1,))
    dx_desc = cudnn.create_tensor_nd_descriptor(dx)
    dws = []
    dbs = []
    for layer in six.moves.range(self.n_layers):
        for lin_layer_id in six.moves.range(8):
            mat = cudnn.get_rnn_lin_layer_matrix_params(
                handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                lin_layer_id)
            dws.append(mat.reshape(ws[layer * 8 + lin_layer_id].shape))
            bias = cudnn.get_rnn_lin_layer_bias_params(
                handle, rnn_desc, layer, dx_desc, dw_desc, dw,
                lin_layer_id)
            dbs.append(bias.reshape(bs[layer * 8 + lin_layer_id].shape))

    return tuple([dhx, dcx] + dws + dbs + dx_list)
def forward_gpu(self, inputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))

    out_c, _, kh, kw = W.shape
    n, c, h, w = x.shape

    out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                  cover_all=self.cover_all)
    assert out_h > 0, 'Height in the output should be positive.'
    out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                  cover_all=self.cover_all)
    assert out_w > 0, 'Width in the output should be positive.'

    y = cuda.cupy.empty((n, out_c, out_h, out_w), dtype=x.dtype)
    if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        if b is not None:
            b = cuda.cupy.ascontiguousarray(b)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        y_desc = cudnn.create_tensor_descriptor(y)

        self.filter_desc = cudnn.create_filter_descriptor(W)
        self.conv_desc = cudnn.create_convolution_descriptor(
            (self.ph, self.pw), (self.sy, self.sx), x.dtype)
        if b is not None:
            self.bias_desc = cudnn.create_tensor_descriptor(
                b[None, :, None, None])

        workspace_size = cuda.get_max_workspace_size()
        workspace = cuda.cupy.empty((workspace_size,), dtype='b')
        algo = libcudnn.getConvolutionForwardAlgorithm(
            handle, x_desc.value, self.filter_desc.value,
            self.conv_desc.value, y_desc.value, _fwd_pref,
            workspace_size)

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        libcudnn.convolutionForward(
            handle, one.data, x_desc.value, x.data.ptr,
            self.filter_desc.value, W.data.ptr, self.conv_desc.value,
            algo, workspace.data.ptr, workspace_size, zero.data,
            y_desc.value, y.data.ptr)

        # TODO(beam2d): Support unshared bias
        if b is not None:
            cudnn.add_tensor(
                handle, one.data, self.bias_desc.value, b.data.ptr,
                one.data, y_desc.value, y.data.ptr)
    else:
        # Implementation using im2col
        self.col = conv.im2col_gpu(
            x, kh, kw, self.sy, self.sx, self.ph, self.pw,
            cover_all=self.cover_all)
        y = cuda.cupy.tensordot(
            self.col, W, ((1, 2, 3), (1, 2, 3))).astype(x.dtype,
                                                        copy=False)
        # TODO(beam2d): Support unshared bias
        if b is not None:
            y += b
        y = cuda.cupy.rollaxis(y, 3, 1)

    return y,
def test_numpy_cupy_mixed_2(self):
    x = cuda.cupy.array([0])
    y = numpy.array([1])
    z = cuda.cupy.array([2])
    self.assertFalse(T.same_types(x, y, z))
def backward_gpu(self, inputs, grad_outputs):
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    if not type_check.same_types(*inputs):
        if b is not None:
            raise ValueError(
                'numpy and cupy must not be used together\n'
                'type(W): {0}, type(x): {1}, type(b): {2}'.format(
                    type(W), type(x), type(b)))
        else:
            raise ValueError('numpy and cupy must not be used together\n'
                             'type(W): {0}, type(x): {1}'.format(
                                 type(W), type(x)))
    gy = grad_outputs[0]
    _, out_c, out_h, out_w = gy.shape
    n, c, h, w = x.shape
    kh, kw = W.shape[2:]

    gW = cuda.cupy.empty_like(W)
    if (not self.cover_all and cuda.cudnn_enabled and self.use_cudnn and
            _check_cudnn_acceptable_type(x.dtype, W.dtype)):
        x = cuda.cupy.ascontiguousarray(x)
        W = cuda.cupy.ascontiguousarray(W)
        gy = cuda.cupy.ascontiguousarray(gy)

        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(x)
        gy_desc = cudnn.create_tensor_descriptor(gy)
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        gx = cuda.cupy.empty_like(x)

        if _cudnn_version >= 3000:
            workspace_size = cuda.get_max_workspace_size()
            workspace = cuda.cupy.empty((workspace_size,), dtype='b')

            if not self.deterministic:
                algo = libcudnn.getConvolutionBackwardFilterAlgorithm(
                    handle, x_desc.value, gy_desc.value,
                    self.conv_desc.value, self.filter_desc.value,
                    _bwd_filter_pref, workspace_size)
            else:
                algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1  # NOQA
            libcudnn.convolutionBackwardFilter_v3(
                handle, one.data, x_desc.value, x.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, self.filter_desc.value, gW.data.ptr)

            if not self.deterministic:
                algo = libcudnn.getConvolutionBackwardDataAlgorithm(
                    handle, self.filter_desc.value, gy_desc.value,
                    self.conv_desc.value, x_desc.value,
                    _bwd_data_pref, workspace_size)
            else:
                algo = cuda.cupy.cuda.cudnn.CUDNN_CONVOLUTION_BWD_DATA_ALGO_1  # NOQA
            libcudnn.convolutionBackwardData_v3(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                algo, workspace.data.ptr, workspace_size,
                zero.data, x_desc.value, gx.data.ptr)
        else:
            if self.deterministic:
                raise ValueError("'deterministic' option not available "
                                 "for cuDNN versions < v3")
            libcudnn.convolutionBackwardFilter_v2(
                handle, one.data, x_desc.value, x.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                zero.data, self.filter_desc.value, gW.data.ptr)
            libcudnn.convolutionBackwardData_v2(
                handle, one.data, self.filter_desc.value, W.data.ptr,
                gy_desc.value, gy.data.ptr, self.conv_desc.value,
                zero.data, x_desc.value, gx.data.ptr)

        if b is not None:
            gb = cuda.cupy.empty_like(b)
            libcudnn.convolutionBackwardBias(
                handle, one.data, gy_desc.value, gy.data.ptr,
                zero.data, self.bias_desc.value, gb.data.ptr)
    else:
        gW = cuda.cupy.tensordot(
            gy, self.col, ((0, 2, 3), (0, 4, 5))).astype(W.dtype,
                                                         copy=False)
        gcol = cuda.cupy.tensordot(W, gy, (0, 1)).astype(x.dtype,
                                                         copy=False)
        gcol = cuda.cupy.rollaxis(gcol, 3)
        gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw,
                             h, w)
        if b is not None:
            gb = gy.sum(axis=(0, 2, 3))

    if b is None:
        return gx, gW
    else:
        return gx, gW, gb
def test_all_numpy_array(self):
    x = numpy.array([0])
    y = numpy.array([1])
    z = numpy.array([2])
    self.assertTrue(T.same_types(x, y, z))