def call(self):
    """Dispatch the cuDNN backward-filter convolution configured on self.

    Reads the data input (``self.x``), the output gradient (``self.gy``),
    the filter-gradient buffer (``self.gW``) and the stored convolution
    parameters, then issues one ``cudnn.convolution_backward_filter`` call.
    """
    # Gather the positional parameters up front so the call itself
    # reads as a single flat dispatch.
    conv_args = (self.x, self.gy, self.gW,
                 self.pads, self.strides, self.dilations, self.groups)
    cudnn.convolution_backward_filter(
        *conv_args,
        deterministic=self.deterministic,
        auto_tune=self.auto_tune,
        tensor_core=self.tensor_core)
def _cudnn_convolve_adjoint_filter(x, y, mode='full'):
    """Compute the filter adjoint of a convolution using cuDNN.

    Args:
        x: input array, shape (batch, input_channel, *input_shape).
        y: output array, shape (batch, output_channel, *output_shape).
        mode (str): ``'full'`` or ``'valid'`` convolution mode.

    Returns:
        Filter array of shape
        (output_channel, input_channel, *filter_shape), flipped along the
        spatial axes to match the convolution (not correlation) convention.

    Raises:
        ValueError: if ``mode`` is not ``'full'`` or ``'valid'``.
    """
    dtype = y.dtype
    device = backend.get_device(y)
    xp = device.xp

    if np.issubdtype(dtype, np.complexfloating):
        with device:
            xr = xp.real(x)
            xi = xp.imag(x)
            yr = xp.real(y)
            yi = xp.imag(y)

            # Concatenate real and imaginary parts into the channel axis so
            # a single real-valued cuDNN call computes all four cross terms.
            x = xp.concatenate([xr, xi], axis=1)
            y = xp.concatenate([yr, yi], axis=1)

            W = _cudnn_convolve_adjoint_filter(x, y, mode=mode)

            # Recombine the 2x2 block structure back into a complex filter.
            # Note Wr/Wi are views into W; the in-place ops below only touch
            # disjoint quadrants, so no cross-contamination occurs.
            Wr = W[:W.shape[0] // 2, :W.shape[1] // 2]
            Wr += W[W.shape[0] // 2:, W.shape[1] // 2:]
            Wi = W[W.shape[0] // 2:, :W.shape[1] // 2]
            Wi -= W[:W.shape[0] // 2, W.shape[1] // 2:]
            return (Wr + 1j * Wi).astype(dtype)

    ndim = y.ndim - 2
    input_channel = x.shape[1]
    output_channel = y.shape[1]
    input_shape = x.shape[-ndim:]
    output_shape = y.shape[-ndim:]
    strides = (1, ) * ndim
    dilations = (1, ) * ndim
    groups = 1
    auto_tune = True
    tensor_core = 'auto'
    deterministic = False
    if mode == 'full':
        filter_shape = tuple(p - m + 1
                             for m, p in zip(input_shape, output_shape))
        pads = tuple(n - 1 for n in filter_shape)
    elif mode == 'valid':
        filter_shape = tuple(m - p + 1
                             for m, p in zip(input_shape, output_shape))
        pads = (0, ) * ndim
    else:
        # Bug fix: an unknown mode previously fell through and raised a
        # confusing NameError on ``filter_shape``; fail fast instead.
        raise ValueError(f'Invalid mode, got {mode}')

    with device:
        W = xp.empty((output_channel, input_channel) + filter_shape,
                     dtype=dtype)
        cudnn.convolution_backward_filter(
            x, y, W, pads, strides, dilations, groups,
            deterministic=deterministic, auto_tune=auto_tune,
            tensor_core=tensor_core)
        # cuDNN computes a correlation; flip spatial axes for convolution.
        W = util.flip(W, axes=range(-ndim, 0))

    return W
def test_backward_filter(self):
    """Check that cuDNN rejects backward-filter for NHWC float64."""
    if not (self.layout == libcudnn.CUDNN_TENSOR_NHWC
            and self.dtype == numpy.float64):
        # Bug fix: ``return unittest.SkipTest()`` merely returned an
        # exception instance, so the test passed vacuously without running
        # anything. Raising it actually marks the test as skipped.
        raise unittest.SkipTest()
    with self.assertRaises(RuntimeError):
        cudnn.convolution_backward_filter(
            self.x, self.gy, self.gW,
            pad=(self.pad, self.pad),
            stride=(self.stride, self.stride),
            dilation=(1, 1),
            groups=1,
            deterministic=False,
            auto_tune=self.auto_tune,
            tensor_core='always',
            d_layout=self.layout,
            w_layout=self.layout)
def _convolve_filter_adjoint_cuda(output, data, filt_shape, mode='full',
                                  strides=None, multi_channel=False):
    """Compute the filter adjoint of a convolution on GPU via cuDNN.

    Args:
        output: convolution output (or its gradient).
        data: convolution input data.
        filt_shape: shape of the filter to produce.
        mode (str): ``'full'`` or ``'valid'`` convolution mode.
        strides: convolution strides, or None for unit strides.
        multi_channel (bool): whether data and output carry channel axes.

    Returns:
        Filter array reshaped to ``filt_shape``.

    Raises:
        ValueError: if the spatial dimension exceeds 3 or ``mode`` is
            not ``'full'`` or ``'valid'``.
    """
    xp = backend.get_array_module(data)
    D, b, B, m, n, s, c_i, c_o, p = _get_convolve_params(
        data.shape, filt_shape, mode, strides, multi_channel)

    if D == 1:
        # cuDNN has no 1D convolution: promote to 2D with a trailing
        # singleton dimension, then squeeze it back off the result.
        return _convolve_filter_adjoint_cuda(
            xp.expand_dims(output, -1),
            xp.expand_dims(data, -1),
            list(filt_shape) + [1],
            mode=mode,
            strides=list(strides) + [1] if strides is not None else None,
            multi_channel=multi_channel).squeeze(-1)
    elif D > 3:
        raise ValueError(
            f'cuDNN convolution only supports 1, 2 or 3D, got {D}.')

    dilations = (1, ) * D
    groups = 1
    auto_tune = True
    tensor_core = 'auto'
    deterministic = False
    if mode == 'full':
        pads = tuple(n_d - 1 for n_d in n)
    elif mode == 'valid':
        pads = (0, ) * D
    else:
        # Bug fix: an unknown mode previously left ``pads`` unbound and
        # raised a confusing NameError below; fail fast instead,
        # consistent with the dimension check above.
        raise ValueError(f'Invalid mode, got {mode}')

    data = data.reshape((B, c_i) + m)
    output = output.reshape((B, c_o) + p)
    filt = xp.empty((c_o, c_i) + n, dtype=output.dtype)
    cudnn.convolution_backward_filter(
        data, output, filt, pads, s, dilations, groups,
        deterministic=deterministic, auto_tune=auto_tune,
        tensor_core=tensor_core)
    # cuDNN computes a correlation; flip spatial axes for convolution.
    filt = util.flip(filt, axes=range(-D, 0))
    filt = filt.reshape(filt_shape)
    return filt
def test_backward_filter(self):
    """Check that backward-filter raises the expected error for NHWC float64."""
    err = None
    if (self.layout == libcudnn.CUDNN_TENSOR_NHWC
            and self.dtype == numpy.float64):
        err = self._get_error_type()
    if err is None:
        # Bug fix: ``return unittest.SkipTest()`` merely returned an
        # exception instance, so the test passed vacuously without running
        # anything. Raising it actually marks the test as skipped.
        raise unittest.SkipTest()
    with self.assertRaises(err):
        cudnn.convolution_backward_filter(
            self.x, self.gy, self.gW,
            pad=(self.pad, self.pad),
            stride=(self.stride, self.stride),
            dilation=(1, 1),
            groups=1,
            deterministic=0,
            auto_tune=self.auto_tune,
            tensor_core='always',
            d_layout=self.layout,
            w_layout=self.layout)
def _convolve_filter_adjoint_cuda(output, data, filt_shape, mode='full',
                                  strides=None, multi_channel=False):
    """Compute the filter adjoint of a convolution via cuDNN on the
    device that owns ``data``.

    Args:
        output: convolution output (or its gradient).
        data: convolution input data.
        filt_shape: shape of the filter to produce.
        mode (str): ``'full'`` or ``'valid'`` convolution mode.
        strides: convolution strides, or None for unit strides.
        multi_channel (bool): whether data and output carry channel axes.

    Returns:
        Filter array reshaped to ``filt_shape``.

    Raises:
        ValueError: if ``mode`` is not ``'full'`` or ``'valid'``.
    """
    device = backend.get_device(data)
    xp = device.xp
    D, b, B, m, n, s, c_i, c_o, p = _get_convolve_params(
        data.shape, filt_shape, mode, strides, multi_channel)

    dilations = (1, ) * D
    groups = 1
    auto_tune = True
    tensor_core = 'auto'
    deterministic = False
    if mode == 'full':
        pads = tuple(n_d - 1 for n_d in n)
    elif mode == 'valid':
        pads = (0, ) * D
    else:
        # Bug fix: an unknown mode previously left ``pads`` unbound and
        # raised a confusing NameError below; fail fast instead.
        raise ValueError(f'Invalid mode, got {mode}')

    with device:
        data = data.reshape((B, c_i) + m)
        output = output.reshape((B, c_o) + p)
        filt = xp.empty((c_o, c_i) + n, dtype=output.dtype)
        cudnn.convolution_backward_filter(
            data, output, filt, pads, s, dilations, groups,
            deterministic=deterministic, auto_tune=auto_tune,
            tensor_core=tensor_core)
        # cuDNN computes a correlation; flip spatial axes for convolution.
        filt = util.flip(filt, axes=range(-D, 0))
        filt = filt.reshape(filt_shape)
        return filt