def forward_ia(self, inputs):
    self.retain_inputs((0, 1))  # retain only x and W
    x, W = inputs[:2]
    b = inputs[2] if len(inputs) == 3 else None
    out_c, input_c, kh, kw = W.shape
    n, c, h, w = x.shape

    out_h = conv.get_conv_outsize(h, kh, self.sy, self.ph,
                                  cover_all=self.cover_all, d=self.dy)
    assert out_h > 0, 'Height in the output should be positive.'
    out_w = conv.get_conv_outsize(w, kw, self.sx, self.pw,
                                  cover_all=self.cover_all, d=self.dx)
    assert out_w > 0, 'Width in the output should be positive.'

    # Extra bottom/right paddings so the strided (dilated) windows
    # exactly cover the input.
    self.pd = (self.sy * (out_h - 1)
               + (kh + (kh - 1) * (self.dy - 1)) - h - self.ph)
    self.pr = (self.sx * (out_w - 1)
               + (kw + (kw - 1) * (self.dx - 1)) - w - self.pw)

    # Create the IA-specific convolution parameter.
    param = ia.convolution2DParam((n, out_c, out_h, out_w),
                                  self.dy, self.dx,
                                  self.sy, self.sx,
                                  self.ph, self.pw,
                                  self.pd, self.pr)
    y = ia.convolution2D.Forward(ia.array(x),
                                 ia.array(W),
                                 ia.array(b) if b is not None else None,
                                 param)
    return y,
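
# A minimal sketch (illustrative names, not the library code) of the
# output-size arithmetic forward_ia above depends on. `dk` is the dilated
# kernel extent; pd/pr above are the extra bottom/right paddings that make
# the last window end exactly at the padded border.
def _conv_outsize_sketch(size, k, s, p, cover_all=False, d=1):
    dk = k + (k - 1) * (d - 1)  # dilated kernel extent
    if cover_all:
        # Round up so every input pixel is covered by some window.
        return (size + p * 2 - dk + s - 1) // s + 1
    return (size + p * 2 - dk) // s + 1

# Example: h=7, kh=3, sy=2, ph=1, dy=1 gives out_h = (7 + 2 - 3) // 2 + 1 = 4
# and extra bottom padding pd = sy*(out_h-1) + dk - h - ph
#                             = 2*3 + 3 - 7 - 1 = 1.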
def backward_ia(self, x, gy):
    param = ia.localResponseNormalizationParam(
        self.n, self.k, self.n * self.alpha, self.beta,
        ia.localResponseNormalizationParam.lrn_across_channels
    )
    gx = ia.localResponseNormalization.Backward(
        ia.array(x[0]), ia.array(gy[0]), self.indexes, param)
    return gx,
def forward_ia(self, inputs):
    x = inputs[0]
    W = inputs[1]
    b = inputs[2] if len(inputs) == 3 else None
    y = ia.linear.Forward(ia.array(x),
                          ia.array(W),
                          ia.array(b) if b is not None else None)
    self.retain_inputs((0, 1))  # b is not retained
    return y,
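
# A hedged numpy sketch (illustrative name, not the ia implementation) of
# what ia.linear.Forward computes: with x of shape (n, in_size) and W of
# shape (out_size, in_size), y = x . W^T + b.
def _linear_sketch(x, W, b=None):
    y = x.dot(W.T)
    return y + b if b is not None else y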
def init_state(self, param):
    xp = cuda.get_array_module(param.data)
    with cuda.get_device_from_array(param.data):
        self.state['v'] = xp.zeros_like(param.data)
        if ia.all_ready((self.state['v'],)):
            self.state['v'] = ia.array(self.state['v'],
                                       itype=ia.ideep4py.wgt_array)
def forward_ia(self, x):
    # Pass n * alpha: MKL-DNN appears to fold a 1/n factor into alpha
    # internally, so scaling by n keeps the result aligned with Chainer's
    # per-element alpha convention.
    param = ia.localResponseNormalizationParam(
        self.n, self.k, self.n * self.alpha, self.beta,
        ia.localResponseNormalizationParam.lrn_across_channels
    )
    self.y, self.indexes = \
        ia.localResponseNormalization.Forward(ia.array(x[0]), param)
    return self.y,
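
# A hedged numpy reference (not the MKL-DNN kernel) for the across-channel
# LRN computed above: y = x / (k + alpha * local_sum(x^2)) ** beta, with the
# sum running over n adjacent channels centered on each channel.
import numpy

def _lrn_sketch(x, n=5, k=2.0, alpha=1e-4, beta=0.75):
    half = n // 2
    sq = x ** 2
    scale = numpy.full_like(x, k)
    for c in range(x.shape[1]):
        lo, hi = max(0, c - half), min(x.shape[1], c + half + 1)
        scale[:, c] += alpha * sq[:, lo:hi].sum(axis=1)
    return x / scale ** beta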
def forward_ia(self, inputs):
    offsets = ia.intVector()
    # FIXME
    # Bypass a Python 3 issue when transferring an array to std::vector<>.
    # https://github.com/SimpleITK/SimpleITK/issues/106
    for i in self.indices_or_sections.tolist():
        offsets.push_back(i)
    ret = ia.concat.Backward(ia.array(inputs[0]),
                             offsets, self.axis)
    self._shapes = [r.shape for r in ret]
    return ret
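
# The ia.concat.Backward call above is the split counterpart of concat; a
# hedged numpy equivalent (assuming the same offset semantics) would be:
#     ret = numpy.split(inputs[0], self.indices_or_sections, self.axis)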
def forward_ia(self, gy):
    # FIXME
    # Here we expect indexes to have been returned from MKL-DNN;
    # otherwise there is a dtype mismatch on reorder (int64 --> uint8).
    if not isinstance(self.indexes, ia.mdarray):
        return self.forward_cpu(gy)

    n, c, h, w = self._in_shape
    y_h, y_w = gy[0].shape[2:]
    self.pd = self.sy * (y_h - 1) + self.kh - h - self.ph
    self.pr = self.sx * (y_w - 1) + self.kw - w - self.pw

    pp = ia.pooling2DParam(
        self._in_shape, self.kh, self.kw, self.sy, self.sx,
        self.ph, self.pw, self.pd, self.pr,
        ia.pooling2DParam.pooling_max)

    self.indexes = ia.array(self.indexes)
    gx = ia.pooling2D.Backward(ia.array(gy[0]), self.indexes, pp)
    return gx,
def forward_ia(self, gy):
    n, c, h, w = self._in_shape
    y_h, y_w = gy[0].shape[2:]
    self.pd = self.sy * (y_h - 1) + self.kh - h - self.ph
    self.pr = self.sx * (y_w - 1) + self.kw - w - self.pw

    pp = ia.pooling2DParam(
        self._in_shape, self.kh, self.kw, self.sy, self.sx,
        self.ph, self.pw, self.pd, self.pr,
        ia.pooling2DParam.pooling_avg_include_padding)

    # Average pooling needs no argmax indexes, so pass None.
    gx = ia.pooling2D.Backward(ia.array(gy[0]), None, pp)
    return gx,
def to_ia(self):
    """Copies parameter variables and persistent values to iDeep arrays.
    """
    if not ia.check_ideep_enabled():
        raise Exception("ia is not ready!")
    d = self.__dict__
    for name in self._params:
        d[name].to_ia()
    for name in self._persistent:
        value = d[name]
        if isinstance(value, numpy.ndarray):
            d[name] = ia.array(value, itype=ia.ideep4py.wgt_array)
    return self
def forward_ia(self, inputs):
    # FIXME
    # Some unit tests call Backward directly but set W.dtype to float16.
    if self.W_dtype != numpy.dtype('float32'):
        return self.forward_cpu(inputs)

    self.retain_inputs((0, 1))
    x, gy = inputs
    n, input_c, h, w = x.shape
    n, out_c, out_h, out_w = gy.shape

    self.pd = (self.sy * (out_h - 1)
               + (self.kh + (self.kh - 1) * (self.dy - 1)) - h - self.ph)
    self.pr = (self.sx * (out_w - 1)
               + (self.kw + (self.kw - 1) * (self.dx - 1)) - w - self.pw)

    # Create the IA-specific convolution parameter.
    param = ia.convolution2DParam(
        (out_c, input_c, self.kh, self.kw),
        self.dy, self.dx, self.sy, self.sx,
        self.ph, self.pw, self.pd, self.pr)

    # Only calculate gW; no gb.
    gW = ia.convolution2D.BackwardWeights(ia.array(x),
                                          ia.array(gy), param)
    return gW,
def forward_ia(self, x):
    self._in_shape = x[0].shape
    self._in_dtype = x[0].dtype

    n, c, h, w = x[0].shape
    y_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph,
                                self.cover_all)
    assert y_h > 0, 'Height in the output should be positive.'
    y_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw,
                                self.cover_all)
    assert y_w > 0, 'Width in the output should be positive.'

    self.pd = self.sy * (y_h - 1) + self.kh - h - self.ph
    self.pr = self.sx * (y_w - 1) + self.kw - w - self.pw

    pp = ia.pooling2DParam(
        (n, c, y_h, y_w), self.kh, self.kw, self.sy, self.sx,
        self.ph, self.pw, self.pd, self.pr,
        ia.pooling2DParam.pooling_max)

    y, self.indexes = ia.pooling2D.Forward(ia.array(x[0]), pp)
    return y,
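
# Pooling reuses the same geometry as convolution with dilation 1: the extra
# bottom padding pd = sy*(y_h-1) + kh - h - ph aligns the last window with
# the padded border. E.g. h=5, kh=2, sy=2, ph=0 with cover_all=True gives
# y_h = (5 - 2 + 1) // 2 + 1 = 3 and pd = 2*2 + 2 - 5 = 1, i.e. one extra
# padded row so the final window fits.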
def forward_ia(self, inputs):
    self.retain_inputs((0, 1))
    gy, W = inputs
    gx = ia.linear.BackwardData(ia.array(W), ia.array(gy))
    return gx,
def forward_ia(self, inputs):
    self.retain_inputs((0, 1))
    x, gy = inputs
    gW = ia.linear.BackwardWeights(ia.array(x), ia.array(gy))
    return gW,
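
# Hedged numpy sketches (illustrative names, not the ia implementations) of
# the two linear backward kernels used above. For y = x . W^T + b:
def _linear_backward_data_sketch(W, gy):
    return gy.dot(W)    # gx: (n, out) x (out, in) -> (n, in)

def _linear_backward_weights_sketch(x, gy):
    return gy.T.dot(x)  # gW: (out, n) x (n, in) -> (out, in)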
def forward(self, inputs):
    self.retain_inputs((0, 1))
    x, gamma, beta = inputs
    xp = cuda.get_array_module(x)
    if self.running_mean is None:
        self.running_mean = xp.zeros_like(gamma)
        self.running_var = xp.zeros_like(gamma)

    self.mode = _BNMode(x, gamma)

    # expander inserts singleton dimensions to gamma and beta so that they
    # can be broadcasted with x.
    head_ndim = gamma.ndim + 1
    expander = (None, Ellipsis) + (None, ) * (x.ndim - head_ndim)
    self.expander = expander
    self.axis = (0, ) + tuple(range(head_ndim, x.ndim))

    self.use_cudnn = self.mode.can_use_cudnn(xp)
    self.use_ideep = self.mode.can_use_ideep()

    if self.use_ideep:
        expand_dim = False
        if x.ndim == 2:
            expand_dim = True
            x = x[:, :, None, None]
        gamma = gamma[expander]
        beta = beta[expander]
        W = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1))

        y, self.mean, self.var, self.inv_std = (
            ia.batchNormalization.Forward(
                ia.array(x), ia.array(W), None, None, self.eps))

        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)

        if isinstance(self.running_mean, ia.mdarray) and \
                isinstance(self.running_var, ia.mdarray):
            self.running_mean.inplace_axpby(
                self.decay, (1 - self.decay), self.mean)
            self.running_var.inplace_axpby(
                self.decay, (1 - self.decay), self.var * adjust)
        else:
            self.running_mean *= self.decay
            self.running_mean += self.mean * (1 - self.decay)
            self.running_var *= self.decay
            self.running_var += self.var * adjust * (1 - self.decay)

        # Restore the original 2-D shape if we expanded dims above.
        if expand_dim:
            y = numpy.squeeze(y, axis=(2, 3))
    elif self.use_cudnn:
        x = cuda.cupy.ascontiguousarray(x)
        gamma = cuda.cupy.ascontiguousarray(gamma)
        beta = cuda.cupy.ascontiguousarray(beta)
        dtype = x.dtype
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
        derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
        cudnn_mode = self.mode.get_cudnn_mode()
        libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
                                          x_desc.value, cudnn_mode)
        dtype_param = _get_dtype_of_tensor_descriptor(derivedBnDesc)
        if dtype_param is not dtype:
            gamma = gamma.astype(dtype_param)
            beta = beta.astype(dtype_param)
            running_mean = self.running_mean.astype(dtype_param)
            running_var = self.running_var.astype(dtype_param)
        else:
            running_mean = self.running_mean
            running_var = self.running_var

        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        y = cuda.cupy.empty_like(x)
        # Factor used in the moving average.
        factor = 1 - self.decay

        if self.mean is None:
            # Output cache to speed up backward pass.
            self.mean = xp.empty_like(gamma)
            # Output cache to speed up backward pass.
            self.inv_std = xp.empty_like(gamma)

        # Note: cuDNN computes the mini-batch mean and variance
        # internally. We can simply (optionally) pass
        # it the running-average mean and variance arrays.
        # Note: This API seems to set the inverse of the standard deviation
        # (instead of variance) to the resultSaveInvVariance argument. The
        # current implementation of our BN depends on this behavior so that
        # we can reduce the number of reduction kernels.
        libcudnn.batchNormalizationForwardTraining(
            handle, cudnn_mode, one.data, zero.data,
            x_desc.value, x.data.ptr, x_desc.value,
            y.data.ptr, derivedBnDesc.value,
            gamma.data.ptr, beta.data.ptr, factor,
            running_mean.data.ptr, running_var.data.ptr,
            self.eps, self.mean.data.ptr, self.inv_std.data.ptr)

        if dtype_param is not dtype:
            # When the data type of the parameters is converted, say, from
            # fp16 to fp32, the values of the fp32 arrays of running_mean
            # and running_var updated by batchNormalizationForwardTraining
            # must be explicitly written back to their original fp16
            # arrays.
            running_mean = running_mean.astype(dtype)
            running_var = running_var.astype(dtype)
            self.running_mean.data.copy_from(running_mean.data,
                                             running_mean.nbytes)
            self.running_var.data.copy_from(running_var.data,
                                            running_var.nbytes)
    else:
        gamma = gamma[expander]
        beta = beta[expander]
        self.mean = x.mean(axis=self.axis)
        var = x.var(axis=self.axis)
        var += self.eps
        self.inv_std = var ** (-0.5)
        y = _apply_bn_fwd(xp, x, self.mean[expander],
                          self.inv_std[expander], gamma, beta)
        # Update running statistics.
        m = x.size // gamma.size
        adjust = m / max(m - 1., 1.)  # unbiased estimation
        self.running_mean *= self.decay
        self.running_mean += (1 - self.decay) * self.mean
        self.running_var *= self.decay
        self.running_var += (1 - self.decay) * adjust * var

    # FIXME: dummy self.var for numpy & cudnn
    if not hasattr(self, 'var'):
        self.var = xp.zeros_like(self.mean)
    return y,
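
# All branches above apply the same exponential moving average to the
# running statistics; a hedged sketch (m is the per-channel sample count):
#     adjust = m / max(m - 1., 1.)          # unbiased variance correction
#     running_mean = decay * running_mean + (1 - decay) * mean
#     running_var  = decay * running_var  + (1 - decay) * adjust * var
# The iDeep path expresses the same update via inplace_axpby(a, b, x),
# i.e. self <- a * self + b * x with a = decay and b = 1 - decay.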
def forward_ia(self, inputs):
    mask, y = ia.dropout.Forward(ia.array(inputs[0]),
                                 self.dropout_ratio)
    self.mask = mask
    return y,
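
# A hedged numpy sketch of the dropout forward above (assuming the usual
# inverted-dropout convention, where kept units are rescaled at train time).
# The backward pass further below is then just gy * mask.
import numpy

def _dropout_sketch(x, ratio):
    scale = 1. / (1. - ratio)
    mask = (numpy.random.rand(*x.shape) >= ratio) * scale
    return mask.astype(x.dtype), (x * mask).astype(x.dtype)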
def forward(self, inputs):
    self.retain_inputs((0, 1, 3, 4))
    x, gamma, beta, mean, var = inputs
    xp = cuda.get_array_module(x)

    # expander inserts singleton dimensions to gamma and beta so that they
    # can be broadcasted with x.
    head_ndim = gamma.ndim + 1
    expander = (None, Ellipsis) + (None, ) * (x.ndim - head_ndim)
    self.expander = expander
    self.axis = (0, ) + tuple(range(head_ndim, x.ndim))

    mode = _BNMode(x, gamma)
    if mode.can_use_ideep():
        expand_dim = False
        if x.ndim == 2:
            expand_dim = True
            x = x[:, :, None, None]
        gamma = gamma[expander]
        beta = beta[expander]
        W = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1))

        y, = ia.batchNormalization.Forward(
            ia.array(x), ia.array(W), ia.array(mean),
            ia.array(var), self.eps)

        # Restore the original 2-D shape if we expanded dims above.
        if expand_dim:
            y = numpy.squeeze(y, axis=(2, 3))

        # inv_var / inv_std are computed lazily when required.
        self.inv_var = None
        self.inv_std = None
    elif mode.can_use_cudnn(xp):
        x = cuda.cupy.ascontiguousarray(x)
        gamma = cuda.cupy.ascontiguousarray(gamma)
        beta = cuda.cupy.ascontiguousarray(beta)
        dtype = x.dtype
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
        derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
        cudnn_mode = mode.get_cudnn_mode()
        libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
                                          x_desc.value, cudnn_mode)
        dtype_param = _get_dtype_of_tensor_descriptor(derivedBnDesc)
        if dtype_param is not dtype:
            gamma = gamma.astype(dtype_param)
            beta = beta.astype(dtype_param)
            mean = mean.astype(dtype_param)
            var = var.astype(dtype_param)
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        y = cuda.cupy.empty_like(x)
        libcudnn.batchNormalizationForwardInference(
            handle, cudnn_mode, one.data, zero.data,
            x_desc.value, x.data.ptr, x_desc.value, y.data.ptr,
            derivedBnDesc.value, gamma.data.ptr, beta.data.ptr,
            mean.data.ptr, var.data.ptr, self.eps)
    else:
        gamma = gamma[expander]
        beta = beta[expander]
        var = var + self.eps
        self.inv_var = xp.reciprocal(var)
        self.inv_std = xp.sqrt(self.inv_var, dtype=self.inv_var.dtype)
        y = _apply_bn_fwd(xp, x, mean[expander], self.inv_std[expander],
                          gamma, beta)
    return y,
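
# All three branches above compute the same inference-time normalization
# with fixed statistics; as a hedged numpy one-liner:
#     y = gamma * (x - mean) / numpy.sqrt(var + eps) + beta
# (gamma, beta, mean, var broadcast against x via `expander`).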
def forward_ia(self, x):
    self.retain_inputs((0, ))
    self.retain_outputs((0, ))
    y = ia.relu.Forward(ia.array(x[0]))
    return y,
def forward_ia(self, inputs):
    gx = ia.relu.Backward(ia.array(self.a), ia.array(inputs[0]))
    return gx,
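
# For reference, the ReLU pair above is just (hedged numpy sketch, assuming
# self.a holds the retained activation):
#     y  = numpy.maximum(x, 0)   # forward
#     gx = gy * (y > 0)          # backward, masked where activation > 0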
def forward_ia(self, xs):
    xs_mdarray = ia.mdarrayVector()
    for x in xs:
        xs_mdarray.push_back(ia.array(x))
    return ia.concat.Forward(xs_mdarray, self.axis),
def forward_ia(self, inputs):
    return ia.dropout.Backward(ia.array(self.mask),
                               ia.array(inputs[0])),
def forward(self, inputs):
    self.retain_inputs((0, 1, 2))
    x, gamma, gy = inputs
    expander = self.expander
    inv_m = gamma.dtype.type(1. / (x.size // gamma.size))
    xp = cuda.get_array_module(x)

    if self.use_ideep:
        expand_dim = False
        if x.ndim == 2:
            expand_dim = True
            x = x[:, :, None, None]
            gy = gy[:, :, None, None]
        gamma = gamma[self.expander]
        beta = numpy.zeros_like(gamma)
        W = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1))

        gx, gW = ia.batchNormalization.Backward(
            ia.array(x), ia.array(gy), self.mean,
            self.var, ia.array(W), self.eps)
        ggamma, gbeta = gW[:2]

        if expand_dim:
            gx = numpy.squeeze(gx, axis=(2, 3))
    elif self.use_cudnn:
        cudnn_mode = self.mode.get_cudnn_mode()
        x = cuda.cupy.ascontiguousarray(x)
        gamma = cuda.cupy.ascontiguousarray(gamma)
        gy = cuda.cupy.ascontiguousarray(gy)
        dtype = x.dtype
        handle = cudnn.get_handle()
        x_desc = cudnn.create_tensor_descriptor(_as4darray(x))
        derivedBnDesc = cudnn.create_uninitialized_tensor_descriptor()
        libcudnn.deriveBNTensorDescriptor(derivedBnDesc.value,
                                          x_desc.value, cudnn_mode)
        dtype_param = _get_dtype_of_tensor_descriptor(derivedBnDesc)
        if dtype_param is not dtype:
            gamma = gamma.astype(dtype_param)
        oz_dtype = 'd' if x.dtype == 'd' else 'f'
        one = numpy.array(1, dtype=oz_dtype).ctypes
        zero = numpy.array(0, dtype=oz_dtype).ctypes
        gx = cuda.cupy.empty_like(x)
        ggamma = cuda.cupy.empty_like(gamma)
        gbeta = cuda.cupy.empty_like(gamma)
        libcudnn.batchNormalizationBackward(
            handle, cudnn_mode, one.data, zero.data,
            one.data, zero.data, x_desc.value, x.data.ptr,
            x_desc.value, gy.data.ptr, x_desc.value, gx.data.ptr,
            derivedBnDesc.value, gamma.data.ptr, ggamma.data.ptr,
            gbeta.data.ptr, self.eps, self.mean.data.ptr,
            self.inv_std.data.ptr)
        if dtype_param is not dtype:
            ggamma = ggamma.astype(dtype)
            gbeta = gbeta.astype(dtype)
    else:
        gbeta = gy.sum(axis=self.axis)
        x_hat = _x_hat(x, self.mean[expander], self.inv_std[expander])
        ggamma = (gy * x_hat).sum(axis=self.axis)
        if xp is numpy:
            gx = (gamma * self.inv_std)[expander] * (
                gy - (x_hat * ggamma[expander] + gbeta[expander]) * inv_m)
        else:
            gx = cuda.elementwise(
                '''
                T gy, T x_hat, T gamma, T inv_std, T ggamma, T gbeta,
                T inv_m
                ''',
                'T gx',
                '''
                gx = (gamma * inv_std) * (
                    gy - (x_hat * ggamma + gbeta) * inv_m)
                ''', 'bn_bwd')(gy, x_hat, gamma[expander],
                               self.inv_std[expander], ggamma[expander],
                               gbeta[expander], inv_m)
    self.retain_outputs((0, 1))
    return gx, ggamma, gbeta
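
# The closed-form gradients implemented by the non-accelerated branch above,
# as a hedged numpy sketch (x_hat = (x - mean) * inv_std, m = samples per
# channel, reductions over self.axis):
#     gbeta  = gy.sum(axis=axis)
#     ggamma = (gy * x_hat).sum(axis=axis)
#     gx     = gamma * inv_std * (gy - (x_hat * ggamma + gbeta) / m)
# The iDeep and cuDNN branches are expected to return the same three
# gradients from their fused kernels.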