def __init__(self, in_ch):
    self.in_ch = in_ch

    self.gamma = nn.Tensor((in_ch,), init=nn.initializer.Scalar(1.0))
    self.beta = nn.Tensor((in_ch,), init=nn.initializer.Scalar(0.0))

    super().__init__(saveables=['gamma', 'beta'])
def __init__(self, in_ch, out_ch, use_bias=True, weight_initializer=None, bias_initializer=None):
    self.in_ch = in_ch
    self.out_ch = out_ch
    self.use_bias = use_bias

    if weight_initializer is None:
        weight_initializer = nc.Cacheton.get_var('Dense_default_weight_initializer')
    if weight_initializer is None:
        weight_initializer = nn.initializer.GlorotUniform()

    if weight_initializer.has_fan_in():
        if weight_initializer.fan_in is None:
            weight_initializer.fan_in = in_ch
    if weight_initializer.has_fan_out():
        if weight_initializer.fan_out is None:
            weight_initializer.fan_out = out_ch

    self.weight = nn.Tensor((in_ch, out_ch), init=weight_initializer)

    self.bias = None
    if self.use_bias:
        if bias_initializer is None:
            bias_initializer = nn.initializer.Scalar(0.0)
        self.bias_initializer = bias_initializer
        self.bias = nn.Tensor((self.out_ch,), init=bias_initializer)

    super().__init__(saveables=['weight', 'bias'])
def backward_test():
    inp_t      = nn.Tensor((2, 2, 8, 8), init=nn.initializer.Scalar(1.0))
    kernel_t   = nn.Tensor((4, 2, 3, 3), init=nn.initializer.Scalar(0.0))
    kernel_2_t = nn.Tensor((4, 2, 3, 3), init=nn.initializer.Scalar(0.0))

    opt = nn.optimizer.RMSprop([inp_t, kernel_t, kernel_2_t])
    opt.zero_grad()

    with nn.optimizer.freeze():
        r = nn.conv2D(inp_t, kernel_2_t)
        r.backward()

    if kernel_2_t.has_grad():
        raise Exception("kernel_2_t has grad, but used inside nn.optimizer.freeze()")

    r = nn.conv2D(inp_t, kernel_t)
    r.backward()

    if not kernel_t.has_grad():
        raise Exception("kernel_t has no grad")

    kernel_grad_t = kernel_t.get_grad()
    if all(np.ndarray.flatten(kernel_grad_t.np()) == 0):
        raise Exception("kernel_grad_t is not changed after backward step.")

    opt.step()

    if all(np.ndarray.flatten(kernel_t.np()) == 0):
        raise Exception("kernel_t is not changed after optimization step.")
def __init__(self, in_ch):
    self.in_ch = in_ch

    self.weight = nn.Tensor((in_ch,), init=nn.initializer.Scalar(1.0))
    self.bias = nn.Tensor((in_ch,), init=nn.initializer.Scalar(0.0))
    self.eps = nn.Tensor((1,), init=nn.initializer.Scalar(1e-6))

    super().__init__(saveables=['weight', 'bias', 'eps'])
def BatchNorm2D_test():
    module = BatchNorm2D(4)
    module.set_training(True)

    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')

    module.set_training(False)
    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)
def Dropout_test():
    module = Dropout(0.3)
    module.set_training(True)

    x = nn.Tensor((2, 4, 4, 4))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')

    module.set_training(False)
    x = nn.Tensor((2, 4, 4, 4))
    y = module(x)
    y.backward(grad_for_non_trainables=True)
def resize2D_bilinear(input_t, size_or_output_hw):
    """
    resize2D_bilinear operator

    arguments
        size_or_output_hw   int/float scale factor, or
                            Iterable of (height, width)
    """
    N, C, H, W = input_t.shape

    if isinstance(size_or_output_hw, Iterable):
        OH, OW = int(size_or_output_hw[0]), int(size_or_output_hw[1])
    elif isinstance(size_or_output_hw, (int, float)):
        OH = int(H * size_or_output_hw)
        OW = int(W * size_or_output_hw)
    else:
        raise ValueError(f'Unknown type of size_or_output_hw : {size_or_output_hw.__class__.__name__}')

    OH = max(1, OH)
    OW = max(1, OW)

    coords_shape = nc.TensorShape((OH, OW, 2))
    coords_t = nn.Tensor(coords_shape, nn.initializer.CoordsArange(0, H - 1, 0, W - 1))

    output_t = nn.spatial_transform2D(input_t, coords_t, grad_to_coords=False)
    return output_t
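# Hedged usage sketch (not part of the original sources): resizes an NCHW tensor
# both by a float scale factor and to an explicit (height, width) size; shapes
# are illustrative and checked via the numpy view, as the tests in this file do.
def resize2D_bilinear_example():
    x = nn.Tensor((1, 3, 8, 8), init=nn.initializer.RandomUniform())

    y = resize2D_bilinear(x, 2.0)        # float scale factor
    if y.np().shape != (1, 3, 16, 16):
        raise Exception('unexpected output shape for float scale factor')

    y = resize2D_bilinear(x, (4, 12))    # explicit (height, width)
    if y.np().shape != (1, 3, 4, 12):
        raise Exception('unexpected output shape for explicit size')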
def spatial_affine_transform2D(input_t, affine_t, output_size=None):
    """
    arguments
        input_t             Tensor (NCHW)

        affine_t            Tensor (N,2,3) affine matrix

                            example of identity affine matrix:
                            [1,0,0],
                            [0,1,0]

        output_size(None)   tuple of 2 ints (H,W) of output size
                            if None, size will not be changed

    Reference:
        Spatial Transformer Networks https://arxiv.org/abs/1506.02025
    """
    op = nc.Cacheton.get(_SpatialAffineTransform2DOp, input_t.shape, affine_t.shape, output_size)

    affine_t = affine_t.transpose((0, 2, 1))

    coords = nn.Tensor(op.coords_shape, init=op.coords_init).reshape(op.coords_reshape)
    coords = nc.op.matmul(coords, affine_t).reshape(op.coords_affined_shape)

    output_t = nc.op.spatial_transform2D(input_t, coords)
    return output_t
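# Hedged usage sketch (illustrative only): applies the identity affine matrix from
# the docstring, so the output shape should match the input. Building affine_t via
# nn.Tensor_sliced_from_value (seen in MultiGPU_test_ below) is an assumption here.
def spatial_affine_transform2D_example():
    x = nn.Tensor((1, 3, 8, 8), init=nn.initializer.RandomUniform())

    identity = np.array([[[1, 0, 0],
                          [0, 1, 0]]], dtype=np.float32)    # (N=1, 2, 3)
    affine_t = nn.Tensor_sliced_from_value(identity)

    y = spatial_affine_transform2D(x, affine_t)
    if y.np().shape != x.np().shape:
        raise Exception('identity affine transform changed the output shape')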
def conv2DTranspose(input_t, kernel_t, stride=2, dilation=1, padding='same'):
    """
    conv2DTranspose operator.

        input_t         Tensor      shape must be (batch, in_ch, height, width)

        kernel_t        Tensor      shape must be (out_ch, in_ch, k_height, k_width)

        stride(2)       int

        dilation(1)     int

        padding(same)   'valid'
                        'same'
    """
    op = nc.Cacheton.get(_Conv2DTransposeOp, input_t.shape, kernel_t.shape, int(stride), int(dilation), padding)

    output_t = nn.Tensor(op.output_shape)
    output_t._set_op_name('conv2DTranspose')
    output_t._assign_gradfn(input_t, lambda O_t, dO_t: conv2DTranspose_dI_gradfn(op, input_t, kernel_t, O_t, dO_t))
    output_t._assign_gradfn(kernel_t, lambda O_t, dO_t: conv2DTranspose_dK_gradfn(op, input_t, kernel_t, O_t, dO_t))

    out = nc.op.matmul(kernel_t.reshape((kernel_t.shape[0], -1)), op.im2colT(input_t))
    nc.op.transpose(out.reshape(op.OC_N_OH_OW), (1, 0, 2, 3), output_t=output_t)

    return output_t
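# Hedged sketch reusing the shapes from backward_test above: a stride-2 transposed
# convolution with an (out_ch, in_ch, k, k) kernel, with the backward pass checked
# the same way the module tests in this file do.
def conv2DTranspose_example():
    inp_t    = nn.Tensor((2, 2, 8, 8), init=nn.initializer.Scalar(1.0))
    kernel_t = nn.Tensor((4, 2, 3, 3), init=nn.initializer.RandomNormal())

    out_t = conv2DTranspose(inp_t, kernel_t, stride=2, padding='same')
    out_t.backward(grad_for_non_trainables=True)

    if not inp_t.has_grad():
        raise Exception('inp_t has no grad')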
def depthwise_conv2D(input_t, kernel_t, stride=1, dilation=1, padding='same'):
    """
    Depthwise Conv2D operator.

        input_t         Tensor      shape must be (batch, in_ch, height, width)

        kernel_t        Tensor      shape must be (in_ch, k_height, k_width)

        stride(1)       int

        dilation(1)     int

        padding(same)   'valid'             no padding
                        'same'              output size will be the same
                                            or divided by stride
                        int                 padding value for all sides
                        Iterable of 4 ints  paddings for left, top, right, bottom sides
    """
    op = nc.Cacheton.get(_DepthwiseConv2DOp, input_t.shape, kernel_t.shape, int(stride), int(dilation), padding)

    output_t = nn.Tensor(op.output_shape)
    output_t._set_op_name('depthwise_conv2D')
    output_t._assign_gradfn(input_t, lambda O_t, dO_t: conv2D_dI_gradfn(op, input_t, kernel_t, dO_t))
    output_t._assign_gradfn(kernel_t, lambda O_t, dO_t: conv2D_dK_gradfn(op, input_t, kernel_t, dO_t))

    op.O_depthwise_krn.run(output_t, input_t, kernel_t)

    return output_t
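# Hedged sketch (illustrative): depthwise convolution applies one (k_height, k_width)
# filter per input channel, so the kernel is (in_ch, k, k) rather than
# (out_ch, in_ch, k, k).
def depthwise_conv2D_example():
    inp_t    = nn.Tensor((2, 4, 8, 8), init=nn.initializer.Scalar(1.0))
    kernel_t = nn.Tensor((4, 3, 3), init=nn.initializer.RandomNormal())

    out_t = depthwise_conv2D(inp_t, kernel_t, stride=1, padding='same')
    out_t.backward(grad_for_non_trainables=True)

    if not inp_t.has_grad():
        raise Exception('inp_t has no grad')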
def InstanceNorm2D_test():
    module = InstanceNorm2D(4)

    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
def DepthwiseConv2D_test():
    module = DepthwiseConv2D(4, 3)

    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
def slice_op(input_t, slices, output_t=None, is_add_to_output=False):
    """
    arguments:
        output_t            compute result to this Tensor.
                            Tensor may be with different shape,
                            but should match total size.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    op = nc.Cacheton.get(_SliceOp, input_t.shape, hashable_slices(slices), is_add_to_output)

    if output_t is None:
        if op.output_is_reshaped:
            return input_t.reshape(op.output_shape)
        else:
            output_t = nn.Tensor(op.output_shape)
            output_t._set_op_name('slice')
            output_t._assign_gradfn(input_t, lambda O_t, dO_t: slice_dI_gradfn(op, input_t, dO_t))
    elif output_t.shape.size != op.output_shape.size:
        raise ValueError(f'output_t must have size {op.output_shape.size}')

    op.forward_krn.run(input_t, output_t)

    return output_t
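# Hedged sketch, assuming `slices` accepts one standard Python slice per axis
# (the op hashes them via hashable_slices); here the first two of four channels
# are selected from an NCHW tensor.
def slice_op_example():
    x = nn.Tensor((2, 4, 8, 8), init=nn.initializer.RandomUniform())

    y = slice_op(x, (slice(None), slice(0, 2), slice(None), slice(None)))
    if y.np().shape != (2, 2, 8, 8):
        raise Exception('unexpected slice_op output shape')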
def matmulc_op(a_t, b_t, output_t=None, is_add_to_output=False):
    """
    matmul operator in col-major format

    A(...,K,M) x B(...,N,K) = (...,N,M)

    arguments
        output_t            compute result to this Tensor.
                            Tensor may be with different shape,
                            but should match total size.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    op = nc.Cacheton.get(_MatmulOp, a_t.shape, b_t.shape, is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.output_shape)
        output_t._set_op_name('matmul')
        output_t._assign_gradfn(a_t, lambda O_t, dO_t: matmul_a_grad(a_t, b_t, dO_t))
        output_t._assign_gradfn(b_t, lambda O_t, dO_t: matmul_b_grad(a_t, b_t, dO_t))
    elif output_t.shape.size != op.output_shape.size:
        raise ValueError(f'output_t must have size {op.output_shape.size}')

    op.forward_krn.run(a_t, b_t, output_t)

    return output_t
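# Hedged sketch of the col-major contract documented above:
# A(...,K,M) x B(...,N,K) = (...,N,M); with K=4, M=2, N=8 the result should be (8, 2).
def matmulc_op_example():
    a_t = nn.Tensor((4, 2), init=nn.initializer.RandomNormal())   # (K, M)
    b_t = nn.Tensor((8, 4), init=nn.initializer.RandomNormal())   # (N, K)

    o_t = matmulc_op(a_t, b_t)
    if o_t.np().shape != (8, 2):
        raise Exception('unexpected matmulc_op output shape')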
def Dense_test():
    module = Dense(4, 8)

    x = nn.Tensor((2, 4))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
def BlurPool_test():
    module = BlurPool()

    x = nn.Tensor((2, 3, 64, 64))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
def FRNorm2D_test():
    in_ch = 3
    module = FRNorm2D(in_ch)

    x = nn.Tensor((2, in_ch, 64, 64))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
def Conv2DTranspose_test():
    module = Conv2DTranspose(4, 8, 3)

    x = nn.Tensor((2, 4, 8, 8))
    y = module(x)
    y.backward(grad_for_non_trainables=True)

    if not x.has_grad():
        raise Exception('x has no grad')
def SGD_test():
    weight_t = nn.Tensor((16,), init=nn.initializer.Scalar(0.0))
    weight_t.get_grad().fill(1.0)

    opt = nn.optimizer.SGD([weight_t], lr_decay=0.1, lr_dropout=0.7, clipnorm=0.1)
    opt.step()
def Initializers_test():
    nn.Tensor((128,), init=nn.initializer.Scalar(1.0)).np()
    nn.Tensor((128,), init=nn.initializer.GlorotUniform(1.0, 1.0, 1.0)).np()
    nn.Tensor((128,), init=nn.initializer.GlorotNormal(1.0, 1.0, 1.0)).np()
    nn.Tensor((128,), init=nn.initializer.HeUniform(1.0, 1.0)).np()
    nn.Tensor((128,), init=nn.initializer.HeNormal(1.0, 1.0)).np()
    nn.Tensor((128,), init=nn.initializer.RandomNormal()).np()
    nn.Tensor((128,), init=nn.initializer.RandomUniform()).np()
def spatial_transform2D(input_t, coords_t, grad_to_coords=True):
    """
    spatial_transform2D operator

    Transforms input_t in spatial axes using coords_t.

    arguments
        input_t     Tensor (NCHW)

        coords_t    Tensor (NCHWD)
                    N is 1 or input_t.N
                    C is 1 or input_t.C
                    H - output height
                    W - output width
                    D is (2) [x, y] coords

        grad_to_coords(True)    if True, broadcasts coords_t to input_t for backprop.
                                If you use spatial_transform2D for resize only,
                                you don't need backprop to coords_t.

    Reference:
        Spatial Transformer Networks https://arxiv.org/abs/1506.02025
    """
    op = nc.Cacheton.get(_SpatialTransform2DOp, input_t.shape, coords_t.shape)

    output_t = nn.Tensor(op.output_shape)
    output_t._set_op_name('spatial_transform2D')
    output_t._assign_gradfn(input_t, lambda O_t, dO_t: spatial_transform2D_dI_gradfn(op, input_t, coords_t, dO_t))

    if grad_to_coords:
        dK_coords_t = coords_t

        diff_rank = 5 - dK_coords_t.shape.rank
        if diff_rank != 0:
            dK_coords_t = dK_coords_t.reshape(diff_rank * (1,) + dK_coords_t.shape)

        if op.coords_N_tile != 1 or op.coords_C_tile != 1:
            dK_coords_t = nc.op.tile(dK_coords_t, (op.coords_N_tile, op.coords_C_tile, 1, 1, 1))

        output_t._assign_gradfn(dK_coords_t, lambda O_t, dO_t: spatial_transform2D_dK_gradfn(op, input_t, dK_coords_t, dO_t))

    op.O_forward_krn.run(output_t, input_t, coords_t)

    return output_t
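# Hedged sketch mirroring resize2D_bilinear above: a CoordsArange coordinate grid of
# the desired output size samples the input bilinearly; grad_to_coords is left False
# because the coordinates are constant here.
def spatial_transform2D_example():
    H, W = 8, 8
    OH, OW = 16, 16

    x = nn.Tensor((1, 3, H, W), init=nn.initializer.RandomUniform())
    coords_t = nn.Tensor(nc.TensorShape((OH, OW, 2)),
                         nn.initializer.CoordsArange(0, H - 1, 0, W - 1))

    y = spatial_transform2D(x, coords_t, grad_to_coords=False)
    if y.np().shape != (1, 3, OH, OW):
        raise Exception('unexpected spatial_transform2D output shape')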
def __init__(self, in_ch, out_ch, kernel_size, stride=2, dilation=1, padding='same',
             use_bias=True, kernel_initializer=None, bias_initializer=None):
    self.in_ch = in_ch
    self.out_ch = out_ch
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.use_bias = use_bias

    if kernel_initializer is None:
        kernel_initializer = nc.Cacheton.get_var('Conv2DTranspose_default_kernel_initializer')
    if kernel_initializer is None:
        kernel_initializer = nn.initializer.GlorotUniform()

    if kernel_initializer.has_fan_in():
        if kernel_initializer.fan_in is None:
            kernel_initializer.fan_in = in_ch * kernel_size * kernel_size
    if kernel_initializer.has_fan_out():
        if kernel_initializer.fan_out is None:
            kernel_initializer.fan_out = out_ch * kernel_size * kernel_size
    self.kernel_initializer = kernel_initializer

    self.kernel = nn.Tensor((out_ch, in_ch, kernel_size, kernel_size), init=kernel_initializer)

    self.bias = None
    if self.use_bias:
        if bias_initializer is None:
            bias_initializer = nn.initializer.Scalar(0.0)
        self.bias_initializer = bias_initializer
        self.bias = nn.Tensor((self.out_ch,), init=bias_initializer)

    super().__init__(saveables=['kernel', 'bias'])
def __init__(self, in_ch, momentum=0.99):
    self.in_ch = in_ch

    if momentum < 0 or momentum > 1.0:
        raise ValueError(f'momentum {momentum} must be in range [0 .. 1.0]')
    self.momentum = momentum

    self.gamma = nn.Tensor((in_ch,), init=nn.initializer.Scalar(1.0))
    self.beta = nn.Tensor((in_ch,), init=nn.initializer.Scalar(0.0))
    self.running_mean = nn.Tensor((in_ch,), init=nn.initializer.Scalar(0.0))
    self.running_var = nn.Tensor((in_ch,), init=nn.initializer.Scalar(1.0))

    super().__init__(saveables=['gamma', 'beta', 'running_mean', 'running_var'],
                     trainables=['gamma', 'beta'])
def MultiGPU_test_():
    devices_count = len(nn.devices.get_current())

    x = nn.Tensor_sliced_from_value(np.arange(0, devices_count).reshape((devices_count,)))

    weight_t = nn.Tensor((1,), init=nn.initializer.Scalar(0.0))
    weight_t.get_grad().set(x)

    opt = nn.optimizer.RMSprop([weight_t], rho=1)
    opt.step()

    if all(np.ndarray.flatten(weight_t.np(0)) == np.ndarray.flatten(weight_t.np(1))):
        raise Exception("weight_t is equal on both GPUs")

    weight_t = nn.Tensor((1,), init=nn.initializer.Scalar(0.0))
    weight_t.get_grad().set(x)

    opt = nn.optimizer.RMSprop([weight_t], rho=1)
    opt.step(multi_gpu_step=True)

    if all(np.ndarray.flatten(weight_t.np(0)) != np.ndarray.flatten(weight_t.np(1))):
        raise Exception("weight_t is not equal on both GPUs")
def pool2D_op(op_type, input_t, pool_size=2, stride=2, padding='same'):
    """
    arguments
        op_type     'avg', 'min', 'max'
    """
    op = nc.Cacheton.get(_Pool2DOp, op_type, input_t.shape, int(pool_size), int(stride), padding)

    output_t = nn.Tensor(op.output_shape)
    output_t._set_op_name(f'{op_type}_pool2D')
    output_t._assign_gradfn(input_t, lambda O_t, dO_t: pool2D_dI_gradfn(op, input_t, dO_t, O_t))

    op.O_forward_krn.run(output_t, input_t)

    return output_t
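# Hedged sketch: average pooling with pool_size=2 and stride=2 is expected to halve
# the spatial dimensions of an NCHW tensor under 'same' padding.
def pool2D_op_example():
    x = nn.Tensor((2, 4, 8, 8), init=nn.initializer.RandomUniform())

    y = pool2D_op('avg', x, pool_size=2, stride=2, padding='same')
    if y.np().shape != (2, 4, 4, 4):
        raise Exception('unexpected pool2D_op output shape')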
def stack_op(tensor_list, axis, output_t=None, is_add_to_output=False):
    """
    arguments:
        output_t            compute result to this Tensor.
                            Tensor may be with different shape,
                            but should match total size.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    tensor_list = tuple(tensor_list)
    if not all(isinstance(tensor, nn.Tensor) for tensor in tensor_list):
        raise ValueError('All values must have type of Tensor')

    stack_count = len(tensor_list)
    if stack_count == 0:
        raise ValueError('tensor_list is empty')

    input_shape = tensor_list[0].shape
    if not all(tensor.shape == input_shape for tensor in tensor_list):
        raise ValueError('All tensors must have the same shape')

    op = nc.Cacheton.get(_StackOp, input_shape, int(axis), stack_count, is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.info.output_shape)
        output_t._set_op_name('stack')
        for n in range(stack_count):
            output_t._assign_gradfn(tensor_list[n],
                                    lambda O_t, dO_t, n=n: stack_gradfn(op, tensor_list[n], dO_t, n))
    elif output_t.shape.size != op.info.output_shape.size:
        raise ValueError(f'output_t must have size {op.info.output_shape.size}')

    for i in range(stack_count):
        op.forward_krn.run(output_t, tensor_list[i], np.int64(i))

    return output_t
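# Hedged sketch: stacking two tensors of identical shape along a new leading axis,
# as required by the same-shape check above.
def stack_op_example():
    a_t = nn.Tensor((2, 4, 8, 8), init=nn.initializer.RandomUniform())
    b_t = nn.Tensor((2, 4, 8, 8), init=nn.initializer.RandomUniform())

    y = stack_op([a_t, b_t], axis=0)
    if y.np().shape != (2, 2, 4, 8, 8):
        raise Exception('unexpected stack_op output shape')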
def shallow_mode_test():
    class Module1(nn.Module):
        def __init__(self, include_self=True):
            self.conv = nn.Conv2D(1, 1, 3, stride=2)

        def forward(self, x):
            x = self.conv(x)
            return x

    m = Module1()
    m.set_training(True)
    out = m.shallow_forward(nn.Tensor((1, 1, 16, 16)))

    dev = nn.devices.get_current()[0]
    if dev.get_used_memory() != 0:
        raise Exception('Memory is allocated in shallow_forward')
def dual_wise_op(DualWiseOpKernel_cls, DualWiseOpKernel_args, a_t, b_t, output_t=None, is_add_to_output=False):
    """
    operator for DualWiseOpKernel ops with two inputs

    arguments
        DualWiseOpKernel_cls    class derived from DualWiseOpKernel

        DualWiseOpKernel_args   args to construct DualWiseOpKernel_cls

        output_t                compute result to this Tensor.
                                Tensor may be with different shape,
                                but should match total size.
                                gradfn will not be set.

        is_add_to_output        add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    op = nc.Cacheton.get(_DualWiseOp, DualWiseOpKernel_cls, DualWiseOpKernel_args,
                         a_t.shape, b_t.shape, is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.info.output_shape)
        output_t._set_op_name(f'{op.kernel.get_op_name()}')
        output_t._assign_gradfn(a_t, lambda O_t, dO_t: dual_wise_op_A_gradfn(op, a_t, b_t, O_t, dO_t))
        output_t._assign_gradfn(b_t, lambda O_t, dO_t: dual_wise_op_B_gradfn(op, a_t, b_t, O_t, dO_t))
    elif output_t.shape.size != op.info.output_shape.size:
        raise ValueError(f'output_t must have size {op.info.output_shape.size}')

    op.forward_krn.run(output_t, a_t, b_t)

    return output_t
def concat_op(tensor_list, axis, output_t=None, is_add_to_output=False):
    """
    arguments
        output_t            compute result to this Tensor.
                            Tensor may be with different shape,
                            but should match total size.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.
    """
    is_add_to_output = False if output_t is None else is_add_to_output

    tensor_list = tuple(tensor_list)
    if not all(isinstance(tensor, nn.Tensor) for tensor in tensor_list):
        raise ValueError('All values must have type of Tensor')
    if len(tensor_list) == 0:
        raise ValueError('empty tensor_list')

    op = nc.Cacheton.get(_ConcatOp, tuple(t.shape for t in tensor_list), int(axis), is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.info.output_shape)
        output_t._set_op_name('concat')
        for n in range(len(tensor_list)):
            output_t._assign_gradfn(tensor_list[n],
                                    lambda O_t, dO_t, n=n: concat_gradfn(op, tensor_list[n], dO_t, n))
    elif output_t.shape.size != op.info.output_shape.size:
        raise ValueError(f'output_t must have size {op.info.output_shape.size}')

    for i, t in enumerate(tensor_list):
        op.forward_krn.run(output_t, t,
                           np.int64(op.info.axis_offsets[i]),
                           np.int64(op.info.axis_sizes[i]),
                           global_shape=(t.shape.size,))

    return output_t
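# Hedged sketch: concatenating two NCHW tensors along the channel axis; all other
# dimensions match, so only the channel count changes.
def concat_op_example():
    a_t = nn.Tensor((2, 3, 8, 8), init=nn.initializer.RandomUniform())
    b_t = nn.Tensor((2, 5, 8, 8), init=nn.initializer.RandomUniform())

    y = concat_op([a_t, b_t], axis=1)
    if y.np().shape != (2, 8, 8, 8):
        raise Exception('unexpected concat_op output shape')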
def dropout_op(input_t, rate, seed=None, output_t=None, is_add_to_output=False):
    """
    Dropout operator

    arguments
        input_t             Tensor

        rate                float [0 .. 1.0) probability

        seed(None)          int value. If None - random seed

        output_t            compute result to this Tensor.
                            Tensor may be with different shape,
                            but should match total size.
                            gradfn will not be set.

        is_add_to_output    add result to output_t if output_t is set.

    reference
        Dropout: A Simple Way to Prevent Neural Networks from Overfitting
    """
    op = nc.Cacheton.get(_DropoutOp, input_t.shape, float(rate),
                         int(seed) if seed is not None else None, is_add_to_output)

    if output_t is None:
        output_t = nn.Tensor(op.output_shape)
        output_t._set_op_name('dropout')
        output_t._assign_gradfn(input_t, lambda O_t, dO_t: dropout_dI_gradfn(op, input_t, dO_t))
    elif output_t.shape.size != op.output_shape.size:
        raise ValueError(f'output_t must have size {op.output_shape.size}')

    op.krn.run(output_t, input_t, np.uint32(op.seed))

    return output_t
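# Hedged sketch: applying the dropout operator directly with an explicit rate and a
# fixed seed; the output keeps the input shape, with roughly `rate` of the values zeroed.
def dropout_op_example():
    x = nn.Tensor((2, 4, 8, 8), init=nn.initializer.Scalar(1.0))

    y = dropout_op(x, 0.3, seed=123)
    if y.np().shape != x.np().shape:
        raise Exception('dropout_op must not change the shape')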