def GetOutputSizeOfPooling2dMax(in_size, kernel_size, stride, padding,
                                ceil_mode=False, rigor=False, verbose=False):
    """
    Returns status and the size of the output tensor.
    :param in_size:
    :param kernel_size:
    :param stride:
    :param padding:
    :param ceil_mode: use floor() when False, otherwise ceil() when True
    :param rigor:
    :param verbose:
    :return: status and the size of the output tensor
    """
    err = 0
    if rigor:
        if (in_size < 1):
            err += 1
            if verbose: dlr_common.DpuError(f"in_size should be positive: {in_size}", flush=True)
        if (kernel_size < 1):
            err += 1
            if verbose: dlr_common.DpuError(f"kernel_size should be positive: {kernel_size}", flush=True)
        if ((kernel_size % 2) == 1):
            err += 1
            if verbose: dlr_common.DpuError(f"kernel_size should be even: {kernel_size}", flush=True)
        if (stride < 1):
            err += 1
            if verbose: dlr_common.DpuError(f"stride should be larger than 0: {stride}", flush=True)
        if (padding < 0):
            err += 1
            if verbose: dlr_common.DpuError(f"padding should be non-negative: {padding}", flush=True)
    if ceil_mode:
        out_size = math.ceil(((in_size - kernel_size + 2 * padding) / stride) + 1)
    else:
        out_size = math.floor(((in_size - kernel_size + 2 * padding) / stride) + 1)
    if err > 0:
        return False, out_size
    else:
        return True, out_size
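
# Example (illustrative, not part of the library): a 224-wide input with a 2x2
# max-pooling kernel, stride 2 and no padding gives
# floor((224 - 2 + 2*0)/2 + 1) = 112, so the call below is expected to return
# (True, 112).
#
#   ok, out_size = GetOutputSizeOfPooling2dMax(in_size=224, kernel_size=2,
#                                              stride=2, padding=0)
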
def GetOutputSizeOfDeconvolution2d(in_size, kernel_size, stride, padding,
                                   output_padding=0, dilation=1, rigor=False, verbose=False):
    """
    Returns status and the size of the output tensor.
    :param in_size:
    :param kernel_size:
    :param stride:
    :param padding:
    :param output_padding:
    :param dilation:
    :param rigor:
    :param verbose:
    :return: status and the size of the output tensor
    """
    err = 0
    if rigor or dlr_common.rigor:
        if (in_size < 1):
            err += 1
            if verbose: dlr_common.DpuError(f"in_size should be positive: {in_size}", flush=True)
        if (kernel_size < 1):
            err += 1
            if verbose: dlr_common.DpuError(f"kernel_size should be positive: {kernel_size}", flush=True)
        if ((kernel_size % 2) != 1):
            err += 1
            if verbose: dlr_common.DpuError(f"kernel_size should be odd: {kernel_size}", flush=True)
        if (stride < 1):
            err += 1
            if verbose: dlr_common.DpuError(f"stride should be larger than 0: {stride}", flush=True)
        if (padding < 0):
            err += 1
            if verbose: dlr_common.DpuError(f"padding should be non-negative: {padding}", flush=True)
    out_size = int((in_size - 1) * stride - 2 * padding
                   + dilation * (kernel_size - 1) + output_padding + 1)
    if err > 0:
        return False, out_size
    else:
        return True, out_size
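
# Example (illustrative, not part of the library; assumes the output_padding
# term is applied as in the formula above): a 112-wide input with a 3x3 kernel,
# stride 2, padding 1 and output_padding 1 gives
# (112-1)*2 - 2*1 + 1*(3-1) + 1 + 1 = 224, so the call below is expected to
# return (True, 224).
#
#   ok, out_size = GetOutputSizeOfDeconvolution2d(in_size=112, kernel_size=3,
#                                                 stride=2, padding=1,
#                                                 output_padding=1)
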
def LinearNd( out_data          # ndim x out_size
            , in_data           # ndim x in_size
            , weight            # out_size x in_size
            , bias=None         # out_size
            , rigor=False
            , verbose=False):
    """
    Returns True on success, otherwise returns False.
    Applies a linear transformation (matrix multiplication) over the input data.
    Note that all nd-array arguments are NumPy arrays (mutable), not PyTorch tensors (immutable).
    :param out_data: <mutable> output data, out_data[ndim][out_size]
    :param in_data: input data, in_data[ndim][in_size]
    :param weight: weight[out_size][in_size]
    :param bias: bias for each output, bias[out_size]
    :param rigor: check values rigorously when 'True'
    :param verbose: output more messages when 'True'
    :return: 'True' on success, 'False' on failure.
    The following are derived from the input arguments.
    . ndim: first dimension of out/in_data
    . out_size: array size of out_data
    . in_size: array size of in_data
    . weight_size: dimension of weight
    . bias_size: array size of bias
    Following is an example usage for PyTorch.
    LinearNd( tensor_out_data.data.numpy()  # ndim x out_size
            , tensor_in_data.data.numpy()   # ndim x in_size
            , tensor_weight.data.numpy()    # out_size x in_size
            , tensor_bias.data.numpy()      # out_size
            , rigor=True
            , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("out_data is not 2 dim", flush=True)
        if (in_data.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("in_data is not 2 dim", flush=True)
        if (weight.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("weight is not 2 dim", flush=True)
        if (bias is not None) and (bias.ndim != 1):
            error += 1
            if verbose: dlr_common.DpuError(f"bias should be 1 dim: {bias.ndim}", flush=True)
        t_out_ndim = out_data.shape[0]
        t_out_size = out_data.shape[1]        # note ndim (i.e., rank) is 2
        t_in_ndim = in_data.shape[0]
        t_in_size = in_data.shape[1]          # note ndim (i.e., rank) is 2
        t_weight_size_row = weight.shape[0]   # note ndim (i.e., rank) is 2
        t_weight_size_col = weight.shape[1]   # note ndim (i.e., rank) is 2
        if (t_out_ndim != t_in_ndim):
            error += 1
            dlr_common.DpuError("dimension mis-match", flush=True)
        if (t_out_size != t_weight_size_row):
            error += 1
            dlr_common.DpuError("out_size mis-match", flush=True)
        if (t_in_size != t_weight_size_col):
            error += 1
            dlr_common.DpuError("in_size mis-match", flush=True)
        if verbose:
            dlr_common.DpuInfo(f"out_size   ={t_out_size} {out_data.shape}")
            dlr_common.DpuInfo(f"in_size    ={t_in_size} {in_data.shape}")
            dlr_common.DpuInfo(f"weight_size={t_weight_size_row} {t_weight_size_col}")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match", flush=True)
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'LinearNdInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'LinearNdFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'LinearNdDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not supported: " + str(out_data.dtype.type), flush=True)
        return False
    _LinearNd = dlr_common.WrapFunction(
        dlr_common._dlr, _fname, None  # return type
        , [ ctypes.POINTER(_ctype)  # out data
          , ctypes.POINTER(_ctype)  # in data
          , ctypes.POINTER(_ctype)  # weight
          , ctypes.POINTER(_ctype)  # bias
          , ctypes.c_ushort         # out_size
          , ctypes.c_ushort         # in_size
          , ctypes.c_ushort         # bias_size
          , ctypes.c_ubyte          # ndim
          , ctypes.c_int            # rigor
          , ctypes.c_int ])         # verbose
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_weight = weight.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_out_size = ctypes.c_ushort(out_data.shape[1])  # note ndim (i.e., rank) is 2
    CP_in_size = ctypes.c_ushort(in_data.shape[1])    # note ndim (i.e., rank) is 2
    CP_ndim = ctypes.c_ubyte(in_data.shape[0])        # note ndim (i.e., rank) is 2
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    if (bias is None) or (bias.size == 0):
        CP_bias = ctypes.POINTER(_ctype)()
        CP_bias_size = ctypes.c_ushort(0)
    else:
        CP_bias = bias.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_bias_size = ctypes.c_ushort(bias.shape[0])
    _LinearNd(CP_out_data, CP_in_data, CP_weight, CP_bias,
              CP_out_size, CP_in_size, CP_bias_size, CP_ndim,
              CP_rigor, CP_verbose)
    return True
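
# Usage sketch (illustrative, not part of the library; assumes the dlr shared
# library has been loaded by dlr_common): multiply a batch of 2 vectors of
# length 8 by a 4x8 weight matrix.
#
#   import numpy as np
#   in_data  = np.random.rand(2, 8).astype(np.float32)    # ndim x in_size
#   weight   = np.random.rand(4, 8).astype(np.float32)    # out_size x in_size
#   bias     = np.zeros(4, dtype=np.float32)               # out_size
#   out_data = np.zeros((2, 4), dtype=np.float32)          # ndim x out_size
#   LinearNd(out_data, in_data, weight, bias, rigor=True, verbose=True)
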
def Norm1dBatch( out_data         # in_channel x in_size (contiguous)
               , in_data          # in_channel x in_size (contiguous)
               , running_mean     # in_channel
               , running_var      # in_channel
               , scale=None       # None or in_channel (default 1)
               , bias=None        # None or in_channel (default 0)
               , epsilon=1E-5
               , rigor=False
               , verbose=False):
    """
    Returns True on success, otherwise returns False.
    Applies 1D batch normalization over the input data using per-channel
    running mean and variance.
    Note that all nd-array arguments are NumPy arrays (mutable), not PyTorch tensors (immutable).
    :param out_data: <mutable> output data, out_data[channel][size]
    :param in_data: input data, in_data[channel][size]
    :param running_mean:
    :param running_var:
    :param scale:
    :param bias:
    :param epsilon:
    :param rigor: check values rigorously when 'True'
    :param verbose: output more messages when 'True'
    :return: 'True' on success, 'False' on failure.
    Following is an example usage for PyTorch.
    Norm1dBatch( tensor_out_data.data.numpy()      # in_channel x in_size
               , tensor_in_data.data.numpy()       # in_channel x in_size
               , tensor_running_mean.data.numpy()  # in_channel
               , tensor_running_var.data.numpy()   # in_channel
               , tensor_scale.data.numpy()         # in_channel
               , tensor_bias.data.numpy()          # in_channel
               , epsilon
               , rigor=True
               , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != in_data.ndim):
            error += 1
            if verbose: dlr_common.DpuError("out_data in_data dimension mis-match", flush=True)
        if (running_mean.ndim != 1):
            error += 1
            if verbose: dlr_common.DpuError(f"running_mean dimension mis-match {running_mean.ndim}", flush=True)
        if (running_mean.size != in_data.shape[0]):  # num of channels
            error += 1
            if verbose: dlr_common.DpuError(f"running_mean size mis-match {running_mean.size} {in_data.shape}", flush=True)
        if (running_var.ndim != 1):
            error += 1
            if verbose: dlr_common.DpuError("running_var dimension mis-match", flush=True)
        if (running_var.size != in_data.shape[0]):  # num of channels
            error += 1
            if verbose: dlr_common.DpuError("running_var size mis-match", flush=True)
        if (scale is not None) and (scale.ndim != 1):
            error += 1
            if verbose: dlr_common.DpuError(f"scale should be 1 dim: {scale.ndim}", flush=True)
        if (bias is not None) and (bias.ndim != 1):
            error += 1
            if verbose: dlr_common.DpuError(f"bias should be 1 dim: {bias.ndim}", flush=True)
        t_out_channel = out_data.shape[0]
        t_out_size = out_data.shape[1]
        t_in_channel = in_data.shape[0]
        t_in_size = in_data.shape[1]
        if (t_out_channel != t_in_channel):
            error += 1
            dlr_common.DpuError("channel mis-match", flush=True)
        if (t_out_size != t_in_size):
            error += 1
            dlr_common.DpuError("size mis-match", flush=True)
        if (error != 0):
            dlr_common.DpuError("parameter mis-match", flush=True)
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Norm1dBatchInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Norm1dBatchFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Norm1dBatchDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not supported: " + str(out_data.dtype.type), flush=True)
        return False
    _Norm1dBatch = dlr_common.WrapFunction(
        dlr_common._dlr, _fname, None  # return type
        , [ ctypes.POINTER(_ctype)  # out data
          , ctypes.POINTER(_ctype)  # in data
          , ctypes.POINTER(_ctype)  # running_mean
          , ctypes.POINTER(_ctype)  # running_var
          , ctypes.POINTER(_ctype)  # scale
          , ctypes.POINTER(_ctype)  # bias
          , ctypes.c_uint           # in_size
          , ctypes.c_ushort         # scale_size
          , ctypes.c_ushort         # bias_size
          , ctypes.c_ushort         # in_channel
          , ctypes.c_float          # epsilon
          , ctypes.c_int            # rigor
          , ctypes.c_int ])         # verbose
    in_channel = in_data.shape[0]
    in_size = int(in_data.size / in_channel)  # num of elements per channel
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_running_mean = running_mean.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_running_var = running_var.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_size = ctypes.c_uint(in_size)
    CP_in_channel = ctypes.c_ushort(in_channel)
    CP_epsilon = ctypes.c_float(epsilon)
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    if (scale is None) or (scale.size == 0):
        CP_scale = ctypes.POINTER(_ctype)()
        CP_scale_size = ctypes.c_ushort(0)
    else:
        CP_scale = scale.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_scale_size = ctypes.c_ushort(scale.shape[0])  # scale is 1-dim
    if (bias is None) or (bias.size == 0):
        CP_bias = ctypes.POINTER(_ctype)()
        CP_bias_size = ctypes.c_ushort(0)
    else:
        CP_bias = bias.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_bias_size = ctypes.c_ushort(bias.shape[0])    # bias is 1-dim
    _Norm1dBatch(CP_out_data, CP_in_data, CP_running_mean, CP_running_var,
                 CP_scale, CP_bias, CP_in_size, CP_scale_size, CP_bias_size,
                 CP_in_channel, CP_epsilon, CP_rigor, CP_verbose)
    return True
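
# Usage sketch (illustrative, not part of the library; assumes the dlr shared
# library has been loaded by dlr_common): normalize 3 channels of 16 elements
# each with per-channel statistics.
#
#   import numpy as np
#   in_data      = np.random.rand(3, 16).astype(np.float32)   # in_channel x in_size
#   out_data     = np.zeros_like(in_data)
#   running_mean = in_data.mean(axis=1).astype(np.float32)    # in_channel
#   running_var  = in_data.var(axis=1).astype(np.float32)     # in_channel
#   Norm1dBatch(out_data, in_data, running_mean, running_var,
#               scale=None, bias=None, epsilon=1e-5, rigor=True)
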
def Pooling2dMax( out_data      # out_channel x out_size x out_size
                , in_data       # in_channel x in_size x in_size
                , kernel_size   # kernel_size x kernel_size
                , stride=1
                , padding=0
                , ceil_mode=False
                , rigor=False
                , verbose=False):
    """
    Returns True on success, otherwise returns False.
    Applies a 2D max-pooling over an input data composed of several input channels.
    Note that all nd-array arguments are NumPy arrays (mutable), not PyTorch tensors (immutable).
    :param out_data: <mutable> output data, out_data[out_channel][out_size][out_size]
    :param in_data: input data, in_data[in_channel][in_size][in_size]
    :param kernel_size:
    :param stride: num of skips to apply next filter
    :param padding: num of pixels at the boundary
    :param ceil_mode: use floor() when False, otherwise ceil()
    :param rigor: check values rigorously when 'True'
    :param verbose: output more messages when 'True'
    :return: 'True' on success, 'False' on failure.
    The following are derived from the input arguments.
    . out_size: array size of out_data
    . in_size: array size of in_data
    . in_channels: num of input channels
    . out_channels: num of output channels (it should be the same as in_channels)
    Following is an example usage for PyTorch.
    Pooling2dMax( tensor_out_data.data.numpy()  # out_channel x out_size x out_size
                , tensor_in_data.data.numpy()   # in_channel x in_size x in_size
                , kernel_size
                , stride
                , padding
                , rigor=True
                , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != 3):
            error += 1
            if verbose: dlr_common.DpuError("out_data is not 3 dim")
        if (in_data.ndim != 3):
            error += 1
            if verbose: dlr_common.DpuError("in_data is not 3 dim")
        if (kernel_size < 2):
            error += 1
            if verbose: dlr_common.DpuError("kernel_size should be >=2")
        if (stride < 1):
            error += 1
            if verbose: dlr_common.DpuError("stride should be >=1")
        if (padding < 0):
            error += 1
            if verbose: dlr_common.DpuError("padding should be >=0")
        t_out_size = out_data.shape[2]  # note ndim (i.e., rank) is 3
        t_in_size = in_data.shape[2]    # note ndim (i.e., rank) is 3
        t_kernel_size = kernel_size
        t_in_channel = in_data.shape[0]
        t_out_channel = out_data.shape[0]
        t_stride = stride
        t_padding = padding
        if (t_in_channel != t_out_channel):
            error += 1
            if verbose: dlr_common.DpuError("in/out channel should be the same")
        status, t_out_size_expect = GetOutputSizeOfPooling2dMax(
            t_in_size, t_kernel_size, t_stride, t_padding, ceil_mode)
        if not status:
            return False  # something wrong with arguments
        if (t_out_size != t_out_size_expect):
            error += 1
            if verbose: dlr_common.DpuError(f"out_size mis-match {t_out_size} {t_out_size_expect}")
        if ((t_kernel_size % 2) == 1):
            error += 1
            if verbose: dlr_common.DpuError("kernel_size should be even")
        if verbose:
            dlr_common.DpuInfo(f"out_channel={t_out_channel} {out_data.shape}")
            dlr_common.DpuInfo(f"in_channel ={t_in_channel} {in_data.shape}")
            dlr_common.DpuInfo(f"out_size   ={t_out_size} {out_data.shape}")
            dlr_common.DpuInfo(f"in_size    ={t_in_size} {in_data.shape}")
            dlr_common.DpuInfo(f"kernel_size={t_kernel_size}")
            dlr_common.DpuInfo(f"stride     ={t_stride} {stride}")
            dlr_common.DpuInfo(f"padding    ={t_padding} {padding}")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match")
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Pooling2dMaxInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Pooling2dMaxFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Pooling2dMaxDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not supported: " + str(out_data.dtype.type))
        return False
    _Pooling2dMax = dlr_common.WrapFunction(
        dlr_common._dlr, _fname, None  # return type
        , [ ctypes.POINTER(_ctype)  # output features
          , ctypes.POINTER(_ctype)  # input image
          , ctypes.c_ushort         # out_size
          , ctypes.c_ushort         # in_size
          , ctypes.c_ubyte          # kernel_size (only for square filter)
          , ctypes.c_ushort         # channel
          , ctypes.c_ubyte          # stride
          , ctypes.c_ubyte          # padding
          , ctypes.c_int            # ceil_mode
          , ctypes.c_int            # rigor
          , ctypes.c_int ])         # verbose
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_out_size = ctypes.c_ushort(out_data.shape[2])  # note ndim (i.e., rank) is 3
    CP_in_size = ctypes.c_ushort(in_data.shape[2])    # note ndim (i.e., rank) is 3
    CP_kernel_size = ctypes.c_ubyte(kernel_size)
    CP_channel = ctypes.c_ushort(in_data.shape[0])
    CP_stride = ctypes.c_ubyte(stride)
    CP_padding = ctypes.c_ubyte(padding)
    CP_ceil_mode = 1 if ceil_mode else 0
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    _Pooling2dMax(CP_out_data, CP_in_data, CP_out_size, CP_in_size,
                  CP_kernel_size, CP_channel, CP_stride, CP_padding,
                  CP_ceil_mode, CP_rigor, CP_verbose)
    return True
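
# Usage sketch (illustrative, not part of the library; assumes the dlr shared
# library has been loaded by dlr_common): 2x2 max-pooling with stride 2 over a
# 3-channel 8x8 input produces a 3-channel 4x4 output.
#
#   import numpy as np
#   in_data  = np.random.rand(3, 8, 8).astype(np.float32)  # in_channel x in_size x in_size
#   out_data = np.zeros((3, 4, 4), dtype=np.float32)        # out_channel x out_size x out_size
#   Pooling2dMax(out_data, in_data, kernel_size=2, stride=2, padding=0, rigor=True)
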
def Activations( func_name
               , out_data              # any dimension
               , in_data               # any dimension
               , negative_slope=0.01   # for LeakyReLu
               , rigor=False
               , verbose=False):
    """
    Returns True on success, otherwise returns False.
    Applies a non-linear activation function over an input data composed of
    several input channels.
    Note that all nd-array arguments are NumPy arrays (mutable), not PyTorch tensors (immutable).
    :param out_data: <mutable> output data, out_data[...]
    :param in_data: input data, in_data[...]
    :param rigor: check values rigorously when 'True'
    :param verbose: output more messages when 'True'
    :return: 'True' on success, 'False' on failure.
    The following are derived from the input arguments.
    . out_size: array size of out_data
    . in_size: array size of in_data
    Following is an example usage for PyTorch.
    Activations( 'FUNC_NAME'
               , tensor_out_data.data.numpy()  # contiguous array
               , tensor_in_data.data.numpy()   # contiguous array
               , rigor=True
               , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != in_data.ndim):
            error += 1
            if verbose: dlr_common.DpuError("data dimension mis-match")
        if (out_data.size != in_data.size):
            error += 1
            if verbose: dlr_common.DpuError(f"data size mis-match {in_data.size} {out_data.size}")
        for dim in range(in_data.ndim):
            if (in_data.shape[dim] != out_data.shape[dim]):
                error += 1
                if verbose: dlr_common.DpuError("data dimension size mis-match")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match")
            return False
    if (out_data.ndim == 0) or (out_data.ndim == 1):
        channel = 1
        size = out_data.size
    elif (out_data.ndim == 2):
        channel = out_data.shape[0]
        size = out_data.shape[1]
    else:
        channel = out_data.shape[0]
        size = int(np.prod(out_data.shape[1:]))
    if out_data.dtype.type == np.int32:
        _fname = 'Activation' + func_name + 'Int'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Activation' + func_name + 'Float'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Activation' + func_name + 'Double'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not supported: " + str(out_data.dtype.type))
        return False
    if func_name == 'LeakyReLu':
        _Activation = dlr_common.WrapFunction(
            dlr_common._dlr, _fname, None  # return type
            , [ ctypes.POINTER(_ctype)  # output
              , ctypes.POINTER(_ctype)  # input
              , ctypes.c_uint           # number of elements
              , ctypes.c_ushort         # number of channels
              , ctypes.c_uint           # negative slope (float bit pattern)
              , ctypes.c_int            # rigor
              , ctypes.c_int ])         # verbose
        CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_size = ctypes.c_uint(size)
        CP_channel = ctypes.c_ushort(channel)
        # pass the IEEE-754 bit pattern of negative_slope through a c_uint
        CP_negative_slope = ctypes.c_uint.from_buffer(ctypes.c_float(negative_slope)).value
        CP_rigor = 1 if rigor else 0
        CP_verbose = 1 if verbose else 0
        _Activation(CP_out_data, CP_in_data, CP_size, CP_channel,
                    CP_negative_slope, CP_rigor, CP_verbose)
    else:
        _Activation = dlr_common.WrapFunction(
            dlr_common._dlr, _fname, None  # return type
            , [ ctypes.POINTER(_ctype)  # output
              , ctypes.POINTER(_ctype)  # input
              , ctypes.c_uint           # number of elements
              , ctypes.c_ushort         # number of channels
              , ctypes.c_int            # rigor
              , ctypes.c_int ])         # verbose
        CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_size = ctypes.c_uint(size)
        CP_channel = ctypes.c_ushort(channel)
        CP_rigor = 1 if rigor else 0
        CP_verbose = 1 if verbose else 0
        _Activation(CP_out_data, CP_in_data, CP_size, CP_channel,
                    CP_rigor, CP_verbose)
    return True
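
# Usage sketch (illustrative, not part of the library; assumes the dlr shared
# library has been loaded by dlr_common and exports a matching
# Activation<func_name><Type> symbol -- the 'ReLu' name below is an assumption,
# only 'LeakyReLu' is referenced in the code above):
#
#   import numpy as np
#   in_data  = np.random.randn(3, 16).astype(np.float32)
#   out_data = np.zeros_like(in_data)
#   Activations('ReLu', out_data, in_data, rigor=True)
#   Activations('LeakyReLu', out_data, in_data, negative_slope=0.01, rigor=True)
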
def Deconvolution2d( out_data       # out_channel x out_size x out_size
                   , in_data        # in_channel x in_size x in_size
                   , kernel         # in_channel x out_channel x kernel_size x kernel_size
                   , bias=None      # out_channel
                   , stride=1
                   , padding=0
                   , rigor=False
                   , verbose=False):
    """
    Returns True on success, otherwise returns False.
    Applies a 2D deconvolution (transposed convolution) over an input data
    composed of several input channels.
    Note that all nd-array arguments are NumPy arrays (mutable), not PyTorch tensors (immutable).
    :param out_data: <mutable> output data, out_data[out_channel][out_size][out_size]
    :param in_data: input data, in_data[in_channel][in_size][in_size]
    :param kernel: kernel (or filter), kernel[in_channel][out_channel][kernel_size][kernel_size]
    :param bias: bias for each filter (kernel), bias[out_channel]
    :param stride: num of skips to apply next filter
    :param padding: num of pixels at the boundary
    :param rigor: check values rigorously when 'True'
    :param verbose: output more messages when 'True'
    :return: 'True' on success, 'False' on failure.
    The following are derived from the input arguments.
    . out_size: array size of out_data
    . in_size: array size of in_data
    . kernel_size: dimension of filter, e.g., 3 means 3x3 kernel
    . in_channels: num of input channels, e.g., 3 for RGB, 1 for gray
    . out_channels: num of filters
    . bias_size: array size of bias
    Following is an example usage for PyTorch.
    Deconvolution2d( tensor_out_data.data.numpy()
                   , tensor_in_data.data.numpy()
                   , tensor_kernel.data.numpy()
                   , tensor_bias.data.numpy()
                   , stride
                   , padding
                   , rigor=True
                   , verbose=True)
    """
    if rigor or dlr_common.rigor:
        error = 0
        if (out_data.ndim != 3):
            error += 1
            if verbose: dlr_common.DpuError("out_data is not 3 dim", flush=True)
        if (in_data.ndim != 3):
            error += 1
            if verbose: dlr_common.DpuError("in_data is not 3 dim", flush=True)
        if (kernel.ndim != 4):
            error += 1
            if verbose: dlr_common.DpuError("kernel is not 4 dim", flush=True)
        if (bias is not None) and (bias.ndim != 1):
            error += 1
            if verbose: dlr_common.DpuError(f"bias should be 1 dim: {bias.ndim}", flush=True)
        if (stride < 1):
            error += 1
            if verbose: dlr_common.DpuError(f"stride should be >=1: {stride}", flush=True)
        if (padding < 0):
            error += 1
            if verbose: dlr_common.DpuError(f"padding should be >=0: {padding}", flush=True)
        t_out_size = out_data.shape[2]   # note ndim (i.e., rank) is 3
        t_in_size = in_data.shape[2]     # note ndim (i.e., rank) is 3
        t_kernel_size = kernel.shape[3]  # note ndim (i.e., rank) is 4
        t_in_channel = in_data.shape[0]
        t_out_channel = out_data.shape[0]
        t_stride = stride
        t_padding = padding
        status, t_out_size_expect = GetOutputSizeOfDeconvolution2d(
            in_size=t_in_size, kernel_size=t_kernel_size, stride=t_stride,
            padding=t_padding, output_padding=0, dilation=1,
            rigor=rigor, verbose=verbose)
        if not status:
            return False  # something wrong with arguments
        if (t_out_size != t_out_size_expect):
            error += 1
            dlr_common.DpuError(f"out_size mis-match: {t_out_size, t_out_size_expect}", flush=True)
        if ((t_kernel_size % 2) != 1):
            error += 1
            dlr_common.DpuError(f"kernel_size should be odd: {t_kernel_size}", flush=True)
        if verbose:
            dlr_common.DpuInfo(f"out_channel={t_out_channel} {out_data.shape}")
            dlr_common.DpuInfo(f"in_channel ={t_in_channel} {in_data.shape}")
            dlr_common.DpuInfo(f"out_size   ={t_out_size} {out_data.shape}")
            dlr_common.DpuInfo(f"in_size    ={t_in_size} {in_data.shape}")
            dlr_common.DpuInfo(f"kernel_size={t_kernel_size} {kernel.shape}")
            dlr_common.DpuInfo(f"stride     ={t_stride} {stride}")
            dlr_common.DpuInfo(f"padding    ={t_padding} {padding}")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match", flush=True)
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Deconvolution2dInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Deconvolution2dFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Deconvolution2dDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not supported: " + str(out_data.dtype.type), flush=True)
        return False
    _Deconv2d = dlr_common.WrapFunction(
        dlr_common._dlr, _fname, None  # return type
        , [ ctypes.POINTER(_ctype)  # output features
          , ctypes.POINTER(_ctype)  # input image
          , ctypes.POINTER(_ctype)  # kernels
          , ctypes.POINTER(_ctype)  # bias
          , ctypes.c_ushort         # out_size
          , ctypes.c_ushort         # in_size
          , ctypes.c_ubyte          # kernel_size (only for square filter)
          , ctypes.c_ushort         # bias_size
          , ctypes.c_ushort         # in_channel
          , ctypes.c_ushort         # out_channel
          , ctypes.c_ubyte          # stride
          , ctypes.c_ubyte          # padding
          , ctypes.c_int            # rigor
          , ctypes.c_int ])         # verbose
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_kernel = kernel.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_out_size = ctypes.c_ushort(out_data.shape[2])  # note ndim (i.e., rank) is 3
    CP_in_size = ctypes.c_ushort(in_data.shape[2])    # note ndim (i.e., rank) is 3
    CP_kernel_size = ctypes.c_ubyte(kernel.shape[3])  # note ndim (i.e., rank) is 4
    CP_in_channel = ctypes.c_ushort(in_data.shape[0])  # kernel.shape[0]
    CP_out_channel = ctypes.c_ushort(kernel.shape[1])
    CP_stride = ctypes.c_ubyte(stride)
    CP_padding = ctypes.c_ubyte(padding)
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    if (bias is None) or (bias.size == 0):
        CP_bias = ctypes.POINTER(_ctype)()
        CP_bias_size = ctypes.c_ushort(0)
    else:
        CP_bias = bias.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_bias_size = ctypes.c_ushort(bias.shape[0])
    _Deconv2d(CP_out_data, CP_in_data, CP_kernel, CP_bias,
              CP_out_size, CP_in_size, CP_kernel_size, CP_bias_size,
              CP_in_channel, CP_out_channel, CP_stride, CP_padding,
              CP_rigor, CP_verbose)
    return True
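
# Usage sketch (illustrative, not part of the library; assumes the dlr shared
# library has been loaded by dlr_common): a 3x3 transposed convolution with
# stride 2 and padding 1 turns a 4x4 input into a 7x7 output
# ((4-1)*2 - 2*1 + (3-1) + 1 = 7).
#
#   import numpy as np
#   in_data  = np.random.rand(2, 4, 4).astype(np.float32)     # in_channel x in_size x in_size
#   kernel   = np.random.rand(2, 5, 3, 3).astype(np.float32)  # in_channel x out_channel x k x k
#   bias     = np.zeros(5, dtype=np.float32)                   # out_channel
#   out_data = np.zeros((5, 7, 7), dtype=np.float32)           # out_channel x out_size x out_size
#   Deconvolution2d(out_data, in_data, kernel, bias, stride=2, padding=1, rigor=True)
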
def Concat2d( out_data    # out_rows x out_cols
            , in_dataA    # in_rowsA x in_colsA
            , in_dataB    # in_rowsB x in_colsB
            , dim=0
            , rigor=False
            , verbose=False):
    """
    Returns True on success, otherwise returns False.
    Applies a 2D concatenation over two 2-dimensional input data.
    Note that all nd-array arguments are NumPy arrays (mutable), not PyTorch tensors (immutable).
    :param out_data: <mutable> output data, out_data[][]
    :param in_dataA: input data, in_dataA[in_rowsA][in_colsA]
    :param in_dataB: input data, in_dataB[in_rowsB][in_colsB]
    :param dim: dimension to concatenate, 0 or 1
    :param rigor: check values rigorously when 'True'
    :param verbose: output more messages when 'True'
    :return: 'True' on success, 'False' on failure.
    The following are derived from the input arguments.
    . out_rows:
    . out_cols:
    . in_rowsA:
    . in_colsA:
    . in_rowsB:
    . in_colsB:
    . dim:
    Following is an example usage for PyTorch.
    Concat2d( tensor_out_data.data.numpy()
            , tensor_in_dataA.data.numpy()
            , tensor_in_dataB.data.numpy()
            , dim
            , rigor=True
            , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("out_data is not 2 dim")
        if (in_dataA.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("in_dataA is not 2 dim")
        if (in_dataB.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("in_dataB is not 2 dim")
        if (dim != 0) and (dim != 1):
            error += 1
            if verbose: dlr_common.DpuError("dim should be 0 or 1")
        t_in_rowsA = in_dataA.shape[0]
        t_in_colsA = in_dataA.shape[1]
        t_in_rowsB = in_dataB.shape[0]
        t_in_colsB = in_dataB.shape[1]
        if dim == 0:
            t_out_rows = in_dataA.shape[0] + in_dataB.shape[0]
            t_out_cols = in_dataA.shape[1]
        else:
            t_out_rows = in_dataA.shape[0]
            t_out_cols = in_dataA.shape[1] + in_dataB.shape[1]
        if (t_out_rows != out_data.shape[0]):
            error += 1
            if verbose: dlr_common.DpuError("out data row count error")
        if (t_out_cols != out_data.shape[1]):
            error += 1
            if verbose: dlr_common.DpuError("out data column count error")
        if dim == 0:
            if (t_in_colsA != t_in_colsB):
                error += 1
                if verbose: dlr_common.DpuError("in dimension error")
        else:
            if (t_in_rowsA != t_in_rowsB):
                error += 1
                if verbose: dlr_common.DpuError("in dimension error")
        if verbose:
            dlr_common.DpuInfo(f"out_data={out_data.shape}")
            dlr_common.DpuInfo(f"in_dataA={in_dataA.shape}")
            dlr_common.DpuInfo(f"in_dataB={in_dataB.shape}")
            dlr_common.DpuInfo(f"dim     ={dim}")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match")
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Concat2dInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Concat2dFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Concat2dDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not supported: " + str(out_data.dtype.type))
        return False
    _Concat2d = dlr_common.WrapFunction(
        dlr_common._dlr, _fname, None  # return type
        , [ ctypes.POINTER(_ctype)  # output
          , ctypes.POINTER(_ctype)  # input A
          , ctypes.POINTER(_ctype)  # input B
          , ctypes.c_ushort         # in_rowsA
          , ctypes.c_ushort         # in_colsA
          , ctypes.c_ushort         # in_rowsB
          , ctypes.c_ushort         # in_colsB
          , ctypes.c_ubyte          # dim
          , ctypes.c_int            # rigor
          , ctypes.c_int ])         # verbose
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_dataA = in_dataA.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_dataB = in_dataB.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_rowsA = ctypes.c_ushort(in_dataA.shape[0])
    CP_in_colsA = ctypes.c_ushort(in_dataA.shape[1])
    CP_in_rowsB = ctypes.c_ushort(in_dataB.shape[0])
    CP_in_colsB = ctypes.c_ushort(in_dataB.shape[1])
    CP_dim = ctypes.c_ubyte(dim)
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    _Concat2d(CP_out_data, CP_in_dataA, CP_in_dataB,
              CP_in_rowsA, CP_in_colsA, CP_in_rowsB, CP_in_colsB,
              CP_dim, CP_rigor, CP_verbose)
    return True
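
# Usage sketch (illustrative, not part of the library; assumes the dlr shared
# library has been loaded by dlr_common): concatenating a 2x4 and a 3x4 array
# along dim 0 yields a 5x4 array.
#
#   import numpy as np
#   a   = np.random.rand(2, 4).astype(np.float32)
#   b   = np.random.rand(3, 4).astype(np.float32)
#   out = np.zeros((5, 4), dtype=np.float32)
#   Concat2d(out, a, b, dim=0, rigor=True)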