import ctypes
import math

import numpy as np

import dlr_common  # project-local module providing WrapFunction, DpuError, DpuInfo


def GetOutputSizeOfPooling2dMax(in_size,
                                kernel_size,
                                stride,
                                padding,
                                ceil_mode=False,
                                rigor=False,
                                verbose=False):
    """
    Returns satus and the size of output tensor
    :param in_size:
    :param kernel_size:
    :param stride:
    :param padding:
    :param ceil_mode: use floor when false, otherwize cel when true
    :param rigor:
    :param verbose:
    :return: the size of output tensor
    """
    err = 0
    if rigor:
        if (in_size < 1):
            err += 1
            if verbose:
                dlr_common.DpuError(f"in_size should be positive: {in_size}",
                                    flush=True)
        if (kernel_size < 1):
            err += 1
            if verbose:
                dlr_common.DpuError(
                    f"kernel_size should be positive: {kernel_size}",
                    flush=True)
        if ((kernel_size % 2) == 1):
            err += 1
            if verbose:
                dlr_common.DpuError(
                    f"kernel_size should be even: {kernel_size}", flush=True)
        if (stride < 1):
            err += 1
            if verbose:
                dlr_common.DpuError(
                    f"stride should be larger than 0: {stride}", flush=True)
        if (padding < 0):
            err += 1
            if verbose:
                dlr_common.DpuError(f"padding should be positive: {padding}",
                                    flush=True)
    if ceil_mode:
        out_size = math.ceil(((in_size - kernel_size + 2 * padding) / stride) +
                             1)
    else:
        out_size = math.floor((
            (in_size - kernel_size + 2 * padding) / stride) + 1)
    if err > 0: return False, out_size
    else: return True, out_size
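
# A quick sanity check of the formula above (hypothetical sizes); it follows
# the usual max-pooling convention: floor((in + 2*pad - k) / stride) + 1.
if __name__ == "__main__":
    ok, out = GetOutputSizeOfPooling2dMax(in_size=224, kernel_size=2,
                                          stride=2, padding=0)
    assert ok and out == 112  # floor((224 + 0 - 2) / 2) + 1 = 112
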
def GetOutputSizeOfDeconvolution2d(in_size,
                                   kernel_size,
                                   stride,
                                   padding,
                                   output_padding=0,
                                   dilation=1,
                                   rigor=False,
                                   verbose=False):
    """
    Returns satus and the size of output tensor
    :param in_size:
    :param kernel_size:
    :param stride:
    :param padding:
    :param rigor:
    :param verbose:
    :return: the size of output tensor
    """
    err = 0
    if rigor or dlr_common.rigor:
        if (in_size < 1):
            err += 1
            if verbose:
                dlr_common.DpuError(f"in_size should be positive: {in_size}",
                                    flush=True)
        if (kernel_size < 1):
            err += 1
            if verbose:
                dlr_common.DpuError(
                    f"kernel_size should be positive: {kernel_size}",
                    flush=True)
        if ((kernel_size % 2) != 1):
            err += 1
            if verbose:
                dlr_common.DpuError(
                    f"kernel_size should be odd: {kernel_size}", flush=True)
        if (stride < 1):
            err += 1
            if verbose:
                dlr_common.DpuError(
                    f"stride should be larger than 0: {stride}", flush=True)
        if (padding < 0):
            err += 1
            if verbose:
                dlr_common.DpuError(f"padding should be positive: {padding}",
                                    flush=True)

    out_size = int((in_size - 1) * stride - 2 * padding +
                   dilation * (kernel_size - 1) + output_padding + 1)
    if err > 0: return False, out_size
    else: return True, out_size
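
# A quick sanity check (hypothetical sizes) of the transposed-convolution
# size formula: (in - 1)*stride - 2*pad + dilation*(k - 1) + output_padding + 1.
if __name__ == "__main__":
    ok, out = GetOutputSizeOfDeconvolution2d(in_size=14, kernel_size=3,
                                             stride=2, padding=1)
    assert ok and out == 27  # (14 - 1)*2 - 2*1 + 1*(3 - 1) + 0 + 1 = 27
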
def LinearNd(out_data,    # ndim x out_size
             in_data,     # ndim x in_size
             weight,      # out_size x in_size
             bias=None,   # out_size
             rigor=False,
             verbose=False):
    """
    Returns True on success, otherwize returns False
    Applies a 1D matrix multiplication over an input data data.
    Note that all nd-array lists are NumPy (mutable), not PyTorch tensor (immutable).
    :param out_data: <mutable> output data, out_data[ndim][out_size]
    :param in_data: input data, in_data[ndim][in_size]
    :param weight: weight[out_size][in_size]
    :param bias: bias for each output, bias[out_size]
    :param rigor: check values rigorously when 'True'
    :param verbose: output message more when 'True'
    :return: 'True' on success, 'False' on failure.
    Follwoings are derived from input arguments
    . ndim: first dimension of out/in_data
    . out_size: array size of out_data
    . in_size: array size of in_data
    . weight_size: dimension of weight
    . bias_size: array size of bias
    Following is an example usage for PyTorch.
        LinearNd( tensor_out_data.data.numpy() # ndim x out_size
                    , tenso_in_data.data.numpy()   # ndim x in_size
                    , tensor_weight.data.numpy()   # out_size x in_size
                    , tensor_bias.data.numpy()     # out_size
                    , rigor=True
                    , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != 2):
            error += 1
            if verbose:
                dlr_common.DpuError("out_data is not 2 dim", flush=True)
        if (in_data.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("in_data is not 2 dim", flush=True)
        if (weight.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("weight is not 2 dim", flush=True)
        if (bias is not None) and (bias.ndim != 1):
            error += 1
            if verbose:
                dlr_common.DpuError(f"bias should be 1 dim: {bias.ndim}",
                                    flush=True)
        t_out_ndim = out_data.shape[0]
        t_out_size = out_data.shape[1]  # note ndim (i.e., rank) is 2
        t_in_ndim = in_data.shape[0]
        t_in_size = in_data.shape[1]  # note ndim (i.e., rank) is 2
        t_weight_size_row = weight.shape[0]  # note ndim (i.e., rank) is 2
        t_weight_size_col = weight.shape[1]  # note ndim (i.e., rank) is 2
        if (t_out_ndim != t_in_ndim):
            error += 1
            dlr_common.DpuError("dimension mis-match", flush=True)
        if (t_out_size != t_weight_size_row):
            error += 1
            dlr_common.DpuError("out_size mis-match", flush=True)
        if (t_in_size != t_weight_size_col):
            error += 1
            dlr_common.DpuError("in_size mis-match", flush=True)
        if verbose:
            dlr_common.DpuInfo(f"out_size   ={t_out_size} {out_data.shape}")
            dlr_common.DpuInfo(f"in_size    ={t_in_size} {in_data.shape}")
            dlr_common.DpuInfo(
                f"weight_size={t_weight_size_row} {t_weight_size_col}")
        if (error != 0):
            dlr_common.DpuError(" parameter mis-match", flush=True)
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'LinearNdInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'LinearNdFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'LinearNdDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError(" not support " + str(out_data.dtype.type),
                            flush=True)
        return False
    _LinearNd = dlr_common.WrapFunction(
        dlr_common._dlr,
        _fname,
        None,  # return type
        [
            ctypes.POINTER(_ctype),  # out data
            ctypes.POINTER(_ctype),  # in data
            ctypes.POINTER(_ctype),  # weight
            ctypes.POINTER(_ctype),  # bias
            ctypes.c_ushort,  # out_size
            ctypes.c_ushort,  # in_size
            ctypes.c_ushort,  # bias_size
            ctypes.c_ubyte,  # ndim
            ctypes.c_int,  # rigor
            ctypes.c_int,  # verbose
        ])
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_weight = weight.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_out_size = ctypes.c_ushort(
        out_data.shape[1])  # note ndim (i.e., rank) is 2
    CP_in_size = ctypes.c_ushort(
        in_data.shape[1])  # note ndim (i.e., rank) is 2
    CP_ndim = ctypes.c_ubyte(in_data.shape[0])  # note ndim (i.e., rank) is 2
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    if (bias is None) or (bias.size == 0):
        CP_bias = ctypes.POINTER(_ctype)()
        CP_bias_size = ctypes.c_ushort(0)
    else:
        CP_bias = bias.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_bias_size = ctypes.c_ushort(bias.shape[0])
    _LinearNd(CP_out_data, CP_in_data, CP_weight, CP_bias, CP_out_size,
              CP_in_size, CP_bias_size, CP_ndim, CP_rigor, CP_verbose)
    return True
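
# A minimal NumPy cross-check sketch for LinearNd (hypothetical shapes and
# data); the C kernel is expected to agree with out = in @ weight.T + bias.
if __name__ == "__main__":
    lin_in = np.random.rand(4, 8).astype(np.float32)   # ndim x in_size
    lin_w = np.random.rand(6, 8).astype(np.float32)    # out_size x in_size
    lin_b = np.random.rand(6).astype(np.float32)       # out_size
    lin_out = np.zeros((4, 6), dtype=np.float32)       # ndim x out_size
    if LinearNd(lin_out, lin_in, lin_w, lin_b, rigor=True):
        assert np.allclose(lin_out, lin_in @ lin_w.T + lin_b, atol=1e-5)
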
def Norm1dBatch(out_data,       # in_channel x in_size (contiguous)
                in_data,        # in_channel x in_size (contiguous)
                running_mean,   # in_channel
                running_var,    # in_channel
                scale=None,     # None or in_channel (default 1)
                bias=None,      # None or in_channel (default 0)
                epsilon=1E-5,
                rigor=False,
                verbose=False):
    """
    Returns True on success, otherwize returns False
    Applies a 1D matrix multiplication over an input data data.
    Note that all nd-array lists are NumPy (mutable), not PyTorch tensor (immutable).
    :param out_data: <mutable> output data, out_data[channel][size][size]
    :param in_data: input data, in_data[channel][size][size]
    :param running_mean:
    :param running_var:
    :param scale:
    :param bias:
    :param epsilon:
    :param rigor: check values rigorously when 'True'
    :param verbose: output message more when 'True'
    :return: 'True' on success, 'False' on failure.
    Following is an example usage for PyTorch.
        Norm1dBatch( tensor_out_data.data.numpy() # ndim x out_size
                       , tenso_in_data.data.numpy()   # ndim x in_size
                       , tensor_running_mean.data.numpy()   # out_size x in_size
                       , tensor_running_var.data.numpy()     # out_size
                       , tensor_scale.data.numpy()     # out_size
                       , tensor_bias.data.numpy()     # out_size
                       , epsilon
                       , rigor=True
                       , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != in_data.ndim):
            error += 1
            if verbose:
                dlr_common.DpuError("out_data in_data dimension mis-match",
                                    flush=True)
        if (running_mean.ndim != 1):
            error += 1
            if verbose:
                dlr_common.DpuError(
                    f"running_mean dimension mis-match {running_mean.ndim}",
                    flush=True)
        if (running_mean.size != in_data.shape[0]):  # num of channels
            error += 1
            if verbose:
                dlr_common.DpuError(
                    f"running_mean size mis-match {running_mean.size} {in_data.shape}",
                    flush=True)
        if (running_var.ndim != 1):
            error += 1
            if verbose:
                dlr_common.DpuError("running_var dimension mis-match",
                                    flush=True)
        if (running_var.size != in_data.shape[0]):  # num of channels
            error += 1
            if verbose:
                dlr_common.DpuError("running_var size mis-match", flush=True)
        if (scale is not None) and (scale.ndim != 1):
            error += 1
            if verbose:
                dlr_common.DpuError(f"scale should be 1 dim: {scale.ndim}",
                                    flush=True)
        if (bias is not None) and (bias.ndim != 1):
            error += 1
            if verbose:
                dlr_common.DpuError(f"bias should be 1 dim: {bias.ndim}",
                                    flush=True)
        t_out_channel = out_data.shape[0]
        t_out_size = out_data.shape[1]
        t_in_channel = in_data.shape[0]
        t_in_size = in_data.shape[1]
        if (t_out_channel != t_in_channel):
            error += 1
            dlr_common.DpuError("channel mis-match", flush=True)
        if (t_out_size != t_in_size):
            error += 1
            dlr_common.DpuError("size mis-match", flush=True)
        if (error != 0):
            dlr_common.DpuError("parameter mis-match", flush=True)
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Norm1dBatchInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Norm1dBatchFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Norm1dBatchDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError(" not support " + str(out_data.dtype.type),
                            flush=True)
        return False
    _Norm1dBatch = dlr_common.WrapFunction(
        dlr_common._dlr,
        _fname,
        None,  # return type
        [
            ctypes.POINTER(_ctype),  # out data
            ctypes.POINTER(_ctype),  # in data
            ctypes.POINTER(_ctype),  # running_mean
            ctypes.POINTER(_ctype),  # running_var
            ctypes.POINTER(_ctype),  # scale
            ctypes.POINTER(_ctype),  # bias
            ctypes.c_uint,  # in_size
            ctypes.c_ushort,  # scale_size
            ctypes.c_ushort,  # bias_size
            ctypes.c_ushort,  # in_channel
            ctypes.c_float,  # epsilon
            ctypes.c_int,  # rigor
            ctypes.c_int,  # verbose
        ])
    in_channel = in_data.shape[0]
    in_size = int(in_data.size / in_channel)  # num of elements per channel
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_running_mean = running_mean.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_running_var = running_var.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_size = ctypes.c_uint(in_size)
    CP_in_channel = ctypes.c_ushort(in_channel)
    CP_epsilon = ctypes.c_float(epsilon)
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    if (scale is None) or (scale.size == 0):
        CP_scale = ctypes.POINTER(_ctype)()
        CP_scale_size = ctypes.c_ushort(0)
    else:
        CP_scale = scale.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_scale_size = ctypes.c_ushort(scale.shape[0])  # scale is 1-D
    if (bias is None) or (bias.size == 0):
        CP_bias = ctypes.POINTER(_ctype)()
        CP_bias_size = ctypes.c_ushort(0)
    else:
        CP_bias = bias.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_bias_size = ctypes.c_ushort(bias.shape[0])  # bias is 1-D
    _Norm1dBatch(CP_out_data, CP_in_data, CP_running_mean, CP_running_var,
                 CP_scale, CP_bias, CP_in_size, CP_scale_size, CP_bias_size,
                 CP_in_channel, CP_epsilon, CP_rigor, CP_verbose)
    return True
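
# A minimal NumPy cross-check sketch for Norm1dBatch (hypothetical shapes),
# assuming the usual inference-time batch-norm formula
# out = (in - mean) / sqrt(var + epsilon), here without scale and bias.
if __name__ == "__main__":
    bn_in = np.random.rand(3, 16).astype(np.float32)   # channel x size
    bn_mean = bn_in.mean(axis=1).astype(np.float32)    # per channel
    bn_var = bn_in.var(axis=1).astype(np.float32)      # per channel
    bn_out = np.zeros_like(bn_in)
    if Norm1dBatch(bn_out, bn_in, bn_mean, bn_var, rigor=True):
        expect = (bn_in - bn_mean[:, None]) / np.sqrt(bn_var[:, None] + 1E-5)
        assert np.allclose(bn_out, expect, atol=1e-4)
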
def Pooling2dMax(out_data,     # out_channel x out_size x out_size
                 in_data,      # in_channel x in_size x in_size
                 kernel_size,  # kernel_size x kernel_size
                 stride=1,
                 padding=0,
                 ceil_mode=False,
                 rigor=False,
                 verbose=False):
    """
    Returns True on success, otherwize returns False
    Applies a 2D mAXpolling over an input data composed of several input channels.
    Note that all nd-array lists are NumPy (mutable), not PyTorch tensor (immutable).
    :param out_data: <mutable> output data, out_data[out_channel][out_size][out_size]
    :param in_data: input data, in_data[in_channel][in_size][in_size]
    :param kernel_size:
    :param stride: num of skips to apply next filter
    :param padding: num of pixes at the boundary
    :param ceil_mode: use floor() when false, otherwize ceil()
    :param rigor: check values rigorously when 'True'
    :param verbose: output message more when 'True'
    :return: 'True' on success, 'False' on failure.
    Follwoings are derived from input arguments
    . out_size: array size of out_data
    . in_size: array size of in_data
    . in_chnannels: num of input channels
    . out_channels: num of output channels (it should be the same as in_channel)
    Following is an example usage for PyTorch.
        Pooling2dMax( tensor_out_data.data.numpy() # out_channel x out_size x out_size
                        , tenso_in_data.data.numpy()   # in_channel x in_size x in_size
                        , kernel_size
                        , stride
                        , padding
                        , rigor=True
                        , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != 3):
            error += 1
            if verbose: dlr_common.DpuError("out_data is not 3 dim")
        if (in_data.ndim != 3):
            error += 1
            if verbose: dlr_common.DpuError("in_data is not 3 dim")
        if (kernel_size < 2):
            error += 1
            if verbose: dlr_common.DpuError("kernel_size should be >=2")
        if (stride < 1):
            error += 1
            if verbose: dlr_common.DpuError("stride should be >=1")
        if (padding < 0):
            error += 1
            if verbose: dlr_common.DpuError("stride should be >=0")
        t_out_size = out_data.shape[2]  # note ndim (i.e., rank) is 3
        t_in_size = in_data.shape[2]  # note ndim (i.e., rank) is 3
        t_kernel_size = kernel_size
        t_in_channel = in_data.shape[0]
        t_out_channel = out_data.shape[0]
        t_stride = stride
        t_padding = padding
        if (t_in_channel != t_out_channel):
            error += 1
            if verbose:
                dlr_common.DpuError("in/out channel should be the same")
        status, t_out_size_expect = GetOutputSizeOfPooling2dMax(
            t_in_size, t_kernel_size, t_stride, t_padding, ceil_mode)
        if not status: return False  # something wrong with arguments
        if (t_out_size != t_out_size_expect):
            error += 1
            if verbose:
                dlr_common.DpuError(
                    f"out_size mis-match {t_out_size} {t_out_size_expect}")
        if ((t_kernel_size % 2) == 1):
            error += 1
            if verbose: dlr_common.DpuError(f"kernel_size should be even")
        if verbose:
            dlr_common.DpuInfo(f"out_channel={t_out_channel} {out_data.shape}")
            dlr_common.DpuInfo(f"in_channel ={t_in_channel} {in_data.shape}")
            dlr_common.DpuInfo(f"out_size   ={t_out_size} {out_data.shape}")
            dlr_common.DpuInfo(f"in_size    ={t_in_size} {in_data.shape}")
            dlr_common.DpuInfo(f"kernel_size={t_kernel_size}")
            dlr_common.DpuInfo(f"stride     ={t_stride} {stride}")
            dlr_common.DpuInfo(f"padding    ={t_padding} {padding}")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match")
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Pooling2dMaxInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Pooling2dMaxFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Pooling2dMaxDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not support " + str(out_data.dtype.type))
        return False
    _Pooling2dMax = dlr_common.WrapFunction(
        dlr_common._dlr,
        _fname,
        None,  # return type
        [
            ctypes.POINTER(_ctype),  # output features
            ctypes.POINTER(_ctype),  # input image
            ctypes.c_ushort,  # out_size
            ctypes.c_ushort,  # in_size
            ctypes.c_ubyte,  # kernel_size (only for square filter)
            ctypes.c_ushort,  # channel
            ctypes.c_ubyte,  # stride
            ctypes.c_ubyte,  # padding
            ctypes.c_int,  # ceil_mode
            ctypes.c_int,  # rigor
            ctypes.c_int,  # verbose
        ])
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_out_size = ctypes.c_ushort(
        out_data.shape[2])  # note ndim (i.e., rank) is 3
    CP_in_size = ctypes.c_ushort(
        in_data.shape[2])  # note ndim (i.e., rank) is 3
    CP_kernel_size = ctypes.c_ubyte(kernel_size)
    CP_channel = ctypes.c_ushort(in_data.shape[0])
    CP_stride = ctypes.c_ubyte(stride)
    CP_padding = ctypes.c_ubyte(padding)
    CP_ceil_mode = 1 if ceil_mode else 0
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0

    _Pooling2dMax(CP_out_data, CP_in_data, CP_out_size, CP_in_size,
                  CP_kernel_size, CP_channel, CP_stride, CP_padding,
                  CP_ceil_mode, CP_rigor, CP_verbose)
    return True
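
# A usage sketch (hypothetical sizes): derive out_size first with
# GetOutputSizeOfPooling2dMax, then allocate out_data to match before
# invoking the C kernel.
if __name__ == "__main__":
    mp_in = np.random.rand(3, 8, 8).astype(np.float32)  # channel x in x in
    ok, mp_out_size = GetOutputSizeOfPooling2dMax(8, kernel_size=2,
                                                  stride=2, padding=0)
    if ok:
        mp_out = np.zeros((3, mp_out_size, mp_out_size), dtype=np.float32)
        Pooling2dMax(mp_out, mp_in, kernel_size=2, stride=2, padding=0)
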
def Activations( func_name 
               , out_data # any dimension
               , in_data  # any dimension
               , negative_slope=0.01 # for LeakyReLu
               , rigor=False
               , verbose=False):
    """
    Returns True on success, otherwize returns False
    Applies a non-linear activation function over an input data composed of several input channels.
    Note that all nd-array lists are NumPy (mutable), not PyTorch tensor (immutable).
    :param out_data: <mutable> output data, out_data[...]
    :param in_data: input data, in_data[...]
    :param rigor: check values rigorously when 'True'
    :param verbose: output message more when 'True'
    :return: 'True' on success, 'False' on failure.
    Follwoings are derived from input arguments
    . out_size: array size of out_data
    . in_size: array size of in_data
    Following is an example usage for PyTorch.
        Activation'FUNC_NAME'( tensor_out_data.data.numpy() # contiguous array
                      , tenso_in_data.data.numpy() # contiguous array
                      , rigor=True
                      , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != in_data.ndim):
            error += 1
            if verbose: dlr_common.DpuError("data dimension mis-match")
        if (out_data.size != in_data.size):
            error += 1
            if verbose:
                dlr_common.DpuError(
                    f"data size mis-match {in_data.size} {out_data.size}")
        for dim in range(in_data.ndim):
            if (in_data.shape[dim] != out_data.shape[dim]):
                error += 1
                if verbose:
                    dlr_common.DpuError("data dimension size mis-match")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match")
            return False
    if (out_data.ndim==0) or (out_data.ndim==1):
        channel = 1
        size = out_data.size
    elif (out_data.ndim==2):
        channel = out_data.shape[0]
        size = out_data.shape[1]
    else:
        channel = out_data.shape[0]
        size = int(np.prod(out_data.shape[1:]))  # elements per channel

    if out_data.dtype.type == np.int32:
        _fname = 'Activation'+func_name+'Int'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Activation'+func_name+'Float'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Activation'+func_name+'Double'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not support "+str(out_data.dtype.type))
        return False

    if func_name == 'LeakyReLu':
        _Activation=dlr_common.WrapFunction(dlr_common._dlr
                                    ,_fname
                                    , None          # return type
                                    ,[ctypes.POINTER(_ctype) # output
                                     ,ctypes.POINTER(_ctype) # input
                                     ,ctypes.c_uint    # number of elements
                                     ,ctypes.c_ushort  # number of channels
                                     ,ctypes.c_uint    # negative slope
                                     ,ctypes.c_int     # rigor
                                     ,ctypes.c_int     # verbose
                                     ]) 
        CP_out_data       = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_in_data        = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_size           = ctypes.c_uint(size)
        CP_channel        = ctypes.c_ushort(channel)
        # reinterpret the float's raw bits as an unsigned int, since the C
        # entry point receives the negative slope through a c_uint parameter
        CP_negative_slope = ctypes.c_uint.from_buffer(ctypes.c_float(negative_slope)).value
        CP_rigor          = 1 if rigor else 0
        CP_verbose        = 1 if verbose else 0

        _Activation(CP_out_data    
                   ,CP_in_data      
                   ,CP_size    
                   ,CP_channel
                   ,CP_negative_slope
                   ,CP_rigor
                   ,CP_verbose
                   )
    else:
        _Activation=dlr_common.WrapFunction(dlr_common._dlr
                                    ,_fname
                                    , None          # return type
                                    ,[ctypes.POINTER(_ctype) # output
                                     ,ctypes.POINTER(_ctype) # input
                                     ,ctypes.c_uint    # number of elements
                                     ,ctypes.c_ushort  # number of channels
                                     ,ctypes.c_int     # rigor
                                     ,ctypes.c_int     # verbose
                                     ]) 
        CP_out_data    = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_in_data     = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_size        = ctypes.c_uint(size)
        CP_channel     = ctypes.c_ushort(channel)
        CP_rigor       = 1 if rigor else 0
        CP_verbose     = 1 if verbose else 0

        _Activation(CP_out_data    
                   ,CP_in_data      
                   ,CP_size    
                   ,CP_channel
                   ,CP_rigor
                   ,CP_verbose
                   )
    return True
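
# A usage sketch for Activations; 'LeakyReLu' is the suffix handled above,
# so the C symbol resolved here would be ActivationLeakyReLuFloat.
if __name__ == "__main__":
    act_in = np.random.rand(2, 32).astype(np.float32) - 0.5
    act_out = np.zeros_like(act_in)
    Activations('LeakyReLu', act_out, act_in, negative_slope=0.01,
                rigor=True, verbose=False)
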
def Deconvolution2d(out_data,   # out_channel x out_size x out_size
                    in_data,    # in_channel x in_size x in_size
                    kernel,     # in_channel x out_channel x kernel_size x kernel_size
                    bias=None,  # out_channel
                    stride=1,
                    padding=0,
                    rigor=False,
                    verbose=False):
    """
    Returns True on success, otherwize returns False
    Applies a 2D deconvolution (transpose convolution) over an input data composed of several input channels.
    Note that all nd-array lists are NumPy (mutable), not PyTorch tensor (immutable).
    :param out_data: <mutable> output data, out_data[out_channel][out_size][out_size]
    :param in_data: input data, in_data[in_channel][in_size][in_size]
    :param kernel: kernel (or filter), kernel[in_channel][out_channel][kernel_size][kernel_size]
    :param bias: bias for each filter (kernel), bias[out_channel]
    :param stride: num of skips to apply next filter
    :param padding: num of pixes at the boundary
    :param rigor: check values rigorously when 'True'
    :param verbose: output message more when 'True'
    :return: 'True' on success, 'False' on failure.
    Follwoings are derived from input arguments
    . out_size: array size of out_data
    . in_size: array size of in_data
    . kernel_size: dimension of filter, e.g., 3 means 3x3 kernel
    . in_chnannels: num of input channels, e.g., 3 for RGB, 1 for gray
    . out_channels: num of filters
    . bias_size: array size of bias
    Following is an example usage for PyTorch.
        deconvolution2d( tensor_out_data.data.numpy()
                       , tenso_in_data.data.numpy()
                       , tensor_kernel.data.numpy()
                       , tensor_bias.data.numpy()
                       , stride
                       , padding
                       , rigor=True
                       , verbose=True)
    """
    if rigor or dlr_common.rigor:
        error = 0
        if (out_data.ndim != 3):
            error += 1
            if verbose:
                dlr_common.DpuError("out_data is not 3 dim", flush=True)
        if (in_data.ndim != 3):
            error += 1
            if verbose: dlr_common.DpuError("in_data is not 3 dim", flush=True)
        if (kernel.ndim != 4):
            error += 1
            if verbose: dlr_common.DpuError("kernel is not 4 dim", flush=True)
        if (bias is not None) and (bias.ndim != 1):
            error += 1
            if verbose:
                dlr_common.DpuError(f"bias should be 1 dim: {bias.ndim}",
                                    flush=True)
        if (stride < 1):
            error += 1
            if verbose:
                dlr_common.DpuError(f"stride should be >=1: {stride}",
                                    flush=True)
        if (padding < 0):
            error += 1
            if verbose:
                dlr_common.DpuError(f"padding should be >=0: {padding}",
                                    flush=True)
        t_out_size = out_data.shape[2]  # note ndim (i.e., rank) is 3
        t_in_size = in_data.shape[2]  # note ndim (i.e., rank) is 3
        t_kernel_size = kernel.shape[3]  # note ndim (i.e., rank) is 4
        t_in_channel = in_data.shape[0]
        t_out_channel = out_data.shape[0]
        t_stride = stride
        t_padding = padding
        status, t_out_size_expect = GetOutputSizeOfDeconvolution2d(
            in_size=t_in_size,
            kernel_size=t_kernel_size,
            stride=t_stride,
            padding=t_padding,
            output_padding=0,
            dilation=1,
            rigor=rigor,
            verbose=verbose)
        if not status: return False  # something wrong with arguments
        if (t_out_size != t_out_size_expect):
            error += 1
            dlr_common.DpuError(
                f"out_size mis-match: {t_out_size, t_out_size_expect}",
                flush=True)
        if ((t_kernel_size % 2) != 1):
            error += 1
            dlr_common.DpuError(f"kernel_size should be odd: {t_kernel_size}",
                                flush=True)
        if verbose:
            dlr_common.DpuInfo(f"out_channel={t_out_channel} {out_data.shape}")
            dlr_common.DpuInfo(f"in_channel ={t_in_channel} {in_data.shape}")
            dlr_common.DpuInfo(f"out_size   ={t_out_size} {out_data.shape}")
            dlr_common.DpuInfo(f"in_size    ={t_in_size} {in_data.shape}")
            dlr_common.DpuInfo(f"kernel_size={t_kernel_size} {kernel.shape}")
            dlr_common.DpuInfo(f"stride     ={t_stride} {stride}")
            dlr_common.DpuInfo(f"padding    ={t_padding} {padding}")
        if (error != 0):
            dlr_common.DpuError(" parameter mis-match", flush=True)
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Deconvolution2dInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Deconvolution2dFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Deconvolution2dDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError(" not support " + str(out_data.dtype.type),
                            flush=True)
        return False
    _Deconv2d = dlr_common.WrapFunction(
        dlr_common._dlr,
        _fname,
        None,  # return type
        [
            ctypes.POINTER(_ctype),  # output features
            ctypes.POINTER(_ctype),  # input image
            ctypes.POINTER(_ctype),  # kernels
            ctypes.POINTER(_ctype),  # bias
            ctypes.c_ushort,  # out_size
            ctypes.c_ushort,  # in_size
            ctypes.c_ubyte,  # kernel_size (only for square filter)
            ctypes.c_ushort,  # bias_size
            ctypes.c_ushort,  # in_channel
            ctypes.c_ushort,  # out_channel
            ctypes.c_ubyte,  # stride
            ctypes.c_ubyte,  # padding
            ctypes.c_int,  # rigor
            ctypes.c_int,  # verbose
        ])
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_data = in_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_kernel = kernel.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_out_size = ctypes.c_ushort(
        out_data.shape[2])  # note ndim (i.e., rank) is 3
    CP_in_size = ctypes.c_ushort(
        in_data.shape[2])  # note ndim (i.e., rank) is 3
    CP_kernel_size = ctypes.c_ubyte(
        kernel.shape[3])  # note ndim (i.e., rank) is 4
    CP_in_channel = ctypes.c_ushort(in_data.shape[0])  # kernel.shape[0]
    CP_out_channel = ctypes.c_ushort(kernel.shape[1])
    CP_stride = ctypes.c_ubyte(stride)
    CP_padding = ctypes.c_ubyte(padding)
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0
    if (bias is None) or (bias.size == 0):
        CP_bias = ctypes.POINTER(_ctype)()
        CP_bias_size = ctypes.c_ushort(0)
    else:
        CP_bias = bias.ctypes.data_as(ctypes.POINTER(_ctype))
        CP_bias_size = ctypes.c_ushort(bias.shape[0])
    _Deconv2d(CP_out_data, CP_in_data, CP_kernel, CP_bias, CP_out_size,
              CP_in_size, CP_kernel_size, CP_bias_size, CP_in_channel,
              CP_out_channel, CP_stride, CP_padding, CP_rigor, CP_verbose)
    return True
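
# A usage sketch (hypothetical sizes): derive out_size first with
# GetOutputSizeOfDeconvolution2d, then allocate out_data to match.
if __name__ == "__main__":
    dc_in = np.random.rand(2, 14, 14).astype(np.float32)
    dc_k = np.random.rand(2, 4, 3, 3).astype(np.float32)  # in_ch x out_ch x k x k
    ok, dc_out_size = GetOutputSizeOfDeconvolution2d(14, kernel_size=3,
                                                     stride=2, padding=1)
    if ok:
        dc_out = np.zeros((4, dc_out_size, dc_out_size), dtype=np.float32)
        Deconvolution2d(dc_out, dc_in, dc_k, stride=2, padding=1)
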
def Concat2d(out_data,   # out_rows x out_cols
             in_dataA,   # in_rowsA x in_colsA
             in_dataB,   # in_rowsB x in_colsB
             dim=0,
             rigor=False,
             verbose=False):
    """
    Returns True on success, otherwize returns False
    Applies a 2D Concatenation over two 2-dimensional input data
    Note that all nd-array lists are NumPy (mutable), not PyTorch tensor (immutable).
    :param out_data: <mutable> output data, out_data[][]
    :param in_dataA: input data, in_dataA[in_rowsA][in_colsA]
    :param in_dataB: input data, in_dataB[in_rowsB][in_colsB]
    :param dim: dimension to concatenate, 0 or 1
    :param rigor: check values rigorously when 'True'
    :param verbose: output message more when 'True'
    :return: 'True' on success, 'False' on failure.
    Follwoings are derived from input arguments
    . out_rows:
    . out_cols:
    . in_rowsA:
    . in_colsA:
    . in_rowsB:
    . in_colsB:
    . dim:
    Following is an example usage for PyTorch.
        Concat2d( tensor_out_data.data.numpy()
                    , tenso_in_dataA.data.numpy()
                    , tenso_in_dataB.data.numpy()
                    , dim
                    , rigor=True
                    , verbose=True)
    """
    if rigor:
        error = 0
        if (out_data.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("out_data is not 2 dim")
        if (in_dataA.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("in_dataA is not 2 dim")
        if (in_dataB.ndim != 2):
            error += 1
            if verbose: dlr_common.DpuError("in_dataB is not 2 dim")
        if (dim != 0) and (dim != 1):
            error += 1
            if verbose: dlr_common.DpuError("dim should be 0 or 1")
        t_in_rowsA = in_dataA.shape[0]
        t_in_colsA = in_dataA.shape[1]
        t_in_rowsB = in_dataB.shape[0]
        t_in_colsB = in_dataB.shape[1]
        if dim == 0:
            t_out_rows = in_dataA.shape[0] + in_dataB.shape[0]
            t_out_cols = in_dataA.shape[1]
        else:
            t_out_rows = in_dataA.shape[0]
            t_out_cols = in_dataA.shape[1] + in_dataB.shape[1]
        if (t_out_rows != out_data.shape[0]):
            error += 1
            if verbose: dlr_common.DpuError("out data row count error")
        if (t_out_cols != out_data.shape[1]):
            error += 1
            if verbose: dlr_common.DpuError("out data column count error")
        if dim == 0:
            if (t_in_colsA != t_in_colsB):
                error += 1
                if verbose: dlr_common.DpuError("in dimension error")
        else:
            if (t_in_rowsA != t_in_rowsB):
                error += 1
                if verbose: dlr_common.DpuError("in dimension error")
        if verbose:
            dlr_common.DpuInfo(f"out_data={out_data.shape}")
            dlr_common.DpuInfo(f"in_dataA={in_dataA.shape}")
            dlr_common.DpuInfo(f"in_dataB={in_dataB.shape}")
            dlr_common.DpuInfo(f"dim     ={dim}")
        if (error != 0):
            dlr_common.DpuError("parameter mis-match")
            return False
    if out_data.dtype.type == np.int32:
        _fname = 'Concat2dInt'
        _ctype = ctypes.c_int
    elif out_data.dtype.type == np.float32:
        _fname = 'Concat2dFloat'
        _ctype = ctypes.c_float
    elif out_data.dtype.type == np.float64:
        _fname = 'Concat2dDouble'
        _ctype = ctypes.c_double
    else:
        dlr_common.DpuError("not support " + str(out_data.dtype.type))
        return False
    _Concat2d = dlr_common.WrapFunction(
        dlr_common._dlr,
        _fname,
        None,  # return type
        [
            ctypes.POINTER(_ctype),  # output
            ctypes.POINTER(_ctype),  # input A
            ctypes.POINTER(_ctype),  # input B
            ctypes.c_ushort,  # in_rowsA
            ctypes.c_ushort,  # in_colsA
            ctypes.c_ushort,  # in_rowsB
            ctypes.c_ushort,  # in_colsB
            ctypes.c_ubyte,  # dim
            ctypes.c_int,  # rigor
            ctypes.c_int,  # verbose
        ])
    CP_out_data = out_data.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_dataA = in_dataA.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_dataB = in_dataB.ctypes.data_as(ctypes.POINTER(_ctype))
    CP_in_rowsA = ctypes.c_ushort(in_dataA.shape[0])
    CP_in_colsA = ctypes.c_ushort(in_dataA.shape[1])
    CP_in_rowsB = ctypes.c_ushort(in_dataB.shape[0])
    CP_in_colsB = ctypes.c_ushort(in_dataB.shape[1])
    CP_dim = ctypes.c_ubyte(dim)
    CP_rigor = 1 if rigor else 0
    CP_verbose = 1 if verbose else 0

    _Concat2d(CP_out_data, CP_in_dataA, CP_in_dataB, CP_in_rowsA, CP_in_colsA,
              CP_in_rowsB, CP_in_colsB, CP_dim, CP_rigor, CP_verbose)
    return True
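
# A minimal NumPy cross-check sketch for Concat2d (hypothetical shapes);
# the C kernel is expected to agree with np.concatenate along the same axis.
if __name__ == "__main__":
    ccA = np.random.rand(2, 5).astype(np.float32)
    ccB = np.random.rand(3, 5).astype(np.float32)
    cc_out = np.zeros((5, 5), dtype=np.float32)
    if Concat2d(cc_out, ccA, ccB, dim=0, rigor=True):
        assert np.allclose(cc_out, np.concatenate((ccA, ccB), axis=0))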