Example #1
    def __init__(self, input_shape: nc.TensorShape,
                 affine_shape: nc.TensorShape, output_size):
        if input_shape.rank != 4:
            raise ValueError('input_shape must be rank 4 (NCHW)')

        N, IC, IH, IW = input_shape

        if affine_shape.rank != 3:
            raise ValueError('affine_shape must be rank 3')

        AN, AH, AW = affine_shape
        self.aff_tile = 0
        if AN != N:
            raise ValueError(f'affine matrix batch {AN} must == {N}')

        if AH != 2:
            raise ValueError('affine matrix height must == 2')
        if AW != 3:
            raise ValueError('affine matrix width  must == 3')

        if output_size is not None:
            OH, OW = output_size
        else:
            OH, OW = IH, IW

        self.coords_shape = nc.TensorShape((N, IC, OH, OW, 3))
        self.coords_init = nc.initializer.CoordsArange(0, IH - 1, 0, IW - 1)

        self.coords_reshape = nc.TensorShape((N, IC * OH * OW, 3))
        self.coords_affined_shape = nc.TensorShape((N, IC, OH, OW, 2))

        self.output_shape = output_shape = nc.TensorShape((N, IC, OH, OW))
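The class above only sets up shapes: a homogeneous (x, y, 1) coordinate grid that a batch of 2x3 affine matrices maps to (x, y) sampling coordinates. A minimal NumPy sketch of that mapping (illustrative only; the per-channel batching of the real coords_shape is omitted):

import numpy as np

# hypothetical reference for the (..., 3) -> (..., 2) affine mapping
N, OH, OW = 1, 4, 4
ys, xs = np.meshgrid(np.arange(OH), np.arange(OW), indexing='ij')
coords = np.stack([xs, ys, np.ones_like(xs)], axis=-1).astype(np.float32)   # (OH, OW, 3)

theta = np.array([[[1.0, 0.0, 0.5],                                         # shift x by 0.5 px
                   [0.0, 1.0, 0.0]]], dtype=np.float32)                     # (N, 2, 3)

affined = np.einsum('hwk,njk->nhwj', coords, theta)                         # (N, OH, OW, 2)
print(affined.shape)   # (1, 4, 4, 2)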
Example #2
    def __init__(self, shape, axes, keepdims):
        shape_axes = shape.axes_arange()

        if axes.is_none_axes():
            axes = shape_axes

        # Check correctness of axes
        for axis in axes:
            if axis not in shape_axes:
                raise ValueError(f'Wrong axis {axis} not in {shape_axes}')

        self.reduction_axes = reduction_axes = axes.sorted()

        # Output axes. Remove axes from shape_axes
        self.output_axes = output_axes = shape_axes - axes

        if output_axes.is_none_axes():
            output_shape = nc.TensorShape((1, ))
        else:
            output_shape = shape[output_axes]

        self.output_shape = output_shape
        self.output_shape_keepdims = nc.TensorShape(
            1 if axis in reduction_axes else shape[axis]
            for axis in range(shape.rank))

        if keepdims:
            self.output_shape = self.output_shape_keepdims
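The keepdims handling above mirrors NumPy reductions; a small sketch of the assumed equivalence:

import numpy as np

x = np.zeros((2, 3, 4, 5))
axes = (1, 3)

out      = x.sum(axis=axes)                 # reduced axes removed    -> (2, 4)
out_keep = x.sum(axis=axes, keepdims=True)  # reduced axes kept as 1  -> (2, 1, 4, 1)

# same per-axis rule the constructor uses for output_shape_keepdims
keep_shape = tuple(1 if i in axes else d for i, d in enumerate(x.shape))
assert out_keep.shape == keep_shape == (2, 1, 4, 1)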
Example #3
 def __init__(self, t, shape):
     shape = nc.TensorShape(shape)
     if t.shape.size != shape.size:
         raise ValueError(
             f'Cannot interpret shape {t.shape} as ref shape {shape}')
     super().__init__(shape)
     self._t = t
Example #4
    def __init__(self, shape, tiles):
        if len(tiles) != shape.rank:
            raise ValueError(f'len(tiles) must match shape.rank {shape.rank}')

        self.output_shape = nc.TensorShape(dim * tiles[i]
                                           for i, dim in enumerate(shape))

        c = [0] * shape.rank

        axes_offsets = []
        for n in range(np.prod(tiles)):
            axes_offsets.append(c.copy())
            for i in range(shape.rank - 1, -1, -1):
                c[i] += 1
                if c[i] < tiles[i]:
                    break
                c[i] = 0

        axes_slices = []
        for axes_offset in axes_offsets:
            sl = []
            for axis, tile in enumerate(axes_offset):
                axis_size = shape[axis]
                sl.append(slice(axis_size * tile, axis_size * (tile + 1)))
            axes_slices.append(tuple(sl))
        self.axes_slices = tuple(axes_slices)
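A quick NumPy check (illustrative, not part of the class) of what axes_slices enumerates: each slice tuple picks one copy of the original array out of the tiled result, which is why the gradient of tile in the test further below is the sum over these slices.

import numpy as np

shape, tiles = (2, 3), (2, 2)
x = np.arange(np.prod(shape)).reshape(shape)
tiled = np.tile(x, tiles)

# the same per-tile slices the constructor builds, written out directly
slices = [(slice(shape[0] * ti, shape[0] * (ti + 1)),
           slice(shape[1] * tj, shape[1] * (tj + 1)))
          for ti in range(tiles[0]) for tj in range(tiles[1])]

assert all((tiled[sl] == x).all() for sl in slices)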
Example #5
    def __init__(self, shape, init=None):
        self.shape = shape = nc.TensorShape(shape)
        if init is None:
            init = nn.initializer.Scalar(0.0)

        Tensor._object_count += 1

        # ! internal variables (start with _) must be accessed through methods,
        # ! because Tensor can be a TensorRef that uses method overriding

        self._seq_id = Tensor._seq_id = Tensor._seq_id + 1

        self._freezed = Tensor._freeze_stack != 0  # indicates that backprop
        # should not go through inputs of _gradfns,
        # which are marked as _is_trainable()
        # use _is_freezed() to get the value

        self._trainable = False  # indicates that Tensor is used in Optimizer
        # use _is_trainable() to get the value

        self._grad_t = None  # Gradient Tensor of this tensor
        # use get_grad() to get the value

        self._gradfns = None  # Dict of [input_tensor] = func(O_t,dO_t), see _assign_grad
        # use _get_gradfns() to get the value

        self._op_name = None  # Name of op which produces this Tensor

        self._parent_module_w = None  # weakref of parent Module

        super().__init__(
            shape.size * 4,
            init_func=lambda x: init.initialize_CLBuffer(x, shape)
            if init is not None else None)
Example #6
def resize2D_bilinear(input_t, size_or_output_hw):
    """
    resize2D_bilinear operator
    
    arguments
    
     size_or_output_hw  int
                        float     
                        Iterable of height,weight
    
    """
    N, C, H, W = input_t.shape

    if isinstance(size_or_output_hw, Iterable):
        OH, OW = int(size_or_output_hw[0]), int(size_or_output_hw[1])
    elif isinstance(size_or_output_hw, (int, float)):
        OH = int(H * size_or_output_hw)
        OW = int(W * size_or_output_hw)
    else:
        raise ValueError(
            f'Unknown type of size_or_output_hw : {size_or_output_hw.__class__.__name__}'
        )

    OH = max(1, OH)
    OW = max(1, OW)

    coords_shape = nc.TensorShape((OH, OW, 2))

    coords_t = nn.Tensor(coords_shape,
                         nn.initializer.CoordsArange(0, H - 1, 0, W - 1))
    output_t = nn.spatial_transform2D(input_t, coords_t, grad_to_coords=False)

    return output_t
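A hypothetical usage sketch (assuming nn.Tensor_from_value as used in the test examples below): the second argument is either a scale factor or an explicit (height, width) pair.

import numpy as np

x = nn.Tensor_from_value(np.random.rand(1, 3, 8, 8).astype(np.float32))

y_scaled = resize2D_bilinear(x, 2.0)      # scale factor   -> (1, 3, 16, 16)
y_sized  = resize2D_bilinear(x, (4, 12))  # explicit (H,W) -> (1, 3, 4, 12)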
Example #7
    def __init__(self, shape, axis, stack_count):
        if axis < 0:
            axis = shape.rank + 1 + axis
        if axis < 0 or axis > shape.rank:
            raise ValueError(f'Wrong axis {axis}')

        if stack_count <= 0:
            raise ValueError(f'Invalid stack_count {stack_count}')

        self.output_shape = nc.TensorShape(
            tuple(shape)[0:axis] + (stack_count, ) + tuple(shape)[axis:])
        self.axis = axis
Example #8
    def copy(self, shape=None):
        """
        Creates new tensor with the same shape and data.

            shape   override with new shape, but should match shape.size
        """
        if shape is None:
            shape = self.shape
        else:
            shape = nc.TensorShape(shape)

        if shape.size != self.shape.size:
            raise ValueError('shapes size mismatch')

        t = Tensor(shape)
        t.set(self)
        return t
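Usage sketch (hypothetical values; assumes a Tensor can be constructed as in the examples above): copy keeps the data and optionally reinterprets the shape, as long as the element count matches.

t = Tensor((2, 6))           # 12 elements
same = t.copy()              # (2, 6), independent storage
flat = t.copy(shape=(12,))   # same 12 elements viewed as rank 1
# t.copy(shape=(5,)) would raise ValueError('shapes size mismatch')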
Example #9
    def __init__(self, input_shape: nc.TensorShape, size, is_add_to_output):
        N, IC, IH, IW = input_shape
        OC = IC
        OH = IH * size
        OW = IW * size

        self.output_shape = output_shape = nc.TensorShape((N, OC, OH, OW))

        common_kernel_text = f"""
{ph.define_axes_accessor('I', input_shape, 'NCHW')}
{ph.define_axes_accessor('O', output_shape, 'NCHW')}
"""

        self.O_forward_krn = nc.CLKernel(global_shape=(output_shape.size, ),
                                         kernel_text=f"""
{common_kernel_text}

#define SIZE {size}

__kernel void impl(__global float* O, __global const float* I)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('o', 'NCHW', 'gid')}

    O[gid] {'+=' if is_add_to_output else '='} I[I_idx(on,oc,oh / SIZE,ow / SIZE)];
}}
""")
        self.dI_krn = nc.CLKernel(global_shape=(input_shape.size, ),
                                  kernel_text=f"""
{common_kernel_text}

#define SIZE {size}

__kernel void impl(__global float* dI, __global const float* dO)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('i', 'NCHW', 'gid')}
    
    float v = 0.0;
    for (int y=0; y<SIZE; ++y)
    for (int x=0; x<SIZE; ++x)    
        v += dO[O_idx(in,ic,ih*SIZE+y,iw*SIZE+x)];
    
    dI[gid] += v;
}}
""")
Example #10
def tile_test():
    for _ in range(10):
        for shape_len in range(3, 5):
            try:
                shape = tuple(np.random.randint( 8, size=(shape_len,) )+1)
                tiles = tuple(np.random.randint( 4, size=(shape_len,) )+1)

                val_n = np.random.randint( 2**8, size=shape ).astype(np.float32)
                tiled_n = np.tile(val_n, tiles)

                val_t = nn.Tensor_from_value(val_n)
                tiled_t = nn.tile(val_t, tiles)

                if tiled_n.shape != tiled_t.shape:
                    raise Exception(f'shape is not equal')

                if not all ( np.ndarray.flatten( tiled_t.np() == tiled_n ) ):
                    raise Exception(f'data is not equal')

                tiled_n_grad = np.random.randint( 2**8, size=tiled_n.shape ).astype(np.float32)

                val_t.get_grad().fill(1.0)
                nn.backward( {tiled_t:tiled_n_grad} , grad_for_non_trainables=True )

                info = nc.info.InfoTile( nc.TensorShape(shape), tiles)
                val_n_grad = sum([ tiled_n_grad[axes_slice] for axes_slice in info.axes_slices ])
                if not all ( np.ndarray.flatten(val_t.get_grad().np()-1.0 == val_n_grad) ):
                    raise Exception(f'grad is not equal')

            except:
                
                raise Exception(f"""
shape         : {shape}
tiles         : {tiles}
tiled_n_shape : {tiled_n.shape}
tiled_t_shape : {tiled_t.shape}
exception     : {traceback.format_exc()}
""")
Example #11
    def __init__(self, shape, target_shape):
        output_shape = []

        remain_size = shape.size

        unk_axis = None
        for t_size in target_shape:
            t_size = int(t_size)
            if t_size != -1:
                mod = remain_size % t_size
                if mod != 0:
                    raise ValueError(
                        f'Cannot reshape {shape} to {target_shape}.')
                remain_size /= t_size
            else:
                if unk_axis is not None:
                    raise ValueError('Can specify only one unknown dimension.')
                unk_axis = len(output_shape)
            output_shape.append(t_size)

        if unk_axis is not None:
            output_shape[unk_axis] = int(remain_size)
        self.output_shape = nc.TensorShape(output_shape)
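A worked example of the -1 inference above (plain Python, illustrative only): the total size is divided by every known target dimension and the remainder fills the single unknown axis.

shape_size = 2 * 3 * 4        # 24 elements
target = (4, -1, 2)

remain = shape_size
for d in target:
    if d != -1:
        assert remain % d == 0
        remain //= d

print([remain if d == -1 else d for d in target])   # [4, 3, 2]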
Example #12
    def __init__(self, shapes, axis):
        shapes = tuple(shapes)

        if len(shapes) == 0:
            raise ValueError('shapes is empty')

        shape = shapes[0]

        if axis < 0:
            axis = shape.rank + axis
        if axis < 0 or axis >= shape.rank:
            raise ValueError(f'Wrong axis {axis}')

        fixed_shapes = [
            tuple(a for i, a in enumerate(shape) if i != axis)
            for shape in shapes
        ]
        req_shape = fixed_shapes[0]
        if not all(shape == req_shape for shape in fixed_shapes[1:]):
            raise ValueError(
                f'All shapes must match shape {tuple(a if i != axis else "*" for i,a in enumerate(shape))}'
            )

        axis_sizes = [shape[axis] for shape in shapes]

        axis_offset = 0
        axis_offsets = []
        for axis_size in axis_sizes:
            axis_offsets.append(axis_offset)
            axis_offset += axis_size

        self.output_shape = nc.TensorShape(
            tuple(shape)[0:axis] + (sum(axis_sizes), ) +
            tuple(shape)[axis + 1:])
        self.axis = axis
        self.axis_sizes = axis_sizes
        self.axis_offsets = tuple(axis_offsets)
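A small sketch of the bookkeeping above (illustrative NumPy equivalence): axis_offsets are the running start positions of each input along the concat axis.

import numpy as np

parts = [np.zeros((2, s)) for s in (3, 1, 4)]   # concat along axis=1
axis_sizes = [p.shape[1] for p in parts]

offsets, off = [], 0
for s in axis_sizes:
    offsets.append(off)
    off += s

out = np.concatenate(parts, axis=1)
print(offsets, out.shape)   # [0, 3, 4] (2, 8)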
Example #13
    def __init__(self, input_shape: nc.TensorShape,
                 coords_shape: nc.TensorShape):
        N, IC, IH, IW = input_shape

        if coords_shape.rank not in [3, 4, 5]:
            raise ValueError(
                f'Coords shape rank must be 3(HWD) or 4(CHWD) or 5(NCHWD)')
        KN, KC = 1, 1

        if coords_shape.rank == 5:
            KN, KC, KH, KW, KD = coords_shape
        elif coords_shape.rank == 4:
            KC, KH, KW, KD = coords_shape
        elif coords_shape.rank == 3:
            KH, KW, KD = coords_shape

        self.coords_N_tile = 1
        self.coords_C_tile = 1

        if KN != N:
            if KN == 1:
                self.coords_N_tile = N
            else:
                raise ValueError(
                    f'Coords output batch {KN} does not match tensor input batch {N}.'
                )

        if KC != IC:
            if KC == 1:
                self.coords_C_tile = IC
            else:
                raise ValueError(
                    f'Coords output channels {KC} does not match tensor input channels {IC}.'
                )

        if KD != 2:
            raise ValueError(f'Coords D dimension {KD} must be == 2 (x,y)')

        self.output_shape = output_shape = nc.TensorShape((N, IC, KH, KW))

        common_kernel_text = f"""
{ph.define_axes_accessor('I', input_shape, 'NCHW')}
{ph.define_axes_accessor('O', output_shape, 'NCHW')}
"""
        self.O_forward_krn = nc.CLKernel(global_shape=(output_shape.size, ),
                                         kernel_text=f"""
{common_kernel_text}
{ph.define_axes_accessor('K', (KN,KC,KH,KW), 'NCHW')}
__kernel void impl(__global float* O, __global const float* I, __global const float2* K)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('o', 'NCHW', 'gid')}

    float v = 0.0;

    float2 xys = K[K_idx_mod(on,oc,oh,ow)];

    for (int ih=0; ih < IH; ++ih)
    {{
        float ys_mod = max(0.0, 1.0-fabs(xys.y-ih));
        if (ys_mod != 0.0)
        for (int iw=0; iw < IW; ++iw)
        {{
            float xs_mod = max(0.0, 1.0-fabs(xys.x-iw));
            if (xs_mod != 0.0)
                v += xs_mod*ys_mod*I[I_idx(on,oc,ih,iw)];
        }}
    }}

    O[gid] = v;
}}
""")

        self.dI_krn = nc.CLKernel(global_shape=(input_shape.size, ),
                                  kernel_text=f"""
{common_kernel_text}
{ph.define_axes_accessor('K', (KN,KC,KH,KW), 'NCHW')}
__kernel void impl(__global float* dI, __global const float2* K, __global float* dO)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('i', 'NCHW', 'gid')}

    float v = 0.0;

    for (int oh=0; oh < OH; ++oh)
    for (int ow=0; ow < OW; ++ow)
    {{
        float2 xys = K[K_idx_mod(in,ic,oh,ow)];

        float ys_mod = max(0.0, 1.0-fabs(xys.y-ih));
        if (ys_mod != 0.0)
        {{
            float xs_mod = max(0.0, 1.0-fabs(xys.x-iw));
            if (xs_mod != 0.0)
                v += xs_mod*ys_mod*dO[O_idx(in,ic,oh,ow)];
        }}
    }}

    dI[gid] += v;
}}
""")

        self.dK_krn = nc.CLKernel(global_shape=(N * IC * KH * KW, ),
                                  kernel_text=f"""
{common_kernel_text}
{ph.define_axes_accessor('K', (N,IC,KH,KW), 'NCHW')}
__kernel void impl(__global float2* dK, __global const float* I, __global const float2* K, __global float* dO)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('k', 'NCHW', 'gid')}

    float dk_x = 0.0;
    float dk_y = 0.0;

    float2 xys = K[gid];

    for (int ih=0; ih < IH; ++ih)
    {{
        {{
            float ys_mod = max(0.0, 1.0-fabs(xys.y-ih));

            if (ys_mod != 0.0)
            for (int iw=0; iw < IW; ++iw)
            if ( fabs(iw-xys.x) < 1.0 )
            {{
                float xs_mod = 1.0*(iw >= xys.x)-1.0*(iw < xys.x);
                dk_x += xs_mod*ys_mod*I[I_idx(kn,kc,ih,iw)] * dO[O_idx(kn,kc,kh,kw)];
            }}
        }}

        if (fabs(ih-xys.y) < 1.0)
        {{
            float ys_mod = 1.0*(ih >= xys.y)-1.0*(ih < xys.y);
            for (int iw=0; iw < IW; ++iw)
            {{
                float xs_mod = max(0.0, 1.0-fabs(xys.x-iw));
                if (xs_mod != 0.0)
                    dk_y += xs_mod*ys_mod*I[I_idx(kn,kc,ih,iw)] * dO[O_idx(kn,kc,kh,kw)];
            }}
        }}
    }}
    dK[gid] += (float2)(dk_x, dk_y);
}}
""")
Example #14
    def __init__(self, op_type, input_shape: nc.TensorShape, pool_size, stride,
                 padding):
        if op_type not in ['avg', 'min', 'max']:
            raise ValueError(f'unknown op_type {op_type}')
        if pool_size < 2:
            raise ValueError(f'pool_size {pool_size} must be at least 2')
        self.op_type = op_type

        N, IC, IH, IW = input_shape
        ci = nc.info.InfoConv2D(IH, IW, pool_size, pool_size, stride, 1,
                                padding)
        OC, OH, OW = IC, ci.OH, ci.OW
        self.output_shape = output_shape = nc.TensorShape((N, OC, OH, OW))

        common_kernel_text = f"""
{ph.define_axes_accessor('I', input_shape, 'NCHW')}
{ph.define_axes_accessor('O', output_shape, 'NCHW')}

#define PADL {ci.PADL}
#define PADT {ci.PADT}

#define POOL_SIZE {pool_size}
#define STRIDE {stride}
"""

        self.O_forward_krn = nc.CLKernel(global_shape=(output_shape.size, ),
                                         kernel_text=f"""
{common_kernel_text}

__kernel void impl(__global float* O, __global const float* I)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('o', 'NCHW', 'gid')}

    { {'avg' : 'float v = 0.0; int v_count = 0;',
       'max' : 'float v = -INFINITY;',
       'min' : 'float v = INFINITY;',
      }[op_type] }

    for (int ph=0; ph<POOL_SIZE; ++ph)
    for (int pw=0; pw<POOL_SIZE; ++pw)
    {{
        int ih = -PADT + ph + oh*STRIDE;
        int iw = -PADL + pw + ow*STRIDE;
        if (iw >= 0 & ih >= 0 & iw < IW & ih < IH)
        {{
            { {'avg' : 'v +=        I[I_idx(on,oc,ih,iw)]; ++v_count;',
               'max' : 'v = fmax(v, I[I_idx(on,oc,ih,iw)]);',
               'min' : 'v = fmin(v, I[I_idx(on,oc,ih,iw)]);',
              }[op_type] }
        }}
    }}

    { {'avg' : 'if (v_count != 0) v /= v_count;',
       'max' : 'if (v == -INFINITY) v = 0.0;',
       'min' : 'if (v == INFINITY) v = 0.0;',
      }[op_type] }


    O[gid] = v;
}}
""")
        if op_type == 'avg':
            self.dI_krn = nc.CLKernel(global_shape=(input_shape.size, ),
                                      kernel_text=f"""
{common_kernel_text}
__kernel void impl(__global float* dI, __global const float* dO)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('i', 'NCHW', 'gid')}

    float v = 0.0;

    for (int ph=0; ph<POOL_SIZE; ++ph)
    for (int pw=0; pw<POOL_SIZE; ++pw)
    {{
        int oh = (PADT + ih - ph ) / STRIDE;
        int ow = (PADL + iw - pw ) / STRIDE;
        if (ow >= 0 & oh >= 0 & ow < OW & oh < OH
            & iw == (-PADL + pw + ow*STRIDE)
            & ih == (-PADT + ph + oh*STRIDE) )
        {{
            int d=0;
            for (int dph=0; dph<POOL_SIZE; ++dph)
            for (int dpw=0; dpw<POOL_SIZE; ++dpw)
            {{
                int dih = -PADT + dph + oh*STRIDE;
                int diw = -PADL + dpw + ow*STRIDE;
                d += (diw >= 0 & dih >= 0 & diw < IW & dih < IH);
            }}
            v += dO[O_idx(in,ic,oh,ow)] / d;
        }}
    }}

    dI[gid] += v;
}}
""")
        elif op_type in ['min', 'max']:
            # Implementation differs from tensorflow when identical values occur inside the pooling window.
            # Example tf     : 3 4 5 5 , max = 5, gradients : 0 0 0.5 0.5
            # Example litenn : 3 4 5 5 , max = 5, gradients : 0 0   1   0
            #                                 or  gradients : 0 0   0   1 - depends on which GPU thread runs first!
            self.dI_krn = nc.CLKernel(global_shape=(input_shape.size, ),
                                      kernel_text=f"""
{common_kernel_text}
__kernel void impl(__global float* dI, __global const float* I, __global const float* dO, __global const float* O)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('i', 'NCHW', 'gid')}

    float v = 0.0;

    // Iterate over all O pixels, where 'I' have contribution
    for (int ph=0; ph<POOL_SIZE; ++ph)
    for (int pw=0; pw<POOL_SIZE; ++pw)
    {{
        int oh = (PADT + ih - ph ) / STRIDE;
        int ow = (PADL + iw - pw ) / STRIDE;
        if (ow >= 0 & oh >= 0 & ow < OW & oh < OH
            & iw == (-PADL + pw + ow*STRIDE)
            & ih == (-PADT + ph + oh*STRIDE) )
        {{
            //Now we have oh,ow where ih,iw have contribution

            float Ov = O[O_idx(in,ic,oh,ow)];

            //Iterate in those I pixels, which were used to produce O
            //to determine first min/max match
            for (int dphw=0; dphw < POOL_SIZE*POOL_SIZE; ++dphw)
            {{
                int dih = -PADT + (dphw / POOL_SIZE) + oh*STRIDE;
                int diw = -PADL + (dphw % POOL_SIZE) + ow*STRIDE;
                if (diw >= 0 & dih >= 0 & diw < IW & dih < IH &
                    I[I_idx(in,ic,dih,diw)] == Ov)
                {{
                    // Match I==O
                    if (dih == ih & diw == iw)
                        // but add gradient only if current ih/iw index match dih/diw
                        v += dO[O_idx(in,ic,oh,ow)];
                    break;
                }}
            }}
        }}
    }}

    dI[gid] += v;
}}
""")
Example #15
    def __init__(self, input_shape : nc.TensorShape, kernel_shape : nc.TensorShape, stride, dilation, padding):
        if kernel_shape.rank != 3:
            raise ValueError(f'Kernel shape rank must be == 3')   
        N,IC,IH,IW = input_shape
        KI,KH,KW = kernel_shape      
        if KI != IC:
            raise ValueError(f'Kernel input channels {KI} does not match tensor input channels {IC}.')
        
        ci = nc.info.InfoConv2D(IH, IW, KH, KW, stride, dilation, padding)
        OC, OH, OW = IC, ci.OH, ci.OW
        self.output_shape = output_shape = nc.TensorShape( (N, OC, OH, OW) )

        self.OC_1_1_NxOHxOW = (OC,1,1,N*OH*OW)
        self.KI_KH_KW_NxOHxOW = (KI,KH,KW,N*OH*OW)

        common_kernel_text = f"""
{ph.define_axes_accessor('I', input_shape, 'NCHW')}
{ph.define_axes_accessor('O', output_shape, 'NCHW')}
{ph.define_axes_accessor('K', kernel_shape, 'IHW')}
#define PADL {ci.PADL}
#define PADT {ci.PADT}

#define STRIDE {stride}
#define DILATION {dilation}
"""
        self.O_depthwise_krn = nc.CLKernel(global_shape=(output_shape.size,), kernel_text=f"""
{common_kernel_text}
__kernel void impl(__global float* O, __global const float* I, __global const float* K)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('o', 'NCHW', 'gid')}

    float v = 0.0;
    for (int kh=0; kh<KH; ++kh)
    {{
        int ih = -PADT + kh*DILATION + oh*STRIDE;
        if (ih >= 0 & ih < IH)
            for (int kw=0; kw<KW; ++kw)
            {{
                int iw = -PADL + kw*DILATION + ow*STRIDE;
                if (iw >= 0 & iw < IW)
                    v += I[I_idx(on,oc,ih,iw)]*K[K_idx(oc,kh,kw)];
            }}
    }}
    O[gid] = v;
}}
""")

        self.dI_depthwise_krn = nc.CLKernel(global_shape=(input_shape.size,), kernel_text=f"""
{common_kernel_text}
__kernel void impl(__global float* dI, __global const float* K, __global const float* dO)
{{
    size_t gid = get_global_id(0);
    {ph.axes_idxs_from_var('i', 'NCHW', 'gid')}
    float v = 0.0;
    for (int kh=0; kh<KH; ++kh)
    {{
        int oh = (PADT + ih - kh*DILATION ) / STRIDE;
        if (oh >= 0 & oh < OH)
            for (int kw=0; kw<KW; ++kw)
            {{
                int ow = (PADL + iw - kw*DILATION ) / STRIDE;
                if (ow >= 0 & ow < OW
                    & iw == (-PADL + kw*DILATION + ow*STRIDE)
                    & ih == (-PADT + kh*DILATION + oh*STRIDE) )
                        v += dO[O_idx(in,ic,oh,ow)]*K[K_idx(ic,kh,kw)];
            }}
    }}
    dI[gid] += v;
}}
""")

        self.im2col  = lambda x: nc.op.unfold2D(x, N, IC, IH, IW, OH, OW, KH, KW, ci.PADL, ci.PADT, dilation, stride, 'CJI_NHW', is_transpose=False)
Example #16
    def __init__(self, input_shape: nc.TensorShape, slices, is_add_to_output):

        # Validate slices argument for given shape.
        new_slices = []
        before_ellipsis = None

        for s in slices:
            if s is Ellipsis:
                before_ellipsis = new_slices
                new_slices = []
                continue
            elif s is not None and not isinstance(s, (int, tuple)):
                raise ValueError(
                    f'unknown slice argument {s} of type {s.__class__}')

            new_slices.append(s)

        if before_ellipsis is not None:
            # Process Ellipsis separator
            new_slices_n_axes = sum(1 for x in new_slices if x is not None)
            before_ellipsis_n_axes = sum(
                1 for x in before_ellipsis if x is not None)

            # Expand slices by filling intermediate (None,None,None) for each remaining axis
            new_slices = before_ellipsis + \
                         [(None,None,None)]*max(0, input_shape.rank-before_ellipsis_n_axes-new_slices_n_axes) + \
                         new_slices

        new_slices_n_axes = sum(1 for x in new_slices if x is not None)
        if new_slices_n_axes > input_shape.rank:
            raise ValueError('slices arguments more than shape axes')
        elif new_slices_n_axes < input_shape.rank:
            # Fill remaining axes
            new_slices += [(None, None, None)
                           ] * (input_shape.rank - new_slices_n_axes)

        slices = tuple(new_slices)

        # Compute shapes
        output_is_reshaped = True  # Flag determines that output_tensor
        # can be just reshaped without any computation
        output_shape = []  # output tensor shape
        output_shape_krn = [
        ]  # output shape used in kernel, must match input shape
        input_axes_begin_step = []  # begin,step ints for every input shape

        i_axis = 0
        for v in slices:
            if v is None:
                # None is new axis
                # We can add unlimited number of (1,) axes at any place of shape
                output_shape.append(1)
                continue

            i_axis_size = input_shape[i_axis]
            i_axis += 1

            if isinstance(v, int):
                if v < 0: v += i_axis_size
                if v < 0 or v >= i_axis_size:
                    raise ValueError(
                        f'index {v} is out of bounds for axis {i_axis} with size {i_axis_size}'
                    )
                b, e, s = v, v, 1
            else:
                b, e, s = v

            # Fix begin, end, step values
            if s is None: s = 1
            if s == 0:
                raise ValueError('slice step cannot be zero')

            if b is None: b = 0 if s > 0 else i_axis_size - 1
            if e is None: e = i_axis_size if s > 0 else -1
            elif e < 0: e += i_axis_size

            if b < 0: b += i_axis_size

            if s > 0:
                b = np.clip(b, 0, i_axis_size)
                e = np.clip(e, 0, i_axis_size)
            else:
                b = np.clip(b, 0, i_axis_size - 1)
                e = np.clip(e, -1, i_axis_size)

            if i_axis_size != 1 and not (b == 0 and e == i_axis_size
                                         and s == 1):
                # This slice changes the axis content, so the output cannot be produced by just reshaping the input
                output_is_reshaped = False

            # Compute output_axis_size based on begin,end,step
            output_axis_size = max(0, math.ceil((e - b) / s))

            if output_axis_size >= 1:
                # >= 1 : select range of indexes, axis will remain
                output_shape.append(output_axis_size)
            # ^ otherwise the axis will be suppressed

            # output_shape to use in kernel, must match rank of input shape
            output_shape_krn.append(max(1, output_axis_size))

            # for every output_shape_krn axis
            # we have exact begin,step values to fetch value from input
            input_axes_begin_step.append((b, s))

        output_shape_krn = nc.TensorShape(output_shape_krn)
        self.output_is_reshaped = output_is_reshaped
        self.output_shape = nc.TensorShape(output_shape)

        self.forward_krn = nc.CLKernel(global_shape=(output_shape_krn.size, ),
                                       kernel_text=f"""
{ph.define_axes_accessor('I', input_shape )}
{ph.define_axes_sizes('O', output_shape_krn )}
__kernel void impl(__global const float* I, __global float* O)
{{
size_t gid = get_global_id(0);
{ph.axes_idxs_from_var('o', output_shape_krn.rank, 'gid')}
{''.join( f'size_t i{i} = {b} + o{i} * {s};' for i, (b,s) in enumerate(input_axes_begin_step)  )  }
O[get_global_id(0)] {'+=' if is_add_to_output else '='} I[I_idx({ph.axes_seq_enum('i', input_shape.rank)})];
}}
""")

        self.backward_krn = nc.CLKernel(global_shape=(output_shape_krn.size, ),
                                        kernel_text=f"""
{ph.define_axes_accessor('I', input_shape )}
{ph.define_axes_sizes('O', output_shape_krn )}
__kernel void impl(__global float* dI, __global const float* O)
{{
size_t gid = get_global_id(0);
{ph.axes_idxs_from_var('o', output_shape_krn.rank, 'gid')}
{''.join( f'size_t i{i} = {b} + o{i} * {s};' for i, (b,s) in enumerate(input_axes_begin_step)  )  }
dI[I_idx({ph.axes_seq_enum('i', input_shape.rank)})] += O[get_global_id(0)];
}}
""")
Example #17
    def __init__(self, input_shape, kernel_shape, stride, dilation, padding):
        if padding not in ['valid', 'same']:
            raise ValueError(
                'Wrong padding value, only valid or same supported for conv2DTranspose.'
            )

        N, IC, IH, IW = input_shape
        KO, KI, KH, KW = kernel_shape
        if KI != IC:
            raise ValueError(
                f'Kernel input channels {KI} does not match input channels {IC}.'
            )

        ci = nc.info.InfoConv2D(IH, IW, KH, KW, stride, dilation, padding)
        OC, OH, OW = KO, ci.OH_T, ci.OW_T
        self.output_shape = output_shape = nc.TensorShape((N, OC, OH, OW))

        self.OC_N_OH_OW = (OC, N, OH, OW)
        self.OC_NxOHxOW = (OC, N * OH * OW)
        self.N_IH_IW_IC = (N, IH, IW, IC)

        self.im2colT = lambda x: nc.op.unfold2D(x,
                                                N,
                                                IC,
                                                IH,
                                                IW,
                                                OH,
                                                OW,
                                                KH,
                                                KW,
                                                ci.PADL,
                                                ci.PADT,
                                                dilation,
                                                stride,
                                                'CJI_NHW',
                                                is_transpose=True)
        self.im2rowT = lambda x: nc.op.unfold2D(x,
                                                N,
                                                IC,
                                                IH,
                                                IW,
                                                OH,
                                                OW,
                                                KH,
                                                KW,
                                                ci.PADL,
                                                ci.PADT,
                                                dilation,
                                                stride,
                                                'NHW_CJI',
                                                is_transpose=True)
        self.im2row = lambda x: nc.op.unfold2D(x,
                                               N,
                                               OC,
                                               OH,
                                               OW,
                                               IH,
                                               IW,
                                               KH,
                                               KW,
                                               ci.PADL,
                                               ci.PADT,
                                               dilation,
                                               stride,
                                               'NHW_CJI',
                                               is_transpose=False)
Example #18
    def __init__(self, input_shape, kernel_shape, stride, dilation, padding):
        N, IC, IH, IW = input_shape
        KO, KI, KH, KW = kernel_shape
        if KI != IC:
            raise ValueError(
                f'Kernel input channels {KI} does not match tensor input channels {IC}.'
            )

        ci = nc.info.InfoConv2D(IH, IW, KH, KW, stride, dilation, padding)
        OC, OH, OW = KO, ci.OH, ci.OW
        self.output_shape = output_shape = nc.TensorShape((N, OC, OH, OW))

        self.OC_N_OH_OW = (OC, N, OH, OW)
        self.OC_NxOHxOW = (OC, N * OH * OW)
        self.N_IH_IW_IC = (N, IH, IW, IC)

        self.im2col = lambda x: nc.op.unfold2D(x,
                                               N,
                                               IC,
                                               IH,
                                               IW,
                                               OH,
                                               OW,
                                               KH,
                                               KW,
                                               ci.PADL,
                                               ci.PADT,
                                               dilation,
                                               stride,
                                               'CJI_NHW',
                                               is_transpose=False)
        self.im2row = lambda x: nc.op.unfold2D(x,
                                               N,
                                               IC,
                                               IH,
                                               IW,
                                               OH,
                                               OW,
                                               KH,
                                               KW,
                                               ci.PADL,
                                               ci.PADT,
                                               dilation,
                                               stride,
                                               'NHW_CJI',
                                               is_transpose=False)
        self.im2rowT = lambda x: nc.op.unfold2D(x,
                                                N,
                                                OC,
                                                OH,
                                                OW,
                                                IH,
                                                IW,
                                                KH,
                                                KW,
                                                ci.PADL,
                                                ci.PADT,
                                                dilation,
                                                stride,
                                                'NHW_CJI',
                                                is_transpose=True)
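unfold2D is library-specific, but the underlying im2col idea can be sketched in NumPy (stride 1, dilation 1, valid padding; the 'CJI_NHW' layout is approximated here as (C*KH*KW, N*OH*OW) and is an assumption, not the library's exact ordering):

import numpy as np

def im2col_reference(x, KH, KW):
    # x : (N, C, H, W)
    N, C, H, W = x.shape
    OH, OW = H - KH + 1, W - KW + 1
    cols = np.empty((C * KH * KW, N * OH * OW), dtype=x.dtype)
    row = 0
    for c in range(C):
        for kh in range(KH):
            for kw in range(KW):
                patch = x[:, c, kh:kh + OH, kw:kw + OW]   # (N, OH, OW)
                cols[row] = patch.reshape(-1)
                row += 1
    return cols

cols = im2col_reference(np.random.rand(1, 2, 5, 5).astype(np.float32), 3, 3)
print(cols.shape)   # (18, 9)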
Example #19
    def __init__(self, a_shape, b_shape, is_add_to_output):
        if a_shape.rank != b_shape.rank:
            raise ValueError(
                f'Ranks are not equal. {a_shape.rank} != {b_shape.rank}')

        rank = a_shape.rank
        if rank < 2:
            raise ValueError('Tensors rank must be at least 2.')

        K, M = a_shape[-2], a_shape[-1]
        N, B_COLS = b_shape[-2], b_shape[-1]

        if K != B_COLS:
            raise ValueError('A_ROWS != B_COLS')

        BATCH = a_shape[0:-2].size
        B_BATCH = b_shape[0:-2].size

        if BATCH != B_BATCH:
            raise ValueError(
                f'BATCH size {BATCH} != {B_BATCH} in shapes {a_shape} {b_shape}'
            )

        if rank == 2:
            self.output_shape = output_shape = nc.TensorShape((N, M))
        else:
            self.output_shape = output_shape = nc.TensorShape(a_shape[:-2] +
                                                              (N, M))

        self.M = M
        self.N = N
        self.K = K

        # Determining optimal tile widths
        for MW in [16, 8, 4, 2, 1]:
            if M % MW == 0:
                break
        for KW in [8, 4, 2, 1]:
            if N % KW == 0 and K % KW == 0:
                break
        NW = KW

        self.forward_krn = nc.CLKernel(global_shape=(M // MW, N // NW, BATCH),
                                       kernel_text=f"""
#define K {K}
#define N {N}
#define MW {MW}     // M tile Width
#define NW {NW}     // N tile Width  -- NW & KW should be the same !
#define KW {KW}     // K tile Width
#define MT {M//MW}  // MT is max for 'mt' (M tile count)
#define KT {K//KW}  // KT is max for 'kt' (K tile count)

#define floatMW { f'float{MW}' if MW != 1 else 'float'}
#define floatKW { f'float{KW}' if KW != 1 else 'float'}

__kernel void GeMM(const __global floatMW* restrict A, const __global floatKW* restrict B, __global floatMW* C)
{{
    size_t mt = get_global_id(0);    //global M-tile id
    size_t nc = get_global_id(1);    //global N-tile id
    size_t batch = get_global_id(2); 
        
    float AT[KW][MW]; // sub tiles
    float BT[NW][KW];
    float CT[NW][MW];

    #pragma unroll
    for (uint i=0; i<NW*MW; ++i) // zero CT tile
        ((float*) CT)[i] = 0.0;

    for (uint kt=0; kt<KT; ++kt)  // iterate over K-dim tiles
    {{
        #pragma unroll
        for (uint k=0; k<KW; ++k)  // every k-element inside K-dim tile
            *( (floatMW*) AT[k] ) = A[batch*K*MT + (kt*KW + k)*MT + mt]; // store M-Width floats

        #pragma unroll
        for (uint n=0; n<NW; ++n)  // every n-element inside N-dim tile
            *( (floatKW*) BT[n] ) = B[batch*N*KT + (nc*NW + n)*KT + kt]; // store K-Width floats

        #pragma unroll
        for (uint k=0; k<KW; ++k)
        #pragma unroll
        for (uint n=0; n<NW; ++n)  // sub tiles multiplication
        #pragma unroll
        for (uint m=0; m<MW; ++m)
            CT[n][m] += AT[k][m] * BT[n][k];
    }}

    #pragma unroll
    for (uint n=0; n<NW; ++n)
        C[ batch*N*MT + (nc*NW + n)*MT + mt] {'+=' if is_add_to_output else '='}
                               *( (floatMW*) CT[n]);
}}""")
Example #20
def dual_wise_op_test():
    for op in [
            add, binary_crossentropy, categorical_crossentropy, sub, max, min,
            mul, div
    ]:
        print(f'{op.__name__}()')
        for _ in range(10):
            if op == categorical_crossentropy:
                shape_gen = [2]
            else:
                shape_gen = range(1, 5)

            for shape_len in shape_gen:
                try:
                    a_shape = tuple(
                        np.random.randint(8, size=(shape_len, )) + 1)

                    if op == categorical_crossentropy:
                        b_shape = a_shape
                    else:
                        if np.random.randint(2) == 0:
                            b_shape = tuple(
                                a_shape[np.random.randint(len(a_shape)):])
                            b_shape = (1, ) if len(b_shape) == 0 else b_shape
                        else:
                            b_shape = list(a_shape)
                            b_shape[np.random.randint(len(b_shape))] = 1
                            b_shape = tuple(b_shape)

                        shapes = [a_shape, b_shape]
                        if np.random.randint(2) == 0:
                            shapes = shapes[::-1]
                        a_shape, b_shape = shapes

                    a_n = np.random.randint(1, 2**8,
                                            size=a_shape).astype(np.float32)
                    b_n = np.random.randint(1, 2**8,
                                            size=b_shape).astype(np.float32)
                    a_t = nn.Tensor_from_value(a_n)
                    b_t = nn.Tensor_from_value(b_n)
                    r_t = op(a_t, b_t)

                    r_n_grad = np.random.randint(2**8, size=r_t.shape).astype(
                        np.float32)

                    a_t.get_grad().fill(1.0)
                    b_t.get_grad().fill(1.0)
                    nn.backward({r_t: r_n_grad}, grad_for_non_trainables=True)

                    if op == div:
                        # Test value and gradient correctness only for div
                        r_n = a_n / b_n

                        if r_n.shape != r_t.shape:
                            raise Exception(f'shapes are not equal')
                        if np.abs(np.sum(
                            (np.ndarray.flatten(r_t.np() - r_n)))) > 1:
                            raise Exception(f'data is not equal')

                        info = nc.info.InfoBroadcast(nc.TensorShape(a_shape),
                                                     nc.TensorShape(b_shape))

                        a_n_grad = r_n_grad / b_n

                        axes = info.a_shape_reduction_axes
                        if axes.rank == 0:
                            a_n_grad = a_n_grad.reshape(a_n.shape)
                        else:
                            a_n_grad = a_n_grad.sum(tuple(axes), keepdims=True)

                        b_n_grad = r_n_grad * (-a_n / (b_n**2))

                        axes = info.b_shape_reduction_axes
                        if axes.rank == 0:
                            b_n_grad = b_n_grad.reshape(b_n.shape)
                        else:
                            b_n_grad = b_n_grad.sum(tuple(axes), keepdims=True)

                        if np.abs(
                                np.sum(
                                    (np.ndarray.flatten(a_t.get_grad().np() -
                                                        1.0 - a_n_grad)))) > 1:
                            raise Exception(f'grad A is not equal')
                        if np.abs(
                                np.sum(
                                    (np.ndarray.flatten(b_t.get_grad().np() -
                                                        1.0 - b_n_grad)))) > 1:
                            raise Exception(f'grad B is not equal')
                    else:
                        if not a_t.has_grad():
                            raise Exception(f'a_t has no grad')
                        if not b_t.has_grad():
                            raise Exception(f'b_t has no grad')

                except:
                    raise Exception(f"""
op        : {op}
a_shape   : {a_shape}
b_shape   : {b_shape}
r_n_shape : {r_n.shape}
exception : {traceback.format_exc() }
""")