def kernel(A):
    """Reduce ``A[r]`` over ``hcl.reduce_axis(0, 10)`` into a 1-element tensor.

    ``hcl.Float()`` is requested twice: once for the reduction call and once
    for the tensor produced by ``hcl.compute``.
    """
    def add(x, y):
        return x + y

    # Original author's note: my_sum will perform integer reduction
    # (no dtype is handed to hcl.reducer itself).
    accumulate = hcl.reducer(0, add)
    red_axis = hcl.reduce_axis(0, 10)

    def total(x):
        return accumulate(A[red_axis], axis=red_axis, dtype=hcl.Float())

    return hcl.compute((1,), total, dtype=hcl.Float())
def kernel(A):
    """Reduce ``A[r]`` over ``hcl.reduce_axis(0, 10)`` with a branching reducer.

    The reducer body returns ``y + 1`` when ``x > 5`` and ``y + 2``
    otherwise, using ``hcl.return_`` inside ``hcl.if_`` / ``hcl.else_``.
    """
    def branching_step(x, y):
        with hcl.if_(x > 5):
            hcl.return_(y + 1)
        with hcl.else_():
            hcl.return_(y + 2)

    conditional_sum = hcl.reducer(0, branching_step)
    red_axis = hcl.reduce_axis(0, 10)

    def reduce_all(x):
        return conditional_sum(A[red_axis], axis=red_axis)

    return hcl.compute((1,), reduce_all)
def pack(A):
    """Fold groups of four entries of ``A`` into two ``hcl.UInt(4)`` values.

    For each output index ``x`` the reducer visits ``A[x * 4 + (3 - rk)]``
    along the reduce axis ``rk`` (declared as ``(0, 4)``) and updates its
    accumulator as ``y * 2 + x``.
    """
    rk = hcl.reduce_axis(0, 4, name='rk')

    def shift_in(x, y):
        # y is the accumulator (per the original comment).
        return y * 2 + x

    genpack = hcl.reducer(0, shift_in, dtype=hcl.UInt(4))
    packed = hcl.compute(
        (2,),
        lambda x: genpack(A[x * 4 + (3 - rk)], axis=rk),
        dtype=hcl.UInt(4))
    # Alternative left disabled by the original author:
    # pack = hcl.pack(A, axis=0, factor=4, dtype=hcl.UInt(4))
    return packed
def kernel(A):
    """Build a tensor of shape ``A.shape`` using a sorting reducer.

    The reducer state is a 10-element tensor initialised to 11. For each
    reduced element ``x`` it scans the state; at the first position where
    ``x < Y[pos]`` it copies ``Y[src - 1]`` into ``Y[src]`` for the loop
    declared as ``hcl.for_(9, pos, -1)``, stores ``x`` at ``Y[pos]`` and
    breaks out of the scan.
    """
    state_init = hcl.compute((10,), lambda x: 11)

    def insert_in_order(x, Y):
        with hcl.for_(0, 10) as pos:
            with hcl.if_(x < Y[pos]):
                # Make room for x by moving the tail one slot up.
                with hcl.for_(9, pos, -1) as src:
                    Y[src] = Y[src - 1]
                Y[pos] = x
                hcl.break_()

    sorter = hcl.reducer(state_init, insert_in_order)
    red_axis = hcl.reduce_axis(0, 10)

    def sorted_column(_x, y):
        return sorter(A[red_axis, y], axis=red_axis)

    return hcl.compute(A.shape, sorted_column)
def kernel(A):
    """Track the two smallest values seen while reducing ``A[r]``.

    The reducer state is a 2-element tensor initialised to 10. A new
    element ``x`` below ``Y[0]`` moves ``Y[0]`` into ``Y[1]`` and takes slot
    0; otherwise it replaces ``Y[1]`` when it is below ``Y[1]``.
    """
    state_init = hcl.compute((2,), lambda x: 10)

    def keep_two_smallest(x, Y):
        with hcl.if_(x < Y[0]):
            # Previous minimum becomes the runner-up.
            Y[1] = Y[0]
            Y[0] = x
        with hcl.else_():
            with hcl.if_(x < Y[1]):
                Y[1] = x

    two_min = hcl.reducer(state_init, keep_two_smallest)
    red_axis = hcl.reduce_axis(0, 10)

    def reduce_all(_x):
        return two_min(A[red_axis], axis=red_axis)

    return hcl.compute((2,), reduce_all)
def kernel(A):
    """Sort every element of 2-D ``A`` into a flat tensor via a reducer.

    The reducer state has ``A.shape[0] * A.shape[1]`` entries initialised
    to 11. Each reduced element is inserted at the first position whose
    current value is larger, after moving the later entries up by one.
    The reduction runs over two axes, each declared as ``(0, 10)``.
    """
    flat_len = A.shape[0] * A.shape[1]
    state_init = hcl.compute((flat_len,), lambda x: 11)

    def insert_in_order(x, Y):
        with hcl.for_(0, Y.shape[0]) as pos:
            with hcl.if_(x < Y[pos]):
                # Make room for x by moving the tail one slot up.
                with hcl.for_(Y.shape[0] - 1, pos, -1) as src:
                    Y[src] = Y[src - 1]
                Y[pos] = x
                hcl.break_()

    sorter = hcl.reducer(state_init, insert_in_order)
    row_axis = hcl.reduce_axis(0, 10)
    col_axis = hcl.reduce_axis(0, 10)

    def sort_everything(_x):
        return sorter(A[row_axis, col_axis], axis=[row_axis, col_axis])

    return hcl.compute(state_init.shape, sort_everything)
def max_pool2d_nhwc(data, pooling, stride=[1, 1], padding=[0, 0], name='max_pool2d'):
    """Max-pool a 4-D tensor whose shape unpacks as (batch, height, width, channel).

    Parameters
    ----------
    data : tensor
        4-D input; it is padded with ``tvm.min_value(data.dtype)``.
    pooling : pair
        Unpacked as ``(pooling_h, pooling_w)``.
    stride : pair, optional
        Unpacked as ``(stride_h, stride_w)``.
    padding : sequence, optional
        With four entries it is read as (top, left, bottom, right);
        otherwise it is resolved through ``get_pad_tuple``.
    name : str, optional
        Name forwarded to ``hcl.compute``.

    Returns
    -------
    The tensor from ``hcl.compute`` with shape
    ``(batch, out_height, out_width, channel)``.
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"

    # Local max reducer seeded with the smallest value of the input dtype.
    max_reducer = hcl.reducer(
        tvm.min_value(data.dtype),
        lambda x, y: tvm.make.Max(x, y),
        data.dtype)

    pooling_h, pooling_w = pooling
    stride_h, stride_w = stride
    batch, height, width, channel = data.shape

    if len(padding) == 4:
        pad_top, pad_left, pad_bottom, pad_right = (
            padding[0], padding[1], padding[2], padding[3])
    else:
        pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
            padding, (pooling_h, pooling_w))

    # Only the two middle (height, width) dimensions receive padding.
    padded = pad(
        data,
        [0, pad_top, pad_left, 0],
        [0, pad_bottom, pad_right, 0],
        pad_value=tvm.min_value(data.dtype))

    out_height = simplify(
        (height - pooling_h + pad_top + pad_bottom) // stride_h + 1)
    out_width = simplify(
        (width - pooling_w + pad_left + pad_right) // stride_w + 1)

    dheight = hcl.reduce_axis(0, pooling_h)
    dwidth = hcl.reduce_axis(0, pooling_w)

    def window_max(i, h, w, c):
        return max_reducer(
            padded[i, h * stride_h + dheight, w * stride_w + dwidth, c],
            axis=[dheight, dwidth])

    # NOTE(review): 'kernel_h'/'kernel_w' and 'stride_h'/'stride_w' are fed
    # index 1 / index 0, the reverse of the (h, w) unpacking above. Kept as
    # in the original; confirm against whatever consumes these attrs.
    stage_attrs = OrderedDict([
        ('out_img_w', out_width),
        ('out_img_h', out_height),
        ('in_num', channel),
        ('kernel_h', pooling[1]),
        ('kernel_w', pooling[0]),
        ('stride_h', stride[1]),
        ('stride_w', stride[0]),
        ('app_name', tvm.make.StringImm('max_pool'))])

    return hcl.compute(
        (batch, out_height, out_width, channel),
        window_max,
        name=name,
        attrs=stage_attrs)
from collections import OrderedDict
import heterocl as hcl
import heterocl.tvm as tvm
import numpy as np
import hlib
from ..utils import *
from .op import *

# Element type handed to the `sum` and `max` reducers below.
dtype = hcl.Float()

# Module-level reducers. `sum` and `max` rebind the names of the Python
# builtins for the remainder of this module.
sum = hcl.reducer(0, lambda x, y: x + y, dtype)
# NOTE(review): the first argument is -1; if it acts as the starting value
# (as it appears to), results can never fall below -1 - confirm intended.
max = hcl.reducer(-1, lambda x, y: tvm.make.Max(x, y), dtype)
_all = hcl.reducer(True, lambda x, y: x & y, bool)

def simplify(expr):
    """Run ``tvm.ir_pass.Simplify`` on TVM expressions; return anything else unchanged."""
    return tvm.ir_pass.Simplify(expr) if isinstance(expr, tvm.expr.Expr) else expr

def pad(data, pad_before, pad_after=None, pad_value=0.0, name="pad"):
    # Number of dimensions of the input; both pad lists must match it.
    n = len(data.shape)
    # A missing (or empty) pad_after falls back to pad_before.
    pad_after = pad_after if pad_after else pad_before
    if len(pad_before) != n:
        raise ValueError("Input dimension and pad_before dismatch : %d vs %d" % (n, len(pad_before)))
    if len(pad_after) != n:
        raise ValueError("Input dimension and pad_after dismatch : %d vs %d" % (n, len(pad_after)))
    # NOTE(review): the visible source is cut off in the middle of the next
    # expression; the remainder of this function is not shown here.
    out_shape = tuple( tvm.ir_pass.Simplify((data.shape[i] +
def sum(data, axis=None, keepdims=True):
    """Sum ``data`` over one or more axes.

    Parameters
    ----------
    data : tensor
        Input tensor; only its ``shape``, ``dtype`` and indexing are used.
    axis : None, int, or sequence of int, optional
        Axes to reduce. ``None`` (the default) reduces every axis. Negative
        values count from the last axis, both for a single int and for
        entries of a sequence.
    keepdims : bool, optional
        If true, each reduced axis is kept with extent 1 at its original
        position (through ``nn.transpose``). Otherwise the intermediate
        result is passed to ``nn.squeeze``.

    Returns
    -------
    The tensor returned by ``nn.transpose`` or ``nn.squeeze``.

    Raises
    ------
    ValueError
        If an axis is out of range for ``data`` or is listed more than once.
    """
    init_shape = data.shape
    init_dim = len(init_shape)

    # Normalise `axis` to a list of unique, non-negative dimension indices.
    # The original code only handled a single int here, so the default
    # axis=None crashed later on and negative entries in a list were ignored.
    if axis is None:
        requested = list(range(init_dim))
    elif isinstance(axis, int):
        requested = [axis]
    else:
        requested = list(axis)
    axis = []
    for ax in requested:
        dim = ax + init_dim if ax < 0 else ax
        if not 0 <= dim < init_dim:
            raise ValueError(
                "axis %d is out of bounds for input of dimension %d"
                % (ax, init_dim))
        if dim in axis:
            raise ValueError("duplicate value in 'axis'")
        axis.append(dim)

    # One reduce axis per reduced dimension, created in ascending
    # dimension order regardless of the order the caller listed them in.
    reduce_dims = sorted(axis)
    axes = [hcl.reduce_axis(0, init_shape[i]) for i in reduce_dims]
    axis_of_dim = dict(zip(reduce_dims, axes))

    def _new_inx(indices):
        """Translate output indices into an index tuple for ``data``."""
        kept = iter(indices)
        source = []
        for i in range(init_dim):
            if i in axis_of_dim:
                source.append(axis_of_dim[i])
            else:
                # Kept dimensions consume the leading output indices in
                # order; trailing unit-extent indices are never read.
                source.append(next(kept))
        return tuple(source)

    # The intermediate result lists the kept extents first, followed by one
    # unit extent per reduced axis.
    kept_dims = [i for i in range(init_dim) if i not in axis]
    _new_shape = [init_shape[i] for i in kept_dims] + [1] * len(axis)

    # layout[p] is the original dimension stored at position p of `out`;
    # transpose_axes is its inverse, restoring the original ordering.
    layout = kept_dims + axis
    transpose_axes = [layout.index(i) for i in range(init_dim)]

    _sum = hcl.reducer(0, lambda x, y: x + y, data.dtype)
    out = hcl.compute(
        tuple(_new_shape),
        lambda *x: _sum(data[_new_inx(x)], axis=axes))
    if keepdims:
        return nn.transpose(out, transpose_axes)
    # NOTE(review): squeeze is called without an axis list, so kept
    # dimensions of extent 1 are presumably dropped as well - confirm.
    return nn.squeeze(out)
# Stage names for the second unit of stages s1..s4, seven per stage.
unit2_names = [
    's1_u2_b1', 's1_u2_r1', 's1_u2_c1', 's1_u2_b2', 's1_u2_r2', 's1_u2_c2', 's1_u2_add',
    's2_u2_b1', 's2_u2_r1', 's2_u2_c1', 's2_u2_b2', 's2_u2_r2', 's2_u2_c2', 's2_u2_add',
    's3_u2_b1', 's3_u2_r1', 's3_u2_c1', 's3_u2_b2', 's3_u2_r2', 's3_u2_c2', 's3_u2_add',
    's4_u2_b1', 's4_u2_r1', 's4_u2_c1', 's4_u2_b2', 's4_u2_r2', 's4_u2_c2', 's4_u2_add',
]
fnames = ['bn', 'conv0', 'bn0', 'relu0', 'pool0']
enames = ['bn1', 'relu1', 'pool1', 'fc1']
# Concatenation order: unit1, unit2, then the end names, then the front names.
name_pool = unit1_names + unit2_names + enames + fnames

# Module-level reducers; both names rebind Python builtins in this module.
sum = hcl.reducer(0, lambda x, y: x + y, dtype)
max = hcl.reducer(-1, lambda x, y: tvm.make.Max(x, y), dtype)


def softmax(out, x):
    """Write the row-wise softmax of 2-D ``x`` into ``out``.

    The per-row result of the ``max`` reducer is subtracted before
    exponentiating, and every exponential is divided by its row's sum of
    exponentials. Returns whatever ``hcl.update`` returns.
    """
    assert len(x.shape) == 2, "only support 2-dim softmax"
    rows, cols = x.shape

    max_axis = hcl.reduce_axis(0, cols)

    def row_max(i):
        return max(x[i, max_axis], axis=max_axis)

    max_elem = hcl.compute((rows, ), row_max)

    sum_axis = hcl.reduce_axis(0, cols)

    def row_exp_sum(i):
        return sum(tvm.exp(x[i, sum_axis] - max_elem[i]), axis=sum_axis)

    expsum = hcl.compute((rows, ), row_exp_sum)

    def normalised(i, j):
        return tvm.exp(x[i, j] - max_elem[i]) / expsum[i]

    return hcl.update(out, normalised)
def kernel(A):
    """Reduce the first index of ``A`` for each column of a ``(1, 10)`` output.

    Output element ``(x, y)`` is the additive reduction of ``A[r, y]`` over
    the reduce axis declared as ``hcl.reduce_axis(0, 10)``.
    """
    def add(x, y):
        return x+y

    column_sum = hcl.reducer(0, add)
    red_axis = hcl.reduce_axis(0, 10)

    def reduce_column(x, y):
        return column_sum(A[red_axis, y], axis=red_axis)

    return hcl.compute((1, 10), reduce_column)
from collections import OrderedDict
import heterocl as hcl
import heterocl.tvm as tvm
import numpy as np

# Element type handed to the reducers below.
dtype = hcl.Float()

# Module-level reducers; both names rebind Python builtins in this module.
# NOTE(review): both use -1 as their first argument. If that is the starting
# value (as it appears to be), `min` can never exceed -1 and `max` can never
# fall below it - confirm this is intended.
max = hcl.reducer(-1, lambda x, y: tvm.make.Max(x, y), dtype)
min = hcl.reducer(-1, lambda x, y: tvm.make.Min(x, y), dtype)

def _broadcast(shape,*indices):
    """Build an index tuple for a tensor of ``shape`` from output indices.

    Position ``i`` of the result is 0 where ``shape[i] == 1`` and
    ``indices[i]`` otherwise. Dimensions are matched by position only, so
    ``shape`` must not have more entries than the index tuple.
    """
    axes = []
    # The caller passes the whole index tuple as one positional argument.
    indices=indices[0]
    for i in range(len(shape)):
        if(shape[i]==1):
            axes.append(0)
        else:
            axes.append(indices[i])
    axes = tuple(axes)
    return axes

def broadcast_add(input1,input2,name='broadcast_add'):
    """Element-wise ``input1 + input2`` over ``input1.shape``; ``input2`` is indexed through ``_broadcast``."""
    return hcl.compute(input1.shape,lambda *x: input1[x]+input2[_broadcast(input2.shape,x)],name=name)

def broadcast_sub(input1,input2,name='broadcast_sub'):
    """Element-wise ``input1 - input2`` over ``input1.shape``; ``input2`` is indexed through ``_broadcast``."""
    return hcl.compute(input1.shape,lambda *x: input1[x]-input2[_broadcast(input2.shape,x)],name=name)

def broadcast_mul(input1,input2,name='broadcast_mul'):
    """Element-wise ``input1 * input2`` over ``input1.shape``; ``input2`` is indexed through ``_broadcast``."""
    return hcl.compute(input1.shape,lambda *x: input1[x]*input2[_broadcast(input2.shape,x)],name=name)

# NOTE(review): the visible source ends at this signature; the body of
# broadcast_div is not shown here.
def broadcast_div(input1,input2,name='broadcast_div'):
import heterocl as hcl
import heterocl.tvm as tvm
import numpy as np
import numpy.testing as tst
import hlib

dtype = hcl.Float(64)

# Module-level reducers available to the tests in this file.
_sum = hcl.reducer(0, lambda x, y: x + y, dtype)
_max = hcl.reducer(-100000, lambda x, y: tvm.make.Max(x, y), dtype)
_min = hcl.reducer(100000, lambda x, y: tvm.make.Min(x, y), dtype)
_prod = hcl.reducer(1, lambda x, y: x * y, dtype)


def test_exp():
    """Compare ``hlib.op.math.exp`` with ``np.exp`` on random input."""

    def _check(in_shape):
        hcl.init(hcl.Float())
        placeholder = hcl.placeholder(in_shape)

        def math_func(data):
            return hlib.op.math.exp(data)

        schedule = hcl.create_schedule(placeholder, math_func)
        built = hcl.build(schedule)

        # Random inputs drawn from [-5, 5).
        np_in = 10 * np.random.random(in_shape) - 5
        hcl_out = hcl.asarray(np.zeros(in_shape).astype('float32'))
        expected = np.exp(np_in)

        built(hcl.asarray(np_in), hcl_out)
        tst.assert_almost_equal(hcl_out.asnumpy(), expected, decimal=4)

    _check((1, 3))
def kernel(a, A):
    """Reduce ``A[r]`` additively, with ``a`` as the reducer's first argument.

    The reduction runs over ``hcl.reduce_axis(0, 10)`` and fills a
    1-element tensor.
    """
    def add(x, y):
        return x+y

    seeded_sum = hcl.reducer(a, add)
    red_axis = hcl.reduce_axis(0, 10)

    def reduce_all(x):
        return seeded_sum(A[red_axis], axis=red_axis)

    return hcl.compute((1,), reduce_all)
def kernel(A):
    """Reduce ``A[r]`` additively while requesting ``hcl.UInt(2)`` for the reduction.

    The reduction runs over ``hcl.reduce_axis(0, 10)`` and fills a
    1-element tensor; the dtype is passed at the reducer call, not to
    ``hcl.compute``.
    """
    def add(x, y):
        return x+y

    narrow_sum = hcl.reducer(0, add)
    red_axis = hcl.reduce_axis(0, 10)

    def reduce_all(x):
        return narrow_sum(A[red_axis], axis=red_axis, dtype=hcl.UInt(2))

    return hcl.compute((1,), reduce_all)
import heterocl as hcl
import numpy as np
import torch
import torch.nn as nn

# Default data type for every tensor created in this script.
dtype = hcl.Float()
hcl.init(dtype)

# Additive reducer; note that this rebinds the builtin name `sum`.
sum = hcl.reducer(0, lambda x, y: x + y, dtype)


def pool():
    """Build and run a 2x2 average over a 4x4 input.

    The kernel named "B" divides the reduction of each 2x2 window by 4.
    The schedule pipelines axis 1 of stage B and partitions ``A`` with
    ``dim=2`` before building for ``hcl.platform.zc706``.
    """
    A = hcl.placeholder((4, 4), "A", dtype)

    def kernel(A):
        r = hcl.reduce_axis(0, 2)
        c = hcl.reduce_axis(0, 2)
        return hcl.compute(
            (2, 2),
            lambda x, y: sum(A[x * 2 + r, y * 2 + c], axis=[r, c]) / 4,
            "B", dtype)

    s = hcl.create_schedule([A], kernel)
    s[kernel.B].pipeline(kernel.B.axis[1])
    s.partition(A, dim=2)

    target = hcl.platform.zc706
    target.config(compile="vivado_hls",mode="csyn",project="pool.prj")
    # target = None
    f = hcl.build(s, target=target)

    np_A = np.random.randint(0, 10, A.shape)
    hcl_A = hcl.asarray(np_A,dtype)
    # `np.float` was only an alias of the builtin `float`; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    hcl_B = hcl.asarray(np.zeros((2, 2), float),dtype)
    f(hcl_A, hcl_B)

    # PyTorch 2x2 average-pooling layer; not used further in the visible code.
    avgpool = nn.AvgPool2d((2,2))
def kernel(A):
    """Reduce ``A[r]`` additively under the condition ``A[r] > 5``.

    The condition is handed to the reducer through its ``where`` argument.
    The reduction runs over ``hcl.reduce_axis(0, 10)`` and fills a
    1-element tensor.
    """
    def add(x, y):
        return x+y

    filtered_sum = hcl.reducer(0, add)
    red_axis = hcl.reduce_axis(0, 10)

    def reduce_selected(x):
        return filtered_sum(A[red_axis], axis=red_axis, where=A[red_axis]>5)

    return hcl.compute((1,), reduce_selected)
def kernel(A):
    """Reduce 2-D ``A`` additively over two reduce axes into a 1-element tensor.

    Both axes are declared as ``hcl.reduce_axis(0, 10)`` and are passed to
    the reducer together as a list.
    """
    def add(x, y):
        return x+y

    total = hcl.reducer(0, add)
    first_axis = hcl.reduce_axis(0, 10)
    second_axis = hcl.reduce_axis(0, 10)

    def reduce_all(x):
        return total(A[first_axis, second_axis], axis=[first_axis, second_axis])

    return hcl.compute((1,), reduce_all)