def kernel(A):
    # my_sum performs the reduction in floating point: dtype=hcl.Float()
    # is passed both to the reducer call and to hcl.compute
    my_sum = hcl.reducer(0, lambda x, y: x + y)
    r = hcl.reduce_axis(0, 10)
    return hcl.compute((1,),
                       lambda x: my_sum(A[r], axis=r, dtype=hcl.Float()),
                       dtype=hcl.Float())
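These `kernel` snippets all plug into the same HeteroCL driver flow. A minimal sketch for this first kernel (the placeholder shape and sample data are assumptions, following the pattern of Example #16 below):

import heterocl as hcl
import numpy as np

hcl.init()
A = hcl.placeholder((10,), "A")        # matches reduce_axis(0, 10)
s = hcl.create_schedule([A], kernel)
f = hcl.build(s)
hcl_A = hcl.asarray(np.random.randint(0, 10, A.shape))
hcl_B = hcl.asarray(np.zeros((1,)), hcl.Float())   # result buffer
f(hcl_A, hcl_B)
print(hcl_B.asnumpy())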
def kernel(A):
    # The reducer body may contain imperative control flow; x is the
    # incoming element and y is the running accumulator.
    def reducer_body(x, y):
        with hcl.if_(x > 5):
            hcl.return_(y + 1)
        with hcl.else_():
            hcl.return_(y + 2)
    my_sum = hcl.reducer(0, reducer_body)
    r = hcl.reduce_axis(0, 10)
    return hcl.compute((1,), lambda x: my_sum(A[r], axis=r))
Example #3
def pack(A):
    rk = hcl.reduce_axis(0, 4, name='rk')
    # y is the accumulator: each step doubles it and adds the next input
    # bit, so A[x*4+3] ends up in the MSB of the packed word
    genpack = hcl.reducer(0, lambda x, y: y * 2 + x, dtype=hcl.UInt(4))
    pack = hcl.compute((2,),
                       lambda x: genpack(A[x * 4 + (3 - rk)], axis=rk),
                       dtype=hcl.UInt(4))
    # equivalent built-in: hcl.pack(A, axis=0, factor=4, dtype=hcl.UInt(4))
    return pack
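The reducer consumes A[x*4+3] first, and every later step doubles the accumulator, so the earliest bit drifts into the most significant position. A plain-Python check of the packing arithmetic (the bit values here are made up for illustration):

bits = [1, 0, 1, 1]                   # b0, b1, b2, b3
word = 0
for rk in range(4):
    word = word * 2 + bits[3 - rk]    # b3 is consumed first -> ends up as MSB
assert word == 0b1101                 # b3 b2 b1 b0 == 13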
def kernel(A):
    # Tensor-valued accumulator: init holds 10 sentinels (11 is assumed
    # larger than any input) and freduce insertion-sorts each element x
    # into Y, so output[x, y] is the x-th smallest element of column y.
    init = hcl.compute((10,), lambda x: 11)
    def freduce(x, Y):
        with hcl.for_(0, 10) as i:
            with hcl.if_(x < Y[i]):
                with hcl.for_(9, i, -1) as j:
                    Y[j] = Y[j-1]
                Y[i] = x
                hcl.break_()
    my_sort = hcl.reducer(init, freduce)
    r = hcl.reduce_axis(0, 10)
    return hcl.compute(A.shape, lambda _x, y: my_sort(A[r, y], axis=r))
def kernel(A):
    # Keeps the two smallest elements seen so far, with Y[0] <= Y[1];
    # the sentinel 10 is assumed larger than any input.
    init = hcl.compute((2,), lambda x: 10)
    def freduce(x, Y):
        with hcl.if_(x < Y[0]):
            Y[1] = Y[0]
            Y[0] = x
        with hcl.else_():
            with hcl.if_(x < Y[1]):
                Y[1] = x
    my_min = hcl.reducer(init, freduce)
    r = hcl.reduce_axis(0, 10)
    return hcl.compute((2,), lambda _x: my_min(A[r], axis=r))
def kernel(A):
    # Same insertion-sort reduction, but over two reduce axes: every
    # element of the 2-D input is sorted into one flattened output.
    init = hcl.compute((A.shape[0]*A.shape[1],), lambda x: 11)
    def freduce(x, Y):
        with hcl.for_(0, Y.shape[0]) as i:
            with hcl.if_(x < Y[i]):
                with hcl.for_(Y.shape[0]-1, i, -1) as j:
                    Y[j] = Y[j-1]
                Y[i] = x
                hcl.break_()
    my_sort = hcl.reducer(init, freduce)
    rx = hcl.reduce_axis(0, 10)
    ry = hcl.reduce_axis(0, 10)
    return hcl.compute(init.shape, lambda _x: my_sort(A[rx, ry], axis=[rx, ry]))
Example #7
def max_pool2d_nhwc(data,
                    pooling,
                    stride=[1, 1],
                    padding=[0, 0],
                    name='max_pool2d'):
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"
    max = hcl.reducer(tvm.min_value(data.dtype),
                      lambda x, y: tvm.make.Max(x, y), data.dtype)
    pooling_h, pooling_w = pooling
    stride_h, stride_w = stride
    batch, height, width, channel = data.shape
    if len(padding) == 4:
        pad_top = padding[0]
        pad_left = padding[1]
        pad_bottom = padding[2]
        pad_right = padding[3]
    else:
        pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
            padding, (pooling_h, pooling_w))
    pad_before = [0, pad_top, pad_left, 0]
    pad_after = [0, pad_bottom, pad_right, 0]
    data = pad(data,
               pad_before,
               pad_after,
               pad_value=tvm.min_value(data.dtype))
    out_height = simplify((height - pooling_h + pad_top + pad_bottom) //
                          stride_h + 1)
    out_width = simplify((width - pooling_w + pad_left + pad_right) //
                         stride_w + 1)
    dheight = hcl.reduce_axis(0, pooling_h)
    dwidth = hcl.reduce_axis(0, pooling_w)
    return hcl.compute(
        (batch, out_height, out_width, channel),
        lambda i, h, w, c: max(data[i, h * stride_h + dheight, w * stride_w +
                                    dwidth, c],
                               axis=[dheight, dwidth]),
        name=name,
        attrs=OrderedDict([('out_img_w', out_width), ('out_img_h', out_height),
                           ('in_num', channel), ('kernel_h', pooling_h),
                           ('kernel_w', pooling_w), ('stride_h', stride_h),
                           ('stride_w', stride_w),
                           ('app_name', tvm.make.StringImm('max_pool'))]))
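A hypothetical invocation, assuming the helper module from Example #8 below (pad, simplify, get_pad_tuple) is in scope; the NHWC input shape is made up:

import heterocl as hcl

hcl.init(hcl.Float())
data = hcl.placeholder((1, 8, 8, 3), "data")   # NHWC

def kernel(data):
    # 2x2 window, stride 2 -> output shape (1, 4, 4, 3)
    return max_pool2d_nhwc(data, pooling=[2, 2], stride=[2, 2])

s = hcl.create_schedule([data], kernel)
f = hcl.build(s)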
Example #8
from collections import OrderedDict
import heterocl as hcl
import heterocl.tvm as tvm
import numpy as np
import hlib
from ..utils import *
from .op import *

dtype = hcl.Float()

sum = hcl.reducer(0, lambda x, y: x + y, dtype)
max = hcl.reducer(-1, lambda x, y: tvm.make.Max(x, y), dtype)
_all = hcl.reducer(True, lambda x, y: x & y, bool)


def simplify(expr):
    return tvm.ir_pass.Simplify(expr) if isinstance(expr,
                                                    tvm.expr.Expr) else expr


def pad(data, pad_before, pad_after=None, pad_value=0.0, name="pad"):
    n = len(data.shape)
    pad_after = pad_after if pad_after else pad_before
    if len(pad_before) != n:
        raise ValueError("Input dimension and pad_before mismatch : %d vs %d" %
                         (n, len(pad_before)))
    if len(pad_after) != n:
        raise ValueError("Input dimension and pad_after mismatch : %d vs %d" %
                         (n, len(pad_after)))
    out_shape = tuple(
        tvm.ir_pass.Simplify(data.shape[i] + pad_before[i] + pad_after[i])
        for i in range(n))
    # The original snippet is cut off here; the body below is a
    # best-effort completion following the standard TVM topi pad pattern.
    def _pad(*indices):
        index_tuple, in_bounds = [], []
        for i in range(n):
            index_tuple.append(indices[i] - pad_before[i])
            in_bounds.append(indices[i] >= pad_before[i])
            in_bounds.append(indices[i] < data.shape[i] + pad_before[i])
        return tvm.select(tvm.all(*in_bounds),
                          data[tuple(index_tuple)], pad_value)
    return hcl.compute(out_shape, _pad, name=name)
Example #9
def sum(data, axis=None, keepdims=True):
    init_shape = data.shape
    init_dim = len(init_shape)
    new_shape = []
    new_axis = []
    if axis is None:
        # normalize: axis=None means reduce over every dimension, so the
        # later membership tests on `axis` do not crash
        axis = list(range(init_dim))
    elif isinstance(axis, int):
        if axis < 0:
            axis = init_dim + axis
        axis = [axis]
    for i in range(len(init_shape)):
        if i in axis:
            new_axis.append(i)
            if keepdims:
                new_shape.append(1)
        else:
            new_shape.append(init_shape[i])

    def _new_axes(axis, init_shape):
        new_axes = []
        for i in range(len(init_shape)):
            if i in axis:
                new_axes.append(hcl.reduce_axis(0, init_shape[i]))
        return new_axes

    def _new_inx(axis, axes, init_shape, *indices):
        indices = indices[0]
        init_dim = len(init_shape)
        new_axis = []
        inx = 0
        axis_inx = 0
        for i in range(init_dim):
            if i in axis:
                new_axis.append(axes[axis_inx])
                axis_inx = axis_inx + 1
            else:
                new_axis.append(indices[inx])
                inx = inx + 1
        return tuple(new_axis)

    axes = _new_axes(new_axis, init_shape)
    axis_len = len(axis)
    temp_transpose = []
    _new_shape = []

    for i in range(len(init_shape)):
        if i not in axis:
            _new_shape.append(init_shape[i])
            temp_transpose.append(i)
    for i in range(len(axis)):
        temp_transpose.append(axis[i])
    while len(_new_shape) < len(init_shape):
        _new_shape.append(1)
    transpose_axes = []
    for i in range(len(temp_transpose)):
        transpose_axes.append(temp_transpose.index(i))
    _sum = hcl.reducer(0, lambda x, y: x + y, data.dtype)
    out = hcl.compute(
        tuple(_new_shape),
        lambda *x: _sum(data[_new_inx(new_axis, axes, init_shape, x)],
                        axis=axes))
    if keepdims:
        return nn.transpose(out, transpose_axes)
    else:
        return nn.squeeze(out)
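A hedged usage sketch; it assumes this sum lives in a module where nn.transpose and nn.squeeze (hlib's nn ops) are importable:

import heterocl as hcl

hcl.init()
A = hcl.placeholder((2, 3, 4), "A")

def kernel(A):
    return sum(A, axis=1, keepdims=False)   # reduces axis 1 -> shape (2, 4)

s = hcl.create_schedule([A], kernel)
f = hcl.build(s)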
Example #10
unit2_names = [
    's1_u2_b1', 's1_u2_r1', 's1_u2_c1', 's1_u2_b2', 's1_u2_r2', 's1_u2_c2',
    's1_u2_add', 's2_u2_b1', 's2_u2_r1', 's2_u2_c1', 's2_u2_b2', 's2_u2_r2',
    's2_u2_c2', 's2_u2_add', 's3_u2_b1', 's3_u2_r1', 's3_u2_c1', 's3_u2_b2',
    's3_u2_r2', 's3_u2_c2', 's3_u2_add', 's4_u2_b1', 's4_u2_r1', 's4_u2_c1',
    's4_u2_b2', 's4_u2_r2', 's4_u2_c2', 's4_u2_add'
]

fnames = ['bn', 'conv0', 'bn0', 'relu0', 'pool0']

enames = ['bn1', 'relu1', 'pool1', 'fc1']

name_pool = unit1_names + unit2_names + enames + fnames

sum = hcl.reducer(0, lambda x, y: x + y, dtype)
max = hcl.reducer(-1, lambda x, y: tvm.make.Max(x, y), dtype)


def softmax(out, x):
    assert len(x.shape) == 2, "only support 2-dim softmax"
    m, n = x.shape
    k = hcl.reduce_axis(0, n)
    max_elem = hcl.compute((m, ), lambda i: max(x[i, k], axis=k))
    k = hcl.reduce_axis(0, n)
    expsum = hcl.compute((m, ),
                         lambda i: sum(tvm.exp(x[i, k] - max_elem[i]), axis=k))
    return hcl.update(out,
                      lambda i, j: tvm.exp(x[i, j] - max_elem[i]) / expsum[i])
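Because softmax writes its result through hcl.update, the output tensor is passed in rather than allocated inside. A minimal sketch (the shapes are assumptions):

import heterocl as hcl

hcl.init(hcl.Float())
x = hcl.placeholder((4, 10), "x")
out = hcl.placeholder((4, 10), "out")

def kernel(out, x):
    return softmax(out, x)

s = hcl.create_schedule([out, x], kernel)
f = hcl.build(s)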

def kernel(A):
    # reduce over axis r only; the free index y survives, giving one
    # partial sum per column
    my_sum = hcl.reducer(0, lambda x, y: x + y)
    r = hcl.reduce_axis(0, 10)
    return hcl.compute((1, 10), lambda x, y: my_sum(A[r, y], axis=r))
Example #12
from collections import OrderedDict
import heterocl as hcl
import heterocl.tvm as tvm
import numpy as np

dtype = hcl.Float()

max = hcl.reducer(-1, lambda x, y: tvm.make.Max(x, y), dtype)
min = hcl.reducer(-1, lambda x, y: tvm.make.Min(x, y), dtype)

def _broadcast(shape, *indices):
    # map an output index onto a possibly smaller input shape:
    # dimensions of extent 1 are broadcast by indexing them with 0
    axes = []
    indices = indices[0]
    for i in range(len(shape)):
        if shape[i] == 1:
            axes.append(0)
        else:
            axes.append(indices[i])
    return tuple(axes)

def broadcast_add(input1, input2, name='broadcast_add'):
    return hcl.compute(input1.shape,
                       lambda *x: input1[x] + input2[_broadcast(input2.shape, x)],
                       name=name)

def broadcast_sub(input1, input2, name='broadcast_sub'):
    return hcl.compute(input1.shape,
                       lambda *x: input1[x] - input2[_broadcast(input2.shape, x)],
                       name=name)

def broadcast_mul(input1, input2, name='broadcast_mul'):
    return hcl.compute(input1.shape,
                       lambda *x: input1[x] * input2[_broadcast(input2.shape, x)],
                       name=name)

def broadcast_div(input1, input2, name='broadcast_div'):
    # truncated in the source; completed here by analogy with the
    # operators above
    return hcl.compute(input1.shape,
                       lambda *x: input1[x] / input2[_broadcast(input2.shape, x)],
                       name=name)
Example #13
import heterocl as hcl
import heterocl.tvm as tvm
import numpy as np
import numpy.testing as tst
import hlib

dtype = hcl.Float(64)

_sum = hcl.reducer(0, lambda x, y: x + y, dtype)
_max = hcl.reducer(-100000, lambda x, y: tvm.make.Max(x, y), dtype)
_min = hcl.reducer(100000, lambda x, y: tvm.make.Min(x, y), dtype)
_prod = hcl.reducer(1, lambda x, y: x * y, dtype)


def test_exp():
    def _test(in_shape):
        hcl.init(hcl.Float())
        data = hcl.placeholder(in_shape)

        def math_func(data):
            return hlib.op.math.exp(data)

        s = hcl.create_schedule([data], math_func)
        f = hcl.build(s)
        _in = 10 * np.random.random(in_shape) - 5
        out = hcl.asarray(np.zeros(in_shape).astype('float32'))
        real_out = np.exp(_in)
        f(hcl.asarray(_in), out)
        tst.assert_almost_equal(out.asnumpy(), real_out, 4)

    _test((1, 3))
def kernel(a, A):
    # the reducer's initial value can itself be a kernel argument
    my_sum = hcl.reducer(a, lambda x, y: x + y)
    r = hcl.reduce_axis(0, 10)
    return hcl.compute((1,), lambda x: my_sum(A[r], axis=r))
def kernel(A):
    my_sum = hcl.reducer(0, lambda x, y: x + y)
    r = hcl.reduce_axis(0, 10)
    # dtype here makes the accumulation happen in UInt(2)
    return hcl.compute((1,), lambda x: my_sum(A[r], axis=r, dtype=hcl.UInt(2)))
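With dtype=hcl.UInt(2) the running sum is kept in a 2-bit unsigned value, so it wraps around modulo 4; presumably the point of this variant is to exercise reduction in a type narrower than the inputs.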
Example #16
import heterocl as hcl
import numpy as np
import torch
import torch.nn as nn

dtype = hcl.Float()
hcl.init(dtype)
sum = hcl.reducer(0, lambda x, y: x + y, dtype)

def pool():
    A = hcl.placeholder((4, 4), "A", dtype)

    def kernel(A):
        r = hcl.reduce_axis(0, 2)
        c = hcl.reduce_axis(0, 2)
        return hcl.compute((2, 2),
                lambda x, y: sum(A[x * 2 + r, y * 2 + c], axis=[r, c]) / 4, "B", dtype)

    s = hcl.create_schedule([A], kernel)
    s[kernel.B].pipeline(kernel.B.axis[1])
    s.partition(A, dim=2)

    target = hcl.platform.zc706
    target.config(compile="vivado_hls", mode="csyn", project="pool.prj")
    # target = None
    f = hcl.build(s, target=target)
    np_A = np.random.randint(0, 10, A.shape)
    hcl_A = hcl.asarray(np_A, dtype)
    hcl_B = hcl.asarray(np.zeros((2, 2), float), dtype)
    f(hcl_A, hcl_B)
    avgpool = nn.AvgPool2d((2, 2))
def kernel(A):
    my_sum = hcl.reducer(0, lambda x, y: x + y)
    r = hcl.reduce_axis(0, 10)
    # `where` masks the reduction: only elements with A[r] > 5 are summed
    return hcl.compute((1,), lambda x: my_sum(A[r], axis=r, where=A[r] > 5))
def kernel(A):
    # reduce over both axes of the 2-D input at once
    my_sum = hcl.reducer(0, lambda x, y: x + y)
    r1 = hcl.reduce_axis(0, 10)
    r2 = hcl.reduce_axis(0, 10)
    return hcl.compute((1,), lambda x: my_sum(A[r1, r2], axis=[r1, r2]))