def test_const_tensor_float():
    """Check ``hcl.const_tensor`` on NumPy arrays and nested Python lists.

    For each data type / shape pair, one constant tensor is built from a
    random NumPy array and another from the same data as a nested list.
    Their element-wise sum (computed as ``hcl.Float()``) is compared with
    twice the input after casting the input to the data type under test.
    """

    def test_kernel(dtype, size):
        hcl.init(dtype)
        values = numpy.random.rand(*size)
        nested = values.tolist()
        shape = values.shape

        def kernel():
            cp1 = hcl.const_tensor(values)
            cp2 = hcl.const_tensor(nested)
            return hcl.compute(
                shape,
                lambda *x: cp1[x] + cp2[x],
                dtype=hcl.Float(),
            )

        # The placeholder is never passed anywhere; it is still created so
        # that the sequence of HeteroCL API calls is unchanged.
        O = hcl.placeholder(shape)
        schedule = hcl.create_schedule([], kernel)
        run = hcl.build(schedule)

        hcl_O = hcl.asarray(numpy.zeros(shape), dtype=hcl.Float())
        run(hcl_O)

        expected = hcl.cast_np(values, dtype) * 2
        # NOTE(review): rtol=1 is a very permissive relative tolerance --
        # confirm that this is intentional.
        assert numpy.allclose(hcl_O.asnumpy(), expected, rtol=1, atol=1e-5)

    sizes = ((8, 8), (20, 20, 3))

    # Floating point first, one run per shape.
    for size in sizes:
        test_kernel(hcl.Float(), size)

    # Then five randomly chosen bit widths; each is used for signed and
    # unsigned fixed point on both shapes, with four integer bits.
    for _ in range(5):
        bit = numpy.random.randint(10, 60)
        for size in sizes:
            for make_type in (hcl.Fixed, hcl.UFixed):
                test_kernel(make_type(bit, bit - 4), size)
def test_dtype_cast():
    """Build and run add/subtract kernels for every ordered type triple.

    Each triple picks the data types of the two inputs and of both outputs.
    The test only checks that building and executing succeeds; the numeric
    results are not inspected (see the TODO below).
    """
    from itertools import permutations

    def _test_body(dtype1, dtype2, dtype3):
        hcl.init()
        shape = (100, )
        A = hcl.placeholder(shape, dtype=dtype1)
        B = hcl.placeholder(shape, dtype=dtype2)

        def kernel(A, B):
            C = hcl.compute(shape, lambda x: A[x] + B[x], dtype=dtype3)
            D = hcl.compute(shape, lambda x: A[x] - B[x], dtype=dtype3)
            return C, D

        schedule = hcl.create_schedule([A, B], kernel)
        run = hcl.build(schedule)

        # Random draws happen in the same order as before: A, B, C, D.
        in_a = np.random.rand(100) * 100
        in_b = np.random.rand(100) * 100
        out_c = np.random.rand(100)
        out_d = np.random.rand(100)

        run(
            hcl.asarray(in_a, dtype1),
            hcl.asarray(in_b, dtype2),
            hcl.asarray(out_c, dtype3),
            hcl.asarray(out_d, dtype3),
        )
        # TODO: check results using HLS CSIM

    candidates = [
        hcl.UInt(1),
        hcl.Int(1),
        hcl.UInt(10),
        hcl.Int(10),
        hcl.UInt(32),
        hcl.Int(32),
        hcl.UFixed(4, 2),
        hcl.Fixed(4, 2),
        hcl.UFixed(32, 16),
        hcl.Fixed(32, 16),
        hcl.Float(),
    ]
    for first, second, third in permutations(candidates, 3):
        _test_body(first, second, third)
def test_uint_int():
    """Check the cast emitted for a select between two fixed-point tensors.

    ``A`` is signed fixed point and ``B`` is unsigned fixed point; the kernel
    selects between them per element and stores the result as ``Int(8)``.
    The generated Vivado HLS code must contain the expected cast applied to
    ``A``.
    """
    shape = (8, 8)
    A = hcl.placeholder(shape, "A", dtype=hcl.Fixed(20, 12))
    B = hcl.placeholder(shape, "B", dtype=hcl.UFixed(16, 12))

    def kernel(A, B):
        return hcl.compute(
            shape,
            lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
            "C",
            dtype=hcl.Int(8),
        )

    scheme = hcl.create_scheme([A, B], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    generated = hcl.build(schedule, target="vhls")

    # NOTE(review): the expected cast is to an *unsigned* fixed type even
    # though A is declared signed -- confirm this matches the code generator.
    assert "ap_ufixed<20, 8>)A" in generated
def test_ac_fixed():
    """Check the ``ac_fixed`` type strings emitted for the ``ihls`` target.

    Signed and unsigned 5-bit inputs are added into a signed 7-bit result;
    the generated code must mention the matching ``ac_fixed`` declarations.
    """
    hcl.init()
    shape = (1, 32)
    A = hcl.placeholder(shape, dtype=hcl.Fixed(5, 3))
    B = hcl.placeholder(shape, dtype=hcl.UFixed(5, 3))
    C = hcl.compute(
        A.shape,
        lambda i, j: A[i][j] + B[i][j],
        dtype=hcl.Fixed(7, 4),
    )

    schedule = hcl.create_schedule([A, B, C])
    generated = hcl.build(schedule, target='ihls')

    expected_types = (
        "ac_fixed<5, 2, true>",
        "ac_fixed<5, 2, false>",
        "ac_fixed<7, 3, true>",
    )
    for type_string in expected_types:
        assert type_string in generated
def test_fixed():
    """Check fixed-point type strings in the code generated for ``target``.

    ``target`` and ``strings`` are not defined in this function; they are
    read from an enclosing or module scope that is not visible here.
    Entries 3, 4 and 5 of ``strings`` must each occur in the generated code.
    """
    hcl.init()
    shape = (1, 32)
    A = hcl.placeholder(shape, dtype=hcl.Fixed(5, 3))
    B = hcl.placeholder(shape, dtype=hcl.UFixed(5, 3))
    C = hcl.compute(
        A.shape,
        lambda i, j: A[i][j] + B[i][j],
        dtype=hcl.Fixed(7, 4),
    )

    schedule = hcl.create_schedule([A, B, C])
    generated = hcl.build(schedule, target=target)

    # Index one entry at a time so a short ``strings`` still fails loudly.
    for position in (3, 4, 5):
        assert strings[position] in generated
def Sigmoid(exponent):
    """Piecewise sigmoid built from HeteroCL imperative statements.

    The result saturates to 1.0 when ``exponent`` is above 4.0 and to 0.0
    when it is below -4.0. In between, the value is read from ``lut`` at an
    index derived from bit slices of ``exponent``.

    ``FTYPE`` and ``lut`` are not defined in this function; they come from a
    scope that is not visible here.
    """
    # Single-element scalar holding the result, named "sigmoid", typed FTYPE.
    ret = hcl.scalar(0.0, "sigmoid", FTYPE)
    with hcl.if_(exponent > hcl.cast(FTYPE, 4.0)):
        ret[0] = 1.0
    with hcl.elif_(exponent < hcl.cast(FTYPE, -4.0)):
        ret[0] = 0.0
    with hcl.else_():
        with hcl.if_(exponent < hcl.cast(FTYPE, 0.0)):
            # Negative input: copy a bit slice of the input into an unsigned
            # fixed-point temporary, then apply shift / invert / add-one.
            # NOTE(review): looks like a two's-complement negation used to
            # obtain the magnitude -- confirm against FTYPE's bit layout.
            num = hcl.scalar(0, dtype=hcl.UFixed(18, 8))
            num[0][18:0] = exponent[29:11]
            num[0] = ~(num[0] << 8) + 1
            # The table is addressed from 2047 downwards here; presumably
            # lut has at least 2048 entries -- TODO confirm.
            index = 2047.0 - num[0]
            ret[0] = lut[hcl.cast(hcl.Int(32), index)]
        with hcl.else_():
            # Non-negative input: a bit slice of the input is the index.
            index = exponent[21:11]
            ret[0] = lut[hcl.cast(hcl.Int(32), index)]
    return ret[0]
def test_dtype_compute_fixed():
    """Compare fixed-point add, subtract and multiply with a NumPy reference.

    The reference is computed from the inputs as read back from HeteroCL
    and then cast with ``hcl.cast_np``. Division is deliberately not
    exercised (the original notes that it is not recommended).
    """

    def _test_dtype(dtype):
        hcl.init(dtype)
        A = hcl.placeholder((100,))
        B = hcl.placeholder((100,))

        def kernel(A, B):
            C = hcl.compute(A.shape, lambda x: A[x] + B[x])
            D = hcl.compute(A.shape, lambda x: A[x] - B[x])
            E = hcl.compute(A.shape, lambda x: A[x] * B[x])
            return C, D, E

        f = hcl.build(hcl.create_schedule([A, B], kernel))

        # Inputs are offset by 0.1, so every value is at least 0.1.
        np_A = np.random.rand(*A.shape) + 0.1
        np_B = np.random.rand(*B.shape) + 0.1
        hcl_A = hcl.asarray(np_A)
        hcl_B = hcl.asarray(np_B)
        outputs = [hcl.asarray(np.zeros(A.shape)) for _ in range(3)]

        f(hcl_A, hcl_B, *outputs)

        lhs = hcl_A.asnumpy()
        rhs = hcl_B.asnumpy()
        references = (lhs + rhs, lhs - rhs, lhs * rhs)
        for reference, produced in zip(references, outputs):
            assert np.allclose(hcl.cast_np(reference, dtype),
                               produced.asnumpy())

    # Ten rounds over total widths 6, 10, ..., 62 with two integer bits.
    # The original carries the note "To avoid floating point exception
    # during division" at this loop.
    for _ in range(10):
        for bits in range(6, 66, 4):
            for make_type in (hcl.UFixed, hcl.Fixed):
                _test_dtype(make_type(bits, bits - 2))
def test_dtype_overflow_ufixed():
    """Check that unsigned fixed-point conversion wraps around on overflow.

    Random values in [0, 10) go through ``hcl.asarray`` and back. The result
    must equal a pure-Python reference that scales by the fractional bits,
    truncates to an integer, wraps modulo ``2 ** bits`` and scales back.
    """

    def _test_dtype(dtype):
        hcl.init(dtype)
        samples = np.random.rand(100) * 10
        round_trip = hcl.asarray(samples).asnumpy()

        scale = 1 << dtype.fracs
        modulus = 1 << dtype.bits

        def wrap(value):
            # Truncate toward zero, wrap, then return to the original scale.
            wrapped = int(value * scale) % modulus
            return float(wrapped) / scale

        expected = np.vectorize(wrap)(samples)
        assert np.array_equal(round_trip, expected)

    # Ten rounds over total widths 2, 6, ..., 62 with two integer bits.
    for _ in range(10):
        for bits in range(2, 66, 4):
            _test_dtype(hcl.UFixed(bits, bits - 2))
In this example, we demonstrate how to use a While loop in HeteroCL. """ import heterocl as hcl import numpy as np import time #lenA = 128 lenA = 28 #lenB = 128 lenB = 28 #num = 1024 num = 64 penalty = -4 hcl.init() dtype = hcl.UFixed(3) mtype = hcl.Int(16) def top(target=None): def smith_waterman(seqA, seqB, consA, consB): def similarity_score(a, b): return hcl.select(a == b, 1, penalty) def find_max(A, len_): max_ = hcl.local(A[0], "max") act_ = hcl.local(0, "act") with hcl.for_(0, len_) as i: with hcl.if_(A[i] > max_[0]): max_[0] = A[i] act_[0] = i
hcl.print(A[5], "%.4f\n") s = hcl.create_schedule([A], kernel) f = hcl.build(s) np_A = np.random.rand(10) hcl_A = hcl.asarray(np_A) f(hcl_A) print("%.4f" % hcl_A.asnumpy()[5]) # case4: fixed points hcl.init(hcl.UFixed(6, 4)) A = hcl.placeholder((10, )) def kernel(A): hcl.print(A[5], "%.4f\n") s = hcl.create_schedule([A], kernel) f = hcl.build(s) np_A = np.random.rand(10) hcl_A = hcl.asarray(np_A) f(hcl_A)
import torch
import torch.nn as nn
import heterocl as hcl
from weight_quant import weight_quantize_fn
from ultranet_model import ultranet

###############################################################################
# Define Data Types
###############################################################################
# Default data type for anything that does not specify one explicitly.
hcl.init(hcl.Float(32))
# Fixed(b, f) / UFixed(b, f): b total bits, f of them fractional.
input_dtype = hcl.Fixed(8, 4)
weight_dtype = hcl.Fixed(5, 3)  # TODO: why hcl.Fixed(4,4) doesn't work
act_dtype = hcl.UFixed(6, 4)
bn_a_dtype = hcl.Fixed(14, 10)  # TODO some 14 bit fixed pt, this seems to work well
bn_b_dtype = hcl.Fixed(26, 18)  # TODO some 26 bit fixed pt, this seems to work well
conv_dtype = hcl.Fixed(16, 8)

###############################################################################
# Define parameters and images
###############################################################################
# Image to process; the commented-out lines are alternative sample images.
image_path = './test_images/boat1_000001.jpg'
# image_path = './test_images/person23_0113.jpg'
# image_path = './test_images/car1_0001.jpg'
# presumably the dimensions of the raw image on disk -- TODO confirm
raw_height = 360
raw_width = 640
# presumably the dimensions the image is resized to -- TODO confirm
width = 320
height = 160
HeteroCL. """ import heterocl as hcl ############################################################################## # Data Types Supported by HeteroCL # -------------------------------- # HeteroCL supports both bit-accurate data types and floating points. We show # some examples below. If no argument is provided, the default bitwidth for # each type is 32. hcl.Int(15) # 15-bit signed integer hcl.UInt(24) # 24-bit unsigned integer hcl.Fixed(13, 5) # 13-bit signed fixed point with 5 fractional bits hcl.UFixed(44, 30) # 44-bit unsigned fixed point with 30 fractional bits hcl.Float(32) # single-precision floating point hcl.Float(64) # double-precision floating point ############################################################################## # These data types can be used in ``hcl.init`` to set the default data type hcl.init(hcl.Float()) ############################################################################## # Data Type Customization # ----------------------- # Another important hardware customization is data type customization, which # can be data quantization or downsizing a data type. Data quantization has # been proved to improve hardware efficiency in many accelerators. In HeteroCL, # to apply data type customization, we need to use ``hcl.create_scheme``,
import heterocl as hcl import numpy as np from ultranet_model import ultranet from main_single_input import load_np_params, load_image input_dtype = hcl.Fixed(8, 4) weight_dtype = hcl.Fixed(5, 3) act_dtype = hcl.UFixed(4, 4) bn_a_dtype = hcl.Fixed(14, 10) bn_b_dtype = hcl.Fixed(26, 18) conv_dtype = hcl.Fixed(16, 8) batch_size = 1 # image_path = "./example_images/example_1.jpg" image_path = "./test_images/boat1_000001.jpg" project_name = "no_float_full_stream_" # customizations stream = True opt = True partition = True def build_ultranet_hls(batch_size=batch_size, target=None): # set up input/output placeholders input_image = hcl.placeholder((batch_size, 3, 160, 320), dtype=input_dtype, name="input_image")