def kernel(A, B, C):
    """Round-trip three tensors through a struct-typed tensor.

    The element-wise tuple ``(A[x], B[x], C[x])`` is stored in a tensor whose
    dtype is a three-field struct (``fa``: Int(8), ``fb``: Fixed(13, 11),
    ``fc``: Float()); each field is then read back into its own tensor.

    Returns:
        Tuple of three tensors holding the ``fa``, ``fb`` and ``fc`` fields,
        in that order.
    """
    struct_dtype = hcl.Struct({
        "fa": hcl.Int(8),
        "fb": hcl.Fixed(13, 11),
        "fc": hcl.Float(),
    })
    packed = hcl.compute(
        A.shape, lambda x: (A[x], B[x], C[x]), dtype=struct_dtype)

    # Unpack the fields in declaration order, each with its own dtype.
    fa_out = hcl.compute(
        A.shape, lambda x: packed[x].fa, dtype=hcl.Int(8))
    fb_out = hcl.compute(
        A.shape, lambda x: packed[x].fb, dtype=hcl.Fixed(13, 11))
    fc_out = hcl.compute(
        A.shape, lambda x: packed[x].fc, dtype=hcl.Float())
    return fa_out, fb_out, fc_out
def test_dtype_struct():
    """Check that struct fields read back equal to the values packed in.

    Three input tensors (Int(8), Fixed(13, 11), Float()) are packed into a
    struct tensor and unpacked again; every output must match its input
    after both have gone through ``hcl.asarray``.
    """
    hcl.init()
    A = hcl.placeholder((100,), dtype=hcl.Int(8))
    B = hcl.placeholder((100,), dtype=hcl.Fixed(13, 11))
    C = hcl.placeholder((100,), dtype=hcl.Float())

    def kernel(A, B, C):
        struct_dtype = hcl.Struct({
            "fa": hcl.Int(8),
            "fb": hcl.Fixed(13, 11),
            "fc": hcl.Float(),
        })
        packed = hcl.compute(
            A.shape, lambda x: (A[x], B[x], C[x]), dtype=struct_dtype)
        fa_out = hcl.compute(
            A.shape, lambda x: packed[x].fa, dtype=hcl.Int(8))
        fb_out = hcl.compute(
            A.shape, lambda x: packed[x].fb, dtype=hcl.Fixed(13, 11))
        fc_out = hcl.compute(
            A.shape, lambda x: packed[x].fc, dtype=hcl.Float())
        return fa_out, fb_out, fc_out

    schedule = hcl.create_schedule([A, B, C], kernel)
    func = hcl.build(schedule)

    # Integers drawn from [-250, 249]; reals drawn from [-0.5, 0.5).
    np_A = np.random.randint(0, 500, size=100) - 250
    np_B = np.random.rand(100) - 0.5
    np_C = np.random.rand(100) - 0.5

    inputs = [
        hcl.asarray(np_A, dtype=hcl.Int(8)),
        hcl.asarray(np_B, dtype=hcl.Fixed(13, 11)),
        hcl.asarray(np_C, dtype=hcl.Float()),
    ]
    outputs = [
        hcl.asarray(np.zeros(100), dtype=hcl.Int(8)),
        hcl.asarray(np.zeros(100), dtype=hcl.Fixed(13, 11)),
        hcl.asarray(np.zeros(100), dtype=hcl.Float()),
    ]

    func(*(inputs + outputs))

    # Compare against the converted inputs, not the raw NumPy arrays.
    for source, result in zip(inputs, outputs):
        assert np.allclose(source.asnumpy(), result.asnumpy())
def test_const_tensor_float():
    """Check constant tensors built from an ndarray and from a nested list.

    For each dtype/shape pair the kernel adds two constant tensors created
    from the same random data and the result is compared with twice the
    data after ``hcl.cast_np`` to the dtype under test.
    """

    def test_kernel(dtype, size):
        hcl.init(dtype)
        np_A = numpy.random.rand(*size)
        py_A = np_A.tolist()

        def kernel():
            from_array = hcl.const_tensor(np_A)
            from_list = hcl.const_tensor(py_A)
            return hcl.compute(
                np_A.shape,
                lambda *x: from_array[x] + from_list[x],
                dtype=hcl.Float())

        # Not passed to the schedule; kept because the original declares it.
        O = hcl.placeholder(np_A.shape)
        schedule = hcl.create_schedule([], kernel)
        func = hcl.build(schedule)

        hcl_O = hcl.asarray(numpy.zeros(np_A.shape), dtype=hcl.Float())
        func(hcl_O)

        expected = hcl.cast_np(np_A, dtype) * 2
        # NOTE(review): rtol=1 makes this comparison very permissive --
        # confirm whether such a loose tolerance is intended.
        assert numpy.allclose(hcl_O.asnumpy(), expected, rtol=1, atol=1e-5)

    shapes = ((8, 8), (20, 20, 3))

    for shape in shapes:
        test_kernel(hcl.Float(), shape)

    for _ in range(5):
        # One random total width per round, always with 4 integer bits.
        bit = numpy.random.randint(10, 60)
        for shape in shapes:
            for make_dtype in (hcl.Fixed, hcl.UFixed):
                test_kernel(make_dtype(bit, bit - 4), shape)
def test_ac_fixed():
    """Check the ``ac_fixed`` type strings emitted for the 'ihls' target.

    Adds a Fixed(5, 3) tensor to a UFixed(5, 3) tensor into a Fixed(7, 4)
    result and looks for the corresponding ``ac_fixed<...>`` declarations
    in the generated code.
    """
    hcl.init()
    A = hcl.placeholder((1, 32), dtype=hcl.Fixed(5, 3))
    B = hcl.placeholder((1, 32), dtype=hcl.UFixed(5, 3))
    C = hcl.compute(
        A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Fixed(7, 4))

    schedule = hcl.create_schedule([A, B, C])
    code = hcl.build(schedule, target='ihls')

    expected_types = (
        "ac_fixed<5, 2, true>",
        "ac_fixed<5, 2, false>",
        "ac_fixed<7, 3, true>",
    )
    for type_string in expected_types:
        assert type_string in code
def test_fixed():
    """Check fixed-point type strings in the code generated for ``target``.

    ``target`` and ``strings`` are not defined here; they are read from the
    surrounding scope. Entries 3, 4 and 5 of ``strings`` must each appear
    in the generated code.
    """
    hcl.init()
    A = hcl.placeholder((1, 32), dtype=hcl.Fixed(5, 3))
    B = hcl.placeholder((1, 32), dtype=hcl.UFixed(5, 3))
    C = hcl.compute(
        A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Fixed(7, 4))

    schedule = hcl.create_schedule([A, B, C])
    code = hcl.build(schedule, target=target)

    # Index explicitly (not by slice) so a short ``strings`` still raises.
    for index in (3, 4, 5):
        assert strings[index] in code
def kernel(A, B, C):
    """Pack three tensors into a struct tensor, unpack, and check dtypes.

    Besides producing one output tensor per struct field, this verifies
    that accessing a field of a struct element reports the dtype string
    of that field.

    Returns:
        Tuple of three tensors holding the ``fa``, ``fb`` and ``fc`` fields.
    """
    struct_dtype = hcl.Struct({
        "fa": hcl.Int(8),
        "fb": hcl.Fixed(13, 11),
        "fc": hcl.Float(),
    })
    packed = hcl.compute(
        A.shape, lambda x: (A[x], B[x], C[x]), dtype=struct_dtype)
    fa_out = hcl.compute(
        A.shape, lambda x: packed[x].fa, dtype=hcl.Int(8))
    fb_out = hcl.compute(
        A.shape, lambda x: packed[x].fb, dtype=hcl.Fixed(13, 11))
    fc_out = hcl.compute(
        A.shape, lambda x: packed[x].fc, dtype=hcl.Float())

    # Check the data type reported by each field access.
    expected_dtypes = (
        ("fa", "int8"),
        ("fb", "fixed13_11"),
        ("fc", "float32"),
    )
    for field_name, dtype_string in expected_dtypes:
        assert getattr(packed[0], field_name).dtype == dtype_string

    return fa_out, fb_out, fc_out
def test_dtype_cast():
    """Build and run add/sub kernels for every ordered triple of dtypes.

    For each 3-permutation ``(dtype1, dtype2, dtype3)`` of the candidate
    types, two inputs of ``dtype1`` and ``dtype2`` are added and subtracted
    into outputs of ``dtype3``. Only successful build and execution are
    checked; numerical results are not verified here.
    """
    from itertools import permutations

    def _test_body(dtype1, dtype2, dtype3):
        hcl.init()
        A = hcl.placeholder((100, ), dtype=dtype1)
        B = hcl.placeholder((100, ), dtype=dtype2)

        def kernel(A, B):
            C = hcl.compute((100, ), lambda x: A[x] + B[x], dtype=dtype3)
            D = hcl.compute((100, ), lambda x: A[x] - B[x], dtype=dtype3)
            return C, D

        s = hcl.create_schedule([A, B], kernel)
        f = hcl.build(s)

        npA = np.random.rand(100) * 100
        npB = np.random.rand(100) * 100
        npC = np.random.rand(100)
        npD = np.random.rand(100)

        hclA = hcl.asarray(npA, dtype1)
        hclB = hcl.asarray(npB, dtype2)
        hclC = hcl.asarray(npC, dtype3)
        hclD = hcl.asarray(npD, dtype3)

        f(hclA, hclB, hclC, hclD)

        # TODO: check results using HLS CSIM

    candidate_dtypes = [
        hcl.UInt(1),
        hcl.Int(1),
        hcl.UInt(10),
        hcl.Int(10),
        hcl.UInt(32),
        hcl.Int(32),
        hcl.UFixed(4, 2),
        hcl.Fixed(4, 2),
        hcl.UFixed(32, 16),
        hcl.Fixed(32, 16),
        hcl.Float(),
    ]

    # Iterate the permutations lazily; there is no need to build the full
    # list of triples up front just to loop over it once.
    for dtypes in permutations(candidate_dtypes, 3):
        _test_body(*dtypes)
def test_dtype_large_array():
    """Copy a 1000-element tensor through two stages for several dtypes.

    For every fixed-point type tried, the output of the two-stage identity
    kernel must match the (already converted) input array.
    """

    def test_kernel(dtype):
        hcl.init(dtype)
        A = hcl.placeholder((1000, ))

        def kernel(A):
            staged = hcl.compute(A.shape, lambda x: A[x])
            return hcl.compute(A.shape, lambda x: staged[x])

        schedule = hcl.create_schedule([A], kernel)
        func = hcl.build(schedule)

        source = hcl.asarray(np.random.rand(1000))
        result = hcl.asarray(np.zeros(1000))
        func(source, result)

        assert np.allclose(source.asnumpy(), result.asnumpy())

    # (total bits, fractional bits) for each Fixed type under test.
    widths = (
        (8, 6),
        (16, 14),
        (3, 1),
        (6, 4),
        (11, 9),
        (18, 16),
        (37, 35),
    )
    for bits, fracs in widths:
        test_kernel(hcl.Fixed(bits, fracs))
def test_binary_ops():
    """Check the cast emitted when selecting between Int and Fixed inputs.

    The kernel selects between an Int(20) tensor and a Fixed(16, 12) tensor;
    the generated 'vhls' code must contain the cast
    ``(ap_fixed<32, 20>)B``.
    """
    A = hcl.placeholder((8, 8), "A", dtype=hcl.Int(20))
    B = hcl.placeholder((8, 8), "B", dtype=hcl.Fixed(16, 12))

    def kernel(A, B):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
            "C",
            dtype=hcl.Int(8))

    scheme = hcl.create_scheme([A, B], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    code = hcl.build(schedule, target="vhls")

    assert "(ap_fixed<32, 20>)B" in code
def test_uint_int():
    """Check the cast emitted when selecting between Fixed and UFixed inputs.

    The kernel selects between a Fixed(20, 12) tensor and a UFixed(16, 12)
    tensor; the generated 'vhls' code must contain
    ``ap_ufixed<20, 8>)A``.
    """
    A = hcl.placeholder((8, 8), "A", dtype=hcl.Fixed(20, 12))
    B = hcl.placeholder((8, 8), "B", dtype=hcl.UFixed(16, 12))

    def kernel(A, B):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
            "C",
            dtype=hcl.Int(8))

    scheme = hcl.create_scheme([A, B], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    code = hcl.build(schedule, target="vhls")

    assert "ap_ufixed<20, 8>)A" in code
def test_vhls_host_dtype():
    """Run a Fixed(16, 12) add-one kernel through Vivado HLS C simulation.

    The test returns immediately, without doing anything, when the
    ``vivado_hls`` executable cannot be found on ``PATH``.
    """
    import shutil

    # Look the tool up directly instead of shelling out to `which`, so the
    # check does not depend on a shell or on `which` being installed.
    if shutil.which("vivado_hls") is None:
        return

    dtype = hcl.Fixed(16, 12)
    A = hcl.placeholder((10, 32), "A", dtype=dtype)

    def kernel(A):
        B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B", dtype=dtype)
        return B

    target = hcl.Platform.aws_f1
    target.config(compiler="vivado_hls", mode="csim", project="test")
    s = hcl.create_schedule([A], kernel)
    f = hcl.build(s, target)

    np_A = np.random.randint(10, size=(10, 32))
    np_B = np.zeros((10, 32))

    hcl_A = hcl.asarray(np_A, dtype=hcl.Fixed(16, 12))
    hcl_B = hcl.asarray(np_B, dtype=hcl.Fixed(16, 12))
    f(hcl_A, hcl_B)
def test1():
    """Generate 'vhls' code for a select between two tensors and print it.

    ``A`` uses the default dtype and ``B`` is Fixed(16, 12); the kernel
    picks ``A`` for columns below 4 and ``B`` otherwise.
    """
    A = hcl.placeholder((8, 8), "A")
    B = hcl.placeholder((8, 8), "B", dtype=hcl.Fixed(16, 12))

    def kernel(A, B):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
            "C")

    scheme = hcl.create_scheme([A, B], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    generated = hcl.build(schedule, target="vhls")
    print(generated)
def top(dtype=hcl.Fixed(10, 2)):
    """Build an element-wise ``tanh`` kernel with ``dtype`` as the default type.

    ``A`` is not defined in this function; it is looked up in the
    surrounding scope and used as the single input of the schedule.

    Args:
        dtype: Data type passed to ``hcl.init``.

    Returns:
        The function produced by ``hcl.build``.
    """
    hcl.init(dtype)

    def quantization(A):
        return hcl.compute(A.shape, lambda x: hcl.tanh(A[x]), "B")

    # Build the application without applying any quantization scheme.
    schedule = hcl.create_schedule([A], quantization)
    return hcl.build(schedule)
def test():
    """Build and run a constant-tensor add-one kernel on the zc706 platform.

    The kernel takes no inputs: it adds 1 to a 10x10 constant tensor of
    random values, using Fixed(12, 10) for both the constant and the result.
    The platform is configured for ``vivado_hls`` in ``csyn`` mode.
    """
    dtype = hcl.Fixed(12, 10)

    def kernel():
        const_A = hcl.const_tensor(np.random.random((10, 10)), "A", dtype)
        return hcl.compute(
            const_A.shape, lambda x, y: const_A[x, y] + 1, "B", dtype)

    schedule = hcl.create_schedule([], kernel)

    target = hcl.platform.zc706
    target.config(compile="vivado_hls", mode="csyn")
    func = hcl.build(schedule, target=target)

    result = hcl.asarray(np.zeros((10, 10)))
    func(result)
def add():
    """Build an add-one kernel for aws_f1 with explicit data placement.

    The input ``A`` is moved to the accelerator and the result ``B`` back
    to the host; the object returned by ``hcl.build`` is printed.
    """
    qtype = hcl.Fixed(16, 12)
    A = hcl.placeholder((10,), "A", dtype=qtype)

    def kernel(A):
        return hcl.compute((10,), lambda x: A[x] + 1, "B", dtype=qtype)

    schedule = hcl.create_schedule(A, kernel)
    target = hcl.platform.aws_f1

    # Alternative configurations left by the author:
    #   target.config(compile="vivado_hls", mode="csim")
    #   target.config(compile="vivado_hls", mode="debug")
    #   target.config(compile="vitis", mode="hw_sim", backend="vhls")
    target.config(compile="vitis", mode="debug", backend="vhls")

    schedule.to(A, target.xcel)
    schedule.to(kernel.B, target.host)

    built = hcl.build(schedule, target=target)
    print(built)
def simple_add2():
    """Run a two-stage add-one kernel through Vivado HLS on the zc706 platform.

    The function returns immediately, without doing anything, when the
    ``vivado_hls`` executable cannot be found on ``PATH``. Otherwise it
    builds the kernel in ``csim`` mode and checks the simulated output.
    """
    import shutil

    # Look the tool up directly instead of shelling out to `which`, so the
    # check does not depend on a shell or on `which` being installed.
    if shutil.which("vivado_hls") is None:
        return

    dtype = hcl.Fixed(16, 12)

    def test_hls(target_mode):
        A = hcl.placeholder((10, 32), "A", dtype=dtype)

        def kernel(A):
            B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B",
                            dtype=dtype)
            C = hcl.compute(B.shape, lambda *args: B[args] + 1, "C",
                            dtype=dtype)
            return C

        target = hcl.platform.zc706
        s = hcl.create_schedule([A], kernel)

        # Input goes to the accelerator, the final stage back to the host,
        # and the intermediate stage B is forwarded to stage C.
        s.to(A, target.xcel)
        s.to(kernel.C, target.host)
        s.to(kernel.B, s[kernel.C])
        target.config(compile="vivado_hls", mode=target_mode)
        f = hcl.build(s, target)

        np_A = np.random.randint(10, size=(10, 32))
        np_B = np.zeros((10, 32))

        hcl_A = hcl.asarray(np_A, dtype=dtype)
        hcl_B = hcl.asarray(np_B, dtype=dtype)
        f(hcl_A, hcl_B)
        ret_B = hcl_B.asnumpy()

        if "csyn" in target_mode:
            report = f.report("csyn")
            assert "ReportVersion" in report
        elif "csim" in target_mode:
            # The kernel applies `+ 1` twice (stages B and C), so the
            # expected output is the input plus 2. The previous check
            # compared against `np_A + 3`, which this kernel cannot produce.
            np.testing.assert_array_equal(ret_B, np_A + 2)

    test_hls("csim")
def test_dtype_compute_fixed():
    """Check add, subtract and multiply for a range of fixed-point types.

    For each type the kernel results are compared with the NumPy results
    computed from the converted inputs and cast with ``hcl.cast_np``.
    Division is deliberately left out (the original author notes it is
    not recommended).
    """

    def _test_dtype(dtype):
        hcl.init(dtype)
        A = hcl.placeholder((100,))
        B = hcl.placeholder((100,))

        def kernel(A, B):
            total = hcl.compute(A.shape, lambda x: A[x] + B[x])
            diff = hcl.compute(A.shape, lambda x: A[x] - B[x])
            prod = hcl.compute(A.shape, lambda x: A[x] * B[x])
            return total, diff, prod

        schedule = hcl.create_schedule([A, B], kernel)
        func = hcl.build(schedule)

        # Inputs are offset by 0.1, so every value is at least 0.1.
        hcl_A = hcl.asarray(np.random.rand(*A.shape) + 0.1)
        hcl_B = hcl.asarray(np.random.rand(*B.shape) + 0.1)
        results = [hcl.asarray(np.zeros(A.shape)) for _ in range(3)]

        func(hcl_A, hcl_B, *results)

        # Reference values start from the converted inputs.
        lhs = hcl_A.asnumpy()
        rhs = hcl_B.asnumpy()
        references = [
            hcl.cast_np(lhs + rhs, dtype),
            hcl.cast_np(lhs - rhs, dtype),
            hcl.cast_np(lhs * rhs, dtype),
        ]

        for reference, result in zip(references, results):
            assert np.allclose(reference, result.asnumpy())

    for _ in range(10):
        # Original note on this range: "To avoid floating point exception
        # during division".
        for bits in range(6, 66, 4):
            _test_dtype(hcl.UFixed(bits, bits - 2))
            _test_dtype(hcl.Fixed(bits, bits - 2))
def test_dtype_basic_fixed():
    """Check ``hcl.asarray`` conversion against a reference fixed-point cast.

    Random values in [-0.5, 0.5) are converted with the type under test as
    the default dtype and must equal, exactly, the values produced by a
    pure-Python scale / wrap / sign-adjust model of that type.
    """

    def _test_dtype(dtype):
        hcl.init(dtype)
        np_A = np.random.rand(100) - 0.5
        converted = hcl.asarray(np_A).asnumpy()

        def cast(val):
            scale = 1 << dtype.fracs
            modulus = 1 << dtype.bits
            sign_limit = 1 << (dtype.bits - 1)

            # Scale, truncate toward zero, and wrap into [0, modulus).
            raw = int(val * scale) % modulus
            # Values at or above the sign limit represent negatives.
            if raw >= sign_limit:
                raw -= modulus
            return float(raw) / scale

        reference = np.vectorize(cast)(np_A)
        assert np.array_equal(converted, reference)

    for _ in range(10):
        for bits in range(2, 66, 4):
            _test_dtype(hcl.Fixed(bits, bits - 2))
f = hcl.build(s, target=target) return f def time_gemm(dtype, m=1024, n=1024, k=1024, target=None): hcl.init(dtype) f = gemm(m, n, k, dtype, target) np_1 = np.random.randint(10, size=(m, k)) np_2 = np.random.randint(10, size=(k, n)) np_3 = np.matmul(np_1, np_2) hcl_m1 = hcl.asarray(np_1, dtype=dtype) hcl_m2 = hcl.asarray(np_2, dtype=dtype) hcl_m3 = hcl.asarray(np.zeros((m, n)), dtype=dtype) f(hcl_m1, hcl_m2, hcl_m3) begin = time.time() for i in range(10): f(hcl_m1, hcl_m2, hcl_m3) end = time.time() print("dtype is: ", dtype) print("average of 10 runs takes: {} sec".format((end - begin) / 10)) np.testing.assert_allclose(hcl_m3.asnumpy(), np_3, rtol=1e-03) ############################################################################### # Test the algorithm with different data types dtypes = [hcl.Int(32), hcl.Float(), hcl.Fixed(32, 16)] for dtype in dtypes: time_gemm(dtype)
import heterocl as hcl
import numpy as np
from sgd import *
from lut import lut as lut_

# Data types for the feature data, the labels and the model parameters.
DTYPE = hcl.Fixed(16, 12)
LTYPE = hcl.Int(8)
FTYPE = hcl.Fixed(32, 19)

# Inputs are packed into MEM_BANDWIDTH-bit unsigned words.
MEM_BANDWIDTH = 64
MTYPE = hcl.UInt(MEM_BANDWIDTH)

# Number of elements of each type that fit into one packed word.
# Floor division keeps these (and the shapes derived from them) integers on
# Python 3; on Python 2 the result is the same as with `/`.
D_VECTOR_SIZE = MEM_BANDWIDTH // DTYPE.bits
L_VECTOR_SIZE = MEM_BANDWIDTH // LTYPE.bits
F_VECTOR_SIZE = MEM_BANDWIDTH // FTYPE.bits

# Packed input placeholders; each length is the element count divided by
# the number of elements per packed word.
data = hcl.placeholder(
    (NUM_FEATURES * NUM_TRAINING // D_VECTOR_SIZE, ), "data", dtype=MTYPE)
label = hcl.placeholder(
    (NUM_TRAINING // L_VECTOR_SIZE, ), "label", dtype=MTYPE)
theta = hcl.placeholder(
    (NUM_FEATURES // F_VECTOR_SIZE, ), "theta", dtype=MTYPE)
lut = hcl.placeholder((LUT_SIZE, ), "lut", dtype=FTYPE)

# Apply the per-tensor data types to the local buffers of SgdLR, then
# create the schedule from the resulting scheme.
f = hcl.make_scheme([data, label, theta, lut], SgdLR)
f.downsize(SgdLR.label_local, LTYPE)
f.quantize(SgdLR.theta_local, FTYPE)
f.quantize(SgdLR.data_local, DTYPE)
s = hcl.make_schedule_from_scheme(f)

PAR_FACTOR = 32
from PIL import Image import heterocl as hcl import numpy as np import math import imageio hcl.init(init_dtype=hcl.Fixed(15, 5)) path = "home.jpg" img = Image.open(path) width, height = img.size A = hcl.placeholder((height, width, 3), "A") Gx = hcl.placeholder((3, 3), "Gx") Gy = hcl.placeholder((3, 3), "Gy") def sobel(A, Gx, Gy): B = hcl.compute((height, width), lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2], "B") r = hcl.reduce_axis(0, 3) c = hcl.reduce_axis(0, 3) D = hcl.compute((height, width), lambda x, y: hcl.select( hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)), hcl.sum(B[x + r, y + c] * Gx[r, c], axis=[r, c]), B[x, y]), "Gx") t = hcl.reduce_axis(0, 3) g = hcl.reduce_axis(0, 3) E = hcl.compute((height, width), lambda x, y: hcl.select( hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)), hcl.sum(B[x + t, y + g] * Gy[t, g], axis=[t, g]), B[x, y]), "Gy")
] end = [ 'bn1_beta', 'bn1_gamma', 'bn1_moving_mean', 'bn1_moving_var', 'fc1_weight', 'fc1_bias' ] # create placeholder in batch holders, values = list(), list() names = before + stage1_unit1 + stage1_unit2 + \ stage2_unit1 + stage2_unit2 + \ stage3_unit1 + stage3_unit2 + \ stage4_unit1 + stage4_unit2 + end # run and calculate test accuracy qtype1 = hcl.Fixed(16, 14) qtype2 = hcl.Fixed(16, 14) correct_sum, correct_top5 = 0, 0 params = arg_params.copy() params.update(aux_params) for name in names: val = params[name].asnumpy() ph = hcl.placeholder(val.shape, name) holders.append(ph) values.append(hcl.asarray(val, dtype=hcl.Float())) # build the function input_image = hcl.placeholder((batch_size, 3, 224, 224), "input_image") resnet = hcl.placeholder((batch_size, 1000), "resnet")
default=False, help='Use Vitis to compile? (default: False)') parser.add_argument('--opt', type=bool, default=False, help='Use optimization? (default: False)') parser.add_argument('--stream', type=bool, default=False, help='Use data streaming? (default: False)') args = parser.parse_args() test_size = 100 qtype_bit = hcl.UInt(1) # weights qtype_int = hcl.Int(6) # not unsigned! qtype_float = hcl.Fixed(20, 10) qtype_packed = hcl.UInt(32) if __name__ == "__main__": target = None batch_size = 100 dtype_in = qtype_bit dtype_out = qtype_float else: batch_size = 1 if args.vitis: print("[INFO] Use Vitis to compile") target = hcl.platform.aws_f1 target.config(compile="vitis", mode="hw_exe") dtype_in = hcl.UInt(8) dtype_out = hcl.Fixed(32, 10)
def test_kernel_fracs():
    """Declare a 100-element placeholder typed Fixed(1000, 800)."""
    wide_dtype = hcl.Fixed(1000, 800)
    A = hcl.placeholder((100, ), dtype=wide_dtype)
import heterocl as hcl import hlib.op.bnn as bnn import numpy as np import sys from heterocl.profiler import Profiler profiler = Profiler() target = None test_size = 100 batch_size = 100 qtype_bit = hcl.UInt(1) # weights qtype_int = hcl.Int(6) # not unsigned! qtype_float = hcl.Fixed(20, 10) qtype_packed = hcl.UInt(32) def build_packed_bnn(*arrays): # 1*16*16 hcl_comp = [] for i, array in enumerate(arrays): if i in [0, 1]: dtype = qtype_bit elif i == 3: dtype = hcl.UInt(16) elif i in [5, 7]: dtype = qtype_packed else: dtype = qtype_float hcl_comp.append( hcl.compute(array.shape, lambda *dim: array[dim],
import heterocl as hcl from PIL import Image import numpy as np import math #import os import imageio #================================================================================================================================================ #initialization #================================================================================================================================================ path = "home.jpg" # Your image path #hcl.init(init_dtype=hcl.Float()) hcl.init(init_dtype=hcl.Fixed(30, 16)) img = Image.open(path) width, height = img.size #================================================================================================================================================ #main function #================================================================================================================================================ def sobelAlgo(A, B, Fx, Fy): def rgb_sum(x, y): B[x][y] = A[x][y][0] + A[x][y][1] + A[x][y][2] hcl.mutate(B.shape, lambda x, y: rgb_sum(x, y)) #B = hcl.compute((height+2, width+2), lambda x,y:A[x][y][0]+A[x][y][1]+A[x][y][2], "B") r = hcl.reduce_axis(0, 3) c = hcl.reduce_axis(0, 3) Gx = hcl.compute( (height, width), lambda y, x: hcl.sum(B[y + r, x + c] * Fx[r, c], axis=[r, c]), "Gx")
parser.add_argument('--opt', type=bool, default=False, help='Use optimization? (default: False)') parser.add_argument('--stream', type=bool, default=False, help='Use data streaming? (default: False)') args = parser.parse_args() test_size = 100 qtype_bit = hcl.UInt(1) # weights qtype_int = hcl.Int(8) if __name__ == "__main__": batch_size = 10 qtype_float = hcl.Fixed(24, 12) target = None else: # vhls batch_size = 1 qtype_float = hcl.Fixed(32, 12) # for interface synthesis target = hcl.platform.zc706 if args.vitis: print("Use Vitis to compile") target.config(compile="vitis", mode="hw_exe") else: target.config(compile="vivado_hls", mode="csyn") # qtype_packed = hcl.UInt(32) def RSign(data, alpha, name="rsign"): assert data.shape[1] == alpha.shape[0]
import heterocl as hcl import hlib import numpy as np target = None batch_size = 100 test_size = 100 qtype_bit = hcl.UInt(1) # weights qtype_int = hcl.Int(12) # not unsigned! qtype_float = hcl.Fixed(25, 13) # hcl.Float() # compute declaration def build_bnn(input_image, w_conv1, gamma1, beta1, miu1, sigma1, w_conv2, gamma2, beta2, miu2, sigma2, w_fc1, b_fc1, w_fc2, b_fc2): # 1*16*16 conv1 = hlib.op.bnn.conv2d_nchw(input_image, w_conv1, padding=[1, 1], name="conv1") # 64*16*16 bn1 = hlib.op.bnn.batch_norm(conv1, gamma1, beta1, miu1, sigma1)[0] maxpool1 = hlib.op.bnn.max_pool2d_nchw(bn1, [2, 2], [2, 2]) # 64*8*8 conv2 = hlib.op.bnn.conv2d_nchw(maxpool1, w_conv2, padding=[1, 1], name="conv2") # 128*8*8 bn2 = hlib.op.bnn.batch_norm(conv2, gamma2, beta2, miu2, sigma2, 64)[0] maxpool2 = hlib.op.bnn.max_pool2d_nchw(bn2, [2, 2], [2, 2]) # 128*4*4=2048 flat = hlib.op.bnn.flatten(maxpool2) fc1 = hlib.op.bnn.dense(flat, w_fc1, b_fc1, True) # 2048->512 fc2 = hlib.op.bnn.dense(fc1, w_fc2, b_fc2, False) # 512->10
mx.gluon.utils.download( 'https://gist.githubusercontent.com/Huyuwei/dc00ce83f537914c64a204133d23b019/raw/79af41e7c8ba9120ea7f35fb1d0484b65bccd54f/lenet-0010.params' ) mx.gluon.utils.download( 'https://gist.githubusercontent.com/Huyuwei/dc00ce83f537914c64a204133d23b019/raw/79af41e7c8ba9120ea7f35fb1d0484b65bccd54f/lenet-symbol.json' ) sym, arg_params, aux_params = mx.model.load_checkpoint('lenet', 10) # get weights weight_conv1_np = arg_params['convolution0_weight'].asnumpy() weight_conv2_np = arg_params['convolution1_weight'].asnumpy() weight_fc1_np = arg_params['fullyconnected0_weight'].asnumpy() weight_fc2_np = arg_params['fullyconnected1_weight'].asnumpy() ############################################################################### # Define the quantized data type and run the inference qtype1 = hcl.Fixed(16, 14) qtype2 = hcl.Fixed(16, 14) correct_sum = 0 batch_size = 1000 mnist = mx.test_utils.get_mnist() ############################################################################### # In this example, we quantize the weights to `qtype1` and the activations to # `qtype2`. To quantize the placeholders, simply specify the `dtype` field. For # the internal tensors, we use `hcl.quantize` API. def build_lenet_inf(batch_size=batch_size, target=None): # set up input/output placeholders input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image") weight_conv1 = hcl.placeholder((20, 1, 5, 5), "weight_conv1", qtype1) weight_conv2 = hcl.placeholder((50, 20, 5, 5), "weight_conv2", qtype1)
import numpy as np
import cv2
import torch
import torch.nn as nn
import heterocl as hcl
from weight_quant import weight_quantize_fn
from ultranet_model import ultranet

# ----------------------------------------------------------------------------
# Data types
# ----------------------------------------------------------------------------
# 32-bit float is the default type; the types below are named per role.
hcl.init(hcl.Float(32))

input_dtype = hcl.Fixed(8, 4)
# TODO: why hcl.Fixed(4,4) doesn't work
weight_dtype = hcl.Fixed(5, 3)
act_dtype = hcl.UFixed(6, 4)
# TODO some 14 bit fixed pt, this seems to work well
bn_a_dtype = hcl.Fixed(14, 10)
# TODO some 26 bit fixed pt, this seems to work well
bn_b_dtype = hcl.Fixed(26, 18)
conv_dtype = hcl.Fixed(16, 8)

# ----------------------------------------------------------------------------
# Parameters and images
# ----------------------------------------------------------------------------
# Other sample images left by the author:
#   './test_images/person23_0113.jpg'
#   './test_images/car1_0001.jpg'
image_path = './test_images/boat1_000001.jpg'

raw_height = 360
raw_width = 640