def test_hls(target_mode):
    """Run a one-stage "+1" kernel through the vivado_hls flow on aws_f1.

    `target_mode` is forwarded unchanged to `target.config(mode=...)`.
    The input tensor is placed on the accelerator and stage "B" is moved
    back to the host; the result is then compared against NumPy.
    """
    hcl.init(hcl.Int(16))
    A = hcl.placeholder((10, ), "A")

    def kernel(A):
        return hcl.compute(A.shape, lambda *args: A[args] + 1, "B")

    target = hcl.Platform.aws_f1
    s = hcl.create_schedule([A], kernel)
    # Data placement: input goes to the device, stage "B" comes back.
    s.to(A, target.xcel)
    s.to(kernel.B, target.host)
    target.config(compiler="vivado_hls", mode=target_mode)
    f = hcl.build(s, target)

    host_in = np.random.randint(10, size=(10, ))
    dev_in = hcl.asarray(host_in, dtype=hcl.Int(16))
    dev_out = hcl.asarray(np.zeros((10, )), dtype=hcl.Int(16))
    f(dev_in, dev_out)

    result = dev_out.asnumpy()
    f.report()  # called as in the original; its return value is not inspected
    np.testing.assert_array_equal(result, host_in + 1)
def test_const_tensor_int():
    """Check `hcl.const_tensor` on integer data given as ndarray and as list.

    For several random bit widths, signed and unsigned, the sum of the two
    constant tensors must equal twice the source data.
    """

    def test_kernel(dtype, size):
        hcl.init(dtype)
        values = numpy.random.randint(10, size=size)
        values_as_list = values.tolist()

        def kernel():
            # Same constants, once from NumPy and once from nested lists.
            cp1 = hcl.const_tensor(values)
            cp2 = hcl.const_tensor(values_as_list)
            return hcl.compute(values.shape, lambda *x: cp1[x] + cp2[x])

        # Created as in the original; it is not passed to the schedule.
        hcl.placeholder(values.shape)
        f = hcl.build(hcl.create_schedule([], kernel))
        out = hcl.asarray(numpy.zeros(values.shape), dtype=dtype)
        f(out)
        assert numpy.array_equal(out.asnumpy(), values * 2)

    shapes = ((8, 8), (20, 20, 3))
    for _ in range(5):
        bit = numpy.random.randint(6, 60)
        for shape in shapes:
            for make_type in (hcl.Int, hcl.UInt):
                test_kernel(make_type(bit), shape)
def top(input, filter, bias, ):
    """Build a bias-initialised convolution stage followed by a clamp-at-zero stage.

    Stage "f_conv" first writes `bias[z]` into every element it covers, then
    accumulates `filter[rx, ry, rz, z] * input[x + rx, y + ry, rz, n]` over
    rx in [0, 3), ry in [0, 3) and rz in [0, 32).  Stage "final" copies
    `f_conv` where it is greater than 0.0 and writes 0.0 elsewhere.

    `final_extent_*` and `final_min_*` are not defined in this block; they
    must be supplied by an enclosing scope.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost -- confirm against the original file.

    Returns the `final` tensor.
    """
    # The next four values are assigned but never read in this block.
    input_extent_3_required_s = (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
    final_total_extent_1 = (hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_1) * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_0))
    final_total_extent_2 = (final_total_extent_1 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_2))
    final_total_extent_3 = (final_total_extent_2 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_3))
    # Sizes used for the last two dimensions of the f_conv buffer: each is the
    # larger of the candidate expressions compared inside the select.
    f_conv_n_extent_realized_s = hcl.select(hcl.select((((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)) > (final_extent_3 + -1), (((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)), (final_extent_3 + -1)) > (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//(hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) + 1)), hcl.select((((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)) > (final_extent_3 + -1), (((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)), (final_extent_3 + -1)), (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//(hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) + 1)))
    f_conv_z_extent_realized = hcl.select(((hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) * 32) + 32) > final_extent_2, ((hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) * 32) + 32), final_extent_2)
    # Zero-filled float32 buffer; the y extent is rounded up to a multiple of 32.
    f_conv = hcl.compute((final_extent_0, ((((final_extent_1 + -1)//32) * 32) + 32), f_conv_z_extent_realized, (f_conv_n_extent_realized_s + 1)), lambda x, y, z, w: 0, name = "f_conv", dtype = hcl.Float(bits = 32))
    with hcl.Stage("f_conv"):
        # Pass 0: initialise with the bias.  The z and n indices are fused
        # into one loop variable and split again with % and //.
        with hcl.for_(0, (final_extent_2 * final_extent_3), name = "f_conv_s0_z_par") as f_conv_s0_z_par:
            with hcl.for_(final_min_1, final_extent_1, name = "f_conv_s0_y") as f_conv_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "f_conv_s0_x") as f_conv_s0_x:
                    f_conv[f_conv_s0_x, f_conv_s0_y, ((f_conv_s0_z_par % hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_2), ((f_conv_s0_z_par//hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_3)] = bias[((f_conv_s0_z_par % hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_2)]
        # Pass 1: accumulate the products.  y and z are walked in blocks of
        # 32 (outer block index plus an inner 0..31 offset).
        with hcl.for_(0, (((final_extent_2 + 31)//32) * final_extent_3), name = "f_conv_s1_z_z_par") as f_conv_s1_z_z_par:
            # Index of the z block within the fused z-block/n loop variable.
            f_conv_s1_z_z_t_base_s = (f_conv_s1_z_z_par % hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
            with hcl.for_(0, 32, name = "f_conv_s1_r__z") as f_conv_s1_r__z:
                with hcl.for_(0, ((final_extent_1 + 31)//32), name = "f_conv_s1_y_y") as f_conv_s1_y_y:
                    with hcl.for_(0, 32, name = "f_conv_s1_z_z_t") as f_conv_s1_z_z_t:
                        with hcl.for_(0, 32, name = "f_conv_s1_y_y_t") as f_conv_s1_y_y_t:
                            with hcl.for_(final_min_0, final_extent_0, name = "f_conv_s1_x") as f_conv_s1_x:
                                with hcl.for_(0, 3, name = "f_conv_s1_r__y_r21") as f_conv_s1_r__y_r21:
                                    with hcl.for_(0, 3, name = "f_conv_s1_r__x_r20") as f_conv_s1_r__x_r20:
                                        # Quotient part of the fused loop variable (offset from final_min_3).
                                        t51_s = (f_conv_s1_z_z_par//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
                                        f_conv[f_conv_s1_x, (((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t), (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t), ((f_conv_s1_z_z_par//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1)) + final_min_3)] = (f_conv[f_conv_s1_x, (((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t), (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t), (final_min_3 + t51_s)] + (filter[f_conv_s1_r__x_r20, f_conv_s1_r__y_r21, f_conv_s1_r__z, (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t)] * input[(f_conv_s1_r__x_r20 + f_conv_s1_x), ((((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t) + f_conv_s1_r__y_r21), f_conv_s1_r__z, (final_min_3 + t51_s)]))
    # Output buffer has a fixed (64, 64, 32, 4) shape, independent of final_extent_*.
    final = hcl.compute((64, 64, 32, 4), lambda x, y, z, w: 0, name = "final", dtype = hcl.Float(bits = 32))
    with hcl.Stage("final"):
        with hcl.for_(final_min_3, final_extent_3, name = "final_s0_n") as final_s0_n:
            with hcl.for_(final_min_2, final_extent_2, name = "final_s0_z") as final_s0_z:
                with hcl.for_(final_min_1, final_extent_1, name = "final_s0_y") as final_s0_y:
                    with hcl.for_(final_min_0, final_extent_0, name = "final_s0_x") as final_s0_x:
                        # Keep positive values, replace everything else with 0.0.
                        final[final_s0_x, final_s0_y, final_s0_z, final_s0_n] = hcl.select(f_conv[final_s0_x, final_s0_y, final_s0_z, final_s0_n] > hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.000000), f_conv[final_s0_x, final_s0_y, final_s0_z, final_s0_n], hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.000000))
    return final
def test_csyn():
    """Drive a "+1" kernel through the vivado_hls "csyn" mode on zc706."""
    shape = (10, 32)

    # Step 1: describe the computation.
    A = hcl.placeholder(shape, "A")

    def kernel(A):
        return hcl.compute(A.shape, lambda *args: A[args] + 1, "B")

    # Step 2: create the schedule.
    s = hcl.create_schedule([A], kernel)

    # Step 3: choose the target platform and tool mode.
    target = hcl.platform.zc706
    target.config(compile="vivado_hls", mode="csyn")

    # Step 4: data movement between host and accelerator.
    s.to(A, target.xcel)
    s.to(kernel.B, target.host)

    # Step 5: build the kernel.
    # (The interface is misleading here: no kernel code is generated yet,
    # only the template Tcl file is copied to the current folder.)
    f = hcl.build(s, target)
    print("Done building")

    # Step 6: create the required arrays.
    np_A = np.random.randint(0, 10, size=shape)
    hcl_A = hcl.asarray(np_A, dtype=hcl.Int(32))
    hcl_B = hcl.asarray(np.zeros(shape), dtype=hcl.Int(32))

    # Step 7: generate the kernel code and run synthesis.
    f(hcl_A, hcl_B)
def test_schedule_return_multi():
    """A kernel returning two tensors fills two output buffers, in order."""
    hcl.init()
    A = hcl.placeholder((10,))

    def algorithm(A):
        B = hcl.compute(A.shape, lambda x: A[x] + 1)
        C = hcl.compute(A.shape, lambda x: A[x] + 2)
        return B, C

    f = hcl.build(hcl.create_schedule([A], algorithm))

    dev_a = hcl.asarray(np.random.randint(100, size=(10,)), dtype=hcl.Int(32))
    dev_b = hcl.asarray(np.zeros(10), dtype=hcl.Int(32))
    dev_c = hcl.asarray(np.zeros(10), dtype=hcl.Int(32))
    f(dev_a, dev_b, dev_c)

    host_a = dev_a.asnumpy()
    host_b = dev_b.asnumpy()
    host_c = dev_c.asnumpy()
    # All three arrays hold 10 elements, so zip visits indices 0..9.
    for src, plus_one, plus_two in zip(host_a, host_b, host_c):
        assert plus_one == src + 1
        assert plus_two == src + 2
def kernel(A, B, C):
    """Pack A, B and C into one struct tensor, then unpack every field.

    Returns the per-field tensors for "fa", "fb" and "fc", in that order.
    """
    int8 = hcl.Int(8)
    fixed = hcl.Fixed(13, 11)
    real = hcl.Float()

    stype = hcl.Struct({"fa": int8, "fb": fixed, "fc": real})
    D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=stype)

    # One extraction stage per struct field, each with the field's own type.
    E = hcl.compute(A.shape, lambda x: D[x].fa, dtype=hcl.Int(8))
    F = hcl.compute(A.shape, lambda x: D[x].fb, dtype=hcl.Fixed(13, 11))
    G = hcl.compute(A.shape, lambda x: D[x].fc, dtype=hcl.Float())
    return E, F, G
def test_dtype_struct():
    """Round-trip three differently typed tensors through a struct tensor.

    Each output buffer must match its input buffer as read back from
    HeteroCL, i.e. after conversion to the declared data type.
    """
    hcl.init()
    A = hcl.placeholder((100,), dtype=hcl.Int(8))
    B = hcl.placeholder((100,), dtype=hcl.Fixed(13, 11))
    C = hcl.placeholder((100,), dtype=hcl.Float())

    def kernel(A, B, C):
        stype = hcl.Struct({"fa": hcl.Int(8), "fb": hcl.Fixed(13, 11), "fc": hcl.Float()})
        D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=stype)
        E = hcl.compute(A.shape, lambda x: D[x].fa, dtype=hcl.Int(8))
        F = hcl.compute(A.shape, lambda x: D[x].fb, dtype=hcl.Fixed(13, 11))
        G = hcl.compute(A.shape, lambda x: D[x].fc, dtype=hcl.Float())
        return E, F, G

    f = hcl.build(hcl.create_schedule([A, B, C], kernel))

    # Random inputs centred on zero (same draw order as before).
    np_A = np.random.randint(0, 500, size=100) - 250
    np_B = np.random.rand(100) - 0.5
    np_C = np.random.rand(100) - 0.5

    sources = []
    sinks = []
    for data, make_type in ((np_A, lambda: hcl.Int(8)),
                            (np_B, lambda: hcl.Fixed(13, 11)),
                            (np_C, lambda: hcl.Float())):
        sources.append((data, make_type))
    inputs = [hcl.asarray(data, dtype=make_type()) for data, make_type in sources]
    for _, make_type in sources:
        sinks.append(hcl.asarray(np.zeros(100), dtype=make_type()))

    f(*(inputs + sinks))

    for given, produced in zip(inputs, sinks):
        assert np.allclose(given.asnumpy(), produced.asnumpy())
def random_test():
    """Build a kernel that stores 78 into element 0 and print the results."""

    def top_func(dtype = hcl.Int()):
        # `dtype` is accepted but not used when building the kernel.
        def random(number):
            number[0] = 78

        number = hcl.placeholder((64,), "number")
        return hcl.build(hcl.create_schedule([number], random))

    host_bits = np.random.randint(2, size = (64,))
    host_count = hcl.cast_np(np.zeros((1,)), dtype = hcl.Int())
    hcl_count = hcl.asarray(host_count)
    hcl_number = hcl.asarray(host_bits)

    f = top_func(hcl.Int())
    f(hcl_number)

    print(hcl_number.asnumpy())
    print(hcl_count)
def algorithm(A, B):
    """Compute tensor "C" where each element is produced by the `add` module.

    `add` is declared through `hcl.def_` with the shapes of A and B plus a
    scalar index, and returns `A[x] + B[x]`.
    """
    arg_shapes = [A.shape, B.shape, ()]
    arg_types = [hcl.UInt(2), hcl.Int(32), hcl.Int(32)]

    @hcl.def_(arg_shapes, arg_types)
    def add(A, B, x):
        hcl.return_(A[x] + B[x])

    return hcl.compute(A.shape, lambda x: add(A, B, x), "C")
def kernel_select(a, b, c, d):
    """Write d[i] for i in [0, 10) using nested `hcl.select` expressions.

    `src` is the Int(16) cast of c[i] + b[i] when `use_imm` equals 1,
    otherwise the Int(32) cast of c[i] - b[i].  `dst` is the Int(32) cast
    of 2 * (c[i] + b[i]).  d[i] becomes -src when dst < -src, src when
    dst > src, and a[i] otherwise.
    """
    use_imm = hcl.scalar(1)
    with hcl.for_(0, 10, name="i") as i:
        take_sum = use_imm == 1
        narrow_sum = hcl.cast(hcl.Int(16), (c[i] + b[i]))
        wide_diff = hcl.cast(hcl.Int(32), (c[i] - b[i]))
        src = hcl.select(take_sum, narrow_sum, wide_diff)

        dst = hcl.cast(hcl.Int(32), (2 * (c[i] + b[i])))

        not_below = dst >= (-1 * src)
        upper_choice = hcl.select(dst <= src, a[i], src)
        d[i] = hcl.select(not_below, upper_choice, (-1 * src))
def test_ap_int():
    """Code built for target 'aocl' must mention the expected integer types."""
    hcl.init()
    shape = (1, 32)
    A = hcl.placeholder(shape, dtype=hcl.Int(3))
    B = hcl.placeholder(shape, dtype=hcl.UInt(3))
    C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Int(8))

    code = hcl.build(hcl.create_schedule([A, B, C]), target='aocl')

    for expected in ("ap_int<3>", "ap_uint<3>", "int8"):
        assert expected in code
def test_ac_int():
    """Code built for target 'ihls' must use `ac_int` for 3- and 8-bit types."""
    hcl.init()
    shape = (1, 32)
    A = hcl.placeholder(shape, dtype=hcl.Int(3))
    B = hcl.placeholder(shape, dtype=hcl.UInt(3))
    C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Int(8))

    code = hcl.build(hcl.create_schedule([A, B, C]), target='ihls')

    for expected in ("ac_int<3, true>", "ac_int<3, false>", "ac_int<8, true>"):
        assert expected in code
def top(input, ):
    """Build a 3x3 window-maximum stage and copy its result into `final`.

    For every (x, y) visited, `maximum` is reset to 0 and then replaced by
    the larger of itself and `input[x + bx, y + by]` for bx, by in [0, 3).
    The value is stored in `max_local`, which stage "final" copies
    element-wise into the returned tensor.

    `final_extent_*` and `final_min_*` are not defined in this block; they
    must be supplied by an enclosing scope.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost.  In particular, the `max_local[...] = maximum[...]`
    store is placed after the "maximum" stage but inside the x loop --
    confirm against the original file.
    """
    # Assigned but never read in this block.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1) *
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0))
    max_local = hcl.compute((final_extent_0, final_extent_1),
                            lambda x, y: 0,
                            name="max_local",
                            dtype=hcl.UInt(bits=16))
    with hcl.Stage("max_local"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="max_local_s0_y") as max_local_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="max_local_s0_x") as max_local_s0_x:
                # NOTE(review): `maximum` is declared with shape (1, 1) but is
                # indexed with the loop variables below -- verify this is
                # what the generator intended.
                maximum = hcl.compute((1, 1),
                                      lambda x, y: 0,
                                      name="maximum",
                                      dtype=hcl.UInt(bits=16))
                with hcl.Stage("maximum"):
                    # Reset the running maximum before scanning the window.
                    maximum[max_local_s0_x, max_local_s0_y] = hcl.cast(
                        dtype=hcl.UInt(bits=16), expr=0)
                    with hcl.for_(
                            0, 3,
                            name="maximum_s1_box__y") as maximum_s1_box__y:
                        with hcl.for_(
                                0, 3,
                                name="maximum_s1_box__x") as maximum_s1_box__x:
                            # Keep the larger of the running maximum and the
                            # current window element.
                            maximum[max_local_s0_x, max_local_s0_y] = hcl.select(
                                maximum[max_local_s0_x, max_local_s0_y] >
                                input[(max_local_s0_x + maximum_s1_box__x),
                                      (max_local_s0_y + maximum_s1_box__y)],
                                maximum[max_local_s0_x, max_local_s0_y],
                                input[(max_local_s0_x + maximum_s1_box__x),
                                      (max_local_s0_y + maximum_s1_box__y)])
                max_local[max_local_s0_x, max_local_s0_y] = maximum[max_local_s0_x, max_local_s0_y]
    # Output buffer has a fixed (640, 480) shape, independent of final_extent_*.
    final = hcl.compute((640, 480),
                        lambda x, y: 0,
                        name="final",
                        dtype=hcl.UInt(bits=16))
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="final_s0_y") as final_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="final_s0_x") as final_s0_x:
                final[final_s0_x, final_s0_y] = max_local[final_s0_x, final_s0_y]
    return final
def kernel(A, B, C, O):
    """Write the six pairwise products of (A, B, C) into O, row by row.

    A, B and C are packed into a struct with fields x, y, z.  A second
    struct tensor holds x*x, y*y, z*z, x*y, y*z and x*z as fields v0..v5,
    and O[i][j] receives field v<j> of element i for i in [0, 100).
    """
    dtype_xyz = hcl.Struct({axis: hcl.Int() for axis in ("x", "y", "z")})
    dtype_out = hcl.Struct({"v" + str(k): hcl.Int() for k in range(6)})

    D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=dtype_xyz)
    E = hcl.compute(
        A.shape,
        lambda x: (D[x].x * D[x].x, D[x].y * D[x].y, D[x].z * D[x].z,
                   D[x].x * D[x].y, D[x].y * D[x].z, D[x].x * D[x].z),
        dtype=dtype_out)

    with hcl.Stage():
        with hcl.for_(0, 100) as i:
            # Plain Python loop: the six stores are unrolled while tracing.
            for j in range(6):
                field_name = "v" + str(j)
                O[i][j] = E[i].__getattr__(field_name)
def stateToIndex(sVals, iVals, bounds, ptsEachDim):
    """Convert three state values into integer grid indices, in place.

    For each dimension d in 0..2 the value is first scaled linearly from
    [bounds[d, 0], bounds[d, 1]] onto [0, ptsEachDim[d] - 1] and stored in
    iVals[d]; afterwards every iVals[d] is cast to an integer.
    """
    dims = range(3)

    for d in dims:
        lower = bounds[d, 0]
        span = bounds[d, 1] - bounds[d, 0]
        iVals[d] = ((sVals[d] - lower) / span) * (ptsEachDim[d] - 1)

    # NOTE: add 0.5 before the cast to simulate rounding
    for d in dims:
        iVals[d] = hcl.cast(hcl.Int(), iVals[d] + 0.5)
def test_gemm_multi_bit_signed():
    """Signed 8-bit GEMM built for 'rv64_ppac' must use PPACFunc_GeMMSInt."""
    hcl.init()
    shape = (32, 32)
    data = hcl.placeholder(shape, 'd', dtype=hcl.Int(8))
    weight = hcl.placeholder(shape, 'w', dtype=hcl.Int(8))

    def kernel(d, w):
        return hlib.ppac.gemm_multi_bit(d, w, 'res')

    schedule = hcl.create_schedule([data, weight], kernel)
    built = hcl.build(schedule, target='rv64_ppac')
    assert 'PPACFunc_GeMMSInt' in str(built)
def test_ap_int():
    """Code built for target 'aocl' must declare the arbitrary-precision types.

    The generated code is printed, then checked for the OpenCL extension
    pragma and the three expected `intd_t` / `uintd_t` declarations.
    """
    hcl.init()
    shape = (1, 32)
    A = hcl.placeholder(shape, dtype=hcl.Int(3))
    B = hcl.placeholder(shape, dtype=hcl.UInt(3))
    C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Int(8))

    code = hcl.build(hcl.create_schedule([A, B, C]), target='aocl')
    print(code)

    expected_fragments = (
        "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable",
        "ap_int<3> intd_t",
        "ap_uint<3> uintd_t",
        "ap_int<8> intd_t",
    )
    for fragment in expected_fragments:
        assert fragment in code
def test_int():
    """Build a small integer add and check the first three expected strings.

    `target` and `strings` are not defined here; they are read from the
    enclosing scope.
    """
    hcl.init()
    shape = (1, 32)
    A = hcl.placeholder(shape, dtype=hcl.Int(3))
    B = hcl.placeholder(shape, dtype=hcl.UInt(3))
    C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], dtype=hcl.Int(8))

    code = hcl.build(hcl.create_schedule([A, B, C]), target=target)

    # Index explicitly so a too-short `strings` still raises IndexError.
    for position in range(3):
        assert strings[position] in code
def zculling(size_pixels,size,fragment,z_buffer,pixels):
    """Keep the fragments that pass the depth test and count them.

    For each n in [0, size), fragment[n][0..3] is read as x, y, z and
    color.  When z is smaller than z_buffer[y][x], the fragment's x, y and
    color are appended to `pixels`, the counter is incremented and
    z_buffer[y][x] is overwritten with z.  The final counter value is
    stored in size_pixels[0].

    NOTE(review): the nesting was reconstructed from a source whose
    indentation was lost -- confirm that the z_buffer update belongs inside
    the `if_` and the size_pixels store after the loop.
    """
    # Number of pixels written so far; also the next free row in `pixels`.
    pixel_cntr = hcl.scalar(0,dtype=hcl.Int())
    with hcl.for_(0,size) as n:
        x = hcl.scalar(fragment[n][0],dtype=hcl.Int())
        y = hcl.scalar(fragment[n][1],dtype=hcl.Int())
        z = hcl.scalar(fragment[n][2])
        color = hcl.scalar(fragment[n][3])
        # Depth test against the value currently stored for this (y, x).
        with hcl.if_( z < z_buffer[y][x] ):
            pixels[pixel_cntr][0] = x.v
            pixels[pixel_cntr][1] = y.v
            pixels[pixel_cntr][2] = color.v
            pixel_cntr.v += 1
            z_buffer[y][x] = z.v
    size_pixels[0] = pixel_cntr.v
def kernel(A, B, C):
    """Pack A, B, C into a struct tensor, unpack it, and check field dtypes.

    Returns the per-field tensors for "fa", "fb" and "fc", in that order.
    """
    struct_fields = {
        "fa": hcl.Int(8),
        "fb": hcl.Fixed(13, 11),
        "fc": hcl.Float(),
    }
    stype = hcl.Struct(struct_fields)
    D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=stype)
    E = hcl.compute(A.shape, lambda x: D[x].fa, dtype=hcl.Int(8))
    F = hcl.compute(A.shape, lambda x: D[x].fb, dtype=hcl.Fixed(13, 11))
    G = hcl.compute(A.shape, lambda x: D[x].fc, dtype=hcl.Float())

    # Check the data type reported by each field of the first element.
    first = D[0]
    assert first.fa.dtype == "int8"
    first = D[0]
    assert first.fb.dtype == "fixed13_11"
    first = D[0]
    assert first.fc.dtype == "float32"
    return E, F, G
def test_gemm_multi_bit():
    """The lowered IR of a multi-bit GEMM must carry the expected attributes.

    Checks the batch number, the input block number (input channels divided
    by `ppac_config.elem_num`) and the output channel number.
    """
    hcl.init()
    b_n, i_c, o_c = 64, 256, 256
    ppac_config = hlib.ppac.PPAC_config(multi_bit=True)
    data = hcl.placeholder((b_n, i_c), 'd', dtype=hcl.Int(8))
    weight = hcl.placeholder((o_c, i_c), 'w', dtype=hcl.Int(8))

    def kernel(d, w):
        return hlib.ppac.gemm_multi_bit(d, w, 'res')

    schedule = hcl.create_schedule([data, weight], kernel)
    ir = str(hcl.lower(schedule))

    batch_attr = '\"_batch_num\"=' + str(b_n)
    assert batch_attr in ir
    block_attr = '\"_in_block_num\"=' + str(i_c // ppac_config.elem_num)
    assert block_attr in ir
    channel_attr = '\"_out_channel_num\"=' + str(o_c)
    assert channel_attr in ir
def test_module_cond_return_if_else():
    """An `hcl.def_` module may return from both branches of an if/else.

    `update_B` returns -1 when A[x] > 5 and A[x] + 1 otherwise; B is updated
    with that value for every index and compared against the same rule
    evaluated in Python.
    """

    def algorithm(A, B):

        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            with hcl.if_(A[x] > 5):
                hcl.return_(-1)
            with hcl.else_():
                hcl.return_(A[x] + 1)

        hcl.update(B, lambda x: update_B(A, x))

    A = hcl.placeholder((10, ))
    B = hcl.placeholder((10, ))

    s = hcl.create_schedule([A, B], algorithm)
    f = hcl.build(s)

    a = np.random.randint(10, size=(10, ))
    b = np.zeros(10)
    _A = hcl.asarray(a)
    _B = hcl.asarray(b, hcl.Int())

    f(_A, _B)

    _B = _B.asnumpy()

    for i in range(0, 10):
        # The conditional expression binds more loosely than `==`, so the
        # expected value is computed first.  Written inline without
        # parentheses, the a[i] > 5 case would assert the truthy constant
        # -1 and never check that branch.
        expected = a[i] + 1 if a[i] <= 5 else -1
        assert _B[i] == expected
def test_hls(target_mode):
    """Run a three-stage kernel through vivado_hls on aws_f1.

    The kernel computes ((A + 1) + 1) * 2.  Stage "B" is moved to the
    accelerator and stage "C" back to the host.  If `target_mode` contains
    "csyn" the report is checked; otherwise, if it contains "csim", the
    numerical result is checked.
    """
    hcl.init()
    shape = (10, 32)
    A = hcl.placeholder(shape, "A")

    def kernel(A):
        B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B")
        C = hcl.compute(A.shape, lambda *args: B[args] + 1, "C")
        return hcl.compute(A.shape, lambda *args: C[args] * 2, "D")

    target = hcl.platform.aws_f1
    s = hcl.create_schedule([A], kernel)
    s.to(kernel.B, target.xcel)
    s.to(kernel.C, target.host)
    target.config(compile="vivado_hls", mode=target_mode)
    f = hcl.build(s, target)

    host_in = np.random.randint(10, size=shape)
    dev_in = hcl.asarray(host_in)
    dev_out = hcl.asarray(np.zeros(shape), dtype=hcl.Int(32))
    f(dev_in, dev_out)
    result = dev_out.asnumpy()

    if "csyn" in target_mode:
        assert "ReportVersion" in f.report(target)
        return
    if "csim" in target_mode:
        np.testing.assert_array_equal(result, (host_in + 2) * 2)
def test_intel_aocl():
    """Run a three-stage kernel through the aocl "sw_sim" flow on vlab.

    The test returns early when no `aocl` executable is found on PATH.
    The kernel computes ((A + 1) + 1) * 2; stage "B" is moved to the
    accelerator and stage "C" back to the host.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    # Probe PATH without spawning a shell: no dependence on a `which`
    # binary and no stray stderr output when the tool is missing.
    if shutil.which("aocl") is None:
        return

    hcl.init()
    A = hcl.placeholder((10, 32), "A")

    def kernel(A):
        B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B")
        C = hcl.compute(A.shape, lambda *args: B[args] + 1, "C")
        D = hcl.compute(A.shape, lambda *args: C[args] * 2, "D")
        return D

    target = hcl.platform.vlab
    s = hcl.create_schedule([A], kernel)
    s.to(kernel.B, target.xcel)
    s.to(kernel.C, target.host)
    target.config(compile="aocl", mode="sw_sim")
    f = hcl.build(s, target)

    np_A = np.random.randint(10, size=(10, 32))
    np_B = np.zeros((10, 32))

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B, dtype=hcl.Int(32))
    f(hcl_A, hcl_B)
    ret_B = hcl_B.asnumpy()

    np.testing.assert_array_equal(ret_B, (np_A + 2) * 2)
def test_add_mul():
    """Run a three-stage kernel through the sdsoc "sw_sim" flow on zc706.

    The kernel computes D = ((A + 1) + 1) * 2.  Stage "B" is moved to the
    accelerator and stage "C" back to the host.
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")

    def kernel(A):
        B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B")
        C = hcl.compute(A.shape, lambda *args: B[args] + 1, "C")
        D = hcl.compute(A.shape, lambda *args: C[args] * 2, "D")
        return D

    target = hcl.platform.zc706
    s = hcl.create_schedule([A], kernel)
    s.to(kernel.B, target.xcel)
    s.to(kernel.C, target.host)
    target.config(compile="sdsoc", mode="sw_sim")
    f = hcl.build(s, target)

    np_A = np.random.randint(10, size=(10, 32))
    np_B = np.zeros((10, 32))

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B, dtype=hcl.Int(32))
    f(hcl_A, hcl_B)

    # Two "+1" stages followed by "*2" give (A + 2) * 2 = 2A + 4.  The
    # previous expectation, A * 2 + 2, did not match the kernel above.
    assert np.array_equal(hcl_B.asnumpy(), (np_A + 2) * 2)
def test_xrt_stream():
    """Run A + B + 1 through the vitis "sw_sim" flow with streamed transfers.

    A and the result stage "D" use `hcl.Stream.FIFO`; B uses
    `hcl.Stream.Copy`.
    """
    hcl.init()
    shape = (10, 32)
    A = hcl.placeholder(shape, "A")
    B = hcl.placeholder(shape, "B")

    def kernel(A, B):
        C = hcl.compute(A.shape, lambda i, j: A[i, j] + B[i, j], "C")
        return hcl.compute(C.shape, lambda i, j: C[i, j] + 1, "D")

    target = hcl.platform.aws_f1
    target.config(compile="vitis", mode="sw_sim")
    s = hcl.create_schedule([A, B], kernel)

    # Inputs go to the accelerator, the final stage comes back to the host.
    s.to(A, target.xcel, stream_type=hcl.Stream.FIFO)
    s.to(B, target.xcel, stream_type=hcl.Stream.Copy)
    s.to(kernel.D, target.host, stream_type=hcl.Stream.FIFO)
    f = hcl.build(s, target)

    lhs = np.random.randint(10, size=shape)
    rhs = np.random.randint(10, size=shape)

    dev_lhs = hcl.asarray(lhs)
    dev_rhs = hcl.asarray(rhs, dtype=hcl.Int(32))
    dev_out = hcl.asarray(np.zeros(shape))
    f(dev_lhs, dev_rhs, dev_out)

    assert np.array_equal(dev_out.asnumpy(), lhs + rhs + 1)
def test_mixed_stream():
    """Run (A + B) * 2 * 3 through vivado_hls "csim" with mixed placement.

    The test returns early when no `vivado_hls` executable is found on
    PATH.  A and B are moved to the accelerator, stage "D" back to the
    host, and stage "C" is additionally connected to stage "D".
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    # Probe PATH without spawning a shell: no dependence on a `which`
    # binary and no stray stderr output when the tool is missing.
    if shutil.which("vivado_hls") is None:
        return

    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")

    def kernel(A, B):
        C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], "C")
        D = hcl.compute(C.shape, lambda i, j: C[i][j] * 2, "D")
        E = hcl.compute(C.shape, lambda i, j: D[i][j] * 3, "E")
        return E

    target = hcl.platform.aws_f1
    s = hcl.create_schedule([A, B], kernel)
    s.to([A, B], target.xcel)
    s.to(kernel.D, target.host)
    s.to(kernel.C, s[kernel.D])
    target.config(compile="vivado_hls", mode="csim")
    f = hcl.build(s, target)

    np_A = np.random.randint(10, size=(10, 32))
    np_B = np.random.randint(10, size=(10, 32))
    np_C = np.zeros((10, 32))

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B)
    hcl_C = hcl.asarray(np_C, dtype=hcl.Int(32))
    f(hcl_A, hcl_B, hcl_C)
    ret_C = hcl_C.asnumpy()

    np.testing.assert_array_equal(ret_C, (np_A + np_B) * 6)
def test_module_no_return():
    """An `hcl.def_` module without a return value can update a tensor.

    `update_B` writes A[x] + 1 into B[x]; it is called for x in [0, 10).
    """

    def algorithm(A, B):

        @hcl.def_([A.shape, B.shape, ()])
        def update_B(A, B, x):
            B[x] = A[x] + 1

        with hcl.Stage():
            with hcl.for_(0, 10) as i:
                update_B(A, B, i)

    A = hcl.placeholder((10, ))
    B = hcl.placeholder((10, ))
    f = hcl.build(hcl.create_schedule([A, B], algorithm))

    a = np.random.randint(100, size=(10, ))
    dev_a = hcl.asarray(a)
    dev_b = hcl.asarray(np.zeros(10), hcl.Int())
    f(dev_a, dev_b)

    dev_a.asnumpy()  # read back as in the original; the value is not used
    produced = dev_b.asnumpy()

    # Both arrays hold 10 elements, so zip visits indices 0..9.
    for source, got in zip(a, produced):
        assert got == source + 1
def test_mutate_complex():
    """`hcl.mutate` with a nested loop and condition counts entries > 5.

    For every row x of A, B[x] is incremented once for each A[x][y] that is
    greater than 5; the result is compared with a count done in Python.
    """

    def kernel(A, B):
        def foo(x):
            with hcl.for_(0, 10) as y:
                with hcl.if_(A[x][y] > 5):
                    B[x] += 1

        hcl.mutate((10, ), foo)

    A = hcl.placeholder((10, 10))
    B = hcl.placeholder((10, ))
    f = hcl.build(hcl.create_schedule([A, B], kernel))

    matrix = numpy.random.randint(10, size=(10, 10))
    # Reference: per-row count of entries greater than 5.
    expected_counts = [sum(1 for value in row if value > 5) for row in matrix]

    dev_matrix = hcl.asarray(matrix)
    dev_counts = hcl.asarray(numpy.zeros((10, )), dtype=hcl.Int(32))
    f(dev_matrix, dev_counts)

    counts = dev_counts.asnumpy()
    for row_index, expected in enumerate(expected_counts):
        assert counts[row_index] == expected
def test_fcompute_multiple_return_multi_dim():
    """A 3-D compute function may return from both branches of an if/else.

    The output holds the first index x where A[x, y, z] > 5 and 0 elsewhere.
    """

    def kernel(A):
        def foo(x, y, z):
            with hcl.if_(A[x, y, z] > 5):
                hcl.return_(x)
            with hcl.else_():
                hcl.return_(0)

        return hcl.compute(A.shape, foo)

    A = hcl.placeholder((10, 10, 10))
    f = hcl.build(hcl.create_schedule(A, kernel))

    source = numpy.random.randint(10, size=(10, 10, 10))
    dev_source = hcl.asarray(source)
    dev_result = hcl.asarray(numpy.zeros((10, 10, 10)), dtype=hcl.Int(32))
    f(dev_source, dev_result)

    result = dev_result.asnumpy()
    # Visit every (i, j, k) in the same row-major order as three nested loops.
    for i, j, k in numpy.ndindex(10, 10, 10):
        expected = i if source[i][j][k] > 5 else 0
        assert result[i][j][k] == expected