def test_partition_dim_factor():
    """Partition A with dim=1, factor=2 and look for the annotation in the lowered IR."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda x, y: A[x, y], "B")

    sched = hcl.create_schedule([A, B])
    sched.partition(A, dim=1, factor=2)

    lowered = str(hcl.lower(sched))
    expected = "partition variable=A complete factor=2 dim=1"
    assert expected in lowered
def test_index_split():
    """Split B's first axis by 5 and check the inner loop header in the generated code.

    The build target is read from the enclosing scope's ``target`` name.
    """
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    sched[B].split(B.axis[0], 5)

    generated = hcl.build(sched, target=target)
    inner_loop = "for (sc_int<32> y_inner = 0; y_inner < 5; ++y_inner)"
    assert inner_loop in generated
def test_partition_basic():
    """Partition A with default arguments and look for the annotation in the lowered IR."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda x, y: A[x, y], "B")

    sched = hcl.create_schedule([A, B])
    sched.partition(A)

    lowered = str(hcl.lower(sched))
    assert "partition variable=A" in lowered
def _build_kernel():
    """Build a three-stage pipeline: placeholder A, B = A * 2, C = B + 1.

    Returns:
        The tuple ``(A, B, C)`` of the placeholder and the two compute stages.
    """
    hcl.init()
    shape = (10, 20, 30)
    A = hcl.placeholder(shape, name="A")
    B = hcl.compute(A.shape, lambda i, j, m: A[i, j, m] * 2, name="B")
    C = hcl.compute(B.shape, lambda ii, jj, mm: B[ii, jj, mm] + 1, name="C")
    return A, B, C
def test_index_fuse():
    """Fuse both axes of B and check the index expression in the "vhls" output."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    outer_axis, inner_axis = B.axis[0], B.axis[1]
    sched[B].fuse(outer_axis, inner_axis)

    generated = hcl.build(sched, target="vhls")
    assert "B[(y_x_fused / 10)][(y_x_fused % 10)]" in generated
def test_index_fuse():
    """Fuse both axes of B and check the fused loop header in the generated code.

    The build target is read from the enclosing scope's ``target`` name.
    """
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    outer_axis, inner_axis = B.axis[0], B.axis[1]
    sched[B].fuse(outer_axis, inner_axis)

    generated = hcl.build(sched, target=target)
    fused_loop = "for (sc_int<32> y_x_fused = 0; y_x_fused < 100; ++y_x_fused)"
    assert fused_loop in generated
def test_index_split():
    """Split B's first axis by 5 and check the flattened index in the "vhls" output."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    sched[B].split(B.axis[0], 5)

    generated = hcl.build(sched, target="vhls")
    assert "B[(x + ((y_inner + (y_outer * 5)) * 10))]" in generated
def test_legacy_interface():
    """Fuse B's axes, build for "vhls", and check both arrays appear as 10x10."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    sched[B].fuse(B.axis[0], B.axis[1])

    generated = hcl.build(sched, target="vhls")
    for declaration in ("A[10][10]", "B[10][10]"):
        assert declaration in generated
def test_dtye_strcut_complex():
    """Run a kernel that chains two struct-typed stages and compare with NumPy.

    Stage D packs (A, B, C) into an x/y/z struct; stage E holds the six
    pairwise products in a v0..v5 struct; the final stage copies E's
    fields into the columns of O.
    """
    hcl.init()
    A = hcl.placeholder((100, ))
    B = hcl.placeholder((100, ))
    C = hcl.placeholder((100, ))
    O = hcl.placeholder((100, 6))

    def kernel(A, B, C, O):
        dtype_xyz = hcl.Struct({
            "x": hcl.Int(),
            "y": hcl.Int(),
            "z": hcl.Int()
        })
        dtype_out = hcl.Struct({
            "v0": hcl.Int(),
            "v1": hcl.Int(),
            "v2": hcl.Int(),
            "v3": hcl.Int(),
            "v4": hcl.Int(),
            "v5": hcl.Int()
        })

        D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=dtype_xyz)
        E = hcl.compute(
            A.shape,
            lambda x: (D[x].x * D[x].x,
                       D[x].y * D[x].y,
                       D[x].z * D[x].z,
                       D[x].x * D[x].y,
                       D[x].y * D[x].z,
                       D[x].x * D[x].z),
            dtype=dtype_out)

        with hcl.Stage():
            with hcl.for_(0, 100) as i:
                # Plain Python loop: one assignment is emitted per struct field.
                for j in range(0, 6):
                    O[i][j] = E[i].__getattr__("v" + str(j))

    s = hcl.create_schedule([A, B, C, O], kernel)
    f = hcl.build(s)

    np_A = np.random.randint(10, size=100)
    np_B = np.random.randint(10, size=100)
    np_C = np.random.randint(10, size=100)
    np_O = np.zeros((100, 6))

    # Reference result, one column per output struct field (v0..v5).
    golden = np.zeros((100, 6)).astype("int")
    golden[:, 0] = np_A * np_A
    golden[:, 1] = np_B * np_B
    golden[:, 2] = np_C * np_C
    golden[:, 3] = np_A * np_B
    golden[:, 4] = np_B * np_C
    golden[:, 5] = np_A * np_C

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np_B)
    hcl_C = hcl.asarray(np_C)
    hcl_O = hcl.asarray(np_O)
    f(hcl_A, hcl_B, hcl_C, hcl_O)

    assert np.array_equal(hcl_O.asnumpy(), golden)
def test_fuse_num_axis():
    """Fuse axes given by integer index (1 and 2) and check the fused name in the IR."""
    hcl.init()
    shape = (10, 20, 30, 40)
    a = hcl.placeholder(shape)
    b = hcl.placeholder(shape)
    c = hcl.compute(a.shape, lambda i, j, k, l: a[i, j, k, l] + b[i, j, k, l])

    sched = hcl.create_schedule([a, b, c])
    sched[c].fuse(1, 2)

    lowered = hcl.lower(sched)
    assert "j.k.fused" in str(lowered)
def test_reshape():
    """Reshape intermediate B to (2, 5, 2, 5) and check its allocation in the IR."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda x, y: A[x, y], "B")
    C = hcl.compute(A.shape, lambda x, y: B[x, y], "C")

    # Only A and C are passed to the schedule; B is not in the argument list.
    sched = hcl.create_schedule([A, C])
    sched.reshape(B, (2, 5, 2, 5))

    lowered = str(hcl.lower(sched))
    assert "allocate B[int32 * 2 * 5 * 2 * 5]" in lowered
def test_sim_(length):
    """Exercise the byte-swap IP on the vlab platform in debug, sw_sim and hw_sim modes.

    Args:
        length: number of elements in the 1-D input vector.
    """
    hcl.init(hcl.UInt(32))
    input_vec = hcl.placeholder((length, ), name="input")

    # assume gsize = lsize = 1
    def math_func(input_vec):
        new_vec = hlib.ip.byte_swap_rtl(input_vec)
        return hcl.compute(input_vec.shape,
                           lambda *args: new_vec[args] + 1,
                           name="ret")

    s = hcl.create_schedule([input_vec], math_func)
    target = hcl.platform.vlab
    target.config(compile="aocl", mode="debug")
    s.to(input_vec, target.xcel)
    s.to(math_func.ret, target.host)

    # test debug mode (source code checking)
    code = hcl.build(s, target)
    assert "my_byteswap(input[k])" in code

    # test software emulation first, then modelsim simulation
    for sim_mode in ("sw_sim", "hw_sim"):
        target.config(compile="aocl", mode=sim_mode)
        f = hcl.build(s, target)

        x_np = np.random.randint(low=2**16, high=2**20, size=length)
        y_np = np.zeros((length))
        x_hcl = hcl.asarray(x_np)
        y_hcl = hcl.asarray(np.zeros((length)))

        f(x_hcl, y_hcl)
        if sim_mode == "hw_sim":
            # A report is requested only for the hardware simulation run.
            f.report(target)

        # Reference: swap the 16-bit halves, mask to 32 bits, then add one.
        for i in range(length):
            swapped = np.bitwise_or(x_np[i] << 16, x_np[i] >> 16)
            y_np[i] = np.bitwise_and((1 << 32) - 1, swapped)
            y_np[i] = y_np[i] + 1

        np.testing.assert_array_equal(y_np, y_hcl.asnumpy())
def test_ac_fixed():
    """Build a fixed-point add for 'ihls' and check the emitted ac_fixed types."""
    hcl.init()
    A = hcl.placeholder((1, 32), dtype=hcl.Fixed(5, 3))
    B = hcl.placeholder((1, 32), dtype=hcl.UFixed(5, 3))
    C = hcl.compute(A.shape,
                    lambda i, j: A[i][j] + B[i][j],
                    dtype=hcl.Fixed(7, 4))

    sched = hcl.create_schedule([A, B, C])
    generated = hcl.build(sched, target='ihls')

    expected_types = (
        "ac_fixed<5, 2, true>",
        "ac_fixed<5, 2, false>",
        "ac_fixed<7, 3, true>",
    )
    for type_name in expected_types:
        assert type_name in generated
def test2():
    """Apply dataflow to stage B's first axis, build for "vhls", and print the result."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")

    def kernel(A):
        B = hcl.compute(A.shape, lambda x, y: A[x][y] + 1, "B")
        return B

    sched = hcl.create_schedule(A, kernel)
    stage_b = kernel.B
    sched[stage_b].dataflow(stage_b.axis[0])

    built = hcl.build(sched, "vhls")
    print(built)
def test_reuse_compute():
    """Apply reuse_at on B for a 3-tap sum along x, print the IR, and build."""
    hcl.init()
    A = hcl.placeholder((10, 10), name="A")
    B = hcl.compute((10, 10), lambda y, x: A[y, x], "B")
    C = hcl.compute((10, 8),
                    lambda y, x: B[y, x] + B[y, x + 1] + B[y, x + 2],
                    "C")

    sched = hcl.create_schedule([A, B, C])
    reuse_buf = sched.reuse_at(B, sched[C], C.axis[1])

    print(hcl.lower(sched))
    built = hcl.build(sched)
def test_ap_int():
    """Build a narrow-integer add for 'aocl' and check the emitted integer types."""
    hcl.init()
    A = hcl.placeholder((1, 32), dtype=hcl.Int(3))
    B = hcl.placeholder((1, 32), dtype=hcl.UInt(3))
    C = hcl.compute(A.shape,
                    lambda i, j: A[i][j] + B[i][j],
                    dtype=hcl.Int(8))

    sched = hcl.create_schedule([A, B, C])
    generated = hcl.build(sched, target='aocl')

    for type_name in ("ap_int<3>", "ap_uint<3>", "int8"):
        assert type_name in generated
def test_ac_int():
    """Build a narrow-integer add for 'ihls' and check the emitted ac_int types."""
    hcl.init()
    A = hcl.placeholder((1, 32), dtype=hcl.Int(3))
    B = hcl.placeholder((1, 32), dtype=hcl.UInt(3))
    C = hcl.compute(A.shape,
                    lambda i, j: A[i][j] + B[i][j],
                    dtype=hcl.Int(8))

    sched = hcl.create_schedule([A, B, C])
    generated = hcl.build(sched, target='ihls')

    expected_types = (
        "ac_int<3, true>",
        "ac_int<3, false>",
        "ac_int<8, true>",
    )
    for type_name in expected_types:
        assert type_name in generated
def test_reuse_compute_sum():
    """Apply reuse_at on B for an hcl.sum over a 3-wide reduce axis, print the IR, and build."""
    hcl.init()
    rx = hcl.reduce_axis(0, 3, name="rx")
    A = hcl.placeholder((10, 10), name="A")
    B = hcl.compute((10, 10), lambda y, x: A[y, x], "B")
    C = hcl.compute((10, 8),
                    lambda y, x: hcl.sum(B[y, x + rx], axis=rx),
                    "C")

    sched = hcl.create_schedule([A, B, C])
    reuse_buf = sched.reuse_at(B, sched[C], C.axis[1])

    print(hcl.lower(sched))
    built = hcl.build(sched)
def test_print(target):
    """Build a kernel containing hcl.print for the given target.

    Args:
        target: build target forwarded to ``hcl.build``.
    """
    hcl.init()
    A = hcl.placeholder((10, 32))

    def kernel(A):
        hcl.print(A[0])
        return hcl.compute(A.shape, lambda *args: A[args])

    sched = hcl.create_schedule([A], kernel)
    # The test only requires that the build call completes.
    generated = hcl.build(sched, target=target)
def from_nnvm(model, frontend, filename, input_shape=None, batch_size=None,
              layout="NCHW", dtype=hcl.Float(), target=None):
    """Load a model through NNVM and build it as a HeteroCL function.

    Args:
        model: passed to ``keras.models.load_model``.
        frontend: front-end name; only ``"keras"`` is accepted.
        filename: name given to ``hcl.build`` and ``save_file`` when a
            target is supplied.
        input_shape: shape forwarded to ``gen_placeholders`` for arg nodes
            whose name contains ``'input'``.
        batch_size: forwarded to ``gen_placeholders`` for every arg node.
        layout: forwarded to ``gen_function``.
        dtype: data type passed to ``hcl.init``.
        target: build target; ``None`` builds with ``hcl.build`` defaults.

    Returns:
        ``(function, params)`` where ``params`` is a tuple of
        ``hcl.asarray``-converted parameters. If building or saving for
        ``target`` raises ``ValueError``, a message is printed and ``None``
        is returned instead.

    Raises:
        NameError: if ``frontend`` is not ``"keras"``.
    """
    if frontend != "keras":
        raise NameError('frontend {} is not a valid frontend'.format(frontend))
    nnvm_model = keras.models.load_model(model)
    graph, params = nnvm.frontend.from_keras(nnvm_model)

    # generate json from model
    graph = _graph.create(graph)
    _json = json.loads(graph.json())
    nodes = _json["nodes"]

    # convert params to dict of numpy arrays (values replaced in place)
    param_shape = {}
    for key in params:
        params[key] = params[key].asnumpy()
        param_shape[key] = params[key].shape
    # add shapes to _json for lower function
    _json["param_shape"] = param_shape

    var_sym = {}
    hcl.init(dtype)

    # generate placeholders
    for node in _json["arg_nodes"]:
        node_name = nodes[node]['name']
        if 'input' in node_name:
            gen_placeholders(var_sym, node_name, batch_size, input_shape,
                             False)
        else:
            gen_placeholders(var_sym, node_name, batch_size,
                             params[node_name].shape, True)

    args = [var_sym[name] for name in var_sym]
    func = gen_function(var_sym, nodes, _json['heads'][0][0], layout)
    s = gen_schedule(args, func)

    # transform params so they can be used in function
    hcl_params = tuple(hcl.asarray(params[key]) for key in params)

    # Identity check: `== None` would invoke any custom __eq__ on target.
    if target is None:
        return hcl.build(s), hcl_params

    try:
        f = hcl.build(s, target=target, name=filename)
        save_file(filename, f, target)
        return f, hcl_params
    except ValueError:
        print(
            "target {} provided is not a compatible target".format(target))
        # Best-effort path kept from the original: report and return None.
        return None
def test_numpy_operator_err_msg_0():
    """Comparing a NumPy scalar with a tensor element must raise hcl.debug.APIError."""
    hcl.init()
    A = hcl.placeholder((10, 10))
    np_A = np.array([5, 10, 15])

    api_error_raised = False
    try:
        # NumPy value on the left-hand side of the comparison.
        np_A[1] > A[5, 5]
    except hcl.debug.APIError:
        api_error_raised = True

    assert api_error_raised
def test_set_slice(target, string):
    """Assign to a bit slice of A[0] and check the generated code contains `string`.

    Args:
        target: build target forwarded to ``hcl.build``.
        string: text expected to appear in the generated code.
    """
    hcl.init()
    A = hcl.placeholder((10, ), "A")

    def kernel(A):
        with hcl.Stage("S"):
            A[0][5:1] = 1

    sched = hcl.create_schedule([A], kernel)
    generated = hcl.build(sched, target=target)
    assert string in generated
def test_unroll_num_axis():
    """Unroll axis 0 (given by index) with factor 4 and check the hint in the IR."""
    hcl.init()
    factor = 4
    shape = (10, 20)
    a = hcl.placeholder(shape)
    b = hcl.placeholder(shape)
    c = hcl.compute(a.shape, lambda i, j: a[i, j] + b[i, j])

    sched = hcl.create_schedule([a, b, c])
    sched[c].unroll(0, factor=factor)

    lowered = hcl.lower(sched)
    unroll_hint_str = '"factor"={}'.format(factor)
    assert unroll_hint_str in str(lowered)
def test_pipeline():
    """Pipeline c's first axis with initiation interval 4 and check the hint in the IR."""
    hcl.init()
    initiation_interval = 4
    shape = (10, 20)
    a = hcl.placeholder(shape)
    b = hcl.placeholder(shape)
    c = hcl.compute(a.shape, lambda i, j: a[i, j] + b[i, j])

    sched = hcl.create_schedule([a, b, c])
    sched[c].pipeline(c.axis[0], initiation_interval)

    lowered = hcl.lower(sched)
    pipeline_hint_str = '"initiation_interval"={}'.format(initiation_interval)
    assert pipeline_hint_str in str(lowered)
def test_bitcast_expr():
    """Bitcast a Float(32) sum to UInt(32) and check 'bitcast' appears in the lowered IR."""
    hcl.init()
    shape = (10, 10)
    A = hcl.placeholder(shape, dtype=hcl.Float(32), name='A')
    B = hcl.placeholder(shape, dtype=hcl.Float(32), name='B')

    def algorithm(A, B):
        idx = hcl.bitcast(A[0, 0] + B[0, 0], hcl.UInt(32))
        return hcl.compute((10, 10), lambda x, y: A[x][y] + B[x][y] + idx)

    sched = hcl.create_schedule([A, B], algorithm)
    lowered = str(hcl.lower(sched))
    assert 'bitcast' in lowered
def test_simplify_slice():
    """Assign to the slice A[5][2:2] and check "2:2" is absent from the lowered IR."""
    hcl.init()
    A = hcl.placeholder((10, ), "A")

    def kernel(A):
        with hcl.Stage():
            A[5][2:2] = 4

    sched = hcl.create_schedule(A, kernel)
    lowered = hcl.lower(sched)
    assert "2:2" not in str(lowered)
def test():
    """Build a kernel that calls ``foo`` via a scheme-derived schedule and print the "vhls" result.

    ``foo`` is resolved from the enclosing scope.
    """
    hcl.init()
    A = hcl.placeholder((8, 8), "A")

    def kernel(A):
        return hcl.compute((8, 8),
                           lambda y, x: foo(A[y, x] + A[y, x]),
                           "C")

    scheme = hcl.create_scheme([A], kernel)
    sched = hcl.create_schedule_from_scheme(scheme)
    built = hcl.build(sched, "vhls")
    print(built)
def main():
    """Create a schedule for ``HJ_PDE_solver`` over three Float placeholders and print its IR."""
    hcl.init()
    grid_shape = (50, 50, 50)
    V_f = hcl.placeholder(grid_shape, name="V_f", dtype=hcl.Float())
    V_init = hcl.placeholder(grid_shape, name="V_init", dtype=hcl.Float())
    thetas = hcl.placeholder((50, ), name="thetas", dtype=hcl.Float())

    # Create schedule
    sched = hcl.create_schedule([V_f, V_init, thetas], HJ_PDE_solver)

    # Inspect IR
    lowered = hcl.lower(sched)
    print(lowered)
def test_reuse_compute_nd():
    """Apply reuse_at on 3-D input A for a sum over two reduce axes, print the IR, and build."""
    hcl.init()
    nz = 1
    rx = hcl.reduce_axis(0, 3, name="rx")
    rz = hcl.reduce_axis(0, nz, name="rz")
    A = hcl.placeholder((nz, 10, 10), name="A")
    B = hcl.compute((10, 8),
                    lambda y, x: hcl.sum(A[rz, y, x + rx], axis=[rz, rx]),
                    "B")

    sched = hcl.create_schedule([A, B])
    reuse_buf = sched.reuse_at(A, sched[B], B.axis[1])

    print(hcl.lower(sched))
    built = hcl.build(sched)
def test_ap_int():
    """Build a narrow-integer add for 'aocl', print it, and check the pragma and typedefs."""
    hcl.init()
    A = hcl.placeholder((1, 32), dtype=hcl.Int(3))
    B = hcl.placeholder((1, 32), dtype=hcl.UInt(3))
    C = hcl.compute(A.shape,
                    lambda i, j: A[i][j] + B[i][j],
                    dtype=hcl.Int(8))

    sched = hcl.create_schedule([A, B, C])
    generated = hcl.build(sched, target='aocl')
    print(generated)

    expected_snippets = (
        "#pragma OPENCL EXTENSION cl_intel_arbitrary_precision_integers : enable",
        "ap_int<3> intd_t",
        "ap_uint<3> uintd_t",
        "ap_int<8> intd_t",
    )
    for snippet in expected_snippets:
        assert snippet in generated