def test_reuse_blur_y_3D():
    """Run a 3-tap sum along axis 0 of a 3-D tensor scheduled with ``reuse_at``.

    The built function is executed on random integer input and its output
    is compared against a NumPy reference computed from the same input.
    """
    hcl.init()
    A = hcl.placeholder((10, 10, 2))
    B = hcl.compute(
        (8, 10, 2),
        lambda y, x, c: A[y, x, c] + A[y+1, x, c] + A[y+2, x, c])
    sched = hcl.create_schedule([A, B])
    # The return value of reuse_at is not used by the rest of the test.
    sched.reuse_at(A, sched[B], B.axis[0])
    func = hcl.build(sched)

    data_in = np.random.randint(0, 10, size=(10, 10, 2))

    # Reference result: each output row is the sum of three consecutive
    # input rows, written here as a sum of shifted slices.
    expected = np.zeros((8, 10, 2), dtype="int")
    expected[:] = data_in[0:8] + data_in[1:9] + data_in[2:10]

    hcl_A = hcl.asarray(data_in)
    hcl_B = hcl.asarray(np.zeros((8, 10, 2), dtype="int"))
    func(hcl_A, hcl_B)

    assert np.array_equal(hcl_B.asnumpy(), expected)
def build_bnn_inf(batch_size=batch_size,target=target):
    """Schedule ``build_bnn`` without optimizations and build it.

    One placeholder is created for the input image and one for every
    entry of the module-level ``params`` mapping; the schedule over those
    placeholders is then passed to ``hcl.build`` for ``target``.

    Parameters
    ----------
    batch_size
        Leading dimension of the ``(batch_size, 1, 16, 16)`` input image.
    target
        Forwarded unchanged to ``hcl.build``.

    Returns
    -------
    Whatever ``hcl.build`` returns for the schedule.
    """
    input_image = hcl.placeholder((batch_size,1,16,16),"input_image",qtype_bit)

    def _param_placeholder(name):
        # Names containing "conv" or "w_" use the bit type, the rest the
        # float type.
        uses_bit_type = "conv" in name or "w_" in name
        dtype = qtype_bit if uses_bit_type else qtype_float
        return hcl.placeholder(params[name].shape,name,dtype=dtype)

    hcl_ph = [_param_placeholder(name) for name in params]

    # build the network
    s = hcl.create_schedule([input_image] + hcl_ph, build_bnn)

    # Data placement is currently disabled:
    # if isinstance(target,hcl.platform):
    #     s.to([input_image] + hcl_ph, target.xcel)
    #     s.to(build_bnn.fc2, target.host)
    #     target.config(compile="vivado_hls", mode="csyn")
    return hcl.build(s, target=target)
def test_if():
    """Check lowering and execution of an imperative if/else after split + reorder.

    The stage writes ``A[x, y]`` or ``-A[x, y]`` into ``B`` depending on
    the sign test.  The lowered IR is inspected node by node, then the
    built function is compared with ``np.abs``.
    """
    hcl.init()

    def absolute(A, B):
        with hcl.for_(0, A.shape[0], name="x") as x:
            with hcl.for_(0, A.shape[1], name="y") as y:
                with hcl.if_(A[x, y] >= 0):
                    B[x, y] = A[x, y]
                with hcl.else_():
                    B[x, y] = -A[x, y]

    A = hcl.placeholder((10, 20), name="A", dtype="float32")
    B = hcl.placeholder(A.shape, name="B", dtype="float32")
    with hcl.Stage() as C:
        absolute(A, B)

    sched = hcl.create_schedule([A, B])
    outer, inner = sched[C].split(C.x, factor=3)
    sched[C].reorder(inner, outer)

    # test lower
    lowered = hcl.lower(sched)

    # Walk down the IR one level at a time, in the same order the
    # assertions need the nodes.
    inner_loop = lowered.body.body.body.body
    assert str(inner_loop).startswith("for (x.inner, 0, 3)")

    outer_loop = inner_loop.body
    assert str(outer_loop).startswith("for (x.outer, 0, 4)")

    y_loop = outer_loop.body
    assert str(y_loop).startswith("for (y, 0, 20)")

    # 10 is not a multiple of 3, so the split loops carry a bound check.
    bound_guard = y_loop.body
    assert str(bound_guard.condition).startswith(
        "(x.inner < (10 - (x.outer*3)))")

    sign_branch = bound_guard.then_case
    assert str(sign_branch.condition).startswith(
        "(0.000000f <= A[(y + ((x.inner + (x.outer*3))*20))])")
    assert str(sign_branch.then_case).startswith(
        "B[(y + ((x.inner + (x.outer*3))*20))] = A[(y + ((x.inner + (x.outer*3))*20))]"
    )
    assert str(sign_branch.else_case).startswith(
        "B[(y + ((x.inner + (x.outer*3))*20))] = (A[(y + ((x.inner + (x.outer*3))*20))]*-1.000000f)"
    )

    # test build
    func = hcl.build(sched)
    a_np = np.random.random((A.shape))
    a_hcl = hcl.asarray(a_np, dtype="float32")
    b_hcl = hcl.asarray(np.zeros(B.shape), dtype="float32")
    func(a_hcl, b_hcl)
    expected = np.abs(a_np)
    np.testing.assert_allclose(expected, b_hcl.asnumpy())
def test_set_slice():
    """Check the "ihls" code emitted when a bit slice of an element is assigned."""
    A = hcl.placeholder((10,), "A")

    def kernel(A):
        with hcl.Stage("S"):
            A[0][5:1] = 1

    schedule = hcl.create_schedule([A], kernel)
    generated = hcl.build(schedule, target="ihls")

    expected = "A[0].set_slc(1, ((ac_int<4, false>)1))"
    assert expected in generated
def test_set_bit():
    """Check the "ihls" code emitted when a single bit of an element is assigned."""
    A = hcl.placeholder((10,), "A")

    def kernel(A):
        with hcl.Stage("S"):
            A[0][4] = 1

    schedule = hcl.create_schedule([A], kernel)
    generated = hcl.build(schedule, target="ihls")

    expected = "A[0][4] = 1"
    assert expected in generated
def test_binary_conv():
    """Check the "aocl" loop nest produced after splitting axis 1 of a 1-bit convolution.

    64 is not a multiple of the split factor 5, so besides the outer and
    inner loops the generated code is expected to contain a bound check.
    """
    hcl.init()
    A = hcl.placeholder((1, 32, 14, 14), dtype=hcl.UInt(1), name="A")
    B = hcl.placeholder((64, 32, 3, 3), dtype=hcl.UInt(1), name="B")
    rc = hcl.reduce_axis(0, 32)
    ry = hcl.reduce_axis(0, 3)
    rx = hcl.reduce_axis(0, 3)

    def _conv(nn, ff, yy, xx):
        return hcl.sum(
            A[nn, rc, yy + ry, xx + rx] * B[ff, rc, ry, rx],
            axis=[rc, ry, rx])

    C = hcl.compute(
        (1, 64, 12, 12),
        lambda nn, ff, yy, xx: _conv(nn, ff, yy, xx),
        dtype=hcl.UInt(8),
        name="C")

    sched = hcl.create_schedule([A, B, C])
    sched[C].split(C.axis[1], factor=5)
    generated = hcl.build(sched, target='aocl')

    assert "for (int32_t ff_outer = 0; ff_outer < 13; ++ff_outer)" in generated
    assert "for (int32_t ff_inner = 0; ff_inner < 5; ++ff_inner)" in generated
    assert "if (ff_inner < (64 - (ff_outer * 5)))" in generated
def test_super_stage():
    """Check data placement for a kernel whose stages are nested in a super stage.

    The kernel computes ``C = A + B`` and then, inside the "Super" stage,
    updates ``C`` and runs a nested "Plus" stage that writes column 0.
    Two placement scenarios are run; a third one is defined but not
    invoked.
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")
    target = hcl.Platform.aws_f1

    def kernel(A, B):
        C = hcl.compute((10, 32), lambda *args: A[args] + B[args], "C")

        with hcl.Stage("Super") as super_stage:
            hcl.update(C, lambda *args: C[args] + 1, "update")

            with hcl.Stage("Plus") as plus_stage:
                with hcl.for_(0, 10) as row:
                    C[row, 0] = 10
        return C

    # place the whole super stage body on device
    def _test_super_stage_on_device():
        sched = hcl.create_schedule([A, B], kernel)
        sched.to([A, B], target.xcel)
        sched.to(kernel.Super.Plus.C, target.host)
        code = str(hcl.lower(sched))
        assert "test(C, A, B)" in code, code
        print("Succeed!")

    # place the whole super stage body on device, with streaming inputs
    def _test_super_stage_on_device_stream():
        sched = hcl.create_schedule([A, B], kernel)
        sched.to([A, B], target.xcel, mode=hcl.IO.Stream, fifo_depth=10)
        sched.to(kernel.Super.Plus.C, target.host, fifo_depth=10)
        code = str(hcl.lower(sched))
        assert "io attr: \"C\" mem(0) port(0) io_type(0) fifo_depth(10) direction(1)" in code, code
        print("Succeed!")

    # yet to support (defined only, never called below)
    def _test_partial_super_stage_on_device():
        sched = hcl.create_schedule([A, B], kernel)
        sched.to([A, B], target.xcel)
        sched.to(kernel.Super.update.C, target.host)

    _test_super_stage_on_device()
    _test_super_stage_on_device_stream()
def inter_stage_fork():
    """Send stage ``C`` to two consumer stages and check both pipe buffers are allocated."""
    hcl.init()
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")

    def kernel(A, B):
        C = hcl.compute(A.shape, lambda i, j: A[i, j] + B[i, j], "C")
        D = hcl.compute(C.shape, lambda i, j: C[i, j] + 1, "D")
        E = hcl.compute(C.shape, lambda i, j: C[i, j] * 2, "E")
        return D, E

    # Looked up as in the original; not used further in this function.
    target = hcl.Platform.aws_f1

    sched = hcl.create_schedule([A, B], kernel)
    consumers = [kernel.D, kernel.E]
    sched.to(kernel.C, consumers)
    lowered = str(hcl.lower(sched))

    assert "allocate C.pipe.1[int32 * 10 * 32]" in lowered
    assert "allocate C.pipe.2[int32 * 10 * 32]" in lowered
def _build_kernel():
    """Create a three-tensor chain ``A -> B = A * 2 -> C = B + 1``.

    Returns
    -------
    tuple
        The placeholder ``A`` and the two compute tensors ``B`` and ``C``,
        all of shape ``(10, 20, 30)``.
    """
    hcl.init()
    A = hcl.placeholder((10, 20, 30), name="A")

    def _double(i, j, m):
        return A[i, j, m] * 2

    B = hcl.compute(A.shape, lambda i, j, m: _double(i, j, m), name="B")

    def _plus_one(ii, jj, mm):
        return B[ii, jj, mm] + 1

    C = hcl.compute(B.shape, lambda ii, jj, mm: _plus_one(ii, jj, mm), name="C")
    return A, B, C
def test_mutate():
    """Build a schedule whose ``hcl.mutate`` body contains an ``hcl.assert_``.

    The loop body keeps ``M[0]`` and ``M[1]`` updated from ``A[x]`` and
    asserts ``x == 2`` in the branch where ``A[x]`` exceeds both.

    Returns
    -------
    The schedule created from the kernel; it is not built or run here.
    """
    hcl.init(raise_assert_exception=False)

    A = hcl.placeholder((10, ))
    M = hcl.placeholder((2, ))

    def kernel(A, M):
        def loop_body(x):
            with hcl.if_(A[x]> M[0]):
                with hcl.if_(A[x]> M[1]):
                    hcl.assert_(x == 2, "assert error in if--value of x: %d", x)
                    M[0] = M[1]
                    M[1] = A[x]
                with hcl.else_():
                    M[0] = A[x]

        hcl.mutate(A.shape, lambda x : loop_body(x))
        hcl.print(0, "this should not be printed\n")

    schedule = hcl.create_schedule([A, M], kernel)
    return schedule
def test_multiple_subgraph():
    """Move both inputs to the device and the result to the host, then check the IO attributes.

    ``C`` and ``D`` are computed independently from ``A`` and ``B`` and
    combined in ``E``; the lowered IR must mention an io attr for ``A``,
    ``B`` and ``E``.
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")

    def kernel(A, B):
        C = hcl.compute(A.shape, lambda i, j: A[i,j] + 1, "C")
        D = hcl.compute(C.shape, lambda i, j: B[i,j] + 1, "D")
        E = hcl.compute(C.shape, lambda i, j: C[i,j] + D[i,j], "E")
        return E

    platform = hcl.Platform.aws_f1
    sched = hcl.create_schedule([A, B], kernel)
    sched.to([A, B], platform.xcel)
    sched.to([kernel.E], platform.host)

    lowered = str(hcl.lower(sched))
    assert "io attr: \"B\"" in lowered
    assert "io attr: \"A\"" in lowered
    assert "io attr: \"E\"" in lowered
def test_index_fuse():
    """Fuse both axes of a 10x10 copy and check the fused loop header.

    The build target is taken from the module-level ``target`` name.
    """
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    y_axis, x_axis = B.axis[0], B.axis[1]
    sched[B].fuse(y_axis, x_axis)

    generated = hcl.build(sched, target=target)
    expected_loop = "for (sc_int<32> y_x_fused = 0; y_x_fused < 100; ++y_x_fused)"
    assert expected_loop in generated
def test_index_split():
    """Split axis 0 of a 10x10 copy by 5 and check the inner loop header.

    The build target is taken from the module-level ``target`` name.
    """
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    sched[B].split(B.axis[0], 5)

    generated = hcl.build(sched, target=target)
    expected_loop = "for (sc_int<32> y_inner = 0; y_inner < 5; ++y_inner)"
    assert expected_loop in generated
def test_inter_stage():
    """Send stage ``C`` to stage ``D`` and check the lowered IR for pipe write and read."""
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")

    def kernel(A, B):
        C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], "C")
        D = hcl.compute(C.shape, lambda i, j: C[i][j], "D")
        return D

    # Looked up as in the original; not used further in this function.
    target = hcl.platform.aws_f1

    sched = hcl.create_schedule([A, B], kernel)
    consumer_stage = sched[kernel.D]
    sched.to(kernel.C, consumer_stage)

    lowered = str(hcl.lower(sched))
    assert "C.pipe1.write" in lowered
    assert "C.pipe1.read" in lowered
def test_placeholders():
    """Build a three-stage pipeline for the configured ``aws_f1`` platform and print the result.

    There are no assertions; the test passes if configuring and building
    do not raise.
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")
    C = hcl.placeholder((10, 32), "C")

    D = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], "D")
    E = hcl.compute(C.shape, lambda i, j: C[i][j] * D[i][j], "E")
    F = hcl.compute(C.shape, lambda i, j: E[i][j] + 1, "F")

    platform = hcl.platform.aws_f1
    sched = hcl.create_schedule([A, B, C, F])
    # Explicit data movement is currently disabled:
    # sched.to([A, B, C], platform.xcel)
    # sched.to(E, platform.host)
    platform.config(compile="sdaccel", backend="vhls")

    built = hcl.build(sched, platform)
    print(built)
def kernel(A_name, B_name):
    """Create a schedule for ``B = A + 1`` over a 4-D ``UInt(33)`` placeholder.

    Parameters
    ----------
    A_name
        Name given to the input placeholder.
    B_name
        Name given to the compute stage.

    Returns
    -------
    The schedule created over the two tensors.
    """
    hcl.init()
    source = hcl.placeholder(
        (1, 2, 3, 4), dtype=hcl.UInt(33), name=A_name)
    incremented = hcl.compute(
        (1, 2, 3, 4),
        lambda x, y, z, w: source[x, y, z, w] + 1,
        name=B_name)
    return hcl.create_schedule([source, incremented])
def inter_stage_join():
    """Join two update stages of ``C`` into the final stage and check the pipe accesses.

    ``C`` is zero-initialised, updated by stages "s1" and "s2", and then
    read by stage "ret"; ``s.join`` connects both updates to "ret".
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")

    def kernel(A, B):
        C = hcl.compute(A.shape, lambda i, j: 0, "C")
        hcl.update(C, lambda i, j: A[i,j] + 1, "s1")
        hcl.update(C, lambda i, j: B[i,j] * 2, "s2")
        ret = hcl.compute(C.shape, lambda *args: C[args] + 3, "ret")
        return ret

    # Looked up as in the original; not used further in this function.
    target = hcl.platform.aws_f1

    sched = hcl.create_schedule([A, B], kernel)
    producers = [kernel.s1.C, kernel.s2.C]
    sched.join(producers, kernel.ret.C)

    lowered = str(hcl.lower(sched))
    assert "C.pipe1.read" in lowered
    assert "C.pipe2.write" in lowered
def systolic(m=16, k=16, n=16, dtype=hcl.Int(), target=None):
    """Build a matrix-multiply kernel written as a (time, row, column) mutate loop.

    ``m_output[y, x]`` accumulates ``m_A[y, kk] * m_B[kk, x]`` over the
    first ("time") axis; on the first time step the accumulator starts
    from 0 instead of the previous content of ``m_output``.

    Parameters
    ----------
    m, k, n
        Matrix dimensions: ``m_A`` is ``(m, k)``, ``m_B`` is ``(k, n)`` and
        ``m_output`` is ``(m, n)``.
    dtype
        Data type passed to ``hcl.init`` and used for all three tensors.
    target
        Forwarded unchanged to ``hcl.build``.

    Returns
    -------
    Whatever ``hcl.build`` returns for the schedule.
    """
    hcl.init(dtype)

    m_A = hcl.placeholder((m, k), dtype=dtype, name="m_A")
    m_B = hcl.placeholder((k, n), dtype=dtype, name="m_B")
    m_output = hcl.placeholder((m, n), dtype=dtype, name="m_output")

    # k (time) and y/x (spatial) dim
    def kernel(k, y, x):
        last = hcl.scalar(hcl.select(k == 0, 0, m_output[y, x]), "last")
        m_output[y, x] = last.v + m_A[y, k] * m_B[k, x]

    # The iteration domain must follow the tensor shapes: the time axis
    # indexes the shared dimension (extent k), y indexes rows (extent m)
    # and x indexes columns (extent n).  The previous domain was
    # (m, 16, 16), which only matched when m == k == n == 16.
    hcl.mutate((k, m, n), lambda k, y, x: kernel(k, y, x))

    s = hcl.create_schedule([m_A, m_B, m_output])
    f = hcl.build(s, target=target)
    return f
def test_partition_basic():
    """Partition ``A`` with default arguments and check the lowered IR mentions it."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda x, y: A[x, y], "B")

    sched = hcl.create_schedule([A, B])
    sched.partition(A)

    lowered = str(hcl.lower(sched))
    assert "partition variable=A" in lowered
def test_partition_dim_factor():
    """Partition ``A`` with explicit ``dim`` and ``factor`` and check they appear in the lowered IR."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda x, y: A[x, y], "B")

    sched = hcl.create_schedule([A, B])
    sched.partition(A, dim=1, factor=2)

    lowered = str(hcl.lower(sched))
    expected = "partition variable=A complete factor=2 dim=1"
    assert expected in lowered
def test_index_fuse():
    """Fuse both axes of a 10x10 copy and check how "vhls" code indexes ``B``."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    y_axis, x_axis = B.axis[0], B.axis[1]
    sched[B].fuse(y_axis, x_axis)

    generated = hcl.build(sched, target="vhls")
    # The fused index is expected to be unpacked with / and % by 10.
    expected_access = "B[(y_x_fused / 10)][(y_x_fused % 10)]"
    assert expected_access in generated
def test_pragma():
    """Check the "aocl" pragmas emitted for ``unroll`` and ``pipeline``.

    Two independent schedules over the same tensors are built: one that
    unrolls axis 1 by 4 and one that pipelines axis 0 with an initiation
    interval of 2.
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32))
    C = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j])

    # unroll
    unroll_sched = hcl.create_schedule([A, B, C])
    unroll_sched[C].unroll(C.axis[1], factor=4)
    unroll_code = hcl.build(unroll_sched, target='aocl')
    assert "#pragma unroll 4" in unroll_code

    # pipeline
    pipeline_sched = hcl.create_schedule([A, B, C])
    pipeline_sched[C].pipeline(C.axis[0], initiation_interval=2)
    pipeline_code = hcl.build(pipeline_sched, target='aocl')
    assert "#pragma ii 2" in pipeline_code
def test_index_split():
    """Split axis 0 of a 10x10 copy by 5 and check how "vhls" code indexes ``B``."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    sched[B].split(B.axis[0], 5)

    generated = hcl.build(sched, target="vhls")
    expected_access = "B[(x + ((y_inner + (y_outer * 5)) * 10))]"
    assert expected_access in generated
def move_inputs():
    """Move three inputs to the device and ``E`` back to the host, then check the kernel call.

    The three input channels may be listed in any order in the lowered
    IR, so every ordering of ``A``, ``B`` and ``C`` is accepted.
    """
    hcl.init()
    A = hcl.placeholder((10, 32), "A")
    B = hcl.placeholder((10, 32), "B")
    C = hcl.placeholder((10, 32), "C")

    D = hcl.compute(A.shape, lambda i, j: A[i][j] + B[i][j], "D")
    E = hcl.compute(C.shape, lambda i, j: C[i][j] * D[i][j], "E")
    F = hcl.compute(C.shape, lambda i, j: E[i][j] + 1, "F")

    platform = hcl.platform.aws_f1
    sched = hcl.create_schedule([A, B, C, D, E, F])
    sched.to([A, B, C], platform.xcel)
    sched.to(E, platform.host)
    lowered = str(hcl.lower(sched))

    pattern = "test({}.channel, {}.channel, {}.channel, E.channel)"
    candidates = [
        pattern.format(*ordering)
        for ordering in permutations(["A", "B", "C"])
    ]
    assert any(signature in lowered for signature in candidates)
def build_bnn_inf_opt(batch_size=batch_size,target=target):
    """Schedule ``build_bnn`` with compute and memory optimizations and build it.

    Placeholders are created for the input image and for every entry of
    the module-level ``params`` mapping.  Every attribute found in
    ``build_bnn.__dict__`` is then scheduled according to its name, data
    placement is applied when ``target`` is an ``hcl.platform``, and all
    placeholders are partitioned before building.

    Parameters
    ----------
    batch_size
        Leading dimension of the ``(batch_size, 1, 16, 16)`` input image.
    target
        Forwarded to ``hcl.build``; when it is an ``hcl.platform`` it is
        also used for data placement and configured for "vivado_hls".

    Returns
    -------
    Whatever ``hcl.build`` returns for the schedule.
    """
    input_image = hcl.placeholder((batch_size,1,16,16),"input_image",qtype_bit)

    def _param_placeholder(name):
        # Names containing "conv" or "w_" use the bit type, the rest the
        # float type.
        uses_bit_type = "conv" in name or "w_" in name
        dtype = qtype_bit if uses_bit_type else qtype_float
        return hcl.placeholder(params[name].shape,name,dtype=dtype)

    hcl_ph = [_param_placeholder(name) for name in params]
    all_inputs = [input_image] + hcl_ph
    s = hcl.create_schedule(all_inputs, build_bnn)

    # compute optimization
    for layer in build_bnn.__dict__:
        s_layer = getattr(build_bnn,layer)
        if "bn" in layer:
            # fuse conv: the conv stage shares the trailing digit of the
            # bn layer name
            s_conv = getattr(build_bnn,"conv" + layer[-1])
            s[s_conv].compute_at(s[s_layer],s_layer.axis[3])
            if layer == "bn1":
                s[s_layer].pipeline(s_layer.axis[3]) # will be refreshed
            else:
                s[s_conv].pipeline(s_conv.axis[4])
        elif "pool" in layer:
            s[s_layer].pipeline(s_layer.axis[2])
        elif "fc" in layer or "flatten" in layer:
            s[s_layer].pipeline(s_layer.axis[1])
        elif "dense_relu" in layer:
            s_fc = getattr(build_bnn,"fc1")
            s[s_fc].compute_at(s[s_layer],s_layer.axis[1])
            s[s_fc].pipeline(s_fc.axis[2])

    if isinstance(target,hcl.platform):
        s.to([input_image] + hcl_ph, target.xcel)
        s.to(build_bnn.fc2, target.host)
        target.config(compile="vivado_hls", mode="csyn")

    # memory optimization
    s.partition(input_image, hcl.Partition.Block, dim=1, factor=8)
    for ph in reversed(hcl_ph):
        if ph.name in ["b_fc2", "fc2"]:
            s.partition(ph, hcl.Partition.Complete, dim=1)
        else:
            s.partition(ph, hcl.Partition.Block, dim=1, factor=8)

    return hcl.build(s, target=target)
def test_with_if():
    """Build a schedule with one ``hcl.assert_`` inside an ``hcl.if_`` and one after it.

    The shapes come from the module-level ``m``, ``k`` and ``n``.  The
    inline notes record the outcome the author expects for each
    statement when the kernel is run.

    Returns
    -------
    The schedule created from the kernel; it is not built or run here.
    """
    hcl.init()
    matrix_1 = hcl.placeholder((m, k))
    matrix_2 = hcl.placeholder((k, n))

    def kernel(matrix_1, matrix_2):
        return_matrix = hcl.compute(
            (m,k),
            lambda x, y: matrix_1[x,y] + matrix_2[x,y],
            "return_matrix")

        with hcl.if_(matrix_2[0,0] == 0):
            # result is true
            hcl.assert_(matrix_2[1,1] == 0, "assert message in if statement")
            # should be printed
            hcl.print(0, "in the if statement\n")

        # result is false
        hcl.assert_(matrix_1[0,0] != 0, "customized assert message 1")
        hcl.print(0, "this shouldn't be printed")
        return return_matrix

    schedule = hcl.create_schedule([matrix_1, matrix_2], kernel)
    return schedule
def test_pack():
    """Pack a ``UInt(3)`` tensor by a factor of 5 and check the bit range in the "vhls" code."""
    def pack(A):
        return hcl.pack(A, factor=5)

    A = hcl.placeholder((40, ), "A", dtype=hcl.UInt(3))
    schedule = hcl.create_schedule([A], pack)
    generated = hcl.build(schedule, target="vhls")

    # Each 3-bit element i is expected at bits (i*3 + 2) down to (i*3).
    slice_range = "(((i * 3) + 2), (i * 3))"
    assert slice_range in generated
def test_reshape():
    """Reshape the intermediate tensor ``B`` and check its allocation in the lowered IR."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda x, y: A[x, y], "B")
    C = hcl.compute(A.shape, lambda x, y: B[x, y], "C")

    sched = hcl.create_schedule([A, C])
    new_shape = (2, 5, 2, 5)
    sched.reshape(B, new_shape)

    lowered = str(hcl.lower(sched))
    assert "allocate B[int32 * 2 * 5 * 2 * 5]" in lowered
def test_legacy_interface():
    """Check that "vhls" code still declares ``A`` and ``B`` as 10x10 arrays after a fuse."""
    hcl.init()
    A = hcl.placeholder((10, 10), "A")
    B = hcl.compute(A.shape, lambda y, x: A[y][x], "B")

    sched = hcl.create_schedule([A, B])
    y_axis, x_axis = B.axis[0], B.axis[1]
    sched[B].fuse(y_axis, x_axis)

    generated = hcl.build(sched, target="vhls")
    assert "A[10][10]" in generated
    assert "B[10][10]" in generated
def test_get_slice():
    """Check the "ihls" code emitted when a bit slice of an element is read."""
    A = hcl.placeholder((10,), "A")

    def kernel(A):
        with hcl.Stage("S"):
            A[0] = A[0][5:1]

    schedule = hcl.create_schedule([A], kernel)
    generated = hcl.build(schedule, target="ihls")

    expected = "A[0].slc<4>(1)"
    assert expected in generated