Ejemplo n.º 1
0
def test_module_quantize_args():
    """Downsize a module argument via the scheme and verify the result.

    The ``A`` argument of the ``add`` module is downsized to ``UInt(2)``,
    so every output element is expected to equal ``a[i] % 4 + b[i]``.
    """
    hcl.init()

    def algorithm(A, B):
        @hcl.def_([A.shape, B.shape, ()])
        def add(A, B, x):
            hcl.return_(A[x] + B[x])

        return hcl.compute(A.shape, lambda x: add(A, B, x), "C")

    in_a = hcl.placeholder((10, ), dtype=hcl.UInt(2))
    in_b = hcl.placeholder((10, ))

    # scheme -> downsize the module argument -> schedule -> executable
    scheme = hcl.create_scheme([in_a, in_b], algorithm)
    scheme.downsize([algorithm.add.A], hcl.UInt(2))
    schedule = hcl.create_schedule_from_scheme(scheme)
    func = hcl.build(schedule)

    a = np.random.randint(100, size=(10, ))
    b = np.random.randint(100, size=(10, ))
    c = np.zeros(10)
    hcl_a = hcl.asarray(a, hcl.UInt(2))
    hcl_b = hcl.asarray(b)
    hcl_c = hcl.asarray(c)

    func(hcl_a, hcl_b, hcl_c)

    np_a = hcl_a.asnumpy()
    np_b = hcl_b.asnumpy()
    np_c = hcl_c.asnumpy()

    # compare against the original numpy inputs, element by element
    for idx, (lhs, rhs) in enumerate(zip(a, b)):
        assert (np_c[idx] == lhs % 4 + rhs)
Ejemplo n.º 2
0
 def test_uint_imm_ops():
     """Generate VHLS code for a select between a UInt(1) tensor and 0.

     Passes when the generated code contains ``(unsigned int)0U)``.
     """
     def kernel(A):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x], 0),
             "B")

     bit_input = hcl.placeholder((10, 10), "A", dtype=hcl.UInt(1))
     scheme = hcl.create_scheme(bit_input, kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     vhls_code = hcl.build(schedule, target="vhls")
     assert "(unsigned int)0U)" in vhls_code
Ejemplo n.º 3
0
 def test_binary_ops():
     """Generate VHLS code for a select between Int(20) and Fixed(16,12) inputs.

     Passes when the generated code contains ``(ap_fixed<32, 20>)B``.
     """
     def kernel(A, B):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
             "C",
             dtype=hcl.Int(8))

     int_input = hcl.placeholder((8, 8), "A", dtype=hcl.Int(20))
     fixed_input = hcl.placeholder((8, 8), "B", dtype=hcl.Fixed(16,12))
     scheme = hcl.create_scheme([int_input, fixed_input], kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     vhls_code = hcl.build(schedule, target="vhls")
     assert "(ap_fixed<32, 20>)B" in vhls_code
Ejemplo n.º 4
0
 def test_imm_ops():
     """Generate VHLS code for a select between a sum of two loads and 0.

     Passes when the generated code contains both expected ``ap_int<33>``
     cast patterns.
     """
     def kernel(A):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x] + A[y+2][x+2], 0),
             "B")

     source = hcl.placeholder((10, 10), "A")
     scheme = hcl.create_scheme(source, kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     vhls_code = hcl.build(schedule, target="vhls")
     # the immediate and the sum are both expected to be cast to ap_int<33>
     assert "((ap_int<33>)0)" in vhls_code
     assert "((ap_int<33>)(((ap_int<33>)A" in vhls_code
Ejemplo n.º 5
0
 def test_uint_int():
     """Generate VHLS code for a select between Fixed(20,12) and UFixed(16,12).

     Passes when the generated code contains ``ap_ufixed<20, 8>)A``.
     """
     def kernel(A, B):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
             "C",
             dtype=hcl.Int(8))

     signed_input = hcl.placeholder((8, 8), "A", dtype=hcl.Fixed(20,12))
     unsigned_input = hcl.placeholder((8, 8), "B", dtype=hcl.UFixed(16,12))
     scheme = hcl.create_scheme([signed_input, unsigned_input], kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     vhls_code = hcl.build(schedule, target="vhls")
     assert "ap_ufixed<20, 8>)A" in vhls_code
Ejemplo n.º 6
0
def build_bnn_inf_opt(batch_size=batch_size, target=target):
    """Build the BNN inference function with compute and memory customization.

    Placeholders are created for the input image and for every entry of
    ``params``. The stages exposed as attributes of ``build_bnn`` are then
    merged and pipelined, and all placeholders are partitioned before the
    final build.

    Args:
        batch_size: Leading dimension of the ``input_image`` placeholder.
        target: Build target forwarded to ``hcl.build``. When it is an
            ``hcl.platform`` instance, data placement and the tool
            configuration are applied to it first.

    Returns:
        The result of ``hcl.build`` for the customized schedule.
    """
    input_image = hcl.placeholder((batch_size, 1, 16, 16), "input_image",
                                  qtype_bit)
    # parameters whose name contains "conv" or "w_" use the bit type,
    # every other parameter uses the float type
    hcl_ph = [
        hcl.placeholder(
            params[name].shape,
            name,
            dtype=qtype_bit if ("conv" in name or "w_" in name) else qtype_float)
        for name in params
    ]

    # build the network
    scheme = hcl.create_scheme([input_image] + hcl_ph, build_bnn)
    schedule = hcl.create_schedule_from_scheme(scheme)

    def plot_dataflow_graph():
        # helper that renders the dataflow graph; not called in this function
        import matplotlib.pyplot as plt
        import networkx as nx
        graph, _ = schedule.dataflow_graph(plot=True)
        nx.draw(graph, with_labels=True)
        plt.savefig("bnn.png")

    # compute optimization: walk every attribute attached to build_bnn
    for layer in build_bnn.__dict__:
        stage = getattr(build_bnn, layer)
        if "bn" in layer:
            # merge the same-numbered conv stage into this stage
            conv = getattr(build_bnn, "conv" + layer[-1])
            schedule[conv].compute_at(schedule[stage], stage.axis[3])
            if layer == "bn1":
                schedule[stage].pipeline(stage.axis[3])  # will be refreshed
            else:
                schedule[conv].pipeline(conv.axis[4])
        elif "pool" in layer:
            schedule[stage].pipeline(stage.axis[2])
        elif "fc" in layer or "flatten" in layer:
            schedule[stage].pipeline(stage.axis[1])
        elif "dense_relu" in layer:
            fc1 = getattr(build_bnn, "fc1")
            schedule[fc1].compute_at(schedule[stage], stage.axis[1])
            schedule[fc1].pipeline(fc1.axis[2])

    if isinstance(target, hcl.platform):
        schedule.to([input_image] + hcl_ph, target.xcel)
        schedule.to(build_bnn.fc2, target.host)
        target.config(compile="vivado_hls", mode="csyn")

    # memory optimization
    schedule.partition(input_image, hcl.Partition.Block, dim=1, factor=8)
    for ph in reversed(hcl_ph):
        if ph.name in ("b_fc2", "fc2"):
            schedule.partition(ph, hcl.Partition.Complete, dim=1)
        else:
            schedule.partition(ph, hcl.Partition.Block, dim=1, factor=8)

    return hcl.build(schedule, target=target)
Ejemplo n.º 7
0
def test():
    """Build an 8x8 kernel that applies ``foo`` to ``A + A`` and print the VHLS build result."""
    hcl.init()

    def kernel(A):
        return hcl.compute(
            (8, 8),
            lambda y, x: foo(A[y, x] + A[y, x]),
            "C")

    tensor_a = hcl.placeholder((8, 8), "A")
    scheme = hcl.create_scheme([tensor_a], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    print(hcl.build(schedule, "vhls"))
Ejemplo n.º 8
0
def test1():
    """Print the VHLS build result of a select between a default-typed and a Fixed(16, 12) tensor."""
    def kernel(A, B):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
            "C")

    default_input = hcl.placeholder((8, 8), "A")
    fixed_input = hcl.placeholder((8, 8), "B", dtype=hcl.Fixed(16, 12))

    scheme = hcl.create_scheme([default_input, fixed_input], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    print(hcl.build(schedule, target="vhls"))
Ejemplo n.º 9
0
def test2():
    """Write the VHLS build result of a UInt(1) select kernel to ``select_test.cpp``."""
    def kernel(A):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y][x], 0),
            "B")

    bit_input = hcl.placeholder((8, 8), "A", dtype=hcl.UInt(1))

    scheme = hcl.create_scheme([bit_input], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    generated = hcl.build(schedule, target="vhls")
    # the file is created or overwritten in the current working directory
    with open("select_test.cpp", "w") as cpp_file:
        cpp_file.write(generated)
Ejemplo n.º 10
0
def test3():
    """Print the VHLS build result of a select on bit 0 of a UInt(2) tensor.

    ``B`` is passed to the scheme but is not read by the kernel.
    """
    def kernel(A, B):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y, x][0], 0),
            "C")

    first = hcl.placeholder((8, 8), "A", dtype=hcl.UInt(2))
    second = hcl.placeholder((8, 8), "B", dtype=hcl.UInt(2))

    scheme = hcl.create_scheme([first, second], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    print(hcl.build(schedule, "vhls"))
Ejemplo n.º 11
0
def build_lenet_inf(batch_size=batch_size, target=None):
    """Build the LeNet inference function with quantized activations.

    Args:
        batch_size: Leading dimension of the image and output placeholders.
        target: Build target forwarded to ``hcl.build``.

    Returns:
        The result of ``hcl.build`` for the quantized schedule.
    """
    # input image, then the four weight tensors (all typed qtype1), then
    # the output placeholder
    input_image = hcl.placeholder((batch_size, 1, 28, 28), "input_image")
    weight_specs = (
        ("weight_conv1", (20, 1, 5, 5)),
        ("weight_conv2", (50, 20, 5, 5)),
        ("weight_fc1", (500, 800)),
        ("weight_fc2", (10, 500)),
    )
    weights = [
        hcl.placeholder(shape, name, qtype1) for name, shape in weight_specs
    ]
    lenet = hcl.placeholder((batch_size, 10), "lenet")

    # quantization scheme over all placeholders
    scheme = hcl.create_scheme([input_image] + weights + [lenet], build_lenet)
    # the three tanh activation stages are quantized to qtype2
    activations = [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3]
    scheme.quantize(activations, qtype2)

    schedule = hcl.create_schedule_from_scheme(scheme)
    return hcl.build(schedule, target=target)
Ejemplo n.º 12
0
def build_bnn_inf(batch_size=batch_size, target=target):
    """Build the BNN inference function without schedule customization.

    Args:
        batch_size: Leading dimension of the ``input_image`` placeholder.
        target: Build target forwarded to ``hcl.build``.

    Returns:
        The result of ``hcl.build`` for the default schedule.
    """
    input_image = hcl.placeholder((batch_size, 1, 16, 16), "input_image",
                                  qtype_bit)
    # parameters whose name contains "conv" or "w_" use the bit type,
    # every other parameter uses the float type
    hcl_ph = [
        hcl.placeholder(
            params[name].shape,
            name,
            dtype=qtype_bit if ("conv" in name or "w_" in name) else qtype_float)
        for name in params
    ]

    # build the network
    scheme = hcl.create_scheme([input_image] + hcl_ph, build_bnn)
    schedule = hcl.create_schedule_from_scheme(scheme)

    # NOTE: data placement for platform targets is currently disabled:
    # if isinstance(target,hcl.platform):
    #     s.to([input_image] + hcl_ph, target.xcel)
    #     s.to(build_bnn.fc2, target.host)
    # target.config(compile="vivado_hls", mode="csyn")

    return hcl.build(schedule, target=target)
Ejemplo n.º 13
0
def test_resize():
    """Downsize the output stage ``B`` to ``UInt(2)`` and verify the result.

    Every output element is expected to equal ``(a[i] + 1) % 4``.
    """
    def algorithm(A):
        return hcl.compute(A.shape, lambda x: A[x] + 1, "B")

    in_tensor = hcl.placeholder((10, ), dtype=hcl.UInt(32))

    scheme = hcl.create_scheme([in_tensor], algorithm)
    scheme.downsize(algorithm.B, hcl.UInt(2))
    schedule = hcl.create_schedule_from_scheme(scheme)
    func = hcl.build(schedule)

    a = np.random.randint(100, size=(10, ))
    hcl_in = hcl.asarray(a, dtype=hcl.UInt(32))
    hcl_out = hcl.asarray(np.zeros(10), dtype=hcl.UInt(2))

    func(hcl_in, hcl_out)

    np_in = hcl_in.asnumpy()
    np_out = hcl_out.asnumpy()

    # compare against the original numpy input, element by element
    for idx, value in enumerate(a):
        assert (np_out[idx] == (value + 1) % 4)
Ejemplo n.º 14
0
def build_ultranet_hls(batch_size=batch_size, target=None):
    """Build the eight-layer UltraNet design with its HLS customizations.

    The function creates the placeholders, quantizes the conv/relu stages,
    inserts line and window buffers for every convolution and, when the
    module-level ``opt`` flag is set, additionally merges, pipelines,
    partitions (``partition`` flag) and streams (``stream`` flag) the stages.

    Args:
        batch_size: Leading dimension of the ``input_image`` placeholder.
        target: Build target forwarded to ``hcl.build``.

    Returns:
        The result of ``hcl.build`` for the schedule named ``"main"``.
    """
    num_layers = 8
    # (output channels, input channels) of the 3x3 convolution per layer
    conv_channels = [(16, 3), (32, 16), (64, 32), (64, 64),
                     (64, 64), (64, 64), (64, 64), (64, 64)]

    # set up input/output placeholders
    input_image = hcl.placeholder((batch_size, 3, 160, 320),
                                  dtype=input_dtype,
                                  name="input_image")
    conv_weights = []
    scheme_inputs = [input_image]
    for idx, (out_ch, in_ch) in enumerate(conv_channels, start=1):
        weight = hcl.placeholder((out_ch, in_ch, 3, 3),
                                 dtype=weight_dtype,
                                 name=f"weight_conv{idx}")
        bn_a = hcl.placeholder((out_ch, ),
                               dtype=bn_a_dtype,
                               name=f"a_batchnorm{idx}")
        bn_b = hcl.placeholder((out_ch, ),
                               dtype=bn_b_dtype,
                               name=f"b_batchnorm{idx}")
        conv_weights.append(weight)
        scheme_inputs += [weight, bn_a, bn_b]

    sm = hcl.create_scheme(scheme_inputs, ultranet)

    # quantize activations
    for idx in range(1, num_layers + 1):
        sm.quantize(getattr(ultranet, f"conv{idx}"), conv_dtype)
        sm.quantize(getattr(ultranet, f"relu{idx}"), act_dtype)

    s = hcl.create_schedule_from_scheme(sm, "main")

    # create line-buffer and window-buffer for conv layers
    for idx in range(1, num_layers + 1):
        padded = getattr(ultranet, f"conv{idx}_pad")
        conv = getattr(ultranet, f"conv{idx}")
        line_buffer = s.reuse_at(padded._op, s[conv], conv.axis[2],
                                 f"conv{idx}_line_buffer")
        s.reuse_at(line_buffer, s[conv], conv.axis[3],
                   f"conv{idx}_window_buffer")

    # Disabled experiment (tiling conv3):
    # conv3 = ultranet.conv3
    # xo, yo, xi, yi = s[conv3].tile(conv3.axis[2], conv3.axis[3], 4, 4)
    # s[conv3].reorder(yo, xo, yi, xi)

    # print(hcl.lower(s))
    if not opt:
        return hcl.build(s, name="main", target=target)

    # merge conv + bn + relu operators
    # (pad cannot be merged with conv, a limitation of HCL)
    for idx in range(1, num_layers + 1):
        bn = getattr(ultranet, f"batch_norm{idx}")
        relu = getattr(ultranet, f"relu{idx}")
        s[bn].compute_at(s[relu], relu.axis[3])
    result = ultranet.result
    last_relu = ultranet.relu8
    s[last_relu].compute_at(s[result], result.axis[3])

    # pipeline all layers; only the first four layers have pooling stages
    for idx in range(1, num_layers + 1):
        stage_names = [f"conv{idx}_pad", f"conv{idx}", f"relu{idx}"]
        if idx <= 4:
            stage_names += [f"pool{idx}_pad", f"pool{idx}"]
        for stage_name in stage_names:
            stage = getattr(ultranet, stage_name)
            s[stage].pipeline(stage.axis[3])
    s[ultranet.result].pipeline(ultranet.result.axis[3])

    # partition weight buffers
    if partition:
        # weights need to be partitioned in dim 2, 3, 4
        # for now HeteroCL doesn't support multi-dimensional partition
        for weight in conv_weights:
            s.partition(weight, dim=2)

    # fifo across layers
    if stream:
        # Note:
        # Padding layer's pipelining has to precede other layers'
        # because of a bug in HeteroCL: when there's an ifThenElse
        # statement in which both branch reads/writes the same buffer,
        # HeteroCL thinks it's accessing the buffer twice, thus preventing
        # pipelining. For now, ?: works, but if..else.. doesn't,
        # because the latter has two load/store nodes.
        for idx in range(1, num_layers + 1):
            s.to(getattr(ultranet, f"conv{idx}_pad"),
                 s[getattr(ultranet, f"conv{idx}")],
                 fifo_depth=128)

        # chain each layer's output into the next layer's padding stage
        for idx in range(1, num_layers):
            conv = getattr(ultranet, f"conv{idx}")
            relu = getattr(ultranet, f"relu{idx}")
            s.to(conv, s[relu], fifo_depth=128)
            if idx <= 4:
                pool_pad = getattr(ultranet, f"pool{idx}_pad")
                s.to(relu, s[pool_pad], fifo_depth=128)
                pool = getattr(ultranet, f"pool{idx}")
                s.to(pool_pad, s[pool], fifo_depth=128)
                s.to(pool, s[getattr(ultranet, f"conv{idx + 1}_pad")],
                     fifo_depth=128)
            else:
                s.to(relu, s[getattr(ultranet, f"conv{idx + 1}_pad")],
                     fifo_depth=128)
        # the last convolution streams directly into the result stage
        s.to(ultranet.conv8, s[ultranet.result], fifo_depth=128)

    return hcl.build(s, name="main", target=target)
Ejemplo n.º 15
0
def top(target=None):
    """Define, customize and build the KNN digit-recognition kernel.

    Args:
        target: Build target forwarded to ``hcl.build``.

    Returns:
        The result of ``hcl.build`` for the customized schedule.
    """

    # Algorithm definition (§1)
    def knn(test_image, train_images):

        # Imperative programming and bit operations (§2)
        # Counts the set bits of `num` by summing its individual bits.
        def popcount(num):
            out = hcl.scalar(0, "out")
            with hcl.for_(0, train_images.type.bits) as i:
                # Bit selection operation
                out.v += num[i]
            return out.v

        # This function updates the candidates, i.e., `knn_mat`. Here we
        # mutate through the shape of tensor `dist`. For each `dist` value,
        # if it is smaller than the maximum candidate, we replace it.
        def update_knn(dist, knn_mat, i, j):
            max_id = hcl.scalar(0, "max_id")
            # Find the index of the largest of the first three candidates
            with hcl.for_(0, 3) as k:
                with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id.v]):
                    max_id.v = k
            with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]):
                knn_mat[i][max_id.v] = dist[i][j]

        # Main algorithm (§3)
        # First step: XOR (§3.1)
        diff = hcl.compute(train_images.shape,
                           lambda x, y: train_images[x][y] ^ test_image,
                           "diff")

        # Second step: popcount (§3.2)
        dist = hcl.compute(diff.shape, lambda x, y: popcount(diff[x][y]),
                           "dist")

        # Third step: initialize the candidates (§3.3)
        # Every entry of the (10, 4) buffer starts at 50.
        knn_mat_buf = hcl.compute((10, 4), lambda x, y: 50, "knn_mat_buf")

        # Fourth step: update the candidates (§3.4)
        hcl.mutate(dist.shape,
                   lambda x, y: update_knn(dist, knn_mat_buf, x, y),
                   "knn_update")
        # Copy the first three columns of the buffer into the output tensor
        knn_mat = hcl.compute((10, 3), lambda x, y: knn_mat_buf[x][y],
                              "knn_mat")

        # Final step: return the candidates (§3.5)
        return knn_mat

    # Inputs/Outputs definition (§4)
    # Scalars (§4.1)
    test_image = hcl.placeholder((), "test_image")
    # Tensors (§4.2)
    train_images = hcl.placeholder(data_size, "train_images")

    # Data type customization (§5.1)
    scheme = hcl.create_scheme([test_image, train_images], knn)
    scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat_buf, knn.knn_mat],
                    dtype_knnmat)

    # Compute customization (§5.2)
    s = hcl.create_schedule_from_scheme(scheme)

    # Stages are looked up by the names given in `knn`
    diff = knn.diff
    dist = knn.dist
    knn_mat_buf = knn.knn_mat_buf
    knn_update = knn.knn_update

    # Merge loop nests
    s[diff].compute_at(s[dist], dist.axis[1])
    s[dist].compute_at(s[knn_update], knn_update.axis[1])

    # Reorder loop to expose more parallelism
    s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0])

    # Parallel initialization of knn mat
    s[knn_mat_buf].parallel(knn_mat_buf.axis[0])

    # Parallel outer loop and pipeline inner loop
    s[knn_update].parallel(knn_update.axis[0])
    s[knn_update].pipeline(knn_update.axis[1])

    # Parallel the innermost loop of 49 pixels
    s[dist].parallel(dist.axis[2])

    # At the end, we build the whole offloaded function.
    return hcl.build(s, target=target)
Ejemplo n.º 16
0
def top(target=None):
    """Define, customize and build the batched Smith-Waterman kernel.

    Args:
        target: Build target forwarded to ``hcl.build``.

    Returns:
        The result of ``hcl.build`` for the customized schedule.
    """
    # Aligns one pair of sequences and writes the result into consA/consB.
    def smith_waterman(seqA, seqB, consA, consB):
        # Score is 1 for a match and `penalty` otherwise
        def similarity_score(a, b):
            return hcl.select(a == b, 1, penalty)

        # Returns the largest of the first `len_` elements of A and its index
        def find_max(A, len_):
            max_ = hcl.local(A[0], "max")
            act_ = hcl.local(0, "act")
            with hcl.for_(0, len_) as i:
                with hcl.if_(A[i] > max_[0]):
                    max_[0] = A[i]
                    act_[0] = i
            return max_[0], act_[0]

        # Best score seen so far and the cell where it occurred
        matrix_max = hcl.local(0, "maxtrix_max")
        i_max = hcl.local(0, "i_max")
        j_max = hcl.local(0, "j_max")

        # Score matrix starts at 0; every action starts at 3
        matrix = hcl.compute((lenA + 1, lenB + 1), lambda x, y: 0, "matrix")
        action = hcl.compute(matrix.shape, lambda x, y: 3, "action")

        # Fills one cell of `matrix`/`action`; row 0 and column 0 are skipped
        def populate_matrix(i, j):
            trace_back = hcl.compute((4, ), lambda x: 0, "trace_back")

            with hcl.if_(hcl.and_(i != 0, j != 0)):
                # Candidates: 0 = diagonal, 1 = row above, 2 = column to the
                # left, 3 = zero
                trace_back[0] = matrix[i-1, j-1] + \
                                similarity_score(seqA[i-1], seqB[j-1])
                trace_back[1] = matrix[i - 1, j] + penalty
                trace_back[2] = matrix[i, j - 1] + penalty
                trace_back[3] = 0
                # The index of the winning candidate is stored as the action
                matrix[i, j], action[i, j] = find_max(trace_back, 4)
                with hcl.if_(matrix[i, j] > matrix_max[0]):
                    matrix_max[0] = matrix[i, j]
                    i_max[0] = i
                    j_max[0] = j

        P = hcl.mutate((lenA + 1, lenB + 1),
                       lambda i, j: populate_matrix(i, j))

        # Emits one output pair for the current step: 0 when the
        # corresponding index does not move, the sequence element otherwise
        def align(curr_i, curr_j, next_i, next_j):
            outA = hcl.local(0, "a")
            outB = hcl.local(0, "b")

            with hcl.if_(next_i[0] == curr_i[0]):
                outA[0] = 0
            with hcl.else_():
                outA[0] = seqA[curr_i[0] - 1]

            with hcl.if_(next_j[0] == curr_j[0]):
                outB[0] = 0
            with hcl.else_():
                outB[0] = seqB[curr_j[0] - 1]
            return outA[0], outB[0]

        # Maps the action stored at (i, j) to the next cell: 0 moves
        # diagonally, 1 moves up a row, 2 moves left a column, anything else
        # stays in place
        def get_next(action, i, j):
            act_ = hcl.local(action[i][j], "act")
            next_i = hcl.local(0, "next_i")
            next_j = hcl.local(0, "next_j")
            with hcl.if_(act_[0] == 0):
                next_i[0] = i - 1
                next_j[0] = j - 1
            with hcl.elif_(act_[0] == 1):
                next_i[0] = i - 1
                next_j[0] = j
            with hcl.elif_(act_[0] == 2):
                next_i[0] = i
                next_j[0] = j - 1
            with hcl.else_():
                next_i[0] = i
                next_j[0] = j
            return next_i[0], next_j[0]

        # Trace back from the best-scoring cell until the position stops
        # changing
        with hcl.Stage("T"):
            curr_i = hcl.local(i_max[0], "curr_i")
            curr_j = hcl.local(j_max[0], "curr_j")
            next_i = hcl.local(0, "next_i")
            next_j = hcl.local(0, "next_j")
            next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
            # Write position in the output sequences
            tick = hcl.local(0, "tick")

            with hcl.while_(
                    hcl.or_(curr_i[0] != next_i[0], curr_j[0] != next_j[0])):
                consA[tick[0]], consB[tick[0]] = \
                    align(curr_i, curr_j, next_i, next_j)
                curr_i[0], curr_j[0] = next_i[0], next_j[0]
                next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
                tick[0] += 1

    # Runs smith_waterman on each of the `num` sequence pairs
    def batch_sw(seqAs, seqBs, outAs, outBs):
        hcl.mutate(
            (num, ),
            lambda t: smith_waterman(seqAs[t], seqBs[t], outAs[t], outBs[t]),
            "B")

    # Input sequences and output buffers, all typed `dtype`
    seqAs = hcl.placeholder((num, lenA), "seqAs", dtype)
    seqBs = hcl.placeholder((
        num,
        lenB,
    ), "seqBs", dtype)
    outAs = hcl.placeholder((num, lenA + lenB), "outAs", dtype)
    outBs = hcl.placeholder((num, lenA + lenB), "outBs", dtype)

    # seqAs = hcl.placeholder((num, lenA), "seqAs")
    # seqBs = hcl.placeholder((num, lenB,), "seqBs")
    # outAs = hcl.placeholder((num, lenA+lenB), "outAs")
    # outBs = hcl.placeholder((num, lenA+lenB), "outBs")

    # Downsize the score and action matrices to `mtype`
    scheme = hcl.create_scheme([seqAs, seqBs, outAs, outBs], batch_sw)
    scheme.downsize([batch_sw.B.matrix, batch_sw.B.action], mtype)
    s = hcl.create_schedule_from_scheme(scheme)
    # Split the batch loop by 32: pipeline the outer part, unroll the inner
    o, p = s[batch_sw.B].split(batch_sw.B.axis[0], factor=32)
    s[batch_sw.B].pipeline(o)
    # s[batch_sw.B].parallel(p)
    s[batch_sw.B].unroll(p)
    return hcl.build(s, target=target)
Ejemplo n.º 17
0
# Check the previous run: the contents of hcl_D must equal m.
assert np.array_equal(hcl_D.asnumpy(), m)

##############################################################################
# Data Type Customization for Modules
# -----------------------------------
# We can also apply data type customization to our defined modules. There are
# two ways to do that. First, you can specify the data types directly in the
# module decorator. Second, you can use the ``quantize`` and ``downsize`` APIs.
# Let's show how we can downsize the first example.

A = hcl.placeholder((10,), dtype=hcl.UInt(4))
B = hcl.placeholder((10,), dtype=hcl.UInt(4))
C = hcl.placeholder((10,), dtype=hcl.UInt(4))
D = hcl.placeholder((10,), dtype=hcl.UInt(4))

# ``s`` holds the scheme here and is rebound to the schedule below.
s = hcl.create_scheme([A, B, C, D], maximum)
# Downsize the input arguments and also the return value
s.downsize([maximum.find_max.A, maximum.find_max.B, maximum.find_max], hcl.UInt(4))
# We also need to downsize the intermediate results
s.downsize([maximum.max_1, maximum.max_2], hcl.UInt(4))
s = hcl.create_schedule_from_scheme(s)
f = hcl.build(s)

##############################################################################
# Let's run it.

# The four inputs are wrapped with the same UInt(4) type as the placeholders.
hcl_A = hcl.asarray(a, hcl.UInt(4))
hcl_B = hcl.asarray(b, hcl.UInt(4))
hcl_C = hcl.asarray(c, hcl.UInt(4))
hcl_D = hcl.asarray(d, hcl.UInt(4))
# No data type is passed for the output array.
hcl_O = hcl.asarray(o)
Ejemplo n.º 18
0
**Author**: Yi-Hsiang Lai (seanlatias@github)

HeteroCL provides multiple back-end supports. Currently, we support both CPU
and FPGA flows. We will be extending to other back ends including ASICs and
PIMs (processing in memory). To set to different back ends, simply set the
``target`` of ``hcl.build`` API. In this tutorial, we will demonstrate how
to target different back ends in HeteroCL. The same program and schedule will
be used throughout the entire tutorial.
"""
import heterocl as hcl
import numpy as np

# Shared program for the whole tutorial: each of the 8x8 outputs adds two
# elements of the 10x10 input.
A = hcl.placeholder((10, 10), "A")
def kernel(A):
    return hcl.compute((8, 8), lambda y, x: A[y][x] + A[y+2][x+2], "B")
# ``s`` holds the scheme first and is rebound to the schedule afterwards.
s = hcl.create_scheme(A, kernel)
s.downsize(kernel.B, hcl.UInt(4))
s = hcl.create_schedule_from_scheme(s)
s.partition(A)
s[kernel.B].pipeline(kernel.B.axis[1])
##############################################################################
# CPU
# ---
# CPU is the default back end of a HeteroCL program. If you want to be more
# specific, set the ``target`` to be ``llvm``. Note that some customization
# primitives are ignored by the CPU back end. For instance, ``partition`` and
# ``pipeline`` have no effect. Instead, we can use ``parallel``.
f = hcl.build(s) # equivalent to hcl.build(s, target="llvm")

##############################################################################
# We can execute the returned function as we demonstrated in other tutorials.
Ejemplo n.º 19
0
# Merge both parameter dictionaries; on a name clash the entry from
# aux_params replaces the one from arg_params.
params = arg_params.copy()
params.update(aux_params)
# For every name, append a placeholder to `holders` and the matching value
# (wrapped as hcl.Float()) to `values`, so both lists stay index-aligned.
for name in names:
    val = params[name].asnumpy()
    ph = hcl.placeholder(val.shape, name)
    holders.append(ph)
    values.append(hcl.asarray(val, dtype=hcl.Float()))

# placeholders for the input image batch and the network output
input_image = hcl.placeholder((batch_size, 3, 224, 224), "input_image")
resnet = hcl.placeholder((batch_size, 1000), "resnet")

# create the scheme; image and output come first, then the parameters
arg_list = [input_image, resnet] + holders
scheme = hcl.create_scheme(arg_list, build_resnet)

# -----------------------------------
# create fixed point scheme
# -----------------------------------
from uptune import autotune, feedback
take_log, type_log = list(), list()
for index in range(len(name_pool)):
    primitive = eval('build_resnet.' + name_pool[index])
    taken = autotune(1, (0, 1))
    fraction = autotune(18, (0, 16))
    bitwidth = 32
    datatype = hcl.Fixed(bitwidth, fraction)
    take_log.append(taken)
    type_log.append(datatype)
    if taken:
Ejemplo n.º 20
0
                           "dist")
        knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat")
        hcl.mutate(dist.shape,
                        lambda x, y: update_knn(dist, knn_mat, x, y),
                        "knn_update")
        hcl.mutate((10, 3), lambda x, y: sort_knn(knn_mat, x, y), "sort")
        knn_new = hcl.compute(knn_mat.shape, 
                              lambda x, y: knn_mat[x][y], "copy")
        knn_pred = hcl.compute((10,), 
                               lambda x: knn_vote(knn_mat, x), "vote")
    return knn_pred

# Input placeholders for the test image(s) and the training set.
test_image = hcl.placeholder(test_size, "test_image", dtype_image)
train_images = hcl.placeholder(data_size, "train_images", dtype_image)

# Data type customization: downsize the distance and candidate tensors.
scheme = hcl.create_scheme([test_image, train_images], knn)
scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat], dtype_knnmat)

s = hcl.create_schedule_from_scheme(scheme)

# Stages are looked up by the names given in `knn`.
diff = knn.diff
dist = knn.dist
# NOTE(review): `vote` is bound to the "copy" stage, not the "vote" stage
# that `knn` returns -- confirm this is intended.
vote = knn.copy
knn_update = knn.knn_update

# Data placement: move the inputs to the accelerator and the result back to
# the host. The tensors moved must be the placeholders given to
# `create_scheme`, i.e. `test_image` (the previous `test_images` was not
# the placeholder defined above).
s.to([test_image, train_images], target.xcel)
s.to(vote, target.host)

# merge loop nests
s[diff].compute_at(s[dist], dist.axis[1])
s[dist].compute_at(s[knn_update], knn_update.axis[1])
Ejemplo n.º 21
0
def build_ultranet_inf(batch_size=batch_size, target=None):
    """Create the scheme for `ultranet`, quantize its stages and build it.

    Args:
        batch_size: leading dimension of the `input_image` placeholder.
        target: forwarded unchanged to `hcl.build`.

    Returns:
        Whatever `hcl.build` returns for the schedule named "main".
    """
    # (output channels, input channels) of each 3x3 convolution, layer 1..8
    conv_channels = (
        (16, 3),
        (32, 16),
        (64, 32),
        (64, 64),
        (64, 64),
        (64, 64),
        (64, 64),
        (64, 64),
    )

    # scheme arguments: the input image first, then for every layer its
    # convolution weights followed by the two batch-norm vectors
    scheme_args = [
        hcl.placeholder((batch_size, 3, 160, 320), dtype=input_dtype, name="input_image")
    ]
    for layer, (n_out, n_in) in enumerate(conv_channels, start=1):
        scheme_args.append(
            hcl.placeholder((n_out, n_in, 3, 3), dtype=weight_dtype, name="weight_conv%d" % layer)
        )
        scheme_args.append(
            hcl.placeholder((n_out,), dtype=bn_a_dtype, name="a_batchnorm%d" % layer)
        )
        scheme_args.append(
            hcl.placeholder((n_out,), dtype=bn_b_dtype, name="b_batchnorm%d" % layer)
        )

    sm = hcl.create_scheme(scheme_args, ultranet)

    # quantize activations: conv output first, then the relu output, per layer
    for layer in range(1, len(conv_channels) + 1):
        sm.quantize(getattr(ultranet, "conv%d" % layer), conv_dtype)
        sm.quantize(getattr(ultranet, "relu%d" % layer), act_dtype)

    s = hcl.create_schedule_from_scheme(sm, "main")
    return hcl.build(s, target=target)
Ejemplo n.º 22
0
print("Loaded {} images".format(num_images))
params = np.load("data/bnn-sdsoc.params.npz")

# declare hcl placeholders together with the matching input arrays
hcl_array = []
hcl_ph = []
input_image = hcl.placeholder((batch_size, 1, 16, 16), "input_image",
                              qtype_int)
for name in params:
    # parameters whose name contains "conv" or "w_" use qtype_bit,
    # every other parameter uses qtype_float
    dtype = qtype_bit if ("conv" in name or "w_" in name) else qtype_float
    value = params[name]  # read the archive entry once and reuse it
    hcl_array.append(hcl.asarray(value, dtype=dtype))
    hcl_ph.append(hcl.placeholder(value.shape, name, dtype=dtype))
# Output buffer. The `np.float` alias no longer exists in NumPy >= 1.24, so
# the float64 zeros are requested directly.
hcl_out = hcl.asarray(np.zeros((batch_size, 10), dtype=np.float64),
                      dtype=qtype_float)

# build the network
scheme = hcl.create_scheme([input_image] + hcl_ph, build_bnn)
s = hcl.create_schedule_from_scheme(scheme)
f = hcl.build(s, target=target)

# Run inference batch by batch and count the correct predictions.
# NOTE(review): only full batches are evaluated, yet the accuracy divides by
# num_images -- confirm num_images is a multiple of batch_size.
correct_sum = 0
for i in range(num_images // batch_size):
    start, stop = i * batch_size, (i + 1) * batch_size
    np_image = images[start:stop]
    hcl_image = hcl.asarray(np_image, dtype=qtype_int)
    f(hcl_image, *hcl_array, hcl_out)
    # predicted class = index of the largest score in each output row
    prediction = np.argmax(hcl_out.asnumpy(), axis=1)
    correct_sum += np.sum(np.equal(prediction, labels[start:stop]))
    if (i + 1) % 10 == 0:
        print("Done {} batches.".format(i + 1))
print("Testing accuracy: {}".format(correct_sum / float(num_images)))
Ejemplo n.º 23
0
Archivo: test.py Proyecto: Kins1ley/tvm
import os, sys
tempdir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, tempdir + "/heterocl")

import heterocl as hcl

hcl.init()

A = hcl.placeholder((10, ))
B = hcl.placeholder((10, ))


def quantization(A):
    """Return a tensor named "B", shaped like `A`, computed as hcl.tanh(A[x])."""
    out_shape = A.shape
    return hcl.compute(out_shape, lambda x: hcl.tanh(A[x]), "B")


sm = hcl.create_scheme([A], quantization)
sm_B = quantization.B