Ejemplo n.º 1
0
def top(input, filter, bias, ):
    """Build the "f_conv" and "final" HeteroCL stages and return ``final``.

    "f_conv" is first filled from ``bias`` and then accumulates
    ``filter * input`` products; "final" stores each ``f_conv`` element when it
    is greater than 0.0 and 0.0 otherwise.

    ``final_min_*`` and ``final_extent_*`` are not defined in this block and
    must be provided by the enclosing scope.
    NOTE(review): the body looks machine-generated -- confirm the generator
    before editing any expression by hand.
    """
    # Derived extents. These four values only feed each other; nothing further
    # down in this block reads them.
    input_extent_3_required_s = (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
    final_total_extent_1 = (hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_1) * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_0))
    final_total_extent_2 = (final_total_extent_1 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_2))
    final_total_extent_3 = (final_total_extent_2 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_3))
    # Sizes used for the last two dimensions of the f_conv buffer. Each is the
    # larger of several candidate extents, selected with nested hcl.select.
    f_conv_n_extent_realized_s = hcl.select(hcl.select((((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)) > (final_extent_3 + -1), (((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)), (final_extent_3 + -1)) > (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//(hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) + 1)), hcl.select((((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)) > (final_extent_3 + -1), (((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)), (final_extent_3 + -1)), (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//(hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) + 1)))
    f_conv_z_extent_realized = hcl.select(((hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) * 32) + 32) > final_extent_2, ((hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) * 32) + 32), final_extent_2)
    # Zero-initialized float32 accumulator buffer; the second dimension is
    # final_extent_1 rounded up to a multiple of 32.
    f_conv = hcl.compute((final_extent_0, ((((final_extent_1 + -1)//32) * 32) + 32), f_conv_z_extent_realized, (f_conv_n_extent_realized_s + 1)), lambda x, y, z, w: 0, name = "f_conv", dtype = hcl.Float(bits = 32))
    with hcl.Stage("f_conv"):
        # Pass 0: one fused loop over (z, n); the % and // below split the fused
        # index back into its z and n parts. Every element is set to bias[z].
        with hcl.for_(0, (final_extent_2 * final_extent_3), name = "f_conv_s0_z_par") as f_conv_s0_z_par:
            with hcl.for_(final_min_1, final_extent_1, name = "f_conv_s0_y") as f_conv_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "f_conv_s0_x") as f_conv_s0_x:
                    f_conv[f_conv_s0_x, f_conv_s0_y, ((f_conv_s0_z_par % hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_2), ((f_conv_s0_z_par//hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_3)] = bias[((f_conv_s0_z_par % hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_2)]
        # Pass 1: accumulation. The outer loop is fused over (z tile, n); y and z
        # are walked in tiles of 32, with a 32-step r__z loop and a 3x3
        # (r__y, r__x) loop supplying the filter/input offsets.
        with hcl.for_(0, (((final_extent_2 + 31)//32) * final_extent_3), name = "f_conv_s1_z_z_par") as f_conv_s1_z_z_par:
            # Index of the 32-wide z tile handled by this iteration.
            f_conv_s1_z_z_t_base_s = (f_conv_s1_z_z_par % hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
            with hcl.for_(0, 32, name = "f_conv_s1_r__z") as f_conv_s1_r__z:
                with hcl.for_(0, ((final_extent_1 + 31)//32), name = "f_conv_s1_y_y") as f_conv_s1_y_y:
                    with hcl.for_(0, 32, name = "f_conv_s1_z_z_t") as f_conv_s1_z_z_t:
                        with hcl.for_(0, 32, name = "f_conv_s1_y_y_t") as f_conv_s1_y_y_t:
                            with hcl.for_(final_min_0, final_extent_0, name = "f_conv_s1_x") as f_conv_s1_x:
                                with hcl.for_(0, 3, name = "f_conv_s1_r__y_r21") as f_conv_s1_r__y_r21:
                                    with hcl.for_(0, 3, name = "f_conv_s1_r__x_r20") as f_conv_s1_r__x_r20:
                                        # n part of the fused outer index (offset by final_min_3 when used).
                                        t51_s = (f_conv_s1_z_z_par//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
                                        # f_conv[x, y, z, n] += filter[rx, ry, rz, z] * input[x + rx, y + ry, rz, n]
                                        f_conv[f_conv_s1_x, (((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t), (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t), ((f_conv_s1_z_z_par//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1)) + final_min_3)] = (f_conv[f_conv_s1_x, (((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t), (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t), (final_min_3 + t51_s)] + (filter[f_conv_s1_r__x_r20, f_conv_s1_r__y_r21, f_conv_s1_r__z, (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t)] * input[(f_conv_s1_r__x_r20 + f_conv_s1_x), ((((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t) + f_conv_s1_r__y_r21), f_conv_s1_r__z, (final_min_3 + t51_s)]))
    # Output buffer with a fixed (64, 64, 32, 4) shape, zero-initialized.
    final = hcl.compute((64, 64, 32, 4), lambda x, y, z, w: 0, name = "final", dtype = hcl.Float(bits = 32))
    with hcl.Stage("final"):
        with hcl.for_(final_min_3, final_extent_3, name = "final_s0_n") as final_s0_n:
            with hcl.for_(final_min_2, final_extent_2, name = "final_s0_z") as final_s0_z:
                with hcl.for_(final_min_1, final_extent_1, name = "final_s0_y") as final_s0_y:
                    with hcl.for_(final_min_0, final_extent_0, name = "final_s0_x") as final_s0_x:
                        # Keep f_conv where it is > 0.0, otherwise store 0.0.
                        final[final_s0_x, final_s0_y, final_s0_z, final_s0_n] = hcl.select(f_conv[final_s0_x, final_s0_y, final_s0_z, final_s0_n] > hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.000000), f_conv[final_s0_x, final_s0_y, final_s0_z, final_s0_n], hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.000000))
    return final
Ejemplo n.º 2
0
    def kernel(A, B):
        """Compute ``C = A + B`` and then modify C inside nested stages.

        Inside stage "Super", C is incremented element-wise by the "update"
        operation. Inside the nested stage "Plus", ``C[j, 0]`` is set to 10 for
        j in 0..9. Returns C.
        """
        C = hcl.compute((10, 32), lambda *args: A[args] + B[args], name="C")

        with hcl.Stage("Super"):
            hcl.update(C, lambda *args: C[args] + 1, name="update")

            # The inner stage is opened while "Super" is still active.
            with hcl.Stage("Plus"):
                with hcl.for_(0, 10) as row:
                    C[row, 0] = 10

        return C
Ejemplo n.º 3
0
def top(input, ):
    """Build the "max_local" and "final" stages and return ``final``.

    For every (x, y) in the loop range, "max_local" holds the largest of the
    values ``input[x + bx, y + by]`` for bx, by in 0..2, starting the
    comparison from 0; "final" is a copy of "max_local".

    ``final_min_*`` and ``final_extent_*`` are not defined in this block and
    must be provided by the enclosing scope.
    """
    # Not read again in this block.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1)
        * hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0)
    )

    max_local = hcl.compute(
        (final_extent_0, final_extent_1),
        lambda x, y: 0,
        name="max_local",
        dtype=hcl.UInt(bits=16),
    )
    with hcl.Stage("max_local"):
        with hcl.for_(final_min_1, final_extent_1, name="max_local_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="max_local_s0_x") as col:
                # Per-pixel scratch tensor holding the running maximum.
                maximum = hcl.compute(
                    (1, 1),
                    lambda x, y: 0,
                    name="maximum",
                    dtype=hcl.UInt(bits=16),
                )
                with hcl.Stage("maximum"):
                    maximum[col, row] = hcl.cast(dtype=hcl.UInt(bits=16), expr=0)
                    with hcl.for_(0, 3, name="maximum_s1_box__y") as box_y:
                        with hcl.for_(0, 3, name="maximum_s1_box__x") as box_x:
                            # Keep the larger of the running maximum and the
                            # input sample at this window offset.
                            maximum[col, row] = hcl.select(
                                maximum[col, row]
                                > input[(col + box_x), (row + box_y)],
                                maximum[col, row],
                                input[(col + box_x), (row + box_y)],
                            )
                max_local[col, row] = maximum[col, row]

    final = hcl.compute(
        (640, 480),
        lambda x, y: 0,
        name="final",
        dtype=hcl.UInt(bits=16),
    )
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1, name="final_s0_y") as out_row:
            with hcl.for_(final_min_0, final_extent_0, name="final_s0_x") as out_col:
                final[out_col, out_row] = max_local[out_col, out_row]
    return final
Ejemplo n.º 4
0
def kernel(trainData, testData, itemMem, idMem, rdv1, rdv2):
    """Encode training and testing samples and return them bit-packed.

    For each sample row ``m``, ``itemMem[data[m][x]][y] ^ idMem[x][y]`` is
    summed over x. The sums are thresholded against half the feature count;
    ``rdv1`` / ``rdv2`` are added first when the feature count is even. The
    resulting 1-bit tensors are packed along axis 1.

    ``dim`` and ``bw`` are not defined in this block and must be provided by
    the enclosing scope.

    Returns:
        The tuple ``(pack_train, pack_test)``.
    """

    def train_encoding(m, preTrainData):
        # Fill row m of preTrainData with the per-dimension sums.
        n_features = trainData.shape[1]
        train_temp = hcl.compute(
            (n_features, dim),
            lambda x, y: itemMem[trainData[m][x]][y] ^ idMem[x][y],
            name="train_temp",
        )
        k1 = hcl.reduce_axis(0, n_features, 'k1')
        train_result = hcl.compute(
            (dim,),
            lambda x: hcl.sum(train_temp[k1, x], axis=k1, dtype=hcl.Int()),
            name="train_result",
        )
        with hcl.for_(0, dim) as d:
            preTrainData[m][d] = train_result[d]
        # Progress message every 1000 samples.
        with hcl.if_((m + 1) % 1000 == 0):
            hcl.print((m + 1), "Finish encoding %d training data\n")

    def test_encoding(m, preTestData):
        # Same as train_encoding, reading from testData.
        n_features = testData.shape[1]
        test_temp = hcl.compute(
            (n_features, dim),
            lambda x, y: itemMem[testData[m][x]][y] ^ idMem[x][y],
            name="test_temp",
        )
        k2 = hcl.reduce_axis(0, n_features, 'k2')
        test_result = hcl.compute(
            (dim,),
            lambda x: hcl.sum(test_temp[k2, x], axis=k2, dtype=hcl.Int()),
            name="test_result",
        )
        with hcl.for_(0, dim) as d:
            preTestData[m][d] = test_result[d]
        # Progress message every 100 samples.
        with hcl.if_((m + 1) % 100 == 0):
            hcl.print((m + 1), "Finish encoding %d testing data\n")

    # ---- Encoding: training data ----
    hcl.print((), "Encoding the training data into HDVs.\n")
    preTrainData = hcl.compute(
        (trainData.shape[0], dim), lambda x, y: 0, "preTrainData")
    hcl.mutate(
        (trainData.shape[0], ), lambda x: train_encoding(x, preTrainData))

    hdTrainData = hcl.compute(
        (trainData.shape[0], dim),
        lambda x, y: 0,
        "hdTrainData",
        dtype=hcl.UInt(1),
    )
    with hcl.Stage("S1"):
        with hcl.if_(trainData.shape[1] % 2 == 0):
            hcl.print((), "Use the random vector\n")
            hcl.update(
                hdTrainData,
                lambda x, y: hcl.select(
                    preTrainData[x][y] + rdv1[x][y] - trainData.shape[1]/2 > 0,
                    1,
                    0,
                ),
            )
        with hcl.else_():
            hcl.update(
                hdTrainData,
                lambda x, y: hcl.select(
                    preTrainData[x][y] - trainData.shape[1]/2 > 0, 1, 0),
            )

    # ---- Encoding: testing data ----
    hcl.print((),"Encoding the testing data into HDVs.\n")
    preTestData = hcl.compute(
        (testData.shape[0], dim), lambda x, y: 0, "preTestData")
    hcl.mutate(
        (testData.shape[0], ), lambda x: test_encoding(x, preTestData))

    hdTestData = hcl.compute(
        (testData.shape[0], dim),
        lambda x, y: 0,
        "hdTestData",
        dtype=hcl.UInt(1),
    )
    with hcl.Stage("S2"):
        with hcl.if_(testData.shape[1] % 2 == 0):
            hcl.print((), "Use the random vector\n")
            hcl.update(
                hdTestData,
                lambda x, y: hcl.select(
                    preTestData[x][y] + rdv2[x][y] - testData.shape[1]/2 > 0,
                    1,
                    0,
                ),
            )
        with hcl.else_():
            hcl.update(
                hdTestData,
                lambda x, y: hcl.select(
                    preTestData[x][y] - testData.shape[1]/2 > 0, 1, 0),
            )

    # ---- Data packing ----
    pack_train = hcl.pack(
        hdTrainData, axis=1, dtype=hcl.UInt(bw), name="pack_train")
    pack_test = hcl.pack(
        hdTestData, axis=1, dtype=hcl.UInt(bw), name="pack_test")
    return pack_train, pack_test
Ejemplo n.º 5
0
    def fft(X_real, X_imag, IndexTable, F_real, F_imag):
        """Write a staged butterfly transform of (X_real, X_imag) into (F_real, F_imag).

        The inputs are first copied into F_real / F_imag in the order given by
        ``IndexTable``; stage "Out" then applies ``log2(L)`` butterfly passes in
        place, where L is ``X_real.shape[0]``.

        Raises:
            ValueError: if ``log2(L)`` is not an integer.
        """
        L = X_real.shape[0]
        log_len = np.log2(L)
        if log_len % 1 > 0:
            raise ValueError("Length of input vector (1d tensor) must be power of 2")
        num_stages = int(log_len)

        # Bit-reverse permutation, driven by the caller-supplied index table.
        hcl.update(F_real, lambda i: X_real[IndexTable[i]], name='F_real_update')
        hcl.update(F_imag, lambda i: X_imag[IndexTable[i]], name='F_imag_update')

        with hcl.Stage("Out"):
            one = hcl.scalar(1, dtype="int32")
            with hcl.for_(0, num_stages) as level:
                # Points per sub-transform at this level: 2 ** (level + 1).
                DFTpts = one[0] << (level + 1)
                numBF = DFTpts / 2
                angle_step = -2 * np.pi / DFTpts
                angle = hcl.scalar(0)
                with hcl.for_(0, numBF) as bf:
                    # Twiddle factor for this butterfly offset; the angle is
                    # advanced right after its cos/sin have been captured.
                    cos_a = hcl.scalar(hcl.cos(angle[0]))
                    sin_a = hcl.scalar(hcl.sin(angle[0]))
                    angle[0] = angle[0] + angle_step
                    with hcl.for_(bf, L + DFTpts - 1, DFTpts) as upper:
                        lower = upper + numBF
                        tmp_re = hcl.scalar(F_real[lower] * cos_a - F_imag[lower] * sin_a)
                        tmp_im = hcl.scalar(F_imag[lower] * cos_a + F_real[lower] * sin_a)
                        # The lower outputs must be written before the upper
                        # ones: they read the not-yet-updated upper values.
                        F_real[lower] = F_real[upper] - tmp_re[0]
                        F_imag[lower] = F_imag[upper] - tmp_im[0]
                        F_real[upper] = F_real[upper] + tmp_re[0]
                        F_imag[upper] = F_imag[upper] + tmp_im[0]
Ejemplo n.º 6
0
 def kernel(A):
     """Add to ``A[i]`` every inner index j visited before ``j >= i`` triggers a break."""
     with hcl.Stage():
         with hcl.for_(0, 10) as outer:
             with hcl.for_(0, 10) as inner:
                 # Leave the inner loop once the inner index reaches the outer one.
                 with hcl.if_(inner >= outer):
                     hcl.break_()
                 A[outer] += inner
Ejemplo n.º 7
0
    def kernel(A, B, C, O):
        """Pack A, B, C into a struct tensor, form six products, and copy them into O.

        D holds ``(A[x], B[x], C[x])`` as fields x, y, z. E holds the products
        x*x, y*y, z*z, x*y, y*z, x*z as fields v0..v5. For i in 0..99, field
        ``v<j>`` of ``E[i]`` is written to ``O[i][j]``.
        """
        dtype_xyz = hcl.Struct({
            "x": hcl.Int(),
            "y": hcl.Int(),
            "z": hcl.Int()
        })
        dtype_out = hcl.Struct({
            "v0": hcl.Int(),
            "v1": hcl.Int(),
            "v2": hcl.Int(),
            "v3": hcl.Int(),
            "v4": hcl.Int(),
            "v5": hcl.Int()
        })

        D = hcl.compute(A.shape, lambda x: (A[x], B[x], C[x]), dtype=dtype_xyz)
        E = hcl.compute(A.shape,
                        lambda x:
                        (D[x].x * D[x].x, D[x].y * D[x].y, D[x].z * D[x].z, D[
                            x].x * D[x].y, D[x].y * D[x].z, D[x].x * D[x].z),
                        dtype=dtype_out)
        with hcl.Stage():
            with hcl.for_(0, 100) as i:
                # Plain Python loop: it is unrolled while the stage is being
                # built, emitting one assignment per struct field.
                for j in range(6):
                    # Use the getattr builtin rather than calling the
                    # __getattr__ dunder directly.
                    O[i][j] = getattr(E[i], "v" + str(j))
Ejemplo n.º 8
0
 def func(data):
     """Return a 4x4 tensor "out" with ``out[i, j] = data[i, j] + 1``.

     ``dtype`` is not defined in this block and must come from the enclosing scope.
     """
     out = hcl.compute((4, 4), lambda x, y: 0, name="out", dtype=dtype)
     with hcl.Stage("S"):
         with hcl.for_(0, 4, name="i") as row:
             with hcl.for_(0, 4, name="j") as col:
                 out[row, col] = data[row, col] + 1
     return out
Ejemplo n.º 9
0
def kernel(A, B, C):
    """Copy bits of ``A[i]`` into ``B[i]`` and ``C[i]`` for i in 0..9."""
    with hcl.Stage("S"):
        with hcl.for_(0, 10) as idx:
            # Bit 0 (the LSB) of B[idx] takes bit 0 of A[idx].
            B[idx][0] = A[idx][0]
            # The bit slice [4:0] of C[idx] (its lower 4 bits) is set from A[idx].
            C[idx][4:0] = A[idx]
Ejemplo n.º 10
0
    def algorithm(A, B):
        """Call the ``update_B`` module for indices 0..9, setting ``B[x] = A[x] + 1``."""
        arg_shapes = [A.shape, B.shape, ()]

        # Module definition: two tensor arguments and one scalar index.
        @hcl.def_(arg_shapes)
        def update_B(A, B, x):
            B[x] = A[x] + 1

        with hcl.Stage():
            with hcl.for_(0, 10) as idx:
                update_B(A, B, idx)
Ejemplo n.º 11
0
 def kernel(A):
     """Clamp ``A[0]`` to 5, 3 or 0 with an if_/elif_/else_ chain.

     ``A[0]`` becomes 5 when it is greater than 5, 3 when it is greater than 3
     (and not greater than 5), and 0 otherwise.
     """
     with hcl.Stage():
         # The branches must be opened in this order: elif_/else_ attach to
         # the conditional that precedes them.
         with hcl.if_(A[0] > 5):
             A[0] = 5
         with hcl.elif_(A[0] > 3):
             A[0] = 3
         with hcl.else_():
             A[0] = 0
Ejemplo n.º 12
0
 def kernel(A):
     """Set ``A[k] = k`` for k = 0..5 using a while loop with an explicit break."""
     with hcl.Stage():
         counter = hcl.scalar(0)
         # Unconditional loop; the break below is the only exit.
         with hcl.while_(True):
             with hcl.if_(counter[0] > 5):
                 hcl.break_()
             A[counter[0]] = counter[0]
             counter[0] += 1
Ejemplo n.º 13
0
 def kernel(A, B):
     """Accumulate A into B in place and return ``C = 2 * B`` over a 10x32 range."""
     C = hcl.compute(A.shape, lambda *args : 0, name="C")
     with hcl.Stage("stage"):
         with hcl.for_(0, 10, name="i") as row:
             with hcl.for_(0, 32, name="j") as col:
                 # B is updated first; C then reads the updated B.
                 B[row, col] = A[row, col] + B[row, col]
                 C[row, col] = 2 * B[row, col]
     return C
Ejemplo n.º 14
0
 def kernel(A, B):
     """Run two element-wise updates per iteration of a 10-step loop; return C.

     In each iteration j, D is set to ``j*A + B`` and C is then set to
     ``A + j*D``.
     """
     C = hcl.compute((10, 32), lambda *args : 0, name="C")
     D = hcl.compute(C.shape, lambda *args: 0, name="D")
     with hcl.Stage("Super"):
         with hcl.for_(0, 10, name="j") as scale:
             # D must be updated before C, which reads it.
             hcl.update(D, lambda *args: scale*A[args] + B[args], name="update.D")
             hcl.update(C, lambda *args: A[args] + scale*D[args], name="update.C")
     return C
Ejemplo n.º 15
0
def top(input, ):
    """Build the "blur_x", "blur_y" and "final" stages and return ``final``.

    "blur_x" is the integer-divided-by-3 sum of three x-adjacent input
    samples; "blur_y" does the same over three y-adjacent "blur_x" samples;
    "final" is a copy of "blur_y".

    ``final_min_*`` and ``final_extent_*`` are not defined in this block and
    must be provided by the enclosing scope.
    """
    # Not read again in this block.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1)
        * hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0)
    )

    # blur_x has two extra rows because blur_y reads rows y, y + 1 and y + 2.
    blur_x = hcl.compute(
        (final_extent_0, (final_extent_1 + 2)),
        lambda x, y: 0,
        name="blur_x",
        dtype=hcl.UInt(bits=16),
    )
    with hcl.Stage("blur_x"):
        with hcl.for_(final_min_1, (final_extent_1 + 2), name="blur_x_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="blur_x_s0_x") as col:
                blur_x[col, row] = (
                    (input[(col + 2), row]
                     + (input[col, row] + input[(col + 1), row]))
                    // hcl.cast(dtype=hcl.UInt(bits=16), expr=3)
                )

    blur_y = hcl.compute(
        (final_extent_0, final_extent_1),
        lambda x, y: 0,
        name="blur_y",
        dtype=hcl.UInt(bits=16),
    )
    with hcl.Stage("blur_y"):
        with hcl.for_(final_min_1, final_extent_1, name="blur_y_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="blur_y_s0_x") as col:
                blur_y[col, row] = (
                    (blur_x[col, (row + 2)]
                     + (blur_x[col, row] + blur_x[col, (row + 1)]))
                    // hcl.cast(dtype=hcl.UInt(bits=16), expr=3)
                )

    final = hcl.compute(
        (640, 480),
        lambda x, y: 0,
        name="final",
        dtype=hcl.UInt(bits=16),
    )
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1, name="final_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="final_s0_x") as col:
                final[col, row] = blur_y[col, row]
    return final
Ejemplo n.º 16
0
def top(input, ):
    """Build the "mean_local" and "final" stages and return ``final``.

    "mean_local" is zeroed and then accumulates ``input[x + bx, y + by] // 9``
    for bx, by in 0..2; "final" is a copy of "mean_local".

    ``final_min_*`` and ``final_extent_*`` are not defined in this block and
    must be provided by the enclosing scope.
    """
    # Not read again in this block.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1)
        * hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0)
    )

    mean_local = hcl.compute(
        (final_extent_0, final_extent_1),
        lambda x, y: 0,
        name="mean_local",
        dtype=hcl.UInt(bits=16),
    )
    with hcl.Stage("mean_local"):
        # Pass 0: explicit zero initialization.
        with hcl.for_(final_min_1, final_extent_1, name="mean_local_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="mean_local_s0_x") as col:
                mean_local[col, row] = hcl.cast(dtype=hcl.UInt(bits=16), expr=0)

        # Pass 1: accumulate the 3x3 window; each sample is divided by 9
        # before it is added.
        with hcl.for_(final_min_1, final_extent_1, name="mean_local_s1_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="mean_local_s1_x") as col:
                with hcl.for_(0, 3, name="mean_local_s1_box__y") as box_y:
                    with hcl.for_(0, 3, name="mean_local_s1_box__x") as box_x:
                        mean_local[col, row] = (
                            mean_local[col, row]
                            + (input[(box_x + col), (box_y + row)]
                               // hcl.cast(dtype=hcl.UInt(bits=16), expr=9))
                        )

    final = hcl.compute(
        (6418, 4818),
        lambda x, y: 0,
        name="final",
        dtype=hcl.UInt(bits=16),
    )
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1, name="final_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="final_s0_x") as col:
                final[col, row] = mean_local[col, row]
    return final
Ejemplo n.º 17
0
def test_schedule_intra_stage():
    """Check unroll, reorder, fuse and split on the named axes of one imperative stage.

    Each check builds a fresh schedule over the same stage, applies one
    primitive to the "x" / "y" axes, lowers it and inspects the printed IR.
    """

    hcl.init()

    def popcount(A, B):  # each element in A is a 32-bit integer
        with hcl.for_(0, A.shape[0], name="x") as row:
            with hcl.for_(0, A.shape[1], name="y") as col:
                B[row, col] = 0
                with hcl.for_(0, 32) as bit:
                    B[row, col] += A[row, col][bit]

    A = hcl.placeholder((10, 20))
    B = hcl.placeholder(A.shape)
    with hcl.Stage() as C:
        popcount(A, B)

    def descend(node, depth):
        # Follow ``.body`` ``depth`` times starting from ``node``.
        for _ in range(depth):
            node = node.body
        return node

    def test_unroll():
        sched = hcl.create_schedule([A, B])
        sched[C].unroll(C.x, factor=3)
        lowered = hcl.lower(sched)
        assert "unrolled \"factor\"=3" in str(lowered)

    def test_reorder():
        sched = hcl.create_schedule([A, B])
        sched[C].reorder(C.y, C.x)
        lowered = hcl.lower(sched)
        assert str(descend(lowered, 4)).startswith("for (y, 0, 20)")
        assert str(descend(lowered, 5)).startswith("for (x, 0, 10)")

    def test_fuse():
        sched = hcl.create_schedule([A, B])
        sched[C].fuse(C.x, C.y)
        lowered = hcl.lower(sched)
        assert str(descend(lowered, 4)).startswith("for (x.y.fused, 0, 200)")

    def test_split():
        sched = hcl.create_schedule([A, B])
        sched[C].split(C.x, factor=3)
        lowered = hcl.lower(sched)
        assert str(descend(lowered, 4)).startswith("for (x.outer, 0, 4)")
        assert str(descend(lowered, 5)).startswith("for (x.inner, 0, 3)")
        assert str(descend(lowered, 6)).startswith("for (y, 0, 20)")
        # 10 is not a multiple of 3, so the split body is guarded.
        assert str(descend(lowered, 7)).startswith(
            "if ((x.inner < (10 - (x.outer*3))))")

    for check in (test_unroll, test_reorder, test_fuse, test_split):
        check()
Ejemplo n.º 18
0
    def vadd_vhls_ip(op1, op2, size, name=None):
        """Register an "ExternModule.vadd" stage backed by the ``vadd.cpp`` source.

        Args:
            op1, op2: tensors registered inside the stage.
            size: passed through as the third module input.
            name: external IP name; defaults to "vadd" when None.
        """
        ip_name = "vadd" if name is None else name

        with hcl.Stage("ExternModule.vadd") as Module:
            register_tensors([op1, op2])

        Module.ext_ip_name = ip_name
        Module.inputs = [op1, op2, size]

        # include cpp/hpp files
        # Directory of this file; computed as before but not read again in
        # this block.
        deps = os.path.dirname(os.path.abspath(__file__))
        source = ["vadd.cpp"]
        Module.source = include_dependency(source)
        create_extern_module(Module, ip_type="HLS")
Ejemplo n.º 19
0
def insertion_sort(A):
    """Sort the 1-D tensor A in place with an imperative insertion sort."""

    # All imperative statements live in a single named stage.
    with hcl.Stage("S"):
        # Equivalent of: for i in range(1, A.shape[0]); the axis is named "i".
        with hcl.for_(1, A.shape[0], name="i") as pos:
            key = hcl.local(A[pos], "key")
            j = hcl.local(pos - 1, "j")
            # Equivalent of: while (j >= 0 && key < A[j])
            with hcl.while_(hcl.and_(j >= 0, key < A[j])):
                # Shift the larger element one slot to the right.
                A[j + 1] = A[j]
                j[0] -= 1
            # Drop the saved key into the gap that was opened.
            A[j + 1] = key[0]
Ejemplo n.º 20
0
    def systolic_array(A, B):
        """Build a blocked, systolic-style accumulation of A and B into "Output".

        The (m, n) output is processed in h-by-w blocks. For every block and
        every k, an h-by-w grid of ``pe`` module instances is wired together
        and ``out[2]`` of each instance is accumulated into the output.

        ``w``, ``h``, ``m``, ``n``, ``k`` and ``partial`` are not defined in
        this block and must be provided by the enclosing scope.

        Returns:
            The (m, n) tensor named "Output".
        """

        # define modules with loop
        @hcl.def_([(1,), (1,), ()])
        def pe(a, b, x):
            # Returns a for x == 0, b for x == 1, and a * b otherwise.
            with hcl.if_(x == 0):
                result = a * b
                hcl.return_(a)
            with hcl.elif_(x == 1):
                hcl.return_(b)
            with hcl.else_():
                hcl.return_(result)

        # PE = {f'pe_{i}' : partial(pe) for i in range(w*h)}
        PE = {}
        for i in range(w * h):
            with hcl.Stage("pe_{}".format(i)):
                PE['pe_{}'.format(i)] = partial(pe)

        # each k calls of update function calculate one block of result matrix
        # b_row: block row index
        # b_col: block col index
        def update(b_row, b_col, k, O):
            # fetch input
            localA = []
            localB = []
            for input_a in range(h):
                localA.append(hcl.compute((1,), lambda x : A[input_a + h * b_row, k], "localA_{}".format(input_a)))
            for input_b in range(w):
                localB.append(hcl.compute((1,), lambda x : B[k, input_b + w * b_col], "localB_{}".format(input_b)))

            # systolic connection
            # One independent list of w slots per PE row, h rows in total.
            # The old ``[[None] * h] * w`` repeated a single inner list, so
            # every row aliased the same object. It also had the dimensions
            # transposed relative to net[row][col] (row < h, col < w), which
            # raises IndexError whenever h != w.
            net = [[None] * w for _ in range(h)]
            # Walk the grid along anti-diagonals so the left and upper
            # neighbours of each PE exist before the PE itself is created.
            for i in range(h + w - 1):
                for row in range(i + 1):
                    col = i - row
                    if col < 0 or col > w-1 or row > h-1: continue
                    ## instantiate a PE and record partial results
                    # Leftmost column reads A directly; the others take
                    # element 0 of the PE output to their left.
                    input_a = localA[row] if col == 0 else hcl.compute((1,), lambda x : net[row][col-1][0], "input_a{}{}".format(row, col))
                    # Top row reads B directly; the others take element 1 of
                    # the PE output above them.
                    input_b = localB[col] if row == 0 else hcl.compute((1,), lambda x : net[row-1][col][1], "input_b{}{}".format(row, col))
                    out = hcl.compute((3,), lambda x : PE['pe_%d' % (row * w + col)](
                        input_a, input_b, x), "out_{}{}".format(row, col))
                    O[row + h * b_row, col + w * b_col] += out[2]
                    net[row][col] = out

        block_rows = int(m / h)
        block_cols = int(n / w)
        O = hcl.compute((m, n), lambda *args : 0, name="Output")
        hcl.mutate((block_rows, block_cols, k), lambda b_row, b_col, k: update(b_row, b_col, k, O), name="update")
        return O
Ejemplo n.º 21
0
def top(
    A,
    B,
):
    """Build the "prod" and "final" stages and return ``final``.

    "prod" is zeroed and then accumulates ``A[x, r] * B[r, y]`` for r in
    0..1023; "final" is a copy of "prod".

    ``final_min_*`` and ``final_extent_*`` are not defined in this block and
    must be provided by the enclosing scope.
    """
    # Not read again in this block.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1)
        * hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0)
    )

    prod = hcl.compute(
        (final_extent_0, final_extent_1),
        lambda x, y: 0,
        name="prod",
        dtype=hcl.Float(bits=32),
    )
    with hcl.Stage("prod"):
        # Pass 0: explicit zero initialization.
        with hcl.for_(final_min_1, final_extent_1, name="prod_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="prod_s0_x") as col:
                prod[col, row] = hcl.cast(dtype=hcl.Float(bits=32), expr=0.000000)

        # Pass 1: multiply-accumulate over the 1024-long reduction axis.
        with hcl.for_(final_min_1, final_extent_1, name="prod_s1_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="prod_s1_x") as col:
                with hcl.for_(0, 1024, name="prod_s1_r__x") as red:
                    prod[col, row] = (
                        prod[col, row] + (A[col, red] * B[red, row])
                    )

    final = hcl.compute(
        (1024, 1024),
        lambda x, y: 0,
        name="final",
        dtype=hcl.Float(bits=32),
    )
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1, name="final_s0_y") as row:
            with hcl.for_(final_min_0, final_extent_0, name="final_s0_x") as col:
                final[col, row] = prod[col, row]
    return final
Ejemplo n.º 22
0
 def kernel(A):
     """Return a 2x2 tensor "B" of 2x2-window sums of A, each divided by 4.

     For each output (x, y), two rows of A are copied into the (2, 4) buffer
     "LB", then the 2x2 window starting at column ``y * 2`` is summed.
     ``dtype`` is not defined in this block and must come from the enclosing
     scope.
     """
     B = hcl.compute((2, 2), lambda x, y: 0, name="B", dtype=dtype)  # syntax sugar
     with hcl.Stage("S"):
         LB = hcl.compute((2, 4), lambda x, y: 0, name="LB", dtype=dtype)
         with hcl.for_(0, 2, name="x") as out_row:
             with hcl.for_(0, 2, name="y") as out_col:
                 # Refill the buffer with rows 2*x and 2*x + 1 of A.
                 with hcl.for_(0, 2, name="LB_i") as buf_row:
                     with hcl.for_(0, 4, name="LB_j") as buf_col:
                         LB[buf_row, buf_col] = A[out_row * 2 + buf_row, buf_col]
                 # Sum the 2x2 window belonging to this output element.
                 val = hcl.scalar(0, "val")
                 with hcl.for_(0, 2, name="r") as win_row:
                     with hcl.for_(0, 2, name="c") as win_col:
                         val.v += LB[win_row, out_col * 2 + win_col]
                 B[out_row, out_col] = val / 4
     return B
Ejemplo n.º 23
0
 def pool(data):
     """Return a 2x2 tensor "out" of 2x2-window sums of ``data``, each divided by 4.

     For each output (x, y), two rows of ``data`` are copied into the (2, 4)
     buffer "LB", then the 2x2 window starting at column ``y * 2`` is summed.
     ``dtype`` is not defined in this block and must come from the enclosing
     scope.
     """
     out = hcl.compute((2, 2), lambda x, y: 0, name="out", dtype=dtype)
     with hcl.Stage("S"):
         LB = hcl.compute((2, 4), lambda x, y: 0, name="LB", dtype=dtype)
         with hcl.for_(0, 2, name="x") as out_row:
             with hcl.for_(0, 2, name="y") as out_col:
                 # Refill the buffer with rows 2*x and 2*x + 1 of data.
                 with hcl.for_(0, 2, name="LB_i") as buf_row:
                     with hcl.for_(0, 4, name="LB_j") as buf_col:
                         LB[buf_row, buf_col] = data[out_row * 2 + buf_row, buf_col]
                 # Sum the 2x2 window belonging to this output element.
                 val = hcl.scalar(0, "val")
                 with hcl.for_(0, 2, name="r") as win_row:
                     with hcl.for_(0, 2, name="c") as win_col:
                         val.v += LB[win_row, out_col * 2 + win_col]
                 out[out_row, out_col] = val / 4
     return out
Ejemplo n.º 24
0
def top(input, ):
    """Build the "linear", "blur_x", "blur_y", "srgb" and "final" stages; return ``final``.

    The stages run in sequence: a per-element piecewise curve on ``input``, a
    3-tap average along x, a 3-tap average along y, a second piecewise curve,
    and a copy into a fixed-shape output.

    ``final_min_*`` and ``final_extent_*`` are not defined in this block and
    must be provided by the enclosing scope.
    NOTE(review): the constants look like the sRGB <-> linear transfer
    functions -- confirm against the pipeline this was generated from.
    """
    # Derived extents; nothing further down in this block reads them.
    final_total_extent_1 = (hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_1) * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_0))
    final_total_extent_2 = (final_total_extent_1 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_2))
    # "linear" is 2 wider in x and y than the output because the two blur
    # stages read neighbours at offsets +1 and +2.
    linear = hcl.compute(((final_extent_0 + 2), (final_extent_1 + 2), final_extent_2), lambda x, y, z: 0, name = "linear", dtype = hcl.Float(bits = 32))
    with hcl.Stage("linear"):
        with hcl.for_(final_min_2, final_extent_2, name = "linear_s0_c") as linear_s0_c:
            with hcl.for_(final_min_1, (final_extent_1 + 2), name = "linear_s0_y") as linear_s0_y:
                with hcl.for_(final_min_0, (final_extent_0 + 2), name = "linear_s0_x") as linear_s0_x:
                    t4 = input[linear_s0_x, linear_s0_y, linear_s0_c]
                    # Above 0.040450: (t4 * 0.947867 + 0.052133) ** 2.4; otherwise t4 * 0.077399.
                    linear[linear_s0_x, linear_s0_y, linear_s0_c] = hcl.select((hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.040450) < t4), hcl.power(((t4 * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.947867)) + hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.052133)), hcl.cast(dtype = hcl.Float(bits = 32), expr = 2.400000)), (t4 * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.077399)))
    blur_x = hcl.compute((final_extent_0, (final_extent_1 + 2), final_extent_2), lambda x, y, z: 0, name = "blur_x", dtype = hcl.Float(bits = 32))
    with hcl.Stage("blur_x"):
        with hcl.for_(final_min_2, final_extent_2, name = "blur_x_s0_c") as blur_x_s0_c:
            with hcl.for_(final_min_1, (final_extent_1 + 2), name = "blur_x_s0_y") as blur_x_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "blur_x_s0_x") as blur_x_s0_x:
                    # Sum of linear at x, x + 1 and x + 2, scaled by 0.333333.
                    blur_x[blur_x_s0_x, blur_x_s0_y, blur_x_s0_c] = ((linear[(blur_x_s0_x + 2), blur_x_s0_y, blur_x_s0_c] + (linear[blur_x_s0_x, blur_x_s0_y, blur_x_s0_c] + linear[(blur_x_s0_x + 1), blur_x_s0_y, blur_x_s0_c])) * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.333333))
    blur_y = hcl.compute((final_extent_0, final_extent_1, final_extent_2), lambda x, y, z: 0, name = "blur_y", dtype = hcl.Float(bits = 32))
    with hcl.Stage("blur_y"):
        with hcl.for_(final_min_2, final_extent_2, name = "blur_y_s0_c") as blur_y_s0_c:
            with hcl.for_(final_min_1, final_extent_1, name = "blur_y_s0_y") as blur_y_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "blur_y_s0_x") as blur_y_s0_x:
                    # Sum of blur_x at y, y + 1 and y + 2, scaled by 0.333333.
                    blur_y[blur_y_s0_x, blur_y_s0_y, blur_y_s0_c] = ((blur_x[blur_y_s0_x, (blur_y_s0_y + 2), blur_y_s0_c] + (blur_x[blur_y_s0_x, blur_y_s0_y, blur_y_s0_c] + blur_x[blur_y_s0_x, (blur_y_s0_y + 1), blur_y_s0_c])) * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.333333))
    srgb = hcl.compute((final_extent_0, final_extent_1, final_extent_2), lambda x, y, z: 0, name = "srgb", dtype = hcl.Float(bits = 32))
    with hcl.Stage("srgb"):
        with hcl.for_(final_min_2, final_extent_2, name = "srgb_s0_c") as srgb_s0_c:
            with hcl.for_(final_min_1, final_extent_1, name = "srgb_s0_y") as srgb_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "srgb_s0_x") as srgb_s0_x:
                    t5 = blur_y[srgb_s0_x, srgb_s0_y, srgb_s0_c]
                    # Above 0.003131: t5 ** 0.416667 * 1.055 - 0.055; otherwise t5 * 12.92.
                    srgb[srgb_s0_x, srgb_s0_y, srgb_s0_c] = hcl.select((hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.003131) < t5), ((hcl.power(t5, hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.416667)) * hcl.cast(dtype = hcl.Float(bits = 32), expr = 1.055000)) + hcl.cast(dtype = hcl.Float(bits = 32), expr = -0.055000)), (t5 * hcl.cast(dtype = hcl.Float(bits = 32), expr = 12.920000)))
    # Output buffer with a fixed (766, 1278, 3) shape; filled by a plain copy of srgb.
    final = hcl.compute((766, 1278, 3), lambda x, y, z: 0, name = "final", dtype = hcl.Float(bits = 32))
    with hcl.Stage("final"):
        with hcl.for_(final_min_2, final_extent_2, name = "final_s0_c") as final_s0_c:
            with hcl.for_(final_min_1, final_extent_1, name = "final_s0_y") as final_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "final_s0_x") as final_s0_x:
                    final[final_s0_x, final_s0_y, final_s0_c] = srgb[final_s0_x, final_s0_y, final_s0_c]
    return final
Ejemplo n.º 25
0
def zculling(size_pixels,size,fragment,z_buffer,pixels):
    """Keep the fragments that pass the z-buffer test and record them as pixels.

    How each argument is used by the body:
      size_pixels -- element 0 receives the number of pixels written.
      size        -- upper bound of the fragment loop (fragments 0..size-1).
      fragment    -- read as fragment[n][0..3]; columns are used as x, y, z
                     and color respectively.
      z_buffer    -- read and updated at z_buffer[y][x].
      pixels      -- written at pixels[count][0..2] with x, y and color.
    """
    # Running count of emitted pixels; doubles as the next write slot.
    emitted = hcl.scalar(0, dtype=hcl.Int())

    with hcl.Stage("S2"):
        with hcl.for_(0, size) as idx:
            frag_x = hcl.scalar(fragment[idx][0], dtype=hcl.Int())
            frag_y = hcl.scalar(fragment[idx][1], dtype=hcl.Int())
            frag_z = hcl.scalar(fragment[idx][2])
            frag_color = hcl.scalar(fragment[idx][3])
            # Only a fragment with a smaller z than the stored one is kept.
            with hcl.if_(frag_z < z_buffer[frag_y][frag_x]):
                pixels[emitted][0] = frag_x.v
                pixels[emitted][1] = frag_y.v
                pixels[emitted][2] = frag_color.v
                emitted.v += 1
                z_buffer[frag_y][frag_x] = frag_z.v
    size_pixels[0] = emitted.v
Ejemplo n.º 26
0
def test_if():
    """Check lowering and execution of an imperative if/else kernel.

    The kernel writes ``B[x, y] = A[x, y]`` when ``A[x, y] >= 0`` and
    ``B[x, y] = -A[x, y]`` otherwise. The ``x`` loop is split by 3 and the
    two resulting loops are reordered; the test then checks the printed
    lowered IR node by node and compares the built function with ``np.abs``.
    """
    hcl.init()

    def absolute(A, B):
        with hcl.for_(0, A.shape[0], name="x") as x:
            with hcl.for_(0, A.shape[1], name="y") as y:
                with hcl.if_(A[x, y] >= 0):
                    B[x, y] = A[x, y]
                with hcl.else_():
                    B[x, y] = -A[x, y]

    A = hcl.placeholder((10, 20), name="A", dtype="float32")
    B = hcl.placeholder(A.shape, name="B", dtype="float32")
    with hcl.Stage() as C:
        absolute(A, B)
    s = hcl.create_schedule([A, B])
    o, i = s[C].split(C.x, factor=3)
    s[C].reorder(i, o)

    # test lower
    ir = hcl.lower(s)
    inner_loop = ir.body.body.body.body
    outer_loop = inner_loop.body
    y_loop = outer_loop.body
    # 10 is not a multiple of 3, so the split adds a bound guard around the
    # user-written if/else.
    bound_guard = y_loop.body
    sign_branch = bound_guard.then_case

    # Flattened element index as printed in the lowered IR.
    flat_index = "(y + ((x.inner + (x.outer*3))*20))"

    assert str(inner_loop).startswith("for (x.inner, 0, 3)")
    assert str(outer_loop).startswith("for (x.outer, 0, 4)")
    assert str(y_loop).startswith("for (y, 0, 20)")
    assert str(bound_guard.condition).startswith(
        "(x.inner < (10 - (x.outer*3)))")
    assert str(sign_branch.condition).startswith(
        "(0.000000f <= A[" + flat_index + "])")
    assert str(sign_branch.then_case).startswith(
        "B[" + flat_index + "] = A[" + flat_index + "]")
    assert str(sign_branch.else_case).startswith(
        "B[" + flat_index + "] = (A[" + flat_index + "]*-1.000000f)")

    # test build
    f = hcl.build(s)
    # np.random.random yields values in [0, 1); shift them so roughly half of
    # the inputs are negative and the else_ branch is actually executed.
    a_np = np.random.random(A.shape) - 0.5
    a_hcl = hcl.asarray(a_np, dtype="float32")
    b_hcl = hcl.asarray(np.zeros(B.shape), dtype="float32")
    f(a_hcl, b_hcl)
    b_np = np.abs(a_np)
    np.testing.assert_allclose(b_np, b_hcl.asnumpy())
Ejemplo n.º 27
0
def byte_swap_rtl(input_vec, ret=None, name=None):
    """Describe an element-wise 16-bit swap and register it as an RTL extern module.

    The functional model applied to every element ``v`` of ``input_vec`` is
    ``(v << 16) | (v >> 16)``. Alongside it, a dictionary with the module
    name, argument list, C header, loop body, dependency folder, build
    commands and compiler flags is handed to ``create_extern_module``.

    Args:
        input_vec: input tensor; its ``shape``, ``name`` and ``dtype`` are read.
        ret: optional output tensor. When omitted, a zero-initialised tensor
            named "vec" with the shape of ``input_vec`` is created.
        name: value stored under the "name" key; defaults to "my_byteswap".

    Returns:
        The created output tensor when ``ret`` was omitted, otherwise ``None``.

    NOTE(review): the generated loop always writes to ``vec``, which matches
    the tensor created here; it looks like a caller-supplied ``ret`` with a
    different name would not be referenced by the emitted code -- confirm.
    """
    if name is None:
        name = "my_byteswap"

    elem_count = input_vec.shape[0]
    created_here = ret is None
    if created_here:
        ret = hcl.compute(input_vec.shape, lambda *args: 0, "vec")

    # Functional behaviour of the module.
    with hcl.Stage("ExternModule") as Module:
        hcl.update(
            ret,
            lambda *args: (input_vec[args] << 16) | (input_vec[args] >> 16),
            "swap")

    # Loop emitted into the kernel; doubled braces survive str.format.
    loop_template = """
    for (int k = 0; k < {}; k++) {{
      vec[k] = my_byteswap({}[k]);
    }}
"""

    dicts = {
        "name": name,
        "args": [(input_vec.name, input_vec.dtype)],
        # Header declaration for the external function.
        "header": "unsigned int my_byteswap(unsigned int x);",
        "func": loop_template.format(elem_count, input_vec.name),
        # Dependency folder next to this file; copied to the project folder.
        "deps": os.path.dirname(os.path.abspath(__file__)) + "/lib1",
        # Custom commands (root path: project), run before impl or emulation.
        "cmds": ("cd lib1; "
                 "aocl library hdl-comp-pkg opencl_lib.xml -o opencl_lib.aoco;"
                 "aocl library create -name opencl_lib opencl_lib.aoco;"),
        # Custom compiler flags that load the generated library.
        "flags": "-I lib1 -L lib1 -l opencl_lib.aoclib",
    }

    create_extern_module(Module, dicts, ip_type="rtl")
    return ret if created_here else None
Ejemplo n.º 28
0
def toynn_vhls_ip(input_1, output_1, name=None):
    """Register the toy-NN sources as an external HLS IP stage.

    Both tensors are registered inside an "ExternModule.toyNN" stage and
    listed as the module inputs. If no ``firmware`` directory exists in the
    current working directory, the source archive is downloaded and unpacked
    there first.

    Args:
        input_1: first tensor passed to ``register_tensors``.
        output_1: second tensor passed to ``register_tensors``.
        name: external IP name stored on the stage; defaults to "myproject".

    Raises:
        subprocess.CalledProcessError: if unpacking the downloaded archive
            fails. Previously the ``tar`` exit status was ignored and the
            failure only surfaced later as missing source files.
    """
    # Function-scope import: only this helper needs it.
    import subprocess

    if name is None:
        name = "myproject"

    # Function behavior definition
    with hcl.Stage("ExternModule.toyNN") as Module:
        register_tensors([input_1, output_1])

    Module.ext_ip_name = name
    Module.inputs = [input_1, output_1]

    # Fetch the cpp/hpp files once; an existing "firmware" folder is reused.
    if not os.path.exists("firmware"):
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/Hecmay/debug.trace/main/toynn.tar.gz",
            filename="toynn.tar.gz")
        # check=True stops here with tar's exit status instead of continuing
        # with a missing or partial firmware tree.
        subprocess.run(["tar", "-zxvf", "toynn.tar.gz"], check=True)

    source = [
        "firmware/myproject.cpp", "firmware/nnet_utils/", "firmware/weights/"
    ]
    Module.source = include_dependency(source)
    create_extern_module(Module, ip_type="HLS")
Ejemplo n.º 29
0
def vadd_rtl(A, B, length, ret=None, name=None):
    """Describe an element-wise vector add and register it as an RTL extern module.

    Args:
        A: first operand tensor; its ``shape``, ``name`` and ``dtype`` are read.
        B: second operand tensor; must have the same shape as ``A``.
        length: expected size of the first dimension of ``A``.
        ret: optional output tensor. When omitted, a zero-initialised tensor
            named "ret" with the shape of ``A`` is created.
        name: value stored under the "name" key; defaults to "vadd_rtl".

    Returns:
        The created output tensor when ``ret`` was omitted, otherwise ``None``.

    Raises:
        AssertionError: with message "shape not match" when the shapes of
            ``A`` and ``B`` differ or ``A.shape[0] != length``.
    """
    if name is None:
        name = "vadd_rtl"

    # Validation happens before any stage or tensor is created.
    first_dim = A.shape[0]
    assert A.shape == B.shape, "shape not match"
    assert first_dim == length, "shape not match"

    created_here = ret is None
    if created_here:
        ret = hcl.compute(A.shape, lambda *args: 0, "ret")

    # Functional behaviour of the module.
    with hcl.Stage("ExternModule") as Module:
        hcl.update(ret, lambda *args: A[args] + B[args], "vadd")

    dicts = {
        "name": name,
        "args": [(operand.name, operand.dtype) for operand in (A, B)],
        # The RTL IP is wrapped as a separate OpenCL kernel in Vitis; its
        # scripts live next to this file and are copied to the project folder.
        "deps": os.path.dirname(os.path.abspath(__file__)) + "/scripts",
        # Custom command (root path: project), run before impl or emulation.
        # The trailing "{} {}" placeholders are intentionally left unformatted.
        "cmds": ("vivado -mode batch -source "
                 "scripts/gen_xo.tcl -tclargs vadd.xo vadd hw_emu {} {}"),
        # Custom compiler flags that load the generated object.
        "flags": "vadd.xo",
    }

    create_extern_module(Module, dicts, ip_type="rtl")
    return ret if created_here else None
Ejemplo n.º 30
0
def HJ_PDE_solver(V_new, V_init, thetas):
    """Fill ``V_new`` inside a "Hamiltonian" stage, one grid point at a time.

    Args:
        V_new: output tensor, written at ``V_new[i, j, k]``.
        V_init: only its ``shape`` is read here, to bound the three loops.
        thetas: indexed by ``k``; its values feed ``hcl.cos`` / ``hcl.sin``.

    Each loop starts at 1, so index 0 of every dimension is never written.
    """
    # Calculate spatial derivative based on index and dimension number
    # As written it returns (i*j - k, i*j + k); `dim` is accepted but not used.
    def spatial_derivative(i, j, k, dim):
        left = i * j - k
        right = i * j + k
        return left, right

    # Calculate Hamiltonian for every grid point in V_init
    with hcl.Stage("Hamiltonian"):
        with hcl.for_(1, V_init.shape[0], name="i") as i:
            with hcl.for_(1, V_init.shape[1], name="j") as j:
                with hcl.for_(1, V_init.shape[2], name="k") as k:
                    # Calculate dV_dx
                    # (and dV_dy, dV_dtheta: same helper, dim = 0, 1, 2)
                    dV_dx_L, dV_dx_R = spatial_derivative(i, j, k, 0)
                    dV_dy_L, dV_dy_R = spatial_derivative(i, j, k, 1)
                    dV_dtheta_L, dV_dtheta_R = spatial_derivative(i, j, k, 2)

                    # Calculate average gradient
                    # (mean of the left and right values for each dimension)
                    dV_dx_C = (dV_dx_L + dV_dx_R) / 2
                    dV_dy_C = (dV_dy_L + dV_dy_R) / 2
                    dV_dtheta_C = (dV_dtheta_L + dV_dtheta_R) / 2

                    # Get optimal control
                    uOpt = 1

                    # Velocity
                    v = 1

                    # Assume that mode is min
                    # NOTE(review): `uOpt = -uOpt` is a plain Python assignment,
                    # not a tensor/scalar write. It looks like it executes once
                    # while this function is traced, regardless of the hcl.if_
                    # condition, leaving uOpt == -1 for every grid point.
                    # Confirm, and consider hcl.select or an hcl.scalar instead.
                    with hcl.if_(dV_dtheta_C > 0):
                        uOpt = -uOpt

                    # Calculate dynamics function
                    #V_new[i,j,k] = 1 * cos(thetas[k]) * dV_dx_C +1 * sin(thetas[k]) * dV_dy_C +uOpt * dV_theta_C
                    #angle = hcl.scalar(thetas[k], "angle")
                    V_new[i, j,
                          k] = v * hcl.cos(thetas[k]) * dV_dx_C + v * hcl.sin(
                              thetas[k]) * dV_dy_C + dV_dtheta_C * uOpt