def add(a, b, c):
    """Stage d = a + b elementwise, then update c = d + 1.

    Interleaves hcl.assert_/hcl.print: the first assert always passes,
    the second always fails, so "print2" is never expected to appear.
    """
    summed = hcl.compute(a.shape, lambda *idx: a[idx] + b[idx], "d")
    hcl.assert_(True, "assert error 1")
    hcl.print(0, "print1\n")
    hcl.update(c, lambda *idx: summed[idx] + 1, "u")
    hcl.assert_(False, "assert error 2")
    hcl.print(0, "print2")
예제 #2
0
    def fft(X_real, X_imag, IndexTable, F_real, F_imag):
        """Iterative radix-2 Cooley-Tukey FFT over HeteroCL tensors.

        X_real/X_imag: 1-D input vectors whose length must be a power of two.
        IndexTable: precomputed bit-reversal permutation indices.
        F_real/F_imag: output tensors, written in place.

        Raises ValueError when the input length is not a power of two.
        """
        L = X_real.shape[0]
        if np.log2(L) % 1 > 0:
            raise ValueError("Length of input vector (1d tensor) must be power of 2")
        num_stages = int(np.log2(L))

        # bit reverse permutation
        hcl.update(F_real, lambda i: X_real[IndexTable[i]], name='F_real_update')
        hcl.update(F_imag, lambda i: X_imag[IndexTable[i]], name='F_imag_update')

        with hcl.Stage("Out"):
            one = hcl.scalar(1, dtype="int32")
            with hcl.for_(0, num_stages) as stage:
                # DFTpts: size of the sub-DFT at this stage; numBF butterflies each.
                DFTpts = one[0] << (stage + 1)
                numBF = DFTpts / 2
                # Twiddle-angle increment for this stage.
                e = -2 * np.pi / DFTpts
                a = hcl.scalar(0)
                with hcl.for_(0, numBF) as j:
                    c = hcl.scalar(hcl.cos(a[0]))
                    s = hcl.scalar(hcl.sin(a[0]))
                    a[0] = a[0] + e
                    # NOTE(review): upper bound "L + DFTpts - 1" looks like an
                    # off-by-one -- the classic butterfly loop runs i in
                    # range(j, L - DFTpts + 1, DFTpts) so that i + numBF stays
                    # in bounds. Confirm against the reference implementation.
                    with hcl.for_(j, L + DFTpts - 1, DFTpts) as i:
                        i_lower = i + numBF
                        temp_r = hcl.scalar(F_real[i_lower] * c - F_imag[i_lower] * s)
                        temp_i = hcl.scalar(F_imag[i_lower] * c + F_real[i_lower] * s)
                        F_real[i_lower] = F_real[i] - temp_r[0]
                        F_imag[i_lower] = F_imag[i] - temp_i[0]
                        F_real[i] = F_real[i] + temp_r[0]
                        F_imag[i] = F_imag[i] + temp_i[0]
 def add(a, b, c):
     """Stage a + b, then update c to the sum plus one.

     Both hcl.assert_ calls are unconditionally false, so execution is
     expected to stop before "print2" is emitted.
     """
     total = hcl.compute(a.shape, lambda *ix: a[ix] + b[ix])
     hcl.assert_(False)
     hcl.print(0, "print1")
     hcl.update(c, lambda *ix: total[ix] + 1)
     hcl.assert_(False)
     hcl.print(0, "print2")
 def learn(k, hdTrainData, prototype, prototypeCounter):
     """Build the class-k prototype hypervector from training samples.

     Relies on enclosing-scope names `trainLabels`, `max` and `rdv3` --
     presumably the label vector, per-class sample counts and a random
     tie-break vector; confirm against the caller. `max` shadows the builtin.
     """
     #Find samples that have the label k
     match = hcl.compute(
         hdTrainData.shape,
         lambda x, y: hcl.select(trainLabels[x] == k, hdTrainData[x][y], 0),
         "match")
     #Record the number of these samples
     with hcl.for_(0, hdTrainData.shape[0]) as a:
         with hcl.if_(trainLabels[a] == k):
             max[k] += 1
     #Do hdc sum on these samples' hdv
     r = hcl.reduce_axis(0, hdTrainData.shape[0], 'r')
     result = hcl.compute((hdTrainData.shape[1], ),
                          lambda y: hcl.sum(match[r][y], axis=r), "result")
     #Do the binary voting
     sum1 = hcl.compute((hdTrainData.shape[1], ), lambda x: 0, "sum1")
     # Even sample count: add rdv3[k] as a tie breaker before thresholding.
     with hcl.if_(max[k] % 2 == 0):
         hcl.update(
             sum1, lambda x: hcl.select(
                 result[x] + rdv3[k][x] - max[k] / 2 > 0, 1, 0))
     with hcl.else_():
         hcl.update(sum1,
                    lambda x: hcl.select(result[x] - max[k] / 2 > 0, 1, 0))
     #Push the binary sum to prototype and the original sum to prototypeCounter
     with hcl.for_(0, hdTrainData.shape[1]) as t:
         prototype[k][t] = sum1[t]
         prototypeCounter[k][t] = result[t]
예제 #5
0
        def loop_kernel(labels):
            """One k-means iteration: reassign every label, then recompute means.

            Uses enclosing-scope N (points), K (clusters), dim, `points` and
            `means`.
            """
            # assign cluster
            with hcl.for_(0, N, name="n") as n:
                min_dist = hcl.scalar(100000)
                new_label = hcl.scalar(labels[n])
                with hcl.for_(0, K) as k:
                    # Squared Euclidean distance from point n to centroid k.
                    dist = hcl.scalar(0)
                    with hcl.for_(0, dim) as d:
                        dist_ = hcl.scalar(points[n, d] - means[k, d], "temp")
                        dist.v += dist_.v * dist_.v
                    with hcl.if_(dist.v < min_dist.v):
                        min_dist.v = dist.v
                        new_label[0] = k
                # NOTE(review): assigns the scalar handle itself, while the
                # element is written via new_label[0] above -- confirm hcl
                # loads element 0 on the right-hand side here.
                labels[n] = new_label
            # update mean
            num_k = hcl.compute((K, ), lambda x: 0, "num_k")
            sum_k = hcl.compute((K, dim), lambda x, y: 0, "sum_k")

            def calc_sum(n):
                # Accumulate per-cluster member count and coordinate sums.
                num_k[labels[n]] += 1
                with hcl.for_(0, dim) as d:
                    sum_k[labels[n], d] += points[n, d]

            hcl.mutate((N, ), lambda n: calc_sum(n), "calc_sum")
            # Integer division: each mean becomes the floored coordinate average.
            hcl.update(means, lambda k, d: sum_k[k, d] // num_k[k],
                       "update_mean")
    def update(l, prototype, prototypeCounter, max):
        """One refinement pass of the HDC prototypes over the training set.

        For each sample, predicts a class by minimum Hamming distance to the
        prototypes; on a misprediction, nudges the true-label and predicted
        prototypes through their bit counters, then prints train/test accuracy.
        Uses enclosing-scope hdTrainData, hdTestData, trainLabels, testLabels,
        numClasses and test_hdc_accu. `max` (shadows the builtin) holds
        per-class sample counts.
        """
        hcl.print((l + 1),
                  "%d:Use hard examples to update the prototype counters.\n")

        ###data preparation
        distance = hcl.compute((hdTrainData.shape[1], ), lambda x: 0,
                               'distance')
        hamming_dist = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist")
        m = hcl.reduce_axis(0, hdTrainData.shape[1], "m")
        ###

        with hcl.for_(0, hdTrainData.shape[0]) as i:
            with hcl.for_(0, numClasses) as n:
                #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
                hcl.update(distance,
                           lambda x: hdTrainData[i][x] ^ prototype[n][x])
                #Calculate the hamming distance of the two vectors by adding 1s
                hamming_dist[n] = hcl.sum(distance[m], axis=m)

            #Find the one having the least hamming distance and choose it's label as the predicted label
            pred = hcl.scalar(0, 'pred')
            with hcl.for_(0, hamming_dist.shape[0]) as j:
                # NOTE(review): indexes with the scalar handle `pred` rather
                # than pred.v -- confirm hcl resolves this to element 0.
                with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                    pred.v = j

            #Adjust the proto vectors by adding the sample vector on its label proto hdv and substrct it on its predicted proto hdv
            with hcl.if_(pred.v != trainLabels[i]):
                max[trainLabels[i]] += 1
                max[pred] -= 1
                # NOTE: this loop variable rebinds the Python name `m`
                # (previously the reduce axis declared above).
                with hcl.for_(0, hdTrainData.shape[1]) as m:
                    prototypeCounter[trainLabels[i]][m] += hdTrainData[i][m]
                    prototypeCounter[pred][m] -= hdTrainData[i][m]
                    # Re-threshold the true-label prototype bit from its counter.
                    with hcl.if_(max[trainLabels[i]] % 2 == 0):
                        with hcl.if_(prototypeCounter[trainLabels[i]][m] -
                                     max[trainLabels[i]] / 2 == 0):
                            prototype[trainLabels[i]][m] &= 1
                    with hcl.else_():
                        prototype[trainLabels[i]][m] = hcl.select(
                            prototypeCounter[trainLabels[i]][m] -
                            max[trainLabels[i]] / 2 > 0, 1, 0)

                    # Same re-thresholding for the (wrongly) predicted class.
                    with hcl.if_(max[pred] % 2 == 0):
                        with hcl.if_(prototypeCounter[pred][m] -
                                     max[pred] / 2 == 0):
                            prototype[pred][m] &= 1
                    with hcl.else_():
                        prototype[pred][m] = hcl.select(
                            prototypeCounter[pred][m] - max[pred] / 2 > 0, 1,
                            0)

        #print the accuracy
        hcl.mutate(
            (1, ),
            lambda x: test_hdc_accu(prototype, hdTrainData, trainLabels, 1),
            'training_update')
        hcl.mutate(
            (1, ),
            lambda x: test_hdc_accu(prototype, hdTestData, testLabels, 2),
            'testing_update')
예제 #7
0
    def algorithm(A, B):
        """Set B[x] = -1 where A[x] > 5, else A[x] + 1, via a declared hcl function."""
        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            # Early return: values above 5 map to -1.
            with hcl.if_(A[x] > 5):
                hcl.return_(-1)
            hcl.return_(A[x] + 1)

        hcl.update(B, lambda x: update_B(A, x))
예제 #8
0
 def kernel(A, B):
     """Run ten sweeps inside Stage "Super", alternating D = j*A + B and
     C = A + j*D, and return C (D is internal scratch)."""
     C = hcl.compute((10, 32), lambda *args : 0, "C")
     D = hcl.compute(C.shape, lambda *args: 0, "D")
     with hcl.Stage("Super") as m:
         with hcl.for_(0, 10, name="j") as j:
             hcl.update(D, lambda *args: j*A[args] + B[args], name="update.D")
             hcl.update(C, lambda *args: A[args] + j*D[args], name="update.C")
     return C
 def add(a, b, c):
     """Zero a, stage a + b, and update c = sum + 1, asserting a[0] stays 0.

     The in-loop assert (i < 10) always holds; the later asserts check that
     zeroing `a` is visible to the staged computations.
     """
     with hcl.for_(0, 10) as idx:
         a[idx] = 0
         hcl.assert_(idx < 10, "assert error 1")
     elem_sum = hcl.compute(a.shape, lambda *pos: a[pos] + b[pos])
     hcl.assert_(a[0] == 0, "assert error 2")
     hcl.update(c, lambda *pos: elem_sum[pos] + 1)
     hcl.assert_(a[0] == 0, "assert error 3")
예제 #10
0
    def algorithm(A, B):
        """Set B[x] = 1 when A[x] is in [0, 10), else keep A[x], via hcl.def_."""
        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            # Linear scan over 0..9; return 1 on the first match.
            with hcl.for_(0, 10) as i:
                with hcl.if_(A[x] == i):
                    hcl.return_(1)
            hcl.return_(A[x])

        hcl.update(B, lambda x: update_B(A, x))
예제 #11
0
def SgdLR(data, label, theta, lut):
    """Stochastic-gradient-descent logistic regression on unpacked tensors.

    data/label/theta: packed tensors, unpacked to per-element locals below.
    lut: 2048-entry sigmoid lookup table.
    Uses enclosing-scope NUM_TRAINING and NUM_FEATURES -- presumably dataset
    dimensions; confirm against the caller. Returns the stream_out stage that
    packs the trained theta back into `theta`.
    """

    label_local = hcl.unpack(label, name="label_local")
    theta_local = hcl.unpack(theta, name="theta_local")
    data_local = hcl.unpack(data, name="data_local")

    FTYPE = theta_local.dtype

    def Sigmoid(exponent):
        # Piecewise sigmoid: saturate outside [-4, 4]; otherwise index the
        # LUT with a bit-sliced fixed-point view of the exponent.
        ret = hcl.scalar(0.0, "sigmoid", FTYPE)
        with hcl.if_(exponent > hcl.cast(FTYPE, 4.0)):
            ret[0] = 1.0
        with hcl.elif_(exponent < hcl.cast(FTYPE, -4.0)):
            ret[0] = 0.0
        with hcl.else_():
            with hcl.if_(exponent < hcl.cast(FTYPE, 0.0)):
                # Negative input: two's-complement the slice, then mirror into
                # the top half of the 2048-entry table.
                num = hcl.scalar(0, dtype=hcl.UFixed(18, 8))
                num[0][18:0] = exponent[29:11]
                num[0] = ~(num[0] << 8) + 1
                index = 2047.0 - num[0]
                ret[0] = lut[hcl.cast(hcl.Int(32), index)]
            with hcl.else_():
                index = exponent[21:11]
                ret[0] = lut[hcl.cast(hcl.Int(32), index)]
        return ret[0]

    # NOTE(review): lowercase hcl.stage(...) here while the rest of the file
    # uses hcl.Stage(...) -- confirm this alias exists in the hcl version used.
    with hcl.stage("M"):
        with hcl.for_(0, NUM_TRAINING) as train_id:
            # Slice one training sample out of the flattened data vector.
            training_instance = hcl.compute(
                (NUM_FEATURES, ),
                lambda x: data_local[train_id * NUM_FEATURES + x],
                "training_instance", data_local.dtype)

            # Main Computation
            k = hcl.reduce_axis(0, NUM_FEATURES, "k")
            dot = hcl.compute(
                (1, ),
                lambda x: hcl.sum(theta_local[k] * training_instance[k],
                                  axis=k,
                                  dtype=FTYPE),
                "dot",
                dtype=FTYPE)
            gradient = hcl.compute((NUM_FEATURES, ),
                                   lambda x: (Sigmoid(dot[0]) - label_local[
                                       train_id]) * training_instance[x],
                                   "gradient",
                                   dtype=FTYPE)
            # 2565.0 appears to be the (pre-scaled) learning rate -- confirm.
            update = hcl.update(
                theta_local,
                lambda x: theta_local[x] - 2565.0 * gradient[x],
                name="update")

    theta_pack = hcl.pack(theta_local, name="theta_pack", dtype=theta.dtype)
    stream_out = hcl.update(theta, lambda x: theta_pack[x], name="stream_out")

    return stream_out
예제 #12
0
    def kernel(A, B):
        """Compute C = A + B, add 1 in Stage "Super", then nested Stage "Plus"
        overwrites column 0 of the first ten rows with 10; returns C."""
        C = hcl.compute((10, 32), lambda *args: A[args] + B[args], "C")

        with hcl.Stage("Super") as m:
            hcl.update(C, lambda *args: C[args] + 1, "update")

            with hcl.Stage("Plus") as stage:
                with hcl.for_(0, 10) as j:
                    C[j, 0] = 10
        return C
    def update(l, prototype, prototypeCounter, max):
        """Bit-packed variant of the HDC prototype refinement pass.

        Like the unpacked version, but samples/prototypes store in_bw bits per
        word, so distances use popcount and counter indices are m*in_bw+bit.
        Uses enclosing-scope in_train, in_test, in_bw, trainLabels, testLabels,
        numClasses, popcount and test_hdc_accu. `max` shadows the builtin and
        holds per-class sample counts.
        """
        hcl.print((l+1),"%d:Use hard examples to update the prototype counters.\n")

        ###data preparation
        distance = hcl.compute((in_train.shape[1],), lambda x: 0, 'distance', dtype=hcl.UInt(in_bw))
        pre_dist = hcl.compute((in_train.shape[1],), lambda x: 0, "pre_dist")
        hamming_dist = hcl.compute((numClasses,), lambda x: 0, "hamming_dist")
        m = hcl.reduce_axis(0, in_train.shape[1], "m")
        ###

        with hcl.for_(0, in_train.shape[0]) as i:
            hcl.print((i),"%d suc\n")
            # pack_proto = hcl.pack(prototype, axis=1, dtype=hcl.UInt(in_bw), name="pack_proto")
            with hcl.for_(0, numClasses) as n:
                #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
                hcl.update(distance, lambda x: in_train[i][x] ^ prototype[n][x])
                #Calculate the hamming distance of the two vectors by adding 1s
                hcl.update(pre_dist, lambda x: popcount(distance[x]))
                hcl.print((),"sum of 1s suc")
                hamming_dist[n] = hcl.sum(pre_dist[m], axis=m)

            #Find the one having the least hamming distance and choose it's label as the predicted label
            pred = hcl.scalar(0, 'pred')
            with hcl.for_(0, hamming_dist.shape[0]) as j:
                # NOTE(review): indexes with the scalar handle `pred` rather
                # than pred.v -- confirm hcl resolves this to element 0.
                with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                    pred.v = j

            #Adjust the proto vectors by adding the sample vector on its label proto hdv and substrct it on its predicted proto hdv
            with hcl.if_(pred.v != trainLabels[i]):
                max[trainLabels[i]] += 1
                max[pred] -= 1
                # NOTE: this loop variable rebinds the Python name `m`
                # (previously the reduce axis declared above).
                with hcl.for_(0, in_train.shape[1]) as m:
                    with hcl.for_(0, in_bw) as bit:
                        # with hcl.if_(in_train[i][m][bit] == 1):
                        #     ###########
                        #     prototypeCounter[trainLabels[i]][m*in_bw+bit] += 1
                        #     prototypeCounter[pred][m*in_bw+bit] -= 1
                        prototypeCounter[trainLabels[i]][m*in_bw+bit] += in_train[i][m][bit]
                        prototypeCounter[pred][m*in_bw+bit] -= in_train[i][m][bit]
                        # Re-threshold the true-label prototype bit from its counter.
                        with hcl.if_(max[trainLabels[i]] % 2 == 0):
                            with hcl.if_(prototypeCounter[trainLabels[i]][m*in_bw+bit] - max[trainLabels[i]]/2 == 0):
                                prototype[trainLabels[i]][m][bit] &= 1
                        with hcl.else_():
                            prototype[trainLabels[i]][m][bit] = hcl.select(prototypeCounter[trainLabels[i]][m*in_bw+bit] - max[trainLabels[i]]/2 > 0, 1, 0)

                        # Same re-thresholding for the (wrongly) predicted class.
                        with hcl.if_(max[pred] % 2 == 0):
                            with hcl.if_(prototypeCounter[pred][m*in_bw+bit] - max[pred]/2 == 0):
                                prototype[pred][m][bit] &= 1
                        with hcl.else_():
                            prototype[pred][m][bit] = hcl.select(prototypeCounter[pred][m*in_bw+bit] - max[pred]/2 > 0, 1, 0)

        #print the accuracy
        hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_train, trainLabels, 1), 'training_update')
        hcl.mutate((1,), lambda x: test_hdc_accu(prototype, in_test, testLabels, 2), 'testing_update')
예제 #14
0
    def test_hdc_accu(proto, pack_data, labels, type):
        """Classify each packed sample by minimum Hamming distance to the
        (freshly packed) prototypes and print accuracy.

        type == 1 prints "Training accu", anything else "Testing accu".
        Uses enclosing-scope numClasses, bw and popcount; `type` shadows
        the builtin.
        """
        #pack the prototype
        pack_proto = hcl.pack(proto,
                              axis=1,
                              dtype=hcl.UInt(bw),
                              name="pack_proto")

        ###data preparation
        distance1 = hcl.compute((pack_data.shape[1], ),
                                lambda x: 0,
                                'distance1',
                                dtype=hcl.UInt(bw))
        pre_hamming = hcl.compute((pack_data.shape[1], ), lambda x: 0,
                                  "pre_hamming")
        hamming_dist1 = hcl.compute((numClasses, ), lambda x: 0,
                                    "hamming_dist1")
        m1 = hcl.reduce_axis(0, pack_data.shape[1], "m1")
        correct1 = hcl.scalar(0, 'correct1')
        ###

        with hcl.for_(0, pack_data.shape[0]) as i:
            hcl.print((i), "%d suc\n")
            with hcl.for_(0, numClasses) as n:
                #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
                hcl.update(distance1,
                           lambda x: pack_data[i][x] ^ pack_proto[n][x])
                #Calculate the hamming distance of the two vectors by adding 1s
                hcl.update(pre_hamming, lambda x: popcount(distance1[x]))
                hcl.print((), "sum of 1s suc")
                ###########################seg fault
                hamming_dist1[n] = hcl.sum(pre_hamming[m1], axis=m1)

            #Find the one having the least hamming distance and choose it's label as the predicted label
            pred1 = hcl.scalar(0, 'pred1')
            with hcl.for_(0, hamming_dist1.shape[0]) as j:
                # NOTE(review): indexes with the scalar handle pred1 rather
                # than pred1.v -- confirm hcl resolves this to element 0.
                with hcl.if_(hamming_dist1[j] < hamming_dist1[pred1]):
                    pred1.v = j

            with hcl.if_(pred1.v == labels[i]):
                correct1.v += 1

        #Print the accuracy
        all1 = hcl.scalar(pack_data.shape[0], "all1", dtype=hcl.Float(32))
        accuracy1 = hcl.compute((1, ),
                                lambda x: correct1.v / all1.v * 100,
                                "accuracy1",
                                dtype=hcl.Float(32))
        with hcl.if_(type == 1):
            hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                      "Training accu: %d/%d (%.2f%%)\n")
        with hcl.else_():
            hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                      "Testing accu: %d/%d (%.2f%%)\n")
예제 #15
0
    def algorithm(A, B):
        """Assert/print ordering test: the in-loop assert always passes; the
        assert inside the if fires for A[x] < 10 when A[x] == i matches."""
        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            with hcl.for_(0, 10) as i:
                hcl.assert_(i < 20)
                hcl.print(0, "in for loop\n")
                with hcl.if_(A[x] == i):
                    # A[x] matched i in [0, 10), so A[x] > 10 cannot hold here.
                    hcl.assert_(A[x] > 10, "assert in if")
                    hcl.print(0, "this should not be printed")
                    hcl.return_(1)
            hcl.return_(A[x])

        hcl.update(B, lambda x: update_B(A, x))
예제 #16
0
    def algorithm(A, B):
        """Tests that a failing assert_ inside a called hcl function aborts the
        remaining statements. Uses enclosing-scope m and k for the matrix
        shapes -- confirm against the caller."""
        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            hcl.print(0, "print1\n")
            hcl.assert_(A[x] != 7)
            hcl.print(0, "print2\n")
            hcl.return_(A[x] + 1)

        matrix_B = hcl.compute((m, k), lambda x, y: A[x] + B[x] + 7,
                               "matrix_B")
        hcl.update(B, lambda x: update_B(A, x))
        matrix_C = hcl.compute((m, k), lambda x, y: A[x] + B[x] + 7,
                               "matrix_C")

        hcl.print(0, "should not print\n")
예제 #17
0
def thresholdedrelu(out, x, theta):
    """Elementwise ThresholdedReLU: out[i, j] = x[i, j] if x[i, j] > theta, else 0.

    Writes into `out` in place via hcl.update and returns the update stage.
    The zero branch is cast to x.dtype so both select arms agree in type.
    """
    assert len(x.shape) == 2, "only support 2-dim ThresholdedReLU"
    # Removed an unused shape unpack and reduce_axis from the original:
    # this op is purely elementwise and performs no reduction.
    return hcl.update(
        out, lambda i, j: hcl.select(x[i, j] > theta, x[i, j],
                                     hcl.cast(x.dtype, 0)))
예제 #18
0
    def algorithm(A, B):
        """Assert-in-branch test: exactly one branch's assert fires per element
        (the if-branch assert always fails when taken), so the trailing print
        is never expected to run."""
        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            with hcl.if_(A[x] > 5):
                hcl.print(0, "print if 1\n")
                # Contradicts the branch condition, so it fails when reached.
                hcl.assert_(A[x] <= 5, "assert in if")
                hcl.print(0, "print if 2\n")
                hcl.return_(-1)
            with hcl.else_():
                hcl.print(0, "print else 1\n")
                hcl.assert_(A[x] <= 5, "assert in else")
                hcl.print(0, "print else 2\n")
                hcl.return_(A[x] + 1)

        hcl.update(B, lambda x: update_B(A, x))
        hcl.print(0, "shouldn't be printed")
예제 #19
0
def elu(out, x, alpha):
    """Elementwise ELU: out[i, j] = alpha * (exp(x[i, j]) - 1) for x < 0, else x.

    Writes into `out` in place via hcl.update and returns the update stage.
    """
    assert len(x.shape) == 2, "only support 2-dim ELU"
    # Removed an unused shape unpack and reduce_axis from the original:
    # this op is purely elementwise and performs no reduction.
    return hcl.update(
        out, lambda i, j: hcl.select(x[i, j] < 0, alpha *
                                     (hcl.exp(x[i, j]) - 1), x[i, j]))
예제 #20
0
    def algorithm(A, B):
        """Assert-after-if test: elements below 5 hit "assert message 1"
        (which fails for A[x] == 4), others pass "assert message 2" and map to
        A[x] + 1. Uses enclosing-scope m and k for the matrix shape --
        confirm against the caller."""
        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            with hcl.if_(A[x] < 5):
                hcl.print(0, "print1\n")
                hcl.assert_(A[x] < 4, "assert message 1")
                hcl.print(0, "print2\n")
                hcl.return_(-1)
            # Fall-through path: the branch condition guarantees A[x] >= 5.
            hcl.assert_(A[x] >= 5, "assert message 2")
            hcl.print(0, "not in if\n")
            hcl.return_(A[x] + 1)

        matrix_B = hcl.compute((m, k), lambda x, y: A[x] + B[x] + 7,
                               "matrix_B")

        hcl.update(B, lambda x: update_B(A, x))
예제 #21
0
def jacobi(input_image, output_image):
    """Five-point stencil: average the pixel, its lower neighbors and the one
    two rows down, writing the result into output_image in place.

    Returns the hcl.update stage (named after the output tensor).
    """
    return hcl.update(
        output_image,
        lambda y, x: (input_image[y + 1, x - 1] + input_image[y, x] +
                      input_image[y + 1, x] + input_image[y + 1, x + 1] +
                      input_image[y + 2, x]) / 5,
        name=output_image.name)
예제 #22
0
def prelu(out, x, alpha):
    """Elementwise PReLU: out[i, j] = alpha[j] * x[i, j] for x < 0, else x.

    alpha holds one learned slope per column; the negative branch is cast to
    x.dtype so both select arms agree in type. Writes into `out` in place via
    hcl.update and returns the update stage.
    """
    assert len(x.shape) == 2, "only support 2-dim PReLU"
    # Removed an unused shape unpack and reduce_axis from the original:
    # this op is purely elementwise and performs no reduction.
    return hcl.update(
        out, lambda i, j: hcl.select(x[
            i, j] < 0, hcl.cast(x.dtype, alpha[j] * x[i, j]), x[i, j]))
예제 #23
0
def unsharp(input_image, output_image):
    """Unsharp-mask a 480x640 RGB image into output_image.

    Pipeline: grayscale -> 9x9 Gaussian blur -> sharpen (2*gray - blur) ->
    per-pixel ratio against the row max -> scale the original RGB channels.
    Relies on `sqrt` from the enclosing scope -- presumably math.sqrt;
    confirm against the module imports. Returns the final update stage.
    """
    def clamp(val, min_, max_):
        # Clamp a scalar expression into [min_, max_].
        local = hcl.scalar(val)
        with hcl.if_(val < min_):
            local[0] = min_
        with hcl.elif_(val > max_):
            local[0] = max_
        return local[0]

    def clamp2D(tensor, min_, max_):
        # Elementwise clamp over a 2-D tensor.
        return hcl.compute(tensor.shape,
                           lambda x, y: clamp(tensor[x, y], min_, max_),
                           name="clamped_" + tensor.name)

    def clamp3D(tensor, min_, max_):
        # Elementwise clamp over a 3-D tensor.
        return hcl.compute(tensor.shape,
                           lambda x, y, c: clamp(tensor[x, y, c], min_, max_),
                           name="clamped_" + tensor.name)

    def kernel_f(x):
        # Unnormalized Gaussian with sigma = 1.5.
        return hcl.exp(-(x * x) / (2 * 1.5 * 1.5)) / sqrt(2 * 3.14159 * 1.5)

    def kernel(x):
        # Gaussian tap scaled so the 9-tap kernel sums to 255.
        return kernel_f(x) * 255 / (kernel_f(0) + kernel_f(1) * 2 +
                                    kernel_f(2) * 2 + kernel_f(3) * 2 +
                                    kernel_f(4) * 2)

    rx = hcl.reduce_axis(-4, 5, "rx")
    ry = hcl.reduce_axis(-4, 5, "ry")
    my = hcl.reduce_axis(0, 640, "my")

    # Integer luma approximation: (77*R + 150*G + 29*B) >> 8.
    gray = hcl.compute((480, 640),
                       lambda x, y: (input_image[x, y, 0] * 77 + input_image[
                           x, y, 1] * 150 + input_image[x, y, 2] * 29) >> 8,
                       name="gray")
    blur = hcl.compute(
        gray.shape,
        lambda x, y: hcl.sum(gray[rx + x, ry + y] * kernel(rx) * kernel(ry),
                             axis=[rx, ry]),
        name="blur")
    sharpen = clamp2D(
        hcl.compute(gray.shape,
                    lambda x, y: gray[x, y] * 2 - blur[x, y],
                    name="sharpen"), 0, 255)
    # Normalize each sharpened pixel by the max gray value in its row.
    ratio = clamp2D(
        hcl.compute(
            gray.shape,
            lambda x, y: sharpen[x, y] * 32 / hcl.max(gray[x, my], axis=my),
            name="ratio"), 0, 255)
    out = clamp3D(
        hcl.compute(output_image.shape,
                    lambda x, y, c: ratio[x, y] * input_image[x, y, c] >> 5,
                    name="out"), 0, 255)
    U = hcl.update(output_image, lambda x, y, c: out[x, y, c])

    return U
예제 #24
0
def byte_swap_rtl(input_vec, ret=None, name=None):
    """Swap the two 16-bit halves of each 32-bit element via an RTL extern module.

    input_vec: 1-D tensor of 32-bit words.
    ret: optional preallocated output tensor; when None, one is created and
    returned (otherwise the function returns None and writes into `ret`).
    name: extern-module name, defaults to "my_byteswap".

    The hcl.Stage body gives the functional model; the dicts describe the
    Intel OpenCL (aocl) RTL library that replaces it at build time.
    """

    if name is None: name = "my_byteswap"

    Len = input_vec.shape[0]
    return_tensors = False
    if ret is None:
        return_tensors = True
        ret = hcl.compute(input_vec.shape, lambda *args: 0, "vec")

    # functional behavior
    with hcl.Stage("ExternModule") as Module:
        hcl.update(ret,
                   lambda *args: input_vec[args] << 16 | input_vec[args] >> 16,
                   "swap")

    dicts = {}
    dicts["name"] = name
    tensors = [input_vec]
    dicts["args"] = [(_.name, _.dtype) for _ in tensors]

    # declare headers and typedef
    dicts["header"] = "unsigned int my_byteswap(unsigned int x);"
    dicts["func"] = """
    for (int k = 0; k < {}; k++) {{
      vec[k] = my_byteswap({}[k]);
    }}
""".format(Len, input_vec.name)

    # add dependency files or folders
    # the dependencies are copied to project folder
    deps = os.path.dirname(os.path.abspath(__file__))
    dicts["deps"] = deps + "/lib1"

    # custom compilation command (root path: project)
    # commands executed before impl or emulation
    dicts["cmds"] = "cd lib1; " + \
        "aocl library hdl-comp-pkg opencl_lib.xml -o opencl_lib.aoco;" + \
        "aocl library create -name opencl_lib opencl_lib.aoco;"

    # custom compiler flgas (load custom libs)
    dicts["flags"] = "-I lib1 -L lib1 -l opencl_lib.aoclib"

    create_extern_module(Module, dicts, ip_type="rtl")
    if return_tensors: return ret
예제 #25
0
def softmax(out, x):
    """Numerically-stable row-wise softmax over a 2-D tensor.

    out[i, j] = exp(x[i, j] - max_j x[i, j]) / sum_j exp(x[i, j] - max_j).
    Writes into `out` in place and returns the update stage.
    """
    assert len(x.shape) == 2, "only support 2-dim softmax"
    rows, cols = x.shape

    # Per-row maximum, subtracted before exponentiation for stability.
    ax_max = hcl.reduce_axis(0, cols)
    max_elem = hcl.compute((rows, ), lambda i: hcl.max(x[i, ax_max], axis=ax_max))

    # Per-row normalizer: sum of shifted exponentials.
    ax_sum = hcl.reduce_axis(0, cols)
    expsum = hcl.compute(
        (rows, ), lambda i: hcl.sum(hcl.exp(x[i, ax_sum] - max_elem[i]), axis=ax_sum))

    return hcl.update(out,
                      lambda i, j: hcl.exp(x[i, j] - max_elem[i]) / expsum[i])
예제 #26
0
    def algorithm(A, B):
        """Nested-branch assert test: the inner asserts contradict their guard
        conditions except A[x] == 4, so most elements abort inside update_B."""
        @hcl.def_([A.shape, ()])
        def update_B(A, x):
            with hcl.if_(A[x] > 5):
                with hcl.if_(A[x] > 7):
                    hcl.print(0, "in if 1\n")
                    # A[x] > 7 here, so A[x] == 1 always fails.
                    hcl.assert_(A[x] == 1, "assert in if")
                    hcl.print(0, "in if 2\n")
                    hcl.return_(-2)
                hcl.return_(-1)
            with hcl.else_():
                with hcl.if_(A[x] > 3):
                    hcl.print(0, "in else 1\n")
                    # Only A[x] == 4 survives (3 < A[x] <= 5 and == 4).
                    hcl.assert_(A[x] == 4, "assert in else")
                    hcl.print(2, "in else 2\n")
                    hcl.return_(-3)
            hcl.return_(A[x] + 1)

        hcl.update(B, lambda x: update_B(A, x))
예제 #27
0
def seidel(input_image, output_image):
    """Separable 3-tap mean filter: blur along y into a temp, then along x.

    Writes into output_image in place and returns the update stage
    (named after the output tensor).
    """
    f32 = hcl.Float()
    ax_x = hcl.reduce_axis(0, 3, "rx")
    ax_y = hcl.reduce_axis(0, 3, "ry")

    # First pass: 3-tap average along the second (y) dimension.
    partial = hcl.compute(
        output_image.shape,
        lambda x, y: hcl.sum(input_image[x, ax_y + y], axis=[ax_y], dtype=f32) / 3,
        dtype=f32, name='tmp')

    # Second pass: 3-tap average along the first (x) dimension.
    return hcl.update(
        output_image,
        lambda x, y: hcl.sum(partial[ax_x + x, y], axis=[ax_x], dtype=f32) / 3,
        name=output_image.name)
예제 #28
0
 def loop_kernel(labels):
     """One k-means iteration: relabel every point, then recompute the means.

     Uses enclosing-scope N, K, dim, `points` and `means`. Unlike the other
     variant in this file, labels[n] is written as soon as a closer centroid
     is found (tracks the current best during the scan).
     """
     # assign cluster
     with hcl.for_(0, N, name="N") as n:
         min_dist = hcl.scalar(100000)
         with hcl.for_(0, K) as k:
             # Squared Euclidean distance from point n to centroid k.
             dist = hcl.scalar(0)
             with hcl.for_(0, dim) as d:
                 dist_ = points[n, d]-means[k, d]
                 dist.v += dist_ * dist_
             with hcl.if_(dist.v < min_dist.v):
                 min_dist.v = dist.v
                 labels[n] = k
     # update mean
     num_k = hcl.compute((K,), lambda x: 0)
     sum_k = hcl.compute((K, dim), lambda x, y: 0)
     def calc_sum(n):
         # Accumulate per-cluster member count and coordinate sums.
         num_k[labels[n]] += 1
         with hcl.for_(0, dim) as d:
             sum_k[labels[n], d] += points[n, d]
     hcl.mutate((N,), lambda n: calc_sum(n), "calc_sum")
     # Integer division: each mean becomes the floored coordinate average.
     hcl.update(means,
             lambda k, d: sum_k[k, d]//num_k[k], "update_mean")
    def test_hdc_accu(proto, hyper_dataset, labels, type):
        """Classify every sample by minimum Hamming distance to the prototypes
        and print accuracy.

        type == 1 prints "Training accu", anything else "Testing accu".
        Uses enclosing-scope numClasses; `type` shadows the builtin.
        """
        ###data preparation
        distance1 = hcl.compute((hyper_dataset.shape[1], ), lambda x: 0,
                                'distance1')
        hamming_dist1 = hcl.compute((numClasses, ), lambda x: 0,
                                    "hamming_dist1")
        m1 = hcl.reduce_axis(0, hyper_dataset.shape[1], "m1")
        correct1 = hcl.scalar(0, 'correct1')
        ###

        with hcl.for_(0, hyper_dataset.shape[0]) as i:
            with hcl.for_(0, numClasses) as n:
                #Do hdc multiplication(XOR) on sample[i]'s hdv and prototype[n]'s hdv (elementwise on the high-bit data)
                hcl.update(distance1,
                           lambda x: hyper_dataset[i][x] ^ proto[n][x])
                #Calculate the hamming distance of the two vectors by adding 1s
                hamming_dist1[n] = hcl.sum(distance1[m1], axis=m1)

            #Find the one having the least hamming distance and choose it's label as the predicted label
            pred1 = hcl.scalar(0, 'pred1')
            with hcl.for_(0, hamming_dist1.shape[0]) as j:
                # NOTE(review): indexes with the scalar handle pred1 rather
                # than pred1.v -- confirm hcl resolves this to element 0.
                with hcl.if_(hamming_dist1[j] < hamming_dist1[pred1]):
                    pred1.v = j

            with hcl.if_(pred1.v == labels[i]):
                correct1.v += 1

        #Print the accuracy
        all1 = hcl.scalar(hyper_dataset.shape[0], "all1", dtype=hcl.Float(32))
        accuracy1 = hcl.compute((1, ),
                                lambda x: correct1.v / all1.v * 100,
                                "accuracy1",
                                dtype=hcl.Float(32))
        with hcl.if_(type == 1):
            hcl.print((correct1, hyper_dataset.shape[0], accuracy1[0]),
                      "Training accu: %d/%d (%.2f%%)\n")
        with hcl.else_():
            hcl.print((correct1, hyper_dataset.shape[0], accuracy1[0]),
                      "Testing accu: %d/%d (%.2f%%)\n")
예제 #30
0
def vadd_rtl(A, B, length, ret=None, name=None):
    """Elementwise vector add backed by a Vitis RTL kernel.

    A, B: equal-shape 1-D input tensors; `length` must match their length.
    ret: optional preallocated output; when None, one is created and returned
    (otherwise the function returns None and writes into `ret`).
    name: kernel name, defaults to "vadd_rtl".

    The hcl.Stage body gives the functional model; the dicts describe the
    Vivado/Vitis .xo packaging that replaces it at build time.
    """

    if name is None: name = "vadd_rtl"
    Len = A.shape[0]
    assert A.shape == B.shape, "shape not match"
    assert Len == length, "shape not match"

    return_tensors = False
    if ret is None:
        return_tensors = True
        ret = hcl.compute(A.shape, lambda *args: 0, "ret")

    # functional behavior
    with hcl.Stage("ExternModule") as Module:
        hcl.update(ret, lambda *args:
                A[args] + B[args], "vadd")

    dicts = {}
    dicts["name"] = name
    tensors = [A, B]
    dicts["args"] = [(_.name, _.dtype) for _ in tensors]

    # RTL IP is wrapped as a separate OpenCL kernel in Vitis
    # add dependency files or folders
    # the dependencies are copied to project folder
    deps = os.path.dirname(os.path.abspath(__file__))
    dicts["deps"] = deps + "/scripts"

    # custom compilation command (root path: project)
    # commands executed before impl or emulation
    dicts["cmds"] = "vivado -mode batch -source " + \
        "scripts/gen_xo.tcl -tclargs vadd.xo vadd hw_emu {} {}"

    # custom compiler flgas (load custom libs)
    dicts["flags"] = "vadd.xo"

    create_extern_module(Module, dicts, ip_type="rtl")
    if return_tensors: return ret