def add(a, b, c):
    d = hcl.compute(a.shape, lambda *x: a[x] + b[x], "d")
    hcl.assert_(True, "assert error 1")
    hcl.print(0, "print1\n")
    hcl.update(c, lambda *x: d[x] + 1, "u")
    hcl.assert_(False, "assert error 2")
    hcl.print(0, "print2")

def fft(X_real, X_imag, IndexTable, F_real, F_imag):
    L = X_real.shape[0]
    if np.log2(L) % 1 > 0:
        raise ValueError("Length of input vector (1d tensor) must be power of 2")
    num_stages = int(np.log2(L))

    # bit-reverse permutation
    hcl.update(F_real, lambda i: X_real[IndexTable[i]], name='F_real_update')
    hcl.update(F_imag, lambda i: X_imag[IndexTable[i]], name='F_imag_update')

    with hcl.Stage("Out"):
        one = hcl.scalar(1, dtype="int32")
        with hcl.for_(0, num_stages) as stage:
            DFTpts = one[0] << (stage + 1)
            numBF = DFTpts / 2
            e = -2 * np.pi / DFTpts
            a = hcl.scalar(0)
            with hcl.for_(0, numBF) as j:
                c = hcl.scalar(hcl.cos(a[0]))
                s = hcl.scalar(hcl.sin(a[0]))
                a[0] = a[0] + e
                # butterfly update over every DFT segment of this stage
                with hcl.for_(j, L + DFTpts - 1, DFTpts) as i:
                    i_lower = i + numBF
                    temp_r = hcl.scalar(F_real[i_lower] * c - F_imag[i_lower] * s)
                    temp_i = hcl.scalar(F_imag[i_lower] * c + F_real[i_lower] * s)
                    F_real[i_lower] = F_real[i] - temp_r[0]
                    F_imag[i_lower] = F_imag[i] - temp_i[0]
                    F_real[i] = F_real[i] + temp_r[0]
                    F_imag[i] = F_imag[i] + temp_i[0]

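# Usage sketch (assumption, not from the original source): a host driver for
# the fft kernel above, assuming the standard HeteroCL placeholder/schedule/
# build flow and an arbitrary power-of-two length L = 1024. The bit-reversal
# IndexTable is precomputed on the host, and the result is cross-checked
# against numpy.fft by printing the maximum error.
import numpy as np
import heterocl as hcl

L = 1024
hcl.init(hcl.Float())

X_real = hcl.placeholder((L,), "X_real")
X_imag = hcl.placeholder((L,), "X_imag")
IndexTable = hcl.placeholder((L,), "IndexTable", dtype=hcl.Int())
F_real = hcl.placeholder((L,), "F_real")
F_imag = hcl.placeholder((L,), "F_imag")

s = hcl.create_schedule([X_real, X_imag, IndexTable, F_real, F_imag], fft)
f = hcl.build(s)

# host-side bit-reversal table: index i maps to its bit-reversed counterpart
bits = int(np.log2(L))
table = np.array([int(format(i, "0{}b".format(bits))[::-1], 2) for i in range(L)])
x_r = np.random.rand(L).astype(np.float32)
x_i = np.random.rand(L).astype(np.float32)
args = [hcl.asarray(x_r), hcl.asarray(x_i), hcl.asarray(table, dtype=hcl.Int()),
        hcl.asarray(np.zeros(L)), hcl.asarray(np.zeros(L))]
f(*args)
got = args[3].asnumpy() + 1j * args[4].asnumpy()
print("max abs error vs numpy.fft:", np.abs(got - np.fft.fft(x_r + 1j * x_i)).max())
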
def add(a, b, c):
    d = hcl.compute(a.shape, lambda *x: a[x] + b[x])
    hcl.assert_(False)
    hcl.print(0, "print1")
    hcl.update(c, lambda *x: d[x] + 1)
    hcl.assert_(False)
    hcl.print(0, "print2")

def learn(k, hdTrainData, prototype, prototypeCounter):
    # Find samples that have the label k
    match = hcl.compute(
        hdTrainData.shape,
        lambda x, y: hcl.select(trainLabels[x] == k, hdTrainData[x][y], 0),
        "match")

    # Record the number of these samples
    with hcl.for_(0, hdTrainData.shape[0]) as a:
        with hcl.if_(trainLabels[a] == k):
            max[k] += 1

    # Do the HDC sum on these samples' hypervectors (hdv)
    r = hcl.reduce_axis(0, hdTrainData.shape[0], 'r')
    result = hcl.compute((hdTrainData.shape[1], ),
                         lambda y: hcl.sum(match[r][y], axis=r), "result")

    # Do the binary voting
    sum1 = hcl.compute((hdTrainData.shape[1], ), lambda x: 0, "sum1")
    with hcl.if_(max[k] % 2 == 0):
        # even count: break ties with the random vector rdv3
        hcl.update(sum1, lambda x: hcl.select(
            result[x] + rdv3[k][x] - max[k] / 2 > 0, 1, 0))
    with hcl.else_():
        hcl.update(sum1, lambda x: hcl.select(result[x] - max[k] / 2 > 0, 1, 0))

    # Push the binary sum to prototype and the original sum to prototypeCounter
    with hcl.for_(0, hdTrainData.shape[1]) as t:
        prototype[k][t] = sum1[t]
        prototypeCounter[k][t] = result[t]

def loop_kernel(labels):
    # assign each point to the nearest cluster
    with hcl.for_(0, N, name="n") as n:
        min_dist = hcl.scalar(100000)
        new_label = hcl.scalar(labels[n])
        with hcl.for_(0, K) as k:
            dist = hcl.scalar(0)
            with hcl.for_(0, dim) as d:
                dist_ = hcl.scalar(points[n, d] - means[k, d], "temp")
                dist.v += dist_.v * dist_.v
            with hcl.if_(dist.v < min_dist.v):
                min_dist.v = dist.v
                new_label[0] = k
        labels[n] = new_label.v

    # update the cluster means
    num_k = hcl.compute((K, ), lambda x: 0, "num_k")
    sum_k = hcl.compute((K, dim), lambda x, y: 0, "sum_k")

    def calc_sum(n):
        num_k[labels[n]] += 1
        with hcl.for_(0, dim) as d:
            sum_k[labels[n], d] += points[n, d]

    hcl.mutate((N, ), lambda n: calc_sum(n), "calc_sum")
    hcl.update(means, lambda k, d: sum_k[k, d] // num_k[k], "update_mean")

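# Usage sketch (assumption, not from the original source): loop_kernel reads
# the module-level tensors points/means and the constants N/K/dim, so a driver
# has to define those before building. A minimal top-level wrapper in the
# style of the HeteroCL k-means sample; sizes and niter are arbitrary.
import numpy as np
import heterocl as hcl

hcl.init()
N, K, dim, niter = 320, 16, 32, 200

points = hcl.placeholder((N, dim), "points")
means = hcl.placeholder((K, dim), "means")

def kmeans(points, means):
    labels = hcl.compute((N,), lambda x: 0, "labels")
    # repeat the assign/update step niter times
    hcl.mutate((niter,), lambda _: loop_kernel(labels), "main_loop")
    return labels

s = hcl.create_schedule([points, means], kmeans)
f = hcl.build(s)

np_points = np.random.randint(100, size=(N, dim))
hcl_points = hcl.asarray(np_points)
hcl_means = hcl.asarray(np_points[np.random.choice(N, K, replace=False)])
hcl_labels = hcl.asarray(np.zeros(N))
f(hcl_points, hcl_means, hcl_labels)
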
def update(l, prototype, prototypeCounter, max):
    hcl.print((l + 1), "%d:Use hard examples to update the prototype counters.\n")

    ### data preparation
    distance = hcl.compute((hdTrainData.shape[1], ), lambda x: 0, 'distance')
    hamming_dist = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist")
    m = hcl.reduce_axis(0, hdTrainData.shape[1], "m")
    ###

    with hcl.for_(0, hdTrainData.shape[0]) as i:
        with hcl.for_(0, numClasses) as n:
            # Do HDC multiplication (XOR) on sample[i]'s hdv and prototype[n]'s
            # hdv (elementwise on the high-bit data)
            hcl.update(distance, lambda x: hdTrainData[i][x] ^ prototype[n][x])
            # Calculate the Hamming distance of the two vectors by adding the 1s
            hamming_dist[n] = hcl.sum(distance[m], axis=m)

        # Find the prototype with the least Hamming distance and choose its
        # label as the predicted label
        pred = hcl.scalar(0, 'pred')
        with hcl.for_(0, hamming_dist.shape[0]) as j:
            with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                pred.v = j

        # Adjust the prototype vectors: add the sample vector to its true
        # label's prototype hdv and subtract it from the predicted one
        with hcl.if_(pred.v != trainLabels[i]):
            max[trainLabels[i]] += 1
            max[pred] -= 1
            with hcl.for_(0, hdTrainData.shape[1]) as m:
                prototypeCounter[trainLabels[i]][m] += hdTrainData[i][m]
                prototypeCounter[pred][m] -= hdTrainData[i][m]
                with hcl.if_(max[trainLabels[i]] % 2 == 0):
                    with hcl.if_(prototypeCounter[trainLabels[i]][m] - max[trainLabels[i]] / 2 == 0):
                        prototype[trainLabels[i]][m] &= 1
                    with hcl.else_():
                        prototype[trainLabels[i]][m] = hcl.select(
                            prototypeCounter[trainLabels[i]][m] - max[trainLabels[i]] / 2 > 0, 1, 0)
                with hcl.if_(max[pred] % 2 == 0):
                    with hcl.if_(prototypeCounter[pred][m] - max[pred] / 2 == 0):
                        prototype[pred][m] &= 1
                    with hcl.else_():
                        prototype[pred][m] = hcl.select(
                            prototypeCounter[pred][m] - max[pred] / 2 > 0, 1, 0)

    # print the accuracy
    hcl.mutate((1, ),
               lambda x: test_hdc_accu(prototype, hdTrainData, trainLabels, 1),
               'training_update')
    hcl.mutate((1, ),
               lambda x: test_hdc_accu(prototype, hdTestData, testLabels, 2),
               'testing_update')

def algorithm(A, B):
    @hcl.def_([A.shape, ()])
    def update_B(A, x):
        with hcl.if_(A[x] > 5):
            hcl.return_(-1)
        hcl.return_(A[x] + 1)

    hcl.update(B, lambda x: update_B(A, x))

def kernel(A, B):
    C = hcl.compute((10, 32), lambda *args: 0, "C")
    D = hcl.compute(C.shape, lambda *args: 0, "D")
    with hcl.Stage("Super") as m:
        with hcl.for_(0, 10, name="j") as j:
            hcl.update(D, lambda *args: j * A[args] + B[args], name="update.D")
            hcl.update(C, lambda *args: A[args] + j * D[args], name="update.C")
    return C

def add(a, b, c):
    with hcl.for_(0, 10) as i:
        a[i] = 0
        hcl.assert_(i < 10, "assert error 1")
    d = hcl.compute(a.shape, lambda *x: a[x] + b[x])
    hcl.assert_(a[0] == 0, "assert error 2")
    hcl.update(c, lambda *x: d[x] + 1)
    hcl.assert_(a[0] == 0, "assert error 3")

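# Usage sketch (assumption): hcl.assert_ is evaluated when the built function
# runs; a failing assert stops execution and prints its message. Since the
# kernel above zeroes a before the checks, all three asserts hold and this
# driver runs cleanly.
import numpy as np
import heterocl as hcl

hcl.init()
a = hcl.placeholder((10,), "a")
b = hcl.placeholder((10,), "b")
c = hcl.placeholder((10,), "c")
s = hcl.create_schedule([a, b, c], add)
f = hcl.build(s)
f(hcl.asarray(np.zeros(10)), hcl.asarray(np.arange(10)),
  hcl.asarray(np.zeros(10)))
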
def algorithm(A, B):
    @hcl.def_([A.shape, ()])
    def update_B(A, x):
        with hcl.for_(0, 10) as i:
            with hcl.if_(A[x] == i):
                hcl.return_(1)
        hcl.return_(A[x])

    hcl.update(B, lambda x: update_B(A, x))

def SgdLR(data, label, theta, lut):
    label_local = hcl.unpack(label, name="label_local")
    theta_local = hcl.unpack(theta, name="theta_local")
    data_local = hcl.unpack(data, name="data_local")

    FTYPE = theta_local.dtype

    def Sigmoid(exponent):
        ret = hcl.scalar(0.0, "sigmoid", FTYPE)
        with hcl.if_(exponent > hcl.cast(FTYPE, 4.0)):
            ret[0] = 1.0
        with hcl.elif_(exponent < hcl.cast(FTYPE, -4.0)):
            ret[0] = 0.0
        with hcl.else_():
            with hcl.if_(exponent < hcl.cast(FTYPE, 0.0)):
                # negative input: take the two's complement of the fixed-point
                # bits and mirror the index into the LUT
                num = hcl.scalar(0, dtype=hcl.UFixed(18, 8))
                num[0][18:0] = exponent[29:11]
                num[0] = ~(num[0] << 8) + 1
                index = 2047.0 - num[0]
                ret[0] = lut[hcl.cast(hcl.Int(32), index)]
            with hcl.else_():
                index = exponent[21:11]
                ret[0] = lut[hcl.cast(hcl.Int(32), index)]
        return ret[0]

    with hcl.Stage("M"):
        with hcl.for_(0, NUM_TRAINING) as train_id:
            training_instance = hcl.compute(
                (NUM_FEATURES, ),
                lambda x: data_local[train_id * NUM_FEATURES + x],
                "training_instance", data_local.dtype)

            # Main Computation
            k = hcl.reduce_axis(0, NUM_FEATURES, "k")
            dot = hcl.compute(
                (1, ),
                lambda x: hcl.sum(theta_local[k] * training_instance[k],
                                  axis=k, dtype=FTYPE),
                "dot", dtype=FTYPE)
            gradient = hcl.compute(
                (NUM_FEATURES, ),
                lambda x: (Sigmoid(dot[0]) - label_local[train_id]) *
                          training_instance[x],
                "gradient", dtype=FTYPE)
            update = hcl.update(
                theta_local,
                lambda x: theta_local[x] - 2565.0 * gradient[x],
                name="update")

    theta_pack = hcl.pack(theta_local, name="theta_pack", dtype=theta.dtype)
    stream_out = hcl.update(theta, lambda x: theta_pack[x], name="stream_out")
    return stream_out

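# Illustration (assumption, not from the original source): SgdLR relies on
# hcl.pack/hcl.unpack to move between wide bus words and their element-wise
# view. A self-contained round trip, assuming 32-bit words split into four
# 8-bit elements via the factor argument.
import numpy as np
import heterocl as hcl

hcl.init()
A = hcl.placeholder((4,), "A", dtype=hcl.UInt(32))

def pack_unpack(A):
    unpacked = hcl.unpack(A, factor=4, name="unpacked")  # (16,) of UInt(8)
    return hcl.pack(unpacked, factor=4, name="packed")   # back to (4,) of UInt(32)

s = hcl.create_schedule([A], pack_unpack)
f = hcl.build(s)

np_A = np.array([0x01020304, 0x05060708, 0x090A0B0C, 0x0D0E0F10],
                dtype=np.uint32)
hcl_A = hcl.asarray(np_A, dtype=hcl.UInt(32))
hcl_out = hcl.asarray(np.zeros(4), dtype=hcl.UInt(32))
f(hcl_A, hcl_out)
assert (hcl_out.asnumpy() == np_A).all()
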
def kernel(A, B):
    C = hcl.compute((10, 32), lambda *args: A[args] + B[args], "C")
    with hcl.Stage("Super") as m:
        hcl.update(C, lambda *args: C[args] + 1, "update")
    with hcl.Stage("Plus") as stage:
        with hcl.for_(0, 10) as j:
            C[j, 0] = 10
    return C

def update(l, prototype, prototypeCounter, max):
    hcl.print((l + 1), "%d:Use hard examples to update the prototype counters.\n")

    ### data preparation
    distance = hcl.compute((in_train.shape[1], ), lambda x: 0, 'distance',
                           dtype=hcl.UInt(in_bw))
    pre_dist = hcl.compute((in_train.shape[1], ), lambda x: 0, "pre_dist")
    hamming_dist = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist")
    m = hcl.reduce_axis(0, in_train.shape[1], "m")
    ###

    with hcl.for_(0, in_train.shape[0]) as i:
        hcl.print((i), "%d suc\n")
        with hcl.for_(0, numClasses) as n:
            # Do HDC multiplication (XOR) on sample[i]'s hdv and prototype[n]'s
            # hdv (elementwise on the packed high-bit data)
            hcl.update(distance, lambda x: in_train[i][x] ^ prototype[n][x])
            # Calculate the Hamming distance of the two vectors by counting the 1s
            hcl.update(pre_dist, lambda x: popcount(distance[x]))
            hcl.print((), "sum of 1s suc")
            hamming_dist[n] = hcl.sum(pre_dist[m], axis=m)

        # Find the prototype with the least Hamming distance and choose its
        # label as the predicted label
        pred = hcl.scalar(0, 'pred')
        with hcl.for_(0, hamming_dist.shape[0]) as j:
            with hcl.if_(hamming_dist[j] < hamming_dist[pred]):
                pred.v = j

        # Adjust the prototype vectors: add the sample vector to its true
        # label's prototype hdv and subtract it from the predicted one
        with hcl.if_(pred.v != trainLabels[i]):
            max[trainLabels[i]] += 1
            max[pred] -= 1
            with hcl.for_(0, in_train.shape[1]) as m:
                with hcl.for_(0, in_bw) as bit:
                    prototypeCounter[trainLabels[i]][m * in_bw + bit] += in_train[i][m][bit]
                    prototypeCounter[pred][m * in_bw + bit] -= in_train[i][m][bit]
                    with hcl.if_(max[trainLabels[i]] % 2 == 0):
                        with hcl.if_(prototypeCounter[trainLabels[i]][m * in_bw + bit] - max[trainLabels[i]] / 2 == 0):
                            prototype[trainLabels[i]][m][bit] &= 1
                        with hcl.else_():
                            prototype[trainLabels[i]][m][bit] = hcl.select(
                                prototypeCounter[trainLabels[i]][m * in_bw + bit] - max[trainLabels[i]] / 2 > 0, 1, 0)
                    with hcl.if_(max[pred] % 2 == 0):
                        with hcl.if_(prototypeCounter[pred][m * in_bw + bit] - max[pred] / 2 == 0):
                            prototype[pred][m][bit] &= 1
                        with hcl.else_():
                            prototype[pred][m][bit] = hcl.select(
                                prototypeCounter[pred][m * in_bw + bit] - max[pred] / 2 > 0, 1, 0)

    # print the accuracy
    hcl.mutate((1, ),
               lambda x: test_hdc_accu(prototype, in_train, trainLabels, 1),
               'training_update')
    hcl.mutate((1, ),
               lambda x: test_hdc_accu(prototype, in_test, testLabels, 2),
               'testing_update')

def test_hdc_accu(proto, pack_data, labels, type):
    # pack the prototype
    pack_proto = hcl.pack(proto, axis=1, dtype=hcl.UInt(bw), name="pack_proto")

    ### data preparation
    distance1 = hcl.compute((pack_data.shape[1], ), lambda x: 0, 'distance1',
                            dtype=hcl.UInt(bw))
    pre_hamming = hcl.compute((pack_data.shape[1], ), lambda x: 0, "pre_hamming")
    hamming_dist1 = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist1")
    m1 = hcl.reduce_axis(0, pack_data.shape[1], "m1")
    correct1 = hcl.scalar(0, 'correct1')
    ###

    with hcl.for_(0, pack_data.shape[0]) as i:
        hcl.print((i), "%d suc\n")
        with hcl.for_(0, numClasses) as n:
            # Do HDC multiplication (XOR) on sample[i]'s hdv and prototype[n]'s
            # hdv (elementwise on the packed high-bit data)
            hcl.update(distance1, lambda x: pack_data[i][x] ^ pack_proto[n][x])
            # Calculate the Hamming distance of the two vectors by counting the 1s
            hcl.update(pre_hamming, lambda x: popcount(distance1[x]))
            hcl.print((), "sum of 1s suc")
            hamming_dist1[n] = hcl.sum(pre_hamming[m1], axis=m1)

        # Find the prototype with the least Hamming distance and choose its
        # label as the predicted label
        pred1 = hcl.scalar(0, 'pred1')
        with hcl.for_(0, hamming_dist1.shape[0]) as j:
            with hcl.if_(hamming_dist1[j] < hamming_dist1[pred1]):
                pred1.v = j
        with hcl.if_(pred1.v == labels[i]):
            correct1.v += 1

    # Print the accuracy
    all1 = hcl.scalar(pack_data.shape[0], "all1", dtype=hcl.Float(32))
    accuracy1 = hcl.compute((1, ), lambda x: correct1.v / all1.v * 100,
                            "accuracy1", dtype=hcl.Float(32))
    with hcl.if_(type == 1):
        hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                  "Training accu: %d/%d (%.2f%%)\n")
    with hcl.else_():
        hcl.print((correct1, pack_data.shape[0], accuracy1[0]),
                  "Testing accu: %d/%d (%.2f%%)\n")

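# Assumption: both packed-HDV kernels above call a popcount helper that is not
# shown in this section. A plausible definition using HeteroCL bit selects on
# a bw-bit packed word (the training variant would use in_bw instead of bw):
def popcount(num):
    out = hcl.scalar(0, "out")
    with hcl.for_(0, bw) as i:
        out.v += num[i]  # num[i] selects bit i of the packed word
    return out.v
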
def algorithm(A, B):
    @hcl.def_([A.shape, ()])
    def update_B(A, x):
        with hcl.for_(0, 10) as i:
            hcl.assert_(i < 20)
            hcl.print(0, "in for loop\n")
            with hcl.if_(A[x] == i):
                hcl.assert_(A[x] > 10, "assert in if")
                hcl.print(0, "this should not be printed")
                hcl.return_(1)
        hcl.return_(A[x])

    hcl.update(B, lambda x: update_B(A, x))

def algorithm(A, B):
    @hcl.def_([A.shape, ()])
    def update_B(A, x):
        hcl.print(0, "print1\n")
        hcl.assert_(A[x] != 7)
        hcl.print(0, "print2\n")
        hcl.return_(A[x] + 1)

    matrix_B = hcl.compute((m, k), lambda x, y: A[x] + B[x] + 7, "matrix_B")
    hcl.update(B, lambda x: update_B(A, x))
    matrix_C = hcl.compute((m, k), lambda x, y: A[x] + B[x] + 7, "matrix_C")
    hcl.print(0, "should not print\n")

def thresholdedrelu(out, x, theta):
    assert len(x.shape) == 2, "only support 2-dim ThresholdedReLU"
    m, n = x.shape
    return hcl.update(
        out,
        lambda i, j: hcl.select(x[i, j] > theta, x[i, j],
                                hcl.cast(x.dtype, 0)))

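# Usage sketch (assumption): these hlib-style activation ops write into a
# preallocated out tensor instead of allocating one. Driving thresholdedrelu
# end to end and checking against a NumPy reference:
import numpy as np
import heterocl as hcl

hcl.init(hcl.Float())
x = hcl.placeholder((4, 8), "x")
out = hcl.placeholder((4, 8), "out")

def kernel(x, out):
    return thresholdedrelu(out, x, 1.0)

s = hcl.create_schedule([x, out], kernel)
f = hcl.build(s)

np_x = np.random.uniform(-2, 2, (4, 8)).astype(np.float32)
hcl_x = hcl.asarray(np_x)
hcl_out = hcl.asarray(np.zeros((4, 8)))
f(hcl_x, hcl_out)
np.testing.assert_allclose(hcl_out.asnumpy(),
                           np.where(np_x > 1.0, np_x, 0.0), rtol=1e-5)
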
def algorithm(A, B):
    @hcl.def_([A.shape, ()])
    def update_B(A, x):
        with hcl.if_(A[x] > 5):
            hcl.print(0, "print if 1\n")
            hcl.assert_(A[x] <= 5, "assert in if")
            hcl.print(0, "print if 2\n")
            hcl.return_(-1)
        with hcl.else_():
            hcl.print(0, "print else 1\n")
            hcl.assert_(A[x] <= 5, "assert in else")
            hcl.print(0, "print else 2\n")
            hcl.return_(A[x] + 1)

    hcl.update(B, lambda x: update_B(A, x))
    hcl.print(0, "shouldn't be printed")

def elu(out, x, alpha):
    assert len(x.shape) == 2, "only support 2-dim ELU"
    m, n = x.shape
    return hcl.update(
        out,
        lambda i, j: hcl.select(x[i, j] < 0,
                                alpha * (hcl.exp(x[i, j]) - 1), x[i, j]))

def algorithm(A, B):
    @hcl.def_([A.shape, ()])
    def update_B(A, x):
        with hcl.if_(A[x] < 5):
            hcl.print(0, "print1\n")
            hcl.assert_(A[x] < 4, "assert message 1")
            hcl.print(0, "print2\n")
            hcl.return_(-1)
        hcl.assert_(A[x] >= 5, "assert message 2")
        hcl.print(0, "not in if\n")
        hcl.return_(A[x] + 1)

    matrix_B = hcl.compute((m, k), lambda x, y: A[x] + B[x] + 7, "matrix_B")
    hcl.update(B, lambda x: update_B(A, x))

def jacobi(input_image, output_image):
    def jacobi_kernel(y, x):
        return (input_image[y + 1, x - 1] + input_image[y, x] +
                input_image[y + 1, x] + input_image[y + 1, x + 1] +
                input_image[y + 2, x]) / 5

    return hcl.update(output_image, jacobi_kernel, name=output_image.name)

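# Cross-check (assumption, not from the original source): a plain NumPy
# reference for the same 5-tap stencil, useful for validating the kernel;
# border pixels where a tap would fall outside the image are left at zero.
import numpy as np

def jacobi_ref(img):
    H, W = img.shape
    out = np.zeros_like(img)
    for y in range(H - 2):          # y + 2 must stay in bounds
        for x in range(1, W - 1):   # x - 1 and x + 1 must stay in bounds
            out[y, x] = (img[y + 1, x - 1] + img[y, x] + img[y + 1, x] +
                         img[y + 1, x + 1] + img[y + 2, x]) / 5
    return out
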
def prelu(out, x, alpha):
    assert len(x.shape) == 2, "only support 2-dim PReLU"
    m, n = x.shape
    return hcl.update(
        out,
        lambda i, j: hcl.select(x[i, j] < 0,
                                hcl.cast(x.dtype, alpha[j] * x[i, j]),
                                x[i, j]))

def unsharp(input_image, output_image):
    """
    Helper Functions
    """
    def clamp(val, min_, max_):
        local = hcl.scalar(val)
        with hcl.if_(val < min_):
            local[0] = min_
        with hcl.elif_(val > max_):
            local[0] = max_
        return local[0]

    def clamp2D(tensor, min_, max_):
        return hcl.compute(tensor.shape,
                           lambda x, y: clamp(tensor[x, y], min_, max_),
                           name="clamped_" + tensor.name)

    def clamp3D(tensor, min_, max_):
        return hcl.compute(tensor.shape,
                           lambda x, y, c: clamp(tensor[x, y, c], min_, max_),
                           name="clamped_" + tensor.name)

    def kernel_f(x):
        return hcl.exp(-(x * x) / (2 * 1.5 * 1.5)) / sqrt(2 * 3.14159 * 1.5)

    def kernel(x):
        return kernel_f(x) * 255 / (kernel_f(0) + kernel_f(1) * 2 +
                                    kernel_f(2) * 2 + kernel_f(3) * 2 +
                                    kernel_f(4) * 2)

    rx = hcl.reduce_axis(-4, 5, "rx")
    ry = hcl.reduce_axis(-4, 5, "ry")
    my = hcl.reduce_axis(0, 640, "my")

    gray = hcl.compute((480, 640),
                       lambda x, y: (input_image[x, y, 0] * 77 +
                                     input_image[x, y, 1] * 150 +
                                     input_image[x, y, 2] * 29) >> 8,
                       name="gray")
    blur = hcl.compute(gray.shape,
                       lambda x, y: hcl.sum(gray[rx + x, ry + y] *
                                            kernel(rx) * kernel(ry),
                                            axis=[rx, ry]),
                       name="blur")
    sharpen = clamp2D(
        hcl.compute(gray.shape,
                    lambda x, y: gray[x, y] * 2 - blur[x, y],
                    name="sharpen"), 0, 255)
    ratio = clamp2D(
        hcl.compute(gray.shape,
                    lambda x, y: sharpen[x, y] * 32 /
                                 hcl.max(gray[x, my], axis=my),
                    name="ratio"), 0, 255)
    out = clamp3D(
        hcl.compute(output_image.shape,
                    lambda x, y, c: ratio[x, y] * input_image[x, y, c] >> 5,
                    name="out"), 0, 255)
    U = hcl.update(output_image, lambda x, y, c: out[x, y, c])
    return U

def byte_swap_rtl(input_vec, ret=None, name=None):
    if name is None:
        name = "my_byteswap"

    Len = input_vec.shape[0]
    return_tensors = False
    if ret is None:
        return_tensors = True
        ret = hcl.compute(input_vec.shape, lambda *args: 0, "vec")

    # functional behavior
    with hcl.Stage("ExternModule") as Module:
        hcl.update(ret,
                   lambda *args: input_vec[args] << 16 | input_vec[args] >> 16,
                   "swap")

    dicts = {}
    dicts["name"] = name
    tensors = [input_vec]
    dicts["args"] = [(_.name, _.dtype) for _ in tensors]

    # declare headers and typedef
    dicts["header"] = "unsigned int my_byteswap(unsigned int x);"
    dicts["func"] = """
        for (int k = 0; k < {}; k++) {{
            vec[k] = my_byteswap({}[k]);
        }}
    """.format(Len, input_vec.name)

    # add dependency files or folders
    # the dependencies are copied to the project folder
    deps = os.path.dirname(os.path.abspath(__file__))
    dicts["deps"] = deps + "/lib1"

    # custom compilation commands (root path: project)
    # commands executed before impl or emulation
    dicts["cmds"] = "cd lib1; " + \
        "aocl library hdl-comp-pkg opencl_lib.xml -o opencl_lib.aoco;" + \
        "aocl library create -name opencl_lib opencl_lib.aoco;"

    # custom compiler flags (load custom libs)
    dicts["flags"] = "-I lib1 -L lib1 -l opencl_lib.aoclib"

    create_extern_module(Module, dicts, ip_type="rtl")
    if return_tensors:
        return ret

def softmax(out, x):
    assert len(x.shape) == 2, "only support 2-dim softmax"
    m, n = x.shape
    k = hcl.reduce_axis(0, n)
    max_elem = hcl.compute((m, ), lambda i: hcl.max(x[i, k], axis=k))
    k = hcl.reduce_axis(0, n)
    expsum = hcl.compute(
        (m, ), lambda i: hcl.sum(hcl.exp(x[i, k] - max_elem[i]), axis=k))
    return hcl.update(
        out, lambda i, j: hcl.exp(x[i, j] - max_elem[i]) / expsum[i])

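# Usage sketch (assumption): the max-subtraction above is the standard
# numerically stable softmax, so the output should match a NumPy reference
# closely. Shapes here are arbitrary.
import numpy as np
import heterocl as hcl

hcl.init(hcl.Float())
x = hcl.placeholder((4, 10), "x")
out = hcl.placeholder((4, 10), "out")

def kernel(x, out):
    return softmax(out, x)

s = hcl.create_schedule([x, out], kernel)
f = hcl.build(s)

np_x = np.random.randn(4, 10).astype(np.float32)
hcl_x = hcl.asarray(np_x)
hcl_out = hcl.asarray(np.zeros((4, 10)))
f(hcl_x, hcl_out)
e = np.exp(np_x - np_x.max(axis=1, keepdims=True))
np.testing.assert_allclose(hcl_out.asnumpy(),
                           e / e.sum(axis=1, keepdims=True), rtol=1e-4)
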
def algorithm(A, B):
    @hcl.def_([A.shape, ()])
    def update_B(A, x):
        with hcl.if_(A[x] > 5):
            with hcl.if_(A[x] > 7):
                hcl.print(0, "in if 1\n")
                hcl.assert_(A[x] == 1, "assert in if")
                hcl.print(0, "in if 2\n")
                hcl.return_(-2)
            hcl.return_(-1)
        with hcl.else_():
            with hcl.if_(A[x] > 3):
                hcl.print(0, "in else 1\n")
                hcl.assert_(A[x] == 4, "assert in else")
                hcl.print(2, "in else 2\n")
                hcl.return_(-3)
            hcl.return_(A[x] + 1)

    hcl.update(B, lambda x: update_B(A, x))

def seidel(input_image, output_image):
    dtype = hcl.Float()
    rx = hcl.reduce_axis(0, 3, "rx")
    ry = hcl.reduce_axis(0, 3, "ry")
    tmp = hcl.compute(output_image.shape,
                      lambda x, y: hcl.sum(input_image[x, ry + y],
                                           axis=[ry], dtype=dtype) / 3,
                      dtype=dtype, name='tmp')
    return hcl.update(output_image,
                      lambda x, y: hcl.sum(tmp[rx + x, y],
                                           axis=[rx], dtype=dtype) / 3,
                      name=output_image.name)

def loop_kernel(labels):
    # assign cluster
    with hcl.for_(0, N, name="N") as n:
        min_dist = hcl.scalar(100000)
        with hcl.for_(0, K) as k:
            dist = hcl.scalar(0)
            with hcl.for_(0, dim) as d:
                dist_ = points[n, d] - means[k, d]
                dist.v += dist_ * dist_
            with hcl.if_(dist.v < min_dist.v):
                min_dist.v = dist.v
                labels[n] = k

    # update mean
    num_k = hcl.compute((K,), lambda x: 0)
    sum_k = hcl.compute((K, dim), lambda x, y: 0)

    def calc_sum(n):
        num_k[labels[n]] += 1
        with hcl.for_(0, dim) as d:
            sum_k[labels[n], d] += points[n, d]

    hcl.mutate((N,), lambda n: calc_sum(n), "calc_sum")
    hcl.update(means, lambda k, d: sum_k[k, d] // num_k[k], "update_mean")

def test_hdc_accu(proto, hyper_dataset, labels, type):
    ### data preparation
    distance1 = hcl.compute((hyper_dataset.shape[1], ), lambda x: 0, 'distance1')
    hamming_dist1 = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist1")
    m1 = hcl.reduce_axis(0, hyper_dataset.shape[1], "m1")
    correct1 = hcl.scalar(0, 'correct1')
    ###

    with hcl.for_(0, hyper_dataset.shape[0]) as i:
        with hcl.for_(0, numClasses) as n:
            # Do HDC multiplication (XOR) on sample[i]'s hdv and prototype[n]'s
            # hdv (elementwise on the high-bit data)
            hcl.update(distance1, lambda x: hyper_dataset[i][x] ^ proto[n][x])
            # Calculate the Hamming distance of the two vectors by adding the 1s
            hamming_dist1[n] = hcl.sum(distance1[m1], axis=m1)

        # Find the prototype with the least Hamming distance and choose its
        # label as the predicted label
        pred1 = hcl.scalar(0, 'pred1')
        with hcl.for_(0, hamming_dist1.shape[0]) as j:
            with hcl.if_(hamming_dist1[j] < hamming_dist1[pred1]):
                pred1.v = j
        with hcl.if_(pred1.v == labels[i]):
            correct1.v += 1

    # Print the accuracy
    all1 = hcl.scalar(hyper_dataset.shape[0], "all1", dtype=hcl.Float(32))
    accuracy1 = hcl.compute((1, ), lambda x: correct1.v / all1.v * 100,
                            "accuracy1", dtype=hcl.Float(32))
    with hcl.if_(type == 1):
        hcl.print((correct1, hyper_dataset.shape[0], accuracy1[0]),
                  "Training accu: %d/%d (%.2f%%)\n")
    with hcl.else_():
        hcl.print((correct1, hyper_dataset.shape[0], accuracy1[0]),
                  "Testing accu: %d/%d (%.2f%%)\n")

def vadd_rtl(A, B, length, ret=None, name=None):
    if name is None:
        name = "vadd_rtl"

    Len = A.shape[0]
    assert A.shape == B.shape, "shape not match"
    assert Len == length, "shape not match"

    return_tensors = False
    if ret is None:
        return_tensors = True
        ret = hcl.compute(A.shape, lambda *args: 0, "ret")

    # functional behavior
    with hcl.Stage("ExternModule") as Module:
        hcl.update(ret, lambda *args: A[args] + B[args], "vadd")

    dicts = {}
    dicts["name"] = name
    tensors = [A, B]
    dicts["args"] = [(_.name, _.dtype) for _ in tensors]

    # RTL IP is wrapped as a separate OpenCL kernel in Vitis
    # add dependency files or folders
    # the dependencies are copied to the project folder
    deps = os.path.dirname(os.path.abspath(__file__))
    dicts["deps"] = deps + "/scripts"

    # custom compilation commands (root path: project)
    # commands executed before impl or emulation
    dicts["cmds"] = "vivado -mode batch -source " + \
        "scripts/gen_xo.tcl -tclargs vadd.xo vadd hw_emu {} {}"

    # custom compiler flags (load custom libs)
    dicts["flags"] = "vadd.xo"

    create_extern_module(Module, dicts, ip_type="rtl")
    if return_tensors:
        return ret

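# Usage sketch (assumption, heavily simplified): a wrapper like vadd_rtl is
# meant to be called from a top-level kernel. When targeting hardware, the
# ExternModule stage is replaced by the packaged RTL (vadd.xo); the functional
# hcl.update body stands in otherwise. Whether the pure-software path runs
# depends on the backend, so this is only a sketch, not a verified flow.
import numpy as np
import heterocl as hcl

hcl.init(hcl.Int(32))
length = 1024  # arbitrary
A = hcl.placeholder((length,), "A")
B = hcl.placeholder((length,), "B")

def top(A, B):
    return vadd_rtl(A, B, length)

s = hcl.create_schedule([A, B], top)
f = hcl.build(s)  # a Vitis target would instead pull in vadd.xo
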