def simple_compute(a, A):
    """Return a new tensor "B" holding ``A[x, y] + a`` for every element.

    The ``hcl.compute`` call is equivalent to this plain Python loop nest
    (the 10x10 bounds are only an illustration; the real bounds are
    ``A.shape``):

        for x in range(0, 10):
            for y in range(0, 10):
                B[x, y] = A[x, y] + a
    """
    result = hcl.compute(A.shape, lambda x, y: A[x, y] + a, "B")
    return result
def knn_vote(labels, max_label):
    """Count votes for the first ``K_CONST`` labels and record the winner.

    The index with the highest vote count is written to ``max_label[0]``.
    The vote table has 10 slots, so label values are presumably in 0..9
    (TODO confirm against the caller).
    """
    best_count = hcl.scalar(0)
    tally = hcl.compute((10,), lambda x: 0, "votes")

    # One vote per neighbour label.
    with hcl.for_(0, K_CONST) as neighbour:
        tally[labels[neighbour]] += 1

    # Strict ">" means the lowest index wins a tie.
    with hcl.for_(0, 10) as candidate:
        with hcl.if_(tally[candidate] > best_count.v):
            best_count.v = tally[candidate]
            max_label[0] = candidate
def test_fixed():
    """Check that fixed-point dtypes show up in the code built for ``target``."""
    hcl.init()
    lhs = hcl.placeholder((1, 32), dtype=hcl.Fixed(5, 3))
    rhs = hcl.placeholder((1, 32), dtype=hcl.UFixed(5, 3))
    total = hcl.compute(lhs.shape,
                        lambda i, j: lhs[i][j] + rhs[i][j],
                        dtype=hcl.Fixed(7, 4))
    schedule = hcl.create_schedule([lhs, rhs, total])
    code = hcl.build(schedule, target=target)

    # Entries 3..5 of the module-level `strings` table are the expected
    # fixed-point type spellings.
    for index in (3, 4, 5):
        assert strings[index] in code
def test_int():
    """Check that integer dtypes show up in the code built for ``target``."""
    hcl.init()
    lhs = hcl.placeholder((1, 32), dtype=hcl.Int(3))
    rhs = hcl.placeholder((1, 32), dtype=hcl.UInt(3))
    total = hcl.compute(lhs.shape,
                        lambda i, j: lhs[i][j] + rhs[i][j],
                        dtype=hcl.Int(8))
    schedule = hcl.create_schedule([lhs, rhs, total])
    code = hcl.build(schedule, target=target)

    # Entries 0..2 of the module-level `strings` table are the expected
    # integer type spellings.
    for index in (0, 1, 2):
        assert strings[index] in code
def test_ap_int():
    """Check that the 'vhls' backend spells integer dtypes as ap_int/ap_uint."""
    hcl.init()
    lhs = hcl.placeholder((1, 32), dtype=hcl.Int(3))
    rhs = hcl.placeholder((1, 32), dtype=hcl.UInt(3))
    total = hcl.compute(lhs.shape,
                        lambda i, j: lhs[i][j] + rhs[i][j],
                        dtype=hcl.Int(8))
    schedule = hcl.create_schedule([lhs, rhs, total])
    code = hcl.build(schedule, target='vhls')

    for type_name in ("ap_int<3>", "ap_uint<3>", "ap_int<8>"):
        assert type_name in code
def test_ap_fixed():
    """Check that the 'vhls' backend spells fixed-point dtypes as ap_[u]fixed."""
    hcl.init()
    lhs = hcl.placeholder((1, 32), dtype=hcl.Fixed(5, 3))
    rhs = hcl.placeholder((1, 32), dtype=hcl.UFixed(5, 3))
    total = hcl.compute(lhs.shape,
                        lambda i, j: lhs[i][j] + rhs[i][j],
                        dtype=hcl.Fixed(7, 4))
    schedule = hcl.create_schedule([lhs, rhs, total])
    code = hcl.build(schedule, target='vhls')

    for type_name in ("ap_fixed<5, 2>", "ap_ufixed<5, 2>", "ap_fixed<7, 3>"):
        assert type_name in code
def cordic(X, Y, C, theta, N):
    """Run ``N`` shift-and-add rotation steps, updating ``X[0]``/``Y[0]`` in place.

    At each step the accumulated angle is compared with ``theta[0]``; the
    pair (X, Y) is rotated one way or the other and the angle is moved by
    ``C[step]`` accordingly.
    """
    # Scratch slot for the new X value and the accumulated angle; both use
    # X's data type.
    scratch = hcl.compute((1, ), lambda x: 0, "T", X.dtype)
    angle = hcl.compute((1, ), lambda x: 0, "current", X.dtype)

    def step_loop(step):
        # Target still ahead of the accumulated angle: rotate forward.
        with hcl.if_(theta[0] > angle[0]):
            scratch[0] = X[0] - (Y[0] >> step)
            Y[0] = Y[0] + (X[0] >> step)
            X[0] = scratch[0]
            angle[0] = angle[0] + C[step]
        # Otherwise rotate back.
        with hcl.else_():
            scratch[0] = X[0] + (Y[0] >> step)
            Y[0] = Y[0] - (X[0] >> step)
            X[0] = scratch[0]
            angle[0] = angle[0] - C[step]

    # Emit the iteration loop; more steps give a finer approximation.
    hcl.mutate((N, ), lambda step: step_loop(step), "calc")
def algorithm(a, b, c):
    """Build a module ``add`` whose assertions are all ``False``, then call it."""

    @hcl.def_([a.shape, b.shape, c.shape])
    def add(a, b, c):
        # d = a + b, followed by an assertion that always fails.
        elementwise_sum = hcl.compute(a.shape, lambda *x: a[x] + b[x])
        hcl.assert_(False)
        hcl.print(0, "print1")
        # c = d + 1, followed by a second always-failing assertion.
        hcl.update(c, lambda *x: elementwise_sum[x] + 1)
        hcl.assert_(False)
        hcl.print(0, "print2")

    # Extra stage whose result is never read; it is still emitted before
    # the call to `add`.
    hcl.compute((64, 64), lambda x, y: 4 + 8)
    add(a, b, c)
def knn(test_image, train_images):
    """Return a (10, 3) tensor of the smallest bit-difference counts per row.

    Each training image is XOR-ed with ``test_image``, the set bits of the
    result are counted, and for every row the three smallest counts seen
    are kept in ``knn_mat``.
    """

    def popcount(value):
        # Sum the individual bits of `value` using bit selection.
        ones = hcl.scalar(0, "out")
        with hcl.for_(0, train_images.type.bits) as bit:
            ones.v += value[bit]
        return ones.v

    def update_knn(distances, candidates, row, col):
        # Locate the largest of the three current candidates for this row...
        worst = hcl.scalar(0, "max_id")
        with hcl.for_(0, 3) as slot:
            with hcl.if_(candidates[row][slot] > candidates[row][worst.v]):
                worst.v = slot
        # ...and replace it when the new distance is smaller.
        with hcl.if_(distances[row][col] < candidates[row][worst.v]):
            candidates[row][worst.v] = distances[row][col]

    # Step 1: bitwise difference against the test image.
    diff = hcl.compute(train_images.shape,
                       lambda x, y: train_images[x][y] ^ test_image,
                       "diff")
    # Step 2: number of differing bits.
    dist = hcl.compute(diff.shape,
                       lambda x, y: popcount(diff[x][y]),
                       "dist")
    # Step 3: candidates start at 50.
    knn_mat = hcl.compute((10, 3), lambda x, y: 50, "knn_mat")
    # Step 4: sweep every distance and update the candidates in place.
    hcl.mutate(dist.shape,
               lambda x, y: update_knn(dist, knn_mat, x, y),
               "knn_update")
    # Step 5: hand back the candidates.
    return knn_mat
def loop_body(m):
    """Process triangle ``m``: projection, rasterization, z-culling, coloring.

    Reads ``triangle_3ds`` and updates ``z_buffer`` / ``frame_buffer`` through
    the stage helpers, all of which are defined outside this function.
    """
    # Per-triangle working buffers.
    tri_3d = hcl.compute((9, ), lambda x: triangle_3ds[m][x], "triangle_3d")
    frag_buf = hcl.compute((500, 4), lambda x, y: 0, "fragment")
    pixel_buf = hcl.compute((500, 3), lambda x, y: 0, "pixels")
    tri_2d = hcl.compute((7, ), lambda x: 0, "triangle_2d")
    frag_count = hcl.compute((1, ), lambda x: 0, "frag_cntr")
    pixel_count = hcl.compute((1, ), lambda x: 0, "size_pixels")

    # Stage 1 - projection: fill the 2-D triangle.
    hcl.mutate((7, ),
               lambda x: projection(tri_3d, tri_2d, x),
               "twod_update")

    # Stage 2 - rasterization: update the fragment buffer.
    hcl.mutate((1, ),
               lambda x: rasterization(frag_count, tri_2d, frag_buf),
               "fragment_update")

    # Stage 3 - z-culling: update z_buffer and the pixel buffer.
    hcl.mutate((1, ),
               lambda x: zculling(pixel_count, frag_count[0], frag_buf,
                                  z_buffer, pixel_buf),
               "z_update")

    # Stage 4 - color the frame buffer, one iteration per produced pixel.
    hcl.mutate((pixel_count[0], ),
               lambda x: coloringFB(x, pixel_buf, frame_buffer),
               "buffer_update")
def sobel_kernel(imgF, Gx, Gy):
    """Apply the 3x3 kernels ``Gx``/``Gy`` to ``imgF`` and return the scaled magnitude.

    The three channels of the padded image are summed, convolved with both
    kernels, combined as ``sqrt(X^2 + Y^2)`` and multiplied by ``norm``.
    """

    def pad(x, y, z):
        # Zero on the first row/column, otherwise the pixel shifted by one.
        # NOTE(review): x and y run up to height + 1 / width + 1, so this
        # reads imgF[height, ...] at the far border - confirm imgF's extent.
        pixel = hcl.scalar(0, "out")
        with hcl.if_(hcl.and_(x > 0, y > 0)):
            pixel.v = imgF[x - 1, y - 1, z]
        with hcl.else_():
            pixel.v = 0
        return pixel.v

    padded = hcl.compute((height + 2, width + 2, 3),
                         lambda x, y, z: pad(x, y, z),
                         "P")
    # Collapse the three channels into one intensity plane.
    intensity = hcl.compute(
        (height + 2, width + 2),
        lambda x, y: padded[x][y][0] + padded[x][y][1] + padded[x][y][2],
        "A")

    # First kernel.
    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    grad_x = hcl.compute(
        (height, width),
        lambda x, y: hcl.sum(intensity[x + r, y + c] * Gx[r, c],
                             axis=[r, c], name="sum1"),
        "X")

    # Second kernel.
    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    grad_y = hcl.compute(
        (height, width),
        lambda x, y: hcl.sum(intensity[x + t, y + g] * Gy[t, g],
                             axis=[t, g], name="sum2"),
        "Y")

    magnitude = hcl.compute(
        (height, width),
        lambda x, y: hcl.sqrt(grad_x[x][y] * grad_x[x][y]
                              + grad_y[x][y] * grad_y[x][y]),
        "R")

    # NOTE(review): the scalar is created without an explicit dtype; if the
    # default dtype is an integer this factor becomes 0 - confirm.
    norm = hcl.scalar(255 / 4328)
    return hcl.compute((height, width),
                       lambda x, y: magnitude[x][y] * norm.v,
                       "F")
def _conv2d_nhwc(Input, Filter, Bias=None, stride=[1, 1], padding=[1, 1],
                 dilation=[1, 1], name='conv2d', out_dtype=None):
    """2-D convolution over an NHWC input.

    Parameters
    ----------
    Input : tensor
        Unpacked as (batch, in_height, in_width, in_channel).
    Filter : tensor
        Unpacked as (num_filter, channel, kernel_h, kernel_w) and indexed
        as ``Filter[ff, rc, ry, rx]``.
    Bias : tensor, optional
        Indexed by output channel. When omitted a plain convolution is
        computed.
    stride, dilation : int or pair of ints
        A single int applies to both spatial dimensions.
    padding
        Forwarded unchanged to ``hlib.nn.get_pad_tuple``.
    name : str
        Name of the output stage.
    out_dtype
        Data type used for the multiply-accumulate; defaults to
        ``Input.dtype``.

    Returns
    -------
    tensor of shape (batch, out_height, out_width, out_channel).
    """
    if out_dtype is None:
        out_dtype = Input.dtype
    assert isinstance(stride, int) or len(stride) == 2
    assert isinstance(dilation, int) or len(dilation) == 2

    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride
    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation

    batch, in_height, in_width, in_channel = Input.shape
    num_filter, channel, kernel_h, kernel_w = Filter.shape

    # Output shape: the dilated kernel is what actually slides over the input.
    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
    pad_top, pad_left, pad_down, pad_right = hlib.nn.get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w))
    out_channel = num_filter
    out_height = hlib.nn.simplify(
        (in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
    out_width = hlib.nn.simplify(
        (in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

    # Only the two spatial axes are padded.
    pad_before = [0, pad_top, pad_left, 0]
    pad_after = [0, pad_down, pad_right, 0]
    temp = hlib.nn.pad(Input, pad_before, pad_after, name="pad_temp")

    rc = hcl.reduce_axis(0, in_channel)
    ry = hcl.reduce_axis(0, kernel_h)
    rx = hcl.reduce_axis(0, kernel_w)
    out_shape = (batch, out_height, out_width, out_channel)

    # Identity test: `Bias == None` on a tensor may be overloaded, so it is
    # not a reliable "was a bias supplied" check.
    if Bias is not None:
        # NOTE(review): the bias term sits inside the reduction, so it is
        # added once per (ry, rx, rc) element rather than once per output.
        # Kept as in the original; confirm whether this is intended.
        return hcl.compute(
            out_shape,
            lambda nn, yy, xx, ff: hcl.sum(
                temp[nn, yy * stride_h + ry * dilation_h,
                     xx * stride_w + rx * dilation_w, rc].astype(out_dtype)
                * Filter[ff, rc, ry, rx].astype(out_dtype)
                + Bias[ff].astype(out_dtype),
                axis=[ry, rx, rc]),
            name=name,
        )

    # No bias supplied: plain convolution (previously this path fell through
    # and returned None).
    return hcl.compute(
        out_shape,
        lambda nn, yy, xx, ff: hcl.sum(
            temp[nn, yy * stride_h + ry * dilation_h,
                 xx * stride_w + rx * dilation_w, rc].astype(out_dtype)
            * Filter[ff, rc, ry, rx].astype(out_dtype),
            axis=[ry, rx, rc]),
        name=name,
    )
def top(input, ):
    """Accumulate a 3x3 box of ``input / 9`` per pixel and return it as "final".

    Loop bounds come from the module-level ``final_min_*`` /
    ``final_extent_*`` values. Stage and loop names are kept exactly as in
    the generated code.
    """

    def u16():
        # A fresh 16-bit unsigned type for every use, as in the original.
        return hcl.UInt(bits=16)

    mean_local = hcl.compute((final_extent_0, final_extent_1),
                             lambda x, y: 0,
                             name="mean_local",
                             dtype=u16())
    with hcl.Stage("mean_local"):
        # Pass 0: clear the accumulator.
        with hcl.for_(final_min_1, final_extent_1,
                      name="mean_local_s0_y") as y0:
            with hcl.for_(final_min_0, final_extent_0,
                          name="mean_local_s0_x") as x0:
                mean_local[x0, y0] = hcl.cast(dtype=u16(), expr=0)
        # Pass 1: add each of the 3x3 neighbours, pre-divided by 9.
        with hcl.for_(final_min_1, final_extent_1,
                      name="mean_local_s1_y") as y1:
            with hcl.for_(final_min_0, final_extent_0,
                          name="mean_local_s1_x") as x1:
                with hcl.for_(0, 3, name="mean_local_s1_box__y") as box_y:
                    with hcl.for_(0, 3, name="mean_local_s1_box__x") as box_x:
                        mean_local[x1, y1] = (
                            mean_local[x1, y1] +
                            (input[(box_x + x1), (box_y + y1)] /
                             hcl.cast(dtype=u16(), expr=9)))

    final = hcl.compute((640, 480),
                        lambda x, y: 0,
                        name="final",
                        dtype=u16())
    with hcl.Stage("final"):
        # Copy the accumulated values into the output tensor.
        with hcl.for_(final_min_1, final_extent_1, name="final_s0_y") as fy:
            with hcl.for_(final_min_0, final_extent_0,
                          name="final_s0_x") as fx:
                final[fx, fy] = mean_local[fx, fy]
    return final
def test_split_num_axis():
    """Splitting axis 1 (given by index) by 4 yields j.outer/j.inner loops."""
    hcl.init()
    a = hcl.placeholder((10, 20), name="a")
    b = hcl.placeholder((10, 20), name="b")
    c = hcl.compute(a.shape, lambda i, j: a[i, j] + b[i, j], name="c")
    s = hcl.create_schedule([a, b, c])
    s[c].split(1, factor=4, mode="transform")
    ir = hcl.lower(s)

    # Expected nesting, outermost first; each entry is one `.body` deeper.
    expected_prefixes = (
        "for (i, 0, 10)",
        "for (j.outer, 0, 5)",
        "for (j.inner, 0, 4)",
        "c[",
    )
    node = ir.body
    for prefix in expected_prefixes:
        node = node.body
        assert str(node).startswith(prefix)
def test_reorder_num_axis():
    """Reordering axes 2 and 1 (given by index) swaps the j and k loops."""
    hcl.init()
    a = hcl.placeholder((10, 20, 30, 40), name="a")
    b = hcl.placeholder((10, 20, 30, 40), name="b")
    c = hcl.compute(a.shape,
                    lambda i, j, k, l: a[i, j, k, l] + b[i, j, k, l],
                    name="c")
    s = hcl.create_schedule([a, b, c])
    s[c].reorder(2, 1)
    ir = hcl.lower(s)

    # Expected nesting, outermost first; each entry is one `.body` deeper.
    expected_prefixes = (
        "for (i, 0, 10)",
        "for (k, 0, 30)",
        "for (j, 0, 20)",
        "for (l, 0, 40)",
    )
    node = ir.body
    for prefix in expected_prefixes:
        node = node.body
        assert str(node).startswith(prefix)
def update(b_row, b_col, k, O):
    """Run one step ``k`` of an ``h`` x ``w`` systolic block and accumulate into ``O``.

    ``b_row`` / ``b_col`` select the output block. Each processing element
    ``PE['pe_<row * w + col>']`` produces a 3-element result: element 0 is
    forwarded to the PE on its right, element 1 to the PE below it, and
    element 2 is added to ``O[row + h * b_row, col + w * b_col]``.
    ``A``, ``B``, ``PE``, ``h`` and ``w`` come from the enclosing scope.
    """
    # Fetch the inputs entering the block from the left (A) and the top (B).
    localA = []
    for a_idx in range(h):
        localA.append(
            hcl.compute((1,),
                        lambda x: A[a_idx + h * b_row, k],
                        "localA_{}".format(a_idx)))
    localB = []
    for b_idx in range(w):
        localB.append(
            hcl.compute((1,),
                        lambda x: B[k, b_idx + w * b_col],
                        "localB_{}".format(b_idx)))

    # Systolic connections: one independent row list per PE row, indexed as
    # net[row][col]. The previous `[[None] * h] * w` repeated a single inner
    # list and had the dimensions transposed, so it only worked for h == w.
    net = [[None] * w for _ in range(h)]

    # Visit the PEs by anti-diagonal so both neighbours a PE reads from
    # (left and above) have already been instantiated.
    for diag in range(h + w - 1):
        for row in range(diag + 1):
            col = diag - row
            if col < 0 or col > w - 1 or row > h - 1:
                continue

            # Instantiate the PE and record its partial results.
            if col == 0:
                input_a = localA[row]
            else:
                input_a = hcl.compute((1,),
                                      lambda x: net[row][col - 1][0],
                                      "input_a{}{}".format(row, col))
            if row == 0:
                input_b = localB[col]
            else:
                input_b = hcl.compute((1,),
                                      lambda x: net[row - 1][col][1],
                                      "input_b{}{}".format(row, col))
            out = hcl.compute(
                (3,),
                lambda x: PE['pe_%d' % (row * w + col)](input_a, input_b, x),
                "out_{}{}".format(row, col))
            O[row + h * b_row, col + w * b_col] += out[2]
            net[row][col] = out
def test_hdc_accu(proto, hyper_dataset, labels, type):
    """Classify every sample against the prototypes and print the accuracy.

    For each sample the XOR with every prototype is summed into a distance;
    the class with the smallest distance is the prediction. ``type == 1``
    selects the "Training" message, anything else the "Testing" one.
    """
    n_samples = hyper_dataset.shape[0]
    vector_len = hyper_dataset.shape[1]

    # Working buffers.
    xor_buf = hcl.compute((vector_len, ), lambda x: 0, 'distance1')
    class_dist = hcl.compute((numClasses, ), lambda x: 0, "hamming_dist1")
    m1 = hcl.reduce_axis(0, vector_len, "m1")
    n_correct = hcl.scalar(0, 'correct1')

    with hcl.for_(0, n_samples) as i:
        with hcl.for_(0, numClasses) as n:
            # Element-wise XOR of sample i with prototype n...
            hcl.update(xor_buf, lambda x: hyper_dataset[i][x] ^ proto[n][x])
            # ...then add up the result to get the distance.
            class_dist[n] = hcl.sum(xor_buf[m1], axis=m1)

        # The prediction is the class with the smallest distance.
        best = hcl.scalar(0, 'pred1')
        with hcl.for_(0, class_dist.shape[0]) as j:
            with hcl.if_(class_dist[j] < class_dist[best]):
                best.v = j

        with hcl.if_(best.v == labels[i]):
            n_correct.v += 1

    # Report the accuracy as a percentage.
    total = hcl.scalar(n_samples, "all1", dtype=hcl.Float(32))
    accuracy = hcl.compute((1, ),
                           lambda x: n_correct.v / total.v * 100,
                           "accuracy1",
                           dtype=hcl.Float(32))

    def report_values():
        return (n_correct, n_samples, accuracy[0])

    with hcl.if_(type == 1):
        hcl.print(report_values(), "Training accu: %d/%d (%.2f%%)\n")
    with hcl.else_():
        hcl.print(report_values(), "Testing accu: %d/%d (%.2f%%)\n")
def kernel(inputs):
    """Map/reduce over ``inputs``: per-batch accumulation, then a final sum.

    ``inputs`` is cut into ``compute_units`` equal batches. Each batch
    accumulates ``val`` into the slot selected by ``key`` of its own
    intermediate tensor; the intermediates are then summed element-wise
    into the returned "output" tensor of length ``class_number``.
    """
    # Length of one batch. Derived from the tensor itself; the original
    # read a `size` name here that is only assigned inside `split`
    # (presumably a same-valued module global - verify against the caller).
    batch_len = inputs.shape[0] // compute_units

    def split(inputs, number):
        # Cut `inputs` into `number` consecutive slices of equal length.
        size = inputs.shape[0]
        # Floor division keeps the slice length and offsets integral.
        length = size // number
        cus = []
        for i in range(number):
            base = i * length
            ret = hcl.compute((length, ),
                              lambda x: inputs[base + x],
                              dtype=st,
                              name="batch_" + str(i))
            cus.append(ret)
        return cus

    def count(res, ret, x):
        # `ret` is an input slice of (key, val) records; `res` is the
        # intermediate result for that slice.
        res[ret[x].key] += ret[x].val

    def reducer(ress, output, x):
        for res in ress:
            output[x] += res[x]

    rets = split(inputs, compute_units)
    ress = []
    # enumerate gives the batch number directly instead of searching the
    # list with ==, which tensors may overload.
    for batch_no, ret in enumerate(rets):
        name = "map_batch_" + str(batch_no)
        res = hcl.compute((class_number, ), lambda *args: 0, name=name)
        # Mapping: accumulate the values of this batch.
        hcl.mutate((batch_len, ),
                   lambda x: count(res, ret, x),
                   name="mutate_" + name)
        ress.append(res)

    # Shuffle and reduce the intermediates into the output.
    output = hcl.compute((class_number, ), lambda x: 0, name="output")
    hcl.mutate((class_number, ),
               lambda x: reducer(ress, output, x),
               "reducer")
    return output
def test2():
    """Build a bit-sum of ``A ^ B`` for the 'vhls' target and print the result."""
    hcl.init()
    shape = (3, )
    A = hcl.placeholder(shape, dtype=hcl.UInt(8), name="A")
    B = hcl.placeholder(shape, dtype=hcl.UInt(8), name="B")
    rb = hcl.reduce_axis(0, 8, name="rb")
    # Summing the 8 individual bits of the XOR counts the differing bits.
    out = hcl.compute(
        shape,
        lambda x: hcl.sum((A[x] ^ B[x])[rb], axis=rb))
    schedule = hcl.create_schedule([A, B, out])
    built = hcl.build(schedule, "vhls")
    print(built)
def test1():
    """Run ``tvm.intrin.popcount`` over three 8-bit values and print in/out."""
    hcl.init()
    a = hcl.placeholder((3,), dtype=hcl.UInt(8), name="a")
    out = hcl.compute((3,),
                      lambda x: tvm.intrin.popcount(a[x]),
                      dtype=hcl.UInt(32))
    schedule = hcl.create_schedule([a, out])
    run = hcl.build(schedule)

    # Host-side buffers: fixed inputs and a zeroed output.
    in_values = np.array([9, 7, 31])
    out_values = np.array([0, 0, 0])
    hcl_a = hcl.asarray(in_values, dtype=hcl.UInt(8))
    hcl_out = hcl.asarray(out_values, dtype=hcl.UInt(32))

    run(hcl_a, hcl_out)

    print("Input : {}".format(hcl_a.asnumpy()))
    print("Output : {}".format(hcl_out.asnumpy()))
def loop_kernel(labels):
    """One clustering iteration: assign labels, then recompute the means.

    ``points``, ``means``, ``N``, ``K`` and ``dim`` come from the enclosing
    scope. ``labels`` and ``means`` are updated in place.
    """
    # Assignment step: each point takes the label of the nearest mean.
    with hcl.for_(0, N, name="N") as n:
        nearest = hcl.scalar(100000)
        with hcl.for_(0, K) as k:
            sq_dist = hcl.scalar(0)
            with hcl.for_(0, dim) as d:
                delta = points[n, d] - means[k, d]
                sq_dist.v += delta * delta
            with hcl.if_(sq_dist.v < nearest.v):
                nearest.v = sq_dist.v
                labels[n] = k

    # Update step: per-cluster member counts and coordinate sums.
    num_k = hcl.compute((K,), lambda x: 0)
    sum_k = hcl.compute((K, dim), lambda x, y: 0)

    def calc_sum(n):
        num_k[labels[n]] += 1
        with hcl.for_(0, dim) as d:
            sum_k[labels[n], d] += points[n, d]

    hcl.mutate((N,), lambda n: calc_sum(n), "calc_sum")
    # NOTE(review): num_k[k] stays 0 for a cluster that received no points,
    # which makes this a division by zero - confirm that cannot happen.
    hcl.update(means, lambda k, d: sum_k[k, d] // num_k[k], "update_mean")
def test_hdc_accu(proto, pack_data, labels, type):
    """Classify every packed sample against the prototypes and print the accuracy.

    For each sample the XOR with every prototype is pop-counted word by
    word and summed into a distance; the class with the smallest distance
    is the prediction. ``type == 1`` selects the "Training" message,
    anything else the "Testing" one.
    """
    n_samples = pack_data.shape[0]
    n_words = pack_data.shape[1]

    # Working buffers.
    xor_buf = hcl.compute((n_words,), lambda x: 0, 'distance1',
                          dtype=hcl.UInt(in_bw))
    word_ones = hcl.compute((n_words,), lambda x: 0, "pre_hamming")
    class_dist = hcl.compute((numClasses,), lambda x: 0, "hamming_dist1")
    m1 = hcl.reduce_axis(0, n_words, "m1")
    n_correct = hcl.scalar(0, 'correct1')

    with hcl.for_(0, n_samples) as i:
        # Progress trace.
        hcl.print(i, "%d suc\n")
        with hcl.for_(0, numClasses) as n:
            # Element-wise XOR of sample i with prototype n.
            hcl.update(xor_buf, lambda x: pack_data[i][x] ^ proto[n][x])
            # Count the set bits of each word, then add the counts up.
            hcl.update(word_ones, lambda x: popcount(xor_buf[x]))
            hcl.print((), "sum of 1s suc")
            # The original author marked the next statement "seg fault".
            class_dist[n] = hcl.sum(word_ones[m1], axis=m1)

        # The prediction is the class with the smallest distance.
        best = hcl.scalar(0, 'pred1')
        with hcl.for_(0, class_dist.shape[0]) as j:
            with hcl.if_(class_dist[j] < class_dist[best]):
                best.v = j

        with hcl.if_(best.v == labels[i]):
            n_correct.v += 1

    # Report the accuracy as a percentage.
    total = hcl.scalar(n_samples, "all1", dtype=hcl.Float(32))
    accuracy = hcl.compute((1,),
                           lambda x: n_correct.v / total.v * 100,
                           "accuracy1",
                           dtype=hcl.Float(32))

    def report_values():
        return (n_correct, n_samples, accuracy[0])

    with hcl.if_(type == 1):
        hcl.print(report_values(), "Training accu: %d/%d (%.2f%%)\n")
    with hcl.else_():
        hcl.print(report_values(), "Testing accu: %d/%d (%.2f%%)\n")
def kernel(matrix_1, matrix_2):
    """Sum two matrices, then exercise ``hcl.assert_`` inside two for loops.

    The expected outcomes noted below are the original author's: the first
    loop's assertion holds, the second one fails.
    """
    total = hcl.compute((m, k),
                        lambda x, y: matrix_1[x, y] + matrix_2[x, y],
                        "return_matrix")

    # Loop 1: assertion expected to hold, so the message is printed.
    with hcl.for_(0, 7, name="for_loop") as row:
        hcl.assert_(matrix_2[row, 2] == 0,
                    "assert message in the first for loop")
        hcl.print(0, "in the first for loop\n")

    # Loop 2: assertion expected to fail, so nothing from here on prints.
    with hcl.for_(0, 7, name="for_loop") as row:
        hcl.assert_(matrix_2[row, 2] != 0,
                    "assert message in the second for loop")
        hcl.print(0, "in the second for loop\n")

    hcl.print(0, "this should not be printed\n")
    return total
def sobel(A, Gx, Gy):
    """Convolve ``A`` with the 3x3 kernels ``Gx``/``Gy`` and return the scaled magnitude.

    The result has shape (height - 2, width - 2) and holds
    ``sqrt(B^2 + C^2) / 4328 * 255`` as floats.
    """
    out_shape = (height - 2, width - 2)

    # Response to the first kernel.
    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    grad_x = hcl.compute(
        out_shape,
        lambda x, y: hcl.sum(A[x + r, y + c] * Gx[r, c],
                             axis=[r, c], name="sum1"),
        name="B",
        dtype=hcl.Float())

    # Response to the second kernel.
    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    grad_y = hcl.compute(
        out_shape,
        lambda x, y: hcl.sum(A[x + t, y + g] * Gy[t, g],
                             axis=[t, g], name="sum2"),
        name="C",
        dtype=hcl.Float())

    # Combine both responses and rescale.
    return hcl.compute(
        out_shape,
        lambda x, y: hcl.sqrt(grad_x[x, y] * grad_x[x, y]
                              + grad_y[x, y] * grad_y[x, y]) / 4328 * 255,
        name="output",
        dtype=hcl.Float())
def sobel(A, Gx, Gy):
    """Sobel magnitude over a 3-channel image packed into struct elements.

    The three channels of ``A`` are packed into a struct tensor of type
    ``ts``, summed via its ``fa``/``fb``/``fc`` fields, convolved with
    ``Gx`` and ``Gy``, and combined as ``sqrt(Fx^2 + Fy^2) * 0.05891867``.
    """
    full_shape = (height, width)
    out_shape = (height - 2, width - 2)

    # Pack the channels, then add the fields into one plane.
    packed = hcl.compute(
        full_shape,
        lambda x, y: (A[x][y][0], A[x][y][1], A[x][y][2]),
        dtype=ts)
    plane = hcl.compute(
        full_shape,
        lambda x, y: packed[x][y].fa + packed[x][y].fb + packed[x][y].fc,
        "B",
        dtype=hcl.Float())

    # Response to the first kernel.
    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    grad_x = hcl.compute(
        out_shape,
        lambda x, y: hcl.sum(plane[x + r, y + c] * Gx[r, c], axis=[r, c]),
        "xx")

    # Response to the second kernel.
    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    grad_y = hcl.compute(
        out_shape,
        lambda x, y: hcl.sum(plane[x + t, y + g] * Gy[t, g], axis=[t, g]),
        "yy")

    return hcl.compute(
        out_shape,
        lambda x, y: hcl.sqrt(grad_x[x][y] * grad_x[x][y]
                              + grad_y[x][y] * grad_y[x][y]) * 0.05891867,
        "Fimg")
def algorithm(a, b, c):
    """Build a module ``add`` with one passing and one failing assertion, then call it."""

    @hcl.def_([a.shape, b.shape, c.shape])
    def add(a, b, c):
        # d = a + b; the first assertion is True.
        elementwise_sum = hcl.compute(a.shape, lambda *x: a[x] + b[x], "d")
        hcl.assert_(True, "assert error 1")
        hcl.print(0, "print1\n")
        # c = d + 1; the second assertion is False.
        hcl.update(c, lambda *x: elementwise_sum[x] + 1, "u")
        hcl.assert_(False, "assert error 2")
        hcl.print(0, "print2")

    # Extra stage whose result is never read; it is still emitted before
    # the call to `add`.
    hcl.compute((64, 64), lambda x, y: 4 + 8)
    add(a, b, c)
    hcl.print(0, "print end")
def kernel(A, B):
    """Chain two modules: ``B = A + 1`` followed by ``C = B * 2``.

    ``C`` is a local (10, 32) tensor initialised to 10; nothing is returned.
    """
    C = hcl.compute((10, 32), lambda *args: 10)

    @hcl.def_([(10, 32), (10, 32)])
    def add(A, B):
        # Overwrite B with A + 1.
        hcl.update(B, lambda *args: A[args] + 1)

    @hcl.def_([(10, 32), (10, 32)])
    def mul(B, C):
        # Overwrite C with B * 2.
        hcl.update(C, lambda *args: B[args] * 2)

    # mul consumes the B that add has just produced.
    add(A, B)
    mul(B, C)
def algorithm(A, B):
    """Return a tensor whose element ``x`` is ``mul(A, B, x)``.

    ``mul`` adds ``add(A, B, x)`` (that is, ``A[x] + B[x]``) to an
    accumulator ``x`` times and returns the accumulator.
    """

    @hcl.def_([A.shape, B.shape, ()])
    def add(A, B, x):
        hcl.return_(A[x] + B[x])

    @hcl.def_([A.shape, B.shape, ()])
    def mul(A, B, x):
        # Repeated addition through the nested module call.
        acc = hcl.local(0)
        with hcl.for_(0, x) as _:
            acc[0] += add(A, B, x)
        hcl.return_(acc[0])

    result = hcl.compute(A.shape, lambda x: mul(A, B, x))
    return result
def Gaussian_Sobel_filters(A, G, Fx, Fy):
    """Blur ``A`` with ``G``, apply the 3x3 filters ``Fx``/``Fy``, return the magnitude.

    All tensors have shape (height, width) and are indexed ``[y, x]``.
    Pixels outside the interior (closer than ``k`` to any edge) are passed
    through: the blurred image takes them from ``A``, and both gradient
    images take them from the blurred image. The result is
    ``sqrt(Gx^2 + Gy^2) / 4328 * 255`` as floats.

    ``height``, ``width``, ``k`` and ``kernel_size`` come from the
    enclosing scope.
    """

    def interior(y, x):
        # y walks the rows (height) and x the columns (width), matching the
        # (height, width) shape. The original compared them the other way
        # round, which is only right for square images.
        return hcl.and_(y > (k - 1), y < (height - k),
                        x > (k - 1), x < (width - k))

    # NOTE(review): the windows below start at (y, x) instead of being
    # centred on it, so near the far edge y + w / x + h may run past the
    # tensor depending on how k relates to kernel_size - confirm.

    # Blur.
    h = hcl.reduce_axis(0, kernel_size)
    w = hcl.reduce_axis(0, kernel_size)
    B = hcl.compute(
        (height, width),
        lambda y, x: hcl.select(
            interior(y, x),
            hcl.sum(A[y + w, x + h] * G[w, h], axis=[w, h]),
            # Border pixels keep the input value. The original read B[y, x]
            # here, i.e. the tensor that is still being defined, which
            # raises NameError.
            A[y, x]),
        "B",
        dtype=hcl.Float())

    # Gradient from the first 3x3 filter.
    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    Gx = hcl.compute(
        (height, width),
        lambda y, x: hcl.select(
            interior(y, x),
            hcl.sum(B[y + r, x + c] * Fx[r, c], axis=[r, c]),
            B[y, x]),
        "Gx",
        dtype=hcl.Float())

    # Gradient from the second 3x3 filter.
    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    Gy = hcl.compute(
        (height, width),
        lambda y, x: hcl.select(
            interior(y, x),
            hcl.sum(B[y + t, x + g] * Fy[t, g], axis=[t, g]),
            B[y, x]),
        "Gy",
        dtype=hcl.Float())

    # Only the intensity matrix is returned; no edge-direction matrix is
    # computed.
    return hcl.compute(
        (height, width),
        lambda y, x: (hcl.sqrt(Gx[y][x] * Gx[y][x]
                               + Gy[y][x] * Gy[y][x])) / 4328 * 255,
        dtype=hcl.Float())
def kernel(matrix_1, matrix_2):
    """Exercise ``hcl.assert_`` nested in for/if, with computes before and after.

    The expected outcomes noted below are the original author's: the first
    loop's assertion holds, the second one fails.
    """

    def shifted_sum(offset, name):
        # matrix_1 + matrix_2 + offset as a new (m, k) stage called `name`.
        return hcl.compute(
            (m, k),
            lambda x, y: matrix_1[x, y] + matrix_2[x, y] + offset,
            name)

    total = hcl.compute((m, k),
                        lambda x, y: matrix_1[x, y] + matrix_2[x, y],
                        "return_matrix")
    shifted_sum(7, "matrix_A")
    shifted_sum(8, "matrix_B")

    # Loop 1: assertion expected to hold; both messages print 7 times.
    with hcl.for_(0, 7, name="for_loop") as col:
        with hcl.if_(matrix_1[0, col] == 0):
            hcl.assert_(matrix_2[col, 2] == 0,
                        "assert message in the first for loop")
            hcl.print(0, "in the first for loop and if statement\n")
        hcl.print(0, "in the first for loop, outside if statement\n")

    # Loop 2: assertion expected to fail; nothing from here on prints.
    with hcl.for_(0, 7, name="for_loop") as col:
        with hcl.if_(matrix_1[0, col] == 0):
            hcl.assert_(matrix_2[col, 2] != 0,
                        "assert message in the second for loop")
            hcl.print(0, "in the second for loop and if statement\n")
        hcl.print(0, "in the second for loop, outside if statement\n")

    hcl.print(0, "this should not be printed\n")

    # Stages placed after the failing assertion.
    shifted_sum(9, "matrix_C")
    shifted_sum(10, "matrix_D")
    return total