def find_max(A, len_):
    """Emit a linear scan for the largest of the first ``len_`` entries of ``A``.

    Two one-element locals (named "max" and "act") track the running
    maximum and the index at which it was seen. The comparison is strict,
    so an entry equal to the current maximum does not replace it.

    Args:
        A: Indexable tensor to scan; ``A[0]`` seeds the running maximum.
        len_: Exclusive upper bound of the scan.

    Returns:
        A pair ``(max_value, index_of_max)`` read from the two locals.
    """
    best_val = hcl.local(A[0], "max")
    best_idx = hcl.local(0, "act")
    with hcl.for_(0, len_) as pos:
        with hcl.if_(A[pos] > best_val[0]):
            best_val[0] = A[pos]
            best_idx[0] = pos
    return best_val[0], best_idx[0]
def align(curr_i, curr_j, next_i, next_j):
    """Pick the pair of symbols to emit for one trace-back step.

    For each sequence, 0 is emitted when the corresponding index does not
    change between the current and next position; otherwise the symbol at
    ``current index - 1`` of ``seqA`` / ``seqB`` is emitted. ``seqA`` and
    ``seqB`` are taken from the enclosing scope.

    Args:
        curr_i, curr_j: One-element tensors holding the current position.
        next_i, next_j: One-element tensors holding the next position.

    Returns:
        A pair ``(symbol_for_A, symbol_for_B)``.
    """
    sym_a = hcl.local(0, "a")
    sym_b = hcl.local(0, "b")

    # First sequence: index unchanged -> 0, otherwise the consumed symbol.
    with hcl.if_(next_i[0] == curr_i[0]):
        sym_a[0] = 0
    with hcl.else_():
        sym_a[0] = seqA[curr_i[0] - 1]

    # Second sequence: same rule on the other index.
    with hcl.if_(next_j[0] == curr_j[0]):
        sym_b[0] = 0
    with hcl.else_():
        sym_b[0] = seqB[curr_j[0] - 1]

    return sym_a[0], sym_b[0]
def insertion_sort(A):
    """Emit an in-place insertion sort over ``A`` inside a stage named "S".

    Args:
        A: Tensor to sort; its first dimension gives the loop bound and its
            elements are overwritten.
    """
    # Group every statement below into one stage.
    with hcl.Stage("S"):
        # Same as `for i in range(1, A.shape[0])`; the axis is named "i".
        with hcl.for_(1, A.shape[0], name="i") as idx:
            pending = hcl.local(A[idx], "key")
            cursor = hcl.local(idx - 1, "j")
            # Same as `while (j >= 0 && key < A[j])`: shift larger entries
            # one slot to the right.
            with hcl.while_(hcl.and_(cursor >= 0, pending < A[cursor])):
                A[cursor + 1] = A[cursor]
                cursor[0] -= 1
            # Drop the held value into the slot that was opened up.
            A[cursor + 1] = pending[0]
def clamp(val, min_, max_):
    """Limit ``val`` to the range bounded by ``min_`` and ``max_``.

    Args:
        val: Value to limit.
        min_: Lower bound; returned when ``val < min_``.
        max_: Upper bound; returned when ``val > max_`` (only checked when
            the lower-bound test did not fire).

    Returns:
        The content of a one-element local initialised with ``val`` and
        overwritten by whichever bound was violated.
    """
    bounded = hcl.local(val)
    with hcl.if_(val < min_):
        bounded[0] = min_
    with hcl.elif_(val > max_):
        bounded[0] = max_
    return bounded[0]
def update_knn(dist, knn_mat, i, j):
    """Replace the largest of three entries in row ``i`` if ``dist[i][j]`` is smaller.

    Args:
        dist: Tensor read at ``dist[i][j]``.
        knn_mat: Tensor whose row ``i`` (columns 0..2) is scanned and
            possibly updated in place.
        i: Row index used for both tensors.
        j: Column index into ``dist``.
    """
    worst = hcl.local(0, "max_id")

    # Locate the column holding the largest of the three stored values.
    with hcl.for_(0, 3) as col:
        with hcl.if_(knn_mat[i][col] > knn_mat[i][worst.v]):
            worst.v = col

    # Overwrite that entry when the new value is strictly smaller.
    with hcl.if_(dist[i][j] < knn_mat[i][worst.v]):
        knn_mat[i][worst.v] = dist[i][j]
def Sigmoid(exponent):
    """Return a table-driven value for ``exponent``, saturating outside [-4, 4].

    Inputs above 4.0 yield 1.0 and inputs below -4.0 yield 0.0. Anything in
    between is turned into an index into ``lut`` (taken from the enclosing
    scope), using bit slices of ``exponent``.

    Args:
        exponent: Value compared against ``FTYPE`` constants and bit-sliced.
            NOTE(review): presumably a fixed-point value of type FTYPE;
            confirm the bit layout the slices below rely on.

    Returns:
        The content of the one-element local named "sigmoid".
    """
    ret = hcl.local(0.0, "sigmoid", FTYPE)
    # Saturate on both ends of the range.
    with hcl.if_(exponent > hcl.cast(FTYPE, 4.0)):
        ret[0] = 1.0
    with hcl.elif_(exponent < hcl.cast(FTYPE, -4.0)):
        ret[0] = 0.0
    with hcl.else_():
        with hcl.if_(exponent < hcl.cast(FTYPE, 0.0)):
            # Negative input: copy a slice of the input bits into a
            # UFixed(18, 8) scratch value, then negate it
            # (bitwise-invert the shifted value and add one).
            num = hcl.local(0, dtype = hcl.UFixed(18, 8))
            num[0][18:0] = exponent[29:11]
            num[0] = ~(num[0] << 8) + 1
            # Index counted back from 2047.
            index = 2047.0 - num[0]
            ret[0] = lut[hcl.cast(hcl.Int(32), index)]
        with hcl.else_():
            # Non-negative input: a slice of the input bits is the index.
            index = exponent[21:11]
            ret[0] = lut[hcl.cast(hcl.Int(32), index)]
    return ret[0]
def _mvpodd_reduce(*args):
    """Compute a {1, -1} dot product on packed data.

    Accumulates the popcount of the XOR between one row of ``d_packed``
    (selected by ``args[0]``) and one row of ``w_packed`` (selected by
    ``args[1]``) over ``in_block_num * block_size`` packed words, then maps
    the count ``c`` to ``ppac_config.elem_num - 2 * c``.

    Args:
        *args: ``args[0]`` indexes ``d_packed`` and ``args[1]`` indexes
            ``w_packed``.

    Returns:
        The content of the 64-bit signed accumulator named 'mvpodd_acc'.
    """
    mismatches = hcl.local(0, name='mvpodd_acc', dtype=hcl.Int(64))
    with hcl.for_(0, in_block_num) as blk:
        with hcl.for_(0, block_size) as off:
            mismatches[0] += tvm.popcount(
                d_packed[args[0], off+block_size*blk]
                ^ w_packed[args[1], off+block_size*blk]
            )
    # Turn the number of differing bits into the signed dot product.
    mismatches[0] = ppac_config.elem_num - mismatches[0]*2
    return mismatches[0]
def kernel(A):
    """Fill ``A[0..5]`` with their own indices using an unbounded loop plus a break.

    Args:
        A: Tensor written in place.
    """
    with hcl.Stage():
        counter = hcl.local(0)
        with hcl.while_(True):
            # Leave the loop once the counter has passed 5.
            with hcl.if_(counter[0] > 5):
                hcl.break_()
            A[counter[0]] = counter[0]
            counter[0] += 1
def get_next(action, i, j):
    """Decode the action stored at ``action[i][j]`` into the next position.

    Action codes:
        0 -> ``(i - 1, j - 1)``
        1 -> ``(i - 1, j)``
        2 -> ``(i, j - 1)``
        anything else -> ``(i, j)`` (position unchanged)

    Args:
        action: Tensor of action codes.
        i, j: Current position.

    Returns:
        A pair ``(next_i, next_j)``.
    """
    code = hcl.local(action[i][j], "act")
    row = hcl.local(0, "next_i")
    col = hcl.local(0, "next_j")

    with hcl.if_(code[0] == 0):
        row[0] = i - 1
        col[0] = j - 1
    with hcl.elif_(code[0] == 1):
        row[0] = i - 1
        col[0] = j
    with hcl.elif_(code[0] == 2):
        row[0] = i
        col[0] = j - 1
    with hcl.else_():
        row[0] = i
        col[0] = j

    return row[0], col[0]
def sort_knn(knn_mat, i, j):
    """Conditionally swap one adjacent pair of entries in row ``i`` of ``knn_mat``.

    When ``j == 1`` the pair at columns (1, 2) is put in ascending order;
    for any other ``j`` the pair at columns (0, 1) is.

    Args:
        knn_mat: Tensor updated in place.
        i: Row to operate on.
        j: Selects which adjacent pair is compared.
    """
    swap_tmp = hcl.local(0, "val")
    with hcl.if_( j == 1 ):
        # Order columns 1 and 2.
        with hcl.if_( knn_mat[i][1] > knn_mat[i][2] ):
            swap_tmp.v = knn_mat[i][1]
            knn_mat[i][1] = knn_mat[i][2]
            knn_mat[i][2] = swap_tmp.v
    with hcl.else_():
        # Order columns 0 and 1.
        with hcl.if_( knn_mat[i][0] > knn_mat[i][1] ):
            swap_tmp.v = knn_mat[i][0]
            knn_mat[i][0] = knn_mat[i][1]
            knn_mat[i][1] = swap_tmp.v
def fft(X_real, X_imag, IndexTable, F_real, F_imag):
    """Emit an in-place butterfly computation on ``F_real`` / ``F_imag``.

    The outputs are first filled from the inputs through ``IndexTable``
    (the bit-reverse permutation step), then ``log2(L)`` butterfly stages
    are applied inside a stage named "Out".

    Args:
        X_real, X_imag: Input tensors; ``X_real.shape[0]`` gives the
            length ``L``.
        IndexTable: Tensor of source indices used for the initial
            permutation.
        F_real, F_imag: Output tensors, updated in place.

    Raises:
        ValueError: If ``L`` is not a power of two.
    """
    L = X_real.shape[0]
    # A fractional log2 means L is not an exact power of two.
    if np.log2(L) % 1 > 0:
        raise ValueError("Length of input vector (1d tensor) must be power of 2")
    num_stages = int(np.log2(L))
    # bit reverse permutation
    hcl.update(F_real, lambda i: X_real[IndexTable[i]], name='F_real_update')
    hcl.update(F_imag, lambda i: X_imag[IndexTable[i]], name='F_imag_update')
    with hcl.Stage("Out"):
        # One-element int32 local holding 1, used as the base of the shift.
        one = hcl.local(1, dtype="int32")
        with hcl.for_(0, num_stages) as stage:
            # Points per sub-transform at this stage: 2 ** (stage + 1).
            DFTpts = one[0] << (stage + 1)
            # Half of the points per sub-transform.
            numBF = DFTpts / 2
            # Angle increment applied after each value of j.
            e = -2 * np.pi / DFTpts
            # Running angle, restarted at 0 for every stage.
            a = hcl.local(0)
            with hcl.for_(0, numBF) as j:
                # Cosine and sine of the current angle.
                c = hcl.local(hcl.cos(a[0]))
                s = hcl.local(hcl.sin(a[0]))
                a[0] = a[0] + e
                # Visit indices starting at j in steps of DFTpts.
                with hcl.for_(j, L + DFTpts - 1, DFTpts) as i:
                    i_lower = i + numBF
                    # Product of the lower element with (c, s), kept in
                    # locals because F[i_lower] is overwritten next.
                    temp_r = hcl.local(F_real[i_lower] * c - F_imag[i_lower] * s)
                    temp_i = hcl.local(F_imag[i_lower] * c + F_real[i_lower] * s)
                    F_real[i_lower] = F_real[i] - temp_r[0]
                    F_imag[i_lower] = F_imag[i] - temp_i[0]
                    F_real[i] = F_real[i] + temp_r[0]
                    F_imag[i] = F_imag[i] + temp_i[0]
def knn_vote(knn_mat, j):
    """Return 1 if ``j`` matches a row index found by scanning column 0, else 0.

    Three locals ("id0", "id1", "id2") are each set by a scan over rows
    0..9 that keeps the row whose column-0 entry is smallest.

    NOTE(review): the three scans are identical except for the local they
    update, so all three locals end up with the same row index and the
    ``elif`` branches cannot match anything the first branch missed.
    Confirm whether the second and third scans were meant to look at
    different data.

    Args:
        knn_mat: Tensor read at ``knn_mat[row][0]`` for rows 0..9.
        j: Value compared with the selected row indices.

    Returns:
        The content of the one-element local named "count".
    """
    first = hcl.local(0, "id0")
    second = hcl.local(0, "id1")
    third = hcl.local(0, "id2")
    votes = hcl.local(0, "count")

    with hcl.for_(0, 10) as row:
        with hcl.if_(knn_mat[row][0] < knn_mat[first.v][0]):
            first.v = row

    with hcl.for_(0, 10) as row:
        with hcl.if_(knn_mat[row][0] < knn_mat[second.v][0]):
            second.v = row

    with hcl.for_(0, 10) as row:
        with hcl.if_(knn_mat[row][0] < knn_mat[third.v][0]):
            third.v = row

    # At most one increment: the branches are mutually exclusive.
    with hcl.if_(j == first.v):
        votes.v += 1
    with hcl.elif_(j == second.v):
        votes.v += 1
    with hcl.elif_(j == third.v):
        votes.v += 1
    with hcl.else_():
        votes.v += 0

    return votes.v
def loop_kernel(labels):
    """Emit one assign-then-update iteration over ``points`` and ``means``.

    ``points``, ``means``, ``N``, ``K`` and ``dim`` come from the enclosing
    scope. Each of the ``N`` points is first labelled with the index of the
    mean at the smallest squared distance; then every mean is replaced by
    the floor-divided per-dimension sum of the points carrying its label.

    NOTE(review): a mean that receives no points has ``num_k[k] == 0`` at
    the final division; confirm that callers rule this out.

    Args:
        labels: Tensor of length ``N`` that receives the label of each point.
    """
    # assign cluster
    with hcl.for_(0, N, name="N") as n:
        # Smallest squared distance seen so far, seeded with 100000.
        min_dist = hcl.local(100000)
        with hcl.for_(0, K) as k:
            # Squared distance between point n and mean k.
            dist = hcl.local(0)
            with hcl.for_(0, dim) as d:
                dist_ = points[n, d] - means[k, d]
                dist[0] += dist_ * dist_
            with hcl.if_(dist[0] < min_dist[0]):
                min_dist[0] = dist[0]
                labels[n] = k
    # update mean
    # Per-label point counts and per-label, per-dimension sums, zeroed.
    num_k = hcl.compute((K, ), lambda x: 0)
    sum_k = hcl.compute((K, dim), lambda x, y: 0)
    def calc_sum(n):
        """Add point ``n`` to the count and sums of its label."""
        num_k[labels[n]] += 1
        with hcl.for_(0, dim) as d:
            sum_k[labels[n], d] += points[n, d]
    hcl.mutate((N, ), lambda n: calc_sum(n), "calc_sum")
    # New mean = sum // count (floor division).
    hcl.update(means, lambda k, d: sum_k[k, d] // num_k[k], "update_mean")
#d1Local is N(d1); d2local is N(d2) Loop = 1 X = hcl.placeholder((1, )) T = hcl.placeholder((1, )) S = hcl.placeholder((1, )) r = hcl.placeholder((1, )) sigma = hcl.placeholder((1, )) C = hcl.placeholder((1, )) P = hcl.placeholder((1, )) xdiv1 = hcl.placeholder((1, )) xdiv2 = hcl.placeholder((1, )) d1Local = hcl.placeholder((1, )) d2Local = hcl.placeholder((1, )) #------------------------------------------# with hcl.stage() as s: xlogterm = hcl.local(0) xlogterm[0] = hcl.log(S[0] / X[0]) xpowerterm = hcl.local(0) xpowerterm[0] = 0.5 * sigma[0] * sigma[0] xnum = hcl.local(0) xnum[0] = xlogterm[0] + (r[0] + xpowerterm[0]) * T[0] xsqrtterm = hcl.local(0) xsqrtterm[0] = hcl.sqrt(T[0]) xden = hcl.local(0) xden[0] = sigma[0] * xsqrtterm[0] #xdiv1 = hcl.local(0) xdiv1[0] = xnum[0] / xden[0] #xdiv2 = hcl.local(0) xdiv2[0] = xdiv1[0] - xden[0] futurevaluex = hcl.local(0) futurevaluex[0] = X[0] * hcl.exp(-r[0] * T[0])
def mul(A, B, x):
    """Accumulate ``add(A, B, x)`` ``x`` times and return the total via ``hcl.return_``.

    ``add`` is defined outside this block.

    Args:
        A, B: Operands forwarded unchanged to ``add``.
        x: Loop trip count; also forwarded to ``add`` as its third argument.
    """
    total = hcl.local(0)
    with hcl.for_(0, x) as _step:
        total[0] += add(A, B, x)
    hcl.return_(total[0])
def kernel(A):
    """Fill ``A[0..9]`` with their own indices using a counted while-loop.

    Args:
        A: Tensor written in place.
    """
    with hcl.Stage():
        counter = hcl.local(0)
        # Runs while the counter is below 10.
        with hcl.while_(counter[0] < 10):
            A[counter[0]] = counter[0]
            counter[0] += 1
def popcount(num):
    """Sum the individual bits of ``num``.

    The number of bits visited is ``train_images.type.bits``, where
    ``train_images`` comes from the enclosing scope.

    Args:
        num: Value whose bits are selected with ``num[bit]``.

    Returns:
        The content of the one-element local named "out".
    """
    ones = hcl.local(0, "out")
    with hcl.for_(0, train_images.type.bits) as bit:
        # Bit selection: num[bit] is the bit at position `bit`.
        ones[0] += num[bit]
    return ones[0]
def smith_waterman(seqA, seqB, consA, consB):
    """Emit a scoring-matrix fill followed by a trace-back into ``consA`` / ``consB``.

    ``lenA``, ``lenB`` and ``penalty`` are taken from the enclosing scope.

    Args:
        seqA, seqB: Input sequences, read at ``seqA[i-1]`` / ``seqB[j-1]``.
        consA, consB: Output tensors, written one entry per trace-back step.
    """
    def similarity_score(a, b):
        """Return 1 when ``a == b`` and ``penalty`` otherwise."""
        return hcl.select(a == b, 1, penalty)
    def find_max(A, len_):
        """Return the largest of ``A[0:len_]`` and the index where it first occurs."""
        max_ = hcl.local(A[0], "max")
        act_ = hcl.local(0, "act")
        with hcl.for_(0, len_) as i:
            # Strict comparison: ties keep the earlier index.
            with hcl.if_(A[i] > max_[0]):
                max_[0] = A[i]
                act_[0] = i
        return max_[0], act_[0]
    # Best score seen so far and the cell where it was seen.
    matrix_max = hcl.local(0, "maxtrix_max")
    i_max = hcl.local(0, "i_max")
    j_max = hcl.local(0, "j_max")
    # Score matrix starts at 0 everywhere; every action starts at 3.
    matrix = hcl.compute((lenA + 1, lenB + 1), lambda x, y: 0, "matrix")
    action = hcl.compute(matrix.shape, lambda x, y: 3, "action")
    def populate_matrix(i, j):
        """Fill ``matrix[i, j]`` and ``action[i, j]`` for cells off the first row/column."""
        # Candidate scores; the index of the winner becomes the action code.
        trace_back = hcl.compute((4, ), lambda x: 0, "trace_back")
        with hcl.if_(hcl.and_(i != 0, j != 0)):
            trace_back[0] = matrix[i-1, j-1] + \
                similarity_score(seqA[i-1], seqB[j-1])
            trace_back[1] = matrix[i - 1, j] + penalty
            trace_back[2] = matrix[i, j - 1] + penalty
            trace_back[3] = 0
            matrix[i, j], action[i, j] = find_max(trace_back, 4)
            # Remember the best-scoring cell as the trace-back start.
            with hcl.if_(matrix[i, j] > matrix_max[0]):
                matrix_max[0] = matrix[i, j]
                i_max[0] = i
                j_max[0] = j
    P = hcl.mutate((lenA + 1, lenB + 1), lambda i, j: populate_matrix(i, j))
    def align(curr_i, curr_j, next_i, next_j):
        """Return the symbol pair for one step; 0 for a sequence whose index did not move."""
        outA = hcl.local(0, "a")
        outB = hcl.local(0, "b")
        with hcl.if_(next_i[0] == curr_i[0]):
            outA[0] = 0
        with hcl.else_():
            outA[0] = seqA[curr_i[0] - 1]
        with hcl.if_(next_j[0] == curr_j[0]):
            outB[0] = 0
        with hcl.else_():
            outB[0] = seqB[curr_j[0] - 1]
        return outA[0], outB[0]
    def get_next(action, i, j):
        """Decode ``action[i][j]``: 0 moves to (i-1, j-1), 1 to (i-1, j), 2 to (i, j-1); anything else stays."""
        act_ = hcl.local(action[i][j], "act")
        next_i = hcl.local(0, "next_i")
        next_j = hcl.local(0, "next_j")
        with hcl.if_(act_[0] == 0):
            next_i[0] = i - 1
            next_j[0] = j - 1
        with hcl.elif_(act_[0] == 1):
            next_i[0] = i - 1
            next_j[0] = j
        with hcl.elif_(act_[0] == 2):
            next_i[0] = i
            next_j[0] = j - 1
        with hcl.else_():
            next_i[0] = i
            next_j[0] = j
        return next_i[0], next_j[0]
    # Trace-back stage: walk from the best cell until the position stops changing.
    with hcl.Stage("T"):
        curr_i = hcl.local(i_max[0], "curr_i")
        curr_j = hcl.local(j_max[0], "curr_j")
        next_i = hcl.local(0, "next_i")
        next_j = hcl.local(0, "next_j")
        next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
        # Write position into consA / consB.
        tick = hcl.local(0, "tick")
        with hcl.while_( hcl.or_(curr_i[0] != next_i[0], curr_j[0] != next_j[0])):
            consA[tick[0]], consB[tick[0]] = \
                align(curr_i, curr_j, next_i, next_j)
            curr_i[0], curr_j[0] = next_i[0], next_j[0]
            next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
            tick[0] += 1
def for_loop(a):
    """Create a local seeded with ``a`` and increment it five times inside stage "S".

    Args:
        a: Initial value of the local.

    Returns:
        The one-element local tensor itself (not its scalar content).
    """
    with hcl.Stage("S"):
        counter = hcl.local(a, "b")
        # Five iterations on an axis named "i"; the index itself is unused.
        with hcl.for_(0, 5, name="i") as _axis:
            counter[0] = counter[0] + 1
    return counter
def bitcount(v):
    """Sum bits 0, 1 and 2 of ``v``.

    Args:
        v: Value whose bits are selected with ``v[bit]``.

    Returns:
        The content of the 32-bit unsigned local named "out".
    """
    ones = hcl.local(0, "out", dtype=hcl.UInt(32))
    with hcl.for_(0, 3) as bit:
        ones[0] += v[bit]
    return ones[0]
def _assign_val(*args):
    """Count the bit positions in which ``x[args[0]]`` and ``y[args[1]]`` agree.

    ``x`` and ``y`` come from the enclosing scope.

    Args:
        *args: ``args[0]`` indexes ``x`` and ``args[1]`` indexes ``y``.

    Returns:
        The content of the 64-bit unsigned local named 'sim_acc'.
    """
    agreeing = hcl.local(0, name='sim_acc', dtype=hcl.UInt(64))
    # XNOR marks equal bits with 1; popcount tallies them.
    agreeing[0] = tvm.popcount(~(x[args[0]] ^ y[args[1]]))
    return agreeing[0]
def _assign_val(*args):
    """Pack eight consecutive entries of ``tensor`` into one 8-bit word.

    Entry ``tensor[args[0], bit + args[1]*8]`` is shifted left by ``bit``
    and OR-ed into the accumulator, for ``bit`` in 0..7. ``tensor`` comes
    from the enclosing scope.

    Args:
        *args: ``args[0]`` is the row; ``args[1]`` selects the group of
            eight columns.

    Returns:
        The content of the 8-bit unsigned local named 'pack_acc'.
    """
    packed = hcl.local(0, name='pack_acc', dtype=hcl.UInt(8))
    with hcl.for_(0, 8) as bit:
        packed[0] = packed[0] | (tensor[args[0], bit + args[1]*8] << bit)
    return packed[0]
def popcount(num):
    """Sum the individual bits of ``num``.

    The number of bits visited is ``train_images.type.bits``, where
    ``train_images`` comes from the enclosing scope.

    Args:
        num: Value whose bits are selected with ``num[bit]``.

    Returns:
        The content of the one-element local named "out".
    """
    ones = hcl.local(0, "out")
    with hcl.for_(0, train_images.type.bits) as bit:
        ones.v += num[bit]
    return ones.v