def top(input, filter, bias, ):
    """Build a tiled 3x3 convolution followed by ReLU with the imperative HeteroCL DSL.

    The code has the shape of machine-generated output.  All loop bounds and
    buffer extents come from ``final_min_*`` / ``final_extent_*`` names that
    are defined outside this function.

    Args:
        input: Tensor read as ``input[x + rx, y + ry, rz, n]``.
        filter: Tensor read as ``filter[rx, ry, rz, z]``.
        bias: Tensor read as ``bias[z]``.

    Returns:
        The ``final`` tensor (hard-coded shape ``(64, 64, 32, 4)``) holding
        ``f_conv`` where it is greater than 0 and 0 elsewhere.
    """
    # NOTE(review): every hcl.for_ below is called as (min, extent); if hcl.for_
    # takes (begin, end) this is only correct when the min is 0 -- confirm.
    # input_extent_3_required_s is not referenced again in this function.
    input_extent_3_required_s = (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
    # Running products of the output extents, widened to 64 bits.
    final_total_extent_1 = (hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_1) * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_0))
    final_total_extent_2 = (final_total_extent_1 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_2))
    final_total_extent_3 = (final_total_extent_2 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_3))
    # Each select(a > b, a, b) below keeps the larger operand (assuming
    # hcl.select(cond, t, f) yields t when cond holds), so this is the largest
    # of three candidate upper bounds for the last index of f_conv.
    f_conv_n_extent_realized_s = hcl.select(hcl.select((((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)) > (final_extent_3 + -1), (((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)), (final_extent_3 + -1)) > (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//(hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) + 1)), hcl.select((((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)) > (final_extent_3 + -1), (((final_extent_2 * final_extent_3) + -1)//hcl.select(final_extent_2 > 1, final_extent_2, 1)), (final_extent_3 + -1)), (((((final_extent_2 + 31)//32) * final_extent_3) + -1)//(hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) + 1)))
    # The z extent is padded to cover whole tiles of 32 channels.
    f_conv_z_extent_realized = hcl.select(((hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) * 32) + 32) > final_extent_2, ((hcl.select(((final_extent_2 + -1)//32) > 0, ((final_extent_2 + -1)//32), 0) * 32) + 32), final_extent_2)
    # The y extent is likewise rounded up to a multiple of 32.
    f_conv = hcl.compute((final_extent_0, ((((final_extent_1 + -1)//32) * 32) + 32), f_conv_z_extent_realized, (f_conv_n_extent_realized_s + 1)), lambda x, y, z, w: 0, name = "f_conv", dtype = hcl.Float(bits = 32))
    with hcl.Stage("f_conv"):
        # Stage 0: initialise every output element with the bias of its channel.
        # The z and n loops are fused into one; % and // recover the two indices.
        with hcl.for_(0, (final_extent_2 * final_extent_3), name = "f_conv_s0_z_par") as f_conv_s0_z_par:
            with hcl.for_(final_min_1, final_extent_1, name = "f_conv_s0_y") as f_conv_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "f_conv_s0_x") as f_conv_s0_x:
                    f_conv[f_conv_s0_x, f_conv_s0_y, ((f_conv_s0_z_par % hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_2), ((f_conv_s0_z_par//hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_3)] = bias[((f_conv_s0_z_par % hcl.select(final_extent_2 > 1, final_extent_2, 1)) + final_min_2)]
        # Stage 1: accumulate filter * input over 32 input channels and a 3x3
        # window.  y and z are split into tiles of 32; the fused outer loop
        # walks (z tile, n) pairs.
        with hcl.for_(0, (((final_extent_2 + 31)//32) * final_extent_3), name = "f_conv_s1_z_z_par") as f_conv_s1_z_z_par:
            # Index of the current z tile.
            f_conv_s1_z_z_t_base_s = (f_conv_s1_z_z_par % hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
            with hcl.for_(0, 32, name = "f_conv_s1_r__z") as f_conv_s1_r__z:
                with hcl.for_(0, ((final_extent_1 + 31)//32), name = "f_conv_s1_y_y") as f_conv_s1_y_y:
                    with hcl.for_(0, 32, name = "f_conv_s1_z_z_t") as f_conv_s1_z_z_t:
                        with hcl.for_(0, 32, name = "f_conv_s1_y_y_t") as f_conv_s1_y_y_t:
                            with hcl.for_(final_min_0, final_extent_0, name = "f_conv_s1_x") as f_conv_s1_x:
                                with hcl.for_(0, 3, name = "f_conv_s1_r__y_r21") as f_conv_s1_r__y_r21:
                                    with hcl.for_(0, 3, name = "f_conv_s1_r__x_r20") as f_conv_s1_r__x_r20:
                                        # n index recovered from the fused loop variable.
                                        t51_s = (f_conv_s1_z_z_par//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1))
                                        f_conv[f_conv_s1_x, (((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t), (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t), ((f_conv_s1_z_z_par//hcl.select(((final_extent_2 + 31)//32) > 1, ((final_extent_2 + 31)//32), 1)) + final_min_3)] = (f_conv[f_conv_s1_x, (((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t), (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t), (final_min_3 + t51_s)] + (filter[f_conv_s1_r__x_r20, f_conv_s1_r__y_r21, f_conv_s1_r__z, (((f_conv_s1_z_z_t_base_s * 32) + final_min_2) + f_conv_s1_z_z_t)] * input[(f_conv_s1_r__x_r20 + f_conv_s1_x), ((((f_conv_s1_y_y * 32) + final_min_1) + f_conv_s1_y_y_t) + f_conv_s1_r__y_r21), f_conv_s1_r__z, (final_min_3 + t51_s)]))
    # NOTE(review): the output shape is hard-coded and presumably has to agree
    # with final_extent_0..3 -- confirm.
    final = hcl.compute((64, 64, 32, 4), lambda x, y, z, w: 0, name = "final", dtype = hcl.Float(bits = 32))
    with hcl.Stage("final"):
        with hcl.for_(final_min_3, final_extent_3, name = "final_s0_n") as final_s0_n:
            with hcl.for_(final_min_2, final_extent_2, name = "final_s0_z") as final_s0_z:
                with hcl.for_(final_min_1, final_extent_1, name = "final_s0_y") as final_s0_y:
                    with hcl.for_(final_min_0, final_extent_0, name = "final_s0_x") as final_s0_x:
                        # ReLU: keep positive values, replace everything else by 0.
                        final[final_s0_x, final_s0_y, final_s0_z, final_s0_n] = hcl.select(f_conv[final_s0_x, final_s0_y, final_s0_z, final_s0_n] > hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.000000), f_conv[final_s0_x, final_s0_y, final_s0_z, final_s0_n], hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.000000))
    return final
def kernel_select(a, b, c, d):
    """Fill ``d[0..9]`` using nested ``hcl.select`` expressions over ``a``, ``b`` and ``c``.

    Comments below assume ``hcl.select(cond, t, f)`` evaluates to ``t`` when
    ``cond`` holds and to ``f`` otherwise.

    Args:
        a, b, c: Tensors read at index ``i`` for ``i`` in ``[0, 10)``.
        d: Tensor written at the same indices.
    """
    # Scalar flag initialised to 1; it is the condition of the first select.
    use_imm = hcl.scalar(1)
    with hcl.for_(0, 10, name="i") as i:
        # The two candidates are cast to different widths: Int(16) vs Int(32).
        src = hcl.select(use_imm == 1, hcl.cast(hcl.Int(16), (c[i] + b[i])), hcl.cast(hcl.Int(32), (c[i] - b[i])))
        dst = hcl.cast(hcl.Int(32), (2 * (c[i] + b[i])))
        # d[i] = -src when dst < -src; otherwise src when dst > src, else a[i].
        d[i] = hcl.select(dst >= (-1 * src), hcl.select(dst <= src, a[i], src), (-1 * src))
def top(input, ):
    """Compute a 3x3 sliding-window maximum of ``input`` with the imperative HeteroCL DSL.

    Loop bounds come from ``final_min_*`` / ``final_extent_*``, which are
    defined outside this function.

    Args:
        input: 2-D tensor read as ``input[x + bx, y + by]`` with
            ``bx, by`` in ``[0, 3)``.

    Returns:
        The ``final`` tensor (hard-coded shape ``(640, 480)``), a copy of the
        per-pixel window maximum ``max_local``.
    """
    # NOTE(review): hcl.for_ is called as (min, extent) throughout; confirm
    # the mins are 0 if hcl.for_ expects (begin, end).
    # Not referenced again in this function.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1) *
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0))
    max_local = hcl.compute((final_extent_0, final_extent_1),
                            lambda x, y: 0,
                            name="max_local",
                            dtype=hcl.UInt(bits=16))
    with hcl.Stage("max_local"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="max_local_s0_y") as max_local_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="max_local_s0_x") as max_local_s0_x:
                # A fresh scratch buffer is declared for every output pixel.
                # NOTE(review): it is allocated as (1, 1) but indexed with the
                # outer loop variables rather than [0, 0] -- confirm this is
                # in bounds for the target backend.
                maximum = hcl.compute((1, 1),
                                      lambda x, y: 0,
                                      name="maximum",
                                      dtype=hcl.UInt(bits=16))
                with hcl.Stage("maximum"):
                    # Start from 0, then fold in the nine window samples.
                    maximum[max_local_s0_x,
                            max_local_s0_y] = hcl.cast(dtype=hcl.UInt(bits=16),
                                                       expr=0)
                    with hcl.for_(
                            0, 3,
                            name="maximum_s1_box__y") as maximum_s1_box__y:
                        with hcl.for_(
                                0, 3,
                                name="maximum_s1_box__x") as maximum_s1_box__x:
                            # select(cur > sample, cur, sample): keep the larger value.
                            maximum[max_local_s0_x,
                                    max_local_s0_y] = hcl.select(
                                        maximum[max_local_s0_x, max_local_s0_y]
                                        > input[(max_local_s0_x + maximum_s1_box__x),
                                                (max_local_s0_y + maximum_s1_box__y)],
                                        maximum[max_local_s0_x, max_local_s0_y],
                                        input[(max_local_s0_x + maximum_s1_box__x),
                                              (max_local_s0_y + maximum_s1_box__y)])
                max_local[max_local_s0_x,
                          max_local_s0_y] = maximum[max_local_s0_x,
                                                    max_local_s0_y]
    # NOTE(review): hard-coded output shape; presumably has to agree with
    # final_extent_0/1 -- confirm.
    final = hcl.compute((640, 480),
                        lambda x, y: 0,
                        name="final",
                        dtype=hcl.UInt(bits=16))
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="final_s0_y") as final_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="final_s0_x") as final_s0_x:
                final[final_s0_x, final_s0_y] = max_local[final_s0_x,
                                                          final_s0_y]
    return final
def algo(A, B):
    """Copy rows of ``B`` into ``A`` via ``hcl.mutate`` with a data-dependent row index.

    Args:
        A: Tensor written as ``A[idx][j]``.
        B: Tensor read as ``B[idx][j]``; bit slices of ``B[0][0]`` also
            supply the row offset and stride.
    """
    def f_mutate(i, j):
        # Both scalars are bit slices of the same element B[0][0].
        # NOTE(review): which bits [13:11] and [11:0] include follows
        # HeteroCL's bit-slicing rules -- confirm the intended widths.
        factor = hcl.scalar(B[0][0][13:11], name="factor")
        idx = hcl.scalar(B[0][0][11:0], dtype=hcl.UInt(16), name="idx")
        # Row index = base + i * factor.
        idx += i * hcl.cast(hcl.UInt(16), factor.v)
        A[idx][j] = B[idx][j]
    # The iteration domain is 5 x 5, built from a scalar rather than literals.
    bound = hcl.scalar(5, dtype=hcl.Int(32))
    domain = (hcl.cast(hcl.UInt(32), bound.v), hcl.cast(hcl.UInt(32), bound.v))
    hcl.mutate(domain, f_mutate)
def stateToIndex(sVals, iVals, bounds, ptsEachDim):
    """Map a 3-component state to grid indices, written into ``iVals``.

    For each of the three dimensions the state value is normalised by the
    ``bounds[d, 0]`` .. ``bounds[d, 1]`` interval, scaled by
    ``ptsEachDim[d] - 1`` and then converted to an integer.

    Args:
        sVals: State values, read at indices 0..2.
        iVals: Output buffer, written at indices 0..2.
        bounds: Read as ``bounds[d, 0]`` and ``bounds[d, 1]``.
        ptsEachDim: Read at indices 0..2.
    """
    axes = (0, 1, 2)

    # First pass: fractional grid position for every dimension.
    for d in axes:
        iVals[d] = ((sVals[d] - bounds[d, 0]) /
                    (bounds[d, 1] - bounds[d, 0])) * (ptsEachDim[d] - 1)

    # Second pass: add 0.5 before the integer cast to simulate rounding.
    for d in axes:
        iVals[d] = hcl.cast(hcl.Int(), iVals[d] + 0.5)
def full_like(array, fill_val, dtype=None, name='full_like'):
    """Return a tensor shaped like ``array`` with every element set to ``fill_val``.

    Args:
        array: Tensor whose ``shape`` (and, by default, ``dtype``) is reused.
        fill_val: Value cast to ``dtype`` for every element.
        dtype: Element type for the cast; falls back to ``array.dtype``.
        name: Name passed to ``hcl.compute``.
    """
    dtype = array.dtype if dtype is None else dtype
    # NOTE(review): hcl.init runs on every call; confirm that touching the
    # global HeteroCL configuration here is intended.
    hcl.init(dtype)
    return hcl.compute(
        array.shape,
        lambda *x: hcl.cast(dtype, fill_val),
        name=name,
    )
def full(shape=(1, ), fill_val=1, dtype=dtype, name='full'):
    """Return a tensor of the given shape with every element set to ``fill_val``.

    Args:
        shape: Output shape; a ``list`` is converted to a ``tuple`` first.
        fill_val: Value cast to ``dtype`` for every element.
        dtype: Element type; defaults to the module-level ``dtype``.
        name: Name passed to ``hcl.compute``.
    """
    dims = tuple(shape) if isinstance(shape, list) else shape
    return hcl.compute(
        dims,
        lambda *x: hcl.cast(dtype, fill_val),
        name=name,
        dtype=dtype,
    )
def kernel(A, B):
    """Declare three element-wise stages over ``A``'s shape and return the last.

    ``C`` is ``A`` cast to ``UInt(bw)`` and shifted left by ``sl``; ``D`` adds
    ``B`` to ``C``; ``E`` copies ``A``.  Only ``E`` is returned; ``C`` and
    ``D`` are still declared and are not consumed by ``E``.  ``bw`` and ``sl``
    are defined outside this function.
    """
    shape = A.shape
    C = hcl.compute(
        shape,
        lambda x: hcl.cast(hcl.UInt(bw), A[x]) << sl,
        dtype=hcl.UInt(bw),
    )
    D = hcl.compute(
        shape,
        lambda x: B[x] + C[x],
        dtype=hcl.UInt(bw),
    )
    E = hcl.compute(shape, lambda x: A[x])
    return E
def prelu(out, x, alpha):
    """Write the PReLU of a 2-D tensor ``x`` into ``out``.

    Element ``(i, j)`` becomes ``alpha[j] * x[i, j]`` (cast to ``x.dtype``)
    when ``x[i, j] < 0`` and ``x[i, j]`` otherwise.

    Args:
        out: Tensor updated in place through ``hcl.update``.
        x: 2-D input tensor.
        alpha: Per-column slopes, indexed by ``j``.

    Returns:
        Whatever ``hcl.update`` returns.

    Raises:
        AssertionError: If ``x`` is not 2-dimensional.
    """
    assert len(x.shape) == 2, "only support 2-dim PReLU"
    # No reduction takes place here, so no reduce axis is created.
    return hcl.update(
        out, lambda i, j: hcl.select(
            x[i, j] < 0,
            hcl.cast(x.dtype, alpha[j] * x[i, j]),
            x[i, j]))
def thresholdedrelu(out, x, theta):
    """Write the thresholded ReLU of a 2-D tensor ``x`` into ``out``.

    Element ``(i, j)`` keeps ``x[i, j]`` when it is greater than ``theta``
    and becomes 0 (cast to ``x.dtype``) otherwise.

    Args:
        out: Tensor updated in place through ``hcl.update``.
        x: 2-D input tensor.
        theta: Threshold compared against every element.

    Returns:
        Whatever ``hcl.update`` returns.

    Raises:
        AssertionError: If ``x`` is not 2-dimensional.
    """
    assert len(x.shape) == 2, "only support 2-dim ThresholdedReLU"
    # No reduction takes place here, so no reduce axis is created.
    return hcl.update(
        out, lambda i, j: hcl.select(
            x[i, j] > theta,
            x[i, j],
            hcl.cast(x.dtype, 0)))
def Sigmoid(exponent):
    """Approximate a sigmoid with saturation outside [-4, 4] and a table lookup inside.

    ``FTYPE`` and ``lut`` are defined outside this function.

    Args:
        exponent: Value compared against +/-4.0 and 0.0 and bit-sliced to
            form the table index.

    Returns:
        ``ret[0]``: 1.0 above 4.0, 0.0 below -4.0, otherwise an entry of ``lut``.
    """
    ret = hcl.scalar(0.0, "sigmoid", FTYPE)
    with hcl.if_(exponent > hcl.cast(FTYPE, 4.0)):
        ret[0] = 1.0
    with hcl.elif_(exponent < hcl.cast(FTYPE, -4.0)):
        ret[0] = 0.0
    with hcl.else_():
        with hcl.if_(exponent < hcl.cast(FTYPE, 0.0)):
            # Negative input: build the index from a bit slice of the input,
            # then count down from 2047.
            num = hcl.scalar(0, dtype=hcl.UFixed(18, 8))
            num[0][18:0] = exponent[29:11]
            # NOTE(review): ~(v << 8) + 1 looks like a two's-complement
            # negation of the shifted slice -- confirm against the fixed-point
            # layout of FTYPE.
            num[0] = ~(num[0] << 8) + 1
            index = 2047.0 - num[0]
            ret[0] = lut[hcl.cast(hcl.Int(32), index)]
        with hcl.else_():
            # Non-negative input: the bit slice is used as the index directly.
            index = exponent[21:11]
            ret[0] = lut[hcl.cast(hcl.Int(32), index)]
    return ret[0]
def zeros(shape=(1, ), dtype=dtype, name='zeros'):
    """Return a tensor of the given shape filled with zeros.

    Args:
        shape: Iterable of dimensions; any entry exposing a ``value``
            attribute is replaced by that attribute.
        dtype: Element type, normalised through ``hcl.dtype_to_hcl``;
            defaults to the module-level ``dtype``.
        name: Name passed to ``hcl.compute``.
    """
    dtype = hcl.dtype_to_hcl(dtype)
    dims = tuple(
        dim.value if hasattr(dim, 'value') else dim
        for dim in shape
    )
    return hcl.compute(
        dims,
        lambda *x: hcl.cast(dtype, 0),
        name=name,
        dtype=dtype,
    )
def top(input, ):
    """Compute a 3x3 box mean of ``input`` with the imperative HeteroCL DSL.

    Loop bounds come from ``final_min_*`` / ``final_extent_*``, which are
    defined outside this function.

    Args:
        input: 2-D tensor read as ``input[x + bx, y + by]`` with
            ``bx, by`` in ``[0, 3)``.

    Returns:
        The ``final`` tensor (hard-coded shape ``(6418, 4818)``), a copy of
        ``mean_local``.
    """
    # NOTE(review): hcl.for_ is called as (min, extent) throughout; confirm
    # the mins are 0 if hcl.for_ expects (begin, end).
    # Not referenced again in this function.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1) *
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0))
    mean_local = hcl.compute((final_extent_0, final_extent_1),
                             lambda x, y: 0,
                             name="mean_local",
                             dtype=hcl.UInt(bits=16))
    with hcl.Stage("mean_local"):
        # Stage 0: clear the accumulator.
        with hcl.for_(final_min_1, final_extent_1,
                      name="mean_local_s0_y") as mean_local_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="mean_local_s0_x") as mean_local_s0_x:
                mean_local[mean_local_s0_x,
                           mean_local_s0_y] = hcl.cast(dtype=hcl.UInt(bits=16),
                                                       expr=0)
        # Stage 1: add the nine window samples.  Each sample is integer-divided
        # by 9 *before* it is accumulated, so remainders are dropped per sample.
        with hcl.for_(final_min_1, final_extent_1,
                      name="mean_local_s1_y") as mean_local_s1_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="mean_local_s1_x") as mean_local_s1_x:
                with hcl.for_(
                        0, 3,
                        name="mean_local_s1_box__y") as mean_local_s1_box__y:
                    with hcl.for_(0, 3, name="mean_local_s1_box__x"
                                  ) as mean_local_s1_box__x:
                        mean_local[mean_local_s1_x, mean_local_s1_y] = (
                            mean_local[mean_local_s1_x, mean_local_s1_y] +
                            (input[(mean_local_s1_box__x + mean_local_s1_x),
                                   (mean_local_s1_box__y + mean_local_s1_y)] //
                             hcl.cast(dtype=hcl.UInt(bits=16), expr=9)))
    # NOTE(review): hard-coded output shape; presumably has to agree with
    # final_extent_0/1 -- confirm the values 6418 and 4818.
    final = hcl.compute((6418, 4818),
                        lambda x, y: 0,
                        name="final",
                        dtype=hcl.UInt(bits=16))
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="final_s0_y") as final_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="final_s0_x") as final_s0_x:
                final[final_s0_x, final_s0_y] = mean_local[final_s0_x,
                                                           final_s0_y]
    return final
def prelu(data, alpha, axis=1):
    """Return a new tensor applying PReLU to ``data`` with slopes taken from ``alpha``.

    Elements below 0 are multiplied by an ``alpha`` entry (the product is cast
    to ``data.dtype``); all other elements are passed through.

    Args:
        data: Input tensor; its shape is the output shape.
        alpha: Slope tensor, indexed with the result of ``_axis_ind``.
        axis: Position within the index tuple used to pick the slope.
    """
    def _axis_ind(axis, ind):
        # NOTE(review): only the first element of ``ind`` is inspected, and it
        # is then treated as a sequence itself -- confirm what hcl.compute
        # passes to a ``lambda *x`` callback.
        ind = ind[0]
        new_ind = []
        for i in range(len(ind)):
            if i == axis:
                # NOTE(review): this rebinds the list to a single entry, so
                # tuple() below only works if that entry is iterable -- confirm.
                new_ind = ind[i]
        return tuple(new_ind)
    return hcl.compute(
        data.shape, lambda *x: hcl.select(
            data[x] < 0,
            hcl.cast(data.dtype, alpha[_axis_ind(axis, x)] * data[x]),
            data[x])
    )
def top(
        A,
        B,
):
    """Compute a matrix product of ``A`` and ``B`` with the imperative HeteroCL DSL.

    Loop bounds come from ``final_min_*`` / ``final_extent_*``, which are
    defined outside this function; the reduction length is fixed at 1024.

    Args:
        A: Tensor read as ``A[x, k]``.
        B: Tensor read as ``B[k, y]``.

    Returns:
        The ``final`` tensor (hard-coded shape ``(1024, 1024)``), a copy of
        ``prod`` where ``prod[x, y]`` is the sum over ``k`` of
        ``A[x, k] * B[k, y]``.
    """
    # NOTE(review): hcl.for_ is called as (min, extent) throughout; confirm
    # the mins are 0 if hcl.for_ expects (begin, end).
    # Not referenced again in this function.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1) *
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0))
    prod = hcl.compute((final_extent_0, final_extent_1),
                       lambda x, y: 0,
                       name="prod",
                       dtype=hcl.Float(bits=32))
    with hcl.Stage("prod"):
        # Stage 0: clear the accumulator.
        with hcl.for_(final_min_1, final_extent_1,
                      name="prod_s0_y") as prod_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="prod_s0_x") as prod_s0_x:
                prod[prod_s0_x, prod_s0_y] = hcl.cast(dtype=hcl.Float(bits=32),
                                                      expr=0.000000)
        # Stage 1: accumulate the 1024 products for every output element.
        with hcl.for_(final_min_1, final_extent_1,
                      name="prod_s1_y") as prod_s1_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="prod_s1_x") as prod_s1_x:
                with hcl.for_(0, 1024, name="prod_s1_r__x") as prod_s1_r__x:
                    prod[prod_s1_x, prod_s1_y] = (prod[prod_s1_x, prod_s1_y] +
                                                  (A[prod_s1_x, prod_s1_r__x] *
                                                   B[prod_s1_r__x, prod_s1_y]))
    # NOTE(review): hard-coded output shape; presumably has to agree with
    # final_extent_0/1 -- confirm.
    final = hcl.compute((1024, 1024),
                        lambda x, y: 0,
                        name="final",
                        dtype=hcl.Float(bits=32))
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="final_s0_y") as final_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="final_s0_x") as final_s0_x:
                final[final_s0_x, final_s0_y] = prod[final_s0_x, final_s0_y]
    return final
def top(input, ):
    """Build a four-stage image pipeline: linearise, blur in x, blur in y, re-encode.

    Loop bounds come from ``final_min_*`` / ``final_extent_*``, which are
    defined outside this function.  The stage names (``linear``, ``srgb``) and
    the constants used suggest an sRGB decode / encode around a 3x3 box blur
    -- treat that reading as an assumption.

    Args:
        input: 3-D tensor read as ``input[x, y, c]`` over an area two elements
            larger than the output in both x and y.

    Returns:
        The ``final`` tensor (hard-coded shape ``(766, 1278, 3)``), a copy of
        the ``srgb`` stage.
    """
    # NOTE(review): hcl.for_ is called as (min, extent) throughout; confirm
    # the mins are 0 if hcl.for_ expects (begin, end).
    # Running products of the output extents; not referenced again below.
    final_total_extent_1 = (hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_1) * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_0))
    final_total_extent_2 = (final_total_extent_1 * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_2))
    # "linear" is two elements wider and taller than the output so both blur
    # passes can read x+2 / y+2.
    linear = hcl.compute(((final_extent_0 + 2), (final_extent_1 + 2), final_extent_2), lambda x, y, z: 0, name = "linear", dtype = hcl.Float(bits = 32))
    with hcl.Stage("linear"):
        with hcl.for_(final_min_2, final_extent_2, name = "linear_s0_c") as linear_s0_c:
            with hcl.for_(final_min_1, (final_extent_1 + 2), name = "linear_s0_y") as linear_s0_y:
                with hcl.for_(final_min_0, (final_extent_0 + 2), name = "linear_s0_x") as linear_s0_x:
                    t4 = input[linear_s0_x, linear_s0_y, linear_s0_c]
                    # Above the 0.040450 threshold: (t4 * 0.947867 + 0.052133) ** 2.4;
                    # at or below it: t4 * 0.077399.
                    linear[linear_s0_x, linear_s0_y, linear_s0_c] = hcl.select((hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.040450) < t4), hcl.power(((t4 * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.947867)) + hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.052133)), hcl.cast(dtype = hcl.Float(bits = 32), expr = 2.400000)), (t4 * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.077399)))
    blur_x = hcl.compute((final_extent_0, (final_extent_1 + 2), final_extent_2), lambda x, y, z: 0, name = "blur_x", dtype = hcl.Float(bits = 32))
    with hcl.Stage("blur_x"):
        with hcl.for_(final_min_2, final_extent_2, name = "blur_x_s0_c") as blur_x_s0_c:
            with hcl.for_(final_min_1, (final_extent_1 + 2), name = "blur_x_s0_y") as blur_x_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "blur_x_s0_x") as blur_x_s0_x:
                    # Sum of three horizontal neighbours (x, x+1, x+2) times 0.333333.
                    blur_x[blur_x_s0_x, blur_x_s0_y, blur_x_s0_c] = ((linear[(blur_x_s0_x + 2), blur_x_s0_y, blur_x_s0_c] + (linear[blur_x_s0_x, blur_x_s0_y, blur_x_s0_c] + linear[(blur_x_s0_x + 1), blur_x_s0_y, blur_x_s0_c])) * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.333333))
    blur_y = hcl.compute((final_extent_0, final_extent_1, final_extent_2), lambda x, y, z: 0, name = "blur_y", dtype = hcl.Float(bits = 32))
    with hcl.Stage("blur_y"):
        with hcl.for_(final_min_2, final_extent_2, name = "blur_y_s0_c") as blur_y_s0_c:
            with hcl.for_(final_min_1, final_extent_1, name = "blur_y_s0_y") as blur_y_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "blur_y_s0_x") as blur_y_s0_x:
                    # Sum of three vertical neighbours (y, y+1, y+2) times 0.333333.
                    blur_y[blur_y_s0_x, blur_y_s0_y, blur_y_s0_c] = ((blur_x[blur_y_s0_x, (blur_y_s0_y + 2), blur_y_s0_c] + (blur_x[blur_y_s0_x, blur_y_s0_y, blur_y_s0_c] + blur_x[blur_y_s0_x, (blur_y_s0_y + 1), blur_y_s0_c])) * hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.333333))
    srgb = hcl.compute((final_extent_0, final_extent_1, final_extent_2), lambda x, y, z: 0, name = "srgb", dtype = hcl.Float(bits = 32))
    with hcl.Stage("srgb"):
        with hcl.for_(final_min_2, final_extent_2, name = "srgb_s0_c") as srgb_s0_c:
            with hcl.for_(final_min_1, final_extent_1, name = "srgb_s0_y") as srgb_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "srgb_s0_x") as srgb_s0_x:
                    t5 = blur_y[srgb_s0_x, srgb_s0_y, srgb_s0_c]
                    # Above the 0.003131 threshold: t5 ** 0.416667 * 1.055 - 0.055;
                    # at or below it: t5 * 12.92.
                    srgb[srgb_s0_x, srgb_s0_y, srgb_s0_c] = hcl.select((hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.003131) < t5), ((hcl.power(t5, hcl.cast(dtype = hcl.Float(bits = 32), expr = 0.416667)) * hcl.cast(dtype = hcl.Float(bits = 32), expr = 1.055000)) + hcl.cast(dtype = hcl.Float(bits = 32), expr = -0.055000)), (t5 * hcl.cast(dtype = hcl.Float(bits = 32), expr = 12.920000)))
    # NOTE(review): hard-coded output shape; presumably has to agree with
    # final_extent_0..2 -- confirm.
    final = hcl.compute((766, 1278, 3), lambda x, y, z: 0, name = "final", dtype = hcl.Float(bits = 32))
    with hcl.Stage("final"):
        with hcl.for_(final_min_2, final_extent_2, name = "final_s0_c") as final_s0_c:
            with hcl.for_(final_min_1, final_extent_1, name = "final_s0_y") as final_s0_y:
                with hcl.for_(final_min_0, final_extent_0, name = "final_s0_x") as final_s0_x:
                    final[final_s0_x, final_s0_y, final_s0_c] = srgb[final_s0_x, final_s0_y, final_s0_c]
    return final
def _dilate(*indices):
    """Return the dilated value of ``data`` at ``indices``.

    Relies on ``n``, ``strides`` and ``data`` from the enclosing scope.  For
    every dimension whose stride is not 1 the index is divided by the stride;
    the output element is taken from ``data`` only when every such index is an
    exact multiple of its stride, and is 0 (cast to ``data.dtype``) otherwise.

    Args:
        *indices: One symbolic index per output dimension.

    Returns:
        An expression for the output element.
    """
    not_zero = []
    index_tuple = []
    for i in range(n):
        if strides[i] != 1:
            index_tuple.append(indices[i] / strides[i])
            not_zero.append((indices[i] % strides[i]).equal(0))
        else:
            index_tuple.append(indices[i])
    if not_zero:
        # The divisibility test is a symbolic expression, so the choice has to
        # be made per element inside the generated code, not by a Python
        # ``if`` while the expression is being built.
        on_grid = tvm.api.all(*not_zero)
        return hcl.select(on_grid, data(*index_tuple),
                          hcl.cast(data.dtype, 0.0))
    return data(*index_tuple)
def top(input, ):
    """Compute a separable 3x3 box blur of ``input`` with the imperative HeteroCL DSL.

    Loop bounds come from ``final_min_*`` / ``final_extent_*``, which are
    defined outside this function.

    Args:
        input: 2-D tensor read as ``input[x .. x+2, y]`` over
            ``final_extent_1 + 2`` rows.

    Returns:
        The ``final`` tensor (hard-coded shape ``(640, 480)``), a copy of
        ``blur_y``.
    """
    # NOTE(review): hcl.for_ is called as (min, extent) throughout; confirm
    # the mins are 0 if hcl.for_ expects (begin, end).
    # Not referenced again in this function.
    final_total_extent_1 = (hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_1) * hcl.cast(dtype = hcl.Int(bits = 64), expr = final_extent_0))
    # blur_x keeps two extra rows so the vertical pass can read y+2.
    blur_x = hcl.compute((final_extent_0, (final_extent_1 + 2)), lambda x, y: 0, name = "blur_x", dtype = hcl.UInt(bits = 16))
    with hcl.Stage("blur_x"):
        with hcl.for_(final_min_1, (final_extent_1 + 2), name = "blur_x_s0_y") as blur_x_s0_y:
            with hcl.for_(final_min_0, final_extent_0, name = "blur_x_s0_x") as blur_x_s0_x:
                # Sum of three horizontal neighbours, integer-divided by 3.
                blur_x[blur_x_s0_x, blur_x_s0_y] = ((input[(blur_x_s0_x + 2), blur_x_s0_y] + (input[blur_x_s0_x, blur_x_s0_y] + input[(blur_x_s0_x + 1), blur_x_s0_y]))//hcl.cast(dtype = hcl.UInt(bits = 16), expr = 3))
    blur_y = hcl.compute((final_extent_0, final_extent_1), lambda x, y: 0, name = "blur_y", dtype = hcl.UInt(bits = 16))
    with hcl.Stage("blur_y"):
        with hcl.for_(final_min_1, final_extent_1, name = "blur_y_s0_y") as blur_y_s0_y:
            with hcl.for_(final_min_0, final_extent_0, name = "blur_y_s0_x") as blur_y_s0_x:
                # Sum of three vertical neighbours, integer-divided by 3.
                blur_y[blur_y_s0_x, blur_y_s0_y] = ((blur_x[blur_y_s0_x, (blur_y_s0_y + 2)] + (blur_x[blur_y_s0_x, blur_y_s0_y] + blur_x[blur_y_s0_x, (blur_y_s0_y + 1)]))//hcl.cast(dtype = hcl.UInt(bits = 16), expr = 3))
    # NOTE(review): hard-coded output shape; presumably has to agree with
    # final_extent_0/1 -- confirm.
    final = hcl.compute((640, 480), lambda x, y: 0, name = "final", dtype = hcl.UInt(bits = 16))
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1, name = "final_s0_y") as final_s0_y:
            with hcl.for_(final_min_0, final_extent_0, name = "final_s0_x") as final_s0_x:
                final[final_s0_x, final_s0_y] = blur_y[final_s0_x, final_s0_y]
    return final
def zeros(in_shape, dtype=dtype, name='zeros'):
    """Return a tensor of shape ``in_shape`` whose elements are 0 cast to ``dtype``.

    Args:
        in_shape: Output shape, passed to ``hcl.compute`` unchanged.
        dtype: Type used for the cast; defaults to the module-level ``dtype``.
        name: Name passed to ``hcl.compute``.
    """
    return hcl.compute(
        in_shape,
        lambda *x: hcl.cast(dtype, 0),
        name=name,
    )
def top(input, filter, bias):
    """Build a 3x3 convolution followed by ReLU with the imperative HeteroCL DSL.

    Loop bounds and buffer shapes come from ``output_min_*`` /
    ``output_extent_*``, which are defined outside this function.

    Args:
        input: Tensor read as ``input[x + rx, y + ry, rz, n]``.
        filter: Tensor read as ``filter[rx, ry, rz, z]``.
        bias: Tensor read as ``bias[z]``.

    Returns:
        The ``f_relu`` tensor: ``f_conv`` where it is greater than 0 and 0
        elsewhere.
    """
    # NOTE(review): hcl.for_ is called as (min, extent) throughout; confirm
    # the mins are 0 if hcl.for_ expects (begin, end).
    out_shape = (output_extent_0, output_extent_1, output_extent_2,
                 output_extent_3)
    f_conv = hcl.compute(out_shape,
                         lambda x, y, z, n: 0,
                         name="f_conv",
                         dtype=hcl.Float(bits=32))
    with hcl.Stage("f_conv"):
        # Stage 0: initialise every output element with its channel bias.
        with hcl.for_(output_min_3, output_extent_3,
                      name="f_conv_s0_n") as f_conv_s0_n:
            with hcl.for_(output_min_2, output_extent_2,
                          name="f_conv_s0_c") as f_conv_s0_z:
                with hcl.for_(output_min_1, output_extent_1,
                              name="f_conv_s0_y") as f_conv_s0_y:
                    with hcl.for_(output_min_0, output_extent_0,
                                  name="f_conv_s0_x") as f_conv_s0_x:
                        f_conv[f_conv_s0_x, f_conv_s0_y, f_conv_s0_z,
                               f_conv_s0_n] = bias[f_conv_s0_z]
        # Stage 1: accumulate filter * input over 32 input channels and a
        # 3x3 window.
        with hcl.for_(output_min_3, output_extent_3,
                      name="f_conv_s1_n") as f_conv_s1_n:
            with hcl.for_(output_min_2, output_extent_2,
                          name="f_conv_s1_c") as f_conv_s1_z:
                with hcl.for_(output_min_1, output_extent_1,
                              name="f_conv_s1_y") as f_conv_s1_y:
                    with hcl.for_(output_min_0, output_extent_0,
                                  name="f_conv_s1_x") as f_conv_s1_x:
                        with hcl.for_(0, 32,
                                      name="f_conv_s1_r__z") as f_conv_s1_r__z:
                            with hcl.for_(
                                    0, 3,
                                    name="f_conv_s1_r__y") as f_conv_s1_r__y:
                                with hcl.for_(0, 3, name="f_conv_s1_r__x"
                                              ) as f_conv_s1_r__x:
                                    f_conv[f_conv_s1_x, f_conv_s1_y,
                                           f_conv_s1_z, f_conv_s1_n] = (
                                        f_conv[f_conv_s1_x, f_conv_s1_y,
                                               f_conv_s1_z, f_conv_s1_n] +
                                        (filter[f_conv_s1_r__x, f_conv_s1_r__y,
                                                f_conv_s1_r__z, f_conv_s1_z] *
                                         input[(f_conv_s1_r__x + f_conv_s1_x),
                                               (f_conv_s1_r__y + f_conv_s1_y),
                                               f_conv_s1_r__z, f_conv_s1_n]))
    # The ReLU buffer carries its own name; it used to reuse "f_conv", which
    # gave two distinct tensors the same name.
    f_relu = hcl.compute(out_shape,
                         lambda x, y, z, n: 0,
                         name="f_relu",
                         dtype=hcl.Float(bits=32))
    with hcl.Stage("f_relu"):
        with hcl.for_(output_min_3, output_extent_3,
                      name="f_relu_s0_n") as f_relu_s0_n:
            with hcl.for_(output_min_2, output_extent_2,
                          name="f_relu_s0_c") as f_relu_s0_z:
                with hcl.for_(output_min_1, output_extent_1,
                              name="f_relu_s0_y") as f_relu_s0_y:
                    with hcl.for_(output_min_0, output_extent_0,
                                  name="f_relu_s0_x") as f_relu_s0_x:
                        # Keep positive values, replace everything else by 0.
                        f_relu[f_relu_s0_x, f_relu_s0_y, f_relu_s0_z,
                               f_relu_s0_n] = hcl.select(
                            f_conv[f_relu_s0_x, f_relu_s0_y, f_relu_s0_z,
                                   f_relu_s0_n] >
                            hcl.cast(dtype=hcl.Float(bits=32), expr=0.000000),
                            f_conv[f_relu_s0_x, f_relu_s0_y, f_relu_s0_z,
                                   f_relu_s0_n],
                            hcl.cast(dtype=hcl.Float(bits=32), expr=0.000000))
    return f_relu
def top(
        input,
        filter,
        bias,
):
    """Build a 4x4-tiled 3x3 convolution + ReLU with the imperative HeteroCL DSL.

    Loop bounds and buffer extents come from ``final_min_*`` /
    ``final_extent_*``, which are defined outside this function.

    Args:
        input: Tensor read as ``input[x + rx, y + ry, rz, n]``.
        filter: Tensor read as ``filter[rx, ry, rz, z]``.
        bias: Tensor read as ``bias[z]``.

    Returns:
        The ``final`` tensor (hard-coded shape ``(64, 64, 32, 4)``), a copy of
        ``f_relu``.
    """
    # NOTE(review): hcl.for_ is called as (min, extent) for the non-tiled
    # loops; confirm the mins are 0 if hcl.for_ expects (begin, end).
    # Running products of the output extents; final_total_extent_3 is not
    # referenced again in this function.
    final_total_extent_1 = (
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_1) *
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_0))
    final_total_extent_2 = (
        final_total_extent_1 *
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_2))
    final_total_extent_3 = (
        final_total_extent_2 *
        hcl.cast(dtype=hcl.Int(bits=64), expr=final_extent_3))
    # x and y extents are at least 4 so that one full 4x4 tile always fits.
    f_conv = hcl.compute(
        (hcl.select(final_extent_0 > 4, final_extent_0, 4),
         hcl.select(final_extent_1 > 4, final_extent_1, 4), final_extent_2,
         final_extent_3),
        lambda x, y, z, w: 0,
        name="f_conv",
        dtype=hcl.Float(bits=32))
    with hcl.Stage("f_conv"):
        # Stage 0: initialise every output element with its channel bias,
        # walking x and y in 4x4 tiles.
        with hcl.for_(final_min_3, final_extent_3,
                      name="f_conv_s0_n") as f_conv_s0_n:
            with hcl.for_(final_min_2, final_extent_2,
                          name="f_conv_s0_z") as f_conv_s0_z:
                with hcl.for_(0, ((final_extent_1 + 3) // 4),
                              name="f_conv_s0_y_y") as f_conv_s0_y_y:
                    # Tile origin, clamped to extent - 4: the last tile is
                    # shifted inwards instead of running past the end.
                    f_conv_s0_y_yi_base_s = hcl.select(
                        (f_conv_s0_y_y * 4) < (final_extent_1 + -4),
                        (f_conv_s0_y_y * 4), (final_extent_1 + -4))
                    with hcl.for_(0, ((final_extent_0 + 3) // 4),
                                  name="f_conv_s0_x_x") as f_conv_s0_x_x:
                        f_conv_s0_x_xi_base_s = hcl.select(
                            (f_conv_s0_x_x * 4) < (final_extent_0 + -4),
                            (f_conv_s0_x_x * 4), (final_extent_0 + -4))
                        with hcl.for_(0, 4,
                                      name="f_conv_s0_y_yi") as f_conv_s0_y_yi:
                            with hcl.for_(
                                    0, 4,
                                    name="f_conv_s0_x_xi") as f_conv_s0_x_xi:
                                f_conv[((f_conv_s0_x_xi_base_s + final_min_0) +
                                        f_conv_s0_x_xi),
                                       ((f_conv_s0_y_yi_base_s + final_min_1) +
                                        f_conv_s0_y_yi), f_conv_s0_z,
                                       f_conv_s0_n] = bias[f_conv_s0_z]
        # An untiled version of the accumulation below (loops n, z, y, x,
        # r__z, r__y, r__x performing the same update) was previously kept
        # here as commented-out code.
        # Stage 1: accumulate filter * input, tiled 4x4 in x and y with the
        # 32 input channels (r__z) hoisted outside the tiles.
        # NOTE(review): because the last tile is shifted inwards, it overlaps
        # the previous tile whenever final_extent_0 or final_extent_1 is not a
        # multiple of 4, and the overlapping elements would be accumulated
        # twice -- confirm the extents are always multiples of 4.
        with hcl.for_(final_min_3, final_extent_3,
                      name="f_conv_s1_n") as f_conv_s1_n:
            with hcl.for_(0, 32, name="f_conv_s1_r__z") as f_conv_s1_r__z:
                with hcl.for_(0, ((final_extent_1 + 3) // 4),
                              name="f_conv_s1_y_y") as f_conv_s1_y_y:
                    f_conv_s1_y_yi_base_s = hcl.select(
                        (f_conv_s1_y_y * 4) < (final_extent_1 + -4),
                        (f_conv_s1_y_y * 4), (final_extent_1 + -4))
                    with hcl.for_(0, ((final_extent_0 + 3) // 4),
                                  name="f_conv_s1_x_x") as f_conv_s1_x_x:
                        f_conv_s1_x_xi_base_s = hcl.select(
                            (f_conv_s1_x_x * 4) < (final_extent_0 + -4),
                            (f_conv_s1_x_x * 4), (final_extent_0 + -4))
                        with hcl.for_(0, 4,
                                      name="f_conv_s1_y_yi") as f_conv_s1_y_yi:
                            with hcl.for_(
                                    0, 4,
                                    name="f_conv_s1_x_xi") as f_conv_s1_x_xi:
                                with hcl.for_(
                                        final_min_2, final_extent_2,
                                        name="f_conv_s1_z") as f_conv_s1_z:
                                    with hcl.for_(0, 3, name="f_conv_s1_r__y"
                                                  ) as f_conv_s1_r__y:
                                        with hcl.for_(0, 3, name="f_conv_s1_r__x"
                                                      ) as f_conv_s1_r__x:
                                            f_conv[(
                                                (f_conv_s1_x_xi_base_s + final_min_0) + f_conv_s1_x_xi
                                            ), (
                                                (f_conv_s1_y_yi_base_s + final_min_1) + f_conv_s1_y_yi
                                            ), f_conv_s1_z, f_conv_s1_n] = (
                                                f_conv[
                                                    ((f_conv_s1_x_xi_base_s + final_min_0) + f_conv_s1_x_xi),
                                                    ((f_conv_s1_y_yi_base_s + final_min_1) + f_conv_s1_y_yi),
                                                    f_conv_s1_z, f_conv_s1_n] +
                                                (filter[f_conv_s1_r__x, f_conv_s1_r__y, f_conv_s1_r__z, f_conv_s1_z] *
                                                 input[(f_conv_s1_r__x + (
                                                     (f_conv_s1_x_xi_base_s + final_min_0) + f_conv_s1_x_xi)),
                                                       (f_conv_s1_r__y + ((f_conv_s1_y_yi_base_s + final_min_1) + f_conv_s1_y_yi)),
                                                       f_conv_s1_r__z, f_conv_s1_n]))
    f_relu = hcl.compute(
        (final_extent_0, final_extent_1, final_extent_2, final_extent_3),
        lambda x, y, z, w: 0,
        name="f_relu",
        dtype=hcl.Float(bits=32))
    with hcl.Stage("f_relu"):
        with hcl.for_(final_min_3, final_extent_3,
                      name="f_relu_s0_n") as f_relu_s0_n:
            with hcl.for_(final_min_2, final_extent_2,
                          name="f_relu_s0_z") as f_relu_s0_z:
                with hcl.for_(final_min_1, final_extent_1,
                              name="f_relu_s0_y") as f_relu_s0_y:
                    with hcl.for_(final_min_0, final_extent_0,
                                  name="f_relu_s0_x") as f_relu_s0_x:
                        # Keep positive values, replace everything else by 0.
                        f_relu[f_relu_s0_x, f_relu_s0_y, f_relu_s0_z,
                               f_relu_s0_n] = hcl.select(
                                   f_conv[f_relu_s0_x, f_relu_s0_y, f_relu_s0_z, f_relu_s0_n] > hcl.cast(
                                       dtype=hcl.Float(bits=32), expr=0.000000),
                                   f_conv[f_relu_s0_x, f_relu_s0_y, f_relu_s0_z, f_relu_s0_n],
                                   hcl.cast(dtype=hcl.Float(bits=32), expr=0.000000))
    # NOTE(review): hard-coded output shape; presumably has to agree with
    # final_extent_0..3 -- confirm.
    final = hcl.compute((64, 64, 32, 4),
                        lambda x, y, z, w: 0,
                        name="final",
                        dtype=hcl.Float(bits=32))
    with hcl.Stage("final"):
        with hcl.for_(final_min_3, final_extent_3,
                      name="final_s0_n") as final_s0_n:
            with hcl.for_(final_min_2, final_extent_2,
                          name="final_s0_z") as final_s0_z:
                with hcl.for_(final_min_1, final_extent_1,
                              name="final_s0_y") as final_s0_y:
                    with hcl.for_(final_min_0, final_extent_0,
                                  name="final_s0_x") as final_s0_x:
                        final[final_s0_x, final_s0_y, final_s0_z,
                              final_s0_n] = f_relu[final_s0_x, final_s0_y,
                                                   final_s0_z, final_s0_n]
    return final
def stateToIndexInterpolants(Qopt, sVals, actions, bounds, ptsEachDim, interpV, fillVal):
    """Trilinearly interpolate the best Q-value around a continuous 3-D state.

    The state is mapped to a fractional grid position.  For each of the eight
    surrounding grid corners the value starts at ``fillVal[0]`` and, if the
    corner lies inside ``Qopt``, is raised to the largest ``Qopt`` entry over
    all actions that exceeds it.  The eight corner values are then blended
    along i, j and k and the result is stored in ``interpV[0]``.

    Args:
        Qopt: 4-D table indexed as ``Qopt[i, j, k, action]``.
        sVals: State values, read at indices 0..2.
        actions: Only ``actions.shape[0]`` is used (number of actions).
        bounds: Read as ``bounds[d, 0]`` and ``bounds[d, 1]``.
        ptsEachDim: Read at indices 0..2.
        interpV: Output buffer; element 0 receives the interpolated value.
        fillVal: Element 0 is the starting value for every corner.
    """
    # Scratch scalars, declared in a fixed order.
    iMin, jMin, kMin, iMax, jMax, kMax = [
        hcl.scalar(0, tag)
        for tag in ("iMin", "jMin", "kMin", "iMax", "jMax", "kMax")
    ]
    c000, c001, c010, c011, c100, c101, c110, c111 = [
        hcl.scalar(fillVal[0], tag)
        for tag in ("c000", "c001", "c010", "c011",
                    "c100", "c101", "c110", "c111")
    ]
    c00, c01, c10, c11 = [
        hcl.scalar(0, tag) for tag in ("c00", "c01", "c10", "c11")
    ]
    c0, c1 = [hcl.scalar(0, tag) for tag in ("c0", "c1")]
    ia, ja, ka = [hcl.scalar(0, tag) for tag in ("ia", "ja", "ka")]
    di, dj, dk = [hcl.scalar(0, tag) for tag in ("di", "dj", "dk")]

    positions = (ia, ja, ka)
    lower = (iMin, jMin, kMin)
    upper = (iMax, jMax, kMax)
    weights = (di, dj, dk)

    # Unrounded index value along each axis.
    for axis, pos in enumerate(positions):
        pos[0] = ((sVals[axis] - bounds[axis, 0]) /
                  (bounds[axis, 1] - bounds[axis, 0])) * (ptsEachDim[axis] - 1)

    # Neighbouring grid indices on either side of the position.  Negative
    # positions step down by one before the integer cast.
    for pos, lo, hi in zip(positions, lower, upper):
        with hcl.if_(pos[0] < 0):
            lo[0] = hcl.cast(hcl.Int(), pos[0] - 1.0)
            hi[0] = hcl.cast(hcl.Int(), pos[0])
        with hcl.else_():
            lo[0] = hcl.cast(hcl.Int(), pos[0])
            hi[0] = hcl.cast(hcl.Int(), pos[0] + 1.0)

    # Interpolation weight along each axis: distance from the lower neighbour.
    for weight, pos, lo in zip(weights, positions, lower):
        weight[0] = pos[0] - lo[0]

    # Value of each neighbouring state: (destination, i, j, k).
    corners = (
        (c000, iMin, jMin, kMin),
        (c001, iMin, jMin, kMax),
        (c010, iMin, jMax, kMin),
        (c011, iMin, jMax, kMax),
        (c100, iMax, jMin, kMin),
        (c101, iMax, jMin, kMax),
        (c110, iMax, jMax, kMin),
        (c111, iMax, jMax, kMax),
    )
    for best, i, j, k in corners:
        # Only corners inside the table are looked up; others keep fillVal[0].
        with hcl.if_(
                hcl.and_(i[0] < Qopt.shape[0], j[0] < Qopt.shape[1],
                         k[0] < Qopt.shape[2])):
            with hcl.if_(hcl.and_(i[0] >= 0, j[0] >= 0, k[0] >= 0)):
                with hcl.for_(0, actions.shape[0], name="a_") as a_:
                    with hcl.if_(best[0] < Qopt[i[0], j[0], k[0], a_]):
                        best[0] = Qopt[i[0], j[0], k[0], a_]

    # Linear interpolation: (destination, near value, far value, weight),
    # first along i, then j, then k.
    blends = (
        (c00, c000, c100, di),
        (c01, c001, c101, di),
        (c10, c010, c110, di),
        (c11, c011, c111, di),
        (c0, c00, c10, dj),
        (c1, c01, c11, dj),
        (interpV, c0, c1, dk),
    )
    for dest, near, far, weight in blends:
        dest[0] = (near[0] * (1 - weight[0])) + (far[0] * weight[0])
def cast(array, dtype=None, name='cast'):
    """Return a tensor with ``array``'s shape whose elements are cast to ``dtype``.

    Args:
        array: Source tensor.
        dtype: Target type, used both for the per-element cast and as the
            output tensor's ``dtype``.
        name: Name passed to ``hcl.compute``.
    """
    # NOTE(review): the default dtype=None is handed straight to hcl.cast;
    # confirm that hcl.cast accepts None.
    return hcl.compute(
        array.shape,
        lambda *x: hcl.cast(dtype, array[x]),
        dtype=dtype,
        name=name,
    )
def dropout(data, rate=0.5):
    """Return a copy of ``data`` together with an all-ones mask of the same shape.

    No elements are dropped: the first result reproduces ``data`` element by
    element and the mask is 1 everywhere (cast to the module-level ``dtype``).

    Args:
        data: Input tensor.
        rate: Accepted but not used by this implementation.

    Returns:
        Tuple ``(data_copy, mask)``.
    """
    data = hcl.compute(
        data.shape,
        lambda *x: data[x],
    )
    mask = hcl.compute(
        data.shape,
        lambda *x: hcl.cast(dtype, 1),
    )
    return data, mask
def thresholdedrelu(data, theta):
    """Return a tensor keeping elements of ``data`` above ``theta`` and zeroing the rest.

    Args:
        data: Input tensor; its shape is the output shape.
        theta: Threshold compared against every element.
    """
    return hcl.compute(
        data.shape,
        lambda *x: hcl.select(
            data[x] > theta,
            data[x],
            hcl.cast(data.dtype, 0),
        ),
    )
def full(in_shape, fill_val=1, dtype=dtype, name='full'):
    """Return a tensor of shape ``in_shape`` whose elements are ``fill_val`` cast to ``dtype``.

    Args:
        in_shape: Output shape, passed to ``hcl.compute`` unchanged.
        fill_val: Value placed in every element.
        dtype: Type used for the cast; defaults to the module-level ``dtype``.
        name: Name passed to ``hcl.compute``.
    """
    return hcl.compute(
        in_shape,
        lambda *x: hcl.cast(dtype, fill_val),
        name=name,
    )
def relu(data, name='relu'):
    """Return a tensor with negative elements of ``data`` replaced by 0.

    Args:
        data: Input tensor; its shape is the output shape and its ``dtype``
            is used for the zero constant.
        name: Passed to ``hcl.compute`` as its third positional argument.
    """
    return hcl.compute(
        data.shape,
        lambda *y: hcl.select(
            data[y] < 0,
            hcl.cast(data.dtype, 0),
            data[y],
        ),
        name,
    )
def _harris_select_max(candidate, running):
    """Build ``hcl.select(candidate > running, candidate, running)``.

    The expression yields ``candidate`` when it compares greater and
    ``running`` otherwise (including when the two compare equal).
    """
    return hcl.select(candidate > running, candidate, running)


def _harris_box_sum_3x3(name, source):
    """Emit the stage ``name`` that sums a 3x3 window of ``source`` per element.

    Args:
        name: Stage/tensor name; also the prefix of every loop name emitted.
        source: Tensor read at ``[box_x + x, box_y + y]`` for box offsets 0..2.

    Returns:
        The 32-bit integer tensor of shape
        ``(final_extent_0 + 2, final_extent_1 + 2)`` holding the sums.
    """
    summed = hcl.compute(((final_extent_0 + 2), (final_extent_1 + 2)),
                         lambda x, y: 0, name=name, dtype=hcl.Int(bits=32))
    with hcl.Stage(name):
        # s0: clear the accumulator.
        with hcl.for_(final_min_1, (final_extent_1 + 2),
                      name=name + "_s0_y") as s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 2),
                          name=name + "_s0_x") as s0_x:
                summed[s0_x, s0_y] = 0
        # s1: accumulate the nine window elements.
        with hcl.for_(final_min_1, (final_extent_1 + 2),
                      name=name + "_s1_y") as s1_y:
            with hcl.for_(final_min_0, (final_extent_0 + 2),
                          name=name + "_s1_x") as s1_x:
                with hcl.for_(0, 3, name=name + "_s1_box__y") as box_y:
                    with hcl.for_(0, 3, name=name + "_s1_box__x") as box_x:
                        summed[s1_x, s1_y] = (
                            summed[s1_x, s1_y]
                            + source[(box_x + s1_x), (box_y + s1_y)])
    return summed


def top(input):
    """Build the gradient / corner-response pipeline and return its output tensor.

    Stages are emitted in this order:

    * ``padded16``  -- ``input`` cast to 16-bit integers.
    * ``grad_x`` / ``grad_y`` -- 3x3 weighted differences (weights 1, 2, 1)
      of ``padded16`` along the first / second index.
    * ``grad_xx`` / ``grad_xy`` / ``grad_yy`` -- 32-bit products of the
      gradients.
    * ``grad_gx`` / ``grad_gxy`` / ``grad_gy`` -- 3x3 box sums of the products.
    * ``cim`` -- ``gx * gy - gxy * gxy - 0.04 * (gx + gy) ** 2`` computed in
      32-bit float after floor-dividing each sum by 144 (this has the form of
      a Harris corner response; the name is inferred from the arithmetic).
    * ``output_final`` -- 255 where the ``cim`` value at offset ``(1, 1)`` is
      at least 100 and strictly greater than the compared neighbours, else 0.
    * ``final`` -- copy of ``output_final``.

    ``final_min_0``, ``final_min_1``, ``final_extent_0`` and ``final_extent_1``
    are not parameters; they are read from the enclosing (presumably module)
    scope.

    Args:
        input: Tensor indexed as ``input[x, y]`` by the ``padded16`` stage.

    Returns:
        The ``final`` tensor (unsigned 16-bit, declared shape ``(2442, 3258)``).
    """
    padded16 = hcl.compute(((final_extent_0 + 6), (final_extent_1 + 6)),
                           lambda x, y: 0, name="padded16",
                           dtype=hcl.Int(bits=16))
    with hcl.Stage("padded16"):
        with hcl.for_(final_min_1, (final_extent_1 + 6),
                      name="padded16_s0_y") as padded16_s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 6),
                          name="padded16_s0_x") as padded16_s0_x:
                padded16[padded16_s0_x, padded16_s0_y] = hcl.cast(
                    dtype=hcl.Int(bits=16),
                    expr=input[padded16_s0_x, padded16_s0_y])

    grad_x = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                         lambda x, y: 0, name="grad_x",
                         dtype=hcl.Int(bits=16))
    with hcl.Stage("grad_x"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_x_s0_y") as grad_x_s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_x_s0_x") as grad_x_s0_x:
                # Column x+2 minus column x, middle row weighted by 2.
                grad_x[grad_x_s0_x, grad_x_s0_y] = (
                    padded16[(grad_x_s0_x + 2), (grad_x_s0_y + 2)]
                    + (((padded16[(grad_x_s0_x + 2), (grad_x_s0_y + 1)]
                         * hcl.cast(dtype=hcl.Int(bits=16), expr=2))
                        + ((padded16[(grad_x_s0_x + 2), grad_x_s0_y]
                            - padded16[grad_x_s0_x, grad_x_s0_y])
                           - (padded16[grad_x_s0_x, (grad_x_s0_y + 1)]
                              * hcl.cast(dtype=hcl.Int(bits=16), expr=2))))
                       - padded16[grad_x_s0_x, (grad_x_s0_y + 2)]))

    grad_xx = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                          lambda x, y: 0, name="grad_xx",
                          dtype=hcl.Int(bits=32))
    with hcl.Stage("grad_xx"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_xx_s0_y") as grad_xx_s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_xx_s0_x") as grad_xx_s0_x:
                t30_s = grad_x[grad_xx_s0_x, grad_xx_s0_y]
                # Widen to 32 bits before squaring.
                grad_xx[grad_xx_s0_x, grad_xx_s0_y] = (
                    hcl.cast(dtype=hcl.Int(bits=32), expr=t30_s)
                    * hcl.cast(dtype=hcl.Int(bits=32), expr=t30_s))

    grad_gx = _harris_box_sum_3x3("grad_gx", grad_xx)

    grad_y = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                         lambda x, y: 0, name="grad_y",
                         dtype=hcl.Int(bits=16))
    with hcl.Stage("grad_y"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_y_s0_y") as grad_y_s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_y_s0_x") as grad_y_s0_x:
                # Row y+2 minus row y, middle column weighted by 2.
                grad_y[grad_y_s0_x, grad_y_s0_y] = (
                    (padded16[(grad_y_s0_x + 2), (grad_y_s0_y + 2)]
                     + (((padded16[(grad_y_s0_x + 1), (grad_y_s0_y + 2)]
                          * hcl.cast(dtype=hcl.Int(bits=16), expr=2))
                         + (padded16[grad_y_s0_x, (grad_y_s0_y + 2)]
                            - padded16[grad_y_s0_x, grad_y_s0_y]))
                        - (padded16[(grad_y_s0_x + 1), grad_y_s0_y]
                           * hcl.cast(dtype=hcl.Int(bits=16), expr=2))))
                    - padded16[(grad_y_s0_x + 2), grad_y_s0_y])

    grad_xy = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                          lambda x, y: 0, name="grad_xy",
                          dtype=hcl.Int(bits=32))
    with hcl.Stage("grad_xy"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_xy_s0_y") as grad_xy_s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_xy_s0_x") as grad_xy_s0_x:
                grad_xy[grad_xy_s0_x, grad_xy_s0_y] = (
                    hcl.cast(dtype=hcl.Int(bits=32),
                             expr=grad_x[grad_xy_s0_x, grad_xy_s0_y])
                    * hcl.cast(dtype=hcl.Int(bits=32),
                               expr=grad_y[grad_xy_s0_x, grad_xy_s0_y]))

    grad_gxy = _harris_box_sum_3x3("grad_gxy", grad_xy)

    grad_yy = hcl.compute(((final_extent_0 + 4), (final_extent_1 + 4)),
                          lambda x, y: 0, name="grad_yy",
                          dtype=hcl.Int(bits=32))
    with hcl.Stage("grad_yy"):
        with hcl.for_(final_min_1, (final_extent_1 + 4),
                      name="grad_yy_s0_y") as grad_yy_s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 4),
                          name="grad_yy_s0_x") as grad_yy_s0_x:
                t31_s = grad_y[grad_yy_s0_x, grad_yy_s0_y]
                grad_yy[grad_yy_s0_x, grad_yy_s0_y] = (
                    hcl.cast(dtype=hcl.Int(bits=32), expr=t31_s)
                    * hcl.cast(dtype=hcl.Int(bits=32), expr=t31_s))

    grad_gy = _harris_box_sum_3x3("grad_gy", grad_yy)

    cim = hcl.compute(((final_extent_0 + 2), (final_extent_1 + 2)),
                      lambda x, y: 0, name="cim", dtype=hcl.Float(bits=32))
    with hcl.Stage("cim"):
        with hcl.for_(final_min_1, (final_extent_1 + 2),
                      name="cim_s0_y") as cim_s0_y:
            with hcl.for_(final_min_0, (final_extent_0 + 2),
                          name="cim_s0_x") as cim_s0_x:
                t32 = grad_gx[cim_s0_x, cim_s0_y]
                t33 = grad_gy[cim_s0_x, cim_s0_y]
                t34 = grad_gxy[cim_s0_x, cim_s0_y]
                # Integer floor-division by 144 happens before the float cast.
                gx_f = hcl.cast(dtype=hcl.Float(bits=32), expr=(t32 // 144))
                gy_f = hcl.cast(dtype=hcl.Float(bits=32), expr=(t33 // 144))
                gxy_f = hcl.cast(dtype=hcl.Float(bits=32), expr=(t34 // 144))
                t35 = (gx_f + gy_f)
                cim[cim_s0_x, cim_s0_y] = (
                    ((gx_f * gy_f) - (gxy_f * gxy_f))
                    - ((t35 * t35)
                       * hcl.cast(dtype=hcl.Float(bits=32), expr=0.040000)))

    output_final = hcl.compute((final_extent_0, final_extent_1),
                               lambda x, y: 0, name="output_final",
                               dtype=hcl.UInt(bits=16))
    with hcl.Stage("output_final"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="output_final_s0_y") as output_final_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="output_final_s0_x") as output_final_s0_x:
                t36 = cim[(output_final_s0_x + 1), (output_final_s0_y + 1)]
                # Fold the neighbours into one nested select expression.  The
                # order (innermost first) reproduces the hand-expanded chain
                # this replaces, so the resulting expression has the same
                # structure.
                # NOTE(review): cim[x + 2, y] is not among the compared
                # neighbours; that matches the expression being replaced --
                # confirm whether the omission is intended.
                neighbour_max = cim[(output_final_s0_x + 1), output_final_s0_y]
                for candidate in (
                        cim[output_final_s0_x, output_final_s0_y],
                        cim[output_final_s0_x, (output_final_s0_y + 1)],
                        cim[(output_final_s0_x + 2), (output_final_s0_y + 1)],
                        cim[output_final_s0_x, (output_final_s0_y + 2)],
                        cim[(output_final_s0_x + 1), (output_final_s0_y + 2)],
                        cim[(output_final_s0_x + 2), (output_final_s0_y + 2)]):
                    neighbour_max = _harris_select_max(candidate,
                                                       neighbour_max)
                output_final[output_final_s0_x, output_final_s0_y] = hcl.select(
                    hcl.and_(
                        (hcl.cast(dtype=hcl.Float(bits=32), expr=100.000000)
                         <= t36),
                        (neighbour_max < t36)),
                    hcl.cast(dtype=hcl.UInt(bits=16), expr=255),
                    hcl.cast(dtype=hcl.UInt(bits=16), expr=0))

    # NOTE(review): the shape is a literal here while every other stage uses
    # final_extent_0 / final_extent_1 -- confirm the two agree.
    final = hcl.compute((2442, 3258), lambda x, y: 0, name="final",
                        dtype=hcl.UInt(bits=16))
    with hcl.Stage("final"):
        with hcl.for_(final_min_1, final_extent_1,
                      name="final_s0_y") as final_s0_y:
            with hcl.for_(final_min_0, final_extent_0,
                          name="final_s0_x") as final_s0_x:
                final[final_s0_x, final_s0_y] = output_final[final_s0_x,
                                                             final_s0_y]
    return final
def zeros_like(array, dtype=None, name='zeros_like'):
    """Build a zero-filled tensor with the same shape as ``array``.

    Args:
        array: Tensor supplying the shape and, when ``dtype`` is ``None``,
            the type used for the cast.
        dtype: Type passed to ``hcl.cast``; falls back to ``array.dtype``.
        name: Name given to the resulting compute stage.

    Returns:
        The tensor returned by ``hcl.compute``; every element is
        ``hcl.cast(<resolved dtype>, 0)``.
    """
    elem_type = array.dtype if dtype is None else dtype
    return hcl.compute(
        array.shape,
        lambda *x: hcl.cast(elem_type, 0),
        name=name,
    )
def ones(in_shape, dtype=dtype, name='ones'):
    """Build a tensor of shape ``in_shape`` whose elements are all one.

    Args:
        in_shape: Shape of the tensor to create.
        dtype: Type used for ``hcl.cast``. The default is whatever the
            module-level name ``dtype`` was bound to when this function was
            defined.
        name: Name given to the resulting compute stage.

    Returns:
        The tensor returned by ``hcl.compute``.
    """
    # NOTE(review): dtype is applied only through hcl.cast and is not passed
    # to hcl.compute itself -- confirm the resulting tensor type is intended.
    def one(*x):
        return hcl.cast(dtype, 1)

    return hcl.compute(in_shape, one, name=name)