def kernel(A):
    """Convolve ``A`` with a freshly drawn random integer filter.

    A ``KERNEL_SIZE x KERNEL_SIZE`` filter is sampled with
    ``np.random.randint(0, 10, ...)`` on every call and copied into a tensor
    named "F". The returned tensor "B" holds the sliding-window sum of
    products and has ``SIZE - KERNEL_SIZE + 1`` rows and columns.

    ``SIZE`` and ``KERNEL_SIZE`` are read from module scope.
    """
    row_off = hcl.reduce_axis(0, KERNEL_SIZE)
    col_off = hcl.reduce_axis(0, KERNEL_SIZE)
    weights = np.random.randint(0, 10, (KERNEL_SIZE, KERNEL_SIZE))
    F = hcl.copy(weights, "F")
    out_extent = SIZE - KERNEL_SIZE + 1
    return hcl.compute(
        (out_extent, out_extent),
        lambda y, x: hcl.sum(
            A[y + row_off, x + col_off] * F[row_off, col_off],
            axis=[row_off, col_off]),
        "B")
def max_pool2d_nchw(data, pooling, stride, padding, name='max_pool2d'):
    """Max-pool a 4-D tensor whose shape unpacks as (batch, channel, height, width).

    Args:
        data: 4-D input tensor.
        pooling: pair ``(pooling_h, pooling_w)`` giving the window size.
        stride: pair ``(stride_h, stride_w)``.
        padding: forwarded to ``get_pad_tuple``; the input is padded unless
            this compares equal to the list ``[0, 0]``.
        name: name of the output stage.

    Returns:
        Tensor of shape ``(batch, channel, out_height, out_width)`` carrying
        the pooling geometry in its ``attrs``.
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"

    pooling_h, pooling_w = pooling
    stride_h, stride_w = stride
    batch, channel, height, width = data.shape
    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
        padding, (pooling_h, pooling_w))

    if padding != [0, 0]:
        # The fill value is the float32 minimum regardless of data.dtype.
        data = pad(data,
                   [0, 0, pad_top, pad_left],
                   [0, 0, pad_bottom, pad_right],
                   pad_value=tvm.min_value("float32"))

    padded_h = height - pooling_h + pad_top + pad_bottom
    padded_w = width - pooling_w + pad_left + pad_right
    out_height = simplify(padded_h // stride_h + 1)
    out_width = simplify(padded_w // stride_w + 1)

    dheight = hcl.reduce_axis(0, pooling_h)
    dwidth = hcl.reduce_axis(0, pooling_w)

    # NOTE(review): 'kernel_h'/'stride_h' read index 1 and 'kernel_w'/'stride_w'
    # read index 0, the opposite of the (h, w) unpacking above. Kept as-is;
    # confirm against whatever consumes these attrs.
    attrs = OrderedDict([
        ('out_img_w', out_width),
        ('out_img_h', out_height),
        ('in_num', channel),
        ('kernel_h', pooling[1]),
        ('kernel_w', pooling[0]),
        ('stride_h', stride[1]),
        ('stride_w', stride[0]),
        ('app_name', tvm.make.StringImm('max_pool')),
    ])
    return hcl.compute(
        (batch, channel, out_height, out_width),
        lambda i, c, h, w: max(
            data[i, c, h * stride_h + dheight, w * stride_w + dwidth],
            axis=[dheight, dwidth]),
        name=name,
        attrs=attrs)
def sobel(A, Gx, Gy):
    """Return the scaled gradient magnitude of ``A`` for 3x3 filters ``Gx``/``Gy``.

    ``A`` is indexed as ``A[x][y][channel]`` for channels 0..2; the three
    channels are summed into stage "B". Stages "D" and "E" hold the 3x3
    window responses for ``Gx`` and ``Gy``. The result is
    ``sqrt(D^2 + E^2) / 4328 * 255`` over a ``(height - 2, width - 2)`` grid.

    ``height`` and ``width`` are read from module scope.
    """
    B = hcl.compute(
        (height, width),
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    inner_shape = (height - 2, width - 2)

    gx_row = hcl.reduce_axis(0, 3)
    gx_col = hcl.reduce_axis(0, 3)
    D = hcl.compute(
        inner_shape,
        lambda x, y: hcl.sum(B[x + gx_row, y + gx_col] * Gx[gx_row, gx_col],
                             axis=[gx_row, gx_col]),
        "D",
        dtype=hcl.Float())

    gy_row = hcl.reduce_axis(0, 3)
    gy_col = hcl.reduce_axis(0, 3)
    E = hcl.compute(
        inner_shape,
        lambda x, y: hcl.sum(B[x + gy_row, y + gy_col] * Gy[gy_row, gy_col],
                             axis=[gy_row, gy_col]),
        "E",
        dtype=hcl.Float())

    return hcl.compute(
        inner_shape,
        lambda x, y: hcl.sqrt(D[x][y] * D[x][y] + E[x][y] * E[x][y])
        / 4328 * 255,
        dtype=hcl.Float())
def test_conv2D_lb():
    """Check a 3x3 window sum over a 10x10 input scheduled with ``reuse_at``.

    The HeteroCL result is compared against a NumPy reference built by
    accumulating the nine shifted 8x8 views of the input.
    """
    hcl.init()
    A = hcl.placeholder((10, 10))
    row_off = hcl.reduce_axis(0, 3)
    col_off = hcl.reduce_axis(0, 3)
    B = hcl.compute(
        (8, 8),
        lambda y, x: hcl.sum(A[y + row_off, x + col_off],
                             axis=[row_off, col_off]))
    s = hcl.create_schedule([A, B])
    s.reuse_at(A, s[B], B.axis[0])
    f = hcl.build(s)

    np_A = np.random.randint(0, 10, size=(10, 10))

    # Reference: out[y, x] = sum of np_A[y + dy, x + dx] for dy, dx in 0..2.
    expected = np.zeros((8, 8), dtype="int")
    for dy in range(3):
        for dx in range(3):
            expected += np_A[dy:dy + 8, dx:dx + 8]

    hcl_A = hcl.asarray(np_A)
    hcl_B = hcl.asarray(np.zeros((8, 8), dtype="int"))
    f(hcl_A, hcl_B)

    assert np.array_equal(hcl_B.asnumpy(), expected)
def softmax(x, name="softmax", axis=0, frontend='keras'):
    """Softmax of ``x`` along one axis, stabilised by subtracting the maximum.

    Three stages are built: the maximum along ``axis``, the sum of
    ``exp(x - max)`` along ``axis``, and the element-wise quotient.

    Args:
        x: input tensor; only ``x.shape`` and indexing are used.
        name: name of the final output stage.
        axis: axis to normalise over. Negative values count from the last
            axis (``-1`` is the last axis).
        frontend: accepted for interface compatibility; not used here.

    Returns:
        Tensor with the same shape as ``x``.

    Raises:
        IndexError: if ``axis`` is outside ``[-ndim, ndim)``.
    """
    shape = x.shape
    ndim = len(shape)

    # A negative axis used to fall through `i != axis` untouched, so nothing
    # was removed from the reduced shape and the reduce index was appended at
    # the wrong position. Normalise it once up front.
    if not -ndim <= axis < ndim:
        raise IndexError(
            "axis %d is out of range for a %d-dim tensor" % (axis, ndim))
    if axis < 0:
        axis += ndim

    reduced_shape = tuple(dim for i, dim in enumerate(shape) if i != axis)

    def _with_axis(indices, reduce_var):
        # Re-insert the reduction variable at `axis` among the reduced indices.
        return tuple(indices[:axis]) + (reduce_var,) + tuple(indices[axis:])

    def _without_axis(indices):
        # Drop the index at `axis` to address a reduced tensor.
        return tuple(indices[:axis]) + tuple(indices[axis + 1:])

    k = hcl.reduce_axis(0, shape[axis])
    max_elem = hcl.compute(
        reduced_shape,
        lambda *y: max(x[_with_axis(y, k)], axis=[k]))

    # A separate reduce axis for the second reduction stage.
    k = hcl.reduce_axis(0, shape[axis])
    expsum = hcl.compute(
        reduced_shape,
        lambda *y: sum(tvm.exp(x[_with_axis(y, k)] - max_elem[y]), axis=k))

    return hcl.compute(
        x.shape,
        lambda *y: tvm.exp(x[y] - max_elem[_without_axis(y)])
        / expsum[_without_axis(y)],
        name)
def sobel_kernel(imgF, Gx, Gy):
    """Build a padded Sobel pipeline over ``imgF`` and return the scaled magnitude.

    Stages, in order: "P" (image shifted by one pixel with zeros where
    ``x == 0`` or ``y == 0``), "A" (sum of the three channels), "X"/"Y"
    (3x3 window responses for ``Gx``/``Gy``), "R" (gradient magnitude) and
    "F" (``R`` multiplied by a scalar holding ``255 / 4328``).

    ``height`` and ``width`` are read from module scope.
    """
    def _shifted_pixel(x, y, z):
        out = hcl.scalar(0, "out")
        # NOTE(review): only the top/left border is guarded. For x == height + 1
        # or y == width + 1 this reads imgF[height, ...] / imgF[..., width, ...];
        # confirm imgF is large enough or add an upper-bound check.
        with hcl.if_(hcl.and_(x > 0, y > 0)):
            out.v = imgF[x - 1, y - 1, z]
        with hcl.else_():
            out.v = 0
        return out.v

    P = hcl.compute((height + 2, width + 2, 3),
                    lambda x, y, z: _shifted_pixel(x, y, z),
                    "P")
    A = hcl.compute((height + 2, width + 2),
                    lambda x, y: P[x][y][0] + P[x][y][1] + P[x][y][2],
                    "A")

    image_shape = (height, width)

    gx_row = hcl.reduce_axis(0, 3)
    gx_col = hcl.reduce_axis(0, 3)
    resX = hcl.compute(
        image_shape,
        lambda x, y: hcl.sum(A[x + gx_row, y + gx_col] * Gx[gx_row, gx_col],
                             axis=[gx_row, gx_col], name="sum1"),
        "X")

    gy_row = hcl.reduce_axis(0, 3)
    gy_col = hcl.reduce_axis(0, 3)
    resY = hcl.compute(
        image_shape,
        lambda x, y: hcl.sum(A[x + gy_row, y + gy_col] * Gy[gy_row, gy_col],
                             axis=[gy_row, gy_col], name="sum2"),
        "Y")

    R = hcl.compute(
        image_shape,
        lambda x, y: hcl.sqrt(resX[x][y] * resX[x][y]
                              + resY[x][y] * resY[x][y]),
        "R")

    norm = hcl.scalar(255 / 4328)
    return hcl.compute(image_shape, lambda x, y: R[x][y] * norm.v, "F")
def avg_pool2d_nhwc(data, pooling, stride=[1, 1], padding=[0, 0], name='avg_pool2d'):
    """Average-pool a 4-D tensor whose shape unpacks as (batch, height, width, channel).

    The input is always padded (with zeros of ``data.dtype``) by the amounts
    returned from ``get_pad_tuple``; each window sum is divided by
    ``pooling_w * pooling_h``.

    Args:
        data: 4-D input tensor.
        pooling: pair ``(pooling_h, pooling_w)``.
        stride: pair ``(stride_h, stride_w)``. The list default is not
            mutated in this function.
        padding: forwarded to ``get_pad_tuple``.
        name: name of the output stage.

    Returns:
        Tensor of shape ``(batch, out_height, out_width, channel)``.
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"

    pooling_h, pooling_w = pooling
    stride_h, stride_w = stride
    batch, height, width, channel = data.shape
    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
        padding, (pooling_h, pooling_w))

    data = pad(data,
               [0, pad_top, pad_left, 0],
               [0, pad_bottom, pad_right, 0],
               pad_value=tvm.const(0.0, data.dtype))

    padded_h = height - pooling_h + pad_top + pad_bottom
    padded_w = width - pooling_w + pad_left + pad_right
    out_height = simplify(padded_h // stride_h + 1)
    out_width = simplify(padded_w // stride_w + 1)

    dheight = hcl.reduce_axis(0, pooling_h)
    dwidth = hcl.reduce_axis(0, pooling_w)

    # NOTE(review): 'kernel_h'/'stride_h' read index 1 and 'kernel_w'/'stride_w'
    # read index 0, the opposite of the (h, w) unpacking above. Kept as-is;
    # confirm against whatever consumes these attrs.
    attrs = OrderedDict([
        ('out_img_w', out_width),
        ('out_img_h', out_height),
        ('in_num', channel),
        ('kernel_h', pooling[1]),
        ('kernel_w', pooling[0]),
        ('stride_h', stride[1]),
        ('stride_w', stride[0]),
        ('app_name', tvm.make.StringImm('avg_pool')),
    ])
    return hcl.compute(
        (batch, out_height, out_width, channel),
        lambda i, h, w, c: sum(
            data[i, h * stride_h + dheight, w * stride_w + dwidth, c],
            axis=[dheight, dwidth]) / (pooling_w * pooling_h),
        name=name,
        attrs=attrs)
def sobel(A, Gx, Gy):
    """Sobel magnitude built from named stages "A1", "B1", "B2" and "output".

    "A1" sums channels 0..2 of ``A``; "B1" and "B2" are the 3x3 window
    responses for ``Gx`` and ``Gy``; "output" combines them through two
    scalars as ``sqrt(in1^2 + in2^2) / 4328 * 255``.

    ``height`` and ``width`` are read from module scope.
    """
    gx_row = hcl.reduce_axis(0, 3)
    gx_col = hcl.reduce_axis(0, 3)
    A1 = hcl.compute(
        (height, width),
        lambda y, x: A[y][x][0] + A[y][x][1] + A[y][x][2],
        "A1")

    inner_shape = (height - 2, width - 2)
    B1 = hcl.compute(
        inner_shape,
        lambda x, y: hcl.sum(A1[x + gx_row, y + gx_col] * Gx[gx_row, gx_col],
                             axis=[gx_row, gx_col], name="sum1"),
        name="B1",
        dtype=hcl.Float())

    gy_row = hcl.reduce_axis(0, 3)
    gy_col = hcl.reduce_axis(0, 3)
    B2 = hcl.compute(
        inner_shape,
        lambda x, y: hcl.sum(A1[x + gy_row, y + gy_col] * Gy[gy_row, gy_col],
                             axis=[gy_row, gy_col], name="sum2"),
        name="B2",
        dtype=hcl.Float())

    def _magnitude(in1, in2):
        # Stage both operands in scalars before combining them.
        ll = hcl.scalar(in1, "in1")
        lr = hcl.scalar(in2, "in2")
        return hcl.sqrt(ll.v * ll.v + lr.v * lr.v) / 4328 * 255

    return hcl.compute(
        inner_shape,
        lambda x, y: _magnitude(B1[x, y], B2[x, y]),
        name="output",
        dtype=hcl.Float())
def max_pool(data, kernel, stride, padding=[[0, 0], [0, 0]], name="max_pool"):
    """Max-pool a 4-D tensor whose shape unpacks as (batch, channel, height, width).

    Args:
        data: 4-D input tensor.
        kernel: pair ``(kernel_height, kernel_width)``.
        stride: pair ``(stride_height, stride_width)``.
        padding: ``[[pad_top, pad_left], [pad_down, pad_right]]``. The input
            is padded unless this compares equal to ``[[0, 0], [0, 0]]``.
            The list default is not mutated in this function.
        name: name of the output stage.

    Returns:
        Tensor of shape ``(batch, channel, out_height, out_width)``.
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"

    kernel_height, kernel_width = kernel
    stride_height, stride_width = stride
    batch, channel, height, width = data.shape
    (pad_top, pad_left), (pad_down, pad_right) = padding

    if padding != [[0, 0], [0, 0]]:
        # The fill value is the float32 minimum regardless of data.dtype.
        data = pad(data,
                   [0, 0, pad_top, pad_left],
                   [0, 0, pad_down, pad_right],
                   pad_value=tvm.min_value("float32"))

    padded_h = height - kernel_height + pad_top + pad_down
    padded_w = width - kernel_width + pad_left + pad_right
    out_height = simplify(padded_h // stride_height + 1)
    out_width = simplify(padded_w // stride_width + 1)

    dheight = hcl.reduce_axis(0, kernel_height)
    dwidth = hcl.reduce_axis(0, kernel_width)

    # NOTE(review): 'kernel_h'/'stride_h' read index 1 and 'kernel_w'/'stride_w'
    # read index 0, the opposite of the (height, width) unpacking above. Kept
    # as-is; confirm against whatever consumes these attrs.
    attrs = OrderedDict([
        ('out_img_w', out_width),
        ('out_img_h', out_height),
        ('in_num', channel),
        ('kernel_h', kernel[1]),
        ('kernel_w', kernel[0]),
        ('stride_h', stride[1]),
        ('stride_w', stride[0]),
        ('app_name', tvm.make.StringImm('max_pool')),
    ])
    return hcl.compute(
        (batch, channel, out_height, out_width),
        lambda i, c, h, w: max(
            data[i, c, h * stride_height + dheight,
                 w * stride_width + dwidth],
            axis=[dheight, dwidth]),
        name=name,
        attrs=attrs)
def unsharp(input_image, output_image):
    """Build an unsharp-mask pipeline and write the result into ``output_image``.

    Stages, in order: "gray" (weighted channel sum shifted right by 8),
    "blur" (9x9 weighted window sum), "sharpen" (``2 * gray - blur``),
    "ratio" (``sharpen * 32`` divided by the maximum of ``gray`` along the
    second index) and "out" (``ratio * input >> 5``). "sharpen", "ratio" and
    "out" are each followed by a "clamped_*" stage limiting values to
    [0, 255]. The gray stage is hard-coded to shape (480, 640).

    Returns:
        The result of ``hcl.update`` on ``output_image``.
    """
    def clamp(val, min_, max_):
        # Copy the value into a scalar and overwrite it when out of range.
        clipped = hcl.scalar(val)
        with hcl.if_(val < min_):
            clipped[0] = min_
        with hcl.elif_(val > max_):
            clipped[0] = max_
        return clipped[0]

    def clamp2D(tensor, min_, max_):
        return hcl.compute(
            tensor.shape,
            lambda x, y: clamp(tensor[x, y], min_, max_),
            name="clamped_" + tensor.name)

    def clamp3D(tensor, min_, max_):
        return hcl.compute(
            tensor.shape,
            lambda x, y, c: clamp(tensor[x, y, c], min_, max_),
            name="clamped_" + tensor.name)

    def kernel_f(x):
        # `sqrt` is resolved from module scope.
        return hcl.exp(-(x * x) / (2 * 1.5 * 1.5)) / sqrt(2 * 3.14159 * 1.5)

    def kernel(x):
        # Weight at offset x, normalised by the weights of offsets -4..4
        # and scaled by 255.
        return kernel_f(x) * 255 / (kernel_f(0) + kernel_f(1) * 2 +
                                    kernel_f(2) * 2 + kernel_f(3) * 2 +
                                    kernel_f(4) * 2)

    rx = hcl.reduce_axis(-4, 5, "rx")
    ry = hcl.reduce_axis(-4, 5, "ry")
    my = hcl.reduce_axis(0, 640, "my")

    gray = hcl.compute(
        (480, 640),
        lambda x, y: (input_image[x, y, 0] * 77
                      + input_image[x, y, 1] * 150
                      + input_image[x, y, 2] * 29) >> 8,
        name="gray")

    blur = hcl.compute(
        gray.shape,
        lambda x, y: hcl.sum(gray[rx + x, ry + y] * kernel(rx) * kernel(ry),
                             axis=[rx, ry]),
        name="blur")

    raw_sharpen = hcl.compute(
        gray.shape,
        lambda x, y: gray[x, y] * 2 - blur[x, y],
        name="sharpen")
    sharpen = clamp2D(raw_sharpen, 0, 255)

    raw_ratio = hcl.compute(
        gray.shape,
        lambda x, y: sharpen[x, y] * 32 / hcl.max(gray[x, my], axis=my),
        name="ratio")
    ratio = clamp2D(raw_ratio, 0, 255)

    raw_out = hcl.compute(
        output_image.shape,
        lambda x, y, c: (ratio[x, y] * input_image[x, y, c]) >> 5,
        name="out")
    out = clamp3D(raw_out, 0, 255)

    return hcl.update(output_image, lambda x, y, c: out[x, y, c])
def conv2d_nchw(Input, Filter, bias=None, stride=(1, 1), padding='VALID',
                dilation=(1, 1), out_dtype=None, name="conv2d_nchw"):
    """2-D convolution; ``Input.shape`` unpacks as (batch, channel, height, width).

    Args:
        Input: 4-D input tensor.
        Filter: 4-D tensor whose shape unpacks as
            (num_filter, channel, kernel_h, kernel_w).
        bias: optional tensor indexed by output channel; when given, a second
            stage with the same ``name`` adds ``bias[j]`` to every output.
        stride: int or pair ``(stride_h, stride_w)``.
        padding: forwarded to ``get_pad_tuple`` with the dilated kernel size.
        dilation: int or pair ``(dilation_h, dilation_w)``.
        out_dtype: accumulation/output dtype; defaults to ``Input.dtype``.
        name: name of the output stage(s).

    Returns:
        Tensor of shape ``(batch, num_filter, out_height, out_width)``.
    """
    if out_dtype is None:
        out_dtype = Input.dtype
    assert isinstance(stride, int) or len(stride) == 2
    assert isinstance(dilation, int) or len(dilation) == 2

    stride_h, stride_w = (stride, stride) if isinstance(stride, int) else stride
    dilation_h, dilation_w = (
        (dilation, dilation) if isinstance(dilation, int) else dilation)

    batch, in_channel, in_height, in_width = Input.shape
    out_channel, _, kernel_h, kernel_w = Filter.shape

    # Output extent is derived from the dilated kernel footprint.
    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w))
    out_height = simplify(
        (in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
    out_width = simplify(
        (in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

    temp = pad(Input,
               [0, 0, pad_top, pad_left],
               [0, 0, pad_down, pad_right],
               name="pad_temp")

    rc = hcl.reduce_axis(0, in_channel, name='rc')
    ry = hcl.reduce_axis(0, kernel_h, name='ry')
    rx = hcl.reduce_axis(0, kernel_w, name='rx')

    result = hcl.compute(
        (batch, out_channel, out_height, out_width),
        lambda nn, ff, yy, xx: sum(
            temp[nn, rc,
                 yy * stride_h + ry * dilation_h,
                 xx * stride_w + rx * dilation_w].astype(out_dtype)
            * Filter[ff, rc, ry, rx].astype(out_dtype),
            dtype=out_dtype,
            axis=[rc, ry, rx]),
        name=name)

    if bias is None:
        return result
    return hcl.compute(
        result.shape,
        lambda i, j, k, l: result[i, j, k, l] + bias[j],
        name=name)
def sobelAlgo(A, Fx, Fy):
    """Sobel magnitude over a full ``(height, width)`` grid.

    Stage "B" sums channels 0..2 of ``A``. Stages "Gx" and "Gy" select the
    3x3 window response where ``0 < x < height - 1`` and
    ``0 < y < width - 1``, and ``B[x, y]`` elsewhere. The result is
    ``sqrt(Gx^2 + Gy^2) / 4328 * 255``.

    ``height`` and ``width`` are read from module scope.
    """
    image_shape = (height, width)
    B = hcl.compute(
        image_shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    fx_row = hcl.reduce_axis(0, 3)
    fx_col = hcl.reduce_axis(0, 3)
    Gx = hcl.compute(
        image_shape,
        lambda x, y: hcl.select(
            hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)),
            hcl.sum(B[x + fx_row, y + fx_col] * Fx[fx_row, fx_col],
                    axis=[fx_row, fx_col]),
            B[x, y]),
        "Gx")

    fy_row = hcl.reduce_axis(0, 3)
    fy_col = hcl.reduce_axis(0, 3)
    Gy = hcl.compute(
        image_shape,
        lambda x, y: hcl.select(
            hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)),
            hcl.sum(B[x + fy_row, y + fy_col] * Fy[fy_row, fy_col],
                    axis=[fy_row, fy_col]),
            B[x, y]),
        "Gy")

    return hcl.compute(
        image_shape,
        lambda x, y: (hcl.sqrt(Gx[x][y] * Gx[x][y] + Gy[x][y] * Gy[x][y]))
        / 4328 * 255,
        dtype=hcl.Float())
def dev(gx, gy, org):
    """Add two 4-D tensors and rescale by the maximum over a 255x255 window.

    Stage "add" is ``gx + gy``. Stage "derv" multiplies each element by
    255.0 and divides by the maximum of "add" over indices 0..254 of the
    last two axes, taken at the same leading indices.

    Args:
        gx, gy: tensors of identical shape, indexed with four indices.
        org: not used by this function.
    """
    assert gx.shape == gy.shape, "mismatch"

    rx = hcl.reduce_axis(0, 255, "rx")
    ry = hcl.reduce_axis(0, 255, "ry")

    mat_sum = hcl.compute(
        gx.shape,
        lambda nn, ff, xx, yy: gx[nn, ff, xx, yy] + gy[nn, ff, xx, yy],
        name="add")

    return hcl.compute(
        mat_sum.shape,
        lambda nn, ff, xx, yy: mat_sum[nn, ff, xx, yy] * 255.0
        / hcl.max(mat_sum[nn, ff, rx, ry], axis=[rx, ry]),
        name="derv")
def sobelAlgo(A, Fx, Fy):
    """Sobel magnitude of an input read over a ``(height + 2, width + 2)`` grid.

    Stage "B" sums channels 0..2 of ``A`` over ``(height + 2, width + 2)``
    positions; stages "Gx" and "Gy" are the 3x3 window responses over
    ``(height, width)``. The result is ``sqrt(Gx^2 + Gy^2) / 4328 * 255``.

    ``height`` and ``width`` are read from module scope.
    """
    B = hcl.compute(
        (height + 2, width + 2),
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B")

    image_shape = (height, width)

    fx_row = hcl.reduce_axis(0, 3)
    fx_col = hcl.reduce_axis(0, 3)
    Gx = hcl.compute(
        image_shape,
        lambda y, x: hcl.sum(B[y + fx_row, x + fx_col] * Fx[fx_row, fx_col],
                             axis=[fx_row, fx_col]),
        "Gx")

    fy_row = hcl.reduce_axis(0, 3)
    fy_col = hcl.reduce_axis(0, 3)
    Gy = hcl.compute(
        image_shape,
        lambda y, x: hcl.sum(B[y + fy_row, x + fy_col] * Fy[fy_row, fy_col],
                             axis=[fy_row, fy_col]),
        "Gy")

    return hcl.compute(
        image_shape,
        lambda y, x: (hcl.sqrt(Gx[y][x] * Gx[y][x] + Gy[y][x] * Gy[y][x]))
        / 4328 * 255)
def softmax(out, x):
    """Write the row-wise softmax of the 2-D tensor ``x`` into ``out``.

    The maximum of each row is subtracted before exponentiation. Two
    intermediate stages of shape ``(m,)`` hold the row maxima and the row
    sums of exponentials.

    Returns:
        The result of ``hcl.update`` on ``out``.
    """
    assert len(x.shape) == 2, "only support 2-dim softmax"
    rows, cols = x.shape

    max_axis = hcl.reduce_axis(0, cols)
    max_elem = hcl.compute(
        (rows, ),
        lambda i: hcl.max(x[i, max_axis], axis=max_axis))

    sum_axis = hcl.reduce_axis(0, cols)
    expsum = hcl.compute(
        (rows, ),
        lambda i: hcl.sum(hcl.exp(x[i, sum_axis] - max_elem[i]),
                          axis=sum_axis))

    return hcl.update(
        out,
        lambda i, j: hcl.exp(x[i, j] - max_elem[i]) / expsum[i])
def seidel(input_image, output_image):
    """Two-pass 3-tap averaging written into ``output_image``.

    Stage "tmp" averages three consecutive elements along the second index
    of ``input_image``; the update stage averages three consecutive elements
    of "tmp" along the first index. Both passes accumulate in ``hcl.Float()``
    and divide the sum by 3.

    Returns:
        The result of ``hcl.update`` on ``output_image``.
    """
    dtype = hcl.Float()
    rx = hcl.reduce_axis(0, 3, "rx")
    ry = hcl.reduce_axis(0, 3, "ry")

    tmp = hcl.compute(
        output_image.shape,
        lambda x, y: hcl.sum(input_image[x, ry + y],
                             axis=[ry], dtype=dtype) / 3,
        dtype=dtype,
        name='tmp')

    return hcl.update(
        output_image,
        lambda x, y: hcl.sum(tmp[rx + x, y], axis=[rx], dtype=dtype) / 3,
        name=output_image.name)
def sobel(B, G):
    """Apply the 3x3 filter ``G`` to ``B``, passing border pixels through.

    For ``0 < x < height - 1`` and ``0 < y < width - 1`` the output is the
    window sum of ``B[x + r, y + c] * G[r, c]``; elsewhere it is ``B[x, y]``.
    The output stage is named "D" and uses ``hcl.Float()``.

    ``height`` and ``width`` are read from module scope.
    """
    win_row = hcl.reduce_axis(0, 3)
    win_col = hcl.reduce_axis(0, 3)
    return hcl.compute(
        (height, width),
        lambda x, y: hcl.select(
            hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)),
            hcl.sum(B[x + win_row, y + win_col] * G[win_row, win_col],
                    axis=[win_row, win_col]),
            B[x, y]),
        "D",
        dtype=hcl.Float())
def softmax(x, name="softmax", axis=0):
    """Row-wise softmax of the 2-D tensor ``x``.

    The maximum of each row is subtracted before exponentiation.

    Args:
        x: 2-D input tensor.
        name: name of the output stage.
        axis: not used by this function; reduction is always over the
            second index.

    Returns:
        Tensor with the same shape as ``x``.
    """
    assert len(x.shape) == 2, "only support 2-dim softmax"
    rows, cols = x.shape

    max_axis = hcl.reduce_axis(0, cols)
    max_elem = hcl.compute(
        (rows, ),
        lambda i: max(x[i, max_axis], axis=max_axis))

    sum_axis = hcl.reduce_axis(0, cols)
    expsum = hcl.compute(
        (rows, ),
        lambda i: sum(tvm.exp(x[i, sum_axis] - max_elem[i]), axis=sum_axis))

    return hcl.compute(
        x.shape,
        lambda i, j: tvm.exp(x[i, j] - max_elem[i]) / expsum[i],
        name)
def guassian(A, G):
    """Apply the ``size x size`` filter ``G`` to ``A``, passing the border through.

    For ``size - 1 < x < height - size`` and ``size - 1 < y < width - size``
    the output is the window sum of ``A[x + h, y + w] * G[h, w]``; elsewhere
    it is ``A[x, y]``. The output stage is named "F" and uses
    ``hcl.Float()``.

    ``size``, ``height`` and ``width`` are read from module scope.
    """
    win_row = hcl.reduce_axis(0, size)
    win_col = hcl.reduce_axis(0, size)
    return hcl.compute(
        (height, width),
        lambda x, y: hcl.select(
            hcl.and_(x > (size - 1), x < (height - size),
                     y > (size - 1), y < (width - size)),
            hcl.sum(A[x + win_row, y + win_col] * G[win_row, win_col],
                    axis=[win_row, win_col]),
            A[x, y]),
        "F",
        dtype=hcl.Float())
def sobel(A, Gx, Gy):
    """Sobel magnitude over a full ``(height, width)`` grid.

    Stage "B" sums channels 0..2 of ``A``. Stages "Gx" and "Gy" select the
    3x3 window response where ``0 < x < height - 1`` and
    ``0 < y < width - 1``, and ``B[x, y]`` elsewhere. The result is
    ``sqrt(D^2 + E^2) / 4328 * 255``.

    ``height`` and ``width`` are read from module scope.
    """
    image_shape = (height, width)
    B = hcl.compute(
        image_shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B")

    gx_row = hcl.reduce_axis(0, 3)
    gx_col = hcl.reduce_axis(0, 3)
    D = hcl.compute(
        image_shape,
        lambda x, y: hcl.select(
            hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)),
            hcl.sum(B[x + gx_row, y + gx_col] * Gx[gx_row, gx_col],
                    axis=[gx_row, gx_col]),
            B[x, y]),
        "Gx")

    gy_row = hcl.reduce_axis(0, 3)
    gy_col = hcl.reduce_axis(0, 3)
    E = hcl.compute(
        image_shape,
        lambda x, y: hcl.select(
            hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)),
            hcl.sum(B[x + gy_row, y + gy_col] * Gy[gy_row, gy_col],
                    axis=[gy_row, gy_col]),
            B[x, y]),
        "Gy")

    return hcl.compute(
        image_shape,
        lambda x, y: hcl.sqrt(D[x][y] * D[x][y] + E[x][y] * E[x][y])
        / 4328 * 255)
def test_reuse_compute_nd():
    """Smoke test: ``reuse_at`` on a 3-D input reduced over two axes.

    Builds a (10, 8) stage "B" that reduces a (1, 10, 10) placeholder over
    its leading axis and a 3-wide window, reuses "A" along ``B.axis[1]``,
    prints the lowered IR and builds the schedule. Passing means no
    exception is raised.
    """
    hcl.init()
    nz = 1
    rx = hcl.reduce_axis(0, 3, name="rx")
    rz = hcl.reduce_axis(0, nz, name="rz")
    A = hcl.placeholder((nz, 10, 10), name="A")
    B = hcl.compute(
        (10, 8),
        lambda y, x: hcl.sum(A[rz, y, x + rx], axis=[rz, rx]),
        "B")
    s = hcl.create_schedule([A, B])
    s.reuse_at(A, s[B], B.axis[1])
    print(hcl.lower(s))
    hcl.build(s)
def _conv2d_nhwc(Input, Filter, Bias=None, stride=[1, 1], padding=[1, 1],
                 dilation=[1, 1], name='conv2d', out_dtype=None):
    """2-D convolution; ``Input.shape`` unpacks as (batch, height, width, channel).

    Args:
        Input: 4-D input tensor.
        Filter: 4-D tensor whose shape unpacks as
            (num_filter, channel, kernel_h, kernel_w) and which is indexed as
            ``Filter[ff, rc, ry, rx]``.
        Bias: optional tensor indexed by output channel. When given,
            ``Bias[ff]`` is added once to each output element.
        stride: int or pair ``(stride_h, stride_w)``.
        padding: forwarded to ``hlib.nn.get_pad_tuple`` together with the
            dilated kernel size.
        dilation: int or pair ``(dilation_h, dilation_w)``.
        name: name of the output stage.
        out_dtype: dtype the operands are cast to; defaults to
            ``Input.dtype``.

    Returns:
        Tensor of shape ``(batch, out_height, out_width, num_filter)``.

    The list defaults are kept for interface compatibility; this function
    does not mutate them.
    """
    if out_dtype is None:
        out_dtype = Input.dtype
    assert isinstance(stride, int) or len(stride) == 2
    assert isinstance(dilation, int) or len(dilation) == 2

    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride
    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation

    batch, in_height, in_width, in_channel = Input.shape
    out_channel, _, kernel_h, kernel_w = Filter.shape

    # Output extent is derived from the dilated kernel footprint.
    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
    pad_top, pad_left, pad_down, pad_right = hlib.nn.get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w))
    out_height = hlib.nn.simplify(
        (in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
    out_width = hlib.nn.simplify(
        (in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

    pad_before = [0, pad_top, pad_left, 0]
    pad_after = [0, pad_down, pad_right, 0]
    temp = hlib.nn.pad(Input, pad_before, pad_after, name="pad_temp")

    rc = hcl.reduce_axis(0, in_channel)
    ry = hcl.reduce_axis(0, kernel_h)
    rx = hcl.reduce_axis(0, kernel_w)

    def _window_sum(nn, yy, xx, ff):
        # Sum of input * weight over the kernel window and input channels.
        return hcl.sum(
            temp[nn,
                 yy * stride_h + ry * dilation_h,
                 xx * stride_w + rx * dilation_w,
                 rc].astype(out_dtype)
            * Filter[ff, rc, ry, rx].astype(out_dtype),
            axis=[ry, rx, rc])

    out_shape = (batch, out_height, out_width, out_channel)

    # `is None` avoids invoking any overloaded `==` on the tensor. Without a
    # bias the plain convolution is returned rather than falling through to
    # an implicit None.
    if Bias is None:
        return hcl.compute(
            out_shape,
            lambda nn, yy, xx, ff: _window_sum(nn, yy, xx, ff),
            name=name)

    # The bias is added after the reduction so it contributes exactly once
    # per output element, not once per reduced element.
    return hcl.compute(
        out_shape,
        lambda nn, yy, xx, ff: _window_sum(nn, yy, xx, ff)
        + Bias[ff].astype(out_dtype),
        name=name)
def sobel(RGB, Gx, Gy):
    """Sobel magnitude of an image whose pixels pack three 8-bit fields.

    Stage "B" adds the bit slices ``[8:0]``, ``[16:8]`` and ``[24:16]`` of
    each ``RGB[x][y]`` element. Stages "xx" and "yy" are the 3x3 window
    responses for ``Gx`` and ``Gy`` (reductions named "sum1" and "sum2").
    The result, stage "Fimg", is ``sqrt(xx^2 + yy^2) * 0.05891867``.

    ``height`` and ``width`` are read from module scope.
    """
    B = hcl.compute(
        (height, width),
        lambda x, y: RGB[x][y][8:0] + RGB[x][y][16:8] + RGB[x][y][24:16],
        "B")

    r = hcl.reduce_axis(0, 3)
    c = hcl.reduce_axis(0, 3)
    D = hcl.compute(
        (height - 2, width - 2),
        lambda x, y: hcl.sum(B[x + r, y + c] * Gx[r, c],
                             axis=[r, c], name="sum1"),
        "xx")

    t = hcl.reduce_axis(0, 3)
    g = hcl.reduce_axis(0, 3)
    # `name="sum2"` belongs to hcl.sum (mirroring "sum1" above); a misplaced
    # closing parenthesis previously ended hcl.compute early and left the
    # statement syntactically invalid.
    E = hcl.compute(
        (height - 2, width - 2),
        lambda x, y: hcl.sum(B[x + t, y + g] * Gy[t, g],
                             axis=[t, g], name="sum2"),
        "yy")

    return hcl.compute(
        (height - 2, width - 2),
        lambda x, y: hcl.sqrt(D[x][y] * D[x][y] + E[x][y] * E[x][y])
        * 0.05891867,
        "Fimg")
def kernel(A):
    """Reduce ``A[0:10, 0:10]`` with a custom reducer that keeps values sorted.

    The reducer's initial value is a 1-D tensor of length
    ``A.shape[0] * A.shape[1]`` filled with 11. For every reduced element
    ``x``, the reducer scans the accumulator ``Y``; at the first position
    whose value is greater than ``x`` it shifts the tail one slot to the
    right, stores ``x`` there and stops scanning.
    """
    seed = hcl.compute((A.shape[0] * A.shape[1],), lambda x: 11)

    def freduce(x, Y):
        with hcl.for_(0, Y.shape[0]) as i:
            with hcl.if_(x < Y[i]):
                # Shift Y[i:] right by one, walking from the last slot down.
                with hcl.for_(Y.shape[0] - 1, i, -1) as j:
                    Y[j] = Y[j - 1]
                Y[i] = x
                hcl.break_()

    insert_sorted = hcl.reducer(seed, freduce)
    rx = hcl.reduce_axis(0, 10)
    ry = hcl.reduce_axis(0, 10)
    return hcl.compute(
        seed.shape,
        lambda _x: insert_sorted(A[rx, ry], axis=[rx, ry]))
def sobel(A, Gx, Gy):
    """Sobel magnitude over the ``(height - 2, width - 2)`` interior.

    Stage "B" sums channels 0..2 of ``A``. Stages "xx" and "yy" are the 3x3
    window responses for ``Gx`` and ``Gy`` (the first reduction is named
    "sum1"). The result, stage "Fimg", is
    ``sqrt(xx^2 + yy^2) * 0.05891867``.

    ``height`` and ``width`` are read from module scope.
    """
    B = hcl.compute(
        (height, width),
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B")

    inner_shape = (height - 2, width - 2)

    gx_row = hcl.reduce_axis(0, 3)
    gx_col = hcl.reduce_axis(0, 3)
    D = hcl.compute(
        inner_shape,
        lambda x, y: hcl.sum(B[x + gx_row, y + gx_col] * Gx[gx_row, gx_col],
                             axis=[gx_row, gx_col], name="sum1"),
        "xx")

    gy_row = hcl.reduce_axis(0, 3)
    gy_col = hcl.reduce_axis(0, 3)
    E = hcl.compute(
        inner_shape,
        lambda x, y: hcl.sum(B[x + gy_row, y + gy_col] * Gy[gy_row, gy_col],
                             axis=[gy_row, gy_col]),
        "yy")

    return hcl.compute(
        inner_shape,
        lambda x, y: hcl.sqrt(D[x][y] * D[x][y] + E[x][y] * E[x][y])
        * 0.05891867,
        "Fimg")
def gaussian_func_gen(input_image, output_image):
    """Write a separable weighted window sum of ``input_image`` into ``output_image``.

    Each tap is weighted by ``kernel(rx) * kernel(ry)``, where ``kernel`` is a
    Gaussian-shaped weight scaled by 255 and normalised by the weights of
    offsets -4..4. The reduction is named 'reduce' and accumulates in
    ``hcl.Float()``.

    Returns:
        The result of ``hcl.update`` on ``output_image``.
    """
    def kernel_f(x):
        return (hcl.exp(-(x * x) / (2 * 1.5 * 1.5))
                / math.sqrt(2 * 3.14159 * 1.5))

    def kernel(x):
        return kernel_f(x) * 255 / (kernel_f(0) + kernel_f(1) * 2 +
                                    kernel_f(2) * 2 + kernel_f(3) * 2 +
                                    kernel_f(4) * 2)

    # NOTE(review): if the upper bound is exclusive (as the 0-based reduce
    # axes elsewhere suggest), (-1, 1) visits offsets -1 and 0 only —
    # confirm a symmetric window was not intended.
    rx = hcl.reduce_axis(-1, 1, "rx")
    ry = hcl.reduce_axis(-1, 1, "ry")

    return hcl.update(
        output_image,
        lambda x, y: hcl.sum(
            input_image[rx + x, ry + y] * kernel(rx) * kernel(ry),
            axis=[rx, ry],
            name='reduce',
            dtype=hcl.Float()),
        name=output_image.name)
def sobel_x(A, Gx):
    """Apply the 3x3 filter ``Gx`` to the channel sum of ``A``.

    Stage "B" sums channels 0..2 of ``A``. Stage "X" holds the window
    response where ``0 < x < height - 1`` and ``0 < y < width - 1``, and
    ``B[x, y]`` elsewhere. Both stages use ``hcl.Float()``.

    ``height`` and ``width`` are read from module scope.
    """
    image_shape = (height, width)
    B = hcl.compute(
        image_shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    win_row = hcl.reduce_axis(0, 3)
    win_col = hcl.reduce_axis(0, 3)
    return hcl.compute(
        image_shape,
        lambda x, y: hcl.select(
            hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)),
            hcl.sum(B[x + win_row, y + win_col] * Gx[win_row, win_col],
                    axis=[win_row, win_col]),
            B[x, y]),
        "X",
        dtype=hcl.Float())
def kernel_conv2d(inputs, weight, stride=1, padding=1, dilation=1, groups=1):
    """Grouped 2-D convolution in NCHW layout.

    Args:
        inputs: hcl.tensor.Tensor, shape [batch, channel, height, width].
        weight: hcl.tensor.Tensor, shape
            [out_channel, channel // groups, kernel_height, kernel_width].
        stride: int or pair (default 1).
        padding: int or pair (default 1); passed to ``zero_pad2d``.
        dilation: int or pair (default 1).
        groups: int (default 1).

    Returns:
        hcl.tensor.Tensor named "C" with shape
        [batch, out_channel, output_height, output_width].
    """
    def _pair(value):
        # Broadcast a single int to both spatial dimensions.
        return (value, value) if isinstance(value, (int, )) else value

    batch_size, in_channel, in_h, in_w = inputs.shape
    out_channel, channel_per_group, k_h, k_w = weight.shape
    out_channel_per_group = out_channel // groups

    stride = _pair(stride)
    padding = _pair(padding)
    dilation = _pair(dilation)

    out_h = (in_h + 2 * padding[0] - dilation[0] * (k_h - 1) - 1) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - dilation[1] * (k_w - 1) - 1) // stride[1] + 1

    rc = hcl.reduce_axis(0, channel_per_group, name="rc")
    rh = hcl.reduce_axis(0, k_h, name="rh")
    rw = hcl.reduce_axis(0, k_w, name="rw")

    padded = zero_pad2d(inputs, padding=padding)

    # Output channel c reads input channels from the group
    # c // out_channel_per_group.
    return hcl.compute(
        (batch_size, out_channel, out_h, out_w),
        lambda b, c, h, w: hcl.sum(
            (padded[b,
                    c // out_channel_per_group * channel_per_group + rc,
                    h * stride[0] + rh * dilation[0],
                    w * stride[1] + rw * dilation[1]]
             * weight[c, rc, rh, rw]),
            axis=[rc, rw, rh]),
        "C")
def kernel(A, F):
    """Fixed-size 3x3 convolution stage "conv1" producing a (1, 6, 6, 16) tensor.

    ``A`` is indexed as ``A[nn, y, x, 0]`` and ``F`` as ``F[ff, ry, rx, 0]``:
    the channel index is the constant 0, while a separate length-1 reduce
    axis "conv1_rc" is still listed among the reduction axes. Accumulation
    and output both use ``hcl.UInt(16)``.
    """
    stage = "conv1"
    ry = hcl.reduce_axis(0, 3, name=stage + '_ry')
    rx = hcl.reduce_axis(0, 3, name=stage + '_rx')
    channel_idx = 0
    rc = hcl.reduce_axis(0, 1, name=stage + '_rc')

    out_shape = (1, 6, 6, 16)  # (batch, out_height, out_width, out_channel)
    return hcl.compute(
        out_shape,
        lambda nn, yy, xx, ff: hcl.sum(
            A[nn, yy + ry, xx + rx, channel_idx]
            * F[ff, ry, rx, channel_idx],
            axis=[ry, rx, rc],
            name=stage + "_sum",
            dtype=hcl.UInt(16)),
        name=stage,
        dtype=hcl.UInt(16))
def sobel_y(A, Gy):
    """Apply the 3x3 filter ``Gy`` to the channel sum of ``A``.

    Stage "B" sums channels 0..2 of ``A``. Stage "Y" holds the window
    response where ``0 < x < height - 1`` and ``0 < y < width - 1``, and
    ``B[x, y]`` elsewhere. Both stages use ``hcl.Float()``.

    ``height`` and ``width`` are read from module scope.
    """
    image_shape = (height, width)
    B = hcl.compute(
        image_shape,
        lambda x, y: A[x][y][0] + A[x][y][1] + A[x][y][2],
        "B",
        dtype=hcl.Float())

    win_row = hcl.reduce_axis(0, 3)
    win_col = hcl.reduce_axis(0, 3)
    return hcl.compute(
        image_shape,
        lambda x, y: hcl.select(
            hcl.and_(x > 0, x < (height - 1), y > 0, y < (width - 1)),
            hcl.sum(B[x + win_row, y + win_col] * Gy[win_row, win_col],
                    axis=[win_row, win_col]),
            B[x, y]),
        "Y",
        dtype=hcl.Float())