def combine2(im1, im2, width, height, dist):
    """Blend two images into one using per-pixel weights looked up in ``dist``.

    Both inputs are wrapped with a repeat-edge boundary condition over the
    region ``[(0, width), (0, height)]``.  The weight of each pixel is
    ``dist`` indexed by that pixel's value; the first mask is the normalized
    weight of ``im1`` and the second mask is its complement.

    Args:
        im1: first image, indexable as ``im1[x, y]``.
        im2: second image, indexable as ``im2[x, y]``.
        width: second element of the x region passed to ``repeat_edge``.
        height: second element of the y region passed to ``repeat_edge``.
        dist: lookup indexed by pixel value (presumably a 1-D weight table;
            confirm against the caller).

    Returns:
        The ``combine_output`` Func holding the saturated 16-bit blend.
    """
    mask_a = hl.Func("mask1_layer_0")
    mask_b = hl.Func("mask2_layer_0")
    blend = hl.Func("combine_accumulator")
    result = hl.Func("combine_output")
    x = hl.Var("x")
    y = hl.Var("y")

    region = [(0, width), (0, height)]
    src_a = hl.BoundaryConditions.repeat_edge(im1, region)
    src_b = hl.BoundaryConditions.repeat_edge(im2, region)

    # Per-pixel weights taken from the lookup table.
    w_a = hl.f32(dist[src_a[x, y]])
    w_b = hl.f32(dist[src_b[x, y]])

    # The two masks sum to one at every pixel.
    mask_a[x, y] = w_a / (w_a + w_b)
    mask_b[x, y] = 1 - mask_a[x, y]

    # Pure definition followed by a single update that adds the weighted sum.
    blend[x, y] = hl.i32(0)
    blend[x, y] += (hl.i32(src_a[x, y] * mask_a[x, y])
                    + hl.i32(src_b[x, y] * mask_b[x, y]))

    result[x, y] = hl.u16_sat(blend[x, y])

    # Schedule: only the first mask and the accumulator are computed at root.
    mask_a.compute_root().parallel(y).vectorize(x, 16)
    blend.compute_root().parallel(y).vectorize(x, 16)
    blend.update(0).parallel(y).vectorize(x, 16)

    return result
def test_basics2():
    """Check which index expressions a Func accepts.

    Integer-valued index arithmetic must build without error, while an index
    built with true division (a Python float) must raise a RuntimeError
    mentioning the implicit float-to-int cast.
    """
    img = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8

    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    x_in = hl.clamp(x, 0, img.width() - 1)
    y_in = hl.clamp(y, 0, img.height() - 1)
    clamped[x, y] = img[x_in, y_in, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')

    # All of these use integer arithmetic only and must be accepted.
    plain = clamped[x * s_sigma, y * s_sigma]
    times_one = clamped[x * s_sigma * hl.i32(1), y * s_sigma * hl.i32(1)]
    minus_expr_half = clamped[x * s_sigma - hl.i32(s_sigma // 2),
                              y * s_sigma - hl.i32(s_sigma // 2)]
    minus_int_half = clamped[x * s_sigma - s_sigma // 2,
                             y * s_sigma - s_sigma // 2]
    with_rdom = clamped[x * s_sigma + r.x - s_sigma // 2,
                        y * s_sigma + r.y - s_sigma // 2]

    # True division yields a float, which is not a valid index.
    try:
        float_index = clamped[x * s_sigma - s_sigma / 2,
                              y * s_sigma - s_sigma / 2]
    except RuntimeError as err:
        assert 'Implicit cast from float32 to int' in str(err)
    else:
        assert False, 'Did not see expected exception!'
def diff(im1, im2, name):
    """Return a Func named ``name`` holding the int32 difference ``im1 - im2``.

    The result is two-dimensional when ``im1.dimensions()`` is 2; otherwise it
    is defined over three variables ``(x, y, c)``.
    """
    delta = hl.Func(name)
    x = hl.Var("x")
    y = hl.Var("y")
    c = hl.Var("c")

    if im1.dimensions() != 2:
        # Anything that is not 2-D is treated as a 3-D (x, y, c) image.
        delta[x, y, c] = hl.i32(im1[x, y, c]) - hl.i32(im2[x, y, c])
        return delta

    delta[x, y] = hl.i32(im1[x, y]) - hl.i32(im2[x, y])
    return delta
def test_basics():
    """Smoke-test expression building, Func definition, scheduling and JIT.

    Most statements are bare expressions: they only have to evaluate without
    raising.
    """
    src = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    one = hl.i32(1)
    assert one.type() == hl.Int(32)

    x_plus_one = x + 1

    # Indexing an ImageParam with Vars, constants and expressions.
    src[x, y]
    src[0, 0]
    src[x_plus_one, y]
    src[x + 1, y]
    src[x, y] + src[x + 1, y]

    # Disabled in the original; presumably because blur_x has no definition
    # at this point -- confirm before enabling.
    if False:
        first = blur_x[x, y]
        second = blur_x[x, y + 1]
        first + second
        blur_x[x, y] + blur_x[x, y + 1]

    (src[x, y] + src[x + 1, y]) / 2
    blur_x[x, y]
    blur_xx[x, y] = src[x, y]

    # Three-tap box blur, horizontally then vertically.
    blur_x[x, y] = (src[x, y] + src[x + 1, y] + src[x + 2, y]) / 3
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule and compile.
    xi = hl.Var('xi')
    yi = hl.Var('yi')
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
def test_basics3():
    """Check that in-place additions on a Func reference can be built.

    Sets up the start of a bilateral-grid pipeline and then applies ``+=``
    twice to a reference into the histogram Func, once with a Python int and
    once with a Halide expression.
    """
    img = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    x_in = hl.clamp(x, 0, img.width() - 1)
    y_in = hl.clamp(y, 0, img.height() - 1)
    clamped[x, y] = img[x_in, y_in, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    sample = clamped[x * s_sigma + r.x - s_sigma // 2,
                     y * s_sigma + r.y - s_sigma // 2]
    sample = hl.clamp(sample, 0.0, 1.0)
    z_index = hl.i32((sample / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    increment = hl.select(c == 0, sample, 1.0)

    # Deliberately go through a local name rather than item assignment.
    cell = histogram[x, y, z_index, c]
    cell += 5
    cell += increment
def black_white_level(input, black_point, white_point):
    """Rescale ``input`` so the range black_point..white_point spans 0..65535.

    Each pixel is converted to int32, shifted down by ``black_point``,
    multiplied by ``65535 / (white_point - black_point)`` and saturated to
    unsigned 16 bits.

    Returns:
        The ``black_white_level_output`` Func.
    """
    leveled = hl.Func("black_white_level_output")
    x = hl.Var("x")
    y = hl.Var("y")

    scale = 65535 / (white_point - black_point)
    shifted = hl.i32(input[x, y]) - black_point
    leveled[x, y] = hl.u16_sat(shifted * scale)

    return leveled
def test_minmax():
    """Exercise multi-way ``select`` with variadic ``min`` and ``max``."""
    x = hl.Var()
    f = hl.Func()

    # Branch values, listed in the order the select tests its conditions.
    at_zero = hl.min(x, 1)
    at_two_or_four = hl.i32(hl.min(hl.f32(x), hl.f32(3.2), x * hl.f32(2.1)))
    at_three = hl.max(x, x * 3, 1, x * 4)

    f[x] = hl.select(
        x == 0, at_zero,
        (x == 2) | (x == 4), at_two_or_four,
        x == 3, at_three,
        x,
    )

    b = f.realize(5)

    for index, expected in enumerate((0, 1, 2, 12, 3)):
        assert b[index] == expected, b[index]
def get_bilateral_grid(input, r_sigma, s_sigma):
    """Build the bilateral-grid pipeline for ``input`` and return its output.

    The pipeline accumulates a histogram over ``s_sigma`` x ``s_sigma`` tiles
    binned by intensity, blurs it with a five-tap (1, 4, 6, 4, 1) filter along
    z, x and y, samples the blurred grid with trilinear interpolation, and
    divides channel 0 by channel 1 to normalize.

    Args:
        input: image accepted by ``hl.BoundaryConditions.repeat_edge`` and
            indexed as ``[x, y]``.
        r_sigma: intensity bin width; sample values are divided by it.
        s_sigma: spatial tile size.  It is used with ``//`` and as an RDom
            extent, so it is presumably a Python int -- confirm with callers.

    Returns:
        The scheduled ``bilateral_grid`` Func.

    Note:
        The inner tile variables ``xi``, ``yi`` and ``zi`` must remain
        ``hl.Var`` objects because the GPU schedule passes them to
        ``gpu_tile``.  The sampling expressions therefore use separate names
        (``z_bin``, ``x_cell``, ``y_cell``, ``z_cell``) instead of rebinding
        those variables.
    """
    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')
    # Inner tile variables, used only by the GPU schedule.
    xi = hl.Var("xi")
    yi = hl.Var("yi")
    zi = hl.Var("zi")

    # Add a boundary condition
    clamped = hl.BoundaryConditions.repeat_edge(input)

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    val = clamped[x * s_sigma + r.x - s_sigma // 2,
                  y * s_sigma + r.y - s_sigma // 2]
    val = hl.clamp(val, 0.0, 1.0)
    # Intensity bin of the sample, rounded to nearest.
    z_bin = hl.i32(val / r_sigma + 0.5)
    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0
    # Channel 0 accumulates the value, every other channel the count.
    histogram[x, y, z_bin, c] += hl.select(c == 0, val, 1.0)

    # Blur the histogram using a five-tap filter
    blurx, blury, blurz = hl.Func('blurx'), hl.Func('blury'), hl.Func('blurz')
    blurz[x, y, z, c] = (histogram[x, y, z-2, c]
                         + histogram[x, y, z-1, c]*4
                         + histogram[x, y, z, c]*6
                         + histogram[x, y, z+1, c]*4
                         + histogram[x, y, z+2, c])
    blurx[x, y, z, c] = (blurz[x-2, y, z, c]
                         + blurz[x-1, y, z, c]*4
                         + blurz[x, y, z, c]*6
                         + blurz[x+1, y, z, c]*4
                         + blurz[x+2, y, z, c])
    blury[x, y, z, c] = (blurx[x, y-2, z, c]
                         + blurx[x, y-1, z, c]*4
                         + blurx[x, y, z, c]*6
                         + blurx[x, y+1, z, c]*4
                         + blurx[x, y+2, z, c])

    # Take trilinear samples to compute the output
    val = hl.clamp(clamped[x, y], 0.0, 1.0)
    zv = val / r_sigma
    z_cell = hl.i32(zv)
    zf = zv - z_cell
    xf = hl.f32(x % s_sigma) / s_sigma
    yf = hl.f32(y % s_sigma) / s_sigma
    x_cell = x / s_sigma
    y_cell = y / s_sigma
    interpolated = hl.Func('interpolated')
    interpolated[x, y, c] = hl.lerp(
        hl.lerp(
            hl.lerp(blury[x_cell, y_cell, z_cell, c],
                    blury[x_cell+1, y_cell, z_cell, c], xf),
            hl.lerp(blury[x_cell, y_cell+1, z_cell, c],
                    blury[x_cell+1, y_cell+1, z_cell, c], xf),
            yf),
        hl.lerp(
            hl.lerp(blury[x_cell, y_cell, z_cell+1, c],
                    blury[x_cell+1, y_cell, z_cell+1, c], xf),
            hl.lerp(blury[x_cell, y_cell+1, z_cell+1, c],
                    blury[x_cell+1, y_cell+1, z_cell+1, c], xf),
            yf),
        zf)

    # Normalize
    bilateral_grid = hl.Func('bilateral_grid')
    bilateral_grid[x, y] = interpolated[x, y, 0] / interpolated[x, y, 1]

    target = hl.get_target_from_environment()
    if target.has_gpu_feature():
        # GPU schedule
        # Currently running this directly from the Python code is very slow.
        # Probably because of the dispatch time because generated code
        # is same speed as C++ generated code.
        print ("Compiling for GPU.")
        # Every gpu_tile call names its inner (thread) variables explicitly.
        histogram.compute_root().reorder(c, z, x, y).gpu_tile(x, y, xi, yi, 8, 8)
        histogram.update().reorder(c, r.x, r.y, x, y).gpu_tile(x, y, xi, yi, 8, 8).unroll(c)
        blurx.compute_root().gpu_tile(x, y, z, xi, yi, zi, 16, 16, 1)
        blury.compute_root().gpu_tile(x, y, z, xi, yi, zi, 16, 16, 1)
        blurz.compute_root().gpu_tile(x, y, z, xi, yi, zi, 8, 8, 4)
        bilateral_grid.compute_root().gpu_tile(x, y, xi, yi, s_sigma, s_sigma)
    else:
        # CPU schedule
        print ("Compiling for CPU.")
        histogram.compute_root().parallel(z)
        histogram.update().reorder(c, r.x, r.y, x, y).unroll(c)
        blurz.compute_root().reorder(c, z, x, y).parallel(y).vectorize(x, 4).unroll(c)
        blurx.compute_root().reorder(c, x, y, z).parallel(z).vectorize(x, 4).unroll(c)
        blury.compute_root().reorder(c, x, y, z).parallel(z).vectorize(x, 4).unroll(c)
        bilateral_grid.compute_root().parallel(y).vectorize(x, 4)

    return bilateral_grid
def combine(im1, im2, width, height, dist):
    """Blend two images layer by layer using blurred weight masks.

    Each input is wrapped with a repeat-edge boundary condition and blurred
    with ``gauss_7x7``.  For every layer after the first, the difference
    between the sharper and the blurrier version of each image is weighted by
    the current mask and added to an accumulator; images and masks are then
    blurred again.  Finally the remaining blurred images are added with the
    last masks and the sum is saturated to unsigned 16 bits.

    Args:
        im1: first image, indexable as ``im1[x, y]``.
        im2: second image, indexable as ``im2[x, y]``.
        width: second element of the x region passed to ``repeat_edge``.
        height: second element of the y region passed to ``repeat_edge``.
        dist: lookup indexed by pixel value (presumably a 1-D weight table;
            confirm against the caller).

    Returns:
        The ``combine_output`` Func.
    """
    first_mask = hl.Func("mask1_layer_0")
    second_mask = hl.Func("mask2_layer_0")
    accumulator = hl.Func("combine_accumulator")
    result = hl.Func("combine_output")
    x = hl.Var("x")
    y = hl.Var("y")

    region = [(0, width), (0, height)]
    src_a = hl.BoundaryConditions.repeat_edge(im1, region)
    src_b = hl.BoundaryConditions.repeat_edge(im2, region)

    sharp_a, sharp_b = src_a, src_b
    soft_a = gauss_7x7(src_a, "img1_layer_0")
    soft_b = gauss_7x7(src_b, "img2_layer_0")

    # Initial masks: normalized lookup weights that sum to one.
    w_a = hl.f32(dist[src_a[x, y]])
    w_b = hl.f32(dist[src_b[x, y]])
    first_mask[x, y] = w_a / (w_a + w_b)
    second_mask[x, y] = 1 - first_mask[x, y]

    mask_a, mask_b = first_mask, second_mask

    num_layers = 2
    accumulator[x, y] = hl.i32(0)

    for layer in range(1, num_layers):
        print(' layer', layer)

        previous_tag = str(layer - 1)
        current_tag = str(layer)

        detail_a = diff(sharp_a, soft_a, "laplace1_layer_" + previous_tag)
        detail_b = diff(sharp_b, soft_b, "laplace2_layer_" + current_tag)

        # One update definition per layer.
        accumulator[x, y] += (hl.i32(detail_a[x, y] * mask_a[x, y])
                              + hl.i32(detail_b[x, y] * mask_b[x, y]))

        # Step down one level: the blurred images become the sharp ones.
        sharp_a, sharp_b = soft_a, soft_b
        soft_a = gauss_7x7(soft_a, "img1_layer_" + current_tag)
        soft_b = gauss_7x7(soft_b, "img2_layer_" + current_tag)

        mask_a = gauss_7x7(mask_a, "mask1_layer_" + current_tag)
        mask_b = gauss_7x7(mask_b, "mask2_layer_" + current_tag)

    # Final update: add what is left of the blurred images.
    accumulator[x, y] += (hl.i32(soft_a[x, y] * mask_a[x, y])
                          + hl.i32(soft_b[x, y] * mask_b[x, y]))

    result[x, y] = hl.u16_sat(accumulator[x, y])

    first_mask.compute_root().parallel(y).vectorize(x, 16)
    accumulator.compute_root().parallel(y).vectorize(x, 16)

    # The accumulator has one update per loop iteration plus the final one.
    for update_index in range(num_layers):
        accumulator.update(update_index).parallel(y).vectorize(x, 16)

    return result
def demosaic(input, width, height):
    """Build a three-channel Func from ``input`` using four 5x5 filters.

    Four integer kernels, indexed from -2 to 2 in both dimensions, are
    convolved with a mirror-interior padded copy of ``input``.  Each
    convolution is divided by its kernel divisor and saturated to unsigned
    16 bits.  The output picks one of the four results, or the raw input
    value, depending on the channel ``c`` and on the parity of ``x`` and
    ``y``.  (The original local names called channels 0/1/2 R/G/B and even
    and odd rows/columns R and B; presumably a Bayer mosaic -- confirm.)

    Args:
        input: image indexable as ``input[x, y]``.
        width: second element of the x region passed to ``mirror_interior``.
        height: second element of the y region passed to ``mirror_interior``.

    Returns:
        The scheduled ``demosaic_output`` Func over ``(x, y, c)``.
    """
    print(f'width: {width}, height: {height}')

    # (buffer name, Func name, divisor, {(dx, dy): coefficient}).
    # Positions that are not listed stay at zero.
    kernel_specs = (
        ("demosaic_f0", "demosaic_0", 8, {
            (0, -2): -1,
            (0, -1): 2,
            (-2, 0): -1, (-1, 0): 2, (0, 0): 4, (1, 0): 2, (2, 0): -1,
            (0, 1): 2,
            (0, 2): -1,
        }),
        ("demosaic_f1", "demosaic_1", 16, {
            (0, -2): 1,
            (-1, -1): -2, (1, -1): -2,
            (-2, 0): -2, (-1, 0): 8, (0, 0): 10, (1, 0): 8, (2, 0): -2,
            (-1, 1): -2, (1, 1): -2,
            (0, 2): 1,
        }),
        ("demosaic_f2", "demosaic_2", 16, {
            (0, -2): -2,
            (-1, -1): -2, (0, -1): 8, (1, -1): -2,
            (-2, 0): 1, (0, 0): 10, (2, 0): 1,
            (-1, 1): -2, (0, 1): 8, (1, 1): -2,
            (0, 2): -2,
        }),
        ("demosaic_f3", "demosaic_3", 16, {
            (0, -2): -3,
            (-1, -1): 4, (1, -1): 4,
            (-2, 0): -3, (0, 0): 12, (2, 0): -3,
            (-1, 1): 4, (1, 1): 4,
            (0, 2): -3,
        }),
    )

    # Allocate the 5x5 kernels and shift their origin to the centre tap.
    kernels = []
    for buffer_name, _, _, _ in kernel_specs:
        kernel = hl.Buffer(hl.Int(32), [5, 5], buffer_name)
        kernel.translate([-2, -2])
        kernels.append(kernel)

    filtered = [hl.Func(func_name) for _, func_name, _, _ in kernel_specs]
    result = hl.Func("demosaic_output")

    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    window = hl.RDom([(-2, 5), (-2, 5)])

    padded = hl.BoundaryConditions.mirror_interior(input, [(0, width), (0, height)])

    # Zero the kernels, then write the non-zero taps.
    for kernel, (_, _, _, taps) in zip(kernels, kernel_specs):
        kernel.fill(0)
        for position, coefficient in taps.items():
            kernel[position] = coefficient

    # One normalized, saturated convolution per kernel.
    for stage, kernel, (_, _, divisor, _) in zip(filtered, kernels, kernel_specs):
        weighted = (hl.i32(padded[x + window.x, y + window.y])
                    * kernel[window.x, window.y])
        stage[x, y] = hl.u16_sat(hl.sum(weighted) / divisor)

    d0, d1, d2, d3 = filtered

    even_row = y % 2 == 0
    odd_row = y % 2 != 0
    even_col = x % 2 == 0
    odd_col = x % 2 != 0

    chan0 = c == 0
    chan1 = c == 1
    chan2 = c == 2

    # Any combination not listed falls through to the raw input value.
    result[x, y, c] = hl.select(
        chan0 & even_row & odd_col, d1[x, y],
        chan0 & odd_row & even_col, d2[x, y],
        chan0 & odd_row & odd_col, d3[x, y],
        chan1 & even_row & even_col, d0[x, y],
        chan1 & odd_row & odd_col, d0[x, y],
        chan2 & odd_row & even_col, d1[x, y],
        chan2 & even_row & odd_col, d2[x, y],
        chan2 & even_row & even_col, d3[x, y],
        input[x, y],
    )

    for stage in filtered:
        stage.compute_root().parallel(y).vectorize(x, 16)

    (result.compute_root()
           .parallel(y)
           .align_bounds(x, 2)
           .unroll(x, 2)
           .align_bounds(y, 2)
           .unroll(y, 2)
           .vectorize(x, 16))

    return result
def test_typed_funcs():
    """Check type and dimension queries on undefined and constrained Funcs.

    Covers three situations:

    * an unconstrained, undefined Func, where ``output_type``, ``outputs``
      and ``dimensions`` must raise a RuntimeError;
    * Funcs constructed with required types and dimensionality, which must
      report those values before being defined;
    * definitions that contradict the declared dimensionality or types,
      which must raise a RuntimeError with a specific message.
    """
    x = hl.Var('x')
    y = hl.Var('y')

    # Unconstrained and undefined: every query must fail.
    f = hl.Func('f')
    assert not f.defined()

    try:
        # Use the qualified hl.Int: a bare ``Int`` is not a name in this
        # module and would raise NameError if the call ever returned.
        assert f.output_type() == hl.Int(32)
    except RuntimeError as e:
        assert 'it is undefined' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    try:
        assert f.outputs() == 0
    except RuntimeError as e:
        assert 'it is undefined' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    try:
        assert f.dimensions() == 0
    except RuntimeError as e:
        assert 'it is undefined' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    # Single required type: queries work although the Func is undefined.
    f = hl.Func(hl.Int(32), 2, 'f')
    assert not f.defined()
    assert f.output_type() == hl.Int(32)
    assert f.output_types() == [hl.Int(32)]
    assert f.outputs() == 1
    assert f.dimensions() == 2

    # Tuple-valued: output_type() is rejected, output_types() works.
    f = hl.Func([hl.Int(32), hl.Float(64)], 3, 'f')
    assert not f.defined()

    try:
        assert f.output_type() == hl.Int(32)
    except RuntimeError as e:
        assert 'it returns a Tuple' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    assert f.output_types() == [hl.Int(32), hl.Float(64)]
    assert f.outputs() == 2
    assert f.dimensions() == 3

    # Definition with the wrong number of dimensions.
    f = hl.Func(hl.Int(32), 1, 'f')
    try:
        f[x, y] = hl.i32(0)
        f.realize([10, 10])
    except RuntimeError as e:
        assert 'is constrained to have exactly 1 dimensions, but is defined with 2 dimensions' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    # Definition with the wrong value type.
    f = hl.Func(hl.Int(32), 2, 'f')
    try:
        f[x, y] = hl.i16(0)
        f.realize([10, 10])
    except RuntimeError as e:
        assert 'is constrained to only hold values of type int32 but is defined with values of type int16' in str(e)
    else:
        assert False, 'Did not see expected exception!'

    # Tuple definition with the wrong value types.
    f = hl.Func((hl.Int(32), hl.Float(32)), 2, 'f')
    try:
        f[x, y] = (hl.i16(0), hl.f64(0))
        f.realize([10, 10])
    except RuntimeError as e:
        assert 'is constrained to only hold values of type (int32, float32) but is defined with values of type (int16, float64)' in str(e)
    else:
        assert False, 'Did not see expected exception!'
def gen_g(self):
    ''' Define the g() function and the helper Funcs it depends on.

    Reads the loop variables from self.vars, the clamped inputs from
    self.clamps and the raw/scalar inputs from self.inputs, then defines a
    chain of Halide Funcs ending in the four-index Func "g".  Every Func
    created here is passed to self.add_funcs_by_name (defined elsewhere;
    presumably it registers the Func on this object -- confirm).

    When both self.tracing and self.tracing_g are true, a 4-D Float(64)
    ImageParam "g_trace_in" is created, stored in self.inputs, wrapped with a
    zero constant-exterior boundary condition, stored in self.clamps, and
    added to g.

    NOTE(review): `pi` is not defined in this block and must come from the
    enclosing module.  No value is returned in the portion visible here.
    '''
    # vars
    i, j, k, l = [self.vars[c] for c in "ijkl"]
    # clamped inputs
    x, y, z, expnt, fm, rnorm = [
        self.clamps[c] for c in ["x", "y", "z", "expnt", "fm", "rnorm"]
    ]
    # unclamped input (for sizing)
    fm_in = self.inputs["fm_in"]
    # scalar inputs
    delo2, delta, rdelta = [
        self.inputs[c] for c in ["delo2", "delta", "rdelta"]
    ]

    # Pairwise coordinate differences, squared distance, and the sum of the
    # two exponents together with its reciprocal.
    dx = hl.Func("dx")
    dy = hl.Func("dy")
    dz = hl.Func("dz")
    r2 = hl.Func("g_r2")
    expnt2 = hl.Func("expnt2")
    expnt_inv = hl.Func("expnt_inv")
    self.add_funcs_by_name([dx, dy, dz, r2, expnt2, expnt_inv])

    dx[i, j] = x[i] - x[j]
    dy[i, j] = y[i] - y[j]
    dz[i, j] = z[i] - z[j]

    r2[i, j] = dx[i, j] * dx[i, j] + dy[i, j] * dy[i, j] + dz[i, j] * dz[i, j]

    expnt2[i, j] = expnt[i] + expnt[j]
    expnt_inv[i, j] = hl.f64(1.0) / expnt2[i, j]

    # Exponential prefactor and denominator for each (i, j, k, l).
    fac2 = hl.Func("fac2")
    ex_arg = hl.Func("ex_arg")
    ex = hl.Func("ex")
    denom = hl.Func("denom")
    fac4d = hl.Func("fac4d")
    self.add_funcs_by_name([fac2, ex_arg, ex, denom, fac4d])

    fac2[i, j] = expnt[i] * expnt[j] * expnt_inv[i, j]
    ex_arg[i, j, k, l] = -fac2[i, j] * r2[i, j] - fac2[k, l] * r2[k, l]
    # Arguments below -37 give exactly zero instead of calling exp().
    ex[i, j, k, l] = hl.select(ex_arg[i, j, k, l] < hl.f64(-37.0),
                               hl.f64(0.0), hl.exp(ex_arg[i, j, k, l]))
    denom[i, j, k, l] = expnt2[i, j] * expnt2[k, l] * hl.sqrt(expnt2[i, j] + expnt2[k, l])
    fac4d[i, j, k, l] = expnt2[i, j] * expnt2[k, l] / (expnt2[i, j] + expnt2[k, l])

    # Exponent-weighted mean position of each pair, and the squared distance
    # between the mean positions of pair (i, j) and pair (k, l).
    x2 = hl.Func("g_x2")
    y2 = hl.Func("g_y2")
    z2 = hl.Func("g_z2")
    rpq2 = hl.Func("rpq2")
    self.add_funcs_by_name([x2, y2, z2, rpq2])
    x2[i, j] = (x[i] * expnt[i] + x[j] * expnt[j]) * expnt_inv[i, j]
    y2[i, j] = (y[i] * expnt[i] + y[j] * expnt[j]) * expnt_inv[i, j]
    z2[i, j] = (z[i] * expnt[i] + z[j] * expnt[j]) * expnt_inv[i, j]
    rpq2[i, j, k, l] = ((x2[i, j] - x2[k, l]) * (x2[i, j] - x2[k, l]) +
                        (y2[i, j] - y2[k, l]) * (y2[i, j] - y2[k, l]) +
                        (z2[i, j] - z2[k, l]) * (z2[i, j] - z2[k, l]))

    # f0val: closed form for large arguments, table lookup plus a polynomial
    # correction otherwise.  (Presumably the zeroth-order Boys function used
    # in two-electron integrals -- TODO confirm.)
    f0t = hl.Func("f0t")
    f0n = hl.Func("f0n")
    f0x = hl.Func("f0x")
    f0val = hl.Func("f0val")
    self.add_funcs_by_name([f0t, f0n, f0x, f0val])
    f0t[i, j, k, l] = fac4d[i, j, k, l] * rpq2[i, j, k, l]
    # Table row index, clamped to the first dimension of the unclamped fm_in.
    f0n[i, j, k, l] = hl.clamp(hl.i32((f0t[i, j, k, l] + delo2) * rdelta),
                               fm_in.dim(0).min(), fm_in.dim(0).max())
    # Offset between the tabulated point (delta * row) and the argument.
    f0x[i, j, k, l] = delta * f0n[i, j, k, l] - f0t[i, j, k, l]
    # For arguments >= 28 use 0.88622692545276 / sqrt(t); that constant is
    # numerically sqrt(pi) / 2.  Otherwise evaluate a nested polynomial in
    # f0x over columns 0..4 of fm with successive factors 1/2, 1/3 and 1/4.
    f0val[i, j, k, l] = hl.select(
        f0t[i, j, k, l] >= hl.f64(28.0),
        hl.f64(0.88622692545276) / hl.sqrt(f0t[i, j, k, l]),
        fm[f0n[i, j, k, l], 0] + f0x[i, j, k, l] *
        (fm[f0n[i, j, k, l], 1] + f0x[i, j, k, l] * hl.f64(0.5) *
         (fm[f0n[i, j, k, l], 2] + f0x[i, j, k, l] * hl.f64(1. / 3.) *
          (fm[f0n[i, j, k, l], 3] + f0x[i, j, k, l] * hl.f64(0.25) * fm[f0n[i, j, k, l], 4]))))

    g = hl.Func("g")
    self.add_funcs_by_name([g])

    if self.tracing and self.tracing_g:
        # Tracing: add an externally supplied 4-D buffer to g; reads outside
        # the buffer return 0.
        g_trace_in = hl.ImageParam(hl.Float(64), 4, "g_trace_in")
        g_trace = hl.BoundaryConditions.constant_exterior(g_trace_in, 0)
        self.inputs["g_trace_in"] = g_trace_in
        self.clamps["g_trace"] = g_trace
        g_trace.compute_root()
        g[i, j, k, l] = (hl.f64(2.00) * hl.f64(pow(pi, 2.50)) / denom[i, j, k, l]) * ex[i, j, k, l] * f0val[i, j, k, l] * rnorm[i] * rnorm[j] * rnorm[k] * rnorm[l] + g_trace[i, j, k, l]
    else:
        # Not used again in the code visible here.
        g_trace = None
        g[i, j, k, l] = (hl.f64(2.00) * hl.f64(pow(pi, 2.50)) / denom[i, j, k, l]) * ex[i, j, k, l] * f0val[i, j, k, l] * rnorm[i] * rnorm[j] * rnorm[k] * rnorm[l]