def merge_spatial(input):
    """Blend four overlapping tile samples per pixel with a cosine window.

    Args:
        input: Halide object indexed as ``input[ix, iy, tile_x, tile_y]``.

    Returns:
        The scheduled ``merge_spatial_output`` Func, indexed as ``[x, y]``
        and producing 16-bit unsigned values.
    """
    v = hl.Var('v')
    x = hl.Var('x')
    y = hl.Var('y')

    weight = hl.Func("raised_cosine_weights")
    output = hl.Func("merge_spatial_output")

    # Modified raised cosine window, tabulated along one axis.
    weight[v] = 0.5 - 0.5 * hl.cos(2 * math.pi * (v + 0.5) / TILE_SIZE)

    def corner(ix, iy, tx, ty):
        """Return one tile's sample scaled by its separable window weight."""
        tile_weight = weight[ix(x)] * weight[iy(y)]
        sample = input[ix(x), iy(y), tx(x), ty(y)]
        return tile_weight * sample

    # Summation order matches the 00, 10, 01, 11 corner order.
    blended = (corner(idx_0, idx_0, tile_0, tile_0)
               + corner(idx_1, idx_0, tile_1, tile_0)
               + corner(idx_0, idx_1, tile_0, tile_1)
               + corner(idx_1, idx_1, tile_1, tile_1))

    output[x, y] = hl.cast(hl.UInt(16), blended)

    # Schedule: precompute the window once, then parallel rows of vectors.
    weight.compute_root().vectorize(v, 32)
    output.compute_root().parallel(y).vectorize(x, 32)

    return output
def gauss(input, k, rdom, name):
    """Build a separable blur of ``input`` using the 1-D kernel ``k``.

    A horizontal pass (``name + "_x"``) is followed by a vertical pass
    (``name``). Inputs with two dimensions are indexed as ``[x, y]``; any
    other input is indexed as ``[x, y, c]``.

    Args:
        input: Halide Func to blur; must provide ``dimensions()`` and
            ``output_types()``.
        k: Kernel, indexed by ``rdom``.
        rdom: Reduction domain spanning the kernel taps.
        name: Name of the output Func; the intermediate gets ``name + "_x"``.

    Returns:
        The scheduled output Func. The result is cast to 16-bit unsigned
        when the first output type of ``input`` is ``UInt(16)``.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    xi, yi = hl.Var("xi"), hl.Var("yi")

    blur_x = hl.Func(name + "_x")
    output = hl.Func(name)

    # Coordinates following (x, y): none for 2-D input, the channel otherwise.
    # ``f[(a, b)]`` is the same subscript as ``f[a, b]``, so one code path
    # serves both ranks.
    rest = () if input.dimensions() == 2 else (c,)

    blur_x[(x, y) + rest] = hl.sum(input[(x + rdom, y) + rest] * k[rdom])
    val = hl.sum(blur_x[(x, y + rdom) + rest] * k[rdom])
    if input.output_types()[0] == hl.UInt(16):
        val = hl.u16(val)
    output[(x, y) + rest] = val

    # Schedule: horizontal pass computed per output x, output tiled.
    blur_x.compute_at(output, x).vectorize(x, 16)
    output.compute_root().tile(x, y, xi, yi, 256, 128).vectorize(xi, 16).parallel(y)

    return output
def main():
    """Auto-schedule a four-stage 1-D pipeline with Li2018 and run it."""
    hl.load_plugin("autoschedule_li2018")

    x = hl.Var('x')

    # Stage 0: the coordinate itself, cast to float 32.
    f_in = hl.Func('in')
    f_in[x] = hl.f32(x)

    # Stage 1: doubling.
    f_0 = hl.Func('f_0')
    f_0[x] = 2 * f_in[x]

    # Stage 2: sine.
    f_1 = hl.Func('f_1')
    f_1[x] = hl.sin(f_0[x])

    # Stage 3: square.
    f_2 = hl.Func('f_2')
    f_2[x] = f_1[x] * f_1[x]

    # The output extent estimate is set before auto-scheduling.
    f_2.set_estimate(x, 0, 1000)

    pipeline = hl.Pipeline(f_2)
    target = hl.Target()
    # Only first parameter is used (number of cores on CPU)
    machine = hl.MachineParams(32, 0, 0)
    result = pipeline.auto_schedule('Li2018', target, machine)

    print('Schedule:')
    print(result.schedule_source)

    pipeline.compile_jit()  # compile
    realized = pipeline.realize(1000)  # compute and get the buffer
def test_basics3():
    """Exercise in-place ``+=`` on a Func reference with a computed index."""
    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    input = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped_x = hl.clamp(x, 0, input.width() - 1)
    clamped_y = hl.clamp(y, 0, input.height() - 1)
    clamped[x, y] = input[clamped_x, clamped_y, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    half = s_sigma // 2
    val = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
    val = hl.clamp(val, 0.0, 1.0)
    zi = hl.i32((val / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    ss = hl.select(c == 0, val, 1.0)

    # Accumulate through a held reference, first a constant, then an Expr.
    left = histogram[x, y, zi, c]
    left += 5
    left += ss
def combine2(im1, im2, width, height, dist):
    """Blend two images using per-pixel weights looked up in ``dist``.

    Args:
        im1: First image, indexed as ``[x, y]``.
        im2: Second image, indexed as ``[x, y]``.
        width: Extent of the valid region along x.
        height: Extent of the valid region along y.
        dist: Lookup indexed by pixel value; its entries are used as weights.

    Returns:
        The ``combine_output`` Func: the blended value saturated to 16-bit
        unsigned.
    """
    x, y = hl.Var("x"), hl.Var("y")

    init_mask1 = hl.Func("mask1_layer_0")
    init_mask2 = hl.Func("mask2_layer_0")
    accumulator = hl.Func("combine_accumulator")
    output = hl.Func("combine_output")

    # Repeat edge pixels outside [0, width) x [0, height).
    bounds = [(0, width), (0, height)]
    im1_edge = hl.BoundaryConditions.repeat_edge(im1, bounds)
    im2_edge = hl.BoundaryConditions.repeat_edge(im2, [(0, width), (0, height)])

    weight1 = hl.f32(dist[im1_edge[x, y]])
    weight2 = hl.f32(dist[im2_edge[x, y]])

    # The two masks are complementary: they sum to 1 at every pixel.
    init_mask1[x, y] = weight1 / (weight1 + weight2)
    init_mask2[x, y] = 1 - init_mask1[x, y]

    accumulator[x, y] = hl.i32(0)
    first_part = hl.i32(im1_edge[x, y] * init_mask1[x, y])
    second_part = hl.i32(im2_edge[x, y] * init_mask2[x, y])
    accumulator[x, y] += first_part + second_part

    output[x, y] = hl.u16_sat(accumulator[x, y])

    # Schedule.
    init_mask1.compute_root().parallel(y).vectorize(x, 16)
    accumulator.compute_root().parallel(y).vectorize(x, 16)
    accumulator.update(0).parallel(y).vectorize(x, 16)

    return output
def test_basics():
    """Smoke-test expression building, scheduling and JIT compilation."""
    x, y = hl.Var('x'), hl.Var('y')
    input = hl.ImageParam(hl.UInt(16), 2, 'input')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    yy = hl.i32(1)
    assert yy.type() == hl.Int(32)

    z = x + 1

    # Bare expressions: each one only has to build without raising.
    input[x, y]
    input[0, 0]
    input[z, y]
    input[x + 1, y]
    input[x, y] + input[x + 1, y]

    # Deliberately disabled: reads of blur_x before it has a definition.
    if False:
        aa = blur_x[x, y]
        bb = blur_x[x, y + 1]
        aa + bb
        blur_x[x, y] + blur_x[x, y + 1]

    (input[x, y] + input[x + 1, y]) / 2
    blur_x[x, y]

    blur_xx[x, y] = input[x, y]
    blur_x[x, y] = (input[x, y] + input[x + 1, y] + input[x + 2, y]) / 3
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule, then compile.
    xi, yi = hl.Var('xi'), hl.Var('yi')
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)
    blur_y.compile_jit()
def histogram(x, y, c, img, w, h, hist_index):
    """Build a Func that counts how often each value of ``img`` occurs.

    Args:
        x: Unused in this function.
        y: Unused in this function.
        c: Unused in this function.
        img: Image indexed as ``img[x, y]``; its values select the bucket.
        w: Extent of the reduction along x.
        h: Extent of the reduction along y.
        hist_index: Variable indexing the histogram buckets.

    Returns:
        The raw-count ``histogram`` Func.
    """
    print("GET HIST ON: ", w, h)

    counts = hl.Func("histogram")

    # Histogram buckets start as zero.
    counts[hist_index] = 0

    # Multi-dimensional reduction domain over the input image. For every
    # point in it, increment the bucket matching the image value there.
    pixels = hl.RDom([(0, w), (0, h)])
    counts[hl.Expr(img[pixels.x, pixels.y])] += 1

    counts.set_estimate(hist_index, 0, 255)

    # Sum of the histogram cells, accumulated from a 0.0 initial value.
    # NOTE(review): this domain has extent 255, i.e. buckets 0..254 —
    # confirm whether bucket 255 is meant to be included.
    buckets = hl.RDom([(0, 255)])
    total = hl.Func('hist_sum')
    total[()] = 0.0
    total[()] += counts[buckets.x]

    # Each bucket as a fraction of the total.
    # NOTE(review): this Func is built but not returned; the raw counts are
    # returned instead — confirm which one callers expect.
    fraction = hl.Func('pct_hist')
    fraction[hist_index] = counts[hist_index] / total[()]

    return counts
def __init__(self, input):
    """Define a two-stage pipeline: sharpen, then apply a gamma LUT.

    Args:
        input: 8-bit unsigned image indexed as ``[x, y, c]``; must provide
            ``type()``, ``width()`` and ``height()``.
    """
    assert input.type() == hl.UInt(8)

    self.lut = hl.Func("lut")
    self.padded = hl.Func("padded")
    self.padded16 = hl.Func("padded16")
    self.sharpen = hl.Func("sharpen")
    self.curved = hl.Func("curved")
    self.input = input

    # The LUT is a gamma curve over the 0..255 range.
    gamma = hl.f32(1.2)
    curve = hl.pow(i / 255.0, gamma) * 255.0
    self.lut[i] = hl.u8(hl.clamp(curve, 0, 255))

    # Augment the input with a boundary condition.
    inside_x = hl.clamp(x, 0, input.width() - 1)
    inside_y = hl.clamp(y, 0, input.height() - 1)
    self.padded[x, y, c] = input[inside_x, inside_y, c]

    # Cast it to 16-bit to do the math.
    self.padded16[x, y, c] = hl.u16(self.padded[x, y, c])

    # Five-tap sharpen: twice the centre minus the mean of four neighbours.
    centre = self.padded16[x, y, c]
    neighbours = (self.padded16[x - 1, y, c] + self.padded16[x, y - 1, c]
                  + self.padded16[x + 1, y, c] + self.padded16[x, y + 1, c])
    self.sharpen[x, y, c] = centre * 2 - neighbours / 4

    # Then apply the LUT.
    self.curved[x, y, c] = self.lut[self.sharpen[x, y, c]]
def get_blur(input):
    """Build a scheduled 3x3 box blur over a 2-D image parameter.

    Args:
        input: A two-dimensional ``hl.ImageParam``.

    Returns:
        The ``blur_y`` Func, producing 8-bit unsigned values.
    """
    # isinstance is the idiomatic type check (it also accepts subclasses).
    assert isinstance(input, hl.ImageParam)
    assert input.dimensions() == 2

    x, y = hl.Var("x"), hl.Var("y")

    clamped_input = hl.BoundaryConditions.repeat_edge(input)

    # Work in 16 bits so the three-sample sums are computed on 16-bit values.
    input_uint16 = hl.Func("input_uint16")
    input_uint16[x, y] = hl.cast(hl.UInt(16), clamped_input[x, y])
    ci = input_uint16

    blur_x = hl.Func("blur_x")
    blur_y = hl.Func("blur_y")

    blur_x[x, y] = (ci[x, y] + ci[x + 1, y] + ci[x + 2, y]) / 3
    blur_y[x, y] = hl.cast(
        hl.UInt(8),
        (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3)

    # schedule
    xi, yi = hl.Var("xi"), hl.Var("yi")
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    return blur_y
def upsample2D(f):
    """Return a Func that doubles ``f`` in x and y with 1/4-3/4 blending.

    The two stage names embed ``upsample_counter[0]``, which is incremented
    on every call.
    """
    serial = upsample_counter[0]
    upx = hl.Func('upx%d' % serial)
    upy = hl.Func('upy%d' % serial)
    upsample_counter[0] += 1

    # Horizontal pass: 3/4 of the nearer source sample, 1/4 of the other.
    other_x = (x // 2) - 1 + 2 * (x % 2)
    upx[x, y] = 0.25 * f[other_x, y] + 0.75 * f[x // 2, y]

    # Vertical pass, same weights.
    other_y = (y // 2) - 1 + 2 * (y % 2)
    upy[x, y] = 0.25 * upx[x, other_y] + 0.75 * upx[x, y // 2]

    return upy
def downsample2D(f):
    """Return a Func that halves ``f`` in x and y with a 1-3-3-1 filter.

    The two stage names embed ``downsample_counter[0]``, which is
    incremented on every call.
    """
    serial = downsample_counter[0]
    downx = hl.Func('downx%d' % serial)
    downy = hl.Func('downy%d' % serial)
    downsample_counter[0] += 1

    # Horizontal pass: taps at 2x-1 .. 2x+2 weighted 1, 3, 3, 1 over 8.
    inner_x = f[2 * x, y] + f[2 * x + 1, y]
    downx[x, y] = (f[2 * x - 1, y] + 3.0 * inner_x + f[2 * x + 2, y]) / 8.0

    # Vertical pass with the same taps.
    inner_y = downx[x, 2 * y] + downx[x, 2 * y + 1]
    downy[x, y] = (downx[x, 2 * y - 1] + 3.0 * inner_y + downx[x, 2 * y + 2]) / 8.0

    return downy
def merge_temporal(images, alignment):
    """Merge aligned alternate frames into frame 0, tile by tile.

    Each alternate frame gets a per-tile weight derived from its summed
    absolute difference to frame 0 on a downsampled layer. The output is
    the weighted average of all frames.

    Args:
        images: Image stack indexed as ``[x, y, frame]``; frame 0 is the
            reference.
        alignment: Per-tile offsets indexed as ``[tx, ty, frame]``.

    Returns:
        The scheduled ``merge_temporal_output`` Func, indexed as
        ``[ix, iy, tx, ty]``.
    """
    weight = hl.Func("merge_temporal_weights")
    total_weight = hl.Func("merge_temporal_total_weights")
    output = hl.Func("merge_temporal_output")

    ix, iy = hl.Var('ix'), hl.Var('iy')
    tx, ty = hl.Var('tx'), hl.Var('ty')
    n = hl.Var('n')

    # 16x16 domain over one tile of the layer, then the alternate frames
    # (the frame index starts at 1).
    tile_dom = hl.RDom([(0, 16), (0, 16)])
    frame_dom = hl.RDom([(1, images.dim(2).extent() - 1)])

    imgs_mirror = hl.BoundaryConditions.mirror_interior(
        images, [(0, images.width()), (0, images.height())])
    layer = box_down2(imgs_mirror, "merge_layer")

    # --- Per-tile weights, measured on the downsampled layer ---
    layer_offset = Point(alignment[tx, ty, n]).clamp(
        Point(MINIMUM_OFFSET, MINIMUM_OFFSET),
        Point(MAXIMUM_OFFSET, MAXIMUM_OFFSET))
    # Offsets are halved when applied to the layer.
    layer_al_x = idx_layer(tx, tile_dom.x) + layer_offset.x / 2
    layer_al_y = idx_layer(ty, tile_dom.y) + layer_offset.y / 2

    layer_ref = layer[idx_layer(tx, tile_dom.x), idx_layer(ty, tile_dom.y), 0]
    layer_alt = layer[layer_al_x, layer_al_y, n]

    factor = 8.0
    min_distance = 10
    max_distance = 300  # max L1 distance, otherwise the value is not used

    distance = hl.sum(hl.abs(hl.cast(hl.Int(32), layer_ref)
                             - hl.cast(hl.Int(32), layer_alt))) / 256
    normal_distance = hl.max(
        1, hl.cast(hl.Int(32), distance) / factor - min_distance / factor)

    # Weight for the alternate frame
    weight[tx, ty, n] = hl.select(
        normal_distance > (max_distance - min_distance),
        0.0,
        1.0 / normal_distance)

    # The extra 1 is the reference frame's own weight.
    total_weight[tx, ty] = hl.sum(weight[tx, ty, frame_dom]) + 1

    # --- Weighted average at full resolution ---
    full_offset = Point(alignment[tx, ty, frame_dom])
    full_al_x = idx_im(tx, ix) + full_offset.x
    full_al_y = idx_im(ty, iy) + full_offset.y

    full_ref = imgs_mirror[idx_im(tx, ix), idx_im(ty, iy), 0]
    full_alt = imgs_mirror[full_al_x, full_al_y, frame_dom]

    # Sum all values according to their weight, and divide by total weight
    # to obtain average
    output[ix, iy, tx, ty] = (
        hl.sum(weight[tx, ty, frame_dom] * full_alt / total_weight[tx, ty])
        + full_ref / total_weight[tx, ty])

    # Schedule.
    weight.compute_root().parallel(ty).vectorize(tx, 16)
    total_weight.compute_root().parallel(ty).vectorize(tx, 16)
    output.compute_root().parallel(ty).vectorize(ix, 32)

    return output
def default_inline():
    """Realize a two-stage pipeline with no schedule and print its loops."""
    print("=" * 50)

    x = hl.Var("x")
    y = hl.Var("y")

    A = hl.Func("A_default")
    B = hl.Func("B_default")

    A[x, y] = x + 10 * y
    B[x, y] = A[x, y] + 1

    print("pipeline with default schedule: inline")
    print('-' * 50)

    # No scheduling calls are made on A or B.
    B.realize(w, h)
    B.print_loop_nest()
def compute_root():
    """Realize a two-stage pipeline with A at root and print its loops."""
    print("=" * 50)

    A = hl.Func("A_root")
    B = hl.Func("B_root")

    A[x, y] = x + 10 * y
    B[x, y] = A[x, y] + 1

    print("pipeline with schedule: A.compute_root()")
    print('-' * 50)

    # The only scheduling directive in this example.
    A.compute_root()

    B.realize(w, h)
    B.print_loop_nest()
def test_basics4():
    """Check that an update indexed through another Func compiles.

    Test for f[g[r]] = ...
    See https://github.com/halide/Halide/issues/4285
    """
    x = hl.Var('x')
    f = hl.Func('f')
    g = hl.Func('g')

    g[x] = 1
    f[x] = 0.0

    r = hl.RDom([(0, 100)])
    # 2.5 is exactly representable as a 32-bit float, so storing it does not
    # trigger the double-to-float conversion warning that 2.3 did. The
    # stored value itself is irrelevant to what this test checks.
    f[g[r]] = 2.5

    f.compute_root()
    f.compile_jit()
def test_basics4():
    """Check that an update indexed through another Func compiles.

    Test for f[g[r]] = ...
    See https://github.com/halide/Halide/issues/4285
    """
    x = hl.Var('x')

    f = hl.Func('f')
    g = hl.Func('g')

    # Pure definitions.
    g[x] = 1
    f[x] = 0.0

    # Update whose store index is itself a Func call over the domain.
    r = hl.RDom([(0, 100)])
    f[g[r]] = 2.5

    f.compute_root()
    f.compile_jit()
def test_mux_tuple():
    """Check ``hl.mux`` selecting among the elements of a tuple-valued Func."""
    f = hl.Func()
    g = hl.Func()
    x = hl.Var()
    c = hl.Var()

    # g yields three values per point; f picks one of them by index c.
    g[x] = (123, 456, x)
    f[x, c] = hl.mux(c, g[x])

    b = f.realize(1, 4)

    # The buffer holds one x position (x == 0) and four selector values.
    expected = (123, 456, 0, 0)
    assert b[0, 0] == expected[0]
    assert b[0, 1] == expected[1]
    assert b[0, 2] == expected[2]
    assert b[0, 3] == expected[3]
def upsample(f): nonlocal n_upsamples upx, upy = hl.Func('upx%i' % n_upsamples), hl.Func('upy%i' % n_upsamples) n_upsamples += 1 upx[x, y, c] = 0.25 * f[(x // 2) - 1 + 2 * (x % 2), y, c] + 0.75 * f[x // 2, y, c] upy[x, y, c] = 0.25 * upx[x, (y // 2) - 1 + 2 * (y % 2), c] + 0.75 * upx[x, y // 2, c] return upy
def downsample2D(f): nonlocal n_downsamples downx, downy = hl.Func('downx%i' % n_downsamples), hl.Func( 'downy%i' % n_downsamples) n_downsamples += 1 downx[x, y] = (f[2 * x - 1, y] + 3.0 * (f[2 * x, y] + f[2 * x + 1, y]) + f[2 * x + 2, y]) / 8.0 downy[x, y] = (downx[x, 2 * y - 1] + 3.0 * (downx[x, 2 * y] + downx[x, 2 * y + 1]) + downx[x, 2 * y + 2]) / 8.0 return downy
def test_basics():
    """Smoke-test expression building and JIT compilation, with progress prints."""
    x, y = hl.Var('x'), hl.Var('y')
    input = hl.ImageParam(hl.UInt(16), 2, 'input')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    yy = hl.cast(hl.Int(32), 1)
    assert yy.type() == hl.Int(32)
    print("yy type:", yy.type())

    z = x + 1

    # Bare expressions: each one only has to build without raising.
    input[x, y]
    input[0, 0]
    input[z, y]
    input[x + 1, y]
    print("ping 0.2")
    input[x, y] + input[x + 1, y]

    # Deliberately disabled: reads of blur_x before it has a definition.
    if False:
        aa = blur_x[x, y]
        bb = blur_x[x, y + 1]
        aa + bb
        blur_x[x, y] + blur_x[x, y + 1]

    print("ping 0.3")
    (input[x, y] + input[x + 1, y]) / 2
    print("ping 0.4")
    blur_x[x, y]
    print("ping 0.4.1")
    blur_xx[x, y] = input[x, y]
    print("ping 0.5")
    blur_x[x, y] = (input[x, y] + input[x + 1, y] + input[x + 2, y]) / 3
    print("ping 1")
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    xi, yi = hl.Var('xi'), hl.Var('yi')
    print("ping 2")
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
    print("Compiled to jit")
def test_generate_halide(self):
    """Run ``generate_halide`` on a recursively split zone with stub inputs."""
    zone = self.define_original_twoel()
    decomposed = zone.split_recursive()

    # One Halide Var per loop index letter.
    self.vars = {letter: hl.Var(letter) for letter in "ijkl"}
    i = self.vars["i"]
    j = self.vars["j"]
    k = self.vars["k"]
    l = self.vars["l"]

    # Stand-in definitions for the two inputs.
    g_dens = hl.Func("g_dens")
    g_dens[i, j] = i * j

    g = hl.Func("g")
    g[i, j, k, l] = hl.cos(i * j) * hl.sin(k * l)

    # Three separate dicts with identical contents.
    stubs = {"g": g, "g_dens": g_dens}
    self.inputs = dict(stubs)
    self.clamps = dict(stubs)
    self.funcs = dict(stubs)
    self.loopnest_funcs = {}

    generated = decomposed.generate_halide(self, [8, 8, 8, 8])
def align_images(images):
    """Build the coarse-to-fine tile alignment for an image stack.

    Args:
        images: Image stack providing ``width()`` and ``height()``.

    Returns:
        A Func indexed as ``[tx, ty, n]``: the alignment at original-image
        scale, with edge tiles repeated outside the tile grid.
    """
    print(f'\n{"=" * 30}\nAligning images...\n{"=" * 30}')
    start = datetime.utcnow()

    alignment_3 = hl.Func("layer_3_alignment")
    alignment = hl.Func("alignment")

    tx, ty, n = hl.Var('tx'), hl.Var('ty'), hl.Var('n')

    print('Subsampling image layers...')
    bounds = [(0, images.width()), (0, images.height())]
    imgs_mirror = hl.BoundaryConditions.mirror_interior(images, bounds)

    # Each consecutive layer is downsampled by a factor of 4 (2 in both x-
    # and y-dimensions)
    layer_0 = box_down2(imgs_mirror, "layer_0")
    layer_1 = gaussian_down4(layer_0, "layer_1")
    layer_2 = gaussian_down4(layer_1, "layer_2")

    # Search regions; each finer level scales the coarser bound and widens
    # it by the per-level search window.
    min_search = Point(-4, -4)
    max_search = Point(3, 3)

    min_3 = Point(0, 0)
    min_2 = DOWNSAMPLE_RATE * min_3 + min_search
    min_1 = DOWNSAMPLE_RATE * min_2 + min_search

    max_3 = Point(0, 0)
    max_2 = DOWNSAMPLE_RATE * max_3 + max_search
    max_1 = DOWNSAMPLE_RATE * max_2 + max_search

    print('Aligning layers...')
    alignment_3[tx, ty, n] = Point(0, 0)  # Initial alignment (0,0)

    # Align layers of the gaussian pyramid from coarse to fine, passing the
    # previous alignment as the initial guess for the next one.
    alignment_2 = align_layer(layer_2, alignment_3, min_3, max_3)
    alignment_1 = align_layer(layer_1, alignment_2, min_2, max_2)
    alignment_0 = align_layer(layer_0, alignment_1, min_1, max_1)

    # Number of tiles along each axis.
    num_tx = math.floor(images.width() / TILE_SIZE_2 - 1)
    num_ty = math.floor(images.height() / TILE_SIZE_2 - 1)

    # Alignment of the original image.
    alignment[tx, ty, n] = 2 * Point(alignment_0[tx, ty, n])

    alignment_repeat = hl.BoundaryConditions.repeat_edge(
        alignment, [(0, num_tx), (0, num_ty)])

    print(f'Alignment finished in {time_diff(start)} ms.\n')
    return alignment_repeat
def main():
    """Trace a serial gradient and a row-parallel one over an 8x8 region.

    Returns:
        0 on completion.
    """
    x, y = hl.Var("x"), hl.Var("y")

    # Baseline: the gradient with the default schedule. Tracing stores makes
    # Halide report every evaluation of gradient(x, y).
    gradient = hl.Func("gradient")
    gradient[x, y] = x + y
    gradient.trace_stores()

    print("Evaluating gradient")
    output = gradient.realize(8, 8)

    # Same algorithm under a different schedule. Changing the schedule does
    # not change the code that describes the algorithm.
    parallel_gradient = hl.Func("parallel_gradient")
    parallel_gradient[x, y] = x + y
    parallel_gradient.trace_stores()

    # Use a parallel for loop over the y coordinate. On linux this runs on
    # a thread pool and a task queue; on os x it calls into grand central
    # dispatch, which does the same thing.
    parallel_gradient.parallel(y)

    # This time the traced stores should come out of order, because each
    # scanline is potentially processed in a different thread. The number
    # of threads adapts to the system; on linux it can be controlled with
    # the environment variable HL_NUMTHREADS.
    print("\nEvaluating parallel_gradient")
    parallel_gradient.realize(8, 8)

    print("Success!")
    return 0
def test_member_logical_not_function():
    """Check the ``logical_not()`` method against a Python-side comparison."""
    x = hl.Var('x')

    f = hl.Func('f')
    f[x] = x > 5

    not_f = hl.Func('not_f')
    not_f[x] = f[x].logical_not()

    f_out = f.realize(10)
    not_f_out = not_f.realize(10)

    for i in range(10):
        above = i > 5
        assert f_out[i] == above
        assert not_f_out[i] == (not above)
def resize_scale(input, fx, fy):
    """Build a Func that resamples ``input`` by index scaling.

    Output pixel ``(x, y, c)`` reads ``input`` at ``(x / fx, y / fy, c)``,
    with each scaled index cast to a 32-bit integer.

    Args:
        input: Image indexed as ``[x, y, c]``.
        fx: Scale factor along x.
        fy: Scale factor along y.

    Returns:
        The ``final`` Func, indexed as ``[x, y, c]``.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")

    # Source index lookups, traced so their evaluations can be inspected.
    index_x = hl.Func("index_x")
    index_y = hl.Func("index_y")
    index_x.trace_stores()
    index_y.trace_stores()

    index_x[x] = hl.cast(hl.Int(32), x / fx)
    index_y[y] = hl.cast(hl.Int(32), y / fy)

    final = hl.Func("final")
    final[x, y, c] = input[index_x[x], index_y[y], c]
    return final
def test_free_logical_not_function():
    """Check the free ``hl.logical_not`` against a Python-side comparison."""
    x = hl.Var('x')

    f = hl.Func('f')
    f[x] = x > 5

    not_f = hl.Func('not_f')
    not_f[x] = hl.logical_not(f[x])

    f_out = f.realize([10])
    not_f_out = not_f.realize([10])

    for i in range(10):
        above = i > 5
        assert f_out[i] == above
        assert not_f_out[i] == (not above)
def main():
    """Compile a trivial Func to each output format and check the files exist.

    Returns:
        0 on completion.
    """
    x = h.Var("x")
    f = h.Func("f")
    f[x] = 100 * x

    args = []

    # Emitters that share the (path, args, function-name) call shape.
    named_outputs = (
        (f.compile_to_bitcode, "f.bc"),
        (f.compile_to_c, "f.cpp"),
        (f.compile_to_object, "f.o"),
        (f.compile_to_header, "f.h"),
        (f.compile_to_assembly, "f.s"),
    )
    for emit, path in named_outputs:
        emit(path, args, "f")
        assert os.path.isfile(path)

    # The lowered statement dump takes no function name.
    f.compile_to_lowered_stmt("f.txt", args)
    assert os.path.isfile("f.txt")

    # compile_to_file is checked for a header and an object file.
    f.compile_to_file("f_all", args)
    assert os.path.isfile("f_all.h")
    assert os.path.isfile("f_all.o")

    print("Success!")
    return 0
def test_basics2():
    """Check which index expressions a Func call accepts and rejects."""
    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1),
                          0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')

    # Integer-valued index expressions: each must build without raising.
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.i32(1), y * s_sigma * hl.i32(1)]
    val22 = clamped[x * s_sigma - hl.i32(s_sigma // 2),
                    y * s_sigma - hl.i32(s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]

    # True division makes the index a float, which must be rejected.
    caught = None
    try:
        val1 = clamped[x * s_sigma - s_sigma / 2, y * s_sigma - s_sigma / 2]
    except RuntimeError as e:
        caught = e

    assert caught is not None, 'Did not see expected exception!'
    assert 'Implicit cast from float32 to int' in str(caught)
def test_basics2():
    """Check that integer-valued index expressions build without raising."""
    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    input = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1),
                          0]

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')

    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # The true-division variant (s_sigma / 2 as the offset) is left out on
    # purpose: it should fail.
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]
def desaturate_noise(input, width, height):
    """Blend channels 1 and 2 towards a blurred copy where values are small.

    Channel values are replaced by ``0.7 * blur + 0.3 * input`` when the
    blur-to-input magnitude ratio is below 1.4 and both magnitudes are below
    25000; otherwise the input value is kept. All other channels are copied
    unchanged.

    Args:
        input: Image indexed as ``[x, y, c]``.
        width: Extent of the valid region along x.
        height: Extent of the valid region along y.

    Returns:
        The scheduled ``desaturate_noise_output`` Func.
    """
    print(' desaturate_noise')

    output = hl.Func("desaturate_noise_output")
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")

    input_mirror = hl.BoundaryConditions.mirror_image(
        input, [(0, width), (0, height)])

    # Two passes of the 15x15 gaussian over the mirrored input.
    first_pass = gauss_15x15(input_mirror, "desaturate_noise_blur1")
    blur = gauss_15x15(first_pass, "desaturate_noise_blur_2")

    factor = 1.4
    threshold = 25000

    # Pure definition: pass every channel through.
    output[x, y, c] = input[x, y, c]

    # Update definitions for channels 1 and 2, in that order.
    for channel in (1, 2):
        blurred = blur[x, y, channel]
        original = input[x, y, channel]
        keep_blur = ((hl.abs(blurred) / hl.abs(original) < factor)
                     & (hl.abs(original) < threshold)
                     & (hl.abs(blurred) < threshold))
        output[x, y, channel] = hl.select(
            keep_blur,
            0.7 * blurred + 0.3 * original,
            original)

    output.compute_root().parallel(y).vectorize(x, 16)

    return output