def test_basics3():
    """Build the histogram stage of a bilateral grid and apply ``+=`` to an indexed Func.

    Nothing is realized or compiled here; the test passes when every
    expression below can be constructed without raising.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    # Add a boundary condition: clamp coordinates into the image, channel 0 only.
    clamped = hl.Func('clamped')
    col = hl.clamp(x, 0, image.width() - 1)
    row = hl.clamp(y, 0, image.height() - 1)
    clamped[x, y] = image[col, row, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    half = s_sigma // 2  # plain Python int, keeps the index expressions integral
    sample = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
    sample = hl.clamp(sample, 0.0, 1.0)
    zi = hl.i32((sample / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    ss = hl.select(c == 0, sample, 1.0)

    # Exercise in-place addition on the indexed Func: a Python int first,
    # then a Halide expression.
    left = histogram[x, y, zi, c]
    left += 5
    left += ss
def test_rdom():
    """Check that an RDom predicate restricts an update to ``r.x <= r.y``.

    ``diagonal`` starts as 1 everywhere; the update writes 2 only where the
    reduction domain's predicate holds. Returns 0 after printing a success
    message.
    """
    domain_width = 10
    domain_height = 10

    x = hl.Var("x")
    y = hl.Var("y")

    diagonal = hl.Func("diagonal")
    diagonal[x, y] = 1

    r = hl.RDom(0, domain_width, 0, domain_height)
    r.where(r.x <= r.y)
    diagonal[r.x, r.y] = 2

    output = diagonal.realize(domain_width, domain_height)

    for iy in range(domain_height):
        for ix in range(domain_width):
            # 2 where the predicate held, the pure definition's 1 elsewhere.
            expected = 2 if ix <= iy else 1
            assert output(ix, iy) == expected

    print("Success!")
    return 0
def schedule_test(f, vector_width, target):
    """Apply the test schedule to ``f``.

    ``f`` is vectorized over the module-level ``x`` unless ``vector_width``
    is 1. When ``target`` reports a GPU feature and ``vector_width`` is at
    most 16, ``f`` is additionally GPU-tiled over the module-level ``x`` and
    ``y`` with 2x2 tiles.
    """
    wants_vectorization = vector_width != 1
    if wants_vectorization:
        f.vectorize(x, vector_width)

    if not (target.has_gpu_feature() and vector_width <= 16):
        return

    # Fresh anonymous loop variables for the tile's outer/inner dimensions.
    xo = hl.Var()
    yo = hl.Var()
    xi = hl.Var()
    yi = hl.Var()
    f.gpu_tile(x, y, xo, yo, xi, yi, 2, 2)
def desaturate_noise(input, width, height):
    """Blend channels 1 and 2 of ``input`` towards a twice-blurred copy.

    Channel values are replaced by ``0.7 * blur + 0.3 * input`` where the
    blurred/original magnitude ratio is below 1.4 and both magnitudes are
    below 25000; elsewhere, and for every other channel, the input value is
    passed through. Returns the scheduled output Func.
    """
    print(' desaturate_noise')

    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    output = hl.Func("desaturate_noise_output")

    bounds = [(0, width), (0, height)]
    input_mirror = hl.BoundaryConditions.mirror_image(input, bounds)
    first_pass = gauss_15x15(input_mirror, "desaturate_noise_blur1")
    blur = gauss_15x15(first_pass, "desaturate_noise_blur_2")

    factor = 1.4
    threshold = 25000

    # Pure definition: pass everything through unchanged.
    output[x, y, c] = input[x, y, c]

    # One update definition per treated channel, in channel order.
    for channel in (1, 2):
        qualifies = ((hl.abs(blur[x, y, channel]) / hl.abs(input[x, y, channel]) < factor)
                     & (hl.abs(input[x, y, channel]) < threshold)
                     & (hl.abs(blur[x, y, channel]) < threshold))
        blended = 0.7 * blur[x, y, channel] + 0.3 * input[x, y, channel]
        output[x, y, channel] = hl.select(qualifies, blended, input[x, y, channel])

    output.compute_root().parallel(y).vectorize(x, 16)
    return output
def test_basics():
    """Smoke-test indexing, arithmetic, scheduling and JIT compilation of a 3x3 blur.

    Most statements are bare expressions whose only purpose is to verify
    that they can be constructed without raising.
    """
    src = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x, blur_xx, blur_y = hl.Func('blur_x'), hl.Func('blur_xx'), hl.Func('blur_y')

    one = hl.i32(1)
    assert one.type() == hl.Int(32)

    # Indexing an ImageParam with Vars, ints and derived expressions.
    z = x + 1
    src[x, y]
    src[0, 0]
    src[z, y]
    src[x + 1, y]
    src[x, y] + src[x + 1, y]

    # Disabled in the original test; kept disabled.
    if False:
        aa = blur_x[x, y]
        bb = blur_x[x, y + 1]
        aa + bb
        blur_x[x, y] + blur_x[x, y + 1]

    (src[x, y] + src[x + 1, y]) / 2
    blur_x[x, y]

    # Definitions.
    blur_xx[x, y] = src[x, y]
    blur_x[x, y] = (src[x, y] + src[x + 1, y] + src[x + 2, y]) / 3
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule and compile.
    xi = hl.Var('xi')
    yi = hl.Var('yi')
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)
    blur_y.compile_jit()
def combine2(im1, im2, width, height, dist):
    """Blend ``im1`` and ``im2`` using per-pixel weights looked up in ``dist``.

    Each image's edge-repeated pixel value indexes ``dist`` to get a weight;
    the two weights are normalized into complementary masks, the masked
    images are summed as int32 and the result is saturated to uint16.
    Returns the (unscheduled) output Func.
    """
    x, y = hl.Var("x"), hl.Var("y")

    mask_a = hl.Func("mask1_layer_0")
    mask_b = hl.Func("mask2_layer_0")
    accumulator = hl.Func("combine_accumulator")
    output = hl.Func("combine_output")

    im1_mirror = hl.BoundaryConditions.repeat_edge(im1, [(0, width), (0, height)])
    im2_mirror = hl.BoundaryConditions.repeat_edge(im2, [(0, width), (0, height)])

    weight1 = hl.f32(dist[im1_mirror[x, y]])
    weight2 = hl.f32(dist[im2_mirror[x, y]])

    # Complementary masks: the two always sum to 1.
    mask_a[x, y] = weight1 / (weight1 + weight2)
    mask_b[x, y] = 1 - mask_a[x, y]

    accumulator[x, y] = hl.i32(0)
    accumulator[x, y] += (hl.i32(im1_mirror[x, y] * mask_a[x, y])
                          + hl.i32(im2_mirror[x, y] * mask_b[x, y]))

    output[x, y] = hl.u16_sat(accumulator[x, y])

    # Schedule: only the first mask and the accumulator (pure + update) are
    # given explicit schedules.
    mask_a.compute_root().parallel(y).vectorize(x, 16)
    accumulator.compute_root().parallel(y).vectorize(x, 16)
    accumulator.update(0).parallel(y).vectorize(x, 16)

    return output
def merge_spatial(input):
    """Blend four tile samples per pixel with raised-cosine weights.

    For each output pixel, ``input`` is sampled at the four combinations of
    the ``idx_0``/``idx_1`` and ``tile_0``/``tile_1`` helpers in x and y.
    Each sample is weighted by the product of the 1-D window values at its
    in-tile indices, and the weighted sum is cast to uint16.
    Returns the scheduled output Func.
    """
    v, x, y = hl.Var('v'), hl.Var('x'), hl.Var('y')

    weight = hl.Func("raised_cosine_weights")
    output = hl.Func("merge_spatial_output")

    # modified raised cosine window
    weight[v] = 0.5 - 0.5 * hl.cos(2 * math.pi * (v + 0.5) / TILE_SIZE)

    # Separable 2-D weights; suffix digits are (x-variant, y-variant).
    w00 = weight[idx_0(x)] * weight[idx_0(y)]
    w10 = weight[idx_1(x)] * weight[idx_0(y)]
    w01 = weight[idx_0(x)] * weight[idx_1(y)]
    w11 = weight[idx_1(x)] * weight[idx_1(y)]

    # Matching samples from the tiled input.
    s00 = input[idx_0(x), idx_0(y), tile_0(x), tile_0(y)]
    s10 = input[idx_1(x), idx_0(y), tile_1(x), tile_0(y)]
    s01 = input[idx_0(x), idx_1(y), tile_0(x), tile_1(y)]
    s11 = input[idx_1(x), idx_1(y), tile_1(x), tile_1(y)]

    blended = w00 * s00 + w10 * s10 + w01 * s01 + w11 * s11
    output[x, y] = hl.cast(hl.UInt(16), blended)

    weight.compute_root().vectorize(v, 32)
    output.compute_root().parallel(y).vectorize(x, 32)

    return output
def gauss(input, k, rdom, name):
    """Build a separable convolution of ``input`` with kernel ``k``.

    The kernel is applied along x into an intermediate Func named
    ``name + "_x"`` and then along y into the Func named ``name``. Both
    passes reduce over ``rdom``.

    Args:
        input: Func-like object exposing ``dimensions()`` and
            ``output_types()``. A 2-D input is indexed as ``[x, y]``; any
            other dimensionality is indexed as ``[x, y, c]``.
        k: kernel, indexed by ``rdom``.
        rdom: reduction domain spanning the kernel taps.
        name: name of the output Func.

    Returns:
        The scheduled output Func. When the input's first output type is
        uint16 the result is cast back to uint16.
    """
    blur_x = hl.Func(name + "_x")
    output = hl.Func(name)
    x, y, c, xi, yi = hl.Var("x"), hl.Var("y"), hl.Var("c"), hl.Var("xi"), hl.Var("yi")

    # Trailing channel coordinate, present only for non-2-D inputs.
    # Indexing with an explicit tuple is the same call as f[a, b, ...].
    channel = () if input.dimensions() == 2 else (c,)

    blur_x[(x, y) + channel] = hl.sum(input[(x + rdom, y) + channel] * k[rdom])
    val = hl.sum(blur_x[(x, y + rdom) + channel] * k[rdom])
    if input.output_types()[0] == hl.UInt(16):
        val = hl.u16(val)
    output[(x, y) + channel] = val

    blur_x.compute_at(output, x).vectorize(x, 16)
    output.compute_root().tile(x, y, xi, yi, 256, 128).vectorize(xi, 16).parallel(y)

    return output
def test_basics2():
    """Index a clamped Func with integer expressions and reject a float index.

    Integer-valued index expressions must be accepted. An index built with
    true division (``s_sigma / 2``) must raise RuntimeError mentioning the
    implicit float32-to-int cast.
    """
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8

    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1),
                          0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')

    # Accepted: every index expression is integer-typed.
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.i32(1), y * s_sigma * hl.i32(1)]
    val22 = clamped[x * s_sigma - hl.i32(s_sigma // 2),
                    y * s_sigma - hl.i32(s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]

    # Rejected: true division yields a float index.
    saw_error = False
    try:
        val1 = clamped[x * s_sigma - s_sigma / 2, y * s_sigma - s_sigma / 2]
    except RuntimeError as err:
        saw_error = True
        assert 'Implicit cast from float32 to int' in str(err)
    assert saw_error, 'Did not see expected exception!'
def test_basics2():
    """Index a clamped Func with several integer-typed index expressions.

    Nothing is realized; the test passes when every indexing expression can
    be built without raising.
    """
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x, y = hl.Var('x'), hl.Var('y')
    z, c = hl.Var('z'), hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1),
                          0]

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')

    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # val1 = clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]  # should fail
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]
def get_blur(input):
    """Build a scheduled two-pass 3-tap blur over a 2-D ImageParam.

    The input is edge-repeated and widened to uint16. ``blur_x`` averages
    three horizontally adjacent samples and ``blur_y`` averages three
    vertically adjacent ``blur_x`` values, cast to uint8.

    Args:
        input: a 2-dimensional ``hl.ImageParam``.

    Returns:
        The scheduled ``blur_y`` Func.

    Raises:
        AssertionError: if ``input`` is not an ``hl.ImageParam`` or is not
            2-dimensional.
    """
    # isinstance instead of an exact type comparison: the idiomatic check,
    # which also accepts ImageParam subclasses.
    assert isinstance(input, hl.ImageParam)
    assert input.dimensions() == 2

    x, y = hl.Var("x"), hl.Var("y")

    clamped_input = hl.BoundaryConditions.repeat_edge(input)

    # Widen to uint16 before summing three samples.
    input_uint16 = hl.Func("input_uint16")
    input_uint16[x, y] = hl.cast(hl.UInt(16), clamped_input[x, y])
    ci = input_uint16

    blur_x = hl.Func("blur_x")
    blur_y = hl.Func("blur_y")

    blur_x[x, y] = (ci[x, y] + ci[x + 1, y] + ci[x + 2, y]) / 3
    blur_y[x, y] = hl.cast(
        hl.UInt(8), (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3)

    # schedule
    xi, yi = hl.Var("xi"), hl.Var("yi")
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    return blur_y
def brighten(input, gain):
    """Return a Func that multiplies ``input`` by ``gain``, saturated to uint16.

    Each pixel is widened to uint32 before the multiply and the product is
    converted with ``hl.u16_sat``.
    """
    x, y = hl.Var("x"), hl.Var("y")
    output = hl.Func("brighten_output")

    widened = hl.u32(input[x, y])
    output[x, y] = hl.u16_sat(gain * widened)

    return output
def test_basics2():
    """Check Expr result types for Var/int arithmetic, then index a clamped Func.

    Prints a set of diagnostic values, asserts the Halide type produced by
    mixing a Var with Python ints and floats, and finally builds several
    integer-indexed accesses into ``clamped``. Nothing is realized.
    """
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)  # Value needed if not generating an executable
    s_sigma = 8  # This is passed during code generation in the C++ version

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1),
                          0]

    # Diagnostics. The original guarded these with a redundant `if True:`.
    print("s_sigma", s_sigma)
    print("s_sigma/2", s_sigma / 2)
    print("s_sigma//2", s_sigma // 2)
    print()
    print("x * s_sigma", x * s_sigma)
    print("x * 8", x * 8)
    print("x * 8 + 4", x * 8 + 4)
    print("x * 8 * 4", x * 8 * 4)
    print()
    print("x", x)
    # Fixed: the original printed this label without the value it names.
    print("(x * s_sigma).type()", (x * s_sigma).type())
    print("(x * 8).type()", (x * 8).type())
    print("(x * 8 + 4).type()", (x * 8 + 4).type())
    print("(x * 8 * 4).type()", (x * 8 * 4).type())
    print("(x * 8 / 4).type()", (x * 8 / 4).type())
    print("((x * 8) * 4).type()", ((x * 8) * 4).type())
    print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

    assert (x * 8).type() == hl.Int(32)
    assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
    assert ((x * 8) / 4).type() == hl.Int(32)
    assert (x * (8 / 4)).type() == hl.Float(32)  # under python3 division rules
    assert (x * (8 // 4)).type() == hl.Int(32)
    # assert (x * 8 // 4).type() == hl.Int(32)  # not yet implemented

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # val1 = clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]  # should fail
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]

    return
def merge_temporal(images, alignment):
    """Merge alternate frames into the reference frame, weighted by tile similarity.

    Stage 1 compares each tile of frame ``n`` against frame 0 on a layer
    produced by ``box_down2``, using a 16x16 sum of absolute differences,
    and turns that distance into a weight. Stage 2 sums the aligned
    alternate frames by weight at the resolution of ``images``, adds the
    reference frame with weight 1 and divides by the total weight.
    Returns the scheduled output Func, indexed ``[ix, iy, tx, ty]``.
    """
    ix, iy, tx, ty, n = hl.Var('ix'), hl.Var('iy'), hl.Var('tx'), hl.Var('ty'), hl.Var('n')

    weight = hl.Func("merge_temporal_weights")
    total_weight = hl.Func("merge_temporal_total_weights")
    output = hl.Func("merge_temporal_output")

    # rdom0 spans a 16x16 window; rdom1 spans frames 1 .. extent-1.
    rdom0 = hl.RDom([(0, 16), (0, 16)])
    rdom1 = hl.RDom([(1, images.dim(2).extent() - 1)])

    imgs_mirror = hl.BoundaryConditions.mirror_interior(
        images, [(0, images.width()), (0, images.height())])
    layer = box_down2(imgs_mirror, "merge_layer")

    # ---- Stage 1: per-tile weights, computed on the box_down2 layer ----
    low = Point(MINIMUM_OFFSET, MINIMUM_OFFSET)
    high = Point(MAXIMUM_OFFSET, MAXIMUM_OFFSET)
    layer_offset = Point(alignment[tx, ty, n]).clamp(low, high)

    # The offset is halved before being applied to layer coordinates.
    layer_alt_x = idx_layer(tx, rdom0.x) + layer_offset.x / 2
    layer_alt_y = idx_layer(ty, rdom0.y) + layer_offset.y / 2

    layer_ref = layer[idx_layer(tx, rdom0.x), idx_layer(ty, rdom0.y), 0]
    layer_alt = layer[layer_alt_x, layer_alt_y, n]

    factor = 8.0
    min_distance = 10
    max_distance = 300  # max L1 distance, otherwise the value is not used

    abs_diff = hl.abs(hl.cast(hl.Int(32), layer_ref) - hl.cast(hl.Int(32), layer_alt))
    distance = hl.sum(abs_diff) / 256

    normal_distance = hl.max(
        1, hl.cast(hl.Int(32), distance) / factor - min_distance / factor)

    # Weight for the alternate frame
    too_far = normal_distance > (max_distance - min_distance)
    weight[tx, ty, n] = hl.select(too_far, 0.0, 1.0 / normal_distance)

    # The extra 1 is the reference frame's own weight.
    total_weight[tx, ty] = hl.sum(weight[tx, ty, rdom1]) + 1

    # ---- Stage 2: weighted average at the resolution of `images` ----
    full_offset = Point(alignment[tx, ty, rdom1])

    full_alt_x = idx_im(tx, ix) + full_offset.x
    full_alt_y = idx_im(ty, iy) + full_offset.y

    full_ref = imgs_mirror[idx_im(tx, ix), idx_im(ty, iy), 0]
    full_alt = imgs_mirror[full_alt_x, full_alt_y, rdom1]

    # Sum all values according to their weight, and divide by total weight to obtain average
    weighted_alternates = hl.sum(weight[tx, ty, rdom1] * full_alt / total_weight[tx, ty])
    output[ix, iy, tx, ty] = weighted_alternates + full_ref / total_weight[tx, ty]

    weight.compute_root().parallel(ty).vectorize(tx, 16)
    total_weight.compute_root().parallel(ty).vectorize(tx, 16)
    output.compute_root().parallel(ty).vectorize(ix, 32)

    return output
def black_white_level(input, black_point, white_point):
    """Return a Func that rescales ``input`` between the black and white points.

    Each pixel is converted to int32, offset by ``black_point``, scaled by
    ``65535 / (white_point - black_point)`` and saturated to uint16.
    """
    x, y = hl.Var("x"), hl.Var("y")
    output = hl.Func("black_white_level_output")

    white_factor = 65535 / (white_point - black_point)
    shifted = hl.i32(input[x, y]) - black_point
    output[x, y] = hl.u16_sat(shifted * white_factor)

    return output
def test_imageparam_bug():
    """Regression check: see https://github.com/rodrigob/Halide/issues/2

    Defines a 2-D Func from a 1-D ImageParam indexed by the second Var
    only; the test passes when the definition can be built.
    """
    x, y = hl.Var("x"), hl.Var("y")
    fx = hl.Func("fx")
    input = hl.ImageParam(hl.UInt(8), 1, "input")
    fx[x, y] = input[y]
def u8bit_interleave(input):
    """Return a scheduled Func holding ``input / 256`` saturated to uint8.

    The result is indexed ``[x, y, c]``, computed at root, parallel over
    ``y`` and vectorized by 16 over ``x``.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    output = hl.Func("8bit_interleaved_output")

    scaled = input[x, y, c] / 256
    output[x, y, c] = hl.u8_sat(scaled)

    output.compute_root().parallel(y).vectorize(x, 16)
    return output
def diff(im1, im2, name):
    """Return a Func named ``name`` holding the int32 difference ``im1 - im2``.

    The dimensionality of ``im1`` decides the indexing: ``[x, y]`` when it
    is 2-D, ``[x, y, c]`` otherwise.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    output = hl.Func(name)

    if im1.dimensions() == 2:
        output[x, y] = hl.i32(im1[x, y]) - hl.i32(im2[x, y])
        return output

    output[x, y, c] = hl.i32(im1[x, y, c]) - hl.i32(im2[x, y, c])
    return output
def test_misused_or():
    """Python's ``or`` between two Halide comparisons must raise ValueError.

    The error message is expected to mention that the value cannot be
    converted to a bool.
    """
    x, y = hl.Var('x'), hl.Var('y')
    f = hl.Func('f')

    saw_error = False
    try:
        # Deliberate misuse: `or` needs a Python bool from each operand.
        f[x, y] = hl.print_when(x == 0 or y == 0, 0, "x=", x, "y=", y)
        f.realize(10, 10)
    except ValueError as err:
        saw_error = True
        assert 'cannot be converted to a bool' in str(err)
    assert saw_error, 'Did not see expected exception!'
def default_inline():
    """Realize a two-stage pipeline with no explicit schedule and print its loop nest.

    ``B`` reads ``A`` at the same coordinates and adds 1. The realization
    size comes from the module-level ``w`` and ``h``.
    """
    print("=" * 50)

    x = hl.Var("x")
    y = hl.Var("y")
    A = hl.Func("A_default")
    B = hl.Func("B_default")

    A[x, y] = x + 10 * y
    B[x, y] = A[x, y] + 1

    print("pipeline with default schedule: inline")
    print('-' * 50)

    B.realize(w, h)
    B.print_loop_nest()
def test_mux_tuple():
    """``hl.mux`` over a tuple-valued Func picks the element selected by ``c``.

    ``g`` yields ``(123, 456, x)``. The result is realized over a 1x4
    region and rows 0 to 3 are expected to hold 123, 456, 0 and 0.
    """
    f = hl.Func()
    g = hl.Func()
    x = hl.Var()
    c = hl.Var()

    g[x] = (123, 456, x)
    f[x, c] = hl.mux(c, g[x])

    b = f.realize(1, 4)

    for row, expected in enumerate((123, 456, 0, 0)):
        assert b[0, row] == expected
def test_compiletime_error():
    """Realizing a uint16 Func into a uint8 Buffer must raise RuntimeError.

    The message is expected to name the output buffer's type and the type
    of the buffer that was passed in.
    """
    x, y = hl.Var('x'), hl.Var('y')
    f = hl.Func('f')
    f[x, y] = hl.u16(x + y)

    # Deliberate type-mismatch error
    buf = hl.Buffer(hl.UInt(8), [2, 2])

    saw_error = False
    try:
        f.realize(buf)
    except RuntimeError as err:
        saw_error = True
        assert 'Output buffer f has type uint16 but type of the buffer passed in is uint8' in str(err)
    assert saw_error, 'Did not see expected exception!'
def test_nobuildmethod():
    """Run the ``nobuildmethod`` generator and check its output buffer.

    A 2x2 float32 buffer filled with 123 is passed with the scalar 1.0;
    the 2x2 int32 result is expected to equal 123 everywhere.
    """
    x = hl.Var()
    y = hl.Var()
    c = hl.Var()

    target = hl.get_jit_target_from_environment()

    b_in = hl.Buffer(hl.Float(32), [2, 2])
    b_in.fill(123)
    b_out = hl.Buffer(hl.Int(32), [2, 2])

    pipeline = nobuildmethod.generate(target, b_in, 1.0)
    pipeline.realize(b_out)

    assert b_out.all_equal(123)
def test_compiletime_error():
    """Realizing a uint16 Func into a uint8 Buffer must raise RuntimeError.

    The message is expected to state the Buffer's type and the Func's type.
    """
    x, y = hl.Var('x'), hl.Var('y')
    f = hl.Func('f')
    f[x, y] = hl.cast(hl.UInt(16), x + y)

    # Deliberate type-mismatch error
    buf = hl.Buffer(hl.UInt(8), [2, 2])

    saw_error = False
    try:
        f.realize(buf)
    except RuntimeError as err:
        saw_error = True
        assert 'Buffer has type uint8, but Func "f" has type uint16.' in str(err)
    assert saw_error, 'Did not see expected exception!'
def mult(input, scale):
    """Return a Func that scales ``input`` by ``scale``, capped at 255, as uint8.

    Each pixel is converted to float32, multiplied, limited with
    ``hl.min(..., 255.0)`` and cast to uint8. The result is indexed
    ``[x, y, c]``.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    brighter = hl.Func("mult")

    as_float = hl.cast(hl.Float(32), input[x, y, c])
    capped = hl.min(as_float * scale, 255.0)
    brighter[x, y, c] = hl.cast(hl.UInt(8), capped)

    return brighter
def shift_bayer_to_rggb(input, cfa_pattern):
    """Return a Func that shifts ``input`` according to ``cfa_pattern``.

    Pattern codes 1, 2, 4 and 3 read ``input`` at offsets (0, 0), (1, 0),
    (0, 1) and (1, 1) respectively; any other code yields 0.
    """
    print(f'cfa_pattern: {cfa_pattern}')

    x, y = hl.Var("x"), hl.Var("y")
    output = hl.Func("rggb_input")
    cfa = hl.u16(cfa_pattern)

    # Condition/value pairs are listed in the order they are tested.
    output[x, y] = hl.select(
        cfa == hl.u16(1), input[x, y],
        cfa == hl.u16(2), input[x + 1, y],
        cfa == hl.u16(4), input[x, y + 1],
        cfa == hl.u16(3), input[x + 1, y + 1],
        0,
    )

    return output
def increase_saturation(input, strength):
    """Return a scheduled Func that scales every channel except 0 by ``strength``.

    The pure definition multiplies all channels; an update then restores
    channel 0 to the unscaled input value.
    """
    print(' increase saturation')

    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    output = hl.Func("increase_saturation_output")

    output[x, y, c] = strength * input[x, y, c]
    # Channel 0 is passed through untouched.
    output[x, y, 0] = input[x, y, 0]

    output.compute_root().parallel(y).vectorize(x, 16)
    return output
def test_basics():
    """Smoke-test indexing, arithmetic, scheduling and JIT compilation, with progress prints.

    Most statements are bare expressions whose only purpose is to verify
    that they can be constructed. The ``ping`` prints mark how far the test
    got.
    """
    src = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x, blur_xx, blur_y = hl.Func('blur_x'), hl.Func('blur_xx'), hl.Func('blur_y')

    yy = hl.cast(hl.Int(32), 1)
    assert yy.type() == hl.Int(32)
    print("yy type:", yy.type())

    # Indexing an ImageParam with Vars, ints and derived expressions.
    z = x + 1
    src[x, y]
    src[0, 0]
    src[z, y]
    src[x + 1, y]

    print("ping 0.2")
    src[x, y] + src[x + 1, y]

    # Disabled in the original test; kept disabled.
    if False:
        aa = blur_x[x, y]
        bb = blur_x[x, y + 1]
        aa + bb
        blur_x[x, y] + blur_x[x, y + 1]

    print("ping 0.3")
    (src[x, y] + src[x + 1, y]) / 2

    print("ping 0.4")
    blur_x[x, y]

    print("ping 0.4.1")
    blur_xx[x, y] = src[x, y]

    print("ping 0.5")
    blur_x[x, y] = (src[x, y] + src[x + 1, y] + src[x + 2, y]) / 3

    print("ping 1")
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    xi = hl.Var('xi')
    yi = hl.Var('yi')

    print("ping 2")
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
    print("Compiled to jit")
def test_var():
    """Exercise ``hl.Var``: naming, identity, implicit vars, placeholder, repr, Expr conversion."""
    # Anonymous Vars get distinct, non-empty names.
    first = hl.Var()
    second = hl.Var()
    assert len(first.name()) > 0
    assert len(second.name()) > 0
    assert not first.same_as(second)

    # Three spellings of implicit var 1, plus an ordinary named Var.
    v1 = hl.Var.implicit(1)
    assert v1.name() == "_1"
    v2 = hl.Var("_1")
    assert v1.same_as(v2)
    v3 = hl._1
    assert v1.same_as(v3)
    v4 = hl.Var("v4")
    assert not v1.same_as(v4)

    for implicit_var in (v1, v2, v3):
        assert implicit_var.is_implicit()
    assert not v4.is_implicit()
    # assert hl.Var.is_implicit("_1")
    # assert not hl.Var.is_implicit("v4")

    for implicit_var in (v1, v2, v3):
        assert implicit_var.implicit_index() == 1
    assert v4.implicit_index() == -1
    # assert hl.Var.implicit_index("_1") == 1
    # assert hl.Var.implicit_index("v4") == -1

    # Placeholder.
    ph = hl._
    assert ph.name() == "_"
    assert ph.is_placeholder()
    # assert hl.Var.is_placeholder(ph)
    assert not v1.is_placeholder()

    outermost = hl.Var.outermost()
    assert outermost.name() == "__outermost"

    # repr() and str()
    x = hl.Var('x')
    assert str(x) == "x"
    assert repr(x) == "<halide.Var 'x'>"

    # This verifies that halide.Var is implicitly convertible to halide.Expr
    r = hl.random_int(x)

    # This verifies that halide.Var is explicitly convertible to halide.Expr
    r = hl.random_int(x.as_expr())
def test_compiletime_error():
    """Realizing a uint16 Func into a uint8 Buffer must raise RuntimeError.

    The exception text is printed rather than checked.
    """
    x, y = hl.Var('x'), hl.Var('y')
    f = hl.Func('f')
    f[x, y] = hl.cast(hl.UInt(16), x + y)

    # Deliberate type-mismatch error
    buf = hl.Buffer(hl.UInt(8), 2, 2)

    saw_error = False
    try:
        f.realize(buf)
    except RuntimeError as err:
        saw_error = True
        print('Saw expected exception (%s)' % str(err))
    assert saw_error, 'Did not see expected exception!'