def test_basics2():
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)  # Value needed if not generating an executable
    s_sigma = 8  # This is passed during code generation in the C++ version

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1), 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1), y * s_sigma * hl.cast(hl.Int(32), 1)]
    # val1 = clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]  # should fail
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2, y * s_sigma + r.y - s_sigma // 2]
    return
def test_basics2():
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1), 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.i32(1), y * s_sigma * hl.i32(1)]
    val22 = clamped[x * s_sigma - hl.i32(s_sigma // 2), y * s_sigma - hl.i32(s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2, y * s_sigma + r.y - s_sigma // 2]

    try:
        val1 = clamped[x * s_sigma - s_sigma / 2, y * s_sigma - s_sigma / 2]
    except RuntimeError as e:
        assert 'Implicit cast from float32 to int' in str(e)
    else:
        assert False, 'Did not see expected exception!'
def test_basics3():
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)  # Value needed if not generating an executable
    s_sigma = 8  # This is passed during code generation in the C++ version

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1), 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    val = clamped[x * s_sigma + r.x - s_sigma // 2, y * s_sigma + r.y - s_sigma // 2]
    val = hl.clamp(val, 0.0, 1.0)
    zi = hl.i32((val / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    ss = hl.select(c == 0, val, 1.0)
    left = histogram[x, y, zi, c]
    left += 5
    left += ss
def test_image_to_ndarray():
    if "image_to_ndarray" not in globals():
        print("Skipping test_image_to_ndarray")
        return

    import numpy

    i0 = Image(hl.Float(32), 50, 50)
    assert i0.type() == hl.Float(32)

    a0 = image_to_ndarray(i0)
    print("a0.shape", a0.shape)
    print("a0.dtype", a0.dtype)
    assert a0.dtype == numpy.float32

    i1 = Image(hl.Int(16), 50, 50)
    assert i1.type() == hl.Int(16)
    i1[24, 24] = 42
    assert i1(24, 24) == 42

    a1 = image_to_ndarray(i1)
    print("a1.shape", a1.shape)
    print("a1.dtype", a1.dtype)
    assert a1.dtype == numpy.int16
    assert a1[24, 24] == 42
    return
def test_basics2():
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)  # Value needed if not generating an executable
    s_sigma = 8  # This is passed during code generation in the C++ version

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1), 0]

    if True:
        print("s_sigma", s_sigma)
        print("s_sigma/2", s_sigma / 2)
        print("s_sigma//2", s_sigma // 2)
        print()
        print("x * s_sigma", x * s_sigma)
        print("x * 8", x * 8)
        print("x * 8 + 4", x * 8 + 4)
        print("x * 8 * 4", x * 8 * 4)
        print()
        print("x", x)
        print("(x * s_sigma).type()", (x * s_sigma).type())
        print("(x * 8).type()", (x * 8).type())
        print("(x * 8 + 4).type()", (x * 8 + 4).type())
        print("(x * 8 * 4).type()", (x * 8 * 4).type())
        print("(x * 8 / 4).type()", (x * 8 / 4).type())
        print("((x * 8) * 4).type()", ((x * 8) * 4).type())
        print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

        assert (x * 8).type() == hl.Int(32)
        assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
        assert ((x * 8) / 4).type() == hl.Int(32)
        assert (x * (8 / 4)).type() == hl.Float(32)  # under python3 division rules
        assert (x * (8 // 4)).type() == hl.Int(32)
        # assert (x * 8 // 4).type() == hl.Int(32)  # not yet implemented

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1), y * s_sigma * hl.cast(hl.Int(32), 1)]
    # val1 = clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]  # should fail
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2, y * s_sigma + r.y - s_sigma // 2]
    return
def f32(x, y, c, img):
    out = mkfunc("f32", img)
    if img.dimensions() == 2:
        out[x, y] = hl.cast(hl.Float(32), img[x, y])
    else:
        out[x, y, c] = hl.cast(hl.Float(32), img[x, y, c])
    return out
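# The mkfunc() helper used by f32() above is not part of this excerpt. A
# minimal, purely hypothetical sketch of what it might do (the real helper may
# derive the Func name differently):
def mkfunc(name, *deps):
    # Build a Func whose name combines the stage name with its first input,
    # which makes schedules and debug output easier to read.
    return hl.Func("%s_%s" % (name, deps[0].name()))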
def test_multipass_constraints():
    input = hl.ImageParam(hl.Float(32), 2, "input")

    f = hl.Func("f")
    x = hl.Var("x")
    y = hl.Var("y")
    f[x, y] = input[x + 1, y + 1] + input[x - 1, y - 1]
    f[x, y] += 3.0
    f.update().vectorize(x, 4)

    o = f.output_buffer()

    # Now make some hard-to-resolve constraints
    input.dim(0).set_bounds(min=input.dim(1).min() - 5,
                            extent=input.dim(1).extent() + o.dim(0).extent())

    o.dim(0).set_bounds(min=0,
                        extent=hl.select(o.dim(0).extent() < 22,
                                         o.dim(0).extent() + 1,
                                         o.dim(0).extent()))

    # Make a bounds query buffer
    query_buf = hl.Buffer.make_bounds_query(type=hl.Float(32), sizes=[7, 8])
    query_buf.set_min([2, 2])

    f.infer_input_bounds(query_buf)

    if input.get().dim(0).min() != -4 or \
       input.get().dim(0).extent() != 34 or \
       input.get().dim(1).min() != 1 or \
       input.get().dim(1).extent() != 10 or \
       query_buf.dim(0).min() != 0 or \
       query_buf.dim(0).extent() != 24 or \
       query_buf.dim(1).min() != 2 or \
       query_buf.dim(1).extent() != 8:
        print("Constraints not correctly satisfied:\n",
              "in:",
              input.get().dim(0).min(), input.get().dim(0).extent(),
              input.get().dim(1).min(), input.get().dim(1).extent(),
              "out:",
              query_buf.dim(0).min(), query_buf.dim(0).extent(),
              query_buf.dim(1).min(), query_buf.dim(1).extent())
        assert False
def gauss_15x15(input, name):
    print(' gauss_15x15')
    k = hl.Buffer(hl.Float(32), [15], "gauss_15x15")
    k.translate([-7])
    rdom = hl.RDom([(-7, 15)])

    k.fill(0)
    k[-7] = 0.004961
    k[-6] = 0.012246
    k[-5] = 0.026304
    k[-4] = 0.049165
    k[-3] = 0.079968
    k[-2] = 0.113193
    k[-1] = 0.139431
    k[0] = 0.149464
    k[7] = 0.004961
    k[6] = 0.012246
    k[5] = 0.026304
    k[4] = 0.049165
    k[3] = 0.079968
    k[2] = 0.113193
    k[1] = 0.139431

    return gauss(input, k, rdom, name)
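# gauss_15x15() and gauss_7x7() both delegate to a gauss() helper that is not
# shown in this excerpt. A hedged sketch of a plausible implementation: a
# separable convolution of `input` with the 1-D kernel `k` over the reduction
# domain `rdom`, assuming `input` already has boundary conditions applied.
def gauss(input, k, rdom, name):
    blur_x = hl.Func(name + "_x")
    output = hl.Func(name)
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    if input.dimensions() == 2:
        blur_x[x, y] = hl.sum(input[x + rdom.x, y] * k[rdom.x])
        output[x, y] = hl.sum(blur_x[x, y + rdom.x] * k[rdom.x])
    else:
        blur_x[x, y, c] = hl.sum(input[x + rdom.x, y, c] * k[rdom.x])
        output[x, y, c] = hl.sum(blur_x[x, y + rdom.x, c] * k[rdom.x])
    # Schedule guesses only; the real helper may schedule differently.
    blur_x.compute_at(output, y).vectorize(x, 16)
    output.compute_root().parallel(y).vectorize(x, 16)
    return output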
def test_for_each_element():
    buf = hl.Buffer(hl.Float(32), [3, 4])
    for x in range(3):
        for y in range(4):
            buf[x, y] = x + y
    # Can't use 'assert' in a lambda, but can call a fn that uses it.
    buf.for_each_element(lambda pos, buf=buf: _assert_fn(buf[pos[0], pos[1]] == pos[0] + pos[1]))
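# _assert_fn is assumed to be a tiny module-level helper along these lines,
# since an assert statement cannot appear inside a lambda:
def _assert_fn(e):
    assert e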
def main():
    input = hl.ImageParam(hl.Float(32), 2, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)  # Value needed if not generating an executable
    s_sigma = 8  # This is passed during code generation in the C++ version

    bilateral_grid = get_bilateral_grid(input, r_sigma, s_sigma)

    # Set `generate` to False to run the jit immediately and get instant gratification.
    # generate = True
    generate = False

    if generate:
        generate_compiled_file(bilateral_grid)
    else:
        filter_test_image(bilateral_grid, input)

    print("\nEnd of game. Have a nice day!")
    return
def _realize_and_check(f, offset=0):
    b = hl.Buffer(hl.Float(32), [2, 2])
    f.realize(b)

    assert b[0, 0] == 3.5 + offset + 123
    assert b[0, 1] == 4.5 + offset + 123
    assert b[1, 0] == 4.5 + offset + 123
    assert b[1, 1] == 5.5 + offset + 123
def test_division():
    f32 = hl.Param(hl.Float(32), 'f32', -32.0)
    f64 = hl.Param(hl.Float(64), 'f64', 64.0)
    i16 = hl.Param(hl.Int(16), 'i16', -16)
    i32 = hl.Param(hl.Int(32), 'i32', 32)
    u16 = hl.Param(hl.UInt(16), 'u16', 16)
    u32 = hl.Param(hl.UInt(32), 'u32', 32)

    # Verify that the types match the rules in match_types()
    assert (f32 / f64).type() == hl.Float(64)
    assert (f32 // f64).type() == hl.Float(64)
    assert (i16 / i32).type() == hl.Int(32)
    assert (i16 // i32).type() == hl.Int(32)
    assert (u16 / u32).type() == hl.UInt(32)
    assert (u16 // u32).type() == hl.UInt(32)

    # int / uint -> int
    assert (u16 / i32).type() == hl.Int(32)
    assert (i32 // u16).type() == hl.Int(32)

    # any / float -> float
    # float / any -> float
    assert (u16 / f32).type() == hl.Float(32)
    assert (u16 // f32).type() == hl.Float(32)
    assert (i16 / f64).type() == hl.Float(64)
    assert (i16 // f64).type() == hl.Float(64)

    # Verify that division semantics match those for Halide
    # (rather than Python); this differs for int/int, which
    # defaults to float (rather than floordiv) in Python3.
    # Also test that // always floors the result, even for float.
    assert _evaluate(f32 / f64) == -0.5
    assert _evaluate(f32 // f64) == -1.0
    assert _evaluate(i16 / i32) == -1
    assert _evaluate(i16 // i32) == -1
    assert _evaluate(i32 / i16) == -2
    assert _evaluate(u16 / u32) == 0
    assert _evaluate(u16 // u32) == 0
    assert _evaluate(u16 / i32) == 0
    assert _evaluate(i32 // u16) == 2
    assert _evaluate(u16 / f32) == -0.5
    assert _evaluate(u16 // f32) == -1.0
    assert _evaluate(i16 / f64) == -0.25
    assert _evaluate(i16 // f64) == -1.0
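# The _evaluate() helper used above is not included in this excerpt. A sketch
# of a plausible implementation, assuming the intent is "evaluate a scalar
# Expr via a zero-dimensional Func and return the resulting Python value":
def _evaluate(e):
    buf = hl.Buffer.make_scalar(e.type())
    f = hl.Func()
    f[()] = e
    f.realize(buf)
    return buf[()]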
def generate_compiled_file(bilateral_grid):
    target = hl.get_target_from_environment()

    # Need to copy the filter executable from the C++ apps/bilateral_grid folder
    # to run this (after making it, of course).
    arguments = ArgumentsVector()
    arguments.append(Argument('r_sigma', InputScalar, hl.Float(32), 0))
    arguments.append(Argument('input', InputBuffer, hl.UInt(16), 2))
    bilateral_grid.compile_to_file("bilateral_grid", arguments, "bilateral_grid", target)
    print("Generated compiled file for bilateral_grid function.")
def test_scalar_buffers():
    buf = hl.Buffer.make_scalar(hl.Float(32))

    assert buf.dimensions() == 0
    buf.fill(0)
    buf[()] = 2.5
    assert buf[()] == 2.5

    buf.fill(32)
    assert buf[()] == 32
def mult(input, scale):
    brighter = hl.Func("mult")
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")

    value = input[x, y, c]
    value = hl.cast(hl.Float(32), value)
    value = value * scale
    value = hl.min(value, 255.0)
    value = hl.cast(hl.UInt(8), value)

    brighter[x, y, c] = value
    return brighter
def test_nobuildmethod():
    x, y, c = hl.Var(), hl.Var(), hl.Var()
    target = hl.get_jit_target_from_environment()

    b_in = hl.Buffer(hl.Float(32), [2, 2])
    b_in.fill(123)

    b_out = hl.Buffer(hl.Int(32), [2, 2])

    f = nobuildmethod.generate(target, b_in, 1.0)
    f.realize(b_out)

    assert b_out.all_equal(123)
def test_partialbuildmethod():
    x, y, c = hl.Var(), hl.Var(), hl.Var()
    target = hl.get_jit_target_from_environment()

    b_in = hl.Buffer(hl.Float(32), [2, 2])
    b_in.fill(123)

    b_out = hl.Buffer(hl.Int(32), [2, 2])

    try:
        f = partialbuildmethod.generate(target, b_in, 1)
    except RuntimeError as e:
        assert "Generators that use build() (instead of generate()+Output<>) are not supported in the Python bindings." in str(e)
    else:
        assert False, 'Did not see expected exception!'
def gauss_7x7(input, name):
    k = hl.Buffer(hl.Float(32), [7], "gauss_7x7_kernel")
    k.translate([-3])
    rdom = hl.RDom([(-3, 7)])

    k.fill(0)
    k[-3] = 0.026267
    k[-2] = 0.100742
    k[-1] = 0.225511
    k[0] = 0.29496
    k[1] = 0.225511
    k[2] = 0.100742
    k[3] = 0.026267

    return gauss(input, k, rdom, name)
def test_float_or_int():
    x = hl.Var('x')
    i32, f32 = hl.Int(32), hl.Float(32)

    assert hl.Expr(x).type() == i32
    assert (x * 2).type() == i32
    assert (x / 2).type() == i32
    assert ((x // 2) - 1 + 2 * (x % 2)).type() == i32
    assert ((x / 2) - 1 + 2 * (x % 2)).type() == i32
    assert ((x / 2)).type() == i32
    assert ((x / 2.0)).type() == f32
    assert ((x // 2)).type() == i32
    assert ((x // 2) - 1).type() == i32
    assert ((x % 2)).type() == i32
    assert (2 * (x % 2)).type() == i32
    assert ((x // 2) - 1 + 2 * (x % 2)).type() == i32

    assert type(x) == hl.Var
    assert (hl.Expr(x)).type() == i32
    assert (hl.Expr(2.0)).type() == f32
    assert (hl.Expr(2)).type() == i32
    assert (x + 2).type() == i32
    assert (2 + x).type() == i32
    assert (hl.Expr(2) + hl.Expr(3)).type() == i32
    assert (hl.Expr(2.0) + hl.Expr(3)).type() == f32
    assert (hl.Expr(2) + 3.0).type() == f32
    assert (hl.Expr(2) + 3).type() == i32
    assert (hl.Expr(x) + 2).type() == i32
    assert (2 + hl.Expr(x)).type() == i32
    assert (2 * (x + 2)).type() == i32
    assert (x + 0).type() == i32
    assert (x % 2).type() == i32
    assert (2 * x).type() == i32
    assert (x * 2).type() == i32
    assert (x * 2).type() == i32
    assert ((x % 2)).type() == i32
    assert ((x % 2) * 2).type() == i32
    assert (2 * (x % 2)).type() == i32
    assert ((x + 2) * 2).type() == i32
def test_float_or_int():
    x = hl.Var('x')
    i, f = hl.Int(32), hl.Float(32)

    assert ((x//2) - 1 + 2*(x%2)).type() == i
    assert ((x/2) - 1 + 2*(x%2)).type() == i
    assert ((x/2)).type() == i
    assert ((x/2.0)).type() == f
    assert ((x//2)).type() == i
    assert ((x//2) - 1).type() == i
    assert ((x%2)).type() == i
    assert (2*(x%2)).type() == i
    assert ((x//2) - 1 + 2*(x%2)).type() == i

    assert type(x) == hl.Var
    assert (x.as_expr()).type() == i
    assert (hl.Expr(2.0)).type() == f
    assert (hl.Expr(2)).type() == i
    assert (x + 2).type() == i
    assert (2 + x).type() == i
    assert (hl.Expr(2) + hl.Expr(3)).type() == i
    assert (hl.Expr(2.0) + hl.Expr(3)).type() == f
    assert (hl.Expr(2) + 3.0).type() == f
    assert (hl.Expr(2) + 3).type() == i
    assert (x.as_expr() + 2).type() == i  # yes this failed at some point
    assert (2 + x.as_expr()).type() == i
    assert (2 * (x + 2)).type() == i  # yes this failed at some point
    assert (x + 0).type() == i
    assert (x % 2).type() == i
    assert (2 * x).type() == i
    assert (x * 2).type() == i
    assert (x * 2).type() == i
    assert ((x % 2)).type() == i
    assert ((x % 2) * 2).type() == i
    # assert (2 * (x % 2)).type() == i  # yes this failed at some point
    assert ((x + 2) * 2).type() == i

    return
def contrast(input, strength, black_point):
    output = hl.Func("contrast_output")
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")

    scale = strength
    inner_constant = math.pi / (2 * scale)
    sin_constant = hl.sin(inner_constant)

    slope = 65535 / (2 * sin_constant)
    constant = slope * sin_constant
    factor = math.pi / (scale * 65535)

    val = factor * hl.cast(hl.Float(32), input[x, y, c])
    output[x, y, c] = hl.u16_sat(slope * hl.sin(val - inner_constant) + constant)

    white_scale = 65535 / (65535 - black_point)
    output[x, y, c] = hl.u16_sat((hl.cast(hl.Int(32), output[x, y, c]) - black_point) * white_scale)

    output.compute_root().parallel(y).vectorize(x, 16)
    return output
# TODO: This allows you to use "true" div (vs floordiv) in Python2 for the / operator;
# unfortunately it appears to also replace the overloads we've carefully added for Halide.
# Figure out if it's possible to allow this while leaving our Halide overloads unaffected.
#
# from __future__ import division

import time, sys

import halide as hl

from datetime import datetime
from scipy.misc import imread, imsave
import numpy as np
import os.path

int_t = hl.Int(32)
float_t = hl.Float(32)


def get_interpolate(input, levels):
    """
    Builds the function, schedules it, and invokes the JIT compiler.
    :return: halide.hl.Func
    """
    # THE ALGORITHM
    downsampled = [hl.Func('downsampled%d' % i) for i in range(levels)]
    downx = [hl.Func('downx%d' % l) for l in range(levels)]
    interpolated = [hl.Func('interpolated%d' % i) for i in range(levels)]
    # level_widths = [hl.Param(int_t, 'level_widths%d' % i) for i in range(levels)]
    # level_heights = [hl.Param(int_t, 'level_heights%d' % i) for i in range(levels)]
def call_twoel(zone_name, seed=2, datasize=15, itercount=10, target_name="host-disable_llvm_loop_opt", **kwargs): N = datasize seed = 2 inputs = [ { "name": "delo2", "d": 0, "value": 0.001 }, { "name": "delta", "d": 0, "value": 0.001 }, { "name": "rdelta", "d": 0, "value": 0.001 }, { "name": "expnt", "d": 1, "value": 0.00001 }, { "name": "rnorm", "d": 1 }, { "name": "x", "d": 1 }, { "name": "y", "d": 1 }, { "name": "z", "d": 1 }, { "name": "fm", "d": 2, "shape": [1002, 5] }, { "name": "g_fock", "d": 2 }, { "name": "g_dens", "d": 2 }, { "name": "g_trace", "d": 4, "value": 0.0 }, ] outputs = [ { "name": "rv", "d": 1, "shape": [1] }, { "name": "g_fock", "d": 2 }, ] inputs = {x["name"]: x for x in inputs} outputs = {x["name"]: x for x in outputs} # generate input data print("input/output size is", N, "^2") buffers = [] buffers_by_name = {} np.random.seed(seed) for key in inputs: param = inputs[key] if param['d'] == 0: thing = 0.2 else: shape = [N] * param['d'] if 'shape' in param: shape = param['shape'] thing = hl.Buffer(hl.Float(64), shape, name=key) if 'value' in param: if param['value'] != 0.0: for pos in np.ndindex(*shape): thing[pos] = param['value'] else: values = np.random.rand(*shape) - 0.5 for pos in np.ndindex(*shape): thing[pos] = values[pos] buffers.append(thing) buffers_by_name[key] = thing # get JIT pipeline zones = twoel_gen.define_original_twoel_zone().split_recursive() zone_names = zone_name.split(",") myzones = [] for zone in zones.loops: if zone_name == 'all' or zone['name'] in zone_names: myzones.append(zone) if len(myzones) == 0: if zone_name == 'list': print([z.name for z in zones]) else: print("no zone %s found" % zone_name) exit(1) zones.loops = myzones gen = twoel_gen.Generate_twoel(loopnests=zones, **kwargs) gen.generate_twoel() p = gen.pipeline target = hl.Target(target_name) zone_names = [z.name for z in myzones] print("compiling zones", zone_names, "for target", target) p.compile_jit(target) # plug in the parameter values for param in gen.inputs.values(): name = param.name() if name in buffers_by_name: thing = buffers_by_name[name] elif name.endswith("_in") and name[:-3] in buffers_by_name: name = name[:-3] thing = buffers_by_name[name] else: raise KeyError(name) param.set(thing) # dry-run p.realize(N, N) print(itercount, "timed runs") if itercount == 0: # when generating trace output, just doing the dry-run is enough. return 0.0, 0.0 # benchmark it walltime = 0.0 cputime = 0.0 for _ in range(itercount): cpu_before = time.process_time() wall_before = time.time() rv, g_fock_out = p.realize(N, N) cpu_after = time.process_time() wall_after = time.time() walltime += wall_after - wall_before cputime += cpu_after - cpu_before print("walltime: %.3f" % walltime) print("cputime: %.3f" % cputime) walltime /= itercount cputime /= itercount print("walltime per iter: %.3e" % walltime) print("cputime per iter: %.3e" % cputime) throughput = N * N * N * N / walltime print("throughput: %.3e g() calls per second (roughly)" % throughput) rv = rv[0] g_fock_out = np.array(g_fock_out) return walltime, cputime
def test_complexstub(): constant_image = _make_constant_image() input = hl.ImageParam(hl.UInt(8), 3, 'input') input.set(constant_image) x, y, c = hl.Var(), hl.Var(), hl.Var() target = hl.get_jit_target_from_environment() float_arg = 1.25 int_arg = 33 r = complexstub(target, typed_buffer_input=constant_image, untyped_buffer_input=constant_image, simple_input=input, array_input=[input, input], float_arg=float_arg, int_arg=[int_arg, int_arg], untyped_buffer_output_type="uint8", vectorize=True) # return value is a tuple; unpack separately to avoid # making the callsite above unreadable (simple_output, tuple_output, array_output, typed_buffer_output, untyped_buffer_output, static_compiled_buffer_output) = r b = simple_output.realize(32, 32, 3, target) assert b.type() == hl.Float(32) for x in range(32): for y in range(32): for c in range(3): expected = constant_image[x, y, c] actual = b[x, y, c] assert expected == actual, "Expected %s Actual %s" % (expected, actual) b = tuple_output.realize(32, 32, 3, target) assert b[0].type() == hl.Float(32) assert b[1].type() == hl.Float(32) assert len(b) == 2 for x in range(32): for y in range(32): for c in range(3): expected1 = constant_image[x, y, c] * float_arg expected2 = expected1 + int_arg actual1, actual2 = b[0][x, y, c], b[1][x, y, c] assert expected1 == actual1, "Expected1 %s Actual1 %s" % ( expected1, actual1) assert expected2 == actual2, "Expected2 %s Actual1 %s" % ( expected2, actual2) assert len(array_output) == 2 for a in array_output: b = a.realize(32, 32, target) assert b.type() == hl.Int(16) for x in range(32): for y in range(32): expected = constant_image[x, y, 0] + int_arg actual = b[x, y] assert expected == actual, "Expected %s Actual %s" % (expected, actual) # TODO: Output<Buffer<>> has additional behaviors useful when a Stub # is used within another Generator; this isn't yet implemented since there # isn't yet Python bindings for Generator authoring. This section # of the test may need revision at that point. b = typed_buffer_output.realize(32, 32, 3, target) assert b.type() == hl.Float(32) for x in range(32): for y in range(32): for c in range(3): expected = constant_image[x, y, c] actual = b[x, y, c] assert expected == actual, "Expected %s Actual %s" % (expected, actual) b = untyped_buffer_output.realize(32, 32, 3, target) assert b.type() == hl.UInt(8) for x in range(32): for y in range(32): for c in range(3): expected = constant_image[x, y, c] actual = b[x, y, c] assert expected == actual, "Expected %s Actual %s" % (expected, actual) b = static_compiled_buffer_output.realize(4, 4, 1, target) assert b.type() == hl.UInt(8) for x in range(4): for y in range(4): for c in range(1): expected = constant_image[x, y, c] + 42 actual = b[x, y, c] assert expected == actual, "Expected %s Actual %s" % (expected, actual)
def main(): # This program defines a single-stage imaging pipeline that # brightens an image. # First we'll load the input image we wish to brighten. image_path = os.path.join(os.path.dirname(__file__), "../../tutorial/images/rgb.png") # We create a hl.Buffer object to wrap the numpy array input = hl.Buffer(imageio.imread(image_path)) assert input.type() == hl.UInt(8) # Next we define our hl.Func object that represents our one pipeline # stage. brighter = hl.Func("brighter") # Our hl.Func will have three arguments, representing the position # in the image and the color channel. Halide treats color # channels as an extra dimension of the image. x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c") # Normally we'd probably write the whole function definition on # one line. Here we'll break it apart so we can explain what # we're doing at every step. # For each pixel of the input image. value = input[x, y, c] assert type(value) == hl.Expr # Cast it to a floating point value. value = hl.cast(hl.Float(32), value) # Multiply it by 1.5 to brighten it. Halide represents real # numbers as floats, not doubles, so we stick an 'f' on the end # of our constant. value = value * 1.5 # Clamp it to be less than 255, so we don't get overflow when we # hl.cast it back to an 8-bit unsigned int. value = hl.min(value, 255.0) # Cast it back to an 8-bit unsigned integer. value = hl.cast(hl.UInt(8), value) # Define the function. brighter[x, y, c] = value # The equivalent one-liner to all of the above is: # # brighter(x, y, c) = hl.cast<uint8_t>(hl.min(input(x, y, c) * 1.5f, 255)) # brighter[x, y, c] = hl.cast(hl.UInt(8), hl.min(input[x, y, c] * 1.5, 255)) # # In the shorter version: # - I skipped the hl.cast to float, because multiplying by 1.5f does # that automatically. # - I also used integer constants in hl.clamp, because they get hl.cast # to match the type of the first argument. # - I left the h. off hl.clamp. It's unnecessary due to Koenig # lookup. # Remember. All we've done so far is build a representation of a # Halide program in memory. We haven't actually processed any # pixels yet. We haven't even compiled that Halide program yet. # So now we'll realize the hl.Func. The size of the output image # should match the size of the input image. If we just wanted to # brighten a portion of the input image we could request a # smaller size. If we request a larger size Halide will throw an # error at runtime telling us we're trying to read out of bounds # on the input image. output_image = brighter.realize( [input.width(), input.height(), input.channels()]) assert output_image.type() == hl.UInt(8) # Save the output for inspection. It should look like a bright parrot. # python3-imageio versions <2.5 expect a numpy array imageio.imsave("brighter.png", np.asanyarray(output_image)) print("Created brighter.png result file.") print("Success!") return 0
def f32(e):
    return hl.cast(hl.Float(32), e)
def main(): # So far Funcs (such as the one below) have evaluated to a single # scalar value for each point in their domain. single_valued = hl.Func() x, y = hl.Var("x"), hl.Var("y") single_valued[x, y] = x + y # One way to write a hl.Func that returns a collection of values is # to add an additional dimension which indexes that # collection. This is how we typically deal with color. For # example, the hl.Func below represents a collection of three values # for every x, y coordinate indexed by c. color_image = hl.Func() c = hl.Var("c") color_image[x, y, c] = hl.select( c == 0, 245, # Red value c == 1, 42, # Green value 132) # Blue value # Since this pattern appears quite often, Halide provides a # syntatic sugar to write the code above as the following, # using the "mux" function. # color_image[x, y, c] = hl.mux(c, [245, 42, 132]); # This method is often convenient because it makes it easy to # operate on this hl.Func in a way that treats each item in the # collection equally: brighter = hl.Func() brighter[x, y, c] = color_image[x, y, c] + 10 # However this method is also inconvenient for three reasons. # # 1) Funcs are defined over an infinite domain, so users of this # hl.Func can for example access color_image(x, y, -17), which is # not a meaningful value and is probably indicative of a bug. # # 2) It requires a hl.select, which can impact performance if not # bounded and unrolled: # brighter.bound(c, 0, 3).unroll(c) # # 3) With this method, all values in the collection must have the # same type. While the above two issues are merely inconvenient, # this one is a hard limitation that makes it impossible to # express certain things in this way. # It is also possible to represent a collection of values as a # collection of Funcs: func_array = [hl.Func() for i in range(3)] func_array[0][x, y] = x + y func_array[1][x, y] = hl.sin(x) func_array[2][x, y] = hl.cos(y) # This method avoids the three problems above, but introduces a # new annoyance. Because these are separate Funcs, it is # difficult to schedule them so that they are all computed # together inside a single loop over x, y. # A third alternative is to define a hl.Func as evaluating to a # Tuple instead of an hl.Expr. A Tuple is a fixed-size collection of # Exprs which may have different type. The following function # evaluates to an integer value (x+y), and a floating point value # (hl.sin(x*y)). multi_valued = hl.Func("multi_valued") multi_valued[x, y] = (x + y, hl.sin(x * y)) # Realizing a tuple-valued hl.Func returns a collection of # Buffers. We call this a Realization. It's equivalent to a # std::vector of hl.Buffer/Image objects: if True: im1, im2 = multi_valued.realize([80, 60]) assert im1.type() == hl.Int(32) assert im2.type() == hl.Float(32) assert im1[30, 40] == 30 + 40 assert np.isclose(im2[30, 40], math.sin(30 * 40)) # You can also pass a tuple of pre-allocated buffers to realize() # rather than having new ones created. (The Buffers must have the correct # types and have identical sizes.) if True: im1, im2 = hl.Buffer(hl.Int(32), [80, 60]), hl.Buffer(hl.Float(32), [80, 60]) multi_valued.realize((im1, im2)) assert im1[30, 40] == 30 + 40 assert np.isclose(im2[30, 40], math.sin(30 * 40)) # All Tuple elements are evaluated together over the same domain # in the same loop nest, but stored in distinct allocations. 
The # equivalent C++ code to the above is: if True: multi_valued_0 = np.empty((80 * 60), dtype=np.int32) multi_valued_1 = np.empty((80 * 60), dtype=np.int32) for yy in range(80): for xx in range(60): multi_valued_0[xx + 60 * yy] = xx + yy multi_valued_1[xx + 60 * yy] = math.sin(xx * yy) # When compiling ahead-of-time, a Tuple-valued hl.Func evaluates # into multiple distinct output halide_buffer_t structs. These appear in # order at the end of the function signature: # int multi_valued(...input buffers and params..., halide_buffer_t # *output_1, halide_buffer_t *output_2) # You can construct a Tuple by passing multiple Exprs to the # Tuple constructor as we did above. Perhaps more elegantly, you # can also take advantage of initializer lists and just # enclose your Exprs in braces: multi_valued_2 = hl.Func("multi_valued_2") multi_valued_2[x, y] = (x + y, hl.sin(x * y)) # Calls to a multi-valued hl.Func cannot be treated as Exprs. The # following is a syntax error: # hl.Func consumer # consumer[x, y] = multi_valued_2[x, y] + 10 # Instead you must index the returned object with square brackets # to retrieve the individual Exprs: integer_part = multi_valued_2[x, y][0] floating_part = multi_valued_2[x, y][1] assert type(integer_part) is hl.FuncTupleElementRef assert type(floating_part) is hl.FuncTupleElementRef consumer = hl.Func() consumer[x, y] = (integer_part + 10, floating_part + 10.0) # Tuple reductions. if True: # Tuples are particularly useful in reductions, as they allow # the reduction to maintain complex state as it walks along # its domain. The simplest example is an argmax. # First we create an Image to take the argmax over. input_func = hl.Func() input_func[x] = hl.sin(x) input = input_func.realize([100]) assert input.type() == hl.Float(32) # Then we defined a 2-valued Tuple which tracks the maximum value # its index. arg_max = hl.Func() # Pure definition. # (using [()] for zero-dimensional Funcs is a convention of this python interface) arg_max[()] = (0, input[0]) # Update definition. r = hl.RDom([(1, 99)]) old_index = arg_max[()][0] old_max = arg_max[()][1] new_index = hl.select(old_max > input[r], r, old_index) new_max = hl.max(input[r], old_max) arg_max[()] = (new_index, new_max) # The equivalent C++ is: arg_max_0 = 0 arg_max_1 = float(input[0]) for r in range(1, 100): old_index = arg_max_0 old_max = arg_max_1 new_index = r if (old_max > input[r]) else old_index new_max = max(input[r], old_max) # In a tuple update definition, all loads and computation # are done before any stores, so that all Tuple elements # are updated atomically with respect to recursive calls # to the same hl.Func. arg_max_0 = new_index arg_max_1 = new_max # Let's verify that the Halide and C++ found the same maximum # value and index. if True: r0, r1 = arg_max.realize() assert r0.type() == hl.Int(32) assert r1.type() == hl.Float(32) assert arg_max_0 == r0[()] assert np.isclose(arg_max_1, r1[()]) # Halide provides argmax and hl.argmin as built-in reductions # similar to sum, product, maximum, and minimum. They return # a Tuple consisting of the point in the reduction domain # corresponding to that value, and the value itself. In the # case of ties they return the first value found. We'll use # one of these in the following section. # Tuples for user-defined types. if True: # Tuples can also be a convenient way to represent compound # objects such as complex numbers. Defining an object that # can be converted to and from a Tuple is one way to extend # Halide's type system with user-defined types. 
class Complex: def __init__(self, r, i=None): if type(r) is float and type(i) is float: self.real = hl.Expr(r) self.imag = hl.Expr(i) elif i is not None: self.real = r self.imag = i else: self.real = r[0] self.imag = r[1] def as_tuple(self): "Convert to a Tuple" return (self.real, self.imag) def __add__(self, other): "Complex addition" return Complex(self.real + other.real, self.imag + other.imag) def __mul__(self, other): "Complex multiplication" return Complex(self.real * other.real - self.imag * other.imag, self.real * other.imag + self.imag * other.real) def __getitem__(self, idx): return (self.real, self.imag)[idx] def __len__(self): return 2 def magnitude(self): "Complex magnitude" return (self.real * self.real) + (self.imag * self.imag) # Other complex operators would go here. The above are # sufficient for this example. # Let's use the Complex struct to compute a Mandelbrot set. mandelbrot = hl.Func() # The initial complex value corresponding to an x, y coordinate # in our hl.Func. initial = Complex(x / 15.0 - 2.5, y / 6.0 - 2.0) # Pure definition. t = hl.Var("t") mandelbrot[x, y, t] = Complex(0.0, 0.0) # We'll use an update definition to take 12 steps. r = hl.RDom([(1, 12)]) current = Complex(mandelbrot[x, y, r - 1]) # The following line uses the complex multiplication and # addition we defined above. mandelbrot[x, y, r] = (Complex(current * current) + initial) # We'll use another tuple reduction to compute the iteration # number where the value first escapes a circle of radius 4. # This can be expressed as an hl.argmin of a boolean - we want # the index of the first time the given boolean expression is # false (we consider false to be less than true). The argmax # would return the index of the first time the expression is # true. escape_condition = Complex(mandelbrot[x, y, r]).magnitude() < 16.0 first_escape = hl.argmin(escape_condition) assert type(first_escape) is tuple # We only want the index, not the value, but hl.argmin returns # both, so we'll index the hl.argmin Tuple expression using # square brackets to get the hl.Expr representing the index. escape = hl.Func() escape[x, y] = first_escape[0] # Realize the pipeline and print the result as ascii art. result = escape.realize([61, 25]) assert result.type() == hl.Int(32) code = " .:-~*={&%#@" for yy in range(result.height()): for xx in range(result.width()): index = result[xx, yy] if index < len(code): print("%c" % code[index], end="") else: pass # is lesson 13 cpp version buggy ? print("") print("Success!") return 0
def test_target(): # Target("") should be exactly like get_host_target(). t1 = hl.get_host_target() t2 = hl.Target("") assert t1 == t2, "Default ctor failure" assert t1.supported() # to_string roundtripping t1 = hl.Target() ts = t1.to_string() assert ts == "arch_unknown-0-os_unknown" # Note, this should *not* validate, since validate_target_string # now returns false if any of arch-bits-os are undefined assert not hl.Target.validate_target_string(ts) # Don't attempt to roundtrip this: trying to create # a Target with unknown portions will now assert-fail. # # t2 = hl.Target(ts) # assert t2 == t1 # repr() and str() assert str(t1) == "arch_unknown-0-os_unknown" assert repr(t1) == "<halide.Target arch_unknown-0-os_unknown>" assert t1.os == hl.TargetOS.OSUnknown assert t1.arch == hl.TargetArch.ArchUnknown assert t1.bits == 0 # Full specification round-trip: t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, [hl.TargetFeature.SSE41]) ts = t1.to_string() assert ts == "x86-32-linux-sse41" assert hl.Target.validate_target_string(ts) # Full specification (without features) round-trip: t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32) ts = t1.to_string() assert ts == "x86-32-linux" assert hl.Target.validate_target_string(ts) # Full specification round-trip, crazy features t1 = hl.Target(hl.TargetOS.Android, hl.TargetArch.ARM, 32, [ hl.TargetFeature.JIT, hl.TargetFeature.SSE41, hl.TargetFeature.AVX, hl.TargetFeature.AVX2, hl.TargetFeature.CUDA, hl.TargetFeature.OpenCL, hl.TargetFeature.OpenGL, hl.TargetFeature.OpenGLCompute, hl.TargetFeature.Debug ]) ts = t1.to_string() assert ts == "arm-32-android-avx-avx2-cuda-debug-jit-opencl-opengl-openglcompute-sse41" assert hl.Target.validate_target_string(ts) # Expected failures: ts = "host-unknowntoken" assert not hl.Target.validate_target_string(ts) ts = "x86-23" assert not hl.Target.validate_target_string(ts) # bits == 0 is allowed only if arch_unknown and os_unknown are specified, # and no features are set ts = "x86-0" assert not hl.Target.validate_target_string(ts) ts = "0-arch_unknown-os_unknown-sse41" assert not hl.Target.validate_target_string(ts) # "host" is only supported as the first token ts = "opencl-host" assert not hl.Target.validate_target_string(ts) # set_feature t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, [hl.TargetFeature.SSE41]) assert t1.has_feature(hl.TargetFeature.SSE41) assert not t1.has_feature(hl.TargetFeature.AVX) t1.set_feature(hl.TargetFeature.AVX) t1.set_feature(hl.TargetFeature.SSE41, False) assert t1.has_feature(hl.TargetFeature.AVX) assert not t1.has_feature(hl.TargetFeature.SSE41) # set_features t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, [hl.TargetFeature.SSE41]) assert t1.has_feature(hl.TargetFeature.SSE41) assert not t1.has_feature(hl.TargetFeature.AVX) t1.set_features([hl.TargetFeature.SSE41], False) t1.set_features([hl.TargetFeature.AVX, hl.TargetFeature.AVX2], True) assert t1.has_feature(hl.TargetFeature.AVX) assert t1.has_feature(hl.TargetFeature.AVX2) assert not t1.has_feature(hl.TargetFeature.SSE41) # with_feature t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, [hl.TargetFeature.SSE41]) t2 = t1.with_feature(hl.TargetFeature.NoAsserts).with_feature( hl.TargetFeature.NoBoundsQuery) ts = t2.to_string() assert ts == "x86-32-linux-no_asserts-no_bounds_query-sse41" # without_feature t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, [hl.TargetFeature.SSE41, hl.TargetFeature.NoAsserts]) # Note that NoBoundsQuery wasn't set here, so 'without' is a no-op t2 = 
t1.without_feature(hl.TargetFeature.NoAsserts).without_feature( hl.TargetFeature.NoBoundsQuery) ts = t2.to_string() assert ts == "x86-32-linux-sse41" # natural_vector_size # SSE4.1 is 16 bytes wide t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, [hl.TargetFeature.SSE41]) assert t1.natural_vector_size(hl.UInt(8)) == 16 assert t1.natural_vector_size(hl.Int(16)) == 8 assert t1.natural_vector_size(hl.UInt(32)) == 4 assert t1.natural_vector_size(hl.Float(32)) == 4 # has_gpu_feature t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, [hl.TargetFeature.OpenCL]) t2 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, []) assert t1.has_gpu_feature() assert not t2.has_gpu_feature() # has_large_buffers & maximum_buffer_size t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 64, [hl.TargetFeature.LargeBuffers]) t2 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 64, []) assert t1.has_large_buffers() assert t1.maximum_buffer_size() == 9223372036854775807 assert not t2.has_large_buffers() assert t2.maximum_buffer_size() == 2147483647 # supports_device_api t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 64, [hl.TargetFeature.CUDA]) t2 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 64) assert t1.supports_device_api(hl.DeviceAPI.CUDA) assert not t2.supports_device_api(hl.DeviceAPI.CUDA) # supports_type (deprecated version) t1 = hl.Target(hl.TargetOS.OSX, hl.TargetArch.X86, 64, [hl.TargetFeature.Metal]) t2 = hl.Target(hl.TargetOS.OSX, hl.TargetArch.X86, 64) assert not t1.supports_type(hl.Float(64)) assert t2.supports_type(hl.Float(64)) # supports_type (preferred version) t1 = hl.Target(hl.TargetOS.OSX, hl.TargetArch.X86, 64, [hl.TargetFeature.Metal]) t2 = hl.Target(hl.TargetOS.OSX, hl.TargetArch.X86, 64) assert not t1.supports_type(hl.Float(64), hl.DeviceAPI.Metal) assert not t2.supports_type(hl.Float(64), hl.DeviceAPI.Metal) # target_feature_for_device_api assert hl.target_feature_for_device_api( hl.DeviceAPI.OpenCL) == hl.TargetFeature.OpenCL # with_feature with non-convertible lists try: t1 = hl.Target(hl.TargetOS.Linux, hl.TargetArch.X86, 32, ["this is a string"]) except TypeError as e: assert "incompatible constructor arguments" in str(e) else: assert False, 'Did not see expected exception!'
def main(): # All Exprs have a scalar type, and all Funcs evaluate to one or # more scalar types. The scalar types in Halide are unsigned # integers of various bit widths, signed integers of the same set # of bit widths, floating point numbers in single and double # precision, and opaque handles (equivalent to void *). The # following array contains all the legal types. valid_halide_types = [ hl.UInt(8), hl.UInt(16), hl.UInt(32), hl.UInt(64), hl.Int(8), hl.Int(16), hl.Int(32), hl.Int(64), hl.Float(32), hl.Float(64), hl.Handle() ] # Constructing and inspecting types. if True: # You can programmatically examine the properties of a Halide # type. This is useful when you write a C++ function that has # hl.Expr arguments and you wish to check their types: assert hl.UInt(8).bits() == 8 assert hl.Int(8).is_int() # You can also programmatically construct Types as a function of other Types. t = hl.UInt(8) t = t.with_bits(t.bits() * 2) assert t == hl.UInt(16) # Or construct a Type from a C++ scalar type #assert type_of<float>() == hl.Float(32) # The Type struct is also capable of representing vector types, # but this is reserved for Halide's internal use. You should # vectorize code by using hl.Func::vectorize, not by attempting to # construct vector expressions directly. You may encounter vector # types if you programmatically manipulate lowered Halide code, # but this is an advanced topic (see hl.Func::add_custom_lowering_pass). # You can query any Halide hl.Expr for its type. An hl.Expr # representing a hl.Var has type hl.Int(32): x = hl.Var("x") assert hl.Expr(x).type() == hl.Int(32) # Most transcendental functions in Halide hl.cast their inputs to a # hl.Float(32) and return a hl.Float(32): assert hl.sin(x).type() == hl.Float(32) # You can hl.cast an hl.Expr from one Type to another using the hl.cast operator: assert hl.cast(hl.UInt(8), x).type() == hl.UInt(8) # This also comes in a template form that takes a C++ type. #assert hl.cast<uint8_t>(x).type() == hl.UInt(8) # You can also query any defined hl.Func for the types it produces. f1 = hl.Func("f1") f1[x] = hl.cast(hl.UInt(8), x) assert f1.output_types()[0] == hl.UInt(8) f2 = hl.Func("f2") f2[x] = (x, hl.sin(x)) assert f2.output_types()[0] == hl.Int(32) and \ f2.output_types()[1] == hl.Float(32) # Type promotion rules. if True: # When you combine Exprs of different types (e.g. using '+', # '*', etc), Halide uses a system of type promotion # rules. These differ to C's rules. To demonstrate these # we'll make some Exprs of each type. x = hl.Var("x") u8 = hl.cast(hl.UInt(8), x) u16 = hl.cast(hl.UInt(16), x) u32 = hl.cast(hl.UInt(32), x) u64 = hl.cast(hl.UInt(64), x) s8 = hl.cast(hl.Int(8), x) s16 = hl.cast(hl.Int(16), x) s32 = hl.cast(hl.Int(32), x) s64 = hl.cast(hl.Int(64), x) f32 = hl.cast(hl.Float(32), x) f64 = hl.cast(hl.Float(64), x) # The rules are as follows, and are applied in the order they are # written below. # 1) It is an error to hl.cast or use arithmetic operators on Exprs of type hl.Handle(). # 2) If the types are the same, then no type conversions occur. for t in valid_halide_types: # Skip the handle type. if t.is_handle(): continue e = hl.cast(t, x) assert (e + e).type() == e.type() # 3) If one type is a float but the other is not, then the # non-float argument is promoted to a float (possibly causing a # loss of precision for large integers). 
assert (u8 + f32).type() == hl.Float(32) assert (f32 + s64).type() == hl.Float(32) assert (u16 + f64).type() == hl.Float(64) assert (f64 + s32).type() == hl.Float(64) # 4) If both types are float, then the narrower argument is # promoted to the wider bit-width. assert (f64 + f32).type() == hl.Float(64) # The rules above handle all the floating-point cases. The # following three rules handle the integer cases. # 5) If one of the expressions is an integer constant, then it is # coerced to the type of the other expression. assert (u32 + 3).type() == hl.UInt(32) assert (3 + s16).type() == hl.Int(16) # If this rule would cause the integer to overflow, then Halide # will trigger an error, e.g. uncommenting the following line # will cause this program to terminate with an error. # hl.Expr bad = u8 + 257 # 6) If both types are unsigned integers, or both types are # signed integers, then the narrower argument is promoted to # wider type. assert (u32 + u8).type() == hl.UInt(32) assert (s16 + s64).type() == hl.Int(64) # 7) If one type is signed and the other is unsigned, both # arguments are promoted to a signed integer with the greater of # the two bit widths. assert (u8 + s32).type() == hl.Int(32) assert (u32 + s8).type() == hl.Int(32) # Note that this may silently overflow the unsigned type in the # case where the bit widths are the same. assert (u32 + s32).type() == hl.Int(32) if False: # evaluate<X> not yet exposed to python # When an unsigned hl.Expr is converted to a wider signed type in # this way, it is first widened to a wider unsigned type # (zero-extended), and then reinterpreted as a signed # integer. I.e. casting the hl.UInt(8) value 255 to an hl.Int(32) # produces 255, not -1. #int32_t result32 = evaluate<int>(hl.cast<int32_t>(hl.cast<uint8_t>(255))) assert result32 == 255 # When a signed type is explicitly converted to a wider unsigned # type with the hl.cast operator (the type promotion rules will # never do this automatically), it is first converted to the # wider signed type (sign-extended), and then reinterpreted as # an unsigned integer. I.e. casting the hl.Int(8) value -1 to a # hl.UInt(16) produces 65535, not 255. #uint16_t result16 = evaluate<uint16_t>(hl.cast<uint16_t>(hl.cast<int8_t>(-1))) assert result16 == 65535 # The type hl.Handle(). if True: # hl.Handle is used to represent opaque pointers. Applying # type_of to any pointer type will return hl.Handle() #assert type_of<void *>() == hl.Handle() #assert type_of<const char * const **>() == hl.Handle() # (not clear what the proper python version would be) # Handles are always stored as 64-bit, regardless of the compilation # target. assert hl.Handle().bits() == 64 # The main use of an hl.Expr of type hl.Handle is to pass # it through Halide to other external code. # Generic code. if True: # The main explicit use of Type in Halide is to write Halide # code parameterized by a Type. In C++ you'd do this with # templates. In Halide there's no need - you can inspect and # modify the types dynamically at C++ runtime instead. The # function defined below averages two expressions of any # equal numeric type. x = hl.Var("x") assert average(hl.cast(hl.Float(32), x), 3.0).type() == hl.Float(32) assert average(x, 3).type() == hl.Int(32) assert average(hl.cast(hl.UInt(8), x), hl.cast(hl.UInt(8), 3)).type() == hl.UInt(8) print("Success!") return 0
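# The average() helper exercised by the assertions at the end of the lesson
# above is not part of this excerpt. A hedged sketch that is consistent with
# those assertions (floats average as floats; integer results keep the input
# type, with the sum computed in a twice-as-wide type to avoid overflow):
def average(a, b):
    # Coerce plain Python numbers and Vars into Halide Exprs first.
    if not isinstance(a, hl.Expr):
        a = hl.Expr(a)
    if not isinstance(b, hl.Expr):
        b = hl.Expr(b)
    # The two arguments must have matching numeric types.
    assert a.type() == b.type()
    if a.type().is_float():
        # The '2' is promoted to the floating-point type (rule 5 above).
        return (a + b) / 2
    # For integers, widen before summing, then narrow back to the input type.
    narrow = a.type()
    wider = narrow.with_bits(narrow.bits() * 2)
    return hl.cast(narrow, (hl.cast(wider, a) + hl.cast(wider, b)) / 2)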
def bilateral_filter(input, width, height): print(' bilateral_filter') k = hl.Buffer(hl.Float(32), [7, 7], "gauss_kernel") k.translate([-3, -3]) weights = hl.Func("bilateral_weights") total_weights = hl.Func("bilateral_total_weights") bilateral = hl.Func("bilateral") output = hl.Func("bilateral_filter_output") x, y, dx, dy, c = hl.Var("x"), hl.Var("y"), hl.Var("dx"), hl.Var("dy"), hl.Var("c") rdom = hl.RDom([(-3, 7), (-3, 7)]) k.fill(0) k[-3, -3] = 0.000690 k[-2, -3] = 0.002646 k[-1, -3] = 0.005923 k[0, -3] = 0.007748 k[1, -3] = 0.005923 k[2, -3] = 0.002646 k[3, -3] = 0.000690 k[-3, -2] = 0.002646 k[-2, -2] = 0.010149 k[-1, -2] = 0.022718 k[0, -2] = 0.029715 k[1, -2] = 0.022718 k[2, -2] = 0.010149 k[3, -2] = 0.002646 k[-3, -1] = 0.005923 k[-2, -1] = 0.022718 k[-1, -1] = 0.050855 k[0, -1] = 0.066517 k[1, -1] = 0.050855 k[2, -1] = 0.022718 k[3, -1] = 0.005923 k[-3, 0] = 0.007748 k[-2, 0] = 0.029715 k[-1, 0] = 0.066517 k[0, 0] = 0.087001 k[1, 0] = 0.066517 k[2, 0] = 0.029715 k[3, 0] = 0.007748 k[-3, 1] = 0.005923 k[-2, 1] = 0.022718 k[-1, 1] = 0.050855 k[0, 1] = 0.066517 k[1, 1] = 0.050855 k[2, 1] = 0.022718 k[3, 1] = 0.005923 k[-3, 2] = 0.002646 k[-2, 2] = 0.010149 k[-1, 2] = 0.022718 k[0, 2] = 0.029715 k[1, 2] = 0.022718 k[2, 2] = 0.010149 k[3, 2] = 0.002646 k[-3, 3] = 0.000690 k[-2, 3] = 0.002646 k[-1, 3] = 0.005923 k[0, 3] = 0.007748 k[1, 3] = 0.005923 k[2, 3] = 0.002646 k[3, 3] = 0.000690 input_mirror = hl.BoundaryConditions.mirror_interior(input, [(0, width), (0, height)]) dist = hl.cast(hl.Float(32), hl.cast(hl.Int(32), input_mirror[x, y, c]) - hl.cast(hl.Int(32), input_mirror[x + dx, y + dy, c])) sig2 = 100 threshold = 25000 score = hl.select(hl.abs(input_mirror[x + dx, y + dy, c]) > threshold, 0, hl.exp(-dist * dist / sig2)) weights[dx, dy, x, y, c] = k[dx, dy] * score total_weights[x, y, c] = hl.sum(weights[rdom.x, rdom.y, x, y, c]) bilateral[x, y, c] = hl.sum(input_mirror[x + rdom.x, y + rdom.y, c] * weights[rdom.x, rdom.y, x, y, c]) / \ total_weights[x, y, c] output[x, y, c] = hl.cast(hl.Float(32), input[x, y, c]) output[x, y, 1] = bilateral[x, y, 1] output[x, y, 2] = bilateral[x, y, 2] weights.compute_at(output, y).vectorize(x, 16) output.compute_root().parallel(y).vectorize(x, 16) output.update(0).parallel(y).vectorize(x, 16) output.update(1).parallel(y).vectorize(x, 16) return output
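# The 49 literal spatial weights in bilateral_filter() above are exactly the
# outer product of the 1-D gauss_7x7 coefficients (e.g. 0.29496**2 ~= 0.087001,
# 0.29496 * 0.225511 ~= 0.066517). A sketch of filling an equivalent kernel
# programmatically; the literal table above remains the reference values.
import halide as hl

# 1-D Gaussian coefficients, copied from gauss_7x7() above.
g = [0.026267, 0.100742, 0.225511, 0.29496, 0.225511, 0.100742, 0.026267]

k = hl.Buffer(hl.Float(32), [7, 7], "gauss_kernel")
k.translate([-3, -3])
for dy in range(-3, 4):
    for dx in range(-3, 4):
        k[dx, dy] = g[dx + 3] * g[dy + 3]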