def test_param_bug():
    """Construct UInt(8) params through three different call shapes.

    See https://github.com/rodrigob/Halide/issues/1
    """
    uint8 = hl.UInt(8)
    # (type, name, int), (type, name) and (type, int) must all be accepted.
    with_name_and_value = hl.Param(uint8, "p1", 0)
    with_name_only = hl.Param(uint8, "p2")
    with_value_only = hl.Param(uint8, 42)
def main():
    """Build the local Laplacian pipeline and run it on the test image."""
    source = hl.ImageParam(hl.UInt(16), 3, "input")

    # Filter controls: the number of intensity levels, then alpha and beta.
    num_levels = hl.Param(int_t, "levels", 8)
    alpha_param = hl.Param(float_t, "alpha", 1.0 / 7.0)
    beta_param = hl.Param(float_t, "beta", 1.0)

    pipeline = get_local_laplacian(source, num_levels, alpha_param, beta_param)
    filter_test_image(pipeline, source)
def setup_inputs(self):
    """Declare the pipeline inputs and their constant-exterior wrappers.

    Every scalar and image parameter is registered in ``self.inputs`` under
    the string returned by its own ``name()``.  Each image parameter is also
    wrapped with ``hl.BoundaryConditions.constant_exterior(..., 0)`` and the
    wrapper stored in ``self.clamps``.  ``self.nbfn`` is set from the height
    of the ``g_fock_in`` image parameter.
    """
    f64 = hl.Float(64)

    # input scalars
    scalars = [hl.Param(f64, label) for label in ("delo2", "delta", "rdelta")]

    # input vectors
    vectors = [
        hl.ImageParam(f64, 1, label)
        for label in ("expnt_in", "rnorm_in", "x_in", "y_in", "z_in")
    ]

    # input matrices
    matrices = [
        hl.ImageParam(f64, 2, label)
        for label in ("fm_in", "g_fock_in", "g_dens_in")
    ]
    fm_in, g_fock_in_in, g_dens_in = matrices

    self.inputs.update({
        param.name(): param for param in scalars + vectors + matrices
    })

    # clamp all inputs, to prevent out-of-bounds errors from odd tile sizes
    # and such; keys are listed in the same order as the images above
    clamp_keys = (
        "expnt", "rnorm", "x", "y", "z",
        "fm", "g_fock_in_clamped", "g_dens",
    )
    self.clamps.update({
        key: hl.BoundaryConditions.constant_exterior(image, 0)
        for key, image in zip(clamp_keys, vectors + matrices)
    })

    # nbfn=number of basis functions.  This is our problem size
    self.nbfn = g_fock_in_in.height()
def test_basics2():
    """Index a clamped Func with integer expressions; a float index must raise."""
    source = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8
    half = s_sigma // 2          # integer half-width
    half_float = s_sigma / 2     # true division: a Python float

    x, y, z, c = (hl.Var(label) for label in ('x', 'y', 'z', 'c'))

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped_x = hl.clamp(x, 0, source.width() - 1)
    clamped_y = hl.clamp(y, 0, source.height() - 1)
    clamped[x, y] = source[clamped_x, clamped_y, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.i32(1), y * s_sigma * hl.i32(1)]
    val22 = clamped[x * s_sigma - hl.i32(half), y * s_sigma - hl.i32(half)]
    val2 = clamped[x * s_sigma - half, y * s_sigma - half]
    val3 = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]

    # Subtracting a Python float makes the index a float expression, which
    # is expected to be rejected.
    try:
        val1 = clamped[x * s_sigma - half_float, y * s_sigma - half_float]
    except RuntimeError as err:
        assert 'Implicit cast from float32 to int' in str(err)
    else:
        assert False, 'Did not see expected exception!'
def test_basics3():
    """Exercise in-place addition on a FuncRef taken from a histogram Func."""
    source = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8
    half = s_sigma // 2

    x, y, z, c = (hl.Var(label) for label in ('x', 'y', 'z', 'c'))

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped_x = hl.clamp(x, 0, source.width() - 1)
    clamped_y = hl.clamp(y, 0, source.height() - 1)
    clamped[x, y] = source[clamped_x, clamped_y, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    sample = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
    val = hl.clamp(sample, 0.0, 1.0)
    zi = hl.i32((val / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    ss = hl.select(c == 0, val, 1.0)
    left = histogram[x, y, zi, c]
    # Augmented assignment is the operation under test; keep it as `+=`.
    left += 5
    left += ss
def test_basics2():
    """Index a clamped Func with several integer-typed index expressions."""
    source = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8
    half = s_sigma // 2
    int32 = hl.Int(32)

    x, y, z, c = (hl.Var(label) for label in ('x', 'y', 'z', 'c'))

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped_x = hl.clamp(x, 0, source.width() - 1)
    clamped_y = hl.clamp(y, 0, source.height() - 1)
    clamped[x, y] = source[clamped_x, clamped_y, 0]

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(int32, 1),
                    y * s_sigma * hl.cast(int32, 1)]
    # Indexing with `s_sigma / 2` (true division) is intentionally not
    # attempted here: it should fail.
    val22 = clamped[x * s_sigma - hl.cast(int32, half),
                    y * s_sigma - hl.cast(int32, half)]
    val2 = clamped[x * s_sigma - half, y * s_sigma - half]
    val3 = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
def test_basics2():
    """Check the types of integer index arithmetic and index a clamped Func.

    Prints a handful of expressions and their types for debugging, asserts
    the resulting Halide types of Var/int arithmetic, and then indexes the
    clamped Func with several integer-typed expressions.
    """
    # Named `input_img` rather than `input` to avoid shadowing the builtin.
    input_img = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input_img[hl.clamp(x, 0, input_img.width() - 1),
                              hl.clamp(y, 0, input_img.height() - 1), 0]

    # Debug output: the values first, then the expression types.
    print("s_sigma", s_sigma)
    print("s_sigma/2", s_sigma / 2)
    print("s_sigma//2", s_sigma // 2)
    print()
    print("x * s_sigma", x * s_sigma)
    print("x * 8", x * 8)
    print("x * 8 + 4", x * 8 + 4)
    print("x * 8 * 4", x * 8 * 4)
    print()
    print("x", x)
    # This line previously printed only its label; the value was missing.
    print("(x * s_sigma).type()", (x * s_sigma).type())
    print("(x * 8).type()", (x * 8).type())
    print("(x * 8 + 4).type()", (x * 8 + 4).type())
    print("(x * 8 * 4).type()", (x * 8 * 4).type())
    print("(x * 8 / 4).type()", (x * 8 / 4).type())
    print("((x * 8) * 4).type()", ((x * 8) * 4).type())
    print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

    assert (x * 8).type() == hl.Int(32)
    assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
    assert ((x * 8) / 4).type() == hl.Int(32)
    assert (x * (8 / 4)).type() == hl.Float(32)  # under python3 division rules
    assert (x * (8 // 4)).type() == hl.Int(32)
    # `(x * 8 // 4).type()` is not checked: floor division on an expression
    # was marked "not yet implemented" when this test was written.

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # Indexing with `s_sigma / 2` (true division) is intentionally not
    # attempted here: it should fail.
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]
def main():
    """Build the local Laplacian pipeline, then either compile or run it."""
    source = hl.ImageParam(hl.UInt(16), 3, 'input')

    # number of intensity levels
    num_levels = hl.Param(int_t, 'levels', 8)

    # Parameters controlling the filter
    alpha_param = hl.Param(float_t, 'alpha', 1.0 / 7.0)
    beta_param = hl.Param(float_t, 'beta', 1.0)

    pipeline = get_local_laplacian(source, num_levels, alpha_param, beta_param)

    # Set to False to run the jit immediately and get instant gratification.
    generate = False
    if not generate:
        filter_test_image(pipeline, source)
    else:
        generate_compiled_file(pipeline)
def main():
    """Compile the lesson 10 "brighter" pipeline ahead of time.

    Returns:
        0 after ``compile_to`` has been asked for the object file, the C
        header and the Python extension source.
    """
    # A simple one-stage pipeline.
    brighter = hl.Func("brighter")
    x, y = hl.Var("x"), hl.Var("y")

    # The pipeline depends on one scalar parameter ...
    offset = hl.Param(hl.UInt(8), name="offset")

    # ... and takes one grayscale 8-bit input buffer. The first constructor
    # argument gives the type of a pixel, and the second specifies the
    # number of dimensions (not the number of channels!). For a grayscale
    # image this is two; for a color image it's three. Currently, four
    # dimensions is the maximum for inputs and outputs.
    image = hl.ImageParam(hl.UInt(8), 2)

    # If we were jit-compiling, these would just be an int and a hl.Buffer,
    # but because we want to compile the pipeline once and have it work for
    # any value of the parameter, we need a hl.Param object, which can be
    # used like an hl.Expr, and an hl.ImageParam object, which can be used
    # like a hl.Buffer.

    # Define the hl.Func.
    brighter[x, y] = image[x, y] + offset

    # Schedule it.
    brighter.vectorize(x, 16).parallel(y)

    # This time, instead of calling brighter.realize(...), which would
    # compile and run the pipeline immediately, we call a method that
    # compiles the pipeline to an object file and header.
    #
    # For AOT-compiled code, we need to explicitly declare the arguments to
    # the routine. This routine takes two. Arguments are usually Params or
    # ImageParams.
    fname = "lesson_10_halide"
    outputs = {
        hl.Output.object: "lesson_10_halide.o",
        hl.Output.c_header: "lesson_10_halide.h",
        hl.Output.python_extension: "lesson_10_halide.py.cpp",
    }
    arguments = [image, offset]
    brighter.compile_to(outputs, arguments, fname)

    print("Halide pipeline compiled, but not yet run.")

    # To continue this lesson, look in the file
    # lesson_10_aot_compilation_run.cpp
    return 0
def main():
    """Build the bilateral grid pipeline, then either compile or run it."""
    source = hl.ImageParam(hl.Float(32), 2, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    pipeline = get_bilateral_grid(source, r_sigma, s_sigma)

    # Set `generate` to False to run the jit immediately and get instant
    # gratification; set it to True to emit a compiled file instead.
    generate = False
    if not generate:
        filter_test_image(pipeline, source)
    else:
        generate_compiled_file(pipeline)

    print("\nEnd of game. Have a nice day!")
def test_division():
    """Check result types and values of `/` and `//` on typed params."""
    f32 = hl.Param(hl.Float(32), 'f32', -32.0)
    f64 = hl.Param(hl.Float(64), 'f64', 64.0)
    i16 = hl.Param(hl.Int(16), 'i16', -16)
    i32 = hl.Param(hl.Int(32), 'i32', 32)
    u16 = hl.Param(hl.UInt(16), 'u16', 16)
    u32 = hl.Param(hl.UInt(32), 'u32', 32)

    # Verify that the types match the rules in match_types()
    type_cases = [
        (f32 / f64, hl.Float(64)),
        (f32 // f64, hl.Float(64)),
        (i16 / i32, hl.Int(32)),
        (i16 // i32, hl.Int(32)),
        (u16 / u32, hl.UInt(32)),
        (u16 // u32, hl.UInt(32)),
        # int / uint -> int
        (u16 / i32, hl.Int(32)),
        (i32 // u16, hl.Int(32)),
        # any / float -> float
        # float / any -> float
        (u16 / f32, hl.Float(32)),
        (u16 // f32, hl.Float(32)),
        (i16 / f64, hl.Float(64)),
        (i16 // f64, hl.Float(64)),
    ]
    for expr, expected_type in type_cases:
        assert expr.type() == expected_type

    # Verify that division semantics match those for Halide
    # (rather than python); this differs for int/int which
    # defaults to float (rather than floordiv) in Python3.
    # Also test that // always floors the result, even for float.
    value_cases = [
        (f32 / f64, -0.5),
        (f32 // f64, -1.0),
        (i16 / i32, -1),
        (i16 // i32, -1),
        (i32 / i16, -2),
        (u16 / u32, 0),
        (u16 // u32, 0),
        (u16 / i32, 0),
        (i32 // u16, 2),
        (u16 / f32, -0.5),
        (u16 // f32, -1.0),
        (i16 / f64, -0.25),
        (i16 // f64, -1.0),
    ]
    for expr, expected_value in value_cases:
        assert _evaluate(expr) == expected_value
def test_int_promotion():
    """Check that (Exprlike op literal) keeps the Exprlike's type.

    The literal must be matched to the Exprlike's type rather than the
    result being promoted to int32.  All types that use
    add_binary_operators() should be tested here.
    """
    x = hl.Var('x')

    f = hl.Func('f')
    f[x] = hl.u16(x)

    operands = (
        f[x],                    # Exprlike = FuncRef
        hl.Expr(f[x]),           # Exprlike = Expr
        hl.Param(hl.UInt(16)),   # Exprlike = Param
    )

    # All the binary ops are handled the same, so + is good enough; try the
    # literal on both the right and the left.
    for operand in operands:
        _check_is_u16(operand + 2)
        _check_is_u16(2 + operand)
def _read_object_header(path, fmt, num_bytes):
    """Return the first *num_bytes* of *path* unpacked with *fmt*, or None.

    Args:
        path: Object file to inspect.
        fmt: ``struct`` format string describing the header fields.
        num_bytes: Number of leading bytes to read from the file.

    Returns:
        A list of the unpacked header fields, or ``None`` when the file
        cannot be opened or read.
    """
    try:
        # Open inside the `try` so that a missing file is reported as "not
        # generated" instead of escaping as an exception; `with` closes the
        # handle on every path.
        with open(path, "rb") as f:
            header_bytes = f.read(num_bytes)
    except OSError:
        return None
    return list(unpack(fmt, header_bytes))


def main():
    """Cross-compile the "brighter" pipeline and sanity-check the outputs.

    Compiles for the host, then for 32-bit ARM Android, 64-bit x86 Windows
    and 32-bit ARM iOS, and compares the first bytes of each cross-compiled
    object file against the expected magic numbers.

    Returns:
        0 on success, -1 if an expected object file could not be read.

    Raises:
        Exception: if an object file's header does not match the expected
            magic bytes.
    """
    # We'll define the simple one-stage pipeline that we used in lesson 10.
    brighter = hl.Func("brighter")
    x, y = hl.Var("x"), hl.Var("y")

    # Declare the arguments.
    offset = hl.Param(hl.UInt(8))
    input = hl.ImageParam(hl.UInt(8), 2)
    args = [input, offset]

    # Define the hl.Func.
    brighter[x, y] = input[x, y] + offset

    # Schedule it.
    brighter.vectorize(x, 16).parallel(y)

    # The following line is what we did in lesson 10. It compiles an
    # object file suitable for the system that you're running this
    # program on.  For example, if you compile and run this file on
    # 64-bit linux on an x86 cpu with sse4.1, then the generated code
    # will be suitable for 64-bit linux on x86 with sse4.1.
    brighter.compile_to_file("lesson_11_host", args, "lesson_11_host")

    # We can also compile object files suitable for other cpus and
    # operating systems. You do this with an optional trailing argument
    # to compile_to_file which specifies the target to compile for.

    create_android = True
    create_windows = True
    create_ios = True

    if create_android:
        # Let's use this to compile a 32-bit arm android version of this code:
        target = hl.Target()
        target.os = hl.TargetOS.Android  # The operating system
        target.arch = hl.TargetArch.ARM  # The CPU architecture
        target.bits = 32  # The bit-width of the architecture
        arm_features = []  # A list of features to set
        target.set_features(arm_features)
        # Pass the target as the last argument.
        brighter.compile_to_file("lesson_11_arm_32_android", args,
                                 "lesson_11_arm_32_android", target)

    if create_windows:
        # And now a Windows object file for 64-bit x86 with AVX and SSE 4.1:
        target = hl.Target()
        target.os = hl.TargetOS.Windows
        target.arch = hl.TargetArch.X86
        target.bits = 64
        target.set_features([hl.TargetFeature.AVX, hl.TargetFeature.SSE41])
        brighter.compile_to_file("lesson_11_x86_64_windows", args,
                                 "lesson_11_x86_64_windows", target)

    if create_ios:
        # And finally an iOS mach-o object file for one of Apple's 32-bit
        # ARM processors - the A6. It's used in the iPhone 5. The A6 uses
        # a slightly modified ARM architecture called ARMv7s. We specify
        # this using the target features field.  Support for Apple's
        # 64-bit ARM processors is very new in llvm, and still somewhat
        # flaky.
        target = hl.Target()
        target.os = hl.TargetOS.IOS
        target.arch = hl.TargetArch.ARM
        target.bits = 32
        target.set_features([hl.TargetFeature.ARMv7s])
        brighter.compile_to_file("lesson_11_arm_32_ios", args,
                                 "lesson_11_arm_32_ios", target)

    # Now let's check these files are what they claim, by examining
    # their first few bytes.

    if create_android:
        # 32-arm android object files start with the magic bytes:
        # uint8_t []
        arm_32_android_magic = [
            0x7f, ord('E'), ord('L'), ord('F'),  # ELF format
            1,  # 32-bit
            1,  # 2's complement little-endian
            1,  # Current version of elf
        ]
        length = len(arm_32_android_magic)

        header = _read_object_header("lesson_11_arm_32_android.o",
                                     "B" * length, length)
        if header is None:
            print("Android object file not generated")
            return -1

        if header != arm_32_android_magic:
            # Show which byte positions matched before failing.
            print([a == b for a, b in zip(header, arm_32_android_magic)])
            raise Exception(
                "Unexpected header bytes in 32-bit arm object file.")

    if create_windows:
        # 64-bit windows object files start with the magic 16-bit value 0x8664
        # (presumably referring to x86-64)
        # uint8_t []
        win_64_magic = [0x64, 0x86]

        header = _read_object_header("lesson_11_x86_64_windows.obj",
                                     "B" * 2, 2)
        if header is None:
            print("Windows object file not generated")
            return -1

        if header != win_64_magic:
            raise Exception(
                "Unexpected header bytes in 64-bit windows object file.")

    if create_ios:
        # 32-bit arm iOS mach-o files start with the following magic bytes:
        # uint32_t []
        arm_32_ios_magic = [
            0xfeedface,  # Mach-o magic bytes
            12,  # CPU type is ARM
            11,  # CPU subtype is ARMv7s
            1,  # It's a relocatable object file.
        ]

        header = _read_object_header("lesson_11_arm_32_ios.o",
                                     "I" * 4, 4 * 4)
        if header is None:
            print("ios object file not generated")
            return -1

        if header != arm_32_ios_magic:
            raise Exception(
                "Unexpected header bytes in 32-bit arm ios object file.")

    # It looks like the object files we produced are plausible for
    # those targets. We'll count that as a success for the purposes
    # of this tutorial. For a real application you'd then need to
    # figure out how to integrate Halide into your cross-compilation
    # toolchain. There are several small examples of this in the
    # Halide repository under the apps folder. See HelloAndroid and
    # HelloiOS here:
    # https://github.com/halide/Halide/tree/master/apps/
    print("Success!")
    return 0
def test_autodiff():
    """Check the adjoints of a small reduction via hl.propagate_adjoints."""
    x = hl.Var('x')
    b = hl.Buffer(hl.Float(32), [3])
    p = hl.Param(hl.Float(32), 'p', 1)
    for index, value in enumerate((1.0, 2.0, 3.0)):
        b[index] = value

    f, g, h = hl.Func('f'), hl.Func('g'), hl.Func('h')
    f[x] = b[x]
    f[0] = 4.0
    g[x] = f[x] * 5.0 * p
    r = hl.RDom([(0, 3)])
    h[()] = 0.0
    h[()] += g[r.x]

    d = hl.propagate_adjoints(h)

    def expect_vector(key, expected):
        # Look up the adjoint, realize it over 3 elements and compare.
        realized = d[key].realize([3])
        for index, want in enumerate(expected):
            assert(realized[index] == want)

    def expect_scalar(key, expected):
        # Zero-dimensional realization, compared with a small tolerance.
        realized = d[key].realize()
        assert(abs(realized[()] - expected) < 1e-6)

    # Every lookup is performed and checked twice, one after the other.
    repeats = range(2)

    # gradient w.r.t. the initialization of f
    for _ in repeats:
        expect_vector(f, (0.0, 5.0, 5.0))

    # gradient w.r.t. the updated f
    for _ in repeats:
        expect_vector((f, 0), (5.0, 5.0, 5.0))

    # gradient w.r.t. the buffer
    for _ in repeats:
        expect_vector(b, (0.0, 5.0, 5.0))

    # gradient w.r.t. the param: 5 * (4 + 2 + 3)
    for _ in repeats:
        expect_scalar(p, 45.0)