def test_basics3():
    """Exercise in-place ``+=`` on a function reference taken from a 4-D Func.

    Builds the start of a bilateral-grid style pipeline (clamped input,
    reduction domain, quantised intensity index) and then applies two
    in-place additions to ``histogram[x, y, zi, c]``.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x, y, z, c = (hl.Var(name) for name in 'xyzc')

    # Boundary condition: clamp coordinates into the input, channel 0 only.
    last_col = image.width() - 1
    last_row = image.height() - 1
    clamped = hl.Func('clamped')
    clamped[x, y] = image[hl.clamp(x, 0, last_col), hl.clamp(y, 0, last_row), 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    sample = clamped[x * s_sigma + r.x - s_sigma // 2,
                     y * s_sigma + r.y - s_sigma // 2]
    sample = hl.clamp(sample, 0.0, 1.0)
    zi = hl.i32((sample / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    increment = hl.select(c == 0, sample, 1.0)
    cell = histogram[x, y, zi, c]
    cell += 5
    cell += increment
def test_basics2():
    """Check that integer index arithmetic is accepted when indexing a Func.

    Every indexing expression below mixes ``hl.Var`` values with Python
    ints (optionally cast to Int(32)); the test passes if none of them
    raises.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x, y, z, c = (hl.Var(name) for name in 'xyzc')

    def as_i32(value):
        # Explicit cast of a Python number to a Halide Int(32) expression.
        return hl.cast(hl.Int(32), value)

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = image[hl.clamp(x, 0, image.width()-1),
                          hl.clamp(y, 0, image.height()-1), 0]

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')

    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * as_i32(1), y * s_sigma * as_i32(1)]
    # Disabled: true division yields a float index, which should fail.
    #val1 = clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]
    val22 = clamped[x * s_sigma - as_i32(s_sigma//2),
                    y * s_sigma - as_i32(s_sigma//2)]
    val2 = clamped[x * s_sigma - s_sigma//2, y * s_sigma - s_sigma//2]
    val3 = clamped[x * s_sigma + r.x - s_sigma//2,
                   y * s_sigma + r.y - s_sigma//2]
def test_basics2():
    """Check integer index arithmetic and the error raised for float indices.

    Integer-valued index expressions must be accepted; an index built with
    true division (a float) must raise ``RuntimeError`` mentioning an
    implicit float32-to-int cast.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8

    x, y, z, c = (hl.Var(name) for name in 'xyzc')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = image[hl.clamp(x, 0, image.width() - 1),
                          hl.clamp(y, 0, image.height() - 1), 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')

    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.i32(1), y * s_sigma * hl.i32(1)]
    val22 = clamped[x * s_sigma - hl.i32(s_sigma // 2),
                    y * s_sigma - hl.i32(s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]

    # A float offset (Python 3 true division) must be rejected as an index.
    saw_error = False
    try:
        clamped[x * s_sigma - s_sigma / 2, y * s_sigma - s_sigma / 2]
    except RuntimeError as err:
        saw_error = True
        assert 'Implicit cast from float32 to int' in str(err)
    assert saw_error, 'Did not see expected exception!'
def test_basics():
    """Smoke-test indexing, arithmetic, definition and scheduling of a 3x3 blur.

    The discarded expressions only check that the corresponding operators
    can be evaluated without raising.
    """
    image = hl.ImageParam(hl.UInt(16), 2, 'input')
    x, y = hl.Var('x'), hl.Var('y')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    one = hl.i32(1)
    assert one.type() == hl.Int(32)

    z = x + 1
    # Indexing with vars, ints and derived expressions.
    _ = image[x, y]
    _ = image[0, 0]
    _ = image[z, y]
    _ = image[x + 1, y]
    _ = image[x, y] + image[x + 1, y]

    if False:
        # Disabled in the original: arithmetic on references to blur_x
        # before blur_x has a definition.
        aa = blur_x[x, y]
        bb = blur_x[x, y + 1]
        aa + bb
        blur_x[x, y] + blur_x[x, y + 1]

    _ = (image[x, y] + image[x + 1, y]) / 2
    _ = blur_x[x, y]

    blur_xx[x, y] = image[x, y]

    row_sum = image[x, y] + image[x + 1, y] + image[x + 2, y]
    blur_x[x, y] = row_sum / 3
    col_sum = blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]
    blur_y[x, y] = col_sum / 3

    # Schedule and JIT-compile.
    xi, yi = hl.Var('xi'), hl.Var('yi')
    tiled = blur_y.tile(x, y, xi, yi, 8, 4)
    tiled.parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)
    blur_y.compile_jit()
def main():
    """Compile the erode pipeline, run it on the sample input and save the images.

    Writes ``erode_input.png`` and ``erode_result.png`` to the current
    directory.
    """
    # Declare the pipeline input and JIT-compile the filter.
    input = hl.ImageParam(hl.UInt(8), 3, "input")
    erode = get_erode(input)
    erode.compile_jit()

    # Wrap the numpy arrays in Halide buffers; the output array is
    # allocated in Fortran order with the input's shape and dtype.
    source_pixels = get_input_data()
    input_image = hl.Buffer(source_pixels)
    input.set(input_image)

    result_pixels = np.empty(source_pixels.shape,
                             dtype=source_pixels.dtype,
                             order="F")
    output_image = hl.Buffer(result_pixels)

    print("input_image", input_image)
    print("output_image", output_image)

    # Run the pipeline, writing into result_pixels via output_image.
    erode.realize(output_image)

    # Save a copy of the input next to the result.
    input_path = "erode_input.png"
    output_path = "erode_result.png"
    for path, pixels in ((input_path, source_pixels),
                         (output_path, result_pixels)):
        imageio.imsave(path, pixels)

    print("\nerode realized on output image.", "Result saved at",
          output_path, "( input data copy at", input_path, ")")
    print("\nEnd of game. Have a nice day!")
def main():
    """Run the interpolate pipeline on the sample image, time it, save results.

    Writes ``interpolate_input.png`` and ``interpolate_result.png`` to the
    current directory.
    """
    input = hl.ImageParam(float_t, 3, "input")
    levels = 10
    interpolate = get_interpolate(input, levels)

    # Bind the sample data (must have 4 entries along axis 2) to the input.
    input_data = get_input_data()
    assert input_data.shape[2] == 4
    input_image = hl.Buffer(input_data)
    input.set(input_image)

    input_width = input_data.shape[0]
    input_height = input_data.shape[1]

    # Time only the realization itself.
    started = datetime.now()
    output_image = interpolate.realize(input_width, input_height, 3)
    finished = datetime.now()
    print('Interpolated in %.5f secs' % (finished - started).total_seconds())

    output_data = hl.buffer_to_ndarray(output_image)

    # Save a copy of the input next to the result.
    input_path = "interpolate_input.png"
    output_path = "interpolate_result.png"
    for path, pixels in ((input_path, input_data),
                         (output_path, output_data)):
        imsave(path, pixels)

    print("\nblur realized on output image.", "Result saved at",
          output_path, "( input data copy at", input_path, ")")
    print("\nEnd of game. Have a nice day!")
def test_basics2():
    """Check expression types and integer index arithmetic on a Func.

    Prints a number of intermediate expressions and their types for
    debugging, asserts the expected result types of mixed Var/int
    arithmetic, and finally checks that integer-valued index expressions
    are accepted when indexing ``clamped``.
    """
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1), 0]

    # Debug output (the original wrapped this in a redundant `if True:`).
    print("s_sigma", s_sigma)
    print("s_sigma/2", s_sigma / 2)
    print("s_sigma//2", s_sigma // 2)
    print()
    print("x * s_sigma", x * s_sigma)
    print("x * 8", x * 8)
    print("x * 8 + 4", x * 8 + 4)
    print("x * 8 * 4", x * 8 * 4)
    print()
    print("x", x)
    # Fixed: this line previously printed only the label, without the type.
    print("(x * s_sigma).type()", (x * s_sigma).type())
    print("(x * 8).type()", (x * 8).type())
    print("(x * 8 + 4).type()", (x * 8 + 4).type())
    print("(x * 8 * 4).type()", (x * 8 * 4).type())
    print("(x * 8 / 4).type()", (x * 8 / 4).type())
    print("((x * 8) * 4).type()", ((x * 8) * 4).type())
    print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

    assert (x * 8).type() == hl.Int(32)
    assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
    assert ((x * 8) / 4).type() == hl.Int(32)
    # (8 / 4) is evaluated by Python first, giving a float operand.
    assert (x * (8 / 4)).type() == hl.Float(32)  # under python3 division rules
    assert (x * (8 // 4)).type() == hl.Int(32)
    #assert (x * 8 // 4).type() == hl.Int(32) # not yet implemented

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # Disabled: true division yields a float index, which should fail.
    #val1 = clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]
def test_imageparam_bug():
    """Regression test: a 2-D Func defined from a 1-D ImageParam.

    See https://github.com/rodrigob/Halide/issues/2
    """
    x, y = hl.Var("x"), hl.Var("y")
    fx = hl.Func("fx")
    line = hl.ImageParam(hl.UInt(8), 1, "input")
    # Only y is used on the right-hand side; x is a free dimension of fx.
    fx[x, y] = line[y]
def main():
    """Build the local-laplacian pipeline and run it on the test image."""
    input_img = hl.ImageParam(hl.UInt(16), 3, 'input')

    # Number of intensity levels.
    intensity_levels = hl.Param(int_t, 'levels', 8)

    # Parameters controlling the filter.
    filter_alpha = hl.Param(float_t, 'alpha', 1.0 / 7.0)
    filter_beta = hl.Param(float_t, 'beta', 1.0)

    pipeline = get_local_laplacian(input_img, intensity_levels,
                                   filter_alpha, filter_beta)
    filter_test_image(pipeline, input_img)
def setup_inputs(self):
    """Declare all pipeline inputs and their boundary-clamped wrappers.

    Registers every scalar ``hl.Param`` and ``hl.ImageParam`` in
    ``self.inputs`` keyed by its Halide name, registers a
    ``constant_exterior`` wrapper for each image input in ``self.clamps``,
    and stores the height of the ``g_fock_in`` image as ``self.nbfn``.
    """
    f64 = hl.Float(64)

    # Input scalars.
    scalar_params = [hl.Param(f64, name)
                     for name in ("delo2", "delta", "rdelta")]

    # Input vectors (1-D).
    expnt_in, rnorm_in, x_in, y_in, z_in = [
        hl.ImageParam(f64, 1, name)
        for name in ("expnt_in", "rnorm_in", "x_in", "y_in", "z_in")
    ]

    # Input matrices (2-D).
    fm_in = hl.ImageParam(f64, 2, "fm_in")
    g_fock_in_in = hl.ImageParam(f64, 2, "g_fock_in")
    g_dens_in = hl.ImageParam(f64, 2, "g_dens_in")

    all_params = scalar_params + [
        expnt_in, rnorm_in, x_in, y_in, z_in, fm_in, g_fock_in_in, g_dens_in
    ]
    self.inputs.update({param.name(): param for param in all_params})

    # Clamp all image inputs, to prevent out-of-bounds errors from odd
    # tile sizes and such. Keys here are the names used by the rest of
    # the class, not the Halide parameter names.
    clamp_sources = [
        ("expnt", expnt_in),
        ("rnorm", rnorm_in),
        ("x", x_in),
        ("y", y_in),
        ("z", z_in),
        ("fm", fm_in),
        ("g_fock_in_clamped", g_fock_in_in),
        ("g_dens", g_dens_in),
    ]
    self.clamps.update({
        key: hl.BoundaryConditions.constant_exterior(source, 0)
        for key, source in clamp_sources
    })

    # nbfn = number of basis functions. This is our problem size.
    self.nbfn = g_fock_in_in.height()
def test_basics():
    """Smoke-test indexing, arithmetic, definition and scheduling of a 3x3 blur.

    Progress "ping" messages are printed between steps so a crash inside
    the bindings can be located from the output. The discarded expressions
    only check that the corresponding operators can be evaluated.
    """
    image = hl.ImageParam(hl.UInt(16), 2, 'input')
    x, y = hl.Var('x'), hl.Var('y')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    one = hl.cast(hl.Int(32), 1)
    assert one.type() == hl.Int(32)
    print("yy type:", one.type())

    z = x + 1
    # Indexing with vars, ints and derived expressions.
    _ = image[x, y]
    _ = image[0, 0]
    _ = image[z, y]
    _ = image[x + 1, y]

    print("ping 0.2")
    _ = image[x, y] + image[x + 1, y]

    if False:
        # Disabled in the original: arithmetic on references to blur_x
        # before blur_x has a definition.
        aa = blur_x[x, y]
        bb = blur_x[x, y + 1]
        aa + bb
        blur_x[x, y] + blur_x[x, y + 1]

    print("ping 0.3")
    _ = (image[x, y] + image[x + 1, y]) / 2

    print("ping 0.4")
    _ = blur_x[x, y]

    print("ping 0.4.1")
    blur_xx[x, y] = image[x, y]

    print("ping 0.5")
    row_sum = image[x, y] + image[x + 1, y] + image[x + 2, y]
    blur_x[x, y] = row_sum / 3

    print("ping 1")
    col_sum = blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]
    blur_y[x, y] = col_sum / 3

    xi, yi = hl.Var('xi'), hl.Var('yi')

    print("ping 2")
    tiled = blur_y.tile(x, y, xi, yi, 8, 4)
    tiled.parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
    print("Compiled to jit")
def main():
    """Ahead-of-time compile the one-stage ``brighter`` pipeline (lesson 10).

    Emits an object file, a C header and a Python-extension source file
    named after ``lesson_10_halide`` instead of running the pipeline.

    Returns:
        0 on completion.
    """
    # We'll define a simple one-stage pipeline:
    brighter = hl.Func("brighter")
    x, y = hl.Var("x"), hl.Var("y")

    # The pipeline will depend on one scalar parameter.
    offset = hl.Param(hl.UInt(8), name="offset")

    # And take one grayscale 8-bit input buffer. The first constructor
    # argument gives the type of a pixel, and the second specifies the
    # number of dimensions (not the number of channels!). For a grayscale
    # image this is two; for a color image it's three. Currently, four
    # dimensions is the maximum for inputs and outputs.
    input = hl.ImageParam(hl.UInt(8), 2)

    # If we were jit-compiling, these would just be an int and a hl.Buffer,
    # but because we want to compile the pipeline once and have it work for
    # any value of the parameter, we need to make a hl.Param object, which
    # can be used like an hl.Expr, and an hl.ImageParam object, which can
    # be used like a hl.Buffer.

    # Define the hl.Func.
    brighter[x, y] = input[x, y] + offset

    # Schedule it.
    brighter.vectorize(x, 16).parallel(y)

    # This time, instead of calling brighter.realize(...), which would
    # compile and run the pipeline immediately, we'll call a method that
    # compiles the pipeline to an object file and header.
    #
    # For AOT-compiled code, we need to explicitly declare the arguments
    # to the routine. This routine takes two. Arguments are usually Params
    # or ImageParams.
    #
    # `fname` is the single source of truth for the generated function name
    # and the output file stems (it was previously assigned but unused).
    fname = "lesson_10_halide"
    brighter.compile_to(
        {
            hl.Output.object: fname + ".o",
            hl.Output.c_header: fname + ".h",
            hl.Output.python_extension: fname + ".py.cpp",
        },
        [input, offset],
        fname)

    print("Halide pipeline compiled, but not yet run.")

    # To continue this lesson, look in the file
    # lesson_10_aot_compilation_run.cpp
    return 0
def test_multipass_constraints():
    """Check bounds inference with mutually dependent input/output constraints.

    After ``infer_input_bounds`` the input buffer must have
    (min, extent) = (-4, 34) and (1, 10) in dims 0 and 1, and the query
    buffer (0, 24) and (2, 8).
    """
    input = hl.ImageParam(hl.Float(32), 2, "input")
    f = hl.Func("f")
    x = hl.Var("x")
    y = hl.Var("y")

    f[x, y] = input[x + 1, y + 1] + input[x - 1, y - 1]
    f[x, y] += 3.0
    f.update().vectorize(x, 4)

    o = f.output_buffer()

    # Now make some hard-to-resolve constraints
    in_min = input.dim(1).min() - 5
    in_extent = input.dim(1).extent() + o.dim(0).extent()
    input.dim(0).set_bounds(min=in_min, extent=in_extent)

    out_extent = hl.select(o.dim(0).extent() < 22,
                           o.dim(0).extent() + 1,
                           o.dim(0).extent())
    o.dim(0).set_bounds(min=0, extent=out_extent)

    # Make a bounds query buffer
    query_buf = hl.Buffer.make_bounds_query(type=hl.Float(32), sizes=[7, 8])
    query_buf.set_min([2, 2])

    f.infer_input_bounds(query_buf)

    # Collect (min, extent) for dims 0 and 1 of both buffers.
    actual_in = (input.get().dim(0).min(), input.get().dim(0).extent(),
                 input.get().dim(1).min(), input.get().dim(1).extent())
    actual_out = (query_buf.dim(0).min(), query_buf.dim(0).extent(),
                  query_buf.dim(1).min(), query_buf.dim(1).extent())

    if actual_in != (-4, 34, 1, 10) or actual_out != (0, 24, 2, 8):
        print("Constraints not correctly satisfied:\n",
              "in:", *actual_in,
              "out:", *actual_out)
        assert False
def test_scalar_funcs():
    """Smoke-test zero-dimensional ImageParams and Funcs indexed with ``()``."""
    scalar_in = hl.ImageParam(hl.UInt(16), 0, 'input')
    f, g = hl.Func('f'), hl.Func('g')

    # Discarded expressions: only check that these evaluate without raising.
    _ = scalar_in[()]
    _ = (scalar_in[()] + scalar_in[()]) / 2
    _ = f[()]
    _ = g[()]

    in_sum = scalar_in[()] + scalar_in[()] + scalar_in[()]
    f[()] = in_sum / 3
    f_sum = f[()] + f[()] + f[()]
    g[()] = f_sum / 3

    g.compile_jit()
def main():
    """Build the bilateral-grid pipeline and either generate code or run it."""
    input = hl.ImageParam(hl.Float(32), 2, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8

    bilateral_grid = get_bilateral_grid(input, r_sigma, s_sigma)

    # Set `generate` to False to run the jit immediately and get instant
    # gratification.
    #generate = True
    generate = False

    if not generate:
        filter_test_image(bilateral_grid, input)
    else:
        generate_compiled_file(bilateral_grid)

    print("\nEnd of game. Have a nice day!")
def main():
    """Build the local-laplacian pipeline and either generate code or run it."""
    input = hl.ImageParam(hl.UInt(16), 3, 'input')

    # Number of intensity levels.
    levels = hl.Param(int_t, 'levels', 8)

    # Parameters controlling the filter.
    alpha = hl.Param(float_t, 'alpha', 1.0 / 7.0)
    beta = hl.Param(float_t, 'beta', 1.0)

    local_laplacian = get_local_laplacian(input, levels, alpha, beta)

    # Set to False to run the jit immediately and get instant gratification.
    generate = False

    if not generate:
        filter_test_image(local_laplacian, input)
    else:
        generate_compiled_file(local_laplacian)
def main():
    """Run the interpolate pipeline on the sample image, time it, save results.

    Both the input copy and the result are scaled by 255 and converted to
    uint8 before being written to ``interpolate_input.png`` and
    ``interpolate_result.png``.
    """
    input = hl.ImageParam(float_t, 3, "input")
    levels = 10
    interpolate = get_interpolate(input, levels)

    # Bind the sample data (must have 4 entries along axis 2) to the input.
    input_data = get_input_data()
    assert input_data.shape[2] == 4
    input_image = hl.Buffer(input_data)
    input.set(input_image)

    input_width = input_data.shape[0]
    input_height = input_data.shape[1]

    # Time only the realization itself.
    started = datetime.now()
    output_image = interpolate.realize(input_width, input_height, 3)
    finished = datetime.now()

    elapsed = (finished - started).total_seconds()
    print('Interpolated in {:.5f} secs'.format(elapsed))

    output_data = np.asanyarray(output_image)

    # Convert both images to 8-bit for saving.
    input_data = (input_data * 255).astype(np.uint8)
    output_data = (output_data * 255).astype(np.uint8)

    input_path = "interpolate_input.png"
    output_path = "interpolate_result.png"
    for path, pixels in ((input_path, input_data),
                         (output_path, output_data)):
        imageio.imsave(path, pixels)

    print()
    summary = 'blur realized on output image. Result saved at {} (input data copy at {})'
    print(summary.format(output_path, input_path))
    print()
    print("End of game. Have a nice day!")
import halide as hl
import imageio
import numpy as np

# --- Pipeline definition (static) -----------------------------------------
input = hl.ImageParam(hl.Float(32), 3)
f = hl.Func('f')
x, y, c = (hl.Var(name) for name in 'xyc')

# Double the values and clamp them by 1.
doubled = 2 * input[x, y, c]
f[x, y, c] = hl.min(doubled, 1.0)

# --- Compile / execute -----------------------------------------------------
# Load the image as float32 scaled by 1/255 and convert it to Fortran
# ordering, which Halide assumes.
pixels = imageio.imread('images/rgb.png').astype(np.float32) / 255.0
input_buffer = hl.Buffer(np.asfortranarray(pixels))
input.set(input_buffer)

# Process the input by calling f.realize with the input's own extents.
output = f.realize(input_buffer.width(),
                   input_buffer.height(),
                   input_buffer.channels())

# Save the image to a file by converting to a numpy array.
output = np.array(output)
output_u8 = (output * 255.0).astype(np.uint8)
imageio.imsave('output.png', output_u8)
def focus_stack_pipeline():
    """Build the multi-output Halide pipeline used for focus stacking.

    Declares one UInt(8) 3-D image input plus, per pyramid layer, a
    Float(32) laplacian input and a Float(32) 2-D energy input (the last
    layer gets two of each: entropy and deviation). It then builds
    gaussian/laplacian layers with the sibling helpers (``gaussian``,
    ``reduce_layer``, ``expand_layer``, ``laplacian``, ``region_energy``,
    ``energy_maxes``, ``merge_laplacian``, ``entropy``, ``deviation``,
    ``mirror``, ``f32``) and sets size estimates on every input and output.

    Returns:
        dict with two keys: ``'pipeline'`` (an ``hl.Pipeline`` over all
        merged-laplacian outputs followed by all energy outputs) and
        ``'inputs'`` (the image input, then the laplacian inputs, then the
        energy inputs, in declaration order).
    """
    outputs = []
    start_w, start_h = 3000, 2000
    number_of_layers = 5
    # Per-layer [width, height]; each layer is half the previous, rounded up.
    layer_sizes = [[start_w, start_h]]
    for i in range(0, number_of_layers):
        # Grab from prev layer
        w,h = layer_sizes[-1]
        layer_sizes.append([int(math.ceil(w/2.0)),int(math.ceil(h/2.0))])
    # Add last size in once more to get the 2nd top lap layer (gaussian) for
    # the energy/deviation split.
    layer_sizes.append(layer_sizes[-1])
    input = hl.ImageParam(hl.UInt(8), 3)
    input.dim(0).set_estimate(0, start_w)
    input.dim(1).set_estimate(0, start_h)
    input.dim(2).set_estimate(0, 3)
    lap_inputs = []
    max_energy_inputs = []
    # One laplacian input and one energy input per layer; the final
    # iteration declares an extra laplacian input and two energy inputs.
    for i in range(0,number_of_layers+1):
        lap_layer = hl.ImageParam(hl.Float(32), 3, "lap{}".format(i))
        lap_inputs.append(lap_layer)
        w,h = layer_sizes[i]
        lap_layer.dim(0).set_estimate(0, w)
        lap_layer.dim(1).set_estimate(0, h)
        lap_layer.dim(2).set_estimate(0, 3)
        if i == number_of_layers:
            # last (top - small) layer
            # Add the last laplacian (really direct from gaussian) layer
            # in twice. We output one maxed on entropies and one maxed on
            # deviations.
            lap_layer = hl.ImageParam(hl.Float(32), 3, "lap{}".format(i+1))
            lap_inputs.append(lap_layer)
            lap_layer.dim(0).set_estimate(0, w)
            lap_layer.dim(1).set_estimate(0, h)
            lap_layer.dim(2).set_estimate(0, 3)
            # NOTE(review): "entroy" looks like a typo for "entropy"; it is a
            # runtime parameter name, so check consumers before renaming.
            entropy_layer = hl.ImageParam(hl.Float(32), 2, "entroy{}".format(i))
            max_energy_inputs.append(entropy_layer)
            entropy_layer.dim(0).set_estimate(0, w)
            entropy_layer.dim(1).set_estimate(0, h)
            deviation_layer = hl.ImageParam(hl.Float(32), 2, "deviation{}".format(i))
            max_energy_inputs.append(deviation_layer)
            deviation_layer.dim(0).set_estimate(0, w)
            deviation_layer.dim(1).set_estimate(0, h)
        else:
            max_energy_layer = hl.ImageParam(hl.Float(32), 2, "max_energy{}".format(i))
            max_energy_inputs.append(max_energy_layer)
            max_energy_layer.dim(0).set_estimate(0, w)
            max_energy_layer.dim(1).set_estimate(0, h)
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    hist_index = hl.Var('hist_index')
    # NOTE(review): 3000, 2000 repeat start_w, start_h as literals — confirm
    # they are meant to stay in sync.
    clamped = f32(x, y, c, mirror(input, 3000, 2000))
    f = hl.Func("input32")
    f[x, y, c] = clamped[x, y, c]
    energy_outputs = []
    # gaussian_layers[-1] is always the input to the next pyramid step.
    gaussian_layers = [f]
    laplacian_layers = []
    merged_laps = []
    for layer_num in range(0, number_of_layers):
        # Add the layer size in also
        w,h = layer_sizes[layer_num]
        start_layer = gaussian_layers[-1]
        # Blur the image
        gaussian_layer = gaussian(x, y, c, start_layer)
        # Grab next layer size
        # w,h = layer_sizes[layer_num+1]
        # Reduce the layer size and add it into the list
        next_layer = reduce_layer(x, y, c, gaussian_layer)
        gaussian_layers.append(next_layer)
        # Expand back up
        expanded = expand_layer(x, y, c, next_layer)
        # Generate the laplacian from the
        # original - blurred/reduced/expanded version
        laplacian_layer = laplacian(x, y, c, start_layer, expanded)
        laplacian_layers.append(laplacian_layer)
        # Calculate energies for the gaussian layer
        prev_energies = mirror(max_energy_inputs[layer_num], w, h)
        next_energies = region_energy(x, y, c, laplacian_layer)
        prev_laplacian = mirror(lap_inputs[layer_num], w, h)
        merged_energies = energy_maxes(x, y, c, prev_energies, next_energies)
        merged_lap = merge_laplacian(x, y, c, merged_energies, next_energies, prev_laplacian, laplacian_layer)
        # Size entries are [w, h, gray] for energies and
        # [w, h, gray, mid] for laplacians (see the unpacking loop below).
        energy_outputs.append([[w,h,True],merged_energies])
        merged_laps.append(merged_lap)
        # Add estimates
        # NOTE(review): w, h here are this layer's size (the next-layer
        # lookup above is commented out) although next_layer is the reduced
        # layer — confirm this is intended.
        next_layer.set_estimate(x, 0, w)
        next_layer.set_estimate(y, 0, h)
        next_layer.set_estimate(c, 0, 3)
    # Handle last layer differently
    w,h = layer_sizes[-1]
    # The next_lap is really just the last gaussian layer
    next_lap = gaussian_layers[-1]
    # Entropy-based merge uses the second-to-last laplacian/energy inputs.
    prev_entropy_laplacian = mirror(lap_inputs[-2], w, h)
    prev_entropy = mirror(max_energy_inputs[-2], w, h)
    next_entropy = entropy(x, y, c, next_lap, w, h, hist_index)
    merged_entropy = energy_maxes(x, y, c, prev_entropy, next_entropy)
    merged_lap_on_entropy = merge_laplacian(x, y, c, merged_entropy, next_entropy, prev_entropy_laplacian, next_lap)
    merged_laps.append(merged_lap_on_entropy)
    # Deviation-based merge uses the last laplacian/energy inputs.
    prev_deviation_laplacian = mirror(lap_inputs[-1], w, h)
    prev_deviation = mirror(max_energy_inputs[-1], w, h)
    next_deviation = deviation(x, y, c, next_lap)
    merged_deviation = energy_maxes(x, y, c, prev_deviation, next_deviation)
    merged_lap_on_deviation = merge_laplacian(x, y, c, merged_deviation, next_deviation, prev_deviation_laplacian, next_lap)
    merged_laps.append(merged_lap_on_deviation)
    energy_outputs.append([[w,h,True],merged_entropy])
    energy_outputs.append([[w,h,True],merged_deviation])
    print("NUM LAYERS: ", len(gaussian_layers), len(laplacian_layers), layer_sizes)
    # Add all of the laplacian layers to the output first
    i = 0
    for merged_lap in merged_laps:
        w,h = layer_sizes[i]
        # mid is True for every merged laplacian except the final two.
        mid = (i < (len(merged_laps) - 2))
        outputs.append([[w,h,False,mid], merged_lap])
        i += 1
    # Then energies
    for energy_output in energy_outputs:
        outputs.append(energy_output)
    new_outputs = []
    for size, output in outputs:
        w = size[0]
        h = size[1]
        gray = len(size) > 2 and size[2]
        mid = len(size) > 3 and size[3]
        # NOTE(review): both branches assign the same value, so `mid` has no
        # effect here — possibly a leftover from a removed conversion.
        if mid:
            uint8_output = output
        else:
            uint8_output = output
        uint8_output.set_estimate(x, 0, w)
        uint8_output.set_estimate(y, 0, h)
        # Only non-gray outputs get an estimate for c.
        if not gray:
            uint8_output.set_estimate(c, 0, 3)
        new_outputs.append([size, uint8_output])
    outputs = new_outputs
    print("OUTPUT LAYERS: ")
    pprint(outputs)
    output_funcs = [output for _, output in outputs]
    pipeline = hl.Pipeline(output_funcs)
    return {
        'pipeline': pipeline,
        'inputs': [input] + lap_inputs + max_energy_inputs
    }
def test_extern():
    """Show Halide calling a C function loaded into the process via ctypes.

    The test is currently skipped: it prints a TODO message and returns 0
    before any Halide call. The code after the early return is kept as the
    intended implementation for when build support for the external
    function exists.

    Returns:
        0 while the test is skipped.
    """
    # Requires Makefile support to build the external function in linkable form
    print("TODO: test_extern not yet implemented in Python; skipping...")
    return 0

    # ---- Unreachable until the skip above is removed -------------------
    x = hl.Var("x")

    data = np.random.random(10).astype(np.float64)
    expected_result = np.sort(data)
    output_data = np.empty(10, dtype=np.float64)

    sort_func = hl.Func("extern_sort_func")
    # gsl_sort,
    # see http://www.gnu.org/software/gsl/manual/html_node/Sorting-vectors.html#Sorting-vectors

    input = hl.ImageParam(hl.Float(64), 1, "input_data")

    extern_name = "the_sort_func"
    params = [hl.ExternFuncArgument(input)]
    output_types = [hl.Int(32)]
    dimensionality = 1
    sort_func.define_extern(extern_name, params, output_types, dimensionality)

    # Before the library is loaded, compilation must fail.
    try:
        sort_func.compile_jit()
    except RuntimeError:
        pass
    else:
        raise Exception(
            "compile_jit should have raised a 'Symbol not found' RuntimeError")

    import ctypes
    sort_lib = ctypes.CDLL("the_sort_function.so")
    print(sort_lib.the_sort_func)

    try:
        sort_func.compile_jit()
    except RuntimeError:
        print("ctypes CDLL did not work out")
    else:
        print("ctypes CDLL worked !")

    lib_path = "the_sort_function.so"
    #lib_path = "/home/rodrigob/code/references/" \
    #   "Halide_master/python_bindings/tests/the_sort_function.nohere.so"
    load_error = load_library_into_llvm(lib_path)
    assert not load_error

    sort_func.compile_jit()

    # now that things are loaded, we try to call them
    input.set(data)
    sort_func.realize(output_data)

    # np.isclose returns an element-wise array, whose truth value is
    # ambiguous for 10 elements; np.allclose reduces it to a single bool.
    assert np.allclose(expected_result, output_data)
    return
def main():
    """Cross-compile the ``brighter`` pipeline and sanity-check the outputs.

    Compiles the lesson-10 pipeline for the host, 32-bit ARM Android,
    64-bit x86 Windows and 32-bit ARM iOS, then reads the first bytes of
    each cross-compiled object file and compares them with the expected
    magic numbers.

    Returns:
        0 on success, -1 if an expected object file cannot be opened or
        read.

    Raises:
        Exception: if an object file's header bytes do not match the
            expected magic numbers.
    """
    # We'll define the simple one-stage pipeline that we used in lesson 10.
    brighter = hl.Func("brighter")
    x, y = hl.Var("x"), hl.Var("y")

    # Declare the arguments.
    offset = hl.Param(hl.UInt(8))
    input = hl.ImageParam(hl.UInt(8), 2)
    args = [input, offset]

    # Define the hl.Func.
    brighter[x, y] = input[x, y] + offset

    # Schedule it.
    brighter.vectorize(x, 16).parallel(y)

    # The following line is what we did in lesson 10. It compiles an
    # object file suitable for the system that you're running this
    # program on. For example, if you compile and run this file on
    # 64-bit linux on an x86 cpu with sse4.1, then the generated code
    # will be suitable for 64-bit linux on x86 with sse4.1.
    brighter.compile_to_file("lesson_11_host", args, "lesson_11_host")

    # We can also compile object files suitable for other cpus and
    # operating systems. You do this with an optional third argument
    # to compile_to_file which specifies the target to compile for.
    create_android = True
    create_windows = True
    create_ios = True

    if create_android:
        # Let's use this to compile a 32-bit arm android version of this code:
        target = hl.Target()
        target.os = hl.TargetOS.Android  # The operating system
        target.arch = hl.TargetArch.ARM  # The CPU architecture
        target.bits = 32  # The bit-width of the architecture
        arm_features = []  # A list of features to set
        target.set_features(arm_features)
        # Pass the target as the last argument.
        brighter.compile_to_file("lesson_11_arm_32_android", args,
                                 "lesson_11_arm_32_android", target)

    if create_windows:
        # And now a Windows object file for 64-bit x86 with AVX and SSE 4.1:
        target = hl.Target()
        target.os = hl.TargetOS.Windows
        target.arch = hl.TargetArch.X86
        target.bits = 64
        target.set_features([hl.TargetFeature.AVX, hl.TargetFeature.SSE41])
        brighter.compile_to_file("lesson_11_x86_64_windows", args,
                                 "lesson_11_x86_64_windows", target)

    if create_ios:
        # And finally an iOS mach-o object file for one of Apple's 32-bit
        # ARM processors - the A6. It's used in the iPhone 5. The A6 uses
        # a slightly modified ARM architecture called ARMv7s. We specify
        # this using the target features field. Support for Apple's
        # 64-bit ARM processors is very new in llvm, and still somewhat
        # flaky.
        target = hl.Target()
        target.os = hl.TargetOS.IOS
        target.arch = hl.TargetArch.ARM
        target.bits = 32
        target.set_features([hl.TargetFeature.ARMv7s])
        brighter.compile_to_file("lesson_11_arm_32_ios", args,
                                 "lesson_11_arm_32_ios", target)

    # Now let's check these files are what they claim, by examining
    # their first few bytes.
    #
    # Each open() sits inside the try block so that a missing file is
    # reported as "not generated" (previously open() was outside the try
    # and a missing file raised instead), and `with` closes the handle on
    # every path.

    if create_android:
        # 32-arm android object files start with the magic bytes:
        # uint8_t []
        arm_32_android_magic = [
            0x7f, ord('E'), ord('L'), ord('F'),  # ELF format
            1,  # 32-bit
            1,  # 2's complement little-endian
            1,  # Current version of elf
        ]
        length = len(arm_32_android_magic)
        try:
            with open("lesson_11_arm_32_android.o", "rb") as f:
                header_bytes = f.read(length)
        except OSError:
            print("Android object file not generated")
            return -1

        header = list(unpack("B" * length, header_bytes))
        if header != arm_32_android_magic:
            # Show which byte positions matched, to ease debugging.
            print([got == want
                   for got, want in zip(header, arm_32_android_magic)])
            raise Exception(
                "Unexpected header bytes in 32-bit arm object file.")

    if create_windows:
        # 64-bit windows object files start with the magic 16-bit value 0x8664
        # (presumably referring to x86-64)
        # uint8_t []
        win_64_magic = [0x64, 0x86]
        try:
            with open("lesson_11_x86_64_windows.obj", "rb") as f:
                header_bytes = f.read(2)
        except OSError:
            print("Windows object file not generated")
            return -1

        header = list(unpack("B" * 2, header_bytes))
        if header != win_64_magic:
            raise Exception(
                "Unexpected header bytes in 64-bit windows object file.")

    if create_ios:
        # 32-bit arm iOS mach-o files start with the following magic bytes:
        # uint32_t []
        arm_32_ios_magic = [
            0xfeedface,  # Mach-o magic bytes
            #0xfe, 0xed, 0xfa, 0xce, # Mach-o magic bytes
            12,  # CPU type is ARM
            11,  # CPU subtype is ARMv7s
            1,  # It's a relocatable object file.
        ]
        try:
            with open("lesson_11_arm_32_ios.o", "rb") as f:
                header_bytes = f.read(4 * 4)
        except OSError:
            print("ios object file not generated")
            return -1

        header = list(unpack("I" * 4, header_bytes))
        if header != arm_32_ios_magic:
            raise Exception(
                "Unexpected header bytes in 32-bit arm ios object file.")

    # It looks like the object files we produced are plausible for
    # those targets. We'll count that as a success for the purposes
    # of this tutorial. For a real application you'd then need to
    # figure out how to integrate Halide into your cross-compilation
    # toolchain. There are several small examples of this in the
    # Halide repository under the apps folder. See HelloAndroid and
    # HelloiOS here:
    # https://github.com/halide/Halide/tree/master/apps/
    print("Success!")
    return 0
def test_complexstub():
    """Realize every output of the ``complexstub`` generator stub and check it.

    Each output is realized over a small region and compared element by
    element against values derived from the constant input image.
    """
    constant_image = _make_constant_image()
    input = hl.ImageParam(hl.UInt(8), 3, 'input')
    input.set(constant_image)

    # x, y and c are used only as plain loop indices below; the hl.Var
    # objects previously bound to these names were never used.
    target = hl.get_jit_target_from_environment()

    float_arg = 1.25
    int_arg = 33

    r = complexstub(target,
                    typed_buffer_input=constant_image,
                    untyped_buffer_input=constant_image,
                    simple_input=input,
                    array_input=[input, input],
                    float_arg=float_arg,
                    int_arg=[int_arg, int_arg],
                    untyped_buffer_output_type="uint8",
                    vectorize=True)

    # return value is a tuple; unpack separately to avoid
    # making the callsite above unreadable
    (simple_output, tuple_output, array_output, typed_buffer_output,
     untyped_buffer_output, static_compiled_buffer_output) = r

    # simple_output: float copy of the input.
    b = simple_output.realize(32, 32, 3, target)
    assert b.type() == hl.Float(32)
    for x in range(32):
        for y in range(32):
            for c in range(3):
                expected = constant_image[x, y, c]
                actual = b[x, y, c]
                assert expected == actual, "Expected %s Actual %s" % (
                    expected, actual)

    # tuple_output: (input * float_arg, input * float_arg + int_arg).
    b = tuple_output.realize(32, 32, 3, target)
    # Check the length first so a wrong arity fails as an assertion
    # rather than as an indexing error.
    assert len(b) == 2
    assert b[0].type() == hl.Float(32)
    assert b[1].type() == hl.Float(32)
    for x in range(32):
        for y in range(32):
            for c in range(3):
                expected1 = constant_image[x, y, c] * float_arg
                expected2 = expected1 + int_arg
                actual1, actual2 = b[0][x, y, c], b[1][x, y, c]
                assert expected1 == actual1, "Expected1 %s Actual1 %s" % (
                    expected1, actual1)
                # Message fixed: it previously labelled actual2 as "Actual1".
                assert expected2 == actual2, "Expected2 %s Actual2 %s" % (
                    expected2, actual2)

    # array_output: two 2-D Int(16) outputs of channel 0 plus int_arg.
    assert len(array_output) == 2
    for a in array_output:
        b = a.realize(32, 32, target)
        assert b.type() == hl.Int(16)
        for x in range(32):
            for y in range(32):
                expected = constant_image[x, y, 0] + int_arg
                actual = b[x, y]
                assert expected == actual, "Expected %s Actual %s" % (
                    expected, actual)

    # TODO: Output<Buffer<>> has additional behaviors useful when a Stub
    # is used within another Generator; this isn't yet implemented since there
    # isn't yet Python bindings for Generator authoring. This section
    # of the test may need revision at that point.
    b = typed_buffer_output.realize(32, 32, 3, target)
    assert b.type() == hl.Float(32)
    for x in range(32):
        for y in range(32):
            for c in range(3):
                expected = constant_image[x, y, c]
                actual = b[x, y, c]
                assert expected == actual, "Expected %s Actual %s" % (
                    expected, actual)

    b = untyped_buffer_output.realize(32, 32, 3, target)
    assert b.type() == hl.UInt(8)
    for x in range(32):
        for y in range(32):
            for c in range(3):
                expected = constant_image[x, y, c]
                actual = b[x, y, c]
                assert expected == actual, "Expected %s Actual %s" % (
                    expected, actual)

    # static_compiled_buffer_output: input + 42 over a 4x4x1 region.
    b = static_compiled_buffer_output.realize(4, 4, 1, target)
    assert b.type() == hl.UInt(8)
    for x in range(4):
        for y in range(4):
            for c in range(1):
                expected = constant_image[x, y, c] + 42
                actual = b[x, y, c]
                assert expected == actual, "Expected %s Actual %s" % (
                    expected, actual)
def gen_g(self):
    '''Define the Halide Func "g" and the intermediate Funcs it depends on.

    Reads the loop variables from ``self.vars``, the clamped inputs from
    ``self.clamps`` and the raw inputs from ``self.inputs``, and registers
    every Func it creates through ``self.add_funcs_by_name``. When both
    ``self.tracing`` and ``self.tracing_g`` are truthy, an additional 4-D
    ``g_trace_in`` input is declared, registered in ``self.inputs`` /
    ``self.clamps`` and added to ``g``.
    '''
    # vars
    i, j, k, l = [self.vars[c] for c in "ijkl"]
    # clamped inputs
    x, y, z, expnt, fm, rnorm = [
        self.clamps[c] for c in ["x", "y", "z", "expnt", "fm", "rnorm"]
    ]
    # unclamped input (for sizing)
    fm_in = self.inputs["fm_in"]
    # scalar inputs
    delo2, delta, rdelta = [
        self.inputs[c] for c in ["delo2", "delta", "rdelta"]
    ]

    # Pairwise coordinate differences, their squared length, and the
    # pairwise exponent sum with its reciprocal.
    dx = hl.Func("dx")
    dy = hl.Func("dy")
    dz = hl.Func("dz")
    r2 = hl.Func("g_r2")
    expnt2 = hl.Func("expnt2")
    expnt_inv = hl.Func("expnt_inv")
    self.add_funcs_by_name([dx, dy, dz, r2, expnt2, expnt_inv])
    dx[i, j] = x[i] - x[j]
    dy[i, j] = y[i] - y[j]
    dz[i, j] = z[i] - z[j]
    r2[i, j] = dx[i, j] * dx[i, j] + dy[i, j] * dy[i, j] + dz[i, j] * dz[i, j]
    expnt2[i, j] = expnt[i] + expnt[j]
    expnt_inv[i, j] = hl.f64(1.0) / expnt2[i, j]

    fac2 = hl.Func("fac2")
    ex_arg = hl.Func("ex_arg")
    ex = hl.Func("ex")
    denom = hl.Func("denom")
    fac4d = hl.Func("fac4d")
    self.add_funcs_by_name([fac2, ex_arg, ex, denom, fac4d])
    fac2[i, j] = expnt[i] * expnt[j] * expnt_inv[i, j]
    ex_arg[i, j, k, l] = -fac2[i, j] * r2[i, j] - fac2[k, l] * r2[k, l]
    # ex is exp(ex_arg), replaced by 0 when ex_arg is below -37.
    ex[i, j, k, l] = hl.select(ex_arg[i, j, k, l] < hl.f64(-37.0), hl.f64(0.0), hl.exp(ex_arg[i, j, k, l]))
    denom[i, j, k, l] = expnt2[i, j] * expnt2[k, l] * hl.sqrt(expnt2[i, j] + expnt2[k, l])
    fac4d[i, j, k, l] = expnt2[i, j] * expnt2[k, l] / (expnt2[i, j] + expnt2[k, l])

    # Exponent-weighted pair coordinates and the squared distance between
    # the (i, j) and (k, l) pairs.
    x2 = hl.Func("g_x2")
    y2 = hl.Func("g_y2")
    z2 = hl.Func("g_z2")
    rpq2 = hl.Func("rpq2")
    self.add_funcs_by_name([x2, y2, z2, rpq2])
    x2[i, j] = (x[i] * expnt[i] + x[j] * expnt[j]) * expnt_inv[i, j]
    y2[i, j] = (y[i] * expnt[i] + y[j] * expnt[j]) * expnt_inv[i, j]
    z2[i, j] = (z[i] * expnt[i] + z[j] * expnt[j]) * expnt_inv[i, j]
    rpq2[i, j, k, l] = ((x2[i, j] - x2[k, l]) * (x2[i, j] - x2[k, l]) + (y2[i, j] - y2[k, l]) * (y2[i, j] - y2[k, l]) + (z2[i, j] - z2[k, l]) * (z2[i, j] - z2[k, l]))

    f0t = hl.Func("f0t")
    f0n = hl.Func("f0n")
    f0x = hl.Func("f0x")
    f0val = hl.Func("f0val")
    self.add_funcs_by_name([f0t, f0n, f0x, f0val])
    f0t[i, j, k, l] = fac4d[i, j, k, l] * rpq2[i, j, k, l]
    # f0n is a row index into fm, clamped to the valid range of fm_in's
    # dimension 0.
    f0n[i, j, k, l] = hl.clamp(hl.i32((f0t[i, j, k, l] + delo2) * rdelta), fm_in.dim(0).min(), fm_in.dim(0).max())
    f0x[i, j, k, l] = delta * f0n[i, j, k, l] - f0t[i, j, k, l]
    # For f0t >= 28 a closed form constant / sqrt(f0t) is used (the constant
    # 0.88622692545276 is numerically sqrt(pi) / 2); otherwise a nested
    # polynomial in f0x is evaluated from columns 0..4 of row f0n of fm.
    # NOTE(review): this looks like a tabulated series expansion — confirm
    # the meaning of the fm columns against the table generator.
    f0val[i, j, k, l] = hl.select(
        f0t[i, j, k, l] >= hl.f64(28.0),
        hl.f64(0.88622692545276) / hl.sqrt(f0t[i, j, k, l]),
        fm[f0n[i, j, k, l], 0] + f0x[i, j, k, l] * (fm[f0n[i, j, k, l], 1] + f0x[i, j, k, l] * hl.f64(0.5) * (fm[f0n[i, j, k, l], 2] + f0x[i, j, k, l] * hl.f64(1. / 3.) * (fm[f0n[i, j, k, l], 3] + f0x[i, j, k, l] * hl.f64(0.25) * fm[f0n[i, j, k, l], 4]))))

    g = hl.Func("g")
    self.add_funcs_by_name([g])
    if self.tracing and self.tracing_g:
        # Tracing variant: declare an extra 4-D input and add it to g.
        g_trace_in = hl.ImageParam(hl.Float(64), 4, "g_trace_in")
        g_trace = hl.BoundaryConditions.constant_exterior(g_trace_in, 0)
        self.inputs["g_trace_in"] = g_trace_in
        self.clamps["g_trace"] = g_trace
        g_trace.compute_root()
        g[i, j, k, l] = (hl.f64(2.00) * hl.f64(pow(pi, 2.50)) / denom[i, j, k, l] ) * ex[i, j, k, l] * f0val[i, j, k, l] * rnorm[i] * rnorm[ j] * rnorm[k] * rnorm[l] + g_trace[i, j, k, l]
    else:
        # Same definition as above without the g_trace term; g_trace is
        # assigned but not used afterwards in this method.
        g_trace = None
        g[i, j, k, l] = (hl.f64(2.00) * hl.f64(pow(pi, 2.50)) / denom[i, j, k, l]) * ex[i, j, k, l] * f0val[ i, j, k, l] * rnorm[i] * rnorm[j] * rnorm[k] * rnorm[l]