def main():
    """Build a four-stage 1-D pipeline and schedule it with the Li2018 autoscheduler.

    The stages are: the coordinate cast to float32, that value doubled, the
    sine of the doubled value, and the square of the sine. The generated
    schedule source is printed, then the pipeline is JIT-compiled and
    realized over 1000 elements.
    """
    hl.load_plugin("autoschedule_li2018")

    x = hl.Var('x')

    # Stage 'in': the coordinate itself, cast to float 32.
    source = hl.Func('in')
    source[x] = hl.f32(x)

    # Stage 'f_0': twice the input.
    doubled = hl.Func('f_0')
    doubled[x] = 2 * source[x]

    # Stage 'f_1': sine of the doubled value.
    sine = hl.Func('f_1')
    sine[x] = hl.sin(doubled[x])

    # Stage 'f_2': the sine squared; this is the pipeline output.
    squared = hl.Func('f_2')
    squared[x] = sine[x] * sine[x]

    # Setup: give the output an estimate for x before auto-scheduling.
    squared.set_estimate(x, 0, 1000)

    pipeline = hl.Pipeline(squared)
    target = hl.Target()
    # Only first parameter is used (number of cores on CPU).
    machine = hl.MachineParams(32, 0, 0)
    schedule = pipeline.auto_schedule('Li2018', target, machine)

    print('Schedule:')
    print(schedule.schedule_source)

    pipeline.compile_jit()  # compile
    output_buffer = pipeline.realize(1000)  # compute and get the buffer
def test_generate_halide(self):
    """Run generate_halide() on the recursively split zone from define_original_twoel().

    The test case itself is handed to generate_halide() as the context
    object, so the attributes it is given here (vars, inputs, clamps, funcs,
    loopnest_funcs) are populated first. There are no explicit assertions;
    the test passes when generation completes without raising.
    """
    decomposed = self.define_original_twoel().split_recursive()

    # One Halide Var per index name, also unpacked for the definitions below.
    self.vars = {name: hl.Var(name) for name in "ijkl"}
    i, j, k, l = (self.vars[name] for name in "ijkl")

    g_dens = hl.Func("g_dens")
    g_dens[i, j] = i * j

    g = hl.Func("g")
    g[i, j, k, l] = hl.cos(i * j) * hl.sin(k * l)

    # inputs, clamps and funcs each get their own dict holding the same
    # two Funcs.
    for attr in ("inputs", "clamps", "funcs"):
        setattr(self, attr, {"g": g, "g_dens": g_dens})
    self.loopnest_funcs = {}

    decomposed.generate_halide(self, [8, 8, 8, 8])
def contrast(input, strength, black_point):
    """Return a Func applying a sine-shaped contrast curve and a black-point shift.

    Args:
        input: Indexable as ``input[x, y, c]``; each sample is cast to
            float32 before the curve is applied.
        strength: Scale of the sine curve; used as a divisor when deriving
            the curve constants.
        black_point: Value subtracted from the curved result before it is
            rescaled by ``65535 / (65535 - black_point)``.

    Returns:
        The ``contrast_output`` Func. Both definitions saturate to uint16.
        The Func is scheduled compute_root, parallel over y and vectorized
        over x by 16.
    """
    output = hl.Func("contrast_output")
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")

    # Constants of the curve  gain * sin(angle - phase) + offset.
    phase = math.pi / (2 * strength)
    sin_phase = hl.sin(phase)
    gain = 65535 / (2 * sin_phase)
    offset = gain * sin_phase

    # Map the input sample onto the angle fed to the sine.
    angle_per_count = math.pi / (strength * 65535)
    angle = angle_per_count * hl.cast(hl.Float(32), input[x, y, c])

    output[x, y, c] = hl.u16_sat(gain * hl.sin(angle - phase) + offset)

    # Second definition: subtract the black point and rescale.
    white_scale = 65535 / (65535 - black_point)
    shifted = hl.cast(hl.Int(32), output[x, y, c]) - black_point
    output[x, y, c] = hl.u16_sat(shifted * white_scale)

    output.compute_root().parallel(y).vectorize(x, 16)

    return output
def main():
    """Halide tutorial lesson 13: Funcs that evaluate to Tuples.

    Walks through three ways of representing a collection of values per
    point (an extra dimension, a list of Funcs, a Tuple-valued Func), then
    uses Tuples for an argmax reduction and for a user-defined complex
    number type that drives a Mandelbrot rendering printed as ASCII art.

    Every claim is checked with an assertion, so an AssertionError means
    the bindings behaved differently from what the tutorial describes.

    Returns:
        0 after all checks have passed and "Success!" has been printed.
    """
    # So far Funcs (such as the one below) have evaluated to a single
    # scalar value for each point in their domain.
    single_valued = hl.Func()
    x, y = hl.Var("x"), hl.Var("y")
    single_valued[x, y] = x + y

    # One way to write a hl.Func that returns a collection of values is
    # to add an additional dimension which indexes that
    # collection. This is how we typically deal with color. For
    # example, the hl.Func below represents a collection of three values
    # for every x, y coordinate indexed by c.
    color_image = hl.Func()
    c = hl.Var("c")
    color_image[x, y, c] = hl.select(
        c == 0, 245,  # Red value
        c == 1, 42,   # Green value
        132)          # Blue value

    # Since this pattern appears quite often, Halide provides
    # syntactic sugar to write the code above as the following,
    # using the "mux" function.
    # color_image[x, y, c] = hl.mux(c, [245, 42, 132])

    # This method is often convenient because it makes it easy to
    # operate on this hl.Func in a way that treats each item in the
    # collection equally:
    brighter = hl.Func()
    brighter[x, y, c] = color_image[x, y, c] + 10

    # However this method is also inconvenient for three reasons.
    #
    # 1) Funcs are defined over an infinite domain, so users of this
    # hl.Func can for example access color_image(x, y, -17), which is
    # not a meaningful value and is probably indicative of a bug.
    #
    # 2) It requires a hl.select, which can impact performance if not
    # bounded and unrolled:
    # brighter.bound(c, 0, 3).unroll(c)
    #
    # 3) With this method, all values in the collection must have the
    # same type. While the above two issues are merely inconvenient,
    # this one is a hard limitation that makes it impossible to
    # express certain things in this way.

    # It is also possible to represent a collection of values as a
    # collection of Funcs:
    func_array = [hl.Func() for i in range(3)]
    func_array[0][x, y] = x + y
    func_array[1][x, y] = hl.sin(x)
    func_array[2][x, y] = hl.cos(y)

    # This method avoids the three problems above, but introduces a
    # new annoyance. Because these are separate Funcs, it is
    # difficult to schedule them so that they are all computed
    # together inside a single loop over x, y.

    # A third alternative is to define a hl.Func as evaluating to a
    # Tuple instead of an hl.Expr. A Tuple is a fixed-size collection of
    # Exprs which may have different type. The following function
    # evaluates to an integer value (x+y), and a floating point value
    # (hl.sin(x*y)).
    multi_valued = hl.Func("multi_valued")
    multi_valued[x, y] = (x + y, hl.sin(x * y))

    # Realizing a tuple-valued hl.Func returns a collection of
    # Buffers. We call this a Realization. It's equivalent to a
    # std::vector of hl.Buffer/Image objects:
    if True:
        im1, im2 = multi_valued.realize([80, 60])
        assert im1.type() == hl.Int(32)
        assert im2.type() == hl.Float(32)
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # You can also pass a tuple of pre-allocated buffers to realize()
    # rather than having new ones created. (The Buffers must have the correct
    # types and have identical sizes.)
    if True:
        im1, im2 = hl.Buffer(hl.Int(32), [80, 60]), hl.Buffer(hl.Float(32), [80, 60])
        multi_valued.realize((im1, im2))
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # All Tuple elements are evaluated together over the same domain
    # in the same loop nest, but stored in distinct allocations. The
    # equivalent plain-Python code to the above is:
    if True:
        multi_valued_0 = np.empty((80 * 60), dtype=np.int32)
        # The second component holds sine values, so it must be a float
        # array; an integer array would truncate every value.
        multi_valued_1 = np.empty((80 * 60), dtype=np.float32)
        # The realization is 80 wide (x) by 60 high (y), stored row by
        # row, so y runs over 60 rows and the row stride is 80.
        for yy in range(60):
            for xx in range(80):
                multi_valued_0[xx + 80 * yy] = xx + yy
                multi_valued_1[xx + 80 * yy] = math.sin(xx * yy)

        # The hand-written loop nest agrees with what Halide produced.
        assert multi_valued_0[30 + 80 * 40] == im1[30, 40]
        assert np.isclose(multi_valued_1[30 + 80 * 40], im2[30, 40])

    # When compiling ahead-of-time, a Tuple-valued hl.Func evaluates
    # into multiple distinct output halide_buffer_t structs. These appear in
    # order at the end of the function signature:
    # int multi_valued(...input buffers and params..., halide_buffer_t
    # *output_1, halide_buffer_t *output_2)

    # In Python a Tuple is written as an ordinary tuple of Exprs, as
    # we did above (the C++ API uses the Tuple constructor or a braced
    # initializer list for the same thing):
    multi_valued_2 = hl.Func("multi_valued_2")
    multi_valued_2[x, y] = (x + y, hl.sin(x * y))

    # Calls to a multi-valued hl.Func cannot be treated as Exprs. The
    # following is an error:
    # consumer = hl.Func()
    # consumer[x, y] = multi_valued_2[x, y] + 10

    # Instead you must index the returned object with square brackets
    # to retrieve the individual Exprs:
    integer_part = multi_valued_2[x, y][0]
    floating_part = multi_valued_2[x, y][1]
    assert type(integer_part) is hl.FuncTupleElementRef
    assert type(floating_part) is hl.FuncTupleElementRef

    consumer = hl.Func()
    consumer[x, y] = (integer_part + 10, floating_part + 10.0)

    # Tuple reductions.
    if True:
        # Tuples are particularly useful in reductions, as they allow
        # the reduction to maintain complex state as it walks along
        # its domain. The simplest example is an argmax.

        # First we create an Image to take the argmax over.
        input_func = hl.Func()
        input_func[x] = hl.sin(x)
        input = input_func.realize([100])
        assert input.type() == hl.Float(32)

        # Then we define a 2-valued Tuple which tracks the maximum value
        # and its index.
        arg_max = hl.Func()

        # Pure definition.
        # (using [()] for zero-dimensional Funcs is a convention of this python interface)
        arg_max[()] = (0, input[0])

        # Update definition. The index moves to r only when input[r] is
        # strictly greater than the running maximum, so on ties the
        # first maximum found is kept.
        r = hl.RDom([(1, 99)])
        old_index = arg_max[()][0]
        old_max = arg_max[()][1]
        new_index = hl.select(old_max < input[r], r, old_index)
        new_max = hl.max(input[r], old_max)
        arg_max[()] = (new_index, new_max)

        # The equivalent plain-Python loop is:
        arg_max_0 = 0
        arg_max_1 = float(input[0])
        for rr in range(1, 100):
            prev_index = arg_max_0
            prev_max = arg_max_1
            next_index = rr if (prev_max < input[rr]) else prev_index
            next_max = max(input[rr], prev_max)
            # In a tuple update definition, all loads and computation
            # are done before any stores, so that all Tuple elements
            # are updated atomically with respect to recursive calls
            # to the same hl.Func.
            arg_max_0 = next_index
            arg_max_1 = next_max

        # Let's verify that Halide and the plain loop found the same
        # maximum value and index.
        if True:
            r0, r1 = arg_max.realize()

            assert r0.type() == hl.Int(32)
            assert r1.type() == hl.Float(32)
            assert arg_max_0 == r0[()]
            assert np.isclose(arg_max_1, r1[()])

        # Halide provides argmax and hl.argmin as built-in reductions
        # similar to sum, product, maximum, and minimum. They return
        # a Tuple consisting of the point in the reduction domain
        # corresponding to that value, and the value itself. In the
        # case of ties they return the first value found. We'll use
        # one of these in the following section.

    # Tuples for user-defined types.
    if True:
        # Tuples can also be a convenient way to represent compound
        # objects such as complex numbers. Defining an object that
        # can be converted to and from a Tuple is one way to extend
        # Halide's type system with user-defined types.
        class Complex:
            """A complex number whose real and imaginary parts are Halide Exprs."""

            def __init__(self, r, i=None):
                """Build from two floats, from two Exprs, or from one 2-element indexable."""
                if isinstance(r, float) and isinstance(i, float):
                    self.real = hl.Expr(r)
                    self.imag = hl.Expr(i)
                elif i is not None:
                    self.real = r
                    self.imag = i
                else:
                    # A single argument is treated as (real, imag).
                    self.real = r[0]
                    self.imag = r[1]

            def as_tuple(self):
                "Convert to a Tuple"
                return (self.real, self.imag)

            def __add__(self, other):
                "Complex addition"
                return Complex(self.real + other.real, self.imag + other.imag)

            def __mul__(self, other):
                "Complex multiplication"
                return Complex(self.real * other.real - self.imag * other.imag,
                               self.real * other.imag + self.imag * other.real)

            # __getitem__ and __len__ let a Complex be used wherever a
            # 2-element sequence is accepted.
            def __getitem__(self, idx):
                return (self.real, self.imag)[idx]

            def __len__(self):
                return 2

            def magnitude(self):
                "Squared complex magnitude (real^2 + imag^2); no square root is taken"
                return (self.real * self.real) + (self.imag * self.imag)

            # Other complex operators would go here. The above are
            # sufficient for this example.

        # Let's use the Complex class to compute a Mandelbrot set.
        mandelbrot = hl.Func()

        # The initial complex value corresponding to an x, y coordinate
        # in our hl.Func.
        initial = Complex(x / 15.0 - 2.5, y / 6.0 - 2.0)

        # Pure definition.
        t = hl.Var("t")
        mandelbrot[x, y, t] = Complex(0.0, 0.0)

        # We'll use an update definition to take 12 steps.
        r = hl.RDom([(1, 12)])
        current = Complex(mandelbrot[x, y, r - 1])

        # The following line uses the complex multiplication and
        # addition we defined above.
        mandelbrot[x, y, r] = (Complex(current * current) + initial)

        # We'll use another tuple reduction to compute the iteration
        # number where the value first escapes a circle of radius 4
        # (magnitude() is squared, hence the comparison against 16).
        # This can be expressed as an hl.argmin of a boolean - we want
        # the index of the first time the given boolean expression is
        # false (we consider false to be less than true). The argmax
        # would return the index of the first time the expression is
        # true.
        escape_condition = Complex(mandelbrot[x, y, r]).magnitude() < 16.0
        first_escape = hl.argmin(escape_condition)
        assert type(first_escape) is tuple
        # We only want the index, not the value, but hl.argmin returns
        # both, so we'll index the hl.argmin Tuple expression using
        # square brackets to get the hl.Expr representing the index.
        escape = hl.Func()
        escape[x, y] = first_escape[0]

        # Realize the pipeline and print the result as ascii art.
        result = escape.realize([61, 25])
        assert result.type() == hl.Int(32)

        code = " .:-~*={&%#@"
        for yy in range(result.height()):
            for xx in range(result.width()):
                index = result[xx, yy]
                # Indices past the end of the palette print nothing.
                # (Open question from the original port: is the lesson 13
                # cpp version buggy here?)
                if index < len(code):
                    print("%c" % code[index], end="")
            print("")

    print("Success!")

    return 0
def main():
    """Halide tutorial lesson 13: Funcs that evaluate to Tuples.

    Walks through three ways of representing a collection of values per
    point (an extra dimension, a list of Funcs, a Tuple-valued Func), then
    uses Tuples for an argmax reduction and for a user-defined complex
    number type that drives a Mandelbrot rendering printed as ASCII art.

    Every claim is checked with an assertion, so an AssertionError means
    the bindings behaved differently from what the tutorial describes.

    Returns:
        0 after all checks have passed and "Success!" has been printed.
    """
    # So far Funcs (such as the one below) have evaluated to a single
    # scalar value for each point in their domain.
    single_valued = hl.Func()
    x, y = hl.Var("x"), hl.Var("y")
    single_valued[x, y] = x + y

    # One way to write a hl.Func that returns a collection of values is
    # to add an additional dimension which indexes that
    # collection. This is how we typically deal with color. For
    # example, the hl.Func below represents a collection of three values
    # for every x, y coordinate indexed by c.
    color_image = hl.Func()
    c = hl.Var("c")
    color_image[x, y, c] = hl.select(c == 0, 245,  # Red value
                                     c == 1, 42,   # Green value
                                     132)          # Blue value

    # This method is often convenient because it makes it easy to
    # operate on this hl.Func in a way that treats each item in the
    # collection equally:
    brighter = hl.Func()
    brighter[x, y, c] = color_image[x, y, c] + 10

    # However this method is also inconvenient for three reasons.
    #
    # 1) Funcs are defined over an infinite domain, so users of this
    # hl.Func can for example access color_image(x, y, -17), which is
    # not a meaningful value and is probably indicative of a bug.
    #
    # 2) It requires a hl.select, which can impact performance if not
    # bounded and unrolled:
    # brighter.bound(c, 0, 3).unroll(c)
    #
    # 3) With this method, all values in the collection must have the
    # same type. While the above two issues are merely inconvenient,
    # this one is a hard limitation that makes it impossible to
    # express certain things in this way.

    # It is also possible to represent a collection of values as a
    # collection of Funcs:
    func_array = [hl.Func() for i in range(3)]
    func_array[0][x, y] = x + y
    func_array[1][x, y] = hl.sin(x)
    func_array[2][x, y] = hl.cos(y)

    # This method avoids the three problems above, but introduces a
    # new annoyance. Because these are separate Funcs, it is
    # difficult to schedule them so that they are all computed
    # together inside a single loop over x, y.

    # A third alternative is to define a hl.Func as evaluating to a
    # Tuple instead of an hl.Expr. A Tuple is a fixed-size collection of
    # Exprs which may have different type. The following function
    # evaluates to an integer value (x+y), and a floating point value
    # (hl.sin(x*y)).
    multi_valued = hl.Func("multi_valued")
    multi_valued[x, y] = (x + y, hl.sin(x * y))

    # Realizing a tuple-valued hl.Func returns a collection of
    # Buffers. We call this a Realization. It's equivalent to a
    # std::vector of hl.Buffer/Image objects:
    if True:
        (im1, im2) = multi_valued.realize(80, 60)
        assert type(im1) is hl.Buffer_int32
        assert type(im2) is hl.Buffer_float32
        assert im1(30, 40) == 30 + 40
        assert numpy.isclose(im2(30, 40), math.sin(30 * 40))

    # All Tuple elements are evaluated together over the same domain
    # in the same loop nest, but stored in distinct allocations. The
    # equivalent plain-Python code to the above is:
    if True:
        multi_valued_0 = numpy.empty((80 * 60), dtype=numpy.int32)
        # The second component holds sine values, so it must be a float
        # array; an integer array would truncate every value.
        multi_valued_1 = numpy.empty((80 * 60), dtype=numpy.float32)
        # The realization is 80 wide (x) by 60 high (y), stored row by
        # row, so y runs over 60 rows and the row stride is 80.
        for yy in range(60):
            for xx in range(80):
                multi_valued_0[xx + 80 * yy] = xx + yy
                multi_valued_1[xx + 80 * yy] = math.sin(xx * yy)

        # The hand-written loop nest agrees with what Halide produced.
        assert multi_valued_0[30 + 80 * 40] == im1(30, 40)
        assert numpy.isclose(multi_valued_1[30 + 80 * 40], im2(30, 40))

    # When compiling ahead-of-time, a Tuple-valued hl.Func evaluates
    # into multiple distinct output buffer_t structs. These appear in
    # order at the end of the function signature:
    # int multi_valued(...input buffers and params..., buffer_t *output_1, buffer_t *output_2)

    # In Python a Tuple is written as an ordinary tuple of Exprs, as
    # we did above (the C++ API uses the Tuple constructor or a C++11
    # braced initializer list for the same thing):
    multi_valued_2 = hl.Func("multi_valued_2")
    multi_valued_2[x, y] = (x + y, hl.sin(x * y))

    # Calls to a multi-valued hl.Func cannot be treated as Exprs. The
    # following is an error:
    # consumer = hl.Func()
    # consumer[x, y] = multi_valued_2[x, y] + 10

    # Instead you must index the returned object with square brackets
    # to retrieve the individual Exprs:
    integer_part = multi_valued_2[x, y][0]
    floating_part = multi_valued_2[x, y][1]
    assert type(integer_part) is hl.FuncTupleElementRef
    assert type(floating_part) is hl.FuncTupleElementRef

    consumer = hl.Func()
    consumer[x, y] = (integer_part + 10, floating_part + 10.0)

    # Tuple reductions.
    if True:
        # Tuples are particularly useful in reductions, as they allow
        # the reduction to maintain complex state as it walks along
        # its domain. The simplest example is an argmax.

        # First we create an Image to take the argmax over.
        input_func = hl.Func()
        input_func[x] = hl.sin(x)
        input = input_func.realize(100)
        assert type(input) is hl.Buffer_float32

        # Then we define a 2-valued Tuple which tracks the maximum value
        # and its index.
        arg_max = hl.Func()

        # Pure definition.
        # (using [()] for zero-dimensional Funcs is a convention of this python interface)
        arg_max[()] = (0, input(0))

        # Update definition. The index moves to r only when input[r] is
        # strictly greater than the running maximum, so on ties the
        # first maximum found is kept.
        r = hl.RDom(1, 99)
        old_index = arg_max[()][0]
        old_max = arg_max[()][1]
        new_index = hl.select(old_max < input[r], r, old_index)
        new_max = hl.max(input[r], old_max)
        arg_max[()] = (new_index, new_max)

        # The equivalent plain-Python loop is:
        arg_max_0 = 0
        arg_max_1 = float(input(0))
        for rr in range(1, 100):
            prev_index = arg_max_0
            prev_max = arg_max_1
            next_index = rr if (prev_max < input(rr)) else prev_index
            next_max = max(input(rr), prev_max)
            # In a tuple update definition, all loads and computation
            # are done before any stores, so that all Tuple elements
            # are updated atomically with respect to recursive calls
            # to the same hl.Func.
            arg_max_0 = next_index
            arg_max_1 = next_max

        # Let's verify that Halide and the plain loop found the same
        # maximum value and index.
        if True:
            (r0, r1) = arg_max.realize()

            assert type(r0) is hl.Buffer_int32
            assert type(r1) is hl.Buffer_float32
            assert arg_max_0 == r0(0)
            assert numpy.isclose(arg_max_1, r1(0))

        # Halide provides argmax and hl.argmin as built-in reductions
        # similar to sum, product, maximum, and minimum. They return
        # a Tuple consisting of the point in the reduction domain
        # corresponding to that value, and the value itself. In the
        # case of ties they return the first value found. We'll use
        # one of these in the following section.

    # Tuples for user-defined types.
    if True:
        # Tuples can also be a convenient way to represent compound
        # objects such as complex numbers. Defining an object that
        # can be converted to and from a Tuple is one way to extend
        # Halide's type system with user-defined types.
        class Complex:
            """A complex number whose real and imaginary parts are Halide Exprs."""

            def __init__(self, r, i=None):
                """Build from two floats, from two Exprs, or from one 2-element indexable."""
                if isinstance(r, float) and isinstance(i, float):
                    self.real = hl.Expr(r)
                    self.imag = hl.Expr(i)
                elif i is not None:
                    self.real = r
                    self.imag = i
                else:
                    # A single argument is treated as (real, imag).
                    self.real = r[0]
                    self.imag = r[1]

            def as_tuple(self):
                "Convert to a Tuple"
                return (self.real, self.imag)

            def __add__(self, other):
                "Complex addition"
                return Complex(self.real + other.real, self.imag + other.imag)

            def __mul__(self, other):
                "Complex multiplication"
                return Complex(self.real * other.real - self.imag * other.imag,
                               self.real * other.imag + self.imag * other.real)

            # __getitem__ and __len__ let a Complex be used wherever a
            # 2-element sequence is accepted.
            def __getitem__(self, idx):
                return (self.real, self.imag)[idx]

            def __len__(self):
                return 2

            def magnitude(self):
                "Squared complex magnitude (real^2 + imag^2); no square root is taken"
                return (self.real * self.real) + (self.imag * self.imag)

            # Other complex operators would go here. The above are
            # sufficient for this example.

        # Let's use the Complex class to compute a Mandelbrot set.
        mandelbrot = hl.Func()

        # The initial complex value corresponding to an x, y coordinate
        # in our hl.Func.
        initial = Complex(x / 15.0 - 2.5, y / 6.0 - 2.0)

        # Pure definition.
        t = hl.Var("t")
        mandelbrot[x, y, t] = Complex(0.0, 0.0)

        # We'll use an update definition to take 12 steps.
        r = hl.RDom(1, 12)
        current = Complex(mandelbrot[x, y, r - 1])

        # The following line uses the complex multiplication and
        # addition we defined above.
        mandelbrot[x, y, r] = (Complex(current * current) + initial)

        # We'll use another tuple reduction to compute the iteration
        # number where the value first escapes a circle of radius 4
        # (magnitude() is squared, hence the comparison against 16).
        # This can be expressed as an hl.argmin of a boolean - we want
        # the index of the first time the given boolean expression is
        # false (we consider false to be less than true). The argmax
        # would return the index of the first time the expression is
        # true.
        escape_condition = Complex(mandelbrot[x, y, r]).magnitude() < 16.0
        first_escape = hl.argmin(escape_condition)
        assert type(first_escape) is tuple
        # We only want the index, not the value, but hl.argmin returns
        # both, so we'll index the hl.argmin Tuple expression using
        # square brackets to get the hl.Expr representing the index.
        escape = hl.Func()
        escape[x, y] = first_escape[0]

        # Realize the pipeline and print the result as ascii art.
        result = escape.realize(61, 25)
        assert type(result) is hl.Buffer_int32

        code = " .:-~*={&%#@"
        for yy in range(result.height()):
            for xx in range(result.width()):
                index = result(xx, yy)
                # Indices past the end of the palette print nothing.
                # (Open question from the original port: is the lesson 13
                # cpp version buggy here?)
                if index < len(code):
                    print("%c" % code[index], end="")
            print("")

    print("Success!")

    return 0
def main():
    """Halide tutorial lesson 14: the Halide type system.

    Constructs and inspects Halide types, checks the type reported by
    Exprs and Funcs, asserts each type-promotion rule for mixed-type
    arithmetic, checks the width of Handle, and finally exercises the
    generic ``average`` helper defined elsewhere in this file.

    Returns:
        0 after all assertions have passed and "Success!" has been printed.
    """
    # All Exprs have a scalar type, and all Funcs evaluate to one or
    # more scalar types. The scalar types in Halide are unsigned
    # integers of various bit widths, signed integers of the same set
    # of bit widths, floating point numbers in single and double
    # precision, and opaque handles (equivalent to void *). The
    # following list contains all the legal types.
    integer_widths = (8, 16, 32, 64)
    valid_halide_types = (
        [hl.UInt(bits) for bits in integer_widths]
        + [hl.Int(bits) for bits in integer_widths]
        + [hl.Float(32), hl.Float(64), hl.Handle()]
    )

    # ---- Constructing and inspecting types. ----

    # You can programmatically examine the properties of a Halide
    # type. This is useful when you write a function that has
    # hl.Expr arguments and you wish to check their types:
    assert hl.UInt(8).bits() == 8
    assert hl.Int(8).is_int()

    # You can also programmatically construct Types as a function of other Types.
    narrow = hl.UInt(8)
    t = narrow.with_bits(narrow.bits() * 2)
    assert t == hl.UInt(16)

    # Or construct a Type from a C++ scalar type
    #assert type_of<float>() == hl.Float(32)

    # The Type struct is also capable of representing vector types,
    # but this is reserved for Halide's internal use. You should
    # vectorize code by using hl.Func::vectorize, not by attempting to
    # construct vector expressions directly. You may encounter vector
    # types if you programmatically manipulate lowered Halide code,
    # but this is an advanced topic (see hl.Func::add_custom_lowering_pass).

    # You can query any Halide hl.Expr for its type. An hl.Expr
    # representing a hl.Var has type hl.Int(32):
    x = hl.Var("x")
    assert hl.Expr(x).type() == hl.Int(32)

    # Most transcendental functions in Halide hl.cast their inputs to a
    # hl.Float(32) and return a hl.Float(32):
    assert hl.sin(x).type() == hl.Float(32)

    # You can hl.cast an hl.Expr from one Type to another using the hl.cast operator:
    assert hl.cast(hl.UInt(8), x).type() == hl.UInt(8)

    # This also comes in a template form that takes a C++ type.
    #assert hl.cast<uint8_t>(x).type() == hl.UInt(8)

    # You can also query any defined hl.Func for the types it produces.
    f1 = hl.Func("f1")
    f1[x] = hl.cast(hl.UInt(8), x)
    assert f1.output_types()[0] == hl.UInt(8)

    f2 = hl.Func("f2")
    f2[x] = (x, hl.sin(x))
    assert f2.output_types()[0] == hl.Int(32)
    assert f2.output_types()[1] == hl.Float(32)

    # ---- Type promotion rules. ----

    # When you combine Exprs of different types (e.g. using '+',
    # '*', etc), Halide uses a system of type promotion
    # rules. These differ to C's rules. To demonstrate these
    # we'll make some Exprs of each type.
    x = hl.Var("x")
    u8 = hl.cast(hl.UInt(8), x)
    u16 = hl.cast(hl.UInt(16), x)
    u32 = hl.cast(hl.UInt(32), x)
    u64 = hl.cast(hl.UInt(64), x)
    s8 = hl.cast(hl.Int(8), x)
    s16 = hl.cast(hl.Int(16), x)
    s32 = hl.cast(hl.Int(32), x)
    s64 = hl.cast(hl.Int(64), x)
    f32 = hl.cast(hl.Float(32), x)
    f64 = hl.cast(hl.Float(64), x)

    # The rules are as follows, and are applied in the order they are
    # written below.

    # 1) It is an error to hl.cast or use arithmetic operators on Exprs of type hl.Handle().

    # 2) If the types are the same, then no type conversions occur.
    for t in valid_halide_types:
        # Skip the handle type.
        if not t.is_handle():
            e = hl.cast(t, x)
            assert (e + e).type() == e.type()

    # Each remaining rule is checked as (mixed-type expression, expected type).
    promotion_checks = [
        # 3) If one type is a float but the other is not, then the
        # non-float argument is promoted to a float (possibly causing a
        # loss of precision for large integers).
        (u8 + f32, hl.Float(32)),
        (f32 + s64, hl.Float(32)),
        (u16 + f64, hl.Float(64)),
        (f64 + s32, hl.Float(64)),

        # 4) If both types are float, then the narrower argument is
        # promoted to the wider bit-width.
        (f64 + f32, hl.Float(64)),

        # The rules above handle all the floating-point cases. The
        # following three rules handle the integer cases.

        # 5) If one of the expressions is an integer constant, then it is
        # coerced to the type of the other expression.
        (u32 + 3, hl.UInt(32)),
        (3 + s16, hl.Int(16)),

        # If this rule would cause the integer to overflow, then Halide
        # will trigger an error, e.g. uncommenting the following line
        # will cause this program to terminate with an error.
        # hl.Expr bad = u8 + 257

        # 6) If both types are unsigned integers, or both types are
        # signed integers, then the narrower argument is promoted to
        # wider type.
        (u32 + u8, hl.UInt(32)),
        (s16 + s64, hl.Int(64)),

        # 7) If one type is signed and the other is unsigned, both
        # arguments are promoted to a signed integer with the greater of
        # the two bit widths.
        (u8 + s32, hl.Int(32)),
        (u32 + s8, hl.Int(32)),

        # Note that this may silently overflow the unsigned type in the
        # case where the bit widths are the same.
        (u32 + s32, hl.Int(32)),
    ]
    for mixed, expected in promotion_checks:
        assert mixed.type() == expected

    if False:  # evaluate<X> not yet exposed to python
        # When an unsigned hl.Expr is converted to a wider signed type in
        # this way, it is first widened to a wider unsigned type
        # (zero-extended), and then reinterpreted as a signed
        # integer. I.e. casting the hl.UInt(8) value 255 to an hl.Int(32)
        # produces 255, not -1.
        #int32_t result32 = evaluate<int>(hl.cast<int32_t>(hl.cast<uint8_t>(255)))
        assert result32 == 255

        # When a signed type is explicitly converted to a wider unsigned
        # type with the hl.cast operator (the type promotion rules will
        # never do this automatically), it is first converted to the
        # wider signed type (sign-extended), and then reinterpreted as
        # an unsigned integer. I.e. casting the hl.Int(8) value -1 to a
        # hl.UInt(16) produces 65535, not 255.
        #uint16_t result16 = evaluate<uint16_t>(hl.cast<uint16_t>(hl.cast<int8_t>(-1)))
        assert result16 == 65535

    # ---- The type hl.Handle(). ----

    # hl.Handle is used to represent opaque pointers. Applying
    # type_of to any pointer type will return hl.Handle()
    #assert type_of<void *>() == hl.Handle()
    #assert type_of<const char * const **>() == hl.Handle()
    # (not clear what the proper python version would be)

    # Handles are always stored as 64-bit, regardless of the compilation
    # target.
    assert hl.Handle().bits() == 64

    # The main use of an hl.Expr of type hl.Handle is to pass
    # it through Halide to other external code.

    # ---- Generic code. ----

    # The main explicit use of Type in Halide is to write Halide
    # code parameterized by a Type. In C++ you'd do this with
    # templates. In Halide there's no need - you can inspect and
    # modify the types dynamically at runtime instead. The
    # average() function (defined outside this block) is applied to
    # two expressions of equal numeric type.
    x = hl.Var("x")
    generic_checks = [
        (average(hl.cast(hl.Float(32), x), 3.0), hl.Float(32)),
        (average(x, 3), hl.Int(32)),
        (average(hl.cast(hl.UInt(8), x), hl.cast(hl.UInt(8), 3)), hl.UInt(8)),
    ]
    for averaged, expected in generic_checks:
        assert averaged.type() == expected

    print("Success!")

    return 0