예제 #1
0
def test_param_bug():
    """Construct hl.Param with each argument combination.

    See https://github.com/rodrigob/Halide/issues/1
    """
    u8 = hl.UInt(8)

    # Name plus value, name only, and value only.
    named_with_value = hl.Param(u8, "p1", 0)
    named_only = hl.Param(u8, "p2")
    value_only = hl.Param(u8, 42)
예제 #2
0
def main():
    """Build the local Laplacian pipeline and pass it to filter_test_image."""
    image = hl.ImageParam(hl.UInt(16), 3, 'input')

    # Filter controls: the number of intensity levels, then alpha and beta.
    level_count = hl.Param(int_t, 'levels', 8)
    alpha_param = hl.Param(float_t, 'alpha', 1.0 / 7.0)
    beta_param = hl.Param(float_t, 'beta', 1.0)

    pipeline = get_local_laplacian(image, level_count, alpha_param,
                                   beta_param)
    filter_test_image(pipeline, image)
예제 #3
0
    def setup_inputs(self):
        """Declare the pipeline inputs and their boundary-conditioned wrappers.

        Adds every scalar and image parameter to ``self.inputs`` keyed by its
        ``name()``, adds the ``constant_exterior`` wrappers of the image
        parameters to ``self.clamps``, and sets ``self.nbfn`` to the height
        of the ``g_fock_in`` image parameter.
        """
        f64 = hl.Float(64)

        # input scalars
        scalar_params = [
            hl.Param(f64, label) for label in ("delo2", "delta", "rdelta")
        ]

        # input vectors
        vector_params = [
            hl.ImageParam(f64, 1, label)
            for label in ("expnt_in", "rnorm_in", "x_in", "y_in", "z_in")
        ]
        expnt_src, rnorm_src, x_src, y_src, z_src = vector_params

        # input matrices
        fm_src = hl.ImageParam(f64, 2, "fm_in")
        fock_src = hl.ImageParam(f64, 2, "g_fock_in")
        dens_src = hl.ImageParam(f64, 2, "g_dens_in")
        matrix_params = [fm_src, fock_src, dens_src]

        by_name = {}
        for param in scalar_params + vector_params + matrix_params:
            by_name[param.name()] = param
        self.inputs.update(by_name)

        # Give every image input a constant-exterior boundary condition with
        # value 0, to prevent out-of-bounds errors from odd tile sizes and such.
        pad = hl.BoundaryConditions.constant_exterior
        wrapped_sources = [
            ("expnt", expnt_src),
            ("rnorm", rnorm_src),
            ("x", x_src),
            ("y", y_src),
            ("z", z_src),
            ("fm", fm_src),
            ("g_fock_in_clamped", fock_src),
            ("g_dens", dens_src),
        ]
        self.clamps.update(
            {key: pad(source, 0) for key, source in wrapped_sources})

        # nbfn=number of basis functions.  This is our problem size
        self.nbfn = fock_src.height()
예제 #4
0
def test_basics2():
    """Index a Func with integer expressions and expect a float index to fail.

    Several integer-valued index expressions must be accepted. An index built
    with true division (``s_sigma / 2``) must raise a RuntimeError whose
    message mentions the implicit float32-to-int cast.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8
    half = s_sigma // 2

    x, y, z, c = (hl.Var(label) for label in ('x', 'y', 'z', 'c'))

    # Add a boundary condition
    clamped = hl.Func('clamped')
    x_bounded = hl.clamp(x, 0, image.width() - 1)
    y_bounded = hl.clamp(y, 0, image.height() - 1)
    clamped[x, y] = image[x_bounded, y_bounded, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    grid_x = x * s_sigma
    grid_y = y * s_sigma

    # Every one of these integer index forms must be accepted.
    val0 = clamped[grid_x, grid_y]
    val00 = clamped[grid_x * hl.i32(1), grid_y * hl.i32(1)]
    val22 = clamped[grid_x - hl.i32(half), grid_y - hl.i32(half)]
    val2 = clamped[grid_x - half, grid_y - half]
    val3 = clamped[grid_x + r.x - half, grid_y + r.y - half]

    # True division yields a float offset, which must be rejected.
    message = None
    try:
        val1 = clamped[grid_x - s_sigma / 2, grid_y - s_sigma / 2]
    except RuntimeError as err:
        message = str(err)

    assert message is not None, 'Did not see expected exception!'
    assert 'Implicit cast from float32 to int' in message
예제 #5
0
def test_basics3():
    """Build a histogram Func reference and apply in-place additions to it."""
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8
    half = s_sigma // 2

    x, y, z, c = (hl.Var(label) for label in ('x', 'y', 'z', 'c'))

    # Add a boundary condition
    clamped = hl.Func('clamped')
    x_bounded = hl.clamp(x, 0, image.width() - 1)
    y_bounded = hl.clamp(y, 0, image.height() - 1)
    clamped[x, y] = image[x_bounded, y_bounded, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    sample = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
    sample = hl.clamp(sample, 0.0, 1.0)
    zi = hl.i32((sample / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    weight = hl.select(c == 0, sample, 1.0)
    cell = histogram[x, y, zi, c]
    # Augmented assignment is kept as-is: it is the operation under test.
    cell += 5
    cell += weight
예제 #6
0
def test_basics2():
    """Index a clamped Func with several integer-valued index expressions."""
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    s_sigma = 8
    half = s_sigma // 2

    x, y, z, c = (hl.Var(label) for label in ('x', 'y', 'z', 'c'))

    # Add a boundary condition
    clamped = hl.Func('clamped')
    x_bounded = hl.clamp(x, 0, image.width() - 1)
    y_bounded = hl.clamp(y, 0, image.height() - 1)
    clamped[x, y] = image[x_bounded, y_bounded, 0]

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    grid_x = x * s_sigma
    grid_y = y * s_sigma

    # NOTE: an offset of s_sigma/2 (true division) is expected to fail, so it
    # is deliberately not exercised here.
    val0 = clamped[grid_x, grid_y]
    val00 = clamped[grid_x * hl.cast(hl.Int(32), 1),
                    grid_y * hl.cast(hl.Int(32), 1)]
    val22 = clamped[grid_x - hl.cast(hl.Int(32), half),
                    grid_y - hl.cast(hl.Int(32), half)]
    val2 = clamped[grid_x - half, grid_y - half]
    val3 = clamped[grid_x + r.x - half, grid_y + r.y - half]
예제 #7
0
def test_basics2():
    """Check the types of Var arithmetic and integer indexing of a Func.

    Prints several expressions and their types as a debugging aid, asserts
    the Halide type of each arithmetic form, and finally indexes a clamped
    Func with a number of integer-valued index expressions.
    """

    input = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma',
                       0.1)  # Value needed if not generating an executable
    s_sigma = 8  # This is passed during code generation in the C++ version

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0,
                                   input.width() - 1),
                          hl.clamp(y, 0,
                                   input.height() - 1), 0]

    # Debug output: show the expressions and the types they evaluate to.
    print("s_sigma", s_sigma)
    print("s_sigma/2", s_sigma / 2)
    print("s_sigma//2", s_sigma // 2)
    print()
    print("x * s_sigma", x * s_sigma)
    print("x * 8", x * 8)
    print("x * 8 + 4", x * 8 + 4)
    print("x * 8 * 4", x * 8 * 4)
    print()
    print("x", x)
    # Previously this line printed only the label; the value was missing.
    print("(x * s_sigma).type()", (x * s_sigma).type())
    print("(x * 8).type()", (x * 8).type())
    print("(x * 8 + 4).type()", (x * 8 + 4).type())
    print("(x * 8 * 4).type()", (x * 8 * 4).type())
    print("(x * 8 / 4).type()", (x * 8 / 4).type())
    print("((x * 8) * 4).type()", ((x * 8) * 4).type())
    print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

    assert (x * 8).type() == hl.Int(32)
    assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
    assert ((x * 8) / 4).type() == hl.Int(32)
    assert (x * (8 / 4)).type() == hl.Float(32)  # under python3 division rules
    assert (x * (8 // 4)).type() == hl.Int(32)
    # NOTE: (x * 8 // 4).type() == hl.Int(32) is not checked; the original
    # author marked it as not yet implemented.

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # NOTE: indexing with s_sigma/2 (true division) is expected to fail, so
    # it is deliberately not exercised here.
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]

    return
예제 #8
0
def main():
    """Build the local Laplacian pipeline, then either compile or run it."""
    image = hl.ImageParam(hl.UInt(16), 3, 'input')

    # number of intensity levels
    level_count = hl.Param(int_t, 'levels', 8)

    # Parameters controlling the filter
    alpha_param = hl.Param(float_t, 'alpha', 1.0 / 7.0)
    beta_param = hl.Param(float_t, 'beta', 1.0)

    pipeline = get_local_laplacian(image, level_count, alpha_param,
                                   beta_param)

    # Leave as False to run the jit immediately and get instant gratification;
    # True hands the pipeline to generate_compiled_file instead.
    generate = False
    if not generate:
        filter_test_image(pipeline, image)
    else:
        generate_compiled_file(pipeline)
def main():
    """Define the lesson 10 pipeline and compile it ahead of time.

    Writes an object file, a C header and a Python-extension source file,
    all named after ``fname``.

    Returns:
        0 once the pipeline has been compiled.
    """

    # We'll define a simple one-stage pipeline:
    brighter = hl.Func("brighter")
    x, y = hl.Var("x"), hl.Var("y")

    # The pipeline will depend on one scalar parameter.
    offset = hl.Param(hl.UInt(8), name="offset")

    # And take one grayscale 8-bit input buffer. The first
    # constructor argument gives the type of a pixel, and the second
    # specifies the number of dimensions (not the number of
    # channels!). For a grayscale image this is two; for a color
    # image it's three. Currently, four dimensions is the maximum for
    # inputs and outputs.
    input = hl.ImageParam(hl.UInt(8), 2)

    # If we were jit-compiling, these would just be an int and a
    # hl.Buffer, but because we want to compile the pipeline once and
    # have it work for any value of the parameter, we need to make a
    # hl.Param object, which can be used like an hl.Expr, and an hl.ImageParam
    # object, which can be used like a hl.Buffer.

    # Define the hl.Func.
    brighter[x, y] = input[x, y] + offset

    # Schedule it.
    brighter.vectorize(x, 16).parallel(y)

    # This time, instead of calling brighter.realize(...), which
    # would compile and run the pipeline immediately, we'll call a
    # method that compiles the pipeline to an object file and header.
    #
    # For AOT-compiled code, we need to explicitly declare the
    # arguments to the routine. This routine takes two. Arguments are
    # usually Params or ImageParams.
    #
    # Every output path and the generated function name derive from this
    # single base name, so they cannot drift apart.
    fname = "lesson_10_halide"
    brighter.compile_to(
        {
            hl.Output.object: fname + ".o",
            hl.Output.c_header: fname + ".h",
            hl.Output.python_extension: fname + ".py.cpp"
        }, [input, offset], fname)

    print("Halide pipeline compiled, but not yet run.")

    # To continue this lesson, look in the file lesson_10_aot_compilation_run.cpp

    return 0
예제 #10
0
def main():
    """Build the bilateral grid pipeline, then either compile or run it."""
    image = hl.ImageParam(hl.Float(32), 2, 'input')
    # Value needed if not generating an executable
    range_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # This is passed during code generation in the C++ version
    spatial_sigma = 8

    pipeline = get_bilateral_grid(image, range_sigma, spatial_sigma)

    # Leave `generate` as False to run the jit immediately and get instant
    # gratification; True hands the pipeline to generate_compiled_file.
    generate = False
    if not generate:
        filter_test_image(pipeline, image)
    else:
        generate_compiled_file(pipeline)

    print("\nEnd of game. Have a nice day!")
예제 #11
0
def test_division():
    """Check result types and values of ``/`` and ``//`` on Halide params."""
    f32 = hl.Param(hl.Float(32), 'f32', -32.0)
    f64 = hl.Param(hl.Float(64), 'f64', 64.0)
    i16 = hl.Param(hl.Int(16), 'i16', -16)
    i32 = hl.Param(hl.Int(32), 'i32', 32)
    u16 = hl.Param(hl.UInt(16), 'u16', 16)
    u32 = hl.Param(hl.UInt(32), 'u32', 32)

    # Verify that the types match the rules in match_types()
    type_cases = [
        (f32 / f64, hl.Float(64)),
        (f32 // f64, hl.Float(64)),
        (i16 / i32, hl.Int(32)),
        (i16 // i32, hl.Int(32)),
        (u16 / u32, hl.UInt(32)),
        (u16 // u32, hl.UInt(32)),
        # int / uint -> int
        (u16 / i32, hl.Int(32)),
        (i32 // u16, hl.Int(32)),
        # any / float -> float
        # float / any -> float
        (u16 / f32, hl.Float(32)),
        (u16 // f32, hl.Float(32)),
        (i16 / f64, hl.Float(64)),
        (i16 // f64, hl.Float(64)),
    ]
    for quotient, expected_type in type_cases:
        assert quotient.type() == expected_type

    # Verify that division semantics match those for Halide
    # (rather than python); this differs for int/int which
    # defaults to float (rather than floordiv) in Python3.
    # Also test that // always floors the result, even for float.
    value_cases = [
        (f32 / f64, -0.5),
        (f32 // f64, -1.0),
        (i16 / i32, -1),
        (i16 // i32, -1),
        (i32 / i16, -2),
        (u16 / u32, 0),
        (u16 // u32, 0),
        (u16 / i32, 0),
        (i32 // u16, 2),
        (u16 / f32, -0.5),
        (u16 // f32, -1.0),
        (i16 / f64, -0.25),
        (i16 // f64, -1.0),
    ]
    for quotient, expected_value in value_cases:
        assert _evaluate(quotient) == expected_value
예제 #12
0
def test_int_promotion():
    """Check that (Exprlike op literal) keeps the Exprlike's type.

    The literal must be matched to the type of the Exprlike rather than
    promoting the result to int32. All types that use add_binary_operators()
    should be tested here.
    """

    def check_both_orders(operand):
        # All the binary ops are handled the same, so + is good enough.
        # The literal is tried on the right-hand side, then the left.
        _check_is_u16(operand + 2)
        _check_is_u16(2 + operand)

    x = hl.Var('x')
    f = hl.Func('f')
    f[x] = hl.u16(x)

    # Exprlike = FuncRef
    check_both_orders(f[x])

    # Exprlike = Expr
    check_both_orders(hl.Expr(f[x]))

    # Exprlike = Param
    check_both_orders(hl.Param(hl.UInt(16)))
예제 #13
0
def main():
    """Cross-compile the lesson 10 pipeline and check the object file headers.

    Compiles the pipeline for the host and, depending on the ``create_*``
    flags, for 32-bit ARM Android, 64-bit x86 Windows and 32-bit ARM iOS.
    It then compares the leading bytes of each generated object file with
    the magic numbers expected for that format.

    Returns:
        0 on success, -1 if an expected object file could not be read.

    Raises:
        Exception: if an object file does not start with the expected bytes.
    """

    def read_header(path, size):
        """Return the first ``size`` bytes of ``path``, or None if unreadable."""
        # open() is inside the try so that a missing file is reported as
        # "not generated" rather than escaping as an unhandled error, and the
        # context manager closes the handle on every path.
        try:
            with open(path, "rb") as object_file:
                return object_file.read(size)
        except OSError:
            return None

    # We'll define the simple one-stage pipeline that we used in lesson 10.
    brighter = hl.Func("brighter")
    x, y = hl.Var("x"), hl.Var("y")

    # Declare the arguments.
    offset = hl.Param(hl.UInt(8))
    input = hl.ImageParam(hl.UInt(8), 2)
    args = [input, offset]

    # Define the hl.Func.
    brighter[x, y] = input[x, y] + offset

    # Schedule it.
    brighter.vectorize(x, 16).parallel(y)

    # The following line is what we did in lesson 10. It compiles an
    # object file suitable for the system that you're running this
    # program on.  For example, if you compile and run this file on
    # 64-bit linux on an x86 cpu with sse4.1, then the generated code
    # will be suitable for 64-bit linux on x86 with sse4.1.
    brighter.compile_to_file("lesson_11_host", args, "lesson_11_host")

    # We can also compile object files suitable for other cpus and
    # operating systems. You do this with an optional trailing argument
    # to compile_to_file which specifies the target to compile for.

    create_android = True
    create_windows = True
    create_ios = True

    if create_android:
        # Let's use this to compile a 32-bit arm android version of this code:
        target = hl.Target()
        target.os = hl.TargetOS.Android  # The operating system
        target.arch = hl.TargetArch.ARM  # The CPU architecture
        target.bits = 32  # The bit-width of the architecture
        arm_features = []  # A list of features to set
        target.set_features(arm_features)
        # Pass the target as the last argument.
        brighter.compile_to_file("lesson_11_arm_32_android", args,
                                 "lesson_11_arm_32_android", target)

    if create_windows:
        # And now a Windows object file for 64-bit x86 with AVX and SSE 4.1:
        target = hl.Target()
        target.os = hl.TargetOS.Windows
        target.arch = hl.TargetArch.X86
        target.bits = 64
        target.set_features([hl.TargetFeature.AVX, hl.TargetFeature.SSE41])
        brighter.compile_to_file("lesson_11_x86_64_windows", args,
                                 "lesson_11_x86_64_windows", target)

    if create_ios:
        # And finally an iOS mach-o object file for one of Apple's 32-bit
        # ARM processors - the A6. It's used in the iPhone 5. The A6 uses
        # a slightly modified ARM architecture called ARMv7s. We specify
        # this using the target features field.  Support for Apple's
        # 64-bit ARM processors is very new in llvm, and still somewhat
        # flaky.
        target = hl.Target()
        target.os = hl.TargetOS.IOS
        target.arch = hl.TargetArch.ARM
        target.bits = 32
        target.set_features([hl.TargetFeature.ARMv7s])
        brighter.compile_to_file("lesson_11_arm_32_ios", args,
                                 "lesson_11_arm_32_ios", target)

    # Now let's check these files are what they claim, by examining
    # their first few bytes.

    if create_android:
        # 32-arm android object files start with the magic bytes:
        # uint8_t []
        arm_32_android_magic = [
            0x7f,
            ord('E'),
            ord('L'),
            ord('F'),  # ELF format
            1,  # 32-bit
            1,  # 2's complement little-endian
            1
        ]  # Current version of elf

        length = len(arm_32_android_magic)
        header_bytes = read_header("lesson_11_arm_32_android.o", length)
        if header_bytes is None:
            print("Android object file not generated")
            return -1

        header = list(unpack("B" * length, header_bytes))
        if header != arm_32_android_magic:
            # Show which byte positions matched before failing.
            print([x == y for x, y in zip(header, arm_32_android_magic)])
            raise Exception(
                "Unexpected header bytes in 32-bit arm object file.")

    if create_windows:
        # 64-bit windows object files start with the magic 16-bit value 0x8664
        # (presumably referring to x86-64)
        # uint8_t  []
        win_64_magic = [0x64, 0x86]

        header_bytes = read_header("lesson_11_x86_64_windows.obj", 2)
        if header_bytes is None:
            print("Windows object file not generated")
            return -1

        header = list(unpack("B" * 2, header_bytes))
        if header != win_64_magic:
            raise Exception(
                "Unexpected header bytes in 64-bit windows object file.")

    if create_ios:
        # 32-bit arm iOS mach-o files start with the following magic bytes:
        #  uint32_t []
        arm_32_ios_magic = [
            0xfeedface,  # Mach-o magic bytes
            12,  # CPU type is ARM
            11,  # CPU subtype is ARMv7s
            1
        ]  # It's a relocatable object file.

        header_bytes = read_header("lesson_11_arm_32_ios.o", 4 * 4)
        if header_bytes is None:
            print("ios object file not generated")
            return -1

        header = list(unpack("I" * 4, header_bytes))
        if header != arm_32_ios_magic:
            raise Exception(
                "Unexpected header bytes in 32-bit arm ios object file.")

    # It looks like the object files we produced are plausible for
    # those targets. We'll count that as a success for the purposes
    # of this tutorial. For a real application you'd then need to
    # figure out how to integrate Halide into your cross-compilation
    # toolchain. There are several small examples of this in the
    # Halide repository under the apps folder. See HelloAndroid and
    # HelloiOS here:
    # https://github.com/halide/Halide/tree/master/apps/
    print("Success!")
    return 0
예제 #14
0
def test_autodiff():
    """Check gradients from hl.propagate_adjoints on a small pipeline.

    Every gradient is looked up and realized twice, and the same values are
    asserted each time.
    """
    x = hl.Var('x')
    b = hl.Buffer(hl.Float(32), [3])
    p = hl.Param(hl.Float(32), 'p', 1)
    for index, value in enumerate((1.0, 2.0, 3.0)):
        b[index] = value

    f, g, h = (hl.Func(label) for label in ('f', 'g', 'h'))
    f[x] = b[x]
    f[0] = 4.0
    g[x] = f[x] * 5.0 * p
    r = hl.RDom([(0, 3)])
    h[()] = 0.0
    h[()] += g[r.x]

    d = hl.propagate_adjoints(h)

    # gradient w.r.t. the initialization of f
    for _ in range(2):
        d_f_init = d[f]
        d_f_init_buf = d_f_init.realize([3])
        assert d_f_init_buf[0] == 0.0
        assert d_f_init_buf[1] == 5.0
        assert d_f_init_buf[2] == 5.0

    # gradient w.r.t. the updated f
    for _ in range(2):
        d_f_update_0 = d[f, 0]
        d_f_update_0_buf = d_f_update_0.realize([3])
        assert d_f_update_0_buf[0] == 5.0
        assert d_f_update_0_buf[1] == 5.0
        assert d_f_update_0_buf[2] == 5.0

    # gradient w.r.t. the buffer
    for _ in range(2):
        d_b = d[b]
        d_b_buf = d_b.realize([3])
        assert d_b_buf[0] == 0.0
        assert d_b_buf[1] == 5.0
        assert d_b_buf[2] == 5.0

    # gradient w.r.t. the param: 5 * (4 + 2 + 3)
    for _ in range(2):
        d_p = d[p]
        d_p_buf = d_p.realize()
        assert abs(d_p_buf[()] - 45.0) < 1e-6