예제 #1
0
def test_basics3():
    """Apply ``+=`` to an indexed Func value inside a bilateral-grid style setup.

    Builds a clamped view of a 3-D float image, derives a clamped sample and
    an integer bin index from it, initialises a 4-D ``histogram`` Func to zero
    and then applies ``+=`` twice to the value obtained by indexing it.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is supplied at code-generation time.
    s_sigma = 8

    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp both coordinates into the image, read channel 0.
    clamped = hl.Func('clamped')
    x_inside = hl.clamp(x, 0, image.width() - 1)
    y_inside = hl.clamp(y, 0, image.height() - 1)
    clamped[x, y] = image[x_inside, y_inside, 0]

    # Bilateral grid construction.
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    half = s_sigma // 2
    sample = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
    sample = hl.clamp(sample, 0.0, 1.0)
    zi = hl.i32((sample / r_sigma) + 0.5)
    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    weight = hl.select(c == 0, sample, 1.0)
    cell = histogram[x, y, zi, c]
    cell += 5
    cell += weight
예제 #2
0
def test_basics2():
    """Build several index expressions into a clamped image Func.

    The expressions combine Vars with Python ints, explicit ``hl.cast`` to
    Int(32), floor division and RDom variables. The results are discarded;
    only their construction is exercised.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is supplied at code-generation time.
    s_sigma = 8

    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp both coordinates into the image, read channel 0.
    clamped = hl.Func('clamped')
    clamped[x, y] = image[
        hl.clamp(x, 0, image.width() - 1),
        hl.clamp(y, 0, image.height() - 1),
        0,
    ]

    # Bilateral grid construction.
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    half = s_sigma // 2

    plain = clamped[x * s_sigma, y * s_sigma]
    times_cast_one = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                             y * s_sigma * hl.cast(hl.Int(32), 1)]
    # The true-division variant (s_sigma / 2 as an offset) is expected to
    # fail and is therefore not built here.
    minus_cast_half = clamped[x * s_sigma - hl.cast(hl.Int(32), half),
                              y * s_sigma - hl.cast(hl.Int(32), half)]
    minus_half = clamped[x * s_sigma - half, y * s_sigma - half]
    with_rdom = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
예제 #3
0
def test_basics2():
    """Build integer index expressions and check that a float index is rejected.

    Integer-valued index expressions into ``clamped`` must construct without
    error. Using ``s_sigma / 2`` (true division) as an offset must raise a
    RuntimeError mentioning an implicit float32-to-int cast.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8

    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp both coordinates into the image, read channel 0.
    clamped = hl.Func('clamped')
    x_inside = hl.clamp(x, 0, image.width() - 1)
    y_inside = hl.clamp(y, 0, image.height() - 1)
    clamped[x, y] = image[x_inside, y_inside, 0]

    # Bilateral grid construction.
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    half = s_sigma // 2

    plain = clamped[x * s_sigma, y * s_sigma]
    times_i32_one = clamped[x * s_sigma * hl.i32(1), y * s_sigma * hl.i32(1)]
    minus_i32_half = clamped[x * s_sigma - hl.i32(half),
                             y * s_sigma - hl.i32(half)]
    minus_half = clamped[x * s_sigma - half, y * s_sigma - half]
    with_rdom = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]

    # True division yields a Python float, which must not be accepted as an index.
    try:
        clamped[x * s_sigma - s_sigma / 2, y * s_sigma - s_sigma / 2]
    except RuntimeError as err:
        assert 'Implicit cast from float32 to int' in str(err)
    else:
        assert False, 'Did not see expected exception!'
예제 #4
0
def test_basics():
    """Exercise basic indexing expressions, then schedule and JIT-compile a blur.

    The bare indexing expressions are evaluated only to confirm that they can
    be constructed; their values are discarded.
    """
    image = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    one = hl.i32(1)
    assert one.type() == hl.Int(32)

    # Index the image with Vars, literals and derived expressions.
    x_next = x + 1
    image[x, y]
    image[0, 0]
    image[x_next, y]
    image[x + 1, y]
    image[x, y] + image[x + 1, y]

    if False:
        # Kept disabled; this branch never executes.
        first = blur_x[x, y]
        second = blur_x[x, y + 1]
        first + second
        blur_x[x, y] + blur_x[x, y + 1]

    (image[x, y] + image[x + 1, y]) / 2
    blur_x[x, y]
    blur_xx[x, y] = image[x, y]

    # Two-stage blur: three-tap average along x, then along y.
    blur_x[x, y] = (image[x, y] + image[x + 1, y] + image[x + 2, y]) / 3
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule and compile.
    xi = hl.Var('xi')
    yi = hl.Var('yi')
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)
    blur_y.compile_jit()
예제 #5
0
def main():
    """JIT-compile the erode pipeline, run it once and save input and result as PNG."""
    # Define and compile the pipeline.
    input_param = hl.ImageParam(hl.UInt(8), 3, "input")
    erode = get_erode(input_param)
    erode.compile_jit()

    # Wrap the numpy input in a Halide buffer and bind it to the parameter.
    source = get_input_data()
    input_image = hl.Buffer(source)
    input_param.set(input_image)

    # Output array with the same shape and dtype as the input, Fortran-ordered.
    result = np.empty(source.shape, dtype=source.dtype, order="F")
    output_image = hl.Buffer(result)

    print("input_image", input_image)
    print("output_image", output_image)

    # Run the pipeline into the output buffer.
    erode.realize(output_image)

    # Persist both images.
    input_path, output_path = "erode_input.png", "erode_result.png"
    imageio.imsave(input_path, source)
    imageio.imsave(output_path, result)
    print("\nerode realized on output image.", "Result saved at", output_path,
          "( input data copy at", input_path, ")")

    print("\nEnd of game. Have a nice day!")
예제 #6
0
def main():
    """Run the interpolate pipeline on the input data, time it and save the images."""
    input_param = hl.ImageParam(float_t, 3, "input")
    levels = 10

    interpolate = get_interpolate(input_param, levels)

    # Wrap the numpy input (which must have 4 channels) in a Halide buffer.
    source = get_input_data()
    assert source.shape[2] == 4
    input_image = hl.Buffer(source)
    input_param.set(input_image)

    input_width, input_height = source.shape[:2]

    # Time only the realization; the output has 3 channels.
    started = datetime.now()
    output_image = interpolate.realize(input_width, input_height, 3)
    finished = datetime.now()
    elapsed = (finished - started).total_seconds()
    print('Interpolated in %.5f secs' % elapsed)

    result = hl.buffer_to_ndarray(output_image)

    # Persist both images.
    input_path, output_path = "interpolate_input.png", "interpolate_result.png"
    imsave(input_path, source)
    imsave(output_path, result)
    print("\nblur realized on output image.", "Result saved at", output_path,
          "( input data copy at", input_path, ")")

    print("\nEnd of game. Have a nice day!")
예제 #7
0
def test_basics2():
    """Check the result types of Var/int arithmetic and build index expressions.

    Prints a set of diagnostic values and expression types, asserts the
    expected Halide type for several ``Var``-with-int expressions, and then
    constructs a number of index expressions into a clamped image Func. The
    index expressions are only constructed; their values are discarded.
    """
    image = hl.ImageParam(hl.Float(32), 3, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is supplied at code-generation time.
    s_sigma = 8

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Boundary condition: clamp both coordinates into the image, read channel 0.
    clamped = hl.Func('clamped')
    clamped[x, y] = image[hl.clamp(x, 0, image.width() - 1),
                          hl.clamp(y, 0, image.height() - 1), 0]

    # Diagnostic output: plain Python values first, then Halide expressions
    # and their types.
    print("s_sigma", s_sigma)
    print("s_sigma/2", s_sigma / 2)
    print("s_sigma//2", s_sigma // 2)
    print()
    print("x * s_sigma", x * s_sigma)
    print("x * 8", x * 8)
    print("x * 8 + 4", x * 8 + 4)
    print("x * 8 * 4", x * 8 * 4)
    print()
    print("x", x)
    # This line previously printed only its label, with no value after it.
    print("(x * s_sigma).type()", (x * s_sigma).type())
    print("(x * 8).type()", (x * 8).type())
    print("(x * 8 + 4).type()", (x * 8 + 4).type())
    print("(x * 8 * 4).type()", (x * 8 * 4).type())
    print("(x * 8 / 4).type()", (x * 8 / 4).type())
    print("((x * 8) * 4).type()", ((x * 8) * 4).type())
    print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

    assert (x * 8).type() == hl.Int(32)
    assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
    assert ((x * 8) / 4).type() == hl.Int(32)
    # 8 / 4 is evaluated by Python first and yields a float under Python 3.
    assert (x * (8 / 4)).type() == hl.Float(32)
    assert (x * (8 // 4)).type() == hl.Int(32)
    # `x * 8 // 4` is not asserted: floor division on expressions was noted
    # as not yet implemented.

    # Bilateral grid construction.
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # The true-division variant (s_sigma / 2 as an offset) is expected to
    # fail and is therefore not built here.
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]
예제 #8
0
def test_imageparam_bug():
    """Define a 2-D Func from a 1-D ImageParam indexed by only one of the Vars.

    See https://github.com/rodrigob/Halide/issues/2
    """
    x = hl.Var("x")
    y = hl.Var("y")
    fx = hl.Func("fx")
    image = hl.ImageParam(hl.UInt(8), 1, "input")

    # The right-hand side deliberately ignores x.
    fx[x, y] = image[y]
예제 #9
0
def main():
    """Build the local-laplacian pipeline with default parameters and run it on the test image."""
    input_img = hl.ImageParam(hl.UInt(16), 3, 'input')

    # Number of intensity levels.
    levels = hl.Param(int_t, 'levels', 8)

    # Filter controls.
    alpha = hl.Param(float_t, 'alpha', 1.0 / 7.0)
    beta = hl.Param(float_t, 'beta', 1.0)

    pipeline = get_local_laplacian(input_img, levels, alpha, beta)
    filter_test_image(pipeline, input_img)
예제 #10
0
    def setup_inputs(self):
        """Declare all float64 inputs and register them plus their bounded views.

        Every parameter is stored in ``self.inputs`` keyed by its ``name()``.
        Each buffer input is wrapped with
        ``hl.BoundaryConditions.constant_exterior(..., 0)`` and stored in
        ``self.clamps``. ``self.nbfn`` is set to the height of the
        ``g_fock_in`` input.
        """
        f64 = hl.Float(64)

        # Scalar inputs.
        delo2 = hl.Param(f64, "delo2")
        delta = hl.Param(f64, "delta")
        rdelta = hl.Param(f64, "rdelta")

        # One-dimensional inputs.
        expnt_in = hl.ImageParam(f64, 1, "expnt_in")
        rnorm_in = hl.ImageParam(f64, 1, "rnorm_in")
        x_in = hl.ImageParam(f64, 1, "x_in")
        y_in = hl.ImageParam(f64, 1, "y_in")
        z_in = hl.ImageParam(f64, 1, "z_in")

        # Two-dimensional inputs.
        fm_in = hl.ImageParam(f64, 2, "fm_in")
        g_fock_in_in = hl.ImageParam(f64, 2, "g_fock_in")
        g_dens_in = hl.ImageParam(f64, 2, "g_dens_in")

        all_params = (
            delo2, delta, rdelta,
            expnt_in, rnorm_in, x_in, y_in, z_in,
            fm_in, g_fock_in_in, g_dens_in,
        )
        self.inputs.update({param.name(): param for param in all_params})

        # Give every buffer input a constant-0 exterior so that out-of-bounds
        # reads (e.g. from odd tile sizes) do not cause errors.
        bounded = hl.BoundaryConditions.constant_exterior
        self.clamps.update({
            "expnt": bounded(expnt_in, 0),
            "rnorm": bounded(rnorm_in, 0),
            "x": bounded(x_in, 0),
            "y": bounded(y_in, 0),
            "z": bounded(z_in, 0),
            "fm": bounded(fm_in, 0),
            "g_fock_in_clamped": bounded(g_fock_in_in, 0),
            "g_dens": bounded(g_dens_in, 0),
        })

        # nbfn = number of basis functions; this is the problem size.
        self.nbfn = g_fock_in_in.height()
예제 #11
0
def test_basics():
    """Exercise basic indexing expressions and JIT-compile a blur, printing progress.

    The bare indexing expressions are evaluated only to confirm that they can
    be constructed; their values are discarded. Progress markers are printed
    between steps.
    """
    image = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x = hl.Func('blur_x')
    blur_xx = hl.Func('blur_xx')
    blur_y = hl.Func('blur_y')

    one = hl.cast(hl.Int(32), 1)
    assert one.type() == hl.Int(32)
    print("yy type:", one.type())

    # Index the image with Vars, literals and derived expressions.
    x_next = x + 1
    image[x, y]
    image[0, 0]
    image[x_next, y]
    image[x + 1, y]
    print("ping 0.2")
    image[x, y] + image[x + 1, y]

    if False:
        # Kept disabled; this branch never executes.
        first = blur_x[x, y]
        second = blur_x[x, y + 1]
        first + second
        blur_x[x, y] + blur_x[x, y + 1]

    print("ping 0.3")
    (image[x, y] + image[x + 1, y]) / 2
    print("ping 0.4")
    blur_x[x, y]
    print("ping 0.4.1")
    blur_xx[x, y] = image[x, y]

    # Two-stage blur: three-tap average along x, then along y.
    print("ping 0.5")
    blur_x[x, y] = (image[x, y] + image[x + 1, y] + image[x + 2, y]) / 3
    print("ping 1")
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule and compile.
    xi = hl.Var('xi')
    yi = hl.Var('yi')
    print("ping 2")
    blur_y.tile(x, y, xi, yi, 8, 4).parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
    print("Compiled to jit")
def main():
    """Ahead-of-time compile the one-stage `brighter` pipeline to object, header and Python extension.

    Returns:
        0 after the output files have been requested.
    """
    # A simple one-stage pipeline.
    brighter = hl.Func("brighter")
    x = hl.Var("x")
    y = hl.Var("y")

    # One scalar parameter ...
    offset = hl.Param(hl.UInt(8), name="offset")

    # ... and one 8-bit input buffer. The second constructor argument is the
    # number of dimensions, not channels: two for grayscale, three for
    # colour.
    input_param = hl.ImageParam(hl.UInt(8), 2)

    # When JIT-compiling these could be a plain int and an hl.Buffer. The
    # pipeline is compiled once here and must work for any parameter value,
    # so hl.Param (usable like an hl.Expr) and hl.ImageParam (usable like an
    # hl.Buffer) are used instead.

    # Definition.
    brighter[x, y] = input_param[x, y] + offset

    # Schedule.
    brighter.vectorize(x, 16).parallel(y)

    # Rather than realize(), which would compile and run immediately, compile
    # to files. AOT-compiled code needs its arguments declared explicitly;
    # they are usually Params or ImageParams. This routine takes two.
    fname = "lesson_10_halide"
    requested_outputs = {
        hl.Output.object: fname + ".o",
        hl.Output.c_header: fname + ".h",
        hl.Output.python_extension: fname + ".py.cpp",
    }
    brighter.compile_to(requested_outputs, [input_param, offset], fname)

    print("Halide pipeline compiled, but not yet run.")

    # The lesson continues in lesson_10_aot_compilation_run.cpp.
    return 0
예제 #13
0
def test_multipass_constraints():
    """Check inferred bounds when input and output constraints refer to each other.

    After ``infer_input_bounds`` on a bounds-query buffer, the min/extent of
    both input dimensions and both query-buffer dimensions must match the
    expected values; otherwise the observed values are printed and the test
    fails.
    """
    input_param = hl.ImageParam(hl.Float(32), 2, "input")

    f = hl.Func("f")
    x = hl.Var("x")
    y = hl.Var("y")

    f[x, y] = input_param[x + 1, y + 1] + input_param[x - 1, y - 1]
    f[x, y] += 3.0
    f.update().vectorize(x, 4)

    out = f.output_buffer()

    # Hard-to-resolve constraints: input dim 0 depends on input dim 1 and on
    # the output extent.
    input_param.dim(0).set_bounds(
        min=input_param.dim(1).min() - 5,
        extent=input_param.dim(1).extent() + out.dim(0).extent())

    # The output extent is bumped by one while it is below 22.
    bumped_extent = hl.select(out.dim(0).extent() < 22,
                              out.dim(0).extent() + 1,
                              out.dim(0).extent())
    out.dim(0).set_bounds(min=0, extent=bumped_extent)

    # Bounds query buffer.
    query_buf = hl.Buffer.make_bounds_query(type=hl.Float(32), sizes=[7, 8])
    query_buf.set_min([2, 2])

    f.infer_input_bounds(query_buf)

    in_buf = input_param.get()
    observed = (
        in_buf.dim(0).min(),
        in_buf.dim(0).extent(),
        in_buf.dim(1).min(),
        in_buf.dim(1).extent(),
        query_buf.dim(0).min(),
        query_buf.dim(0).extent(),
        query_buf.dim(1).min(),
        query_buf.dim(1).extent(),
    )
    expected = (-4, 34, 1, 10, 0, 24, 2, 8)

    if observed != expected:
        print("Constraints not correctly satisfied:\n", "in:", *observed[:4],
              "out:", *observed[4:])
        assert False
예제 #14
0
def test_scalar_funcs():
    """Exercise zero-dimensional ImageParam/Func indexing with ``[()]`` and JIT-compile."""
    scalar_in = hl.ImageParam(hl.UInt(16), 0, 'input')

    f = hl.Func('f')
    g = hl.Func('g')

    # Bare reads; the results are discarded.
    scalar_in[()]
    (scalar_in[()] + scalar_in[()]) / 2
    f[()]
    g[()]

    # Definitions: f averages three reads of the input, g three reads of f.
    f[()] = (scalar_in[()] + scalar_in[()] + scalar_in[()]) / 3
    g[()] = (f[()] + f[()] + f[()]) / 3

    g.compile_jit()
예제 #15
0
def main():
    """Build the bilateral-grid pipeline and either emit a compiled file or run it on the test image."""
    input_param = hl.ImageParam(hl.Float(32), 2, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is supplied at code-generation time.
    s_sigma = 8

    bilateral_grid = get_bilateral_grid(input_param, r_sigma, s_sigma)

    # With `generate` False the JIT path runs immediately; set it to True to
    # produce the compiled file instead.
    generate = False
    if not generate:
        filter_test_image(bilateral_grid, input_param)
    else:
        generate_compiled_file(bilateral_grid)

    print("\nEnd of game. Have a nice day!")
예제 #16
0
def main():
    """Build the local-laplacian pipeline and either emit a compiled file or run it on the test image."""
    input_param = hl.ImageParam(hl.UInt(16), 3, 'input')

    # Number of intensity levels.
    levels = hl.Param(int_t, 'levels', 8)

    # Filter controls.
    alpha = hl.Param(float_t, 'alpha', 1.0 / 7.0)
    beta = hl.Param(float_t, 'beta', 1.0)

    local_laplacian = get_local_laplacian(input_param, levels, alpha, beta)

    # With `generate` False the JIT path runs immediately; set it to True to
    # produce the compiled file instead.
    generate = False
    if not generate:
        filter_test_image(local_laplacian, input_param)
    else:
        generate_compiled_file(local_laplacian)
예제 #17
0
def main():
    """Run the interpolate pipeline, time it and save input and result as 8-bit PNGs."""
    input_param = hl.ImageParam(float_t, 3, "input")
    levels = 10

    interpolate = get_interpolate(input_param, levels)

    # Wrap the numpy input (which must have 4 channels) in a Halide buffer.
    source = get_input_data()
    assert source.shape[2] == 4
    input_image = hl.Buffer(source)
    input_param.set(input_image)

    input_width, input_height = source.shape[:2]

    # Time only the realization; the output has 3 channels.
    started = datetime.now()
    output_image = interpolate.realize(input_width, input_height, 3)
    finished = datetime.now()

    elapsed = (finished - started).total_seconds()
    print('Interpolated in {:.5f} secs'.format(elapsed))

    result = np.asanyarray(output_image)

    # Scale by 255 and convert to uint8 before saving.
    source_u8 = (source * 255).astype(np.uint8)
    result_u8 = (result * 255).astype(np.uint8)

    input_path, output_path = "interpolate_input.png", "interpolate_result.png"
    imageio.imsave(input_path, source_u8)
    imageio.imsave(output_path, result_u8)

    print()
    summary = 'blur realized on output image. Result saved at {} (input data copy at {})'
    print(summary.format(output_path, input_path))
    print()
    print("End of game. Have a nice day!")
예제 #18
0
import halide as hl
import imageio
import numpy as np

# --- Static construction of the Halide function ---------------------------
input = hl.ImageParam(hl.Float(32), 3)
f = hl.Func('f')
x = hl.Var('x')
y = hl.Var('y')
c = hl.Var('c')
# Double every value, capping the result at 1.0.
f[x, y, c] = hl.min(2 * input[x, y, c], 1.0)

# --- Compilation and execution --------------------------------------------
# Load the image as float32 divided by 255 and convert it to Fortran order,
# which is the ordering Halide assumes.
rgb = imageio.imread('images/rgb.png').astype(np.float32) / 255.0
input_buffer = hl.Buffer(np.asfortranarray(rgb))
input.set(input_buffer)

# Realize f over the full extent of the input buffer.
output = f.realize(
    input_buffer.width(),
    input_buffer.height(),
    input_buffer.channels(),
)

# Convert to a numpy array, rescale to uint8 and write the file.
output = np.array(output)
imageio.imsave('output.png', (output * 255.0).astype(np.uint8))
def focus_stack_pipeline():
    """Assemble the multi-output Halide pipeline for focus stacking.

    Declares one uint8 image input plus, per pyramid level, a float32
    laplacian input and a float32 2-D energy input. The top level gets two of
    each: one pair for the entropy merge and one for the deviation merge. The
    layers are built with the module's helper functions (``gaussian``,
    ``reduce_layer``, ``expand_layer``, ``laplacian``, ``region_energy``,
    ``energy_maxes``, ``merge_laplacian``, ``entropy``, ``deviation``,
    ``mirror``, ``f32``). All merged laplacians are collected first, followed
    by all merged energies.

    Returns:
        dict: ``'pipeline'`` is an ``hl.Pipeline`` over every output Func
        (merged laplacians first, then merged energies). ``'inputs'`` is the
        list of ImageParams in the order image, laplacian inputs, energy
        inputs.
    """
    outputs = []
    start_w, start_h = 3000, 2000
    number_of_layers = 5
    layer_sizes = [[start_w, start_h]]

    # Each further level is half the previous one, rounded up.
    for _ in range(number_of_layers):
        w, h = layer_sizes[-1]
        layer_sizes.append([int(math.ceil(w / 2.0)), int(math.ceil(h / 2.0))])

    # Add the last size once more to get the second top laplacian layer
    # (gaussian) for the energy/deviation split.
    layer_sizes.append(layer_sizes[-1])

    input_param = hl.ImageParam(hl.UInt(8), 3)
    input_param.dim(0).set_estimate(0, start_w)
    input_param.dim(1).set_estimate(0, start_h)
    input_param.dim(2).set_estimate(0, 3)

    lap_inputs = []
    max_energy_inputs = []

    for i in range(number_of_layers + 1):
        lap_layer = hl.ImageParam(hl.Float(32), 3, "lap{}".format(i))
        lap_inputs.append(lap_layer)
        w, h = layer_sizes[i]
        lap_layer.dim(0).set_estimate(0, w)
        lap_layer.dim(1).set_estimate(0, h)
        lap_layer.dim(2).set_estimate(0, 3)

        if i == number_of_layers:
            # Last (top, smallest) level: the last laplacian (really taken
            # directly from the gaussian) is added twice. One copy is maxed
            # on entropies and one on deviations.
            lap_layer = hl.ImageParam(hl.Float(32), 3, "lap{}".format(i + 1))
            lap_inputs.append(lap_layer)
            lap_layer.dim(0).set_estimate(0, w)
            lap_layer.dim(1).set_estimate(0, h)
            lap_layer.dim(2).set_estimate(0, 3)

            # NOTE(review): "entroy" looks like a typo for "entropy". It is
            # left as-is because it is the parameter's runtime name and
            # callers may refer to it.
            entropy_layer = hl.ImageParam(hl.Float(32), 2, "entroy{}".format(i))
            max_energy_inputs.append(entropy_layer)
            entropy_layer.dim(0).set_estimate(0, w)
            entropy_layer.dim(1).set_estimate(0, h)

            deviation_layer = hl.ImageParam(hl.Float(32), 2, "deviation{}".format(i))
            max_energy_inputs.append(deviation_layer)
            deviation_layer.dim(0).set_estimate(0, w)
            deviation_layer.dim(1).set_estimate(0, h)
        else:
            max_energy_layer = hl.ImageParam(hl.Float(32), 2, "max_energy{}".format(i))
            max_energy_inputs.append(max_energy_layer)
            max_energy_layer.dim(0).set_estimate(0, w)
            max_energy_layer.dim(1).set_estimate(0, h)

    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    hist_index = hl.Var('hist_index')
    # Use the declared start size rather than repeating the literals, so the
    # two cannot drift apart.
    clamped = f32(x, y, c, mirror(input_param, start_w, start_h))

    f = hl.Func("input32")
    f[x, y, c] = clamped[x, y, c]

    energy_outputs = []
    gaussian_layers = [f]

    laplacian_layers = []
    merged_laps = []

    for layer_num in range(number_of_layers):
        w, h = layer_sizes[layer_num]

        start_layer = gaussian_layers[-1]

        # Blur the image.
        gaussian_layer = gaussian(x, y, c, start_layer)

        # Reduce the layer size and add it to the list.
        next_layer = reduce_layer(x, y, c, gaussian_layer)
        gaussian_layers.append(next_layer)

        # Expand back up.
        expanded = expand_layer(x, y, c, next_layer)

        # Laplacian = original - blurred/reduced/expanded version.
        laplacian_layer = laplacian(x, y, c, start_layer, expanded)
        laplacian_layers.append(laplacian_layer)

        # Energies for this level, merged with the previous ones.
        prev_energies = mirror(max_energy_inputs[layer_num], w, h)
        next_energies = region_energy(x, y, c, laplacian_layer)

        prev_laplacian = mirror(lap_inputs[layer_num], w, h)
        merged_energies = energy_maxes(x, y, c, prev_energies, next_energies)

        merged_lap = merge_laplacian(x, y, c, merged_energies, next_energies,
                                     prev_laplacian, laplacian_layer)

        energy_outputs.append([[w, h, True], merged_energies])
        merged_laps.append(merged_lap)

        # NOTE(review): these estimates use the current level's size even
        # though next_layer comes from reduce_layer; confirm that is intended.
        next_layer.set_estimate(x, 0, w)
        next_layer.set_estimate(y, 0, h)
        next_layer.set_estimate(c, 0, 3)

    # The last level is handled differently.
    w, h = layer_sizes[-1]

    # The next_lap is really just the last gaussian layer.
    next_lap = gaussian_layers[-1]

    prev_entropy_laplacian = mirror(lap_inputs[-2], w, h)
    prev_entropy = mirror(max_energy_inputs[-2], w, h)
    next_entropy = entropy(x, y, c, next_lap, w, h, hist_index)
    merged_entropy = energy_maxes(x, y, c, prev_entropy, next_entropy)
    merged_lap_on_entropy = merge_laplacian(x, y, c, merged_entropy, next_entropy,
                                            prev_entropy_laplacian, next_lap)
    merged_laps.append(merged_lap_on_entropy)

    prev_deviation_laplacian = mirror(lap_inputs[-1], w, h)
    prev_deviation = mirror(max_energy_inputs[-1], w, h)
    next_deviation = deviation(x, y, c, next_lap)
    merged_deviation = energy_maxes(x, y, c, prev_deviation, next_deviation)
    merged_lap_on_deviation = merge_laplacian(x, y, c, merged_deviation, next_deviation,
                                              prev_deviation_laplacian, next_lap)
    merged_laps.append(merged_lap_on_deviation)

    energy_outputs.append([[w, h, True], merged_entropy])
    energy_outputs.append([[w, h, True], merged_deviation])

    print("NUM LAYERS: ", len(gaussian_layers), len(laplacian_layers), layer_sizes)

    # Laplacian outputs first; each entry is [[w, h, gray, mid], func].
    for i, merged_lap in enumerate(merged_laps):
        w, h = layer_sizes[i]
        mid = i < (len(merged_laps) - 2)
        outputs.append([[w, h, False, mid], merged_lap])

    # Then the energies; each entry is [[w, h, gray], func].
    outputs.extend(energy_outputs)

    # Attach size estimates to every output. Gray outputs have no channel
    # dimension to estimate.
    for size, output in outputs:
        w, h = size[0], size[1]
        gray = len(size) > 2 and size[2]

        output.set_estimate(x, 0, w)
        output.set_estimate(y, 0, h)
        if not gray:
            output.set_estimate(c, 0, 3)

    print("OUTPUT LAYERS: ")
    pprint(outputs)

    output_funcs = [output for _, output in outputs]

    pipeline = hl.Pipeline(output_funcs)

    return {
        'pipeline': pipeline,
        'inputs': [input_param] + lap_inputs + max_energy_inputs
    }
예제 #20
0
def test_extern():
    """
    Shows an example of Halide calling a C library loaded
    in the Python process via ctypes.

    The test is currently skipped: it prints a notice and returns 0 before
    any Halide call. The remainder of the body is kept for when the external
    function can be built in linkable form.
    """

    # Requires Makefile support to build the external function in linkable form
    print("TODO: test_extern not yet implemented in Python; skipping...")
    return 0

    x = hl.Var("x")

    data = np.random.random(10).astype(np.float64)
    expected_result = np.sort(data)
    output_data = np.empty(10, dtype=np.float64)

    sort_func = hl.Func("extern_sort_func")
    # gsl_sort,
    # see http://www.gnu.org/software/gsl/manual/html_node/Sorting-vectors.html#Sorting-vectors

    input_param = hl.ImageParam(hl.Float(64), 1, "input_data")

    extern_name = "the_sort_func"
    params = [hl.ExternFuncArgument(input_param)]
    output_types = [hl.Int(32)]
    dimensionality = 1
    sort_func.define_extern(extern_name, params, output_types, dimensionality)

    # Before the library is loaded, compilation is expected to fail.
    try:
        sort_func.compile_jit()
    except RuntimeError:
        pass
    else:
        raise Exception(
            "compile_jit should have raised a 'Symbol not found' RuntimeError")

    import ctypes
    sort_lib = ctypes.CDLL("the_sort_function.so")
    print(sort_lib.the_sort_func)

    # Loading via ctypes alone may or may not make the symbol visible to the
    # JIT; report which without failing.
    try:
        sort_func.compile_jit()
    except RuntimeError:
        print("ctypes CDLL did not work out")
    else:
        print("ctypes CDLL worked !")

    lib_path = "the_sort_function.so"
    load_error = load_library_into_llvm(lib_path)
    assert load_error == False

    sort_func.compile_jit()

    # now that things are loaded, we try to call them
    input_param.set(data)
    sort_func.realize(output_data)

    # np.isclose returns an element-wise array, whose truth value is
    # ambiguous inside `assert`; np.allclose reduces it to a single bool.
    assert np.allclose(expected_result, output_data)

    return
예제 #21
0
def _read_header_bytes(path, count):
    """Return the first *count* bytes of *path*, or None if it cannot be opened or read."""
    try:
        with open(path, "rb") as obj_file:
            return obj_file.read(count)
    except OSError:
        return None


def main():
    """Cross-compile the `brighter` pipeline for several targets and check the object headers.

    Compiles for the host, then for 32-bit ARM Android, 64-bit x86 Windows
    and 32-bit ARM iOS. The leading bytes of each cross-compiled object file
    are then compared against the expected magic values.

    Returns:
        0 on success, -1 if an expected object file cannot be read.

    Raises:
        Exception: if an object file's leading bytes do not match the
            expected magic values.
    """
    # The simple one-stage pipeline from lesson 10.
    brighter = hl.Func("brighter")
    x, y = hl.Var("x"), hl.Var("y")

    # Declare the arguments.
    offset = hl.Param(hl.UInt(8))
    input_param = hl.ImageParam(hl.UInt(8), 2)
    args = [input_param, offset]

    # Define the hl.Func.
    brighter[x, y] = input_param[x, y] + offset

    # Schedule it.
    brighter.vectorize(x, 16).parallel(y)

    # As in lesson 10: compile an object file suitable for the system this
    # program is running on.
    brighter.compile_to_file("lesson_11_host", args, "lesson_11_host")

    # Object files for other CPUs and operating systems are produced by
    # passing a target as an extra argument to compile_to_file.
    create_android = True
    create_windows = True
    create_ios = True

    if create_android:
        # 32-bit ARM Android.
        target = hl.Target()
        target.os = hl.TargetOS.Android  # The operating system
        target.arch = hl.TargetArch.ARM  # The CPU architecture
        target.bits = 32  # The bit-width of the architecture
        arm_features = []  # A list of features to set
        target.set_features(arm_features)
        # Pass the target as the last argument.
        brighter.compile_to_file("lesson_11_arm_32_android", args,
                                 "lesson_11_arm_32_android", target)

    if create_windows:
        # 64-bit x86 Windows with AVX and SSE 4.1.
        target = hl.Target()
        target.os = hl.TargetOS.Windows
        target.arch = hl.TargetArch.X86
        target.bits = 64
        target.set_features([hl.TargetFeature.AVX, hl.TargetFeature.SSE41])
        brighter.compile_to_file("lesson_11_x86_64_windows", args,
                                 "lesson_11_x86_64_windows", target)

    if create_ios:
        # iOS mach-o object file for one of Apple's 32-bit ARM processors,
        # the A6 (used in the iPhone 5). It uses a slightly modified ARM
        # architecture called ARMv7s, selected via the target features.
        target = hl.Target()
        target.os = hl.TargetOS.IOS
        target.arch = hl.TargetArch.ARM
        target.bits = 32
        target.set_features([hl.TargetFeature.ARMv7s])
        brighter.compile_to_file("lesson_11_arm_32_ios", args,
                                 "lesson_11_arm_32_ios", target)

    # Check that the files are what they claim to be by examining their
    # first few bytes.

    if create_android:
        # 32-bit ARM Android object files start with these bytes (uint8 each).
        arm_32_android_magic = [
            0x7f,
            ord('E'),
            ord('L'),
            ord('F'),  # ELF format
            1,  # 32-bit
            1,  # 2's complement little-endian
            1,  # Current version of elf
        ]

        length = len(arm_32_android_magic)
        header_bytes = _read_header_bytes("lesson_11_arm_32_android.o", length)
        if header_bytes is None:
            print("Android object file not generated")
            return -1

        header = list(unpack("B" * length, header_bytes))
        if header != arm_32_android_magic:
            # Show which byte positions matched before failing.
            print([got == want
                   for got, want in zip(header, arm_32_android_magic)])
            raise Exception(
                "Unexpected header bytes in 32-bit arm object file.")

    if create_windows:
        # 64-bit Windows object files start with the 16-bit value 0x8664
        # (presumably referring to x86-64), stored as two uint8 bytes.
        win_64_magic = [0x64, 0x86]

        header_bytes = _read_header_bytes("lesson_11_x86_64_windows.obj", 2)
        if header_bytes is None:
            print("Windows object file not generated")
            return -1

        header = list(unpack("B" * 2, header_bytes))
        if header != win_64_magic:
            raise Exception(
                "Unexpected header bytes in 64-bit windows object file.")

    if create_ios:
        # 32-bit ARM iOS mach-o files start with these uint32 values.
        arm_32_ios_magic = [
            0xfeedface,  # Mach-o magic bytes
            12,  # CPU type is ARM
            11,  # CPU subtype is ARMv7s
            1,  # It's a relocatable object file.
        ]

        header_bytes = _read_header_bytes("lesson_11_arm_32_ios.o", 4 * 4)
        if header_bytes is None:
            print("ios object file not generated")
            return -1

        header = list(unpack("I" * 4, header_bytes))
        if header != arm_32_ios_magic:
            raise Exception(
                "Unexpected header bytes in 32-bit arm ios object file.")

    # The object files look plausible for those targets, which counts as
    # success for this tutorial. A real application would still need to
    # integrate Halide into its cross-compilation toolchain; see the
    # HelloAndroid and HelloiOS examples under the apps folder:
    # https://github.com/halide/Halide/tree/master/apps/
    print("Success!")
    return 0
예제 #22
0
def test_complexstub():
    """Run the ``complexstub`` stub and verify each of its six outputs.

    A constant image is passed to the stub both directly (as the buffer
    inputs) and wrapped in an ``ImageParam`` (as ``simple_input`` and
    ``array_input``), together with one float and one int argument. Every
    returned output is then realized and compared, element by element,
    against the value computed from ``constant_image``; the element type of
    each realization is checked as well.

    Raises:
        AssertionError: if an output has an unexpected type or length, or if
            any realized element differs from its expected value.
    """
    import itertools

    constant_image = _make_constant_image()
    # Named image_param rather than ``input`` so the builtin is not shadowed;
    # the Halide-side name of the parameter is still 'input'.
    image_param = hl.ImageParam(hl.UInt(8), 3, 'input')
    image_param.set(constant_image)

    target = hl.get_jit_target_from_environment()

    float_arg = 1.25
    int_arg = 33

    def check_buffer(buf, extents, expected_at):
        """Assert ``buf[coords] == expected_at(*coords)`` over ``extents``."""
        # product() varies the last coordinate fastest, which is the same
        # visiting order as nested loops over each extent in turn, so the
        # first mismatch reported is the same one nested loops would report.
        for coords in itertools.product(*map(range, extents)):
            expected = expected_at(*coords)
            actual = buf[coords]
            assert expected == actual, "Expected %s Actual %s" % (expected,
                                                                  actual)

    def input_at(x, y, c):
        """Return the input pixel that a pass-through output must equal."""
        return constant_image[x, y, c]

    outputs = complexstub(target,
                          typed_buffer_input=constant_image,
                          untyped_buffer_input=constant_image,
                          simple_input=image_param,
                          array_input=[image_param, image_param],
                          float_arg=float_arg,
                          int_arg=[int_arg, int_arg],
                          untyped_buffer_output_type="uint8",
                          vectorize=True)

    # The return value is a tuple; unpack separately to avoid
    # making the callsite above unreadable.
    (simple_output, tuple_output, array_output, typed_buffer_output,
     untyped_buffer_output, static_compiled_buffer_output) = outputs

    # simple_output: expected to reproduce the input, as float32.
    simple = simple_output.realize(32, 32, 3, target)
    assert simple.type() == hl.Float(32)
    check_buffer(simple, (32, 32, 3), input_at)

    # tuple_output: two float32 buffers, input * float_arg and that + int_arg.
    pair = tuple_output.realize(32, 32, 3, target)
    assert pair[0].type() == hl.Float(32)
    assert pair[1].type() == hl.Float(32)
    assert len(pair) == 2
    for x, y, c in itertools.product(range(32), range(32), range(3)):
        expected1 = constant_image[x, y, c] * float_arg
        expected2 = expected1 + int_arg
        actual1, actual2 = pair[0][x, y, c], pair[1][x, y, c]
        assert expected1 == actual1, "Expected1 %s Actual1 %s" % (
            expected1, actual1)
        # The message labels the second pair as Expected2/Actual2 so that a
        # failure is attributed to the right tuple element.
        assert expected2 == actual2, "Expected2 %s Actual2 %s" % (
            expected2, actual2)

    # array_output: two 2-D int16 outputs, channel 0 of the input + int_arg.
    assert len(array_output) == 2
    for element in array_output:
        realized = element.realize(32, 32, target)
        assert realized.type() == hl.Int(16)
        check_buffer(realized, (32, 32),
                     lambda x, y: constant_image[x, y, 0] + int_arg)

    # TODO: Output<Buffer<>> has additional behaviors useful when a Stub
    # is used within another Generator; this isn't yet implemented since there
    # isn't yet Python bindings for Generator authoring. This section
    # of the test may need revision at that point.
    typed = typed_buffer_output.realize(32, 32, 3, target)
    assert typed.type() == hl.Float(32)
    check_buffer(typed, (32, 32, 3), input_at)

    # The untyped output takes the element type requested through
    # untyped_buffer_output_type="uint8" above.
    untyped = untyped_buffer_output.realize(32, 32, 3, target)
    assert untyped.type() == hl.UInt(8)
    check_buffer(untyped, (32, 32, 3), input_at)

    # static_compiled_buffer_output: checked on a 4x4x1 region, input + 42.
    static_compiled = static_compiled_buffer_output.realize(4, 4, 1, target)
    assert static_compiled.type() == hl.UInt(8)
    check_buffer(static_compiled, (4, 4, 1),
                 lambda x, y, c: constant_image[x, y, c] + 42)
예제 #23
0
    def gen_g(self):
        ''' Define the Halide Func "g" and the intermediate Funcs it is built from.

        Reads the loop variables from self.vars, the clamped inputs from
        self.clamps and the raw/scalar inputs from self.inputs, then defines
        a chain of pure Funcs (distances, exponent sums, an exponential
        factor, a table-interpolated f0val) ending in g[i, j, k, l].
        Every Func created here is registered through
        self.add_funcs_by_name().

        When both self.tracing and self.tracing_g are set, an extra 4-D
        Float(64) ImageParam "g_trace_in" is created, stored in self.inputs,
        wrapped with a constant-exterior (0) boundary condition stored in
        self.clamps["g_trace"], and added to g.

        NOTE(review): `pi` is not defined in this method; presumably it is
        imported at module level (e.g. from math) -- confirm.
        '''
        # loop variables i, j, k, l
        i, j, k, l = [self.vars[c] for c in "ijkl"]
        # clamped inputs
        x, y, z, expnt, fm, rnorm = [
            self.clamps[c] for c in ["x", "y", "z", "expnt", "fm", "rnorm"]
        ]
        # unclamped input (for sizing): only its dim(0) min/max are used below
        fm_in = self.inputs["fm_in"]
        # scalar inputs
        delo2, delta, rdelta = [
            self.inputs[c] for c in ["delo2", "delta", "rdelta"]
        ]

        # Pairwise quantities over (i, j). Note the Halide names of r2 ("g_r2")
        # and of x2/y2/z2 below carry a "g_" prefix, unlike the Python names.
        dx = hl.Func("dx")
        dy = hl.Func("dy")
        dz = hl.Func("dz")
        r2 = hl.Func("g_r2")
        expnt2 = hl.Func("expnt2")
        expnt_inv = hl.Func("expnt_inv")
        self.add_funcs_by_name([dx, dy, dz, r2, expnt2, expnt_inv])

        # coordinate differences between entries i and j
        dx[i, j] = x[i] - x[j]
        dy[i, j] = y[i] - y[j]
        dz[i, j] = z[i] - z[j]

        # squared distance: dx^2 + dy^2 + dz^2
        r2[i,
           j] = dx[i, j] * dx[i, j] + dy[i, j] * dy[i, j] + dz[i, j] * dz[i, j]

        # sum of the two exponents and its reciprocal (computed in float64)
        expnt2[i, j] = expnt[i] + expnt[j]
        expnt_inv[i, j] = hl.f64(1.0) / expnt2[i, j]

        fac2 = hl.Func("fac2")
        ex_arg = hl.Func("ex_arg")
        ex = hl.Func("ex")
        denom = hl.Func("denom")
        fac4d = hl.Func("fac4d")
        self.add_funcs_by_name([fac2, ex_arg, ex, denom, fac4d])
        # product of the two exponents divided by their sum
        fac2[i, j] = expnt[i] * expnt[j] * expnt_inv[i, j]
        ex_arg[i, j, k, l] = -fac2[i, j] * r2[i, j] - fac2[k, l] * r2[k, l]
        # exp(ex_arg), replaced by 0 when the argument is below -37
        ex[i, j, k, l] = hl.select(ex_arg[i, j, k, l] < hl.f64(-37.0),
                                   hl.f64(0.0), hl.exp(ex_arg[i, j, k, l]))
        denom[i, j, k,
              l] = expnt2[i, j] * expnt2[k, l] * hl.sqrt(expnt2[i, j] +
                                                         expnt2[k, l])
        # same form as fac2, applied to the pair sums expnt2[i, j] and expnt2[k, l]
        fac4d[i, j, k,
              l] = expnt2[i, j] * expnt2[k, l] / (expnt2[i, j] + expnt2[k, l])

        x2 = hl.Func("g_x2")
        y2 = hl.Func("g_y2")
        z2 = hl.Func("g_z2")
        rpq2 = hl.Func("rpq2")
        self.add_funcs_by_name([x2, y2, z2, rpq2])
        # exponent-weighted average of the i and j coordinates, per axis
        x2[i, j] = (x[i] * expnt[i] + x[j] * expnt[j]) * expnt_inv[i, j]
        y2[i, j] = (y[i] * expnt[i] + y[j] * expnt[j]) * expnt_inv[i, j]
        z2[i, j] = (z[i] * expnt[i] + z[j] * expnt[j]) * expnt_inv[i, j]
        # squared distance between the (i, j) and (k, l) weighted points
        rpq2[i, j, k, l] = ((x2[i, j] - x2[k, l]) * (x2[i, j] - x2[k, l]) +
                            (y2[i, j] - y2[k, l]) * (y2[i, j] - y2[k, l]) +
                            (z2[i, j] - z2[k, l]) * (z2[i, j] - z2[k, l]))

        f0t = hl.Func("f0t")
        f0n = hl.Func("f0n")
        f0x = hl.Func("f0x")
        f0val = hl.Func("f0val")
        self.add_funcs_by_name([f0t, f0n, f0x, f0val])
        f0t[i, j, k, l] = fac4d[i, j, k, l] * rpq2[i, j, k, l]
        # integer row index into fm, clamped to the dim-0 range of the
        # unclamped fm_in input
        f0n[i, j, k, l] = hl.clamp(hl.i32((f0t[i, j, k, l] + delo2) * rdelta),
                                   fm_in.dim(0).min(),
                                   fm_in.dim(0).max())
        # offset delta * f0n - f0t, used as the polynomial argument below
        f0x[i, j, k, l] = delta * f0n[i, j, k, l] - f0t[i, j, k, l]
        # For f0t >= 28 use 0.88622692545276 / sqrt(f0t) (the constant is
        # approximately sqrt(pi) / 2); otherwise evaluate a nested (Horner-form)
        # polynomial in f0x whose coefficients come from columns 0..4 of row
        # f0n of fm, scaled by 1/2, 1/3 and 1/4 at successive levels.
        f0val[i, j, k, l] = hl.select(
            f0t[i, j, k, l] >= hl.f64(28.0),
            hl.f64(0.88622692545276) / hl.sqrt(f0t[i, j, k, l]),
            fm[f0n[i, j, k, l], 0] + f0x[i, j, k, l] *
            (fm[f0n[i, j, k, l], 1] + f0x[i, j, k, l] * hl.f64(0.5) *
             (fm[f0n[i, j, k, l], 2] + f0x[i, j, k, l] * hl.f64(1. / 3.) *
              (fm[f0n[i, j, k, l], 3] +
               f0x[i, j, k, l] * hl.f64(0.25) * fm[f0n[i, j, k, l], 4]))))

        g = hl.Func("g")
        self.add_funcs_by_name([g])

        # Both branches define g as
        #   (2 * pi^2.5 / denom) * ex * f0val * rnorm[i] * rnorm[j] * rnorm[k] * rnorm[l]
        # and the tracing branch additionally adds g_trace[i, j, k, l].
        if self.tracing and self.tracing_g:
            g_trace_in = hl.ImageParam(hl.Float(64), 4, "g_trace_in")
            # reads outside g_trace_in's bounds yield the constant 0
            g_trace = hl.BoundaryConditions.constant_exterior(g_trace_in, 0)
            self.inputs["g_trace_in"] = g_trace_in
            self.clamps["g_trace"] = g_trace
            g_trace.compute_root()
            g[i, j, k,
              l] = (hl.f64(2.00) * hl.f64(pow(pi, 2.50)) / denom[i, j, k, l]
                    ) * ex[i, j, k, l] * f0val[i, j, k, l] * rnorm[i] * rnorm[
                        j] * rnorm[k] * rnorm[l] + g_trace[i, j, k, l]
        else:
            # no trace input in this configuration
            g_trace = None
            g[i, j, k,
              l] = (hl.f64(2.00) * hl.f64(pow(pi, 2.50)) /
                    denom[i, j, k, l]) * ex[i, j, k, l] * f0val[
                        i, j, k, l] * rnorm[i] * rnorm[j] * rnorm[k] * rnorm[l]