Ejemplo n.º 1
0
def test_basics2():
    """Check that integer-only index expressions on a Func can be built.

    A clamped view of channel 0 of a 3-D float input is indexed with
    expressions mixing Vars, Python ints, explicit Int(32) casts and RDom
    variables. Nothing is realized; building each expression without an
    exception is the whole test.
    """
    src = hl.ImageParam(hl.Float(32), 3, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is passed in during code generation.
    s_sigma = 8

    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp both coordinates into the image, channel 0.
    clamped = hl.Func('clamped')
    x_in_range = hl.clamp(x, 0, src.width() - 1)
    y_in_range = hl.clamp(y, 0, src.height() - 1)
    clamped[x, y] = src[x_in_range, y_in_range, 0]

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    half = s_sigma // 2
    grid_x = x * s_sigma
    grid_y = y * s_sigma

    plain = clamped[grid_x, grid_y]
    times_cast_one = clamped[grid_x * hl.cast(hl.Int(32), 1),
                             grid_y * hl.cast(hl.Int(32), 1)]
    # Using true division (s_sigma/2) for the offset here should fail:
    # clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]
    minus_cast_half = clamped[grid_x - hl.cast(hl.Int(32), half),
                              grid_y - hl.cast(hl.Int(32), half)]
    minus_half = clamped[grid_x - half, grid_y - half]
    with_rdom = clamped[grid_x + r.x - half, grid_y + r.y - half]
Ejemplo n.º 2
0
def test_basics2():
    """Check integer index expressions build, and a float index is rejected.

    A clamped view of channel 0 of a 3-D float input is indexed with
    several integer-valued expressions, all of which must build. Indexing
    with an offset produced by true division must raise a RuntimeError
    mentioning the implicit float32-to-int cast.
    """
    src = hl.ImageParam(hl.Float(32), 3, 'input')
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8

    x, y, z, c = (hl.Var(name) for name in ('x', 'y', 'z', 'c'))

    # Boundary condition: clamp both coordinates into the image, channel 0.
    clamped = hl.Func('clamped')
    x_in_range = hl.clamp(x, 0, src.width() - 1)
    y_in_range = hl.clamp(y, 0, src.height() - 1)
    clamped[x, y] = src[x_in_range, y_in_range, 0]

    # Construct the bilateral grid
    r = hl.RDom([(0, s_sigma), (0, s_sigma)], 'r')
    half = s_sigma // 2
    grid_x = x * s_sigma
    grid_y = y * s_sigma

    plain = clamped[grid_x, grid_y]
    times_cast_one = clamped[grid_x * hl.cast(hl.Int(32), 1),
                             grid_y * hl.cast(hl.Int(32), 1)]
    minus_cast_half = clamped[grid_x - hl.cast(hl.Int(32), half),
                              grid_y - hl.cast(hl.Int(32), half)]
    minus_half = clamped[grid_x - half, grid_y - half]
    with_rdom = clamped[grid_x + r.x - half, grid_y + r.y - half]

    # s_sigma / 2 is a Python float, so this index must be refused.
    failure = None
    try:
        clamped[x * s_sigma - s_sigma / 2, y * s_sigma - s_sigma / 2]
    except RuntimeError as e:
        failure = e
    assert failure is not None, 'Did not see expected exception!'
    assert 'Implicit cast from float32 to int' in str(failure)
Ejemplo n.º 3
0
def test_basics():
    """Smoke-test indexing, arithmetic, scheduling and JIT of a two-pass blur.

    Most statements are evaluated only to prove that the expression can be
    constructed; their results are deliberately discarded.
    """
    src = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x, blur_xx, blur_y = (hl.Func(name)
                               for name in ('blur_x', 'blur_xx', 'blur_y'))

    one = hl.cast(hl.Int(32), 1)
    assert one.type() == hl.Int(32)

    # Index the input with Vars, constants and derived expressions.
    z = x + 1
    for coords in ((x, y), (0, 0), (z, y), (x + 1, y)):
        src[coords]
    src[x, y] + src[x + 1, y]

    if False:
        # Disabled in the original: arithmetic on references to blur_x,
        # which has no definition yet at this point.
        first = blur_x[x, y]
        second = blur_x[x, y + 1]
        first + second
        blur_x[x, y] + blur_x[x, y + 1]

    (src[x, y] + src[x + 1, y]) / 2
    blur_x[x, y]
    blur_xx[x, y] = src[x, y]

    # Horizontal 3-tap average followed by a vertical 3-tap average.
    blur_x[x, y] = (src[x, y] + src[x + 1, y] + src[x + 2, y]) / 3
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule: 8x4 tiles, parallel over y, 8-wide vectors.
    xi = hl.Var('xi')
    yi = hl.Var('yi')
    tiled = blur_y.tile(x, y, xi, yi, 8, 4)
    tiled.parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)
    blur_y.compile_jit()
Ejemplo n.º 4
0
def test_image_to_ndarray():
    """Check that image_to_ndarray preserves the element type and contents.

    The test is skipped (with a message) when no ``image_to_ndarray`` name
    exists in this module's globals.
    """
    helper_available = "image_to_ndarray" in globals()
    if not helper_available:
        print("Skipping test_image_to_ndarray")
        return

    import numpy

    # Float(32) image -> float32 array.
    float_image = Image(hl.Float(32), 50, 50)
    assert float_image.type() == hl.Float(32)

    float_array = image_to_ndarray(float_image)
    print("a0.shape", float_array.shape)
    print("a0.dtype", float_array.dtype)
    assert float_array.dtype == numpy.float32

    # Int(16) image -> int16 array, with a value written before conversion.
    short_image = Image(hl.Int(16), 50, 50)
    assert short_image.type() == hl.Int(16)
    short_image[24, 24] = 42
    assert short_image(24, 24) == 42

    short_array = image_to_ndarray(short_image)
    print("a1.shape", short_array.shape)
    print("a1.dtype", short_array.dtype)
    assert short_array.dtype == numpy.int16
    assert short_array[24, 24] == 42
Ejemplo n.º 5
0
def merge_temporal(images, alignment):
    """Build the Func that blends alternate frames into the reference frame.

    Frame 0 of ``images`` is used as the reference. For every tile
    ``(tx, ty)`` and frame ``n`` a weight is derived from the summed
    absolute difference between reference and offset alternate samples of
    the layer returned by ``box_down2``. The output is the weighted average
    of the reference pixel (weight 1) and the aligned alternate pixels.

    :param images: frame stack; dimension 2 indexes the frame.
    :param alignment: indexed as ``alignment[tx, ty, n]`` and wrapped in
        ``Point`` to obtain the per-tile offset.
    :return: the ``merge_temporal_output`` Func, indexed ``[ix, iy, tx, ty]``.
    """
    weight = hl.Func("merge_temporal_weights")
    total_weight = hl.Func("merge_temporal_total_weights")
    output = hl.Func("merge_temporal_output")

    ix = hl.Var('ix')
    iy = hl.Var('iy')
    tx = hl.Var('tx')
    ty = hl.Var('ty')
    n = hl.Var('n')

    # 16x16 reduction within a tile.
    tile_rdom = hl.RDom([(0, 16), (0, 16)])
    # Reduction over the alternate frames only (starts at frame 1).
    frame_rdom = hl.RDom([(1, images.dim(2).extent() - 1)])

    mirrored = hl.BoundaryConditions.mirror_interior(
        images, [(0, images.width()), (0, images.height())])
    layer = box_down2(mirrored, "merge_layer")

    # ---- Per-tile weights, measured on `layer` ----
    lower = Point(MINIMUM_OFFSET, MINIMUM_OFFSET)
    upper = Point(MAXIMUM_OFFSET, MAXIMUM_OFFSET)
    tile_offset = Point(alignment[tx, ty, n]).clamp(lower, upper)

    # The alignment offset is halved before being applied to `layer`.
    layer_alt_x = idx_layer(tx, tile_rdom.x) + tile_offset.x / 2
    layer_alt_y = idx_layer(ty, tile_rdom.y) + tile_offset.y / 2

    layer_ref = layer[idx_layer(tx, tile_rdom.x), idx_layer(ty, tile_rdom.y), 0]
    layer_alt = layer[layer_alt_x, layer_alt_y, n]

    factor = 8.0
    min_distance = 10
    max_distance = 300  # max L1 distance, otherwise the value is not used

    abs_diff = hl.abs(hl.cast(hl.Int(32), layer_ref) - hl.cast(hl.Int(32), layer_alt))
    distance = hl.sum(abs_diff) / 256

    normal_distance = hl.max(
        1, hl.cast(hl.Int(32), distance) / factor - min_distance / factor)

    # Weight for the alternate frame: zero beyond the cut-off, otherwise
    # the reciprocal of the normalised distance.
    too_far = normal_distance > (max_distance - min_distance)
    weight[tx, ty, n] = hl.select(too_far, 0.0, 1.0 / normal_distance)

    # The extra 1 is the weight of the reference frame itself.
    total_weight[tx, ty] = hl.sum(weight[tx, ty, frame_rdom]) + 1

    # ---- Weighted average on the full-resolution images ----
    frame_offset = Point(alignment[tx, ty, frame_rdom])

    image_alt_x = idx_im(tx, ix) + frame_offset.x
    image_alt_y = idx_im(ty, iy) + frame_offset.y

    image_ref = mirrored[idx_im(tx, ix), idx_im(ty, iy), 0]
    image_alt = mirrored[image_alt_x, image_alt_y, frame_rdom]

    # Sum all values according to their weight, and divide by total weight
    # to obtain the average.
    weighted_alternates = hl.sum(
        weight[tx, ty, frame_rdom] * image_alt / total_weight[tx, ty])
    output[ix, iy, tx, ty] = weighted_alternates + image_ref / total_weight[tx, ty]

    # Schedule: every stage at root, parallel over tile rows, vectorized.
    weight.compute_root().parallel(ty).vectorize(tx, 16)
    total_weight.compute_root().parallel(ty).vectorize(tx, 16)
    output.compute_root().parallel(ty).vectorize(ix, 32)

    return output
Ejemplo n.º 6
0
def test_types():
    """Check inequality and is_float() for two integer Halide types."""
    wide = hl.Int(32)
    narrow = hl.Int(16)

    assert wide != narrow
    for int_type in (wide, narrow):
        assert int_type.is_float() == False

    print("hl.Int(32) type:", hl.Int(32))
    print("hl.Int(16) type:", hl.Int(16))
Ejemplo n.º 7
0
def test_basics():
    """Smoke-test a two-pass blur, printing a progress marker at each step.

    Most statements are evaluated only to prove that the expression can be
    constructed; the "ping" prints show how far the test got.
    """
    src = hl.ImageParam(hl.UInt(16), 2, 'input')
    x = hl.Var('x')
    y = hl.Var('y')

    blur_x, blur_xx, blur_y = (hl.Func(name)
                               for name in ('blur_x', 'blur_xx', 'blur_y'))

    one = hl.cast(hl.Int(32), 1)
    assert one.type() == hl.Int(32)
    print("yy type:", one.type())

    # Index the input with Vars, constants and derived expressions.
    z = x + 1
    for coords in ((x, y), (0, 0), (z, y), (x + 1, y)):
        src[coords]
    print("ping 0.2")
    src[x, y] + src[x + 1, y]

    if False:
        # Disabled in the original: arithmetic on references to blur_x,
        # which has no definition yet at this point.
        first = blur_x[x, y]
        second = blur_x[x, y + 1]
        first + second
        blur_x[x, y] + blur_x[x, y + 1]

    print("ping 0.3")
    (src[x, y] + src[x + 1, y]) / 2
    print("ping 0.4")
    blur_x[x, y]
    print("ping 0.4.1")
    blur_xx[x, y] = src[x, y]

    # Horizontal 3-tap average followed by a vertical 3-tap average.
    print("ping 0.5")
    blur_x[x, y] = (src[x, y] + src[x + 1, y] + src[x + 2, y]) / 3
    print("ping 1")
    blur_y[x, y] = (blur_x[x, y] + blur_x[x, y + 1] + blur_x[x, y + 2]) / 3

    # Schedule: 8x4 tiles, parallel over y, 8-wide vectors.
    xi = hl.Var('xi')
    yi = hl.Var('yi')
    print("ping 2")
    tiled = blur_y.tile(x, y, xi, yi, 8, 4)
    tiled.parallel(y).vectorize(xi, 8)
    blur_x.compute_at(blur_y, x).vectorize(x, 8)

    blur_y.compile_jit()
    print("Compiled to jit")
Ejemplo n.º 8
0
def test_buffer_to_ndarray():
    """Check shared vs. copied NumPy views of a Buffer, including a crop.

    ``np.array(buf, copy=False)`` must alias the Buffer's storage while
    ``copy=True`` must not. The same is verified for a cropped copy of the
    buffer, whose dimension 0 starts at min 1.
    """
    def check(array, shape, index, value):
        # Shape, element type and one probe element of a converted array.
        assert array.shape == shape
        assert array.dtype == np.int16
        assert array[index] == value

    buf = hl.Buffer(hl.Int(16), [4, 4])
    assert buf.type() == hl.Int(16)
    buf.fill(0)
    buf[1, 2] = 42
    assert buf[1, 2] == 42

    # Should share storage with buf
    shared = np.array(buf, copy=False)
    check(shared, (4, 4), (1, 2), 42)

    # Should *not* share storage with buf
    copied = np.array(buf, copy=True)
    check(copied, (4, 4), (1, 2), 42)

    # A write through the Buffer shows up only in the shared array.
    buf[1, 2] = 3
    assert shared[1, 2] == 3
    assert copied[1, 2] == 42

    # Ensure that Buffers that have nonzero mins get converted correctly,
    # since the Python Buffer Protocol doesn't have the 'min' concept
    cropped = buf.copy()
    cropped.crop(dimension=0, min=1, extent=2)

    # Should share storage with cropped; Buffer index 1 maps to array row 0.
    cropped_shared = np.array(cropped, copy=False)
    check(cropped_shared, (2, 4), (0, 2), 3)

    # Should *not* share storage with anything
    cropped_copied = np.array(cropped, copy=True)
    check(cropped_copied, (2, 4), (0, 2), 3)

    cropped[1, 2] = 5

    # cropped was made from buf.copy(), so buf and its arrays are untouched.
    assert buf[1, 2] == 3
    assert shared[1, 2] == 3
    assert copied[1, 2] == 42

    assert cropped[1, 2] == 5
    assert cropped_shared[0, 2] == 5
    assert cropped_copied[0, 2] == 3
Ejemplo n.º 9
0
def resize_scale(input, fx, fy):
    """Return a Func that samples ``input`` at ``(x / fx, y / fy)``.

    The source coordinates are computed by two helper Funcs (with store
    tracing enabled) that divide the output coordinate by the scale factor
    and cast the result to Int(32).

    :param input: 3-D source, indexed ``[x, y, c]``.
    :param fx: horizontal scale factor.
    :param fy: vertical scale factor.
    :return: the ``final`` Func, indexed ``[x, y, c]``.
    """
    # Created but never referenced again in this function.
    shr = hl.Func('resize')
    x = hl.Var("x")
    y = hl.Var("y")
    c = hl.Var("c")

    # Source column for every output column.
    index_x = hl.Func("index_x")
    index_x.trace_stores()
    index_x[x] = hl.cast(hl.Int(32), x / fx)

    # Source row for every output row.
    index_y = hl.Func("index_y")
    index_y.trace_stores()
    index_y[y] = hl.cast(hl.Int(32), y / fy)

    final = hl.Func("final")
    source_x = index_x[x]
    source_y = index_y[y]
    final[x, y, c] = input[source_x, source_y, c]
    return final
Ejemplo n.º 10
0
def test_basics2():
    """Print and assert result types of integer Var arithmetic, then index.

    First prints a number of expressions and their types for inspection,
    then asserts the types of multiplication and division of a Var by
    Python numbers, and finally builds several integer index expressions
    on a clamped view of channel 0 of a 3-D float input. Nothing is
    realized.
    """
    input = hl.ImageParam(hl.Float(32), 3, 'input')
    # Value needed if not generating an executable
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    s_sigma = 8  # This is passed during code generation in the C++ version

    x = hl.Var('x')
    y = hl.Var('y')
    z = hl.Var('z')
    c = hl.Var('c')

    # Add a boundary condition
    clamped = hl.Func('clamped')
    clamped[x, y] = input[hl.clamp(x, 0, input.width() - 1),
                          hl.clamp(y, 0, input.height() - 1), 0]

    # Diagnostic output: values first, then expression types.
    print("s_sigma", s_sigma)
    print("s_sigma/2", s_sigma / 2)
    print("s_sigma//2", s_sigma // 2)
    print()
    print("x * s_sigma", x * s_sigma)
    print("x * 8", x * 8)
    print("x * 8 + 4", x * 8 + 4)
    print("x * 8 * 4", x * 8 * 4)
    print()
    print("x", x)
    # The original printed this label without the value it announces.
    print("(x * s_sigma).type()", (x * s_sigma).type())
    print("(x * 8).type()", (x * 8).type())
    print("(x * 8 + 4).type()", (x * 8 + 4).type())
    print("(x * 8 * 4).type()", (x * 8 * 4).type())
    print("(x * 8 / 4).type()", (x * 8 / 4).type())
    print("((x * 8) * 4).type()", ((x * 8) * 4).type())
    print("(x * (8 * 4)).type()", (x * (8 * 4)).type())

    assert (x * 8).type() == hl.Int(32)
    assert (x * 8 * 4).type() == hl.Int(32)  # yes this did fail at some point
    assert ((x * 8) / 4).type() == hl.Int(32)
    # 8 / 4 is evaluated by Python first and yields a float.
    assert (x * (8 / 4)).type() == hl.Float(32)  # under python3 division rules
    assert (x * (8 // 4)).type() == hl.Int(32)
    # (x * 8 // 4).type() == hl.Int(32) is not asserted: not yet implemented

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    val0 = clamped[x * s_sigma, y * s_sigma]
    val00 = clamped[x * s_sigma * hl.cast(hl.Int(32), 1),
                    y * s_sigma * hl.cast(hl.Int(32), 1)]
    # Using true division (s_sigma/2) for the offset here should fail:
    # clamped[x * s_sigma - s_sigma/2, y * s_sigma - s_sigma/2]
    val22 = clamped[x * s_sigma - hl.cast(hl.Int(32), s_sigma // 2),
                    y * s_sigma - hl.cast(hl.Int(32), s_sigma // 2)]
    val2 = clamped[x * s_sigma - s_sigma // 2, y * s_sigma - s_sigma // 2]
    val3 = clamped[x * s_sigma + r.x - s_sigma // 2,
                   y * s_sigma + r.y - s_sigma // 2]

    return
Ejemplo n.º 11
0
def test_basics3():
    """Exercise select() and in-place addition on a Func reference.

    A clamped sample is turned into an integer bin index ``zi``; a
    reference to ``histogram`` at that bin is then updated with ``+=``,
    first by a constant and then by a select() expression. Intermediate
    objects are printed for inspection.
    """
    src = hl.ImageParam(hl.Float(32), 3, 'input')
    # A default value is needed when no executable is being generated.
    r_sigma = hl.Param(hl.Float(32), 'r_sigma', 0.1)
    # In the C++ version this is passed in during code generation.
    s_sigma = 8

    x, y, z, c = hl.Var('x'), hl.Var('y'), hl.Var('z'), hl.Var('c')

    # Boundary condition: clamp both coordinates into the image, channel 0.
    clamped = hl.Func('clamped')
    x_in_range = hl.clamp(x, 0, src.width() - 1)
    y_in_range = hl.clamp(y, 0, src.height() - 1)
    clamped[x, y] = src[x_in_range, y_in_range, 0]

    # Construct the bilateral grid
    r = hl.RDom(0, s_sigma, 0, s_sigma, 'r')
    half = s_sigma // 2
    sample = clamped[x * s_sigma + r.x - half, y * s_sigma + r.y - half]
    sample = hl.clamp(sample, 0.0, 1.0)
    # Alternative formulation: hl.cast(hl.Int(32), val * (1.0/r_sigma) + 0.5)
    zi = hl.cast(hl.Int(32), (sample / r_sigma) + 0.5)

    histogram = hl.Func('histogram')
    histogram[x, y, z, c] = 0.0

    contribution = hl.select(c == 0, sample, 1.0)
    print("hl.select(c == 0, val, 1.0)", contribution)

    bin_ref = histogram[x, y, zi, c]
    print("histogram[x, y, zi, c]", histogram[x, y, zi, c])
    print("histogram[x, y, zi, c]", bin_ref)
    bin_ref += 5
    print("histogram[x, y, zi, c] after += 5", bin_ref)
    bin_ref += contribution
Ejemplo n.º 12
0
def test_ndarray_to_buffer():
    """Check that a Buffer built from an ndarray shares its data and shape.

    The assertions pin the Buffer's type, name, per-dimension min / max /
    extent / stride, and that writes through either object are visible
    through the other.
    """
    array = np.ones((200, 300), dtype=np.int32)

    # Buffer always shares data (when possible) by default, and maintains
    # the shape of the data source.
    buf = hl.Buffer(array, "float32_test_buffer")
    assert buf.type() == hl.Int(32)
    assert buf.name() == "float32_test_buffer"
    assert buf.all_equal(1)

    # (dimension, expected extent, expected stride)
    for axis, extent, stride in ((0, 200, 300), (1, 300, 1)):
        dim = buf.dim(axis)
        assert dim.min() == 0
        assert dim.max() == extent - 1
        assert dim.extent() == extent
        assert dim.stride() == stride

    # ndarray -> Buffer
    array[12, 34] = 56
    assert buf[12, 34] == 56

    # Buffer -> ndarray
    buf[56, 34] = 12
    assert array[56, 34] == 12
Ejemplo n.º 13
0
def test_nobuildmethod():
    """Run the ``nobuildmethod`` generator and check its output.

    A 2x2 float buffer filled with 123 is passed to the generator together
    with 1.0; the result is realized into a 2x2 Int(32) buffer whose every
    element must equal 123.
    """
    x = hl.Var()
    y = hl.Var()
    c = hl.Var()
    target = hl.get_jit_target_from_environment()

    extents = [2, 2]
    b_in = hl.Buffer(hl.Float(32), extents)
    b_in.fill(123)
    b_out = hl.Buffer(hl.Int(32), extents)

    pipeline = nobuildmethod.generate(target, b_in, 1.0)
    pipeline.realize(b_out)

    assert b_out.all_equal(123)
Ejemplo n.º 14
0
 def __init__(self, x=None, y=None):
     """Store a 2-D point as two Int(16) expressions, ``self.x`` and ``self.y``.

     Accepted call forms:

     * no arguments: the point (0, 0);
     * one argument: an ``hl.FuncRef``, ``hl.Tuple`` or Python ``tuple``
       whose elements 0 and 1 are the coordinates;
     * two arguments: ``x`` and ``y`` given separately.

     :raises TypeError: if a single argument of any other type is passed.
         The original silently left ``self.x`` / ``self.y`` unset in that
         case, which only failed later on first attribute access.
     """
     i16 = hl.Int(16)
     if x is None and y is None:
         self.x = hl.cast(i16, 0)
         self.y = hl.cast(i16, 0)
     elif x is not None and y is None:
         if isinstance(x, hl.FuncRef):
             # Kept from the original, result unused; presumably this
             # rejects FuncRefs that are not tuple-valued -- TODO confirm.
             hl.Tuple(x)
         elif not isinstance(x, (tuple, hl.Tuple)):
             raise TypeError(
                 'Point() with a single argument needs a FuncRef, Tuple or '
                 'tuple, got %s' % type(x).__name__)
         self.x = hl.cast(i16, x[0])
         self.y = hl.cast(i16, x[1])
     else:
         self.x = hl.cast(i16, x)
         self.y = hl.cast(i16, y)
Ejemplo n.º 15
0
def get_erode(input):
    """Build a separable 5x5 erosion: a 5-tap minimum along x, then along y.

    :param input: 3-D source indexed ``[x, y, c]``; x and y are clamped to
        ``[0, width - 1]`` and ``[0, height - 1]`` before sampling.
    :return: the ``erode_y`` Func holding the eroded result.
    """
    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    input_clamped = hl.Func("input_clamped")
    erode_x = hl.Func("erode_x")
    erode_y = hl.Func("erode_y")

    # Clamp-to-edge boundary condition.
    x_in_range = hl.clamp(x,
                          hl.cast(hl.Int(32), 0),
                          hl.cast(hl.Int(32), input.width() - 1))
    y_in_range = hl.clamp(y,
                          hl.cast(hl.Int(32), 0),
                          hl.cast(hl.Int(32), input.height() - 1))
    input_clamped[x, y, c] = input[x_in_range, y_in_range, c]

    # Horizontal pass: fold min over x-2 .. x+2, left to right.
    row_min = input_clamped[x - 2, y, c]
    for column in (x - 1, x, x + 1, x + 2):
        row_min = hl.min(row_min, input_clamped[column, y, c])
    erode_x[x, y, c] = row_min

    # Vertical pass: fold min over y-2 .. y+2 of the horizontal result.
    column_min = erode_x[x, y - 2, c]
    for row in (y - 1, y, y + 1, y + 2):
        column_min = hl.min(column_min, erode_x[x, row, c])
    erode_y[x, y, c] = column_min

    yi = hl.Var("yi")

    # CPU Schedule: both passes at root, strips of 8 rows in parallel.
    for stage in (erode_x, erode_y):
        stage.compute_root().split(y, y, yi, 8).parallel(y)

    return erode_y
Ejemplo n.º 16
0
def test_partialbuildmethod():
    """Check that the ``partialbuildmethod`` generator is rejected.

    Calling ``generate`` must raise a RuntimeError whose message says that
    build()-style generators are not supported in the Python bindings.
    """
    x = hl.Var()
    y = hl.Var()
    c = hl.Var()
    target = hl.get_jit_target_from_environment()

    extents = [2, 2]
    b_in = hl.Buffer(hl.Float(32), extents)
    b_in.fill(123)
    b_out = hl.Buffer(hl.Int(32), extents)

    failure = None
    try:
        partialbuildmethod.generate(target, b_in, 1)
    except RuntimeError as e:
        failure = e
    assert failure is not None, 'Did not see expected exception!'
    assert "Generators that use build() (instead of generate()+Output<>) are not supported in the Python bindings." in str(failure)
Ejemplo n.º 17
0
def align_layer(layer, prev_alignment, prev_min, prev_max):
    """Build the per-tile alignment Func for one layer.

    For every tile ``(tx, ty)`` and frame ``n`` the L1 distance between the
    reference frame (frame 0) and frame ``n`` is summed over a 16x16 tile
    for each candidate displacement ``(xi, yi)``. The displacement with the
    smallest score, searched over ``[-4, 4)`` in both axes, is added to the
    scaled alignment inherited from the previous layer.

    :param layer: Func indexed ``[x, y, n]``; its name prefixes the names
        of the Funcs created here.
    :param prev_alignment: alignment of the previous (coarser) layer.
    :param prev_min: lower clamp applied to the previous alignment.
    :param prev_max: upper clamp applied to the previous alignment.
    :return: the alignment Func, indexed ``[tx, ty, n]``.
    """
    base_name = layer.name()
    scores = hl.Func(base_name + "_scores")
    alignment = hl.Func(base_name + "_alignment")

    xi = hl.Var("xi")
    yi = hl.Var("yi")
    tx = hl.Var('tx')
    ty = hl.Var('ty')
    n = hl.Var('n')

    tile_rdom = hl.RDom([(0, 16), (0, 16)])
    search_rdom = hl.RDom([(-4, 8), (-4, 8)])

    # Alignment of the previous (more coarse) layer scaled to this (finer)
    # layer.
    coarse = Point(prev_alignment[prev_tile(tx), prev_tile(ty), n])
    prev_offset = DOWNSAMPLE_RATE * coarse.clamp(prev_min, prev_max)

    ref_x = idx_layer(tx, tile_rdom.x)
    ref_y = idx_layer(ty, tile_rdom.y)
    # (x, y) coordinates in the search region, relative to the offset
    # obtained from the alignment of the previous layer.
    alt_x = ref_x + prev_offset.x + xi
    alt_y = ref_y + prev_offset.y + yi

    reference = layer[ref_x, ref_y, 0]  # the first frame
    alternate = layer[alt_x, alt_y, n]

    # L1 distance between reference frame and alternate frame
    l1 = hl.abs(hl.cast(hl.Int(32), reference) - hl.cast(hl.Int(32), alternate))
    scores[xi, yi, tx, ty, n] = hl.sum(l1)

    # Alignment for each tile, where L1 distances are minimum
    best = hl.argmin(scores[search_rdom.x, search_rdom.y, tx, ty, n])
    alignment[tx, ty, n] = Point(best) + prev_offset

    scores.compute_at(alignment, tx).vectorize(xi, 8)
    alignment.compute_root().parallel(ty).vectorize(tx, 16)

    return alignment
Ejemplo n.º 18
0
def test_division():
    """Check result types and values of ``/`` and ``//`` on Halide Params.

    The first table pins the result type for each operand pairing; the
    second pins the evaluated value, showing that ``/`` on integers floors
    rather than producing a float and that ``//`` floors even for floats.
    """
    f32 = hl.Param(hl.Float(32), 'f32', -32.0)
    f64 = hl.Param(hl.Float(64), 'f64', 64.0)
    i16 = hl.Param(hl.Int(16), 'i16', -16)
    i32 = hl.Param(hl.Int(32), 'i32', 32)
    u16 = hl.Param(hl.UInt(16), 'u16', 16)
    u32 = hl.Param(hl.UInt(32), 'u32', 32)

    # Verify that the types match the rules in match_types()
    expected_types = [
        (f32 / f64, hl.Float(64)),
        (f32 // f64, hl.Float(64)),
        (i16 / i32, hl.Int(32)),
        (i16 // i32, hl.Int(32)),
        (u16 / u32, hl.UInt(32)),
        (u16 // u32, hl.UInt(32)),
        # int / uint -> int
        (u16 / i32, hl.Int(32)),
        (i32 // u16, hl.Int(32)),
        # any / float -> float
        # float / any -> float
        (u16 / f32, hl.Float(32)),
        (u16 // f32, hl.Float(32)),
        (i16 / f64, hl.Float(64)),
        (i16 // f64, hl.Float(64)),
    ]
    for expr, result_type in expected_types:
        assert expr.type() == result_type

    # Verify that division semantics match those for Halide (rather than
    # python); this differs for int/int which defaults to float (rather
    # than floordiv) in Python3. Also test that // always floors the
    # result, even for float.
    expected_values = [
        (f32 / f64, -0.5),
        (f32 // f64, -1.0),
        (i16 / i32, -1),
        (i16 // i32, -1),
        (i32 / i16, -2),
        (u16 / u32, 0),
        (u16 // u32, 0),
        (u16 / i32, 0),
        (i32 // u16, 2),
        (u16 / f32, -0.5),
        (u16 // f32, -1.0),
        (i16 / f64, -0.25),
        (i16 // f64, -1.0),
    ]
    for expr, value in expected_values:
        assert _evaluate(expr) == value
Ejemplo n.º 19
0
    def buffer_t_to_buffer_struct(buffer):
        """Copy the raw description of an Int(32) buffer into a BufferStruct.

        The host pointer, device handle, element size, dirty flags and the
        first four extent / stride / min entries are copied field by field.

        :param buffer: buffer whose ``type()`` must be ``hl.Int(32)``.
        :return: a freshly created, populated ``BufferStruct``.
        """
        assert buffer.type() == hl.Int(32)
        b = buffer.raw_buffer()
        bb = BufferStruct()

        uint8_p_t = ctypes.POINTER(ctypes.c_ubyte)
        # host_ptr_as_int is the easy way to get the host address; the
        # complicated way would be:
        # hl.buffer_to_ndarray(hl.Buffer(hl.UInt(8), b)).ctypes.data
        host_p = buffer.host_ptr_as_int()
        # Fixed: this was `ctypes.hl.cast`, but ctypes has no `hl` attribute.
        bb.host = ctypes.cast(host_p, uint8_p_t)
        bb.dev = b.dev
        bb.elem_size = b.elem_size
        bb.host_dirty = b.host_dirty
        bb.dev_dirty = b.dev_dirty
        for i in range(4):
            bb.extent[i] = b.extent[i]
            bb.stride[i] = b.stride[i]
            # Fixed: this was `bb.hl.min[i] = b.hl.min[i]`; the field sits
            # next to extent/stride and is named `min`.
            bb.min[i] = b.min[i]
        return bb
Ejemplo n.º 20
0
def test_float_or_int():
    """Check which Var / Expr arithmetic yields Int(32) and which Float(32).

    Expressions are grouped by expected result type. Several entries are
    repeated on purpose and some are noted as having failed in the past.
    """
    x = hl.Var('x')
    i, f = hl.Int(32), hl.Float(32)

    assert type(x) == hl.Var

    integer_valued = [
        (x//2) - 1 + 2*(x%2),
        (x/2) - 1 + 2*(x%2),
        (x/2),
        (x//2),
        (x//2) - 1,
        (x%2),
        2*(x%2),
        (x//2) - 1 + 2*(x%2),
        x.as_expr(),
        hl.Expr(2),
        x + 2,
        2 + x,
        hl.Expr(2) + hl.Expr(3),
        hl.Expr(2) + 3,
        x.as_expr() + 2,  # yes this failed at some point
        2 + x.as_expr(),
        2 * (x + 2),  # yes this failed at some point
        x + 0,
        x % 2,
        2 * x,
        x * 2,
        x * 2,
        (x % 2),
        (x % 2) * 2,
        (x + 2) * 2,
    ]
    for expr in integer_valued:
        assert expr.type() == i

    # A float operand on either side makes the result a float.
    float_valued = [
        (x/2.0),
        hl.Expr(2.0),
        hl.Expr(2.0) + hl.Expr(3),
        hl.Expr(2) + 3.0,
    ]
    for expr in float_valued:
        assert expr.type() == f
Ejemplo n.º 21
0
def test_float_or_int():
    """Check which Var / Expr arithmetic yields Int(32) and which Float(32).

    Expressions are grouped by expected result type; several entries are
    repeated on purpose.
    """
    x = hl.Var('x')
    i32, f32 = hl.Int(32), hl.Float(32)

    assert type(x) == hl.Var

    integer_valued = [
        hl.Expr(x),
        x * 2,
        x / 2,
        (x // 2) - 1 + 2 * (x % 2),
        (x / 2) - 1 + 2 * (x % 2),
        (x / 2),
        (x // 2),
        (x // 2) - 1,
        (x % 2),
        2 * (x % 2),
        (x // 2) - 1 + 2 * (x % 2),
        hl.Expr(x),
        hl.Expr(2),
        x + 2,
        2 + x,
        hl.Expr(2) + hl.Expr(3),
        hl.Expr(2) + 3,
        hl.Expr(x) + 2,
        2 + hl.Expr(x),
        2 * (x + 2),
        x + 0,
        x % 2,
        2 * x,
        x * 2,
        x * 2,
        (x % 2),
        (x % 2) * 2,
        2 * (x % 2),
        (x + 2) * 2,
    ]
    for expr in integer_valued:
        assert expr.type() == i32

    # A float operand on either side makes the result a float.
    float_valued = [
        (x / 2.0),
        hl.Expr(2.0),
        hl.Expr(2.0) + hl.Expr(3),
        hl.Expr(2) + 3.0,
    ]
    for expr in float_valued:
        assert expr.type() == f32
Ejemplo n.º 22
0
def contrast(input, strength, black_point):
    """Build a Func applying a sine-shaped contrast curve and a black point.

    The first definition maps each sample through
    ``slope * sin(factor * v - inner) + slope * sin(inner)`` saturated to
    16 bits; the second subtracts ``black_point`` and rescales by
    ``65535 / (65535 - black_point)``, again saturated to 16 bits.

    :param input: 3-D source indexed ``[x, y, c]``.
    :param strength: divisor of the sine argument scale.
    :param black_point: value subtracted before the final rescale.
    :return: the ``contrast_output`` Func.
    """
    output = hl.Func("contrast_output")

    x = hl.Var("x")
    y = hl.Var("y")
    c = hl.Var("c")

    # Constants of the curve.
    inner_constant = math.pi / (2 * strength)
    sin_constant = hl.sin(inner_constant)
    slope = 65535 / (2 * sin_constant)
    constant = slope * sin_constant
    factor = math.pi / (strength * 65535)

    sample = hl.cast(hl.Float(32), input[x, y, c])
    curved = slope * hl.sin(factor * sample - inner_constant) + constant
    output[x, y, c] = hl.u16_sat(curved)

    # NOTE(review): whether this division is integer or floating point
    # depends on the type of black_point -- confirm against callers.
    white_scale = 65535 / (65535 - black_point)

    # Second stage: reads back the value stored by the first definition.
    shifted = hl.cast(hl.Int(32), output[x, y, c]) - black_point
    output[x, y, c] = hl.u16_sat(shifted * white_scale)

    output.compute_root().parallel(y).vectorize(x, 16)

    return output
Ejemplo n.º 23
0
# TODO: This allows you to use "true" div (vs floordiv) in Python2 for the / operator;
# unfortunately it appears to also replace the overloads we've carefully added for Halide.
# Figure out if it's possible to allow this to leave our Halide stuff unaffected.
#
# from __future__ import division

import time, sys
import halide as hl

from datetime import datetime
from scipy.misc import imread, imsave
import numpy as np
import os.path

# Module-level shorthand for the Halide 32-bit integer and 32-bit float types.
int_t = hl.Int(32)
float_t = hl.Float(32)


def get_interpolate(input, levels):
    """
    Build function, schedules it, and invokes jit compiler
    :return: halide.hl.Func

    NOTE(review): only the beginning of the body is visible in this
    excerpt. The visible part creates three lists of ``levels`` Funcs
    ('downsampled<i>', 'downx<i>', 'interpolated<i>') and returns nothing;
    the scheduling, compilation and return described above are not shown
    here -- confirm against the full source.
    """

    # THE ALGORITHM

    # One Func per pyramid level for each of the three stages.
    downsampled = [hl.Func('downsampled%d' % i) for i in range(levels)]
    downx = [hl.Func('downx%d' % l) for l in range(levels)]
    interpolated = [hl.Func('interpolated%d' % i) for i in range(levels)]
    #     level_widths = [hl.Param(int_t,'level_widths%d'%i) for i in range(levels)]
Ejemplo n.º 24
0
def demosaic(input, width, height):
    """Build a Func that interpolates three channels from a mosaiced input.

    Four 5x5 integer kernels, indexed from -2 to 2 in both axes, are
    convolved with the mirrored input and divided by the sum of their taps.
    For every channel ``c`` and every row/column parity the output selects
    one of the four convolution results, or the raw input sample where no
    interpolation case applies.

    :param input: 2-D source indexed ``[x, y]``.
    :param width: extent used for mirroring along x.
    :param height: extent used for mirroring along y.
    :return: the ``demosaic_output`` Func, indexed ``[x, y, c]``.
    """
    print(f'width: {width}, height: {height}')

    # Non-zero taps of each kernel keyed by (x, y) offset; every other tap
    # stays at the fill value 0.
    kernel_taps = (
        {(0, -2): -1, (0, -1): 2,
         (-2, 0): -1, (-1, 0): 2, (0, 0): 4, (1, 0): 2, (2, 0): -1,
         (0, 1): 2, (0, 2): -1},
        {(0, -2): 1, (-1, -1): -2, (1, -1): -2,
         (-2, 0): -2, (-1, 0): 8, (0, 0): 10, (1, 0): 8, (2, 0): -2,
         (-1, 1): -2, (1, 1): -2, (0, 2): 1},
        {(0, -2): -2, (-1, -1): -2, (0, -1): 8, (1, -1): -2,
         (-2, 0): 1, (0, 0): 10, (2, 0): 1,
         (-1, 1): -2, (0, 1): 8, (1, 1): -2, (0, 2): -2},
        {(0, -2): -3, (-1, -1): 4, (1, -1): 4,
         (-2, 0): -3, (0, 0): 12, (2, 0): -3,
         (-1, 1): 4, (1, 1): 4, (0, 2): -3},
    )
    # Normalisation divisor of each kernel (equal to the sum of its taps).
    kernel_sums = (8, 16, 16, 16)

    kernels = [
        hl.Buffer(hl.Int(32), [5, 5], name)
        for name in ("demosaic_f0", "demosaic_f1", "demosaic_f2", "demosaic_f3")
    ]
    # Shift the origin so the kernels are indexed from -2 to 2.
    for kernel in kernels:
        kernel.translate([-2, -2])

    convolved = [
        hl.Func(name)
        for name in ("demosaic_0", "demosaic_1", "demosaic_2", "demosaic_3")
    ]
    output = hl.Func("demosaic_output")

    x, y, c = hl.Var("x"), hl.Var("y"), hl.Var("c")
    window = hl.RDom([(-2, 5), (-2, 5)])

    input_mirror = hl.BoundaryConditions.mirror_interior(
        input, [(0, width), (0, height)])

    for kernel, taps in zip(kernels, kernel_taps):
        kernel.fill(0)
        for (dx, dy), tap in taps.items():
            kernel[dx, dy] = tap

    for result, kernel, divisor in zip(convolved, kernels, kernel_sums):
        neighbourhood = hl.i32(input_mirror[x + window.x, y + window.y])
        result[x, y] = hl.u16_sat(
            hl.sum(neighbourhood * kernel[window.x, window.y]) / divisor)

    d0, d1, d2, d3 = convolved

    # Parity of the position and the requested channel.
    R_row = y % 2 == 0
    B_row = y % 2 != 0
    R_col = x % 2 == 0
    B_col = x % 2 != 0
    at_R = c == 0
    at_G = c == 1
    at_B = c == 2

    # Condition/value pairs; the last argument is the fallback.
    output[x, y, c] = hl.select(
        at_R & R_row & B_col, d1[x, y],
        at_R & B_row & R_col, d2[x, y],
        at_R & B_row & B_col, d3[x, y],
        at_G & R_row & R_col, d0[x, y],
        at_G & B_row & B_col, d0[x, y],
        at_B & B_row & R_col, d1[x, y],
        at_B & R_row & B_col, d2[x, y],
        at_B & R_row & R_col, d3[x, y],
        input[x, y])

    for result in (d0, d1, d2, d3):
        result.compute_root().parallel(y).vectorize(x, 16)

    output.compute_root().parallel(y).align_bounds(x, 2).unroll(x, 2).align_bounds(y, 2).unroll(y, 2).vectorize(x, 16)

    return output
Ejemplo n.º 25
0
def test_fill_all_equal():
    """Check that all_equal() holds after fill() and fails after one write."""
    buf = hl.Buffer(hl.Int(32), [3, 4])

    buf.fill(3)
    uniform_after_fill = buf.all_equal(3)
    assert uniform_after_fill

    # A single differing element must be detected.
    buf[1, 2] = 4
    uniform_after_write = buf.all_equal(3)
    assert not uniform_after_write
Ejemplo n.º 26
0
def test_schedules(verbose=False, test_random=False):
    """Enumerate and randomly sample schedules for a small two-Func pipeline.

    ``f`` samples a clamped 3-channel UInt(16) image and ``g`` adds 1 to
    it. Unless ``test_random`` is set, schedules of ``f`` are enumerated
    deterministically and then drawn at random 100 times. Afterwards 400
    random schedules are generated for ``g``, applied and evaluated, and
    the collected schedule strings must mention every expected primitive.

    All output uses single-argument ``print(...)`` calls so the function
    is valid on Python 3 and prints the same text on Python 2.

    :param verbose: print the schedules and timing information.
    :param test_random: skip the enumeration, evaluate one random schedule,
        print 'Success' and exit the interpreter.
    """
    #random_module.seed(int(sys.argv[1]) if len(sys.argv)>1 else 0)
    halide.exit_on_signal()
    f = halide.Func('f')
    x = halide.Var('x')
    y = halide.Var('y')
    c = halide.Var('c')
    g = halide.Func('g')
    v = halide.Var('v')
    input = halide.UniformImage(halide.UInt(16), 3)
    int_t = halide.Int(32)
    f[x, y, c] = input[
        halide.clamp(x, halide.cast(int_t, 0),
                     halide.cast(int_t, input.width() - 1)),
        halide.clamp(y, halide.cast(int_t, 0),
                     halide.cast(int_t, input.height() - 1)),
        halide.clamp(c, halide.cast(int_t, 0), halide.cast(int_t, 2))]
    #g[v] = f[v,v]
    g[x, y, c] = f[x, y, c] + 1
    assert sorted(halide.all_vars(g).keys()) == sorted(['x', 'y',
                                                        'c'])  #, 'v'])

    if verbose:
        print(halide.func_varlist(f))
        print('caller_vars(f) = %s' % (caller_vars(g, f),))
        print('caller_vars(g) = %s' % (caller_vars(g, g),))

    #    validL = list(valid_schedules(g, f, 4))
    #    validL = [repr(_x) for _x in validL]
    #
    #    for L in sorted(validL):
    #        print repr(L)
    T0 = time.time()
    if not test_random:
        # Deterministic enumeration up to depth 3.
        nvalid_determ = 0
        for L in schedules_func(g, f, 0, 3):
            nvalid_determ += 1
            if verbose:
                print(L)
        # 100 random draws; the final assert expects exactly one schedule
        # per draw.
        nvalid_random = 0
        for i in range(100):
            for L in schedules_func(g, f, 0, DEFAULT_MAX_DEPTH, random=True):
                nvalid_random += 1
    s = []
    for i in range(400):
        d = random_schedule(g, 0, DEFAULT_MAX_DEPTH)
        si = str(d)
        s.append(si)
        if verbose:
            print('Schedule: %s' % (si,))

        d.apply()
        evaluate = d.test((36, 36, 3), input)
        print('evaluate')
        evaluate()
        if test_random:
            # Always exits on the first iteration, so the nvalid_random
            # assert below is never reached in this mode.
            print('Success')
            sys.exit()
    T1 = time.time()

    s = '\n'.join(s)
    assert 'f.chunk(_c0)' in s
    assert 'f.root().vectorize' in s
    assert 'f.root().unroll' in s
    assert 'f.root().split' in s
    assert 'f.root().tile' in s
    assert 'f.root().parallel' in s
    assert 'f.root().transpose' in s

    assert nvalid_random == 100
    if verbose:
        print('generated in %.3f secs' % (T1 - T0))
    print('random_schedule: OK')
def main():
    """Realize a gradient Func into caller-allocated buffers.

    The Func is evaluated twice: into an 8 x 8 buffer anchored at the origin
    and into a 5 x 7 buffer whose minimum coordinate is (100, 50). Every
    stored value is checked against x + y.

    Returns:
        0 once both realizations have been verified.
    """

    def check_gradient(image, x_coords, y_coords):
        # Every pixel must hold the sum of its own coordinates.
        for row in y_coords:
            for col in x_coords:
                assert image[col, row] == col + row, "Something went wrong!"

    # After the rather involved previous lesson, and before tackling the
    # scheduling of complex multi-stage pipelines, this is an easy
    # interlude: evaluating Funcs over rectangular domains that do not
    # begin at the origin.

    # The usual gradient function.
    gradient = hl.Func("gradient")
    x = hl.Var("x")
    y = hl.Var("y")
    gradient[x, y] = x + y

    # Trace the stores so the evaluation can be observed.
    gradient.trace_stores()

    # Earlier lessons realized the gradient by passing a size, e.g.
    #
    # gradient.realize(8, 8)
    #
    # Internally that performs four steps:
    # 1) Generate code that can evaluate gradient over an arbitrary
    #    rectangle.
    # 2) Allocate a new 8 x 8 image.
    # 3) Run the generated code to evaluate gradient for all x, y from
    #    (0, 0) to (7, 7), storing the results in the image.
    # 4) Return the new image as the result of the realize call.

    # When memory is being managed carefully we may not want Halide to
    # allocate the image. realize also accepts an existing buffer and
    # fills it in instead:
    print("Evaluating gradient from (0, 0) to (7, 7)")
    at_origin = hl.Buffer(hl.Int(32), [8, 8])
    gradient.realize(at_origin)

    # Confirm that it holds what we expect.
    check_gradient(at_origin, range(8), range(8))

    # Next, evaluate gradient over a 5 x 7 rectangle located elsewhere,
    # at position (100, 50), so that x and y run from (100, 50) to
    # (104, 56) inclusive.

    # The buffer describing that rectangle is given its size on
    # construction and its top-left corner afterwards.
    offset_image = hl.Buffer(hl.Int(32), [5, 7])
    offset_image.set_min([100, 50])

    print("Evaluating gradient from (100, 50) to (104, 56)")

    # No new code has to be compiled here: the code generated for the
    # first realization can already evaluate gradient over an arbitrary
    # rectangle.
    gradient.realize(offset_image)

    # The buffer is indexed with coordinates that start at (100, 50).
    check_gradient(offset_image, range(100, 105), range(50, 57))

    # Because 'offset_image' stores the Func over a domain that starts
    # at (100, 50), asking for offset_image[0, 0] would read
    # out-of-bounds and probably crash.

    # And if the region of interest is not rectangular? Too bad:
    # Halide only does rectangles :)

    print("Success!")
    return 0
Ejemplo n.º 28
0
def main():
    """Walk through Halide Tuples.

    Covers multi-valued Funcs, realizing them into several buffers, tuple
    reductions (a hand-written argmax checked against a plain-Python
    reference) and tuple-backed user-defined types (a Mandelbrot set
    printed as ASCII art).

    Returns:
        0 after every check has passed.
    """

    # So far Funcs (such as the one below) have evaluated to a single
    # scalar value for each point in their domain.
    single_valued = hl.Func()
    x, y = hl.Var("x"), hl.Var("y")
    single_valued[x, y] = x + y

    # One way to write a hl.Func that returns a collection of values is
    # to add an additional dimension which indexes that
    # collection. This is how we typically deal with color. For
    # example, the hl.Func below represents a collection of three values
    # for every x, y coordinate indexed by c.
    color_image = hl.Func()
    c = hl.Var("c")
    color_image[x, y, c] = hl.select(
        c == 0,
        245,  # Red value
        c == 1,
        42,  # Green value
        132)  # Blue value

    # Since this pattern appears quite often, Halide provides a
    # syntactic sugar to write the code above as the following,
    # using the "mux" function.
    # color_image[x, y, c] = hl.mux(c, [245, 42, 132]);

    # This method is often convenient because it makes it easy to
    # operate on this hl.Func in a way that treats each item in the
    # collection equally:
    brighter = hl.Func()
    brighter[x, y, c] = color_image[x, y, c] + 10

    # However this method is also inconvenient for three reasons.
    #
    # 1) Funcs are defined over an infinite domain, so users of this
    # hl.Func can for example access color_image(x, y, -17), which is
    # not a meaningful value and is probably indicative of a bug.
    #
    # 2) It requires a hl.select, which can impact performance if not
    # bounded and unrolled:
    # brighter.bound(c, 0, 3).unroll(c)
    #
    # 3) With this method, all values in the collection must have the
    # same type. While the above two issues are merely inconvenient,
    # this one is a hard limitation that makes it impossible to
    # express certain things in this way.

    # It is also possible to represent a collection of values as a
    # collection of Funcs:
    func_array = [hl.Func() for i in range(3)]
    func_array[0][x, y] = x + y
    func_array[1][x, y] = hl.sin(x)
    func_array[2][x, y] = hl.cos(y)

    # This method avoids the three problems above, but introduces a
    # new annoyance. Because these are separate Funcs, it is
    # difficult to schedule them so that they are all computed
    # together inside a single loop over x, y.

    # A third alternative is to define a hl.Func as evaluating to a
    # Tuple instead of an hl.Expr. A Tuple is a fixed-size collection of
    # Exprs which may have different type. The following function
    # evaluates to an integer value (x+y), and a floating point value
    # (hl.sin(x*y)).
    multi_valued = hl.Func("multi_valued")
    multi_valued[x, y] = (x + y, hl.sin(x * y))

    # Realizing a tuple-valued hl.Func returns a collection of
    # Buffers. We call this a Realization. It's equivalent to a
    # std::vector of hl.Buffer/Image objects:
    if True:
        im1, im2 = multi_valued.realize([80, 60])
        assert im1.type() == hl.Int(32)
        assert im2.type() == hl.Float(32)
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # You can also pass a tuple of pre-allocated buffers to realize()
    # rather than having new ones created. (The Buffers must have the correct
    # types and have identical sizes.)
    if True:
        im1, im2 = hl.Buffer(hl.Int(32),
                             [80, 60]), hl.Buffer(hl.Float(32), [80, 60])
        multi_valued.realize((im1, im2))
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # All Tuple elements are evaluated together over the same domain
    # in the same loop nest, but stored in distinct allocations. A
    # plain-Python equivalent of the 80 x 60 realization above follows.
    # x is the innermost dimension, so each row holds 80 values, and
    # the second element is floating point, so it needs a float array
    # (an int32 array would truncate every sine value).
    if True:
        width, height = 80, 60
        multi_valued_0 = np.empty(width * height, dtype=np.int32)
        multi_valued_1 = np.empty(width * height, dtype=np.float32)

        for yy in range(height):
            for xx in range(width):
                multi_valued_0[xx + width * yy] = xx + yy
                multi_valued_1[xx + width * yy] = math.sin(xx * yy)

    # When compiling ahead-of-time, a Tuple-valued hl.Func evaluates
    # into multiple distinct output halide_buffer_t structs. These appear in
    # order at the end of the function signature:
    # int multi_valued(...input buffers and params..., halide_buffer_t
    # *output_1, halide_buffer_t *output_2)

    # In the C++ API a Tuple can be built either with the Tuple
    # constructor or with a braced initializer list. In this Python
    # interface both spellings are simply a Python tuple of Exprs:
    multi_valued_2 = hl.Func("multi_valued_2")
    multi_valued_2[x, y] = (x + y, hl.sin(x * y))

    # Calls to a multi-valued hl.Func cannot be treated as Exprs. The
    # following is a syntax error:
    # hl.Func consumer
    # consumer[x, y] = multi_valued_2[x, y] + 10

    # Instead you must index the returned object with square brackets
    # to retrieve the individual Exprs:
    integer_part = multi_valued_2[x, y][0]
    floating_part = multi_valued_2[x, y][1]
    assert type(integer_part) is hl.FuncTupleElementRef
    assert type(floating_part) is hl.FuncTupleElementRef

    consumer = hl.Func()
    consumer[x, y] = (integer_part + 10, floating_part + 10.0)

    # Tuple reductions.
    if True:
        # Tuples are particularly useful in reductions, as they allow
        # the reduction to maintain complex state as it walks along
        # its domain. The simplest example is an argmax.

        # First we create an Image to take the argmax over.
        input_func = hl.Func()
        input_func[x] = hl.sin(x)
        input_image = input_func.realize([100])
        assert input_image.type() == hl.Float(32)

        # Then we define a 2-valued Tuple which tracks the maximum value
        # and its index.
        arg_max = hl.Func()

        # Pure definition.
        # (using [()] for zero-dimensional Funcs is a convention of this python interface)
        arg_max[()] = (0, input_image[0])

        # Update definition. The index only moves when the new sample
        # is strictly greater than the running maximum, so on ties the
        # first occurrence is kept.
        r = hl.RDom([(1, 99)])
        old_index = arg_max[()][0]
        old_max = arg_max[()][1]
        new_index = hl.select(old_max < input_image[r], r, old_index)
        new_max = hl.max(input_image[r], old_max)
        arg_max[()] = (new_index, new_max)

        # The equivalent plain-Python loop is:
        arg_max_0 = 0
        arg_max_1 = float(input_image[0])
        for ri in range(1, 100):
            old_index = arg_max_0
            old_max = arg_max_1
            new_index = ri if (old_max < input_image[ri]) else old_index
            new_max = max(input_image[ri], old_max)
            # In a tuple update definition, all loads and computation
            # are done before any stores, so that all Tuple elements
            # are updated atomically with respect to recursive calls
            # to the same hl.Func.
            arg_max_0 = new_index
            arg_max_1 = new_max

        # Let's verify that Halide and the plain-Python loop found the
        # same maximum value and index.
        if True:
            r0, r1 = arg_max.realize()

            assert r0.type() == hl.Int(32)
            assert r1.type() == hl.Float(32)
            assert arg_max_0 == r0[()]
            assert np.isclose(arg_max_1, r1[()])

        # Halide provides argmax and hl.argmin as built-in reductions
        # similar to sum, product, maximum, and minimum. They return
        # a Tuple consisting of the point in the reduction domain
        # corresponding to that value, and the value itself. In the
        # case of ties they return the first value found. We'll use
        # one of these in the following section.

    # Tuples for user-defined types.
    if True:
        # Tuples can also be a convenient way to represent compound
        # objects such as complex numbers. Defining an object that
        # can be converted to and from a Tuple is one way to extend
        # Halide's type system with user-defined types.
        class Complex:
            """Complex number stored as a (real, imag) pair.

            Accepts two Python floats (wrapped in hl.Expr), two
            ready-made components, or a single indexable object whose
            items 0 and 1 are the real and imaginary parts.
            """

            def __init__(self, r, i=None):
                if isinstance(r, float) and isinstance(i, float):
                    self.real = hl.Expr(r)
                    self.imag = hl.Expr(i)
                elif i is not None:
                    self.real = r
                    self.imag = i
                else:
                    self.real = r[0]
                    self.imag = r[1]

            def as_tuple(self):
                "Convert to a Tuple"
                return (self.real, self.imag)

            def __add__(self, other):
                "Complex addition"
                return Complex(self.real + other.real, self.imag + other.imag)

            def __mul__(self, other):
                "Complex multiplication"
                return Complex(self.real * other.real - self.imag * other.imag,
                               self.real * other.imag + self.imag * other.real)

            # __getitem__ and __len__ let a Complex be indexed and
            # measured like a 2-element sequence.
            def __getitem__(self, idx):
                return (self.real, self.imag)[idx]

            def __len__(self):
                return 2

            def magnitude(self):
                "Squared complex magnitude (no square root is taken)"
                return (self.real * self.real) + (self.imag * self.imag)

            # Other complex operators would go here. The above are
            # sufficient for this example.

        # Let's use the Complex struct to compute a Mandelbrot set.
        mandelbrot = hl.Func()

        # The initial complex value corresponding to an x, y coordinate
        # in our hl.Func.
        initial = Complex(x / 15.0 - 2.5, y / 6.0 - 2.0)

        # Pure definition.
        t = hl.Var("t")
        mandelbrot[x, y, t] = Complex(0.0, 0.0)

        # We'll use an update definition to take 12 steps.
        r = hl.RDom([(1, 12)])
        current = Complex(mandelbrot[x, y, r - 1])

        # The following line uses the complex multiplication and
        # addition we defined above.
        mandelbrot[x, y, r] = (Complex(current * current) + initial)

        # We'll use another tuple reduction to compute the iteration
        # number where the value first escapes a circle of radius 4
        # (magnitude() is the squared magnitude, hence the 16.0).
        # This can be expressed as an hl.argmin of a boolean - we want
        # the index of the first time the given boolean expression is
        # false (we consider false to be less than true).  The argmax
        # would return the index of the first time the expression is
        # true.

        escape_condition = Complex(mandelbrot[x, y, r]).magnitude() < 16.0
        first_escape = hl.argmin(escape_condition)
        assert type(first_escape) is tuple
        # We only want the index, not the value, but hl.argmin returns
        # both, so we'll index the hl.argmin Tuple expression using
        # square brackets to get the hl.Expr representing the index.
        escape = hl.Func()
        escape[x, y] = first_escape[0]

        # Realize the pipeline and print the result as ascii art.
        result = escape.realize([61, 25])
        assert result.type() == hl.Int(32)
        code = " .:-~*={&%#@"
        for yy in range(result.height()):
            for xx in range(result.width()):
                index = result[xx, yy]
                if index < len(code):
                    print("%c" % code[index], end="")
                else:
                    # The reduction variable runs over 1..12 while the
                    # palette only has 12 glyphs (indices 0..11), so an
                    # index equal to len(code) has no glyph and nothing
                    # is printed for it.
                    pass
            print("")

    print("Success!")

    return 0
Ejemplo n.º 29
0
def test_target():
    """Check the hl.Target bindings.

    Covers construction, string conversion and validation, feature
    manipulation, and the capability queries exposed on a Target.
    """
    # Short aliases for names used throughout the test.
    os_linux = hl.TargetOS.Linux
    os_osx = hl.TargetOS.OSX
    arch_x86 = hl.TargetArch.X86
    feat = hl.TargetFeature
    validate = hl.Target.validate_target_string

    # A Target built from the empty string must equal the host target.
    host = hl.get_host_target()
    from_empty_string = hl.Target("")
    assert host == from_empty_string, "Default ctor failure"
    assert host.supported()

    # String form of a default-constructed Target.
    unknown = hl.Target()
    unknown_text = unknown.to_string()
    assert unknown_text == "arch_unknown-0-os_unknown"

    # That string should *not* validate: validate_target_string
    # returns false if any of arch-bits-os are undefined.
    assert not validate(unknown_text)

    # No round-trip is attempted here: creating a Target with unknown
    # portions will assert-fail.
    #
    # rebuilt = hl.Target(unknown_text)
    # assert rebuilt == unknown

    # str() and repr()
    assert str(unknown) == "arch_unknown-0-os_unknown"
    assert repr(unknown) == "<halide.Target arch_unknown-0-os_unknown>"

    assert unknown.os == hl.TargetOS.OSUnknown
    assert unknown.arch == hl.TargetArch.ArchUnknown
    assert unknown.bits == 0

    # Fully specified target, one feature.
    with_sse41 = hl.Target(os_linux, arch_x86, 32, [feat.SSE41])
    text = with_sse41.to_string()
    assert text == "x86-32-linux-sse41"
    assert validate(text)

    # Fully specified target, no feature list at all.
    featureless = hl.Target(os_linux, arch_x86, 32)
    text = featureless.to_string()
    assert text == "x86-32-linux"
    assert validate(text)

    # Fully specified target with a crazy assortment of features.
    crazy_features = [
        feat.JIT,
        feat.SSE41,
        feat.AVX,
        feat.AVX2,
        feat.CUDA,
        feat.OpenCL,
        feat.OpenGL,
        feat.OpenGLCompute,
        feat.Debug,
    ]
    crazy = hl.Target(hl.TargetOS.Android, hl.TargetArch.ARM, 32,
                      crazy_features)
    text = crazy.to_string()
    assert text == "arm-32-android-avx-avx2-cuda-debug-jit-opencl-opengl-openglcompute-sse41"
    assert validate(text)

    # Strings that are expected to be rejected.
    rejected = (
        "host-unknowntoken",
        "x86-23",
        # bits == 0 is allowed only if arch_unknown and os_unknown are
        # specified, and no features are set
        "x86-0",
        "0-arch_unknown-os_unknown-sse41",
        # "host" is only supported as the first token
        "opencl-host",
    )
    for bad_text in rejected:
        assert not validate(bad_text)

    # set_feature
    target = hl.Target(os_linux, arch_x86, 32, [feat.SSE41])
    assert target.has_feature(feat.SSE41)
    assert not target.has_feature(feat.AVX)
    target.set_feature(feat.AVX)
    target.set_feature(feat.SSE41, False)
    assert target.has_feature(feat.AVX)
    assert not target.has_feature(feat.SSE41)

    # set_features
    target = hl.Target(os_linux, arch_x86, 32, [feat.SSE41])
    assert target.has_feature(feat.SSE41)
    assert not target.has_feature(feat.AVX)
    target.set_features([feat.SSE41], False)
    target.set_features([feat.AVX, feat.AVX2], True)
    assert target.has_feature(feat.AVX)
    assert target.has_feature(feat.AVX2)
    assert not target.has_feature(feat.SSE41)

    # with_feature
    base = hl.Target(os_linux, arch_x86, 32, [feat.SSE41])
    extended = base.with_feature(feat.NoAsserts)
    extended = extended.with_feature(feat.NoBoundsQuery)
    text = extended.to_string()
    assert text == "x86-32-linux-no_asserts-no_bounds_query-sse41"

    # without_feature
    base = hl.Target(os_linux, arch_x86, 32, [feat.SSE41, feat.NoAsserts])
    # NoBoundsQuery was never set on 'base', so removing it is a no-op.
    reduced = base.without_feature(feat.NoAsserts)
    reduced = reduced.without_feature(feat.NoBoundsQuery)
    text = reduced.to_string()
    assert text == "x86-32-linux-sse41"

    # natural_vector_size
    # SSE4.1 is 16 bytes wide
    sse_target = hl.Target(os_linux, arch_x86, 32, [feat.SSE41])
    lanes = sse_target.natural_vector_size
    assert lanes(hl.UInt(8)) == 16
    assert lanes(hl.Int(16)) == 8
    assert lanes(hl.UInt(32)) == 4
    assert lanes(hl.Float(32)) == 4

    # has_gpu_feature
    gpu = hl.Target(os_linux, arch_x86, 32, [feat.OpenCL])
    cpu_only = hl.Target(os_linux, arch_x86, 32, [])
    assert gpu.has_gpu_feature()
    assert not cpu_only.has_gpu_feature()

    # has_large_buffers & maximum_buffer_size
    large = hl.Target(os_linux, arch_x86, 64, [feat.LargeBuffers])
    regular = hl.Target(os_linux, arch_x86, 64, [])
    assert large.has_large_buffers()
    assert large.maximum_buffer_size() == 9223372036854775807
    assert not regular.has_large_buffers()
    assert regular.maximum_buffer_size() == 2147483647

    # supports_device_api
    with_cuda = hl.Target(os_linux, arch_x86, 64, [feat.CUDA])
    without_cuda = hl.Target(os_linux, arch_x86, 64)
    assert with_cuda.supports_device_api(hl.DeviceAPI.CUDA)
    assert not without_cuda.supports_device_api(hl.DeviceAPI.CUDA)

    # supports_type (deprecated version)
    with_metal = hl.Target(os_osx, arch_x86, 64, [feat.Metal])
    without_metal = hl.Target(os_osx, arch_x86, 64)
    assert not with_metal.supports_type(hl.Float(64))
    assert without_metal.supports_type(hl.Float(64))

    # supports_type (preferred version)
    with_metal = hl.Target(os_osx, arch_x86, 64, [feat.Metal])
    without_metal = hl.Target(os_osx, arch_x86, 64)
    assert not with_metal.supports_type(hl.Float(64), hl.DeviceAPI.Metal)
    assert not without_metal.supports_type(hl.Float(64), hl.DeviceAPI.Metal)

    # target_feature_for_device_api
    opencl_feature = hl.target_feature_for_device_api(hl.DeviceAPI.OpenCL)
    assert opencl_feature == hl.TargetFeature.OpenCL

    # Constructing a Target from a feature list whose items cannot be
    # converted to TargetFeature must raise TypeError.
    try:
        hl.Target(os_linux, arch_x86, 32, ["this is a string"])
    except TypeError as err:
        assert "incompatible constructor arguments" in str(err)
    else:
        assert False, 'Did not see expected exception!'
Ejemplo n.º 30
0
def test_buffer_to_str():
    b = hl.Buffer()
    assert str(b) == '<undefined halide.Buffer>'
    b = hl.Buffer(hl.Int(32), [128, 256])
    assert str(
        b) == '<halide.Buffer of type int32 shape:[[0,128,1],[0,256,128]]>'