Example #1
0
def get_iny_inx(y, x, image_height, image_width, target_height, target_width,
                coordinate_transformation_mode):
    """Map an output (y, x) coordinate back to the matching input coordinate.

    Parameters
    ----------
    y, x :
        Output row and column index expressions.
    image_height, image_width :
        Input extents; must provide ``.astype`` and support arithmetic.
    target_height, target_width :
        Output extents; must provide ``.astype`` and support arithmetic.
    coordinate_transformation_mode : str
        One of ``"half_pixel"``, ``"align_corners"``, ``"asymmetric"``,
        ``"pytorch_half_pixel"`` or ``"tf_half_pixel_for_nn"``.

    Returns
    -------
    tuple
        ``(in_y, in_x)`` expressions giving the input coordinate.

    Raises
    ------
    ValueError
        If ``coordinate_transformation_mode`` is not one of the modes above.
    """
    mode = coordinate_transformation_mode

    # Input/output size ratios. They are built up front for every mode, even
    # the ones (align_corners) that end up not using them.
    ratio_h = te.div(image_height.astype("float"),
                     target_height.astype("float"))
    ratio_w = te.div(image_width.astype("float"),
                     target_width.astype("float"))

    if mode == "half_pixel":
        # Sample at pixel centres on both grids.
        return (y + 0.5) * ratio_h - 0.5, (x + 0.5) * ratio_w - 0.5

    if mode == "align_corners":
        # NOTE(review): the divisor is (target - 1); a target extent of 1 is
        # not special-cased in this branch.
        src_y = (image_height - 1).astype("float") / (target_height - 1) * y
        src_x = (image_width - 1).astype("float") / (target_width - 1) * x
        return src_y, src_x

    if mode == "asymmetric":
        return ratio_h * y, ratio_w * x

    if mode == "pytorch_half_pixel":
        # Same as half_pixel, except an output extent of 1 (or less) maps
        # everything to coordinate 0.0.
        src_y = te.if_then_else(target_height > 1,
                                (y + 0.5) * ratio_h - 0.5, 0.0)
        src_x = te.if_then_else(target_width > 1,
                                (x + 0.5) * ratio_w - 0.5, 0.0)
        return src_y, src_x

    if mode == "tf_half_pixel_for_nn":
        # Centre-shifted on the output side only (no trailing -0.5).
        return (y + 0.5) * ratio_h, (x + 0.5) * ratio_w

    raise ValueError(
        "Unsupported coordinate_transformation_mode: {}".format(
            coordinate_transformation_mode))
Example #2
0
def test_average_pool():
    """Check the FLOP count reported for an average-pool style reduction.

    Runs five rounds with randomly drawn extents and dtypes. Each round
    builds a compute that sums window elements divided by the window area
    and asserts that ``compute_flop`` reports two operations per
    (output element, window element) pair.
    """
    for _ in range(5):
        # Seven independent draws, consumed in the same order as the names.
        batch, height, width, out_ch, in_ch, win_h, win_w = (
            np.random.randint(10, 32) for _ in range(7))
        input_dtype, acc_dtype = random_dtypes()
        data = te.placeholder((batch, in_ch, height, width), dtype=input_dtype)

        # The pooling window may not be larger than the input.
        win_h = min(height, win_h)
        win_w = min(width, win_w)

        kh = te.reduce_axis((0, win_h))
        kw = te.reduce_axis((0, win_w))

        # Output extent of a stride-1 window without padding.
        out_h = (height - win_h) + 1
        out_w = (width - win_w) + 1

        window_area = win_w * win_h

        def pooled(n, co, h, w):
            # One divide plus one accumulate per window element.
            cell = data[n][co][h + kh][w + kw].astype(acc_dtype)
            return te.sum(te.div(cell, window_area), axis=[kh, kw])

        result = te.compute((batch, out_ch, out_h, out_w), pooled)

        sched = te.create_schedule([result.op])

        expected_flop = 2 * batch * out_ch * out_h * out_w * win_h * win_w
        assert compute_flop(sched) == expected_flop
Example #3
0
def get_inx(x,
            image_width,
            target_width,
            coordinate_transformation_mode,
            start_x=0,
            end_x=-1):
    """Map an output x coordinate back to the matching input x coordinate.

    Parameters
    ----------
    x :
        Output column index expression.
    image_width :
        Input extent; must provide ``.astype`` and support arithmetic.
    target_width :
        Output extent; must provide ``.astype`` and support arithmetic.
    coordinate_transformation_mode : str
        One of ``"half_pixel"``, ``"align_corners"``, ``"asymmetric"``,
        ``"pytorch_half_pixel"``, ``"tf_half_pixel_for_nn"`` or
        ``"tf_crop_and_resize"``.
    start_x, end_x :
        Crop bounds; only read by the ``"tf_crop_and_resize"`` mode.

    Returns
    -------
    The input x coordinate expression.

    Raises
    ------
    ValueError
        If ``coordinate_transformation_mode`` is not one of the modes above.
    """
    mode = coordinate_transformation_mode

    # Input/output width ratio, built up front regardless of the mode.
    ratio = te.div(image_width.astype("float"), target_width.astype("float"))

    if mode == "half_pixel":
        return (x + 0.5) * ratio - 0.5

    if mode == "align_corners":
        # NOTE(review): divisor is (target_width - 1); a target width of 1 is
        # not special-cased in this branch.
        return (image_width - 1).astype("float") / (target_width - 1) * x

    if mode == "asymmetric":
        return ratio * x

    if mode == "pytorch_half_pixel":
        # half_pixel, except a target width of 1 (or less) maps to 0.0.
        return te.if_then_else(target_width > 1, (x + 0.5) * ratio - 0.5,
                               0.0)

    if mode == "tf_half_pixel_for_nn":
        return (x + 0.5) * ratio

    if mode == "tf_crop_and_resize":
        has_room = target_width > 1
        # Offset of the crop start plus the per-output-pixel step.
        crop_origin = start_x * (image_width - 1)
        crop_step = (x * (end_x - start_x) *
                     (image_width - 1).astype("float") / (target_width - 1))
        # With a single output column, use the midpoint of the crop bounds.
        crop_centre = 0.5 * (start_x + end_x) * (image_width - 1)
        return te.if_then_else(has_room, crop_origin + crop_step, crop_centre)

    raise ValueError(
        "Unsupported coordinate_transformation_mode: {}".format(
            coordinate_transformation_mode))
Example #4
0
def test_reduce_simplify():
    """Verify canonical simplification of several reduction expressions.

    Each case hands ``CanonicalChecker.verify`` an input expression and the
    form it is expected to simplify to.
    """
    checker = CanonicalChecker()
    axis_k = te.reduce_axis((0, 10), name="k")
    axis_j = te.reduce_axis((-5, 3), name="j")
    vec = te.placeholder((10,), name="A")

    # A Select guarded by `k + j < 12` should drop out of the reduction.
    # (presumably because the guard always holds for these axis ranges)
    guarded = te.sum(
        tvm.tir.Select(axis_k + axis_j < 12, axis_k + axis_j, 0),
        [axis_k, axis_j])
    unguarded = te.sum(axis_k + axis_j, [axis_k, axis_j])
    checker.verify(guarded, unguarded)

    # A reduction over no axes is just its source expression.
    no_axis_sum = te.sum(vec[3], [])
    checker.verify(no_axis_sum, vec[3])

    # With no axes, a `where` predicate and an `init`, the expected result is
    # the init value.
    init_only = te.sum(vec[3], [], where=axis_k > 12, init=1.0)
    checker.verify(init_only, tvm.tir.const(1.0, dtype="float32"))

    # NOTE(review): the original comment here called this rule "not typical,
    # removed for now", yet the check is active - confirm which is intended.
    div_sum = te.sum(te.div(axis_k, 10), axis_k)
    zero_sum = te.sum(tvm.tir.const(0, "int32"), axis_k)
    checker.verify(div_sum, zero_sum)
Example #5
0
    def check_llvm_reciprocal(n):
        """Build ``1.0 / (1e37 * A[i])`` for LLVM and expect all-zero output.

        The input is filled with 100, so the denominator is 1e39, which is
        beyond the float32 range; the test expects the reciprocal to come out
        as exactly 0 for every one of the ``n`` elements.
        """
        shape = (n, )
        src = te.placeholder(shape, name="A")
        recip = te.compute(shape,
                           lambda i: te.div(1.0, (1e37 * src[i])),
                           name="B")

        sched = te.create_schedule(recip.op)
        kernel = tvm.build(sched, [src, recip], "llvm")

        in_buf = tvm.nd.array(np.full(shape, 100, "float32"))
        out_buf = tvm.nd.empty(shape, "float32")
        kernel(in_buf, out_buf)

        expected = np.zeros(shape, "float32")
        tvm.testing.assert_allclose(out_buf.numpy(), expected)
Example #6
0
def matmul():
    """Define a tunable matrix multiply with a packed ``B`` and schedule it.

    Relies on module-level ``M``, ``K`` and ``N`` (the matrix extents) and on
    ``autotvm``; none of them are defined inside this block.

    The tuning space consists of three split knobs: ``tile_x`` over ``M`` and
    ``tile_y`` over ``N`` (three parts each) and ``tile_k`` over ``K`` (two
    parts).

    Returns
    -------
    tuple
        ``(s, [A, B, C])`` - the schedule and the placeholder/output tensors.
    """
    # Algorithm
    k = te.reduce_axis((0, K), 'k')
    A = te.placeholder((M, K), name='A')
    B = te.placeholder((K, N), name='B')

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_x", M, num_outputs=3)
    cfg.define_split("tile_y", N, num_outputs=3)
    cfg.define_split("tile_k", K, num_outputs=2)
    ##### define space end #####

    # We have to re-write the algorithm slightly: B is repacked into blocks
    # of `bn` columns so that packedB[x, y, z] == B[y, x * bn + z].
    bn = cfg["tile_y"].size[-1]
    # Floor division keeps the block count an integer; `N / bn` is true
    # division under Python 3 and would yield a float shape extent.
    packedB = te.compute((N // bn, K, bn),
                         lambda x, y, z: B[y, x * bn + z],
                         name='packedB')
    C = te.compute(
        (M, N),
        lambda x, y: te.sum(A[x, k] * packedB[te.div(y, bn), k, y % bn],
                            axis=k),
        name='C')
    s = te.create_schedule(C.op)
    # The spatial axes are fetched before cache_write and used after it.
    x, y = s[C].op.axis

    # schedule according to config
    # Allocate write cache
    CC = s.cache_write(C, 'global')
    xt, xo, xi = cfg["tile_x"].apply(s, C, x)
    yt, yo, yi = cfg["tile_y"].apply(s, C, y)
    s[C].reorder(xt, yt, xo, yo, xi, yi)
    xyt = s[C].fuse(xt, yt)
    # parallel
    s[C].parallel(xyt)
    xyo = s[C].fuse(xo, yo)
    s[C].unroll(xi)
    s[C].vectorize(yi)

    # Write cache is computed at xyo
    s[CC].compute_at(s[C], xyo)

    # New inner axes
    xc, yc = s[CC].op.axis

    # The reduction is scheduled on the cache stage, so its axis is taken
    # from CC.
    k, = s[CC].op.reduce_axis
    ko, ki = cfg["tile_k"].apply(s, CC, k)
    s[CC].reorder(ko, xc, ki, yc)
    s[CC].unroll(xc)
    s[CC].unroll(ki)
    s[CC].vectorize(yc)
    # Disabled alternative knobs (tunable reorder / annotation):
    # cfg.define_reorder("reorder", [xc, ki, yc], "all")
    # cfg["reorder"].apply(s, CC, [xc, ki, yc])
    # cfg.define_annotate('ann', [xc, ki, yc], policy='try_unroll_vec')
    # cfg['ann'].apply(s, CC, [xc, ki, yc])

    # Packing stage: vectorize the innermost block axis, parallelize blocks.
    x, y, z = s[packedB].op.axis
    s[packedB].vectorize(z)
    s[packedB].parallel(x)

    return s, [A, B, C]