Example #1
    def forward_(x_op, name):
        # This is a 2-D-only specialized implementation of topi.nn.softmax.
        if x_op.ndim == 1:
            x = topi.reshape(x_op.tvm_tensor, (1, x_op.size))
            m = 1
            n = x_op.shape[0]
        elif x_op.ndim == 2:
            x = x_op.tvm_tensor
            m, n = x_op.shape
        else:
            raise ValueError(f'Given ndim {x_op.ndim} is not supported')

        k = tvm.reduce_axis((0, n), name='k')
        max_elem = tvm.compute((m, ),
                               lambda i: tvm.max(x[i, k], axis=k),
                               name=f'{name}:max_elem')

        k = tvm.reduce_axis((0, n), name='k')
        expsum = tvm.compute(
            (m, ),
            lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k),
            name=f'{name}:expsum')

        softmax = tvm.compute(
            x.shape,
            lambda i, j: tvm.exp(x[i, j] - max_elem[i]) / expsum[i],
            name=f'{name}:softmax')

        if x_op.ndim == 1:
            softmax = topi.reshape(softmax, x_op.shape)

        return softmax, max_elem, expsum
Example #2
def softmax(x, axis):
    # 2-D only; the `axis` argument is unused and the reduction runs over axis 1.
    assert len(x.shape) == 2
    m, n = x.shape
    ok = tvm.reduce_axis((0, n), name='ok')
    maxelem = tvm.compute((m, ), lambda on: tvm.max(x[on, ok], axis=ok))
    ok = tvm.reduce_axis((0, n), name='ok')
    expsum = tvm.compute(
        (m, ), lambda on: tvm.sum(tvm.exp(x[on, ok] - maxelem[on]), axis=ok)) 
    divelem = tvm.compute(
        x.shape, lambda on, os: (tvm.exp(x[on,os] - maxelem[on]) / expsum[on]))
    return divelem
Example #3
def make_matrix_softmax_cross_entropy(shape, tgt, tgt_host, func_name,
                                      dtype="float32"):
    """TODO: Your code here"""
    """Hint: output shape should be (1,)"""
    assert len(shape) == 2
    x = tvm.placeholder(shape, dtype=dtype, name='x')
    y = tvm.placeholder(shape, dtype=dtype, name='y')

    n, cls_num = shape
    max_iter = tvm.reduce_axis((0, cls_num), name='max_iter')

    x_max = tvm.compute((n,), lambda i: tvm.max(x[i, max_iter], axis=max_iter))
    e_x = tvm.compute(shape, lambda i, j: tvm.exp(x[i, j] - x_max[i]))

    sum_ex_iter = tvm.reduce_axis((0, cls_num), name='sum_ex_iter')
    e_x_sum = tvm.compute((n,),
                          lambda i: tvm.sum(e_x[i, sum_ex_iter], axis=sum_ex_iter))
    log_softmax = tvm.compute(shape, lambda i, j: tvm.log(e_x[i, j] / e_x_sum[i]))
    y_mul_log_softmax = tvm.compute(shape, lambda i, j: log_softmax[i, j] * y[i, j])

    sum_entropy_iter = tvm.reduce_axis((0, cls_num), name='sum_entropy_iter')
    mean_iter = tvm.reduce_axis((0, n), name='mean_iter')
    sum_entropy = tvm.compute(
        (1,),
        lambda i: tvm.sum(y_mul_log_softmax[mean_iter, sum_entropy_iter],
                          axis=[mean_iter, sum_entropy_iter]))

    scale = tvm.const(-n, dtype)
    entropy = tvm.compute((1,), lambda i: sum_entropy[i] / scale)

    s = tvm.create_schedule(entropy.op)
    f = tvm.build(s, [x, y, entropy], tgt, target_host=tgt_host, name=func_name)
    return f
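A minimal usage sketch (not part of the original listing): building the function above for an llvm target and checking it against a NumPy reference. The shape, target strings, and variable names here are illustrative assumptions.
import numpy as np
import tvm

shape = (8, 5)
f = make_matrix_softmax_cross_entropy(shape, "llvm", "llvm", "softmax_xent")

ctx = tvm.cpu(0)
x_np = np.random.uniform(size=shape).astype("float32")
y_np = np.zeros(shape, dtype="float32")
y_np[np.arange(shape[0]), np.random.randint(0, shape[1], shape[0])] = 1.0

out = tvm.nd.array(np.zeros((1,), dtype="float32"), ctx)
f(tvm.nd.array(x_np, ctx), tvm.nd.array(y_np, ctx), out)

# NumPy reference: mean over the batch of -sum(y * log_softmax(x)).
log_sm = x_np - x_np.max(axis=1, keepdims=True)
log_sm -= np.log(np.exp(log_sm).sum(axis=1, keepdims=True))
np.testing.assert_allclose(out.asnumpy()[0], -(y_np * log_sm).sum(axis=1).mean(),
                           rtol=1e-5)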
Example #4
def test_exp():
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B')
    s = tvm.create_schedule(B.op)
    # create iter var and assign them tags.
    num_thread = 8
    bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
    s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[B].bind(tx, tvm.thread_axis("threadIdx.x"))

    # one line to build the function.
    def check_device(device, host="stackvm"):
        if not tvm.module.enabled(host):
            return
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            return
        fexp = tvm.build(s, [A, B],
                         device, host,
                         name="myexp")
        ctx = tvm.context(device, 0)
        # launch the kernel.
        n = 1024
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        fexp(a, b)
        np.testing.assert_allclose(
            b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)

    check_device("cuda", "llvm")
    check_device("vulkan")
    check_device("opencl")
Example #5
def test_exp():
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B')
    s = tvm.create_schedule(B.op)
    # create iter var and assign them tags.
    px, x = s[B].split(B.op.axis[0], nparts=1)
    s[B].bind(px, tvm.thread_axis("pipeline"))

    # one line to build the function.
    def check_device(device, host="llvm"):
        if not tvm.module.enabled(host):
            return
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            return
        fexp = tvm.build(s, [A, B],
                         device, host,
                         name="myexp")
        ctx = tvm.context(device, 0)
        # launch the kernel.
        n = 1024
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        fexp(a, b)
        tvm.testing.assert_allclose(
            b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)

    check_device("sdaccel")
    if "AWS_PLATFORM" in os.environ:
        check_device("sdaccel -device=" + os.environ.get("AWS_PLATFORM"))

    check_device("aocl_sw_emu")
Example #6
def make_matrix_softmax_cross_entropy(shape,
                                      tgt,
                                      tgt_host,
                                      func_name,
                                      dtype="float32"):
    """Hint: output shape should be (1,)"""
    h, w = shape
    x = tvm.placeholder(shape, dtype, name="x")
    k1 = tvm.reduce_axis((0, w), name="k1")
    x2 = tvm.compute((h, ), lambda i: tvm.max(x[i, k1], axis=k1))
    x3 = tvm.compute(shape, lambda i, j: x[i, j] - x2[i])
    x4 = tvm.compute(shape, lambda i, j: tvm.exp(x3[i, j]))
    k2 = tvm.reduce_axis((0, w), name="k2")
    x5 = tvm.compute((h, ), lambda i: tvm.sum(x4[i, k2], axis=k2))
    log_p = tvm.compute(shape, lambda i, j: tvm.log(x4[i, j] / x5[i]))
    q = tvm.placeholder(shape, dtype, name="q")
    k3 = tvm.reduce_axis((0, w), name="k3")
    y1 = tvm.compute((h, ),
                     lambda i: tvm.sum(q[i, k3] * log_p[i, k3], axis=k3))
    k4 = tvm.reduce_axis((0, h), name="k4")
    y2 = tvm.compute((1, ), lambda i: tvm.sum(-y1[k4], axis=k4))
    y3 = tvm.compute((1, ), lambda i: y2[i] / h)

    s = tvm.create_schedule(y3.op)
    f = tvm.build(s, [x, q, y3], tgt, target_host=tgt_host, name=func_name)
    return f
Example #7
def make_matrix_softmax(shape, tgt, tgt_host, func_name, dtype="float32"):
    """TODO: Your code here"""
    """Hint: use tvm.reduce_axis, tvm.sum, tvm.max, tvm.exp"""
    """Hint: do not reuse the same reduction axis j."""
    """Hint: implement the following version for better stability
        e_x = np.exp(x - np.max(x))
        softmax(x)= e_x / e_x.sum()
    """

    A = tvm.placeholder(shape, dtype=dtype, name="A")
    k = tvm.reduce_axis((0, shape[1]), name="k")
    max_A = tvm.compute((shape[0], ),
                        lambda i: tvm.max(A[i, k], axis=k),
                        name="max_A")
    exp = tvm.compute(shape,
                      lambda i, j: tvm.exp(A[i, j] - max_A[i]),
                      name="exp")
    k1 = tvm.reduce_axis((0, shape[1]), name="k1")
    sum_exp = tvm.compute((shape[0], ),
                          lambda i: tvm.sum(exp[i, k1], axis=k1),
                          name="sum_exp")
    B = tvm.compute(shape, lambda i, j: exp[i, j] / sum_exp[i], name="B")

    s = tvm.create_schedule(B.op)
    f = tvm.build(s, [A, B], tgt, target_host=tgt_host, name=func_name)
    return f
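For context, a hedged usage sketch of the function above on an llvm target, verified against a numerically stable NumPy softmax (the shape and target strings are illustrative assumptions).
import numpy as np
import tvm

shape = (32, 10)
f = make_matrix_softmax(shape, "llvm", "llvm", "softmax")

ctx = tvm.cpu(0)
x_np = np.random.uniform(size=shape).astype("float32")
out = tvm.nd.array(np.zeros(shape, dtype="float32"), ctx)
f(tvm.nd.array(x_np, ctx), out)

# Row-wise, numerically stable NumPy reference.
e_x = np.exp(x_np - x_np.max(axis=1, keepdims=True))
np.testing.assert_allclose(out.asnumpy(), e_x / e_x.sum(axis=1, keepdims=True),
                           rtol=1e-5)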
Example #8
def test_exp():
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B')
    s = tvm.create_schedule(B.op)
    # create iter var and assign them tags.
    num_thread = 8
    bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
    s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[B].bind(tx, tvm.thread_axis("threadIdx.x"))

    # one line to build the function.
    def check_device(device, host="stackvm"):
        if not tvm.module.enabled(host):
            return
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            return
        fexp = tvm.build(s, [A, B], device, host, name="myexp")
        ctx = tvm.context(device, 0)
        # launch the kernel.
        n = 1024
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        fexp(a, b)
        np.testing.assert_allclose(b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)

    check_device("cuda", "llvm")
    check_device("vulkan")
    check_device("opencl")
Example #9
def test_exp():
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n, ), name='A')
    B = tvm.compute(A.shape, lambda *i: tvm.exp(A(*i)), name='B')
    s = tvm.create_schedule(B.op)
    # create iter var and assign them tags.
    px, x = s[B].split(B.op.axis[0], nparts=1)
    s[B].bind(px, tvm.thread_axis("pipeline"))

    # one line to build the function.
    def check_device(device, host="llvm"):
        if not tvm.module.enabled(host):
            return
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            return
        fexp = tvm.build(s, [A, B], device, host, name="myexp")
        ctx = tvm.context(device, 0)
        # launch the kernel.
        n = 1024
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        fexp(a, b)
        tvm.testing.assert_allclose(b.asnumpy(),
                                    np.exp(a.asnumpy()),
                                    rtol=1e-5)

    check_device("sdaccel")
    if "AWS_PLATFORM" in os.environ:
        check_device("sdaccel -device=" + os.environ.get("AWS_PLATFORM"))

    check_device("aocl_sw_emu")
Example #10
def reg_bbox(x1, y1, x2, y2, dx, dy, dw, dh):
    """Bounding box regression function"""
    bbox_w = x2 - x1 + 1.0
    bbox_h = y2 - y1 + 1.0
    ctr_x = x1 + 0.5 * (bbox_w - 1.0)
    ctr_y = y1 + 0.5 * (bbox_h - 1.0)

    pred_ctr_x = dx * bbox_w + ctr_x
    pred_ctr_y = dy * bbox_h + ctr_y
    pred_w = tvm.exp(dw) * bbox_w
    pred_h = tvm.exp(dh) * bbox_h

    pred_x1 = pred_ctr_x - 0.5 * (pred_w - 1.0)
    pred_y1 = pred_ctr_y - 0.5 * (pred_h - 1.0)
    pred_x2 = pred_ctr_x + 0.5 * (pred_w - 1.0)
    pred_y2 = pred_ctr_y + 0.5 * (pred_h - 1.0)
    return pred_x1, pred_y1, pred_x2, pred_y2
Example #11
def reg_bbox(x1, y1, x2, y2, dx, dy, dw, dh):
    """Bounding box regression function"""
    bbox_w = x2 - x1 + 1.0
    bbox_h = y2 - y1 + 1.0
    ctr_x = x1 + 0.5 * (bbox_w - 1.0)
    ctr_y = y1 + 0.5 * (bbox_h - 1.0)

    pred_ctr_x = dx * bbox_w + ctr_x
    pred_ctr_y = dy * bbox_h + ctr_y
    pred_w = tvm.exp(dw) * bbox_w
    pred_h = tvm.exp(dh) * bbox_h

    pred_x1 = pred_ctr_x - 0.5 * (pred_w - 1.0)
    pred_y1 = pred_ctr_y - 0.5 * (pred_h - 1.0)
    pred_x2 = pred_ctr_x + 0.5 * (pred_w - 1.0)
    pred_y2 = pred_ctr_y + 0.5 * (pred_h - 1.0)
    return pred_x1, pred_y1, pred_x2, pred_y2
Example #12
def _declaration_softmaxDiv(cfg, sum_data, max_data, in_data, axis):

    assert len(in_data.shape) == 2
    m, n = in_data.shape
    
    divelem = tvm.compute(
        in_data.shape,
        lambda on, ok: tvm.exp(in_data[on, ok] - max_data[on, 0]) / sum_data[on, 0],
        tag="softmaxDiv")

    return divelem
Example #13
 def _gaussian_map_sum(i, j):
     # i is row, j is col
     x, y = data[ni, 0], data[ni, 1]
     sigma = data[ni, 2]
     sigma2 = sigma * sigma
     v = tvm.if_then_else(
         tvm.all(x >= 0, x < cols, y >= 0, y < rows),
         tvm.exp(-(topi.power((x - j), 2) + topi.power(
             (y - i), 2)) / (2 * sigma2)) / (2 * pi * sigma2), 0)
     return tvm.sum(v, axis=ni)
Example #14
def _declaration_softmaxSum(cfg, max_data, in_data, axis):

    assert len(in_data.shape) == 2
    m, n = in_data.shape
    
    ok = tvm.reduce_axis((0, n), name='ok')
    expsum = tvm.compute(
        (m, ),
        lambda on: tvm.sum(tvm.exp(in_data[on, ok] - max_data[on, 0]), axis=ok),
        tag="softmaxSum")

    return expsum
Example #15
def test_inline2():
    m = tvm.size_var('m')
    A = tvm.placeholder((m,), name='A')
    T = tvm.compute((m,), lambda i,: A[i] + 10, name='T')
    stmt = tvm.make.Evaluate(tvm.exp(T[10]) + 11 * T[100])
    stmt = tvm.ir_pass.Inline(
        stmt, T.op, [x.var for x in T.op.axis], T.op.body[0])
    def check(op):
        if isinstance(op, tvm.expr.Call):
            assert op.func != T.op
    tvm.ir_pass.PostOrderVisit(stmt, check)
Example #16
def test_inline2():
    m = tvm.var('m')
    A = tvm.placeholder((m,), name='A')
    T = tvm.compute((m,), lambda i,: A[i] + 10, name='T')
    stmt = tvm.make.Evaluate(tvm.exp(T[10]) + 11 * T[100])
    stmt = tvm.ir_pass.Inline(
        stmt, T.op, [x.var for x in T.op.axis], T.op.body[0])
    def check(op):
        if isinstance(op, tvm.expr.Call):
            assert op.func != T.op
    tvm.ir_pass.PostOrderVisit(stmt, check)
Example #17
def softmax_computation(X, shape, dtype="float32"): 
    km = tvm.reduce_axis((0, shape[1]), name="km")
    MAX = tvm.compute((shape[0],), lambda i: tvm.max(X(i, km), axis=km), name="MAX")
    
    E_X = tvm.compute(shape, lambda i, j: tvm.exp(X(i, j) - MAX(i)), name="E_X")
    
    ks = tvm.reduce_axis((0, shape[1]), name="ks")
    SUM = tvm.compute((shape[0],), lambda i: tvm.sum(E_X(i, ks), axis=ks), name="SUM")
    
    SOFTMAX = tvm.compute(shape, lambda i, j: E_X(i, j) / SUM(i), name="SOFTMAX")

    return MAX, E_X, SUM, SOFTMAX
Example #18
def softmax(x):
    """Perform softmax activation on the data

    Parameters
    ----------
    x : tvm.Tensor
        2-D input data

    Returns
    -------
    output : tvm.Tensor
        2-D output with same shape
    """
    assert len(x.shape) == 2, "only support 2-dim softmax"
    m, n = x.shape
    k = tvm.reduce_axis((0, n), name='k')
    max_elem = tvm.compute((m, ), lambda i: tvm.max(x[i, k], axis=k))
    expsum = tvm.compute(
        (m, ), lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k))
    return tvm.compute(x.shape,
                       lambda i, j: tvm.exp(x[i, j] - max_elem[i]) / expsum[i])
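A small hedged sketch of how this operator might be scheduled and run on CPU; the input size, target, and names are illustrative, and the old TVM 0.x API used throughout these examples is assumed.
import numpy as np
import tvm

x = tvm.placeholder((4, 16), name="x")
y = softmax(x)
s = tvm.create_schedule(y.op)
f = tvm.build(s, [x, y], "llvm")

ctx = tvm.cpu(0)
a_np = np.random.uniform(size=(4, 16)).astype("float32")
b_nd = tvm.nd.array(np.zeros((4, 16), dtype="float32"), ctx)
f(tvm.nd.array(a_np, ctx), b_nd)

e = np.exp(a_np - a_np.max(axis=1, keepdims=True))
np.testing.assert_allclose(b_nd.asnumpy(), e / e.sum(axis=1, keepdims=True),
                           rtol=1e-5)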
Example #19
def exp(x):
    """Take exponential of input x.

    Parameters
    ----------
    x : tvm.Tensor
        Input argument.

    Returns
    -------
    y : tvm.Tensor
        The result.
    """
    return tvm.compute(x.shape, lambda *i: tvm.exp(x(*i)))
Example #20
 def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, vh):
     """Transform prior anchor box to output box through location predictions.
     """
     al = anchor[anchor_base_idx]
     at = anchor[anchor_base_idx + 1]
     ar = anchor[anchor_base_idx + 2]
     ab = anchor[anchor_base_idx + 3]
     aw = ar - al
     ah = ab - at
     ax = (al + ar) / 2.0
     ay = (at + ab) / 2.0
     px = loc[loc_base_idx]
     py = loc[loc_base_idx + 1]
     pw = loc[loc_base_idx + 2]
     ph = loc[loc_base_idx + 3]
     ox = px * vx * aw + ax
     oy = py * vy * ah + ay
     ow = tvm.exp(pw * vw) * aw / 2.0
     oh = tvm.exp(ph * vh) * ah / 2.0
     return tvm.select(clip, tvm.make.Max(0, tvm.make.Min(1, ox - ow)), ox - ow), \
         tvm.select(clip, tvm.make.Max(0, tvm.make.Min(1, oy - oh)), oy - oh), \
         tvm.select(clip, tvm.make.Max(0, tvm.make.Min(1, ox + ow)), ox + ow), \
         tvm.select(clip, tvm.make.Max(0, tvm.make.Min(1, oy + oh)), oy + oh)
Example #21
 def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, vh):
     """Transform prior anchor box to output box through location predictions.
     """
     al = anchor[anchor_base_idx]
     at = anchor[anchor_base_idx + 1]
     ar = anchor[anchor_base_idx + 2]
     ab = anchor[anchor_base_idx + 3]
     aw = ar - al
     ah = ab - at
     ax = (al + ar) / 2.0
     ay = (at + ab) / 2.0
     px = loc[loc_base_idx]
     py = loc[loc_base_idx + 1]
     pw = loc[loc_base_idx + 2]
     ph = loc[loc_base_idx + 3]
     ox = px * vx * aw + ax
     oy = py * vy * ah + ay
     ow = tvm.exp(pw * vw) * aw / 2.0
     oh = tvm.exp(ph * vh) * ah / 2.0
     return tvm.if_then_else(clip, tvm.max(0, tvm.min(1, ox - ow)), ox - ow), \
            tvm.if_then_else(clip, tvm.max(0, tvm.min(1, oy - oh)), oy - oh), \
            tvm.if_then_else(clip, tvm.max(0, tvm.min(1, ox + ow)), ox + ow), \
            tvm.if_then_else(clip, tvm.max(0, tvm.min(1, oy + oh)), oy + oh)
Example #22
File: math.py Project: gwli/tvm
def exp(x):
    """Take exponential of input x.

    Parameters
    ----------
    x : tvm.Tensor
        Input argument.

    Returns
    -------
    y : tvm.Tensor
        The result.
    """
    return tvm.compute(x.shape, lambda *i: tvm.exp(x(*i)))
Example #23
def make_matrix_softmax(shape, tgt, tgt_host, func_name, dtype="float32"):
    assert len(shape) == 2, "only a 2d tensor is accepted for batched softmax"
    num_batch, num_class = shape
    logits = tvm.placeholder(shape, dtype, "logits")
    k = tvm.reduce_axis((0, num_class), name="k")
    max_logits = tvm.compute((num_batch,), lambda i: tvm.max(logits[i, k], axis=k))
    logits_shifted = tvm.compute(shape, lambda i, j: logits[i, j] - max_logits[i])
    exps = tvm.compute(shape, lambda *i: tvm.exp(logits_shifted(*i)))
    k = tvm.reduce_axis((0, num_class), name="k")
    exps_sum = tvm.compute((num_batch,), lambda i: tvm.sum(exps[i, k], axis=k))
    softmaxes = tvm.compute(shape, lambda i, j: exps[i, j] / exps_sum[i])

    s = tvm.create_schedule(softmaxes.op)
    f = tvm.build(s, [logits, softmaxes], tgt, tgt_host, name=func_name)
    return f
Example #24
def make_matrix_softmax_cross_entropy(shape,
                                      tgt,
                                      tgt_host,
                                      func_name,
                                      dtype="float32"):
    X = tvm.placeholder(shape, dtype=dtype, name="X")
    Y_orig = tvm.placeholder(shape, dtype=dtype, name="Y_orig")

    j1 = tvm.reduce_axis((0, shape[1]), "j1")
    j2 = tvm.reduce_axis((0, shape[1]), "j2")

    maxX = tvm.compute((shape[0], ),
                       lambda i: tvm.max(X[i, j1], axis=j1),
                       name="maxX")

    numerator = tvm.compute(shape,
                            lambda i, j: tvm.exp(X[i, j] - maxX[i]),
                            name="numerator")

    denominator = tvm.compute((shape[0], ),
                              lambda i: tvm.sum(numerator[i, j2], axis=j2),
                              name="denominator")

    m1 = tvm.reduce_axis((0, shape[0]), "m1")
    m2 = tvm.reduce_axis((0, shape[1]), "m2")

    cross_entropy_sum = tvm.compute((1, ),
                                    lambda i: tvm.sum(Y_orig[m1, m2] * tvm.log(
                                        numerator[m1, m2] / denominator[m1]),
                                                      axis=[m1, m2]),
                                    name="cross_entropy_sum")

    negated = tvm.compute((1, ),
                          lambda i: -cross_entropy_sum[i] / shape[0],
                          name="negated")

    s = tvm.create_schedule(negated.op)

    # print(tvm.lower(s, [X, Y_orig, negated], simple_mode=True))

    return tvm.build(s, [X, Y_orig, negated],
                     tgt,
                     target_host=tgt_host,
                     name=func_name)
Example #25
def make_matrix_softmax(shape, tgt, tgt_host, func_name, dtype="float32"):
    """TODO: Your code here"""
    """Hint: use tvm.reduce_axis, tvm.sum, tvm.max, tvm.exp"""
    """Hint: do not reuse the same reduction axis j."""
    """Hint: implement the following version for better stability
        e_x = np.exp(x - np.max(x))
        softmax(x)= e_x / e_x.sum()
    """
    X = tvm.placeholder(shape, dtype=dtype, name='X')
    ky = tvm.reduce_axis((0, shape[1]), name='ky')
    MAX_X = tvm.compute((shape[0], ), lambda i: tvm.max(X[i, ky], axis=[ky]))

    E_X = tvm.compute(shape, lambda i, j: tvm.exp(X[i, j] - MAX_X(i)))

    ky_n = tvm.reduce_axis((0, shape[1]), name='ky_n')
    E_X_SUM = tvm.compute((shape[0], ),
                          lambda i: tvm.sum(E_X[i, ky_n], axis=[ky_n]))

    Y = tvm.compute(shape, lambda i, j: E_X[i, j] / E_X_SUM(i))
    s = tvm.create_schedule(Y.op)

    block_x = tvm.thread_axis("blockIdx.x")
    thread_x = tvm.thread_axis("threadIdx.x")

    # MAX_X
    s[MAX_X].bind(MAX_X.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[MAX_X].bind(ky, tvm.thread_axis("threadIdx.x"))

    # E_X
    s[E_X].bind(E_X.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[E_X].bind(E_X.op.axis[1], tvm.thread_axis("threadIdx.x"))

    # E_X_SUM
    s[E_X_SUM].bind(E_X_SUM.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[E_X_SUM].bind(ky_n, tvm.thread_axis("threadIdx.x"))

    # SOFTMAX_X
    s[Y].bind(Y.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[Y].bind(Y.op.axis[1], tvm.thread_axis("threadIdx.x"))

    # print(tvm.lower(s, [X, Y], simple_mode=True))

    f = tvm.build(s, [X, Y], tgt, target_host=tgt_host, name=func_name)
    return _export_module(f, func_name, remote)
Example #26
def make_matrix_softmax(shape, tgt, tgt_host, func_name, dtype="float32"):
    """Hint: use tvm.reduce_axis, tvm.sum, tvm.max, tvm.exp"""
    """Hint: do not reuse the same reduction axis j."""
    """Hint: implement the following version for better stability
        e_x = np.exp(x - np.max(x))
        softmax(x)= e_x / e_x.sum()
    """
    h, w = shape
    x1 = tvm.placeholder(shape, dtype, name="x1")
    k1 = tvm.reduce_axis((0, w), name="k1")
    x2 = tvm.compute((h, ), lambda i: tvm.max(x1[i, k1], axis=k1))
    x3 = tvm.compute(shape, lambda i, j: x1[i, j] - x2[i])
    x4 = tvm.compute(shape, lambda i, j: tvm.exp(x3[i, j]))
    k2 = tvm.reduce_axis((0, w), name="k2")
    x5 = tvm.compute((h, ), lambda i: tvm.sum(x4[i, k2], axis=k2))
    y = tvm.compute(shape, lambda i, j: x4[i, j] / x5[i])

    s = tvm.create_schedule(y.op)
    f = tvm.build(s, [x1, y], tgt, target_host=tgt_host, name=func_name)
    return f
Example #27
def make_matrix_softmax_cross_entropy(shape,
                                      tgt,
                                      tgt_host,
                                      func_name,
                                      dtype="float32"):
    """TODO: Your code here"""
    """Hint: output shape should be (1,)"""

    A = tvm.placeholder(shape, dtype=dtype, name="A")
    B = tvm.placeholder(shape, dtype=dtype, name="B")
    k = tvm.reduce_axis((0, shape[1]), name="k")
    max_A = tvm.compute((shape[0], ),
                        lambda i: tvm.max(A[i, k], axis=k),
                        name="max_A")
    exp = tvm.compute(shape,
                      lambda i, j: tvm.exp(A[i, j] - max_A[i]),
                      name="exp")
    k1 = tvm.reduce_axis((0, shape[1]), name="k1")
    sum_exp = tvm.compute((shape[0], ),
                          lambda i: tvm.sum(exp[i, k1], axis=k1),
                          name="sum_exp")
    softmax = tvm.compute(shape,
                          lambda i, j: exp[i, j] / sum_exp[i],
                          name="softmax")

    log = tvm.compute(shape, lambda i, j: tvm.log(softmax[i, j]), name="log")
    k2 = tvm.reduce_axis((0, shape[1]), name="k2")
    sum_softmax = tvm.compute(
        (shape[0], ),
        lambda i: tvm.sum(B[i, k2] * log[i, k2], axis=k2),
        name="sum_softmax")
    k3 = tvm.reduce_axis((0, shape[0]), name="k3")
    softmax_cross_entropy = tvm.compute(
        (1, ), lambda i: tvm.sum(-1 * sum_softmax[k3] / shape[0], axis=k3))

    s = tvm.create_schedule(softmax_cross_entropy.op)
    f = tvm.build(s, [A, B, softmax_cross_entropy],
                  tgt,
                  target_host=tgt_host,
                  name=func_name)
    return f
Example #28
def test_inline_multi_reduce():
    def argmax_comp(x, y):
        idx = tvm.select((x[1] >= y[1]), x[0], y[0])
        val = tvm.select((x[1] >= y[1]), x[1], y[1])
        return idx, val
    def argmax_init(idx_typ, val_typ):
        return tvm.const(-1, idx_typ), tvm.min_value(val_typ)

    argmax = tvm.comm_reducer(argmax_comp, argmax_init, name='argmax')
    m = tvm.var('m')
    n = tvm.var('n')
    val = tvm.placeholder((m, n), name='val', dtype='float32')
    val1 = tvm.compute((m, n), lambda i, j: val[i, j]+1, name='val1')
    val2 = tvm.compute((m, n), lambda i, j: tvm.exp(val1[i, j]), name='val2')
    k = tvm.reduce_axis((0, n), 'k')
    T_idx, T_val = tvm.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T')
    s = tvm.create_schedule(T_idx.op)
    s[val1].compute_inline()
    s = s.normalize()
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
Example #29
def test_inline_multi_reduce():
    def argmax_comp(x, y):
        idx = tvm.expr.Select((x[1] >= y[1]), x[0], y[0])
        val = tvm.expr.Select((x[1] >= y[1]), x[1], y[1])
        return idx, val
    def argmax_init(idx_typ, val_typ):
        return tvm.const(-1, idx_typ), tvm.min_value(val_typ)

    argmax = tvm.comm_reducer(argmax_comp, argmax_init, name='argmax')
    m = tvm.var('m')
    n = tvm.var('n')
    val = tvm.placeholder((m, n), name='val', dtype='float32')
    val1 = tvm.compute((m, n), lambda i, j: val[i, j]+1, name='val1')
    val2 = tvm.compute((m, n), lambda i, j: tvm.exp(val1[i, j]), name='val2')
    k = tvm.reduce_axis((0, n), 'k')
    T_idx, T_val = tvm.compute((m, ), lambda i: argmax((k.var, val2[i, k]), axis=k), name='T')
    s = tvm.create_schedule(T_idx.op)
    s[val1].compute_inline()
    s = s.normalize()
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.schedule.ScheduleOps(s, bounds)
Example #30
def make_matrix_softmax(shape, tgt, tgt_host, func_name, dtype="float32"):
    X = tvm.placeholder(shape, dtype=dtype, name="X")
    j1 = tvm.reduce_axis((0, shape[1]), "j1")

    maxX = tvm.compute((shape[0], ),
                       lambda i: tvm.max(X[i, j1], axis=j1),
                       name="maxX")

    numerator = tvm.compute(shape,
                            lambda i, j: tvm.exp(X[i, j] - maxX[i]),
                            name="numerator")

    j2 = tvm.reduce_axis((0, shape[1]), "j2")

    denominator = tvm.compute((shape[0], ),
                              lambda i: tvm.sum(numerator[i, j2], axis=j2),
                              name="denominator")

    Y = tvm.compute(shape,
                    lambda i, j: numerator[i, j] / denominator[i],
                    name="Y")
    s = tvm.create_schedule(Y.op)
    return tvm.build(s, [X, Y], tgt, target_host=tgt_host, name=func_name)
Example #31
def make_matrix_softmax_cross_entropy(shape, tgt, tgt_host, func_name,
                                      dtype="float32"):
    assert len(shape) == 2, "only 2d tensor is accepted for batched softmax xent loss"
    num_batch, num_class = shape
    logits = tvm.placeholder(shape, dtype, "logits")
    truth = tvm.placeholder(shape, dtype, "truth")
    k = tvm.reduce_axis((0, num_class), name="k")
    max_logits = tvm.compute((num_batch,), lambda i: tvm.max(logits[i, k], axis=k))
    logits_shifted = tvm.compute(shape, lambda i, j: logits[i, j] - max_logits[i])
    exps = tvm.compute(shape, lambda *i: tvm.exp(logits_shifted(*i)))
    k = tvm.reduce_axis((0, num_class), name="k")
    exps_sum = tvm.compute((num_batch,), lambda i: tvm.sum(exps[i, k], axis=k))
    neg_pred_log = tvm.compute(shape, lambda i,j: tvm.log(exps_sum[i]) - logits_shifted[i, j])
    ewise_prod = tvm.compute(shape, lambda *i: truth(*i) * neg_pred_log(*i))

    i = tvm.reduce_axis((0, num_batch), name="i")
    j = tvm.reduce_axis((0, num_class), name="j")
    ce_sum = tvm.compute((1,), lambda _: tvm.sum(ewise_prod[i, j], axis=[i, j]))
    ce_mean = tvm.compute((1,), lambda _: ce_sum[0] / tvm.const(num_batch, dtype))

    s = tvm.create_schedule(ce_mean.op)
    f = tvm.build(s, [logits, truth, ce_mean], tgt, tgt_host, func_name)
    return f
Example #32
def log_softmax(x):
    """Perform log softmax activation on the data

    Parameters
    ----------
    x : tvm.Tensor
        2-D input data

    Returns
    -------
    output : tvm.Tensor
        2-D output with same shape
    """

    assert len(x.shape) == 2, "only support 2-dim log softmax"
    m, n = x.shape
    k = tvm.reduce_axis((0, n), name='k')
    max_elem = tvm.compute((m, ), lambda i: tvm.max(x[i, k], axis=k))
    k = tvm.reduce_axis((0, n), name='k')
    expsum = tvm.compute(
        (m, ), lambda i: tvm.sum(tvm.exp(x[i, k] - max_elem[i]), axis=k))
    return tvm.compute(
        x.shape, lambda i, j: x[i, j] - max_elem[i] - tvm.log(expsum[i]))
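Analogously, a hedged check of the log-softmax above against a NumPy reference (sizes and target are illustrative assumptions).
import numpy as np
import tvm

x = tvm.placeholder((4, 16), name="x")
y = log_softmax(x)
s = tvm.create_schedule(y.op)
f = tvm.build(s, [x, y], "llvm")

ctx = tvm.cpu(0)
a_np = np.random.uniform(size=(4, 16)).astype("float32")
b_nd = tvm.nd.array(np.zeros((4, 16), dtype="float32"), ctx)
f(tvm.nd.array(a_np, ctx), b_nd)

shifted = a_np - a_np.max(axis=1, keepdims=True)
ref = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
np.testing.assert_allclose(b_nd.asnumpy(), ref, rtol=1e-5)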
Example #33
def make_matrix_softmax(shape, tgt, tgt_host, func_name, dtype="float32"):

    """TODO: Your code here"""
    """Hint: use tvm.reduce_axis, tvm.sum, tvm.max, tvm.exp"""
    """Hint: do not reuse the same reduction axis j."""
    """Hint: implement the following version for better stability
        e_x = np.exp(x - np.max(x))
        softmax(x)= e_x / e_x.sum()
    """
    assert len(shape) == 2
    n, cls_dim = shape
    x = tvm.placeholder(shape, dtype=dtype, name='x')
    max_iter = tvm.reduce_axis((0, cls_dim), name='max_iter')
    t_max = tvm.compute((n, ), lambda i: tvm.max(x[i, max_iter], axis=max_iter), name='t_max')
    e_x = tvm.compute(shape, lambda i,j: tvm.exp(x[i,j]-t_max[i]), name='e_x')

    sum_iter = tvm.reduce_axis((0, cls_dim), name='sum_iter')
    e_x_sum = tvm.compute((n,) , lambda i:tvm.sum(e_x[i, sum_iter], axis=sum_iter), name='e_x_sum')
    softmax = tvm.compute(shape, lambda i, j: e_x[i, j] / e_x_sum[i], name='softmax')

    s = tvm.create_schedule(softmax.op)
    f = tvm.build(s, [x, softmax], tgt, target_host=tgt_host, name=func_name)
    return f
Example #34
parser.add_argument('--sim', type=str, help='the simulator to use', 
                        default='S0', choices=['S0', 'S1', 'SC'])
args = parser.parse_args()

with ScheduleProcHelper(), nnpu.Environment('./nnpu_config_fp32.yaml'):
    env = nnpu.get_env()
    nnpu.set_device(env, type=args.sim)
    dtype_n, dtype_w = env.cfg['dtype_n'], env.cfg['dtype_w']

    assert dtype_w in ['float32', 'float16'], 'when testing activation function, float dtype is needed'

    shape = (64, )
    a = tvm.placeholder(shape, dtype_w, 'a')
    a_buf = tvm.compute(shape, lambda *i: a(*i), 'a_buf')

    exp = tvm.compute(shape, lambda i: tvm.exp(a_buf[i]), 'exp')
    log = tvm.compute(shape, lambda i: tvm.log(a_buf[i]), 'log')
    tanh = tvm.compute(shape, lambda i: tvm.tanh(a_buf[i]), 'tanh')
    sigmoid = tvm.compute(shape, lambda i: tvm.sigmoid(a_buf[i]), 'sigmoid')

    # k = tvm.reduce_axis((0, 16), 'k0')
    # sum = tvm.compute((1, ), lambda i: tvm.sum(sigmoid[k], axis=k), 'sum')
    # nnpu.utils.MarkScope(sum)

    # softmax = tvm.compute(shape, lambda i: sigmoid[i] / sum[0], 'softmax')
    # nnpu.utils.MarkScope(softmax)
    # softmax_host, _ = nnpu.utils.CopyBufToH(softmax, 'softmax')

    s = nnpu.create_schedule([exp.op, log.op, tanh.op, sigmoid.op])
    # cache write
    exp_buf = s.cache_write(exp, env.get_scope('buffer0'))
Example #35
def compute_exp(a):
    return tvm.compute(a.shape, lambda *i: tvm.exp(a(*i)))
Example #36
def make_matrix_softmax_cross_entropy(shape,
                                      tgt,
                                      tgt_host,
                                      func_name,
                                      dtype="float32"):
    """TODO: Your code here"""
    """Hint: output shape should be (1,)"""
    X = tvm.placeholder(shape, dtype=dtype, name='X')
    a1 = tvm.reduce_axis((0, shape[1]), name='a1')
    MAX_X = tvm.compute((shape[0], ), lambda i: tvm.max(X[i, a1], axis=[a1]))

    E_X = tvm.compute(shape, lambda i, j: tvm.exp(X[i, j] - MAX_X(i)))

    a2 = tvm.reduce_axis((0, shape[1]), name='a2')
    E_X_SUM = tvm.compute((shape[0], ),
                          lambda i: tvm.sum(E_X[i, a2], axis=[a2]))

    SOFTMAX_X = tvm.compute(shape, lambda i, j: E_X[i, j] / E_X_SUM(i))

    LOG_SOFTMAX_X = tvm.compute(shape, lambda i, j: tvm.log(SOFTMAX_X[i, j]))

    X_P = tvm.placeholder(shape, dtype=dtype, name='X_P')

    MUL = tvm.compute(shape, lambda i, j: X_P[i, j] * LOG_SOFTMAX_X[i, j])

    a3 = tvm.reduce_axis((0, shape[1]), name='a3')
    SUM = tvm.compute((shape[0], ), lambda i: tvm.sum(-MUL[i, a3], axis=[a3]))

    a4 = tvm.reduce_axis((0, shape[0]), name='a4')
    MEAN = tvm.compute((1, ), lambda i: tvm.sum(SUM[a4] / shape[0], axis=[a4]))

    # s = tvm.create_schedule([MAX_X.op, E_X.op, E_X_SUM.op, SOFTMAX_X.op, LOG_SOFTMAX_X.op, MUL.op, SUM.op, MEAN.op])
    s = tvm.create_schedule(MEAN.op)

    # print(tvm.lower(s, [X, X_P, MEAN], simple_mode=True))

    # MAX_X
    s[MAX_X].bind(MAX_X.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[MAX_X].bind(a1, tvm.thread_axis("threadIdx.x"))

    # E_X
    s[E_X].bind(E_X.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[E_X].bind(E_X.op.axis[1], tvm.thread_axis("threadIdx.x"))

    # E_X_SUM
    s[E_X_SUM].bind(E_X_SUM.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[E_X_SUM].bind(a2, tvm.thread_axis("threadIdx.x"))

    # SOFTMAX_X
    s[SOFTMAX_X].bind(SOFTMAX_X.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[SOFTMAX_X].bind(SOFTMAX_X.op.axis[1], tvm.thread_axis("threadIdx.x"))

    # LOG_SOFT_MAX
    s[LOG_SOFTMAX_X].bind(LOG_SOFTMAX_X.op.axis[0],
                          tvm.thread_axis("blockIdx.x"))
    s[LOG_SOFTMAX_X].bind(LOG_SOFTMAX_X.op.axis[1],
                          tvm.thread_axis("threadIdx.x"))

    # MUL
    s[MUL].bind(MUL.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[MUL].bind(MUL.op.axis[1], tvm.thread_axis("threadIdx.x"))

    # SUM
    s[SUM].bind(SUM.op.axis[0], tvm.thread_axis("blockIdx.x"))
    s[SUM].bind(a3, tvm.thread_axis("threadIdx.x"))

    # MEAN
    # s[MEAN].bind(a4, tvm.thread_axis("blockIdx.x"))
    s[MEAN].bind(a4, tvm.thread_axis("threadIdx.x"))

    # print(tvm.lower(s, [X, X_P, MEAN], simple_mode=True))

    # block_x = tvm.thread_axis("blockIdx.x")
    # thread_x = tvm.thread_axis("threadIdx.x")

    # zo, zi = s[SUM].split(SUM.op.axis[0], 3)
    # print(tvm.lower(s, [X, X_P, MEAN], simple_mode=True))
    # s[SUM].bind(zo, block_x)
    # s[SUM].bind(zi, thread_x)

    f = tvm.build(s, [X, X_P, MEAN], tgt, target_host=tgt_host, name=func_name)
    return _export_module(f, func_name, remote)
Example #37
######################################################################
# Unified Intrinsic Call
# ----------------------
# The above code verifies that a direct external call can be used to
# call into device-specific functions.
# However, that approach only works for the CUDA target with the float type.
# Ideally, we want to write the same code for any device and any data type.
#
# TVM intrinsics provide a mechanism to achieve this, and they are
# the recommended way to solve the problem.
# The following code uses tvm.exp instead, which creates an intrinsic call
# :any:`tvm.exp` to do the exponential.
#
n = tvm.var("n")
A = tvm.placeholder((n, ), name='A')
B = tvm.compute(A.shape, lambda i: tvm.exp(A[i]), name="B")
s = tvm.create_schedule(B.op)
num_thread = 64
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
fcuda = tvm.build(s, [A, B], "cuda", name="myexp")
print(fcuda.imported_modules[0].get_source())
######################################################################
# We can see that the same code works for both CUDA and OpenCL.
# The same tvm.exp can also be used for float64 data types.
#
fopencl = tvm.build(s, [A, B], "opencl", name="myexp")
print(fopencl.imported_modules[0].get_source())
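######################################################################
# As a small hedged illustration of the float64 point above (not part of
# the original tutorial; the names and sizes here are assumptions), the
# same pattern can be built with a float64 placeholder:
#
A64 = tvm.placeholder((n,), name="A64", dtype="float64")
B64 = tvm.compute(A64.shape, lambda i: tvm.exp(A64[i]), name="B64")
s64 = tvm.create_schedule(B64.op)
bx64, tx64 = s64[B64].split(B64.op.axis[0], factor=num_thread)
s64[B64].bind(bx64, tvm.thread_axis("blockIdx.x"))
s64[B64].bind(tx64, tvm.thread_axis("threadIdx.x"))
fcuda64 = tvm.build(s64, [A64, B64], "cuda", name="myexp_f64")
print(fcuda64.imported_modules[0].get_source())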

######################################################################
Example #38
 def _compute_expsum(max_elem, *indices):
     eval_range = insert_reduce_index(indices, k2)
     return tvm.sum(tvm.exp(x[eval_range] - max_elem[indices]), axis=k2)
Example #39
label = tvm.placeholder((N, L), name='label')
data = tvm.placeholder((N, D), name='data')
weight = tvm.placeholder((L, D + 1), name='weight')

data_expand = tvm.compute((N, D + 1), lambda n, d:
        tvm.select((d < D), data[n, d], tvm.const(1, dtype=data.dtype)),
        name='data_expand')

rd = tvm.reduce_axis((0, D + 1), name='rd')
dot = tvm.compute((N, L), lambda n, l:
        tvm.sum(weight[l, rd] * data_expand[n, rd], axis=rd),
        name='dot')

scale = tvm.compute((N, L), lambda n, l:
        (1 / (1 + tvm.exp(-label[n, l] * dot[n, l])) - 1) * label[n, l],
        name='scale')

rn = tvm.reduce_axis((0, N), name='rn')
gradient = tvm.compute((L, D + 1), lambda l, d:
        tvm.sum(scale[rn, l] * data_expand[rn, d], axis=rn),
        name='gradient')

new_weight = tvm.compute((L, D + 1), lambda l, d:
        weight[l, d] - learning_rate * gradient[l, d],
        name='new_weight')

# === End computation

# Scheduling
s = tvm.create_schedule(new_weight.op)
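A hedged sketch of how this gradient pipeline might be built and exercised, assuming N, L, D and learning_rate were defined as plain Python values before the computation above (the sizes and the llvm target below are illustrative assumptions).
import numpy as np
import tvm

# e.g. N, L, D, learning_rate = 64, 3, 10, 0.1 would be set before the
# placeholders above are declared.
f = tvm.build(s, [label, data, weight, new_weight], "llvm", name="lr_step")

ctx = tvm.cpu(0)
label_np = np.random.choice([-1.0, 1.0], size=(N, L)).astype("float32")
data_np = np.random.uniform(size=(N, D)).astype("float32")
weight_np = np.zeros((L, D + 1), dtype="float32")
out_nd = tvm.nd.array(np.zeros((L, D + 1), dtype="float32"), ctx)

f(tvm.nd.array(label_np, ctx), tvm.nd.array(data_np, ctx),
  tvm.nd.array(weight_np, ctx), out_nd)
new_weight_np = out_nd.asnumpy()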
Example #40
######################################################################
# Unified Intrinsic Call
# ----------------------
# The above code verifies that a direct external call can be used to
# call into device-specific functions.
# However, that approach only works for the CUDA target with the float type.
# Ideally, we want to write the same code for any device and any data type.
#
# TVM intrinsics provide a mechanism to achieve this, and they are
# the recommended way to solve the problem.
# The following code uses tvm.exp instead, which creates an intrinsic call
# :any:`tvm.exp` to do the exponential.
#
n = tvm.var("n")
A = tvm.placeholder((n,), name='A')
B = tvm.compute(A.shape, lambda i: tvm.exp(A[i]), name="B")
s = tvm.create_schedule(B.op)
num_thread = 64
bx, tx = s[B].split(B.op.axis[0], factor=num_thread)
s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
fcuda = tvm.build(s, [A, B], "cuda", name="myexp")
print(fcuda.imported_modules[0].get_source())
######################################################################
# We can see that the same code works for both CUDA and OpenCL.
# The same tvm.exp can also be used for float64 data types.
#
fopencl = tvm.build(s, [A, B], "opencl", name="myexp")
print(fopencl.imported_modules[0].get_source())

######################################################################
Example #41
 def func2():
     return tvm.exp((x + y + 1) * y / 4)
Example #42
 def _normalize(max_elem, expsum, *indices):
     non_reduce_indices = tuple([var for (i, var) in enumerate(indices) if i != axis])
     return tvm.exp(x[indices] - max_elem[non_reduce_indices]) / expsum[non_reduce_indices]