Example #1
def batch_matmul_2D(data1, data2, bias=None, out_dtype="float32", layout1="NHDT", layout2="NHDT", layout_out="NHDT"):
    """batch matmul for 2-D data (only the trailing D/T letters of the layout strings are used)"""
    layout1_dict = {}
    layout2_dict = {}
    layout1 = layout1[2:]
    layout2 = layout2[2:]
    layout1_str = layout1.replace('D', 'm').replace('T', 'k')  
    layout2_str = layout2.replace('D', 'n').replace('T', 'k')   
    layout1_list = list(layout1_str)
    layout2_list = list(layout2_str)

    for i in range(len(layout1)):
        layout1_dict[layout1_list[i]] = data1.shape[i]
        layout2_dict[layout2_list[i]] = data2.shape[i]

    reduce_axis = tvm.reduce_axis((0, layout1_dict['k']), name='reduce_axis')

    if out_dtype == "float32":
        res = tvm.compute((layout1_dict['m'], layout2_dict['n']), lambda i, j: tvm.sum(
                data1[i if layout1_list[0] == 'm' else reduce_axis, reduce_axis if layout1_list[1] == 'k' else i].astype("float") *
                data2[j if layout2_list[0] == 'n' else reduce_axis, reduce_axis if layout2_list[1] == 'k' else j].astype("float"), axis=reduce_axis))
    else:
        res = tvm.compute((layout1_dict['m'], layout2_dict['n']), lambda i, j: tvm.sum(
                data1[i if layout1_list[0] == 'm' else reduce_axis, reduce_axis if layout1_list[1] == 'k' else i] *
                data2[j if layout2_list[0] == 'n' else reduce_axis, reduce_axis if layout2_list[1] == 'k' else j], axis=reduce_axis))
    
    if bias is not None:
        res = topi.add(res, bias)

    if layout_out != "NHDT":
        res = auto_out_transpose(res, layout_out)
    return res
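A minimal, hypothetical call site for batch_matmul_2D, assuming the legacy top-level TVM API (tvm.placeholder, tvm.create_schedule) that the snippet itself relies on; the shapes and names below are illustrative only:
import tvm  # the pre-0.7 style API used by the snippets above

data1 = tvm.placeholder((64, 128), dtype="float16", name="data1")  # (m, k) in the default NHDT layout
data2 = tvm.placeholder((32, 128), dtype="float16", name="data2")  # (n, k), i.e. already transposed
res = batch_matmul_2D(data1, data2, out_dtype="float32")           # (m, n) with float32 accumulation
s = tvm.create_schedule(res.op)
print(tvm.lower(s, [data1, data2, res], simple_mode=True))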
Example #2
def TensorcoreConv(data,
                   weight,
                   stride=[1, 1],
                   pad=[0, 0, 0, 0],
                   dilation=[1, 1],
                   out_dtype="float32",
                   name="out",
                   target=utils.CUDA):
    """conv2d on NHWC data with OHWI weight; float16 inputs, optionally accumulated in float32 for Tensor Core."""
    batch, in_h, in_w, in_c = data.shape
    out_c, k_h, k_w, _ = weight.shape
    pad_top, pad_bottom, pad_left, pad_right = pad
    s_h, s_w = stride
    d_h, d_w = dilation
    k_h_d = (k_h - 1) * d_h + 1
    k_w_d = (k_w - 1) * d_w + 1
    o_h = (in_h + pad_top + pad_bottom - k_h_d) // s_h + 1
    o_w = (in_w + pad_left + pad_right - k_w_d) // s_w + 1

    has_pad = not (pad_left == 0 and pad_right == 0 and pad_top == 0
                   and pad_bottom == 0)

    if has_pad:
        data_pad = tvm.compute(
            (batch, in_h + pad_top + pad_bottom, in_w + pad_left + pad_right,
             in_c),
            lambda n, h, w, i: tvm.if_then_else(
                # valid data lives in [pad_top, pad_top + in_h) x [pad_left, pad_left + in_w)
                tvm.all(h >= pad_top, h - pad_top < in_h, w >= pad_left,
                        w - pad_left < in_w),
                data[n, h - pad_top, w - pad_left, i],
                tvm.const(0.0, "float16"),
            ),
            name="Pad",
        )
    else:
        data_pad = data

    rc = tvm.reduce_axis((0, in_c), name="rc")
    rh = tvm.reduce_axis((0, k_h), name="rh")
    rw = tvm.reduce_axis((0, k_w), name="rw")

    if out_dtype == "float32":
        out = tvm.compute(
            (batch, o_h, o_w, out_c),
            lambda n, h, w, o: tvm.sum(data_pad[n, (h * s_h + rh * d_h), (
                w * s_w + rw * d_w), rc].astype("float32") * weight[
                    o, rh, rw, rc].astype("float32"),
                                       axis=[rc, rh, rw]),
            name=name)
    else:
        out = tvm.compute(
            (batch, o_h, o_w, out_c),
            lambda n, h, w, o: tvm.sum(data_pad[n, (h * s_h + rh * d_h), (
                w * s_w + rw * d_w), rc] * weight[o, rh, rw, rc],
                                       axis=[rc, rh, rw]),
            name=name)

    return out
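A hedged usage sketch for TensorcoreConv; it assumes the surrounding module already provides utils (for the default target) and the same legacy tvm API, with made-up shapes for illustration:
import tvm

data = tvm.placeholder((1, 32, 32, 16), dtype="float16", name="data")     # NHWC
weight = tvm.placeholder((16, 3, 3, 16), dtype="float16", name="weight")  # OHWI
out = TensorcoreConv(data, weight, stride=[1, 1], pad=[1, 1, 1, 1], out_dtype="float32")
# out.shape == (1, 32, 32, 16): float16 multiplies with float32 accumulation, the pattern Tensor Cores expect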
Example #3
def Conv(data, weight, stride=[1, 1], pad=[0, 0, 0, 0], dilation=[1, 1], name="out", target=utils.CUDA):
    """
    Supported Platforms:
        'GPU'
    """
    if target != utils.CUDA:
        raise RuntimeError("the target %s is not supported!" % target)
    batch, in_c, in_h, in_w = data.shape
    out_c, in_c, k_h, k_w = weight.shape
    pad_top, pad_bottom, pad_left, pad_right = pad
    s_h, s_w = stride
    d_h, d_w = dilation
    k_h_d = (k_h - 1) * d_h + 1
    k_w_d = (k_w - 1) * d_w + 1
    o_h = (in_h + pad_top + pad_bottom - k_h_d) // s_h + 1
    o_w = (in_w + pad_left + pad_right - k_w_d) // s_w + 1
    out_shape = (batch, out_c, o_h, o_w)

    data_pad = topi.nn.pad(data, [0, 0, pad_top, pad_left], [0, 0, pad_bottom, pad_right], 0.0)

    rc = tvm.reduce_axis((0, in_c), name="rc")
    rh = tvm.reduce_axis((0, k_h), name="rh")
    rw = tvm.reduce_axis((0, k_w), name="rw")

    out = tvm.compute(out_shape,
                    lambda n, c, h, w: tvm.sum(
                        data_pad[n, rc, h * s_h + rh * d_h, w * s_w + rw * d_w] * weight[c, rc, rh, rw],
                        axis=[rc, rh, rw]),
                    name=name)
    # uncomment the next line to fuse a ReLU onto the convolution output
    # out = tvm.compute(out.shape, lambda *i: tvm.max(out(*i), tvm.const(0, out.dtype)), name="relu")
    return out
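The commented-out line above hints at fusing a ReLU onto the result. A sketch of how that epilogue could be attached at a hypothetical call site (shapes are illustrative; utils and topi are assumed to be provided by the surrounding module):
import tvm

data = tvm.placeholder((1, 16, 28, 28), dtype="float32", name="data")     # NCHW
weight = tvm.placeholder((32, 16, 3, 3), dtype="float32", name="weight")  # OIHW
conv = Conv(data, weight, stride=[1, 1], pad=[1, 1, 1, 1])                # (1, 32, 28, 28)
relu = tvm.compute(conv.shape,
                   lambda *i: tvm.max(conv(*i), tvm.const(0, conv.dtype)),
                   name="relu")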
Example #4
def batch_matmul_4d(data1, data2, attrs):
    """batch matmul for 4-D data"""
    bias, out_dtype, layout1, layout2, layout_out = attrs
    layout1_dict = {}
    layout2_dict = {}
    layout1_str = layout1.replace('N', 'B').replace(
        'H', 'b').replace('D', 'm').replace('T', 'k')
    layout2_str = layout2.replace('N', 'B').replace(
        'H', 'b').replace('D', 'n').replace('T', 'k')
    layout1_list = list(layout1_str)
    layout2_list = list(layout2_str)

    for i in range(len(layout1)):
        layout1_dict[layout1_list[i]] = data1.shape[i]
        layout2_dict[layout2_list[i]] = data2.shape[i]

    reduce_axis = tvm.reduce_axis(
        (0, layout1_dict.get('k')), name='reduce_axis')

    if out_dtype == "float32":
        res = tvm.compute(
            (layout1_dict.get('B'), layout1_dict.get('b'),
             layout1_dict.get('m'), layout2_dict.get('n')),
            lambda B, b, i, j: tvm.sum(
                data1[B, b, i if layout1_list[2] == 'm' else reduce_axis,
                      reduce_axis if layout1_list[3] == 'k' else i].astype("float") *
                data2[B, b, j if layout2_list[2] == 'n' else reduce_axis,
                      reduce_axis if layout2_list[3] == 'k' else j].astype("float"), axis=reduce_axis))
    else:
        res = tvm.compute(
            (layout1_dict.get('B'), layout1_dict.get('b'),
             layout1_dict.get('m'), layout2_dict.get('n')),
            lambda B, b, i, j: tvm.sum(
                data1[B, b, i if layout1_list[2] == 'm' else reduce_axis,
                      reduce_axis if layout1_list[3] == 'k' else i] *
                data2[B, b, j if layout2_list[2] == 'n' else reduce_axis,
                      reduce_axis if layout2_list[3] == 'k' else j], axis=reduce_axis))

    if bias is not None:
        res = topi.add(res, bias)

    if layout_out != "NHDT":
        res = auto_out_transpose(res, layout_out)
    return res
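batch_matmul_4d packs all of its options into a single attrs tuple; a hypothetical call with made-up shapes could look like this (same legacy tvm API as above):
import tvm

data1 = tvm.placeholder((8, 12, 64, 32), dtype="float16", name="data1")  # (B, b, m, k)
data2 = tvm.placeholder((8, 12, 96, 32), dtype="float16", name="data2")  # (B, b, n, k)
# attrs = (bias, out_dtype, layout1, layout2, layout_out)
res = batch_matmul_4d(data1, data2, (None, "float16", "NHDT", "NHDT", "NHDT"))
# res.shape == (8, 12, 64, 96)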
Example #5
def batch_matmul_4D(data1, data2, bias, layout1="NHDT", layout2="NHDT", layout_out="NHDT"):
    """batch matmul for 4-D data, transposing inputs and output to/from NHDT as needed"""
    if layout1 != "NHDT":
        data1 = auto_in_transpose(data1, layout1)
    if layout2 != "NHDT":
        data2 = auto_in_transpose(data2, layout2)

    b1, b2, m, k = data1.shape
    b1, b2, n, k = data2.shape
    reduce_axis = tvm.reduce_axis((0, k), name='reduce_axis')
    res = tvm.compute((b1, b2, m, n),
                      lambda i_b1, i_b2, i_m, i_n: tvm.sum(
                          data1[i_b1, i_b2, i_m, reduce_axis] * data2[i_b1, i_b2, i_n, reduce_axis],
                          axis=reduce_axis),
                      name='matmul_compute')
    if bias is not None:
        res = topi.add(res, bias)
    if layout_out != "NHDT":
        res = auto_out_transpose(res, layout_out)
    return res
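Unlike the previous example, batch_matmul_4D takes bias as a positional argument; a hypothetical call with a shape-matching bias (illustrative shapes, topi and the auto_*_transpose helpers assumed to be in scope) might be:
import tvm

data1 = tvm.placeholder((4, 8, 32, 16), dtype="float32", name="data1")  # (b1, b2, m, k)
data2 = tvm.placeholder((4, 8, 64, 16), dtype="float32", name="data2")  # (b1, b2, n, k)
bias = tvm.placeholder((4, 8, 32, 64), dtype="float32", name="bias")
res = batch_matmul_4D(data1, data2, bias)  # topi.add folds the bias into the (4, 8, 32, 64) result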
Example #6
def conv2d_nhwc(inputs, attrs):
    attrs = {k: v for k, v in attrs.items()}
    # Check inputs and attrs
    if len(inputs) != 2:
        raise ValueError("length of inputs shoule be 2, but got %d." %
                         len(inputs))
    if "stride" not in attrs:
        raise ValueError("stride not be found in the attrs")
    data = inputs[0]
    weight = inputs[1]
    output_name = "T_conv2d_nhwc_" + data.op.name + "_" + weight.op.name
    stride = attrs["stride"]
    data_dtype = data.dtype
    weight_dtype = weight.dtype
    # Check data type
    vc_util.ops_dtype_check(data_dtype, vc_util.DtypeForDavinci.FLOAT16)
    vc_util.ops_dtype_check(weight_dtype, vc_util.DtypeForDavinci.FLOAT16)
    # Check shape
    if len(data.shape) != 4 or len(weight.shape) != 4:
        raise ValueError(
            "shape of data and weight should be 4-dim, but got %d and %d." %
            (len(data.shape), len(weight.shape)))
    # Compute output
    n, in_h, in_w, in_c = data.shape
    out_c, k_h, k_w, in_c = weight.shape
    _, _, s_h, s_w = stride
    o_h = (in_h - k_h) // s_h + 1
    o_w = (in_w - k_w) // s_w + 1
    rc = tvm.reduce_axis((0, in_c), name="rc")
    rh = tvm.reduce_axis((0, k_h), name="rh")
    rw = tvm.reduce_axis((0, k_w), name="rw")
    output = tvm.compute((n, o_h, o_w, out_c),
                         lambda n, h, w, o: tvm.sum(data[n, (h * s_h + rh), (
                             w * s_w + rw), rc] * weight[o, rh, rw, rc],
                                                    axis=[rc, rh, rw]),
                         name=output_name)
    return output
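conv2d_nhwc is driven by an inputs list and an attrs dict rather than keyword arguments; a hypothetical call (float16 tensors as required by the dtype check, vc_util assumed to be importable in the defining module) could be:
import tvm

data = tvm.placeholder((1, 28, 28, 16), dtype="float16", name="data")     # NHWC
weight = tvm.placeholder((32, 3, 3, 16), dtype="float16", name="weight")  # OHWI
# stride is unpacked as (_, _, s_h, s_w), so four entries are expected here
out = conv2d_nhwc([data, weight], {"stride": [1, 1, 1, 1]})
# out.shape == (1, 26, 26, 32); this kernel applies no padding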