Example #1
def _alter_conv2d_layout_arm(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    # Remove attached compilation target because conv2d_NCHWc needs to create
    # a conv2d_nchwc op and target is not one of conv2d's parameters.
    if "target" in new_attrs:
        del new_attrs["target"]

    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
                                                      "when alter_op_layout is enabled"
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = _conv_arg_to_workload(tinfos[0], tinfos[1], strides,
                                         padding, layout, out_dtype)
        cfg = autotvm.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(tvm.target.current_target(),
                                              workload)
            return None

        if cfg.template_key == 'direct':  # packing weight tensor
            new_attrs['kernel_layout'] = 'OIHW%do' % (cfg['tile_co'].size[-1])
            return sym.conv2d(*copy_inputs, **new_attrs)
        else:  # pre-compute weight transformation in winograd
            if "-device=arm_cpu" in tvm.target.current_target().options:
                tile_size = 4
                VC = cfg['tile_k'].size[-1]
            else:
                from ..mali.conv2d import _pick_tile_size
                tile_size = _pick_tile_size(tinfos[0], tinfos[1])
                VC = cfg['tile_bna'].val

            weight = sym.contrib.conv2d_winograd_weight_transform(
                copy_inputs[1], tile_size=tile_size)
            CO, CI, KH, KW = get_const_tuple(tinfos[1].shape)
            weight = sym.reshape(weight,
                                 shape=(KH + tile_size - 1, KW + tile_size - 1,
                                        CO // VC, VC, CI))
            weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

            copy_inputs[1] = weight
            new_attrs['tile_size'] = tile_size
            return sym.contrib.conv2d_winograd_without_weight_transform(
                *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
Example #2
def nnvm_dot(c, a, b):
    """Implementation of dot."""
    na = c.ref(a)
    nb = c.ref(b)
    return sym.dense(na,
                     sym.transpose(nb, axes=(1, 0)),
                     units=b.shape[1],
                     use_bias=False)
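
The transpose here compensates for dense computing data @ weight.T, so feeding b.T as the weight recovers a plain matrix product a @ b. A minimal NumPy sketch of the same identity (hypothetical shapes, no nnvm needed):

import numpy as np

a = np.random.rand(3, 4).astype("float32")   # (batch, in_units)
b = np.random.rand(4, 5).astype("float32")   # (in_units, out_units)

w = b.T                  # what nnvm_dot passes as the dense weight
dense_out = a @ w.T      # dense computes data @ weight.T
assert np.allclose(dense_out, a @ b)
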
Example #3
def verify_transpose(dshape, axes):
    x = sym.Variable("x")
    if axes:
        y = sym.transpose(x, axes=axes)
    else:
        y = sym.transpose(x)
    y = y + 1
    dtype = "float32"
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = graph_runtime.create(graph, lib, ctx)
        # set input
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        m.run(x=data)
        out_np = np.transpose(data.asnumpy(), axes=axes) + 1
        out = m.get_output(0, tvm.nd.empty(out_np.shape))
        np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
Example #4
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    if 'cudnn' in tvm.target.current_target().libs or \
            'miopen' in tvm.target.current_target().libs:
        return None

    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
                                                      "when alter_op_layout is enabled"
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = ('conv2d', ) + autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, layout, out_dtype])

        cfg = autotvm.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(tvm.target.current_target(),
                                              workload)
            return None

        if cfg.template_key == 'direct':
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in tvm.target.current_target().keys
            new_attrs['layout'] = 'NCHW4c'
            new_attrs['out_layout'] = 'NCHW4c'
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            return sym.conv2d(*copy_inputs, **new_attrs)

        # pre-compute weight transformation in winograd
        tile_size = _infer_tile_size(tinfos[0], tinfos[1])

        weight = sym.contrib.conv2d_winograd_weight_transform(
            copy_inputs[1], tile_size=tile_size)
        weight = sym.transpose(weight, axes=[0, 1, 3, 2])
        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size
        return sym.contrib.conv2d_winograd_without_weight_transform(
            *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
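
The axes=[0, 1, 3, 2] transpose only swaps the last two axes of the transformed weight; assuming the transform yields an (alpha, alpha, CO, CI) tensor, the result is (alpha, alpha, CI, CO), which matches the new_weight placeholder in the fuller variants of this function later on. A shape-only NumPy sketch with stand-in sizes:

import numpy as np

alpha, CO, CI = 6, 8, 4
wt = np.random.rand(alpha, alpha, CO, CI)    # stand-in for the transformed weight
swapped = np.transpose(wt, (0, 1, 3, 2))
assert swapped.shape == (alpha, alpha, CI, CO)
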
Example #5
def test_transpose():
    x = sym.Variable("x", shape=(1, 32, 512, 512))
    y = sym.transpose(x, name="y", axes=(0, 2, 3, 1))
    g, ldict = correct_layout(y, "NCHW")
    assert (ldict["x"][0] == "NCHW")
    assert (ldict["y"][0] == "NHWC")
    # second pass will insert layout transform
    g, ldict = correct_layout(g, "NCHW16c")
    assert (ldict["x"][0] == "NCHW16c")
    assert (ldict["x_NCHW"][0] == "NCHW")
    assert (ldict["y"][0] == "NHWC")
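
The axes=(0, 2, 3, 1) permutation is exactly the NCHW -> NHWC change that correct_layout reports for y. A quick NumPy check of the same permutation (illustrative only):

import numpy as np

nchw = np.zeros((1, 32, 512, 512))           # N, C, H, W
nhwc = np.transpose(nchw, (0, 2, 3, 1))      # N, H, W, C
assert nhwc.shape == (1, 512, 512, 32)
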
Example #6
def weight_prepack_conv2d(attrs, inputs, tinfos):
    import ast
    data = tinfos[0]
    kernel = tinfos[1]
    padding = ast.literal_eval(attrs['padding'])
    stride = ast.literal_eval(attrs['strides'])
    wkl = _get_workload(data, kernel, stride, padding, 'float32')
    sch = _get_schedule_conv(wkl)
    is_kernel_1x1 = isinstance(sch, AVX512Conv1x1Fwd)

    ic_bn, oc_bn = sch.ic_bn, sch.oc_bn

    new_attrs = {k: attrs[k] for k in attrs.keys()}
    new_attrs.pop('layout', None)

    kernel_sym = inputs[1]
    oc, ic, h, w = get_const_tuple(tinfos[1].shape)
    OC = oc // oc_bn
    IC = ic // ic_bn
    trans_kernel = sym.transpose(kernel_sym, axes=(1, 2, 3, 0))
    trans_kernel = sym.reshape(trans_kernel, shape=(ic, h, w, OC, oc_bn))
    trans_kernel = sym.transpose(trans_kernel, axes=(1, 2, 3, 4, 0))
    trans_kernel = sym.reshape(trans_kernel,
                               shape=(h, w, OC, oc_bn, IC, ic_bn))
    if is_kernel_1x1:
        # (oc, ic, h, w) -> (OC, IC, ic, oc, h, w)
        trans_kernel = sym.transpose(trans_kernel, axes=(2, 4, 5, 3, 0, 1))
    else:
        # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
        trans_kernel = sym.transpose(trans_kernel, axes=(2, 4, 0, 1, 5, 3))

    if attrs.get_bool('use_bias'):
        bias = inputs[2]
        bias = sym.reshape(bias, shape=(OC, oc_bn))
        return sym.contrib.conv2d_nchw_kernel_packed(inputs[0], trans_kernel,
                                                     bias, **new_attrs)
    else:
        return sym.contrib.conv2d_nchw_kernel_packed(inputs[0], trans_kernel,
                                                     **new_attrs)
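
The reshape/transpose chain above repacks the kernel from (oc, ic, h, w) into the blocked (OC, IC, h, w, ic_bn, oc_bn) layout. A NumPy sketch (block sizes are made-up stand-ins for sch.oc_bn / sch.ic_bn) showing that the chain is equivalent to a single reshape plus transpose:

import numpy as np

oc, ic, h, w, oc_bn, ic_bn = 8, 4, 3, 3, 4, 2
kernel = np.random.rand(oc, ic, h, w)

# the multi-step chain from weight_prepack_conv2d (non-1x1 branch)
t = kernel.transpose(1, 2, 3, 0).reshape(ic, h, w, oc // oc_bn, oc_bn)
t = t.transpose(1, 2, 3, 4, 0).reshape(h, w, oc // oc_bn, oc_bn, ic // ic_bn, ic_bn)
packed = t.transpose(2, 4, 0, 1, 5, 3)       # -> (OC, IC, h, w, ic_bn, oc_bn)

# the same repack as one reshape + transpose
direct = kernel.reshape(oc // oc_bn, oc_bn, ic // ic_bn, ic_bn, h, w) \
               .transpose(0, 2, 4, 5, 3, 1)
assert np.allclose(packed, direct)
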
Example #7
File: conv2d.py Project: souptc/tvm
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
                                                      "when alter_op_layout is enabled"
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = _conv_arg_to_workload(tinfos[0], tinfos[1], strides,
                                         padding, layout, out_dtype)
        cfg = autotvm.task.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.template_key == 'direct':  # packing weight tensor
            new_attrs['kernel_layout'] = 'OIHW%do' % (cfg['tile_co'].size[-1])
            return sym.conv2d(*copy_inputs, **new_attrs)
        else:  # pre-compute weight transformation in winograd
            tile_size = 4

            weight = sym.contrib.conv2d_winograd_weight_transform(
                copy_inputs[1], tile_size=tile_size)
            CO, CI, KH, KW = get_const_tuple(tinfos[1].shape)
            VC = cfg['tile_k'].size[-1]
            weight = sym.reshape(weight,
                                 shape=(KH + tile_size - 1, KW + tile_size - 1,
                                        CO // VC, VC, CI))
            weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

            copy_inputs[1] = weight
            new_attrs['tile_size'] = tile_size
            return sym.contrib.conv2d_winograd_without_weight_transform(
                *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
Example #8
def test_argmax():
    dshape = (204800, 2)
    oshape = (1, 320, 640)

    dtype = "float32"
    x = sym.Variable("x", shape=dshape, dtype=dtype)
    x = sym.reshape(x, shape=(1, 320, 640, 2))
    x = sym.transpose(x, axes=(0, 3, 1, 2))
    y = sym.argmax(x, axis=1)
    target_str = "llvm"
    target = tvm.target.create(target_str)
    ctx = tvm.context(target_str, 0)
    with nnvm.compiler.build_config(opt_level=2):
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
    m = graph_runtime.create(graph, lib, ctx)
    data = np.random.uniform(size=dshape).astype(dtype)
    m.run(x=data)
    np_reshape = np.reshape(data, (1, 320, 640, 2))
    np_transpose = np.transpose(np_reshape, axes=(0, 3, 1, 2))
    np_argmax = np.argmax(np_transpose, axis=1)
    out = m.get_output(0)
    np.testing.assert_allclose(out.asnumpy(), np_argmax, atol=1e-5, rtol=1e-5)
Example #9
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    if 'cudnn' in tvm.target.current_target().libs or 'miopen' in tvm.target.current_target().libs:
        return None

    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    data, kernel = tinfos[0:2]
    N, CI, H, W = get_const_tuple(data.shape)
    CO, _, KH, KW = get_const_tuple(kernel.shape)

    dispatch_ctx = autotvm.DispatchContext.current
    target = tvm.target.current_target()

    if groups == 1:
        # query config of this workload
        workload = autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, dilation, layout, out_dtype], conv2d)
        cfg = autotvm.DispatchContext.current.query(target, workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(target, workload)
            return None

        if cfg.template_key == 'direct':
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in target.keys
            new_layout = 'NCHW4c'
            new_attrs['layout'] = new_layout
            new_attrs['out_layout'] = new_layout
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            ic_block_factor = oc_block_factor = 4

            # Store the same config for the altered operator (workload)
            new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
                                       dtype=data.dtype)
            new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW,\
                                         oc_block_factor, ic_block_factor), dtype=kernel.dtype)
            new_workload = autotvm.task.args_to_workload(
                [new_data, new_kernel, strides, padding, dilation, new_layout, out_dtype],
                conv2d
            )
            dispatch_ctx.update(target, new_workload, cfg)
            return sym.conv2d(*copy_inputs, **new_attrs)

        if attrs.get_int_tuple("dilation") != (1, 1):
            warnings.warn("Does not support weight pre-transform for dilated convolution.")
            return None

        # pre-compute weight transformation in winograd
        tile_size = _infer_tile_size(tinfos[0], tinfos[1])

        weight = sym.contrib.conv2d_winograd_weight_transform(copy_inputs[1],
                                                              tile_size=tile_size)
        weight = sym.transpose(weight, axes=[0, 1, 3, 2])
        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size

        # Store the same config for the altered operator (workload)
        new_data = data
        new_weight = tvm.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO),
                                     dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload(
            [new_data, new_weight, strides, padding, dilation, layout, out_dtype, tile_size],
            conv2d_winograd_without_weight_transform
        )
        dispatch_ctx.update(target, new_workload, cfg)
        return sym.contrib.conv2d_winograd_without_weight_transform(*copy_inputs, **new_attrs)
    elif groups != CI:
        workload = autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, dilation, groups, out_dtype],
            group_conv2d_nchw)
        cfg = autotvm.DispatchContext.current.query(target, workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(target, workload)
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in target.keys
            new_layout = 'NCHW4c'
            new_attrs['layout'] = new_layout
            new_attrs['out_layout'] = new_layout
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            ic_block_factor = oc_block_factor = 4

            # Store the same config for the altered operator (workload)
            new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
                                       dtype=data.dtype)
            new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups,\
                                         KH, KW, oc_block_factor, ic_block_factor),
                                         dtype=kernel.dtype)
            new_workload = autotvm.task.args_to_workload(
                [new_data, new_kernel, strides, padding, dilation, groups, out_dtype],
                group_conv2d_nchw
            )
            dispatch_ctx.update(target, new_workload, cfg)
            return sym.conv2d(*copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
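
The int8 branch switches the data to NCHW4c and the kernel to OIHW4o4i, i.e. the channel axes are split into blocks of 4 with the block moved innermost. In NumPy terms (illustrative shapes, data side only):

import numpy as np

N, C, H, W = 1, 8, 5, 5
data = np.random.rand(N, C, H, W)
nchw4c = data.reshape(N, C // 4, 4, H, W).transpose(0, 1, 3, 4, 2)   # NCHW -> NCHW4c
assert nchw4c.shape == (N, C // 4, H, W, 4)
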
Example #10
def nnvm_transpose(c, a, ax):
    """Implementation of transpose."""
    na = c.ref(a)
    assert ax.is_constant(tuple)
    return sym.transpose(na, axes=ax.value)
Example #11
 def check(in_shape, out_shape, **kwargs):
     x = sym.Variable("x", shape=in_shape)
     y = sym.transpose(x, name="y", **kwargs)
     sdict = infer_shape(y)
     assert(tuple(sdict["y"][0]) == tuple(out_shape))
Example #12
def _alter_conv2d_layout_arm(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    dilation = attrs.get_int_tuple("dilation")
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if layout != 'NCHW' or groups != 1:
        return None
    if dilation != (1, 1):
        warnings.warn("Does not support weight pre-transform for dilated convolution.")
        return None

    data, kernel = tinfos[0:2]
    N, CI, H, W = get_const_tuple(data.shape)
    CO, _, KH, KW = get_const_tuple(kernel.shape)

    # query config of this workload
    workload = autotvm.task.args_to_workload(
        [data, kernel, strides, padding, dilation, layout, out_dtype], conv2d)
    target = tvm.target.current_target()
    dispatch_ctx = autotvm.DispatchContext.current
    cfg = dispatch_ctx.query(target, workload)

    if cfg.is_fallback:  # if is fallback, clear query cache and return None
        autotvm.task.clear_fallback_cache(target, workload)
        return None

    if cfg.template_key == 'direct':  # pack weight tensor
        VC = cfg['tile_co'].size[-1]
        new_attrs['kernel_layout'] = 'OIHW%do' % VC

        # Store the same config for the altered operator (workload)
        new_data = data
        new_kernel = tvm.placeholder((CO // VC, CI, KH, KW, VC), dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload(
            [new_data, new_kernel, strides, padding, dilation, 'NCHW', out_dtype], conv2d)
        dispatch_ctx.update(target, new_workload, cfg)

        return sym.conv2d(*copy_inputs, **new_attrs)
    else:  # pre-compute weight transformation in winograd
        if "-device=arm_cpu" in target.options:
            tile_size = 4
            VC = cfg['tile_k'].size[-1]
        else:
            from ..mali.conv2d import _pick_tile_size
            tile_size = _pick_tile_size(tinfos[0], tinfos[1])
            VC = cfg['tile_bna'].val

        weight = sym.contrib.conv2d_winograd_weight_transform(copy_inputs[1], tile_size=tile_size)
        weight = sym.reshape(weight,
                             shape=(KH + tile_size - 1, KW + tile_size - 1, CO // VC, VC, CI))
        weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size

        # Store the same config for the altered operator (workload)
        new_data = data
        new_weight = tvm.placeholder((KH + tile_size - 1, KW + tile_size - 1, CO // VC, CI, VC),
                                     kernel.dtype)
        new_workload = autotvm.task.args_to_workload(
            [new_data, new_weight, strides, padding, dilation,
             new_attrs['layout'], out_dtype, tile_size],
            conv2d_winograd_without_weight_transform)
        dispatch_ctx.update(target, new_workload, cfg)

        return sym.contrib.conv2d_winograd_without_weight_transform(*copy_inputs, **new_attrs)
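
The reshape/transpose pair in the winograd branch splits the output-channel axis of the transformed weight into VC-sized blocks and moves the block innermost, (alpha, alpha, CO, CI) -> (alpha, alpha, CO//VC, CI, VC), assuming the transform is laid out as the reshape implies. A small NumPy illustration with stand-in sizes:

import numpy as np

alpha, CO, CI, VC = 6, 8, 4, 4
wt = np.random.rand(alpha, alpha, CO, CI)    # stand-in for the winograd-transformed weight

blocked = wt.reshape(alpha, alpha, CO // VC, VC, CI).transpose(0, 1, 2, 4, 3)
assert blocked.shape == (alpha, alpha, CO // VC, CI, VC)
assert np.allclose(blocked[:, :, 0, :, 1], wt[:, :, 1, :])   # block 0, lane 1 == channel 1
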
Example #13
def _alter_conv2d_layout(attrs, inputs, tinfo):
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]
    new_attrs = {k: attrs[k] for k in attrs.keys()}
    data, kernel = tinfo[0], tinfo[1]
    batch_size, in_channel, height, width = get_const_tuple(data.shape)

    groups = attrs.get_int("groups")
    out_channel = attrs.get_int("channels")
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    layout = attrs['layout']
    kh, kw = attrs.get_int_tuple("kernel_size")

    dtype = data.dtype
    out_dtype = dtype if attrs["out_dtype"] == "same" else attrs["out_dtype"]
    is_depthwise = groups == in_channel and groups == out_channel

    # only optimize for NCHW
    if layout != 'NCHW':
        return None
    if groups != 1 and not is_depthwise:
        return None

    dispatch_ctx = autotvm.task.DispatchContext.current
    target = tvm.target.current_target()
    # query schedule and fallback if necessary
    workload = autotvm.task.args_to_workload(
        [data, kernel, strides, padding, dilation, out_dtype], depthwise_conv2d_nchw) \
        if is_depthwise else \
        autotvm.task.args_to_workload(
            [data, kernel, strides, padding, dilation, layout, out_dtype], conv2d)
    cfg = dispatch_ctx.query(target, workload)
    if cfg.is_fallback:
        _get_default_config(cfg, data, kernel, strides, padding, out_dtype,
                            is_depthwise)

    ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
    new_attrs['layout'] = 'NCHW%dc' % ic_bn
    new_attrs['out_layout'] = 'NCHW%dc' % oc_bn

    new_data = tvm.placeholder(
        (batch_size, in_channel // ic_bn, height, width, ic_bn),
        dtype=data.dtype)
    if is_depthwise:
        # channel, channel_multiplier, kh, kw -> out_channel_chunk, kh, kw, out_channel_block
        # in which out_channel = merge(channel, channel_multiplier)
        kernel_sym = copy_inputs[1]
        kernel_sym = sym.reshape(kernel_sym,
                                 shape=(out_channel // oc_bn, oc_bn, kh, kw))
        kernel_sym = sym.transpose(kernel_sym, axes=(0, 2, 3, 1))
        copy_inputs[1] = kernel_sym

        # Store altered operator's config
        new_kernel = tvm.placeholder((out_channel // oc_bn, kh, kw, oc_bn),
                                     dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload([
            new_data, new_kernel, strides, padding, dilation,
            new_attrs['layout'], new_attrs['out_layout'], out_dtype
        ], depthwise_conv2d_NCHWc)
    else:
        out_channel, _, kh, kw = get_const_tuple(kernel.shape)
        # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
        new_attrs['kernel_layout'] = 'OIHW%di%do' % (ic_bn, oc_bn)

        # Store altered operator's config
        new_kernel = tvm.placeholder(
            (out_channel // oc_bn, in_channel // ic_bn, kh, kw, ic_bn, oc_bn),
            dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload([
            new_data, new_kernel, strides, padding, dilation,
            new_attrs['layout'], new_attrs['out_layout'], out_dtype
        ], conv2d_NCHWc)

    dispatch_ctx.update(target, new_workload, cfg)
    return sym.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs)
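
In the depthwise branch, the reshape/transpose pair regroups the fused output channels into oc_bn-sized blocks and moves the block innermost, so the kernel effectively goes from (out_channel, channel_multiplier, kh, kw) to (out_channel//oc_bn, kh, kw, oc_bn). A NumPy sketch with made-up sizes and channel_multiplier == 1:

import numpy as np

out_channel, kh, kw, oc_bn = 16, 3, 3, 8
kernel = np.random.rand(out_channel, 1, kh, kw)   # depthwise kernel, channel_multiplier == 1

packed = kernel.reshape(out_channel // oc_bn, oc_bn, kh, kw).transpose(0, 2, 3, 1)
assert packed.shape == (out_channel // oc_bn, kh, kw, oc_bn)
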
Example #14
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    if 'cudnn' in tvm.target.current_target().libs or \
            'miopen' in tvm.target.current_target().libs:
        return None

    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    data, kernel = tinfos[0:2]
    N, CI, H, W = get_const_tuple(data.shape)
    CO, _, KH, KW = get_const_tuple(kernel.shape)

    dispatch_ctx = autotvm.DispatchContext.current

    if groups == 1:
        # query config of this workload
        workload = ('conv2d', ) + autotvm.task.args_to_workload([
            tinfos[0], tinfos[1], strides, padding, dilation, layout, out_dtype
        ])
        target = tvm.target.current_target()
        cfg = autotvm.DispatchContext.current.query(target, workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(target, workload)
            return None

        if cfg.template_key == 'direct':
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in target.keys
            new_layout = 'NCHW4c'
            new_attrs['layout'] = new_layout
            new_attrs['out_layout'] = new_layout
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            ic_block_factor = oc_block_factor = 4

            # Store the same config for the altered operator (workload)
            new_data = tvm.placeholder(
                (N, CI // ic_block_factor, H, W, ic_block_factor),
                dtype=data.dtype)
            new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW,\
                                         oc_block_factor, ic_block_factor), dtype=kernel.dtype)
            new_workload = autotvm.task.args_to_workload([
                new_data, new_kernel, strides, padding, dilation, new_layout,
                out_dtype
            ], conv2d)
            dispatch_ctx.update(target, new_workload, cfg)
            return sym.conv2d(*copy_inputs, **new_attrs)

        if attrs.get_int_tuple("dilation") != (1, 1):
            warnings.warn(
                "Does not support weight pre-transform for dilated convolution."
            )
            return None

        # pre-compute weight transformation in winograd
        tile_size = _infer_tile_size(tinfos[0], tinfos[1])

        weight = sym.contrib.conv2d_winograd_weight_transform(
            copy_inputs[1], tile_size=tile_size)
        weight = sym.transpose(weight, axes=[0, 1, 3, 2])
        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size

        # Store the same config for the altered operator (workload)
        new_data = data
        new_weight = tvm.placeholder(
            (KH + tile_size - 1, KW + tile_size - 1, CI, CO),
            dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload([
            new_data, new_weight, strides, padding, dilation, layout,
            out_dtype, tile_size
        ], conv2d_winograd_without_weight_transform)
        dispatch_ctx.update(target, new_workload, cfg)
        return sym.contrib.conv2d_winograd_without_weight_transform(
            *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None