Example #1
def test_conv2d():
    def run_test_conv2d(sym, dtype, dshape, kshape, oshape, shape_dict, padding):
        for target, ctx in ctx_list():
            graph, lib, _ = nnvm.compiler.build(sym, target, shape_dict)
            m = graph_runtime.create(graph, lib, ctx)
            data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
            kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
            bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
            m.run(x=data, y_weight=kernel, y_bias=bias)
            out = m.get_output(0, tvm.nd.empty(oshape, dtype))
            c_np = topi.testing.conv2d_nchw_python(
                data.asnumpy(), kernel.asnumpy(), 1, padding)
            c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
            tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

    x = sym.Variable("x")
    y = sym.conv2d(x, channels=10, kernel_size=(3,3),
                   name="y", padding=(1,1))
    dtype = "float32"
    dshape = (1, 3, 18, 18)
    kshape = (10, 3, 3, 3)
    oshape = (1, 10, 18, 18)
    shape_dict = {"x": dshape}
    run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (1,1))

    x = sym.Variable("x")
    y = sym.conv2d(x, channels=10, kernel_size=(1,3),
                   name="y", padding=(0,1))
    dtype = "float32"
    dshape = (1, 3, 224, 224)
    kshape = (10, 3, 1, 3)
    oshape = (1, 10, 224, 224)
    shape_dict = {"x": dshape}
    run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (0,1))
Example #2
def get_symbol(num_classes, version, **kwargs):
    """Get symbol of SqueezeNet

    Parameters
    ----------
    num_classes: int
        The number of classification results

    version : str, optional
        "1.0" or "1.1" of SqueezeNet
    """
    assert version == '1.1', ("Unsupported SqueezeNet version {version}:"
                              "1.1 expected".format(version=version))
    net = sym.Variable("data")

    net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2))
    net = sym.relu(net)
    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
    net = _make_fire(net, 16, 64, 64)
    net = _make_fire(net, 16, 64, 64)
    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
    net = _make_fire(net, 32, 128, 128)
    net = _make_fire(net, 32, 128, 128)
    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
    net = _make_fire(net, 48, 192, 192)
    net = _make_fire(net, 48, 192, 192)
    net = _make_fire(net, 64, 256, 256)
    net = _make_fire(net, 64, 256, 256)

    net = sym.dropout(net, rate=0.5)
    net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1))
    net = sym.relu(net)
    net = sym.global_avg_pool2d(net)
    return sym.softmax(net, axis=1)
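A minimal usage sketch for get_symbol (the input shape, target, and the availability of the _make_fire helper are assumptions, not part of the original example):

import nnvm.compiler

# Hypothetical call: build the SqueezeNet 1.1 symbol and compile it.
net = get_symbol(num_classes=1000, version='1.1')
shape_dict = {"data": (1, 3, 224, 224)}  # assumed NCHW input shape
graph, lib, _ = nnvm.compiler.build(net, "llvm", shape_dict)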
Example #3
File: tvm_gradient.py  Project: shinh/test
def nnvm_conv():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z = sym.conv2d(x, y, channels=3, kernel_size=3)
    grad = graph_util.gradients([z], [x, y])
    print(grad)
    print(grad[0].debug_str())
Example #4
def test_conv_ewise_injective():
    x = sym.Variable("x")
    y = sym.conv2d(x, channels=32, kernel_size=(3, 3), groups=32,
                   name="y", padding=(1,1))
    y = sym.flatten(y + 1) + 1
    dtype = "float32"
    dshape = (1, 32, 18, 18)
    kshape = (32, 1, 3, 3)
    oshape = (1, 32 * 18 * 18)
    shape_dict = {"x": dshape}

    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
        m = graph_runtime.create(graph, lib, ctx)
        # print(graph.ir(join_entry_attrs=["shape"]))
        assert graph.index.num_nodes == 5
        # set input
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
        m.run(x=data, y_weight=kernel, y_bias=bias)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        c_np = topi.testing.depthwise_conv2d_python_nchw(
            data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
        c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + 1
        c_np = c_np.reshape(c_np.shape[0], np.prod(c_np.shape[1:])) + 1
        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
Example #5
def test_mixed_precision():
    x = sym.Variable("x")
    dtype = "int8"
    out_dtype="int32"
    y = sym.conv2d(x,
                   channels=10,
                   kernel_size=(3,3),
                   name="y",
                   padding=(1,1),
                   use_bias=False,
                   out_dtype="int32")
    dshape = (1, 3, 18, 18)
    kshape = (10, 3, 3, 3)
    oshape = (1, 10, 18, 18)
    shape_dict = {"x": dshape}
    dtype_dict = {"x": dtype}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict)
        m = graph_runtime.create(graph, lib, ctx)
        data = tvm.nd.array(np.random.uniform(-127, 127, size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(-127, 127, size=kshape).astype(dtype))
        m.run(x=data, y_weight=kernel)
        out = m.get_output(0, tvm.nd.empty(oshape, out_dtype))
        c_np = topi.testing.conv2d_nchw_python(
            data.asnumpy().astype(out_dtype),
            kernel.asnumpy().astype(out_dtype), 1, 1)
        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
Example #6
def test_injective_conv2d():
    channels = 16
    data = sym.Variable(name="data")
    pool = sym.global_avg_pool2d(data=data)
    weight = sym.reshape(pool, shape=[1, channels, 1, 1])
    residual = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1),
                          layout="NCHW", kernel_layout="OIHW", use_bias=False, name="conv")
    net = weight * data + residual
    size = 56
    dtype="float32"
    dshape = (1, channels, size, size)
    kshape = (channels, channels, 3, 3)
    oshape = dshape
    shape_dict = {"data": dshape}

    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
        # data, global_avg_pool, conv weight, conv op, fused elemwise add
        assert graph.index.num_nodes == 5

        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        m = graph_runtime.create(graph, lib, ctx)
        m.run(data=data, conv_weight=kernel)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        residual = topi.testing.conv2d_nchw_python(
            data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
        weight = np.mean(data.asnumpy(), axis=(2, 3))
        c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual
        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
Example #7
 def get_sym(out_channel):
     data = sym.Variable(name="data")
     data = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channel, padding=(1, 1),
                       layout="NCHW", kernel_layout="OIHW", use_bias=True)
     data = sym.batch_norm(data)
     data = elu(data)
     return data
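The elu helper used above is not shown in these excerpts; a minimal sketch of an ELU-style activation built from the sym primitives that appear elsewhere in the examples (an assumption about the missing helper, not its original definition):

def elu(data, alpha=0.5):
    # ELU(x) = x for x > 0 and alpha * (exp(x) - 1) otherwise, written with
    # nnvm relu/exp symbols and scalar arithmetic.
    return -alpha * sym.relu(1 - sym.exp(data)) + sym.relu(data)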
Example #8
def test_concatenate_conv2d():
    ch = 3
    size = 8
    data = sym.Variable(name="data")
    concat = sym.concatenate(data, data, axis=1)
    conv = sym.conv2d(data=concat, kernel_size=(1,1), channels=ch*2, use_bias=False, name="conv")
    net = sym.elemwise_add(concat, conv)

    dtype="float32"
    dshape = (1, ch, size, size)
    kshape = (ch*2, ch*2, 1, 1)
    oshape = (1, ch*2, size, size)
    shape_dict = {"data": dshape}

    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
        # data, conv weight, conv op, concat
        assert graph.index.num_nodes == 4

        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        m = graph_runtime.create(graph, lib, ctx)
        m.run(data=data, conv_weight=kernel)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))

        concat = np.concatenate((data.asnumpy(), data.asnumpy()), axis=1)
        conv = topi.testing.conv2d_nchw_python(
            concat, kernel.asnumpy(), (1,1), 'SAME')
        ref = concat + conv
        tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
Example #9
def test_json_pass():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2,2))
    g = graph.create(y)
    ret = g.apply('SaveJSON')
    ret._set_json_attr('json', ret.json_attr('json'))
    g2 = ret.apply('LoadJSON')
    assert g2.apply('SaveJSON').json_attr('json') == ret.json_attr('json')
Example #10
 def before(x, scale, channels):
     y = sym.conv2d(x,
                    channels=channels,
                    kernel_size=(3, 3),
                    padding=(1, 1),
                    name="conv")
     y = y * sym.expand_dims(scale, axis=1, num_newaxis=1)
     return y
Example #11
 def check(in_shape,
           out_shape,
           kernel_shape,
           **kwargs):
     x = sym.Variable("x", shape=in_shape)
     y = sym.conv2d(x, name="y", **kwargs)
     sdict = infer_shape(y)
     assert(tuple(sdict["y"][0]) == tuple(out_shape))
     assert(tuple(sdict["y_weight"][0]) == tuple(kernel_shape))
Example #12
def test_print_graph_ir():
    x = sym.Variable("x", shape=(1, 1, 10, 20))
    y = sym.conv2d(x + 1, name="y", channels=10, kernel_size=(3,3))
    g = graph.create(y)
    g = g.apply("InferShape")
    ir1 = g.ir()
    ir2 = g.ir(join_entry_attrs=["shape"])
    assert("y_bias" in ir1)
    assert("shape=" in ir2)
Example #13
def test_default_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    assert y.list_inputs() == ['x', 'conv_weight']
    try:
        z = sym.add(x)
        assert False
    except NNVMError:
        pass
Example #14
def test_list_args():
    x = sym.Variable('x')
    z = sym.Variable('z')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, z, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    assert z.list_inputs('read_only') == ['conv_weight', 'z']
    assert z.list_inputs('aux_state') == ['x']
Example #15
 def check(in_shape,
           out_shape,
           kernel_shape,
           **kwargs):
     x = sym.Variable("x", shape=in_shape)
     y = sym.conv2d(x, name="y", **kwargs)
     sdict = infer_shape(y)
     assert(tuple(sdict["y"][0]) == tuple(out_shape))
     assert(tuple(sdict["y_weight"][0]) == tuple(kernel_shape))
Example #16
def test_print_graph_ir():
    x = sym.Variable("x", shape=(1, 1, 10, 20))
    y = sym.conv2d(x + 1, name="y", channels=10, kernel_size=(3, 3))
    g = graph.create(y)
    g = g.apply("InferShape")
    ir1 = g.ir()
    ir2 = g.ir(join_entry_attrs=["shape"])
    assert ("y_bias" in ir1)
    assert ("shape=" in ir2)
Example #17
File: conv2d.py  Project: zhiics/tvm
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
                                                      "when alter_op_layout is enabled"
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = _conv_arg_to_workload(tinfos[0], tinfos[1], strides,
                                         padding, layout, out_dtype)
        cfg = autotvm.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            context = autotvm.DispatchContext.current
            while not isinstance(context, autotvm.FallbackContext):
                context = context._old_ctx
            context.clear_cache(tvm.target.current_target(), workload)
            return None

        if cfg.template_key == 'direct':  # packing weight tensor
            new_attrs['kernel_layout'] = 'OIHW%do' % (cfg['tile_co'].size[-1])
            return sym.conv2d(*copy_inputs, **new_attrs)
        else:  # pre-compute weight transformation in winograd
            if "-device=arm_cpu" in tvm.target.current_target().options:
                tile_size = 4
                VC = cfg['tile_k'].size[-1]
            else:
                from ..mali.conv2d import _pick_tile_size
                tile_size = _pick_tile_size(tinfos[0], tinfos[1])
                VC = cfg['tile_bna'].val

            weight = sym.contrib.conv2d_winograd_weight_transform(
                copy_inputs[1], tile_size=tile_size)
            CO, CI, KH, KW = get_const_tuple(tinfos[1].shape)
            weight = sym.reshape(weight,
                                 shape=(KH + tile_size - 1, KW + tile_size - 1,
                                        CO // VC, VC, CI))
            weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

            copy_inputs[1] = weight
            new_attrs['tile_size'] = tile_size
            return sym.contrib.conv2d_winograd_without_weight_transform(
                *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
Example #18
def test_list_args():
    x = sym.Variable('x')
    z = sym.Variable('z')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, z, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    assert z.list_input_names('read_only') == ['conv_weight', 'z']
    assert z.list_input_names('aux_state') == ['x']
Example #19
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    if 'cudnn' in tvm.target.current_target(
    ).libs or 'miopen' in tvm.target.current_target().libs:
        return None

    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
                                                      "when alter_op_layout is enabled"
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = ('conv2d', ) + autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, layout, out_dtype])

        cfg = autotvm.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(tvm.target.current_target(),
                                              workload)
            return None

        if cfg.template_key == 'direct':
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in tvm.target.current_target().keys
            new_attrs['layout'] = 'NCHW4c'
            new_attrs['out_layout'] = 'NCHW4c'
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            return sym.conv2d(*copy_inputs, **new_attrs)

        # pre-compute weight transformation in winograd
        tile_size = _infer_tile_size(tinfos[0], tinfos[1])

        weight = sym.contrib.conv2d_winograd_weight_transform(
            copy_inputs[1], tile_size=tile_size)
        weight = sym.transpose(weight, axes=[0, 1, 3, 2])
        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size
        return sym.contrib.conv2d_winograd_without_weight_transform(
            *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
Example #20
 def before(x, conv_weight, conv_bias, in_scale, out_scale, channels):
     x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
     y = sym.conv2d(x, conv_weight, conv_bias,
                    channels=channels,
                    kernel_size=(3, 3),
                    padding=(1, 1),
                    name="conv")
     y = sym.relu(y)
     y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2)
     return y
Example #21
def test_conv2d():
    def run_test_conv2d(sym, dtype, dshape, kshape, oshape, shape_dict,
                        padding):
        for target, ctx in ctx_list():
            graph, lib, _ = nnvm.compiler.build(sym, target, shape_dict)
            m = graph_runtime.create(graph, lib, ctx)
            data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
            kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
            bias = tvm.nd.array(
                np.random.uniform(size=kshape[0]).astype(dtype))
            m.run(x=data, y_weight=kernel, y_bias=bias)
            out = m.get_output(0, tvm.nd.empty(oshape, dtype))
            c_np = topi.testing.conv2d_nchw_python(data.asnumpy(),
                                                   kernel.asnumpy(), 1,
                                                   padding)
            c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
            tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

    x = sym.Variable("x")
    y = sym.conv2d(x,
                   channels=10,
                   kernel_size=(3, 3),
                   name="y",
                   padding=(1, 1))
    dtype = "float32"
    dshape = (1, 3, 18, 18)
    kshape = (10, 3, 3, 3)
    oshape = (1, 10, 18, 18)
    shape_dict = {"x": dshape}
    run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (1, 1))

    x = sym.Variable("x")
    y = sym.conv2d(x,
                   channels=10,
                   kernel_size=(1, 3),
                   name="y",
                   padding=(0, 1))
    dtype = "float32"
    dshape = (1, 3, 224, 224)
    kshape = (10, 3, 1, 3)
    oshape = (1, 10, 224, 224)
    shape_dict = {"x": dshape}
    run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (0, 1))
Example #22
File: nn.py  Project: xdarkbluex/tinyflow
def conv2d(data, weight=None,
           strides=[1, 1, 1, 1],
           padding='VALID',
           data_format='NCHW',
           **kwargs):
    kwargs = kwargs.copy()
    kwargs['data'] = data
    if weight:
        kwargs['weight'] = weight
    return _sym.conv2d(strides=strides, padding=padding, data_format=data_format, **kwargs)
Example #23
 def before(x, conv_weight, conv_bias, in_scale, out_scale, channels):
     x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
     y = sym.conv2d(x, conv_weight, conv_bias,
                    channels=channels,
                    kernel_size=(3, 3),
                    padding=(1, 1),
                    name="conv")
     y = sym.relu(y)
     y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2)
     return y
Example #24
def test_default_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    assert y.list_input_names() == ['x', 'conv_weight']
    tname = [z.list_output_names()[0] for z in y.list_input_variables()]
    assert tname == y.list_input_names()
    try:
        z = sym.add(x)
        assert False
    except NNVMError:
        pass
Example #25
def test_mutate_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    z = sym.assign(x, y)
    t = sym.add(z, x)

    try:
        z = sym.assign(z, z)
        assert False
    except NNVMError:
        pass
Example #26
def test_json_pass_with_attr():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2,2))
    g = graph.create(y)
    g._set_json_attr('version', '0.1.0')
    ret = g.apply('SaveJSON')
    json_str = ret.json_attr('json')
    print(json_str)
    ret._set_json_attr('json', json_str)
    g2 = ret.apply('LoadJSON')
    assert g2.json_attr('version') == '0.1.0'
Example #27
def test_mutate_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    z = sym.assign(x, y)
    t = sym.add(z, x)

    try:
        z = sym.assign(z, z)
        assert False
    except NNVMError:
        pass
Example #28
def get_sym(layout, kernel_layout, channels):
    data = sym.Variable(name="data")
    data = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1),
                      layout=layout, kernel_layout=kernel_layout, use_bias=True)
    data = sym.max_pool2d(data=data, pool_size=(2, 2), strides=(2, 2), layout=layout)
    data = sym.upsampling(data=data, scale=2, layout=layout)
    softmax_axis = 1
    if layout == "NHWC":
        softmax_axis = 3
    data = sym.softmax(data=data, axis=softmax_axis)
    return data
Example #29
def get_sym(layout, kernel_layout, channels):
    data = sym.Variable(name="data")
    data = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1),
                      layout=layout, kernel_layout=kernel_layout, use_bias=True)
    data = sym.max_pool2d(data=data, pool_size=(2, 2), strides=(2, 2), layout=layout)
    data = sym.upsampling(data=data, scale=2, layout=layout)
    softmax_axis = 1
    if layout == "NHWC":
        softmax_axis = 3
    data = sym.softmax(data=data, axis=softmax_axis)
    return data
Example #30
def test_default_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    assert y.list_input_names() == ['x', 'conv_weight']
    tname = [z.list_output_names()[0] for z in y.list_input_variables()]
    assert tname == y.list_input_names()
    try:
        z = sym.add(x)
        assert False
    except NNVMError:
        pass
Example #31
def test_json_pass_with_attr():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2, 2))
    g = graph.create(y)
    g._set_json_attr('version', '0.1.0')
    ret = g.apply('SaveJSON')
    json_str = ret.json_attr('json')
    print(json_str)
    ret._set_json_attr('json', json_str)
    g2 = ret.apply('LoadJSON')
    assert g2.json_attr('version') == '0.1.0'
Example #32
 def get_sym(out_channel):
     data = sym.Variable(name="data")
     data = sym.conv2d(data=data,
                       kernel_size=(3, 3),
                       channels=out_channel,
                       padding=(1, 1),
                       layout="NCHW",
                       kernel_layout="OIHW",
                       use_bias=True)
     data = sym.batch_norm(data)
     data = elu(data)
     return data
Example #33
 def expected(x, conv_weight, conv_bias, scale, channels):
     conv_weight = conv_weight * sym.expand_dims(scale, axis=1, num_newaxis=3)
     conv_bias = conv_bias * scale
     y = sym.conv2d(x,
                    conv_weight,
                    conv_bias,
                    channels=channels,
                    kernel_size=(3, 3),
                    padding=(1, 1),
                    name="conv")
     y = sym.relu(y)
     return y
Example #34
    def compile_run_graph(device, target):
        if not tvm.runtime.enabled(device):
            print("Skip test because %s is not enabled." % device)
            return

        out_channels = 16
        data1 = symbol.Variable(name="data1")
        data2 = symbol.Variable(name="data2")
        simple_net1 = symbol.conv2d(data=data1,
                                    kernel_size=(3, 3),
                                    channels=out_channels,
                                    padding=(1, 1),
                                    use_bias=True)

        simple_net2 = symbol.conv2d(data=data2,
                                    kernel_size=(3, 3),
                                    channels=out_channels,
                                    padding=(1, 1),
                                    use_bias=True)
        ret = symbol.elemwise_add(simple_net1, simple_net2)
        ret = symbol.conv2d(ret,
                            kernel_size=(3, 3),
                            channels=out_channels,
                            padding=(1, 1),
                            use_bias=True)

        batch_size = 1
        data_shape = (batch_size, 3, 224, 224)
        shape_dict = {"data1": data_shape, "data2": data_shape}
        params = {}
        params["data1"] = np.random.uniform(-1, 1,
                                            size=data_shape).astype("float32")
        params["data2"] = np.random.uniform(-1, 1,
                                            size=data_shape).astype("float32")
        op_name_device = {"elemwise_add": "cpu", "conv2d": device}
        fallback_device = tvm.context("cpu")
        target = {"cpu": "llvm", device: target}
        # No op will be fused. 3 additional device copy nodes are required.
        check_annotated_graph(ret, target, op_name_device, 15, fallback_device,
                              shape_dict, params)
Example #35
 def forward(self, inputs):
     if self._use_bias:
         return sym.conv2d(data=inputs,
                           weight=self.weight,
                           bias=self.bias,
                           channels=self._out_channel,
                           kernel_size=self._kernel_size,
                           padding=self._padding,
                           strides=self._strides,
                           dilation=self._dilation,
                           groups=self._group,
                           use_bias=self._use_bias)
     else:
         return sym.conv2d(data=inputs,
                           weight=self.weight,
                           channels=self._out_channel,
                           kernel_size=self._kernel_size,
                           padding=self._padding,
                           strides=self._strides,
                           dilation=self._dilation,
                           groups=self._group,
                           use_bias=self._use_bias)
Example #36
def conv2d_block(data, name, channels, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1)):
    conv2d = sym.conv2d(
        data=data, 
        channels=channels, 
        kernel_size=kernel_size, 
        strides=strides, 
        padding=padding, 
        use_bias=False, 
        layout="NCHW", 
        name=name + "_conv2d"
    )
    # act = sym.relu(data=conv2d, name=name + "_relu")
    return conv2d
Example #37
 def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels):
     conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3)
     conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
     conv_bias = conv_bias * out_scale
     y = sym.conv2d(x,
                    conv_weight,
                    conv_bias,
                    channels=channels,
                    kernel_size=(3, 3),
                    padding=(1, 1),
                    name="conv")
     y = sym.relu(y)
     return y
Example #38
 def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels):
     conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3)
     conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=3)
     conv_bias = conv_bias * out_scale
     y = sym.conv2d(x,
                    conv_weight,
                    conv_bias,
                    channels=channels,
                    kernel_size=(3, 3),
                    padding=(1, 1),
                    groups=54,
                    name="depthiwise_conv")
     y = sym.relu(y)
     return y
Example #39
File: nn.py  Project: suluner/dlsys
def conv2d(data,
           weight=None,
           strides=[1, 1, 1, 1],
           padding='VALID',
           data_format='NCHW',
           **kwargs):
    kwargs = kwargs.copy()
    kwargs['data'] = data
    if weight:
        kwargs['weight'] = weight
    return _sym.conv2d(strides=strides,
                       padding=padding,
                       data_format=data_format,
                       **kwargs)
Example #40
def separable_conv_block(data,
                         name,
                         depthwise_channels,
                         pointwise_channels,
                         kernel_size=(3, 3),
                         downsample=False,
                         padding=(1, 1),
                         epsilon=1e-5):
    """Helper function to get a separable conv block"""
    if downsample:
        strides = (2, 2)
    else:
        strides = (1, 1)
    # depthwise convolution + bn + relu
    conv1 = sym.conv2d(data=data,
                       channels=depthwise_channels,
                       groups=depthwise_channels,
                       kernel_size=kernel_size,
                       strides=strides,
                       padding=padding,
                       use_bias=False,
                       layout="NCHW",
                       name=name + "_depthwise_conv1")
    bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
    act1 = sym.relu(data=bn1, name=name + "_relu1")
    # pointwise convolution + bn + relu
    conv2 = sym.conv2d(data=act1,
                       channels=pointwise_channels,
                       kernel_size=(1, 1),
                       strides=(1, 1),
                       padding=(0, 0),
                       use_bias=False,
                       layout="NCHW",
                       name=name + "_conv2")
    bn2 = sym.batch_norm(data=conv2, epsilon=epsilon, name=name + "_bn2")
    act2 = sym.relu(data=bn2, name=name + "_relu2")
    return act2
Example #41
def test_conv2d():
    x = sym.Variable("data", shape=(1, 32, 512, 512))
    y = sym.conv2d(x,
                   name="conv",
                   channels=12,
                   kernel_size=(3, 3),
                   padding=(1, 1),
                   layout="NCHW")
    _, ldict = correct_layout(y)
    assert (ldict["data"][0] == "NCHW")
    assert (ldict["conv_weight"][0] == "OIHW")
    assert (ldict["conv_bias"][0] == "C")
    assert (ldict["conv"][0] == "NCHW")
    y = sym.conv2d(x,
                   name="conv",
                   channels=12,
                   kernel_size=(3, 3),
                   padding=(1, 1),
                   layout="NCHW16c",
                   kernel_layout="OIHW16i16o",
                   out_layout="NCHW8c")
    _, ldict = correct_layout(y)
    assert (ldict["data"][0] == "NCHW16c")
    assert (ldict["conv_weight"][0] == "OIHW16i16o")
    assert (ldict["conv_bias"][0] == "C8c")
    assert (ldict["conv"][0] == "NCHW8c")
    y = sym.conv2d(x,
                   name="conv",
                   channels=12,
                   kernel_size=(3, 3),
                   padding=(1, 1),
                   layout="N16cHWC")
    _, ldict = correct_layout(y)
    assert (ldict["data"][0] == "N16cHWC")
    assert (ldict["conv_weight"][0] == "OIHW")
    assert (ldict["conv_bias"][0] == "16cC")
    assert (ldict["conv"][0] == "N16cHWC")
Example #42
def test_conv2d():
    x = sym.Variable("data", shape=(1, 32, 512, 512))
    y = sym.conv2d(x, name="conv", channels=12,
                   kernel_size=(3,3), padding=(1,1), layout="NCHW")
    _, ldict = correct_layout(y)
    assert(ldict["data"][0] == "NCHW")
    assert(ldict["conv_weight"][0] == "OIHW")
    assert(ldict["conv_bias"][0] == "C")
    assert(ldict["conv"][0] == "NCHW")
    y = sym.conv2d(x, name="conv", channels=12,
                   kernel_size=(3,3), padding=(1,1), layout="NCHW16c",
                   kernel_layout="OIHW16i16o", out_layout="NCHW8c")
    _, ldict = correct_layout(y)
    assert(ldict["data"][0] == "NCHW16c")
    assert(ldict["conv_weight"][0] == "OIHW16i16o")
    assert(ldict["conv_bias"][0] == "C8c")
    assert(ldict["conv"][0] == "NCHW8c")
    y = sym.conv2d(x, name="conv", channels=12,
                   kernel_size=(3,3), padding=(1,1), layout="N16cHWC")
    _, ldict = correct_layout(y)
    assert(ldict["data"][0] == "N16cHWC")
    assert(ldict["conv_weight"][0] == "OIHW")
    assert(ldict["conv_bias"][0] == "16cC")
    assert(ldict["conv"][0] == "N16cHWC")
Example #43
def test_residual_block_layout_transform():
    ch = 16
    size = 32
    data = sym.Variable(name="data")
    conv1 = sym.conv2d(data=data, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv1")
    layout_transform1 = sym.__layout_transform__(data=conv1, src_layout="NCHW", dst_layout="NCHW8c")
    layout_transform2 = sym.__layout_transform__(data=layout_transform1, src_layout="NCHW8c", dst_layout="NCHW")
    conv2 = sym.conv2d(data=conv1, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv2")
    elemwise_sum = sym.elemwise_add(layout_transform2, conv2)
    out = sym.relu(elemwise_sum)

    dtype="float32"
    dshape = (1, ch, size, size)
    kshape = (ch, ch, 3, 3)
    oshape = (1, ch, size, size)
    shape_dict = {"data": dshape}

    target = "llvm" # only test on llvm since it involves NCHW8c layout
    ctx = tvm.context(target, 0)
    graph, lib, _ = nnvm.compiler.build(out, target, shape_dict)
    # data, conv1 weight, conv1, layout transform + elemwise add + relu, conv2 weight, conv2 op
    assert graph.index.num_nodes == 6

    data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
    kernel1 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
    kernel2 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
    m = graph_runtime.create(graph, lib, ctx)
    m.run(data=data, conv1_weight=kernel1, conv2_weight=kernel2)
    out = m.get_output(0, tvm.nd.empty(oshape, dtype))

    conv1 = topi.testing.conv2d_nchw_python(
        data.asnumpy(), kernel1.asnumpy(), (1,1), 'SAME')
    conv2 = topi.testing.conv2d_nchw_python(
        conv1, kernel2.asnumpy(), (1,1), 'SAME')
    ref = np.maximum(conv1 + conv2, 0)
    tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
Example #44
def test_order_mutation_pass():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, x, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    # read after write
    t = sym.add(y, x, name='add2')
    g = graph.create(sym.Group([t, z]))
    jgraph = json.loads(g.apply(['OrderMutation', 'SaveJSON']).json_attr('json'))
    jnodes = jgraph['nodes']
    nindex = {n['name']: i for i, n in enumerate(jnodes)}
    assert nindex['assign'] in jnodes[nindex['add2']]['control_deps']
    assert nindex['conv'] in jnodes[nindex['assign']]['control_deps']
    assert nindex['add1'] in jnodes[nindex['assign']]['control_deps']
    assert jnodes[nindex['assign']]['inputs'][0][2] == 1
Example #45
def test_order_mutation_pass():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, x, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    # read after write
    t = sym.add(y, x, name='add2')
    g = graph.create(sym.Group([t, z]))
    jgraph = json.loads(g.apply(['OrderMutation', 'SaveJSON']).json_attr('json'))
    jnodes = jgraph['nodes']
    nindex = {n['name']: i for i, n in enumerate(jnodes)}
    assert nindex['assign'] in jnodes[nindex['add2']]['control_deps']
    assert nindex['conv'] in jnodes[nindex['assign']]['control_deps']
    assert nindex['add1'] in jnodes[nindex['assign']]['control_deps']
    assert jnodes[nindex['assign']]['inputs'][0][2] == 1
Example #46
def conv_block(data,
               name,
               channels,
               kernel_size=(3, 3),
               strides=(1, 1),
               padding=(1, 1),
               epsilon=1e-5):
    """Helper function to construct conv-bn-relu"""
    # convolution + bn + relu
    conv = sym.conv2d(data=data,
                      channels=channels,
                      kernel_size=kernel_size,
                      strides=strides,
                      padding=padding,
                      use_bias=False,
                      layout="NCHW",
                      name=name + "_conv")
    bn = sym.batch_norm(data=conv, epsilon=epsilon, name=name + "_bn")
    act = sym.relu(data=bn, name=name + "_relu")
    return act
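A hedged sketch of how conv_block and separable_conv_block (Example #40) are typically composed; the channel sizes and block names below are hypothetical, MobileNet-style choices rather than part of the original examples:

data = sym.Variable("data")
body = conv_block(data, "conv_block_1", channels=32, strides=(2, 2))
body = separable_conv_block(body, "separable_conv_block_1", 32, 64)
body = separable_conv_block(body, "separable_conv_block_2", 64, 128,
                            downsample=True)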
Example #47
def test_grouped_conv2d_nchw():
    x = sym.Variable("x")
    y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32,
                   name="y", padding=(1,1))
    dtype = "float32"
    dshape = (1, 32, 18, 18)
    kshape = (32, 1, 3, 3)
    oshape = (1, 32, 18, 18)
    shape_dict = {"x": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
        m = graph_runtime.create(graph, lib, ctx)
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
        m.run(x=data, y_weight=kernel, y_bias=bias)
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        c_np = topi.testing.depthwise_conv2d_python_nchw(
            data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
        c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
Example #48
def test_grouped_conv2d_nhwc():
    x = sym.Variable("x")
    y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32,
                   name="y", padding=(1,1), layout="NHWC", kernel_layout ='HWOI')
    dtype = "float32"
    dshape = (1, 18, 18, 32)
    kshape = (3, 3, 32, 1)
    oshape = (1, 18, 18, 32)
    shape_dict = {"x": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
        m = graph_runtime.create(graph, lib, ctx)
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        bias = tvm.nd.array(np.random.uniform(size=kshape[2]).astype(dtype))
        m.run(x=data, y_weight=kernel, y_bias=bias)
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        c_np = topi.testing.depthwise_conv2d_python_nhwc(
            data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
        c_np = c_np + bias.asnumpy().reshape(1, 1, kshape[2])
        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
Example #49
    def get_feature(internel_layer, layers, filters, batch_norm=False):
        """
		Get VGG feature body as stacks of convoltions.
		layers  : [1, 1, 2, 2, 2]
		filters : [64, 128, 256, 512, 512]
		"""
        for i, num in enumerate(layers):
            """
			i = 0, num = 1
			i = 1, num = 1
			i = 2, num = 2
			i = 3, num = 2
			i = 4, num = 2
			"""
            for j in range(num):
                internel_layer = sym.pad(data=internel_layer,
                                         pad_width=((0, 0), (1, 1), (1, 1),
                                                    (0, 0)))
                internel_layer = sym.conv2d(data=internel_layer,
                                            kernel_size=(3, 3),
                                            channels=filters[i],
                                            layout='NHWC',
                                            kernel_layout='HWOI',
                                            name="conv%s_%s" % (i + 1, j + 1))
                if batch_norm:
                    internel_layer = sym.batch_norm(data=internel_layer,
                                                    axis=3,
                                                    name="bn%s_%s" %
                                                    (i + 1, j + 1))
                internel_layer = sym.relu(data=internel_layer,
                                          name="relu%s_%s" % (i + 1, j + 1))

            internel_layer = sym.max_pool2d(data=internel_layer,
                                            pool_size=(2, 2),
                                            strides=(2, 2),
                                            layout="NHWC",
                                            name="pool%s" % (i + 1))
        return internel_layer
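A hedged usage sketch for the helper above (input shape and naming are assumptions): feed an NHWC data variable into the feature body.

data = sym.Variable("data")  # e.g. an NHWC input of shape (1, 224, 224, 3)
feat = get_feature(data, layers=[1, 1, 2, 2, 2],
                   filters=[64, 128, 256, 512, 512], batch_norm=True)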
Example #50
def test_injective_conv2d():
    channels = 16
    data = sym.Variable(name="data")
    pool = sym.global_avg_pool2d(data=data)
    weight = sym.reshape(pool, shape=[1, channels, 1, 1])
    residual = sym.conv2d(data=data,
                          kernel_size=(3, 3),
                          channels=channels,
                          padding=(1, 1),
                          layout="NCHW",
                          kernel_layout="OIHW",
                          use_bias=False,
                          name="conv")
    net = weight * data + residual
    size = 56
    dtype = "float32"
    dshape = (1, channels, size, size)
    kshape = (channels, channels, 3, 3)
    oshape = dshape
    shape_dict = {"data": dshape}

    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
        # data, global_avg_pool, conv weight, conv op, fused elemwise add
        assert graph.index.num_nodes == 5

        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        m = graph_runtime.create(graph, lib, ctx)
        m.run(data=data, conv_weight=kernel)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        residual = topi.testing.conv2d_nchw_python(data.asnumpy(),
                                                   kernel.asnumpy(), (1, 1),
                                                   'SAME')
        weight = np.mean(data.asnumpy(), axis=(2, 3))
        c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual
        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
Example #51
def test_alter_conv2d_layout():
    data = sym.Variable("data", shape=(1, 32, 512, 512))
    conv = sym.conv2d(data,
                      name="conv",
                      channels=16,
                      kernel_size=(3, 3),
                      padding=(1, 1),
                      use_bias=False,
                      layout="NCHW")
    # split here
    convs = sym.split(conv, indices_or_sections=2)
    relus = [sym.relu(x, name="relu") for x in convs]
    relu = sym.concatenate(*relus)
    flatten = sym.flatten(relu, name="flatten")
    softmax = sym.softmax(flatten, name="softmax")
    g = graph.create(softmax)

    g = g.apply("CorrectLayout")
    g = graph_attr.set_dtype_inputs(g, "float32")
    g = g.apply(["InferShape", "InferType"])
    layouts_origin = get_layouts(g)

    @reg.register_alter_op_layout("conv2d", level=100)
    def alter_conv2d_layout(attrs, inputs, tinfos):
        new_attrs = {k: attrs[k] for k in attrs.keys()}
        new_attrs["layout"] = "NCHW16c"
        new_attrs["kernel_layout"] = "NCHW16c"
        new_attrs["name"] = "conv_alter"
        return sym.conv2d(inputs[0], inputs[1], **new_attrs)

    g = g.apply("AlterOpLayout")
    layouts = get_layouts(g)

    # check copy layouts
    for node in ["data", "relu", "flatten", "softmax", "conv_weight"]:
        assert layouts[node] == layouts_origin[node]
    assert layouts["conv_alter"] == layouts_origin["conv"]
Example #52
def main(conv_config):
    # Define conv2d network.
    N, H, W, CO, CI, KH, KW, strides, padding = conv_configs[conv_config]
    batch_size = N
    data_shape = (N, CI, H, W)
    data = sym.Variable(name="data")
    simple_net = sym.conv2d(data=data,
                            kernel_size=(KH, KW),
                            channels=CO,
                            padding=padding)

    # Use cuDNN as conv2d backend.
    net, params = utils.create_workload(simple_net, batch_size, data_shape[1:])
    target = "cuda -libs=cudnn"
    graph, lib, params = nnvm.compiler.build(net,
                                             target,
                                             shape={"data": data_shape},
                                             params=params)

    ctx = tvm.context(target, 0)
    data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
    module = runtime.create(graph, lib, ctx)
    module.set_input(**params)
    module.set_input("data", data)
    module.run()
    out_shape = (batch_size, CO, W, H)
    out = module.get_output(0, tvm.nd.empty(out_shape))
    out_cudnn = out.asnumpy()

    print('Time cost of cuDNN conv2d operator ({}):'.format(conv_config))
    costs = []
    for _ in range(10):
        evaluator = module.module.time_evaluator("run", ctx, number=1000)
        cost = evaluator().mean
        costs.append(cost)
        print('%.8f' % cost)
    print('Mean:', '%.8f' % np.mean(costs))
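The conv_configs table is not shown in this excerpt; a hypothetical entry with the field order the unpacking above expects, followed by a call to main:

# Hypothetical config: (N, H, W, CO, CI, KH, KW, strides, padding).
conv_configs = {
    "conv_3x3": (1, 56, 56, 64, 64, 3, 3, (1, 1), (1, 1)),
}
main("conv_3x3")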
Example #53
def test_conv2d():
    x = sym.Variable('x')
    y = sym.conv2d(x, channels=3, kernel_size=(3, 3),
                   name="y", use_bias=False)
    assert y.list_input_names() == ["x", "y_weight"]
Example #54
import tvm
import numpy as np
from tvm.contrib import graph_runtime as runtime
import nnvm.symbol as sym
import nnvm.compiler
from nnvm.testing import utils

######################################################################
# Create a simple network
# -----------------------
# Let's create a very simple network for demonstration.
# It consists of convolution, batch normalization, and ReLU activation.

out_channels = 16
data = sym.Variable(name="data")
simple_net = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channels, padding = (1, 1), use_bias=True)
simple_net = sym.batch_norm(data=simple_net)
simple_net = sym.relu(data=simple_net)

batch_size = 1
data_shape = (batch_size, 3, 224, 224)
net, params = utils.create_workload(simple_net, batch_size, data_shape[1:])

######################################################################
# Build and run with cuda backend
# -------------------------------
# We build and run this network with cuda backend, as usual.
# By setting the logging level to DEBUG, the result of NNVM graph compilation will be dumped as pseudo code.
import logging
logging.basicConfig(level=logging.DEBUG) # to dump TVM IR after fusion
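######################################################################
# The excerpt stops at the logging setup. What follows is a hedged
# sketch (not the original tutorial text) of the build-and-run step the
# comments above describe, assuming the cuda target and the shapes
# defined earlier.
target = "cuda"
graph, lib, params = nnvm.compiler.build(
    net, target, shape={"data": data_shape}, params=params)

ctx = tvm.gpu()
module = runtime.create(graph, lib, ctx)
module.set_input(**params)
module.set_input("data", np.random.uniform(-1, 1, size=data_shape).astype("float32"))
module.run()
# with a 3x3 kernel and (1, 1) padding the 224x224 spatial size is preserved
out = module.get_output(0, tvm.nd.empty((batch_size, out_channels, 224, 224)))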
Example #55
def test_graph_json_attr():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2,2))
    g = graph.create(y)
    g._set_json_attr('ilist', [1,2,3], 'list_int')
    assert g.json_attr('ilist') == [1,2,3]
Example #56
def test_graph_json_attr():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2, 2))
    g = graph.create(y)
    g._set_json_attr('ilist', [1, 2, 3], 'list_int')
    assert g.json_attr('ilist') == [1, 2, 3]
Example #57
def _make_fire_conv(net, channels, kernel_size, padding=0):
    net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size),
                     padding=(padding, padding))
    net = sym.relu(net)
    return net
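Example #2 builds on a _make_fire helper that is not shown in these excerpts; a hedged sketch of a SqueezeNet fire module composed from _make_fire_conv above (squeeze 1x1, then parallel 1x1 and 3x3 expands concatenated on the channel axis; an assumption, not the original code):

def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
    net = _make_fire_conv(net, squeeze_channels, 1, 0)
    left = _make_fire_conv(net, expand1x1_channels, 1, 0)
    right = _make_fire_conv(net, expand3x3_channels, 3, 1)
    # assumes NCHW layout, so channels concatenate along axis 1
    net = sym.concatenate(left, right, axis=1)
    return net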
Example #58
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    if 'cudnn' in tvm.target.current_target().libs or 'miopen' in tvm.target.current_target().libs:
        return None

    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    data, kernel = tinfos[0:2]
    N, CI, H, W = get_const_tuple(data.shape)
    CO, _, KH, KW = get_const_tuple(kernel.shape)

    dispatch_ctx = autotvm.DispatchContext.current
    target = tvm.target.current_target()

    if groups == 1:
        # query config of this workload
        workload = autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, dilation, layout, out_dtype], conv2d)
        cfg = autotvm.DispatchContext.current.query(target, workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(target, workload)
            return None

        if cfg.template_key == 'direct':
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in target.keys
            new_layout = 'NCHW4c'
            new_attrs['layout'] = new_layout
            new_attrs['out_layout'] = new_layout
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            ic_block_factor = oc_block_factor = 4

            # Store the same config for the altered operator (workload)
            new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
                                       dtype=data.dtype)
            new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor, KH, KW,\
                                         oc_block_factor, ic_block_factor), dtype=kernel.dtype)
            new_workload = autotvm.task.args_to_workload(
                [new_data, new_kernel, strides, padding, dilation, new_layout, out_dtype],
                conv2d
            )
            dispatch_ctx.update(target, new_workload, cfg)
            return sym.conv2d(*copy_inputs, **new_attrs)

        if attrs.get_int_tuple("dilation") != (1, 1):
            warnings.warn("Does not support weight pre-transform for dilated convolution.")
            return None

        # pre-compute weight transformation in winograd
        tile_size = _infer_tile_size(tinfos[0], tinfos[1])

        weight = sym.contrib.conv2d_winograd_weight_transform(copy_inputs[1],
                                                              tile_size=tile_size)
        weight = sym.transpose(weight, axes=[0, 1, 3, 2])
        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size

        # Store the same config for the altered operator (workload)
        new_data = data
        new_weight = tvm.placeholder((KH + tile_size - 1, KW + tile_size - 1, CI, CO),
                                     dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload(
            [new_data, new_weight, strides, padding, dilation, layout, out_dtype, tile_size],
            conv2d_winograd_without_weight_transform
        )
        dispatch_ctx.update(target, new_workload, cfg)
        return sym.contrib.conv2d_winograd_without_weight_transform(*copy_inputs, **new_attrs)
    elif groups != CI:
        workload = autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, dilation, groups, out_dtype],
            group_conv2d_nchw)
        cfg = autotvm.DispatchContext.current.query(target, workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(target, workload)
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in target.keys
            new_layout = 'NCHW4c'
            new_attrs['layout'] = new_layout
            new_attrs['out_layout'] = new_layout
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            ic_block_factor = oc_block_factor = 4

            # Store the same config for the altered operator (workload)
            new_data = tvm.placeholder((N, CI // ic_block_factor, H, W, ic_block_factor),
                                       dtype=data.dtype)
            new_kernel = tvm.placeholder((CO // oc_block_factor, CI // ic_block_factor // groups,\
                                         KH, KW, oc_block_factor, ic_block_factor),
                                         dtype=kernel.dtype)
            new_workload = autotvm.task.args_to_workload(
                [new_data, new_kernel, strides, padding, dilation, groups, out_dtype],
                group_conv2d_nchw
            )
            dispatch_ctx.update(target, new_workload, cfg)
            return sym.conv2d(*copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None