Example #1
def test_injective_conv2d():
    channels = 16
    data = sym.Variable(name="data")
    pool = sym.global_avg_pool2d(data=data)
    weight = sym.reshape(pool, shape=[1, channels, 1, 1])
    residual = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1),
                          layout="NCHW", kernel_layout="OIHW", use_bias=False, name="conv")
    net = weight * data + residual
    size = 56
    dtype="float32"
    dshape = (1, channels, size, size)
    kshape = (channels, channels, 3, 3)
    oshape = dshape
    shape_dict = {"data": dshape}

    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
        # data, global_avg_pool, conv weight, conv op, fused elemwise add
        assert graph.index.num_nodes == 5

        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        m = graph_runtime.create(graph, lib, ctx)
        m.run(data=data, conv_weight=kernel)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        residual = topi.testing.conv2d_nchw_python(
            data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME')
        weight = np.mean(data.asnumpy(), axis=(2, 3))
        c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual
        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
Example #2
def _alter_conv2d_layout_arm(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    # Remove attached compilation target because conv2d_NCHWc needs to create
    # a conv2d_nchwc op and target is not one of conv2d's parameters.
    if "target" in new_attrs:
        del new_attrs["target"]

    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
                                                      "when alter_op_layout is enabled"
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = _conv_arg_to_workload(tinfos[0], tinfos[1], strides,
                                         padding, layout, out_dtype)
        cfg = autotvm.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.is_fallback:  # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(tvm.target.current_target(),
                                              workload)
            return None

        if cfg.template_key == 'direct':  # packing weight tensor
            new_attrs['kernel_layout'] = 'OIHW%do' % (cfg['tile_co'].size[-1])
            return sym.conv2d(*copy_inputs, **new_attrs)
        else:  # pre-compute weight transformation in winograd
            if "-device=arm_cpu" in tvm.target.current_target().options:
                tile_size = 4
                VC = cfg['tile_k'].size[-1]
            else:
                from ..mali.conv2d import _pick_tile_size
                tile_size = _pick_tile_size(tinfos[0], tinfos[1])
                VC = cfg['tile_bna'].val

            weight = sym.contrib.conv2d_winograd_weight_transform(
                copy_inputs[1], tile_size=tile_size)
            CO, CI, KH, KW = get_const_tuple(tinfos[1].shape)
            weight = sym.reshape(weight,
                                 shape=(KH + tile_size - 1, KW + tile_size - 1,
                                        CO // VC, VC, CI))
            weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

            copy_inputs[1] = weight
            new_attrs['tile_size'] = tile_size
            return sym.contrib.conv2d_winograd_without_weight_transform(
                *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
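Note: in the winograd branch above, the transformed weight is reshaped from (alpha, alpha, CO, CI), with alpha = KH + tile_size - 1 as the reshape implies, into (alpha, alpha, CO // VC, CI, VC). The following is a minimal NumPy sketch with purely illustrative sizes that checks where each original element lands; it only verifies the index mapping, not the winograd math itself.

import numpy as np

# Illustrative sizes only: alpha = KH + tile_size - 1 for a 3x3 kernel and tile_size 4.
alpha, CO, CI, VC = 6, 8, 4, 4

w = np.arange(alpha * alpha * CO * CI).reshape(alpha, alpha, CO, CI)

# Same chain as the sym.reshape + sym.transpose above:
# (alpha, alpha, CO, CI) -> (alpha, alpha, CO//VC, VC, CI) -> (alpha, alpha, CO//VC, CI, VC)
packed = w.reshape(alpha, alpha, CO // VC, VC, CI).transpose(0, 1, 2, 4, 3)

# Element (a, b, co, ci) of the transformed weight ends up at (a, b, co // VC, ci, co % VC).
a, b, co, ci = 1, 2, 5, 3
assert packed[a, b, co // VC, ci, co % VC] == w[a, b, co, ci]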
Example #3
def nnvm_reshape(c, v, shp):
    """Implementation of reshape."""
    nv = c.ref(v)
    assert shp.is_constant(tuple)
    if shp.value == ():
        shp = (1, )
    else:
        shp = shp.value
    return sym.reshape(nv, shape=shp)
Example #4
def test_reshape():
    x = sym.Variable("x", shape=(4, ))
    y = sym.reshape(x, shape=(2, 2), name="y")
    g, ldict = correct_layout(y, "C")
    assert (ldict["x"][0] == "C")
    assert (ldict["y"][0] == "__undef__")
    # second pass will insert layout transform
    g, ldict = correct_layout(g, "C16c")
    assert (ldict["x"][0] == "C16c")
    assert (ldict["x_C"][0] == "C")
    assert (ldict["y"][0] == "__undef__")
Example #5
def weight_prepack_conv2d(attrs, inputs, tinfos):
    import ast
    data = tinfos[0]
    kernel = tinfos[1]
    padding = ast.literal_eval(attrs['padding'])
    stride = ast.literal_eval(attrs['strides'])
    wkl = _get_workload(data, kernel, stride, padding, 'float32')
    sch = _get_schedule_conv(wkl)
    is_kernel_1x1 = isinstance(sch, AVX512Conv1x1Fwd)

    ic_bn, oc_bn = sch.ic_bn, sch.oc_bn

    new_attrs = {k: attrs[k] for k in attrs.keys()}
    new_attrs.pop('layout', None)

    kernel_sym = inputs[1]
    oc, ic, h, w = get_const_tuple(tinfos[1].shape)
    OC = oc // oc_bn
    IC = ic // ic_bn
    trans_kernel = sym.transpose(kernel_sym, axes=(1, 2, 3, 0))
    trans_kernel = sym.reshape(trans_kernel, shape=(ic, h, w, OC, oc_bn))
    trans_kernel = sym.transpose(trans_kernel, axes=(1, 2, 3, 4, 0))
    trans_kernel = sym.reshape(trans_kernel,
                               shape=(h, w, OC, oc_bn, IC, ic_bn))
    if is_kernel_1x1:
        # (oc, ic, h, w) -> (OC, IC, ic, oc, h, w)
        trans_kernel = sym.transpose(trans_kernel, axes=(2, 4, 5, 3, 0, 1))
    else:
        # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
        trans_kernel = sym.transpose(trans_kernel, axes=(2, 4, 0, 1, 5, 3))

    if attrs.get_bool('use_bias'):
        bias = inputs[2]
        bias = sym.reshape(bias, shape=(OC, oc_bn))
        return sym.contrib.conv2d_nchw_kernel_packed(inputs[0], trans_kernel,
                                                     bias, **new_attrs)
    else:
        return sym.contrib.conv2d_nchw_kernel_packed(inputs[0], trans_kernel,
                                                     **new_attrs)
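The comments in the snippet above state the intended layout mapping. A minimal NumPy sketch, with hypothetical block sizes, confirms the non-1x1 branch: an OIHW element (o, i, y, x) ends up at (o // oc_bn, i // ic_bn, y, x, i % ic_bn, o % oc_bn).

import numpy as np

# Hypothetical sizes chosen only for illustration.
oc, ic, h, w = 8, 8, 3, 3
oc_bn, ic_bn = 4, 4
OC, IC = oc // oc_bn, ic // ic_bn

k = np.arange(oc * ic * h * w).reshape(oc, ic, h, w)

# Mirror the sym.transpose / sym.reshape chain of the non-1x1 branch.
t = k.transpose(1, 2, 3, 0)                  # (ic, h, w, oc)
t = t.reshape(ic, h, w, OC, oc_bn)           # split oc into (OC, oc_bn)
t = t.transpose(1, 2, 3, 4, 0)               # (h, w, OC, oc_bn, ic)
t = t.reshape(h, w, OC, oc_bn, IC, ic_bn)    # split ic into (IC, ic_bn)
packed = t.transpose(2, 4, 0, 1, 5, 3)       # (OC, IC, h, w, ic_bn, oc_bn)

o, i, y, x = 5, 6, 1, 2
assert packed[o // oc_bn, i // ic_bn, y, x, i % ic_bn, o % oc_bn] == k[o, i, y, x]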
Example #6
def test_infer_shape():
    x = sym.Variable('x', shape=(4, 2))
    y = sym.add(x, x, name='add1')
    y = sym.reshape(y, target=(2, 4), name="reshape1")
    g = graph.create(y)
    g._set_json_attr("shape_attr_key", "shape")
    g = g.apply('InferShape')
    jgraph = json.loads(g.apply('SaveJSON').json_attr('json'))
    jnodes = jgraph['nodes']
    jnode_row_ptr = jgraph['node_row_ptr']
    nindex = {n['name']: i for i, n in enumerate(jnodes)}
    assert g.json_attr('shape')[jnode_row_ptr[nindex["reshape1"]]] == [2, 4]
    assert g.json_attr('shape')[jnode_row_ptr[nindex["add1"]]] == [4, 2]
Example #7
def verify_reshape(dshape, oshape):
    x = sym.Variable("x")
    y = sym.reshape(x, shape=oshape)
    y = y + 1
    dtype = "float32"
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
        m = graph_runtime.create(graph, lib, ctx)
        # set input
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        m.run(x=data)
        out_np = data.asnumpy().reshape(oshape) + 1
        out = m.get_output(0, tvm.nd.empty(out_np.shape))
        np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
Example #8
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
                                                      "when alter_op_layout is enabled"
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = _conv_arg_to_workload(tinfos[0], tinfos[1], strides,
                                         padding, layout, out_dtype)
        cfg = autotvm.task.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.template_key == 'direct':  # packing weight tensor
            new_attrs['kernel_layout'] = 'OIHW%do' % (cfg['tile_co'].size[-1])
            return sym.conv2d(*copy_inputs, **new_attrs)
        else:  # pre-compute weight transformation in winograd
            tile_size = 4

            weight = sym.contrib.conv2d_winograd_weight_transform(
                copy_inputs[1], tile_size=tile_size)
            CO, CI, KH, KW = get_const_tuple(tinfos[1].shape)
            VC = cfg['tile_k'].size[-1]
            weight = sym.reshape(weight,
                                 shape=(KH + tile_size - 1, KW + tile_size - 1,
                                        CO // VC, VC, CI))
            weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

            copy_inputs[1] = weight
            new_attrs['tile_size'] = tile_size
            return sym.contrib.conv2d_winograd_without_weight_transform(
                *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
Example #9
def test_plan_memory():
    x = sym.Variable('x', shape=(4, 2))
    x2 = sym.add(x, x, name='addk')
    y = sym.reshape(x2, target=(2, 4), name="reshapek")
    y = sym.add(y, x2, name="add2")
    y = sym.add(y, y)
    g = graph.create(y)
    g._set_json_attr("shape_attr_key", "shape")
    g = g.apply(["InferShape", "InferType", "PlanMemory"])
    jgraph = json.loads(g.apply('SaveJSON').json_attr('json'))
    jnodes = jgraph['nodes']
    jnode_row_ptr = jgraph['node_row_ptr']
    storage_id = g.json_attr('storage_id')
    nindex = {n['name']: i for i, n in enumerate(jnodes)}
    assert (storage_id[jnode_row_ptr[nindex["addk"]]] !=
            storage_id[jnode_row_ptr[nindex["reshapek"]]])
    assert (storage_id[jnode_row_ptr[nindex["add2"]]] == storage_id[
        jnode_row_ptr[nindex["reshapek"]]])
Example #10
def test_argmax():
    dshape = (204800, 2)
    oshape = (1, 320, 640)

    dtype = "float32"
    x = sym.Variable("x", shape=dshape, dtype=dtype)
    x = sym.reshape(x, shape=(1, 320, 640, 2))
    x = sym.transpose(x, axes=(0, 3, 1, 2))
    y = sym.argmax(x, axis=1)
    target_str = "llvm"
    target = tvm.target.create(target_str)
    ctx = tvm.context(target_str, 0)
    with nnvm.compiler.build_config(opt_level=2):
        graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape})
    m = graph_runtime.create(graph, lib, ctx)
    data = np.random.uniform(size=dshape).astype(dtype)
    m.run(x=data)
    np_reshape = np.reshape(data, (1, 320, 640, 2))
    np_transpose = np.transpose(np_reshape, axes=(0, 3, 1, 2))
    np_argmax = np.argmax(np_transpose, axis=1)
    out = m.get_output(0)
    np.testing.assert_allclose(out.asnumpy(), np_argmax, atol=1e-5, rtol=1e-5)
Example #11
def nnvm_array_reduce(c, fn, array, shape):
    """Implementation of array_reduce."""
    assert fn.is_constant(Primitive)
    assert shape.is_constant(tuple)
    fn = fn.value
    tshp = shape.value
    ary = c.ref(array)
    if fn == P.scalar_add:
        ashp = ashape(array)
        if len(tshp) < len(ashp):
            ts = (1, ) * (len(ashp) - len(tshp)) + tshp
        else:
            ts = tshp
        axis = list(i for i, t in enumerate(ts) if t == 1)
        if len(axis) == 1:
            axis = axis[0]
        res = sym.sum(ary, axis=axis, keepdims=1)
        if len(tshp) < len(ashp):
            res = sym.reshape(res, shape=tshp)
        return res
    else:
        raise NotImplementedError(f"reduce with {fn}")
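The reduction above sums over every axis whose left-padded target size is 1 and then reshapes back to the requested shape. A standalone NumPy restatement of the same shape logic, independent of the nnvm/myia machinery (the function name and test shapes here are illustrative only):

import numpy as np

def reduce_add_to_shape(ary, tshp):
    # Sum `ary` down to shape `tshp`, mirroring the axis selection above.
    ashp = ary.shape
    ts = (1,) * (len(ashp) - len(tshp)) + tshp if len(tshp) < len(ashp) else tshp
    axis = tuple(i for i, t in enumerate(ts) if t == 1)
    res = ary.sum(axis=axis, keepdims=True)
    return res.reshape(tshp) if len(tshp) < len(ashp) else res

a = np.ones((2, 3, 4))
assert reduce_add_to_shape(a, (3, 1)).shape == (3, 1)   # reduces axes 0 and 2
assert reduce_add_to_shape(a, (4,)).shape == (4,)       # reduces axes 0 and 1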
Example #12
def nn(m: Model):
    v_images = sym.Variable("images", shape=(BATCH_SIZE, 1, 28, 28), dtype=0)
    v_true_labels = sym.Variable("true_labels",
                                 shape=(BATCH_SIZE, 10),
                                 dtype=0)

    x = v_images
    x = sym.reshape(data=x, shape=(BATCH_SIZE, 28 * 28))
    x = sym.dense(data=x, units=10)
    logits = x

    x = -sym.elemwise_mul(v_true_labels, sym.log_softmax(x))
    loss = sym.sum(x) / BATCH_SIZE

    # This is not really accuracy, because we use softmax instead of hardmax
    accuracy = sym.sum(v_true_labels * sym.softmax(logits)) / BATCH_SIZE

    # We have to somehow list all weights (the corresponding variables are generated automatically)
    weight_vars = [
        v for v in loss.list_input_variables()
        if v.attr('name') not in ['images', 'true_labels']
    ]

    optimizer = SGD(learning_rate=1e-4)
    update_step = optimizer.minimize(loss, var=weight_vars)

    tgraph = nnvm.graph.create(sym.Group(
        [loss, update_step])).apply("InferShape").apply("InferType")
    fgraph = nnvm.graph.create(sym.Group(
        [loss, accuracy])).apply("InferShape").apply("InferType")

    m.tgraph = tgraph
    m.fgraph = fgraph
    m.optimizer = optimizer
    m.loss = loss
    return m
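The symbolic loss built above is a mean softmax cross-entropy over the batch. A NumPy restatement of the same formula, for reference only (the function name and the random inputs are illustrative, not part of the NNVM graph):

import numpy as np

def softmax_xent(logits, true_labels):
    # Mean over the batch of -sum(true_labels * log_softmax(logits)),
    # matching loss = sym.sum(-true_labels * log_softmax(logits)) / BATCH_SIZE.
    z = logits - logits.max(axis=1, keepdims=True)   # stabilized log-softmax
    log_softmax = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    return -(true_labels * log_softmax).sum() / logits.shape[0]

batch, classes = 4, 10
logits = np.random.randn(batch, classes)
labels = np.eye(classes)[np.random.randint(classes, size=batch)]  # one-hot targets
print(softmax_xent(logits, labels))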
Example #13
def _alter_conv2d_layout(attrs, inputs, tinfo):
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]
    new_attrs = {k: attrs[k] for k in attrs.keys()}
    data, kernel = tinfo[0], tinfo[1]
    batch_size, in_channel, height, width = get_const_tuple(data.shape)

    groups = attrs.get_int("groups")
    out_channel = attrs.get_int("channels")
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    layout = attrs['layout']
    kh, kw = attrs.get_int_tuple("kernel_size")

    dtype = data.dtype
    out_dtype = dtype if attrs["out_dtype"] == "same" else attrs["out_dtype"]
    is_depthwise = groups == in_channel and groups == out_channel

    # only optimize for NCHW
    if layout != 'NCHW':
        return None
    if groups != 1 and not is_depthwise:
        return None

    dispatch_ctx = autotvm.task.DispatchContext.current
    target = tvm.target.current_target()
    # query schedule and fallback if necessary
    workload = autotvm.task.args_to_workload(
        [data, kernel, strides, padding, dilation, out_dtype], depthwise_conv2d_nchw) \
        if is_depthwise else \
        autotvm.task.args_to_workload(
            [data, kernel, strides, padding, dilation, layout, out_dtype], conv2d)
    cfg = dispatch_ctx.query(target, workload)
    if cfg.is_fallback:
        _get_default_config(cfg, data, kernel, strides, padding, out_dtype,
                            is_depthwise)

    ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
    new_attrs['layout'] = 'NCHW%dc' % ic_bn
    new_attrs['out_layout'] = 'NCHW%dc' % oc_bn

    new_data = tvm.placeholder(
        (batch_size, in_channel // ic_bn, height, width, ic_bn),
        dtype=data.dtype)
    if is_depthwise:
        # channel, channel_multiplier, kh, kw -> out_channel_chunk, kh, kw, out_channel_block
        # in which out_channel = merge(channel, channel_multiplier)
        kernel_sym = copy_inputs[1]
        kernel_sym = sym.reshape(kernel_sym,
                                 shape=(out_channel // oc_bn, oc_bn, kh, kw))
        kernel_sym = sym.transpose(kernel_sym, axes=(0, 2, 3, 1))
        copy_inputs[1] = kernel_sym

        # Store altered operator's config
        new_kernel = tvm.placeholder((out_channel // oc_bn, kh, kw, oc_bn),
                                     dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload([
            new_data, new_kernel, strides, padding, dilation,
            new_attrs['layout'], new_attrs['out_layout'], out_dtype
        ], depthwise_conv2d_NCHWc)
    else:
        out_channel, _, kh, kw = get_const_tuple(kernel.shape)
        # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
        new_attrs['kernel_layout'] = 'OIHW%di%do' % (ic_bn, oc_bn)

        # Store altered operator's config
        new_kernel = tvm.placeholder(
            (out_channel // oc_bn, in_channel // ic_bn, kh, kw, ic_bn, oc_bn),
            dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload([
            new_data, new_kernel, strides, padding, dilation,
            new_attrs['layout'], new_attrs['out_layout'], out_dtype
        ], conv2d_NCHWc)

    dispatch_ctx.update(target, new_workload, cfg)
    return sym.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs)
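For the depthwise branch above, the kernel arrives as (channel, channel_multiplier, kh, kw) and the two leading dims are treated as one merged out_channel before blocking. A small NumPy sketch with illustrative sizes:

import numpy as np

# Illustrative sizes only: 8 channels, multiplier 1, 3x3 kernel, oc_bn = 4.
channel, multiplier, kh, kw, oc_bn = 8, 1, 3, 3, 4
out_channel = channel * multiplier

k = np.arange(channel * multiplier * kh * kw).reshape(channel, multiplier, kh, kw)

# Mirror the sym.reshape / sym.transpose pair of the depthwise branch.
packed = k.reshape(out_channel // oc_bn, oc_bn, kh, kw).transpose(0, 2, 3, 1)

# Merged channel c = ch * multiplier + m lands at (c // oc_bn, y, x, c % oc_bn).
ch, m, y, x = 5, 0, 1, 2
c = ch * multiplier + m
assert packed[c // oc_bn, y, x, c % oc_bn] == k[ch, m, y, x]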
Example #14
def check(in_shape, tshape, out_shape):
    x = sym.Variable("x", shape=in_shape)
    y = sym.reshape(x, shape=tshape, name="y")
    sdict = infer_shape(y)
    assert(tuple(sdict["y"][0]) == tuple(out_shape))
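A helper like this is normally driven by a short list of shape cases. The calls below are hypothetical illustrations, not taken from the original test; the 0 and -1 entries assume the special reshape values that nnvm.symbol.reshape documents (mirroring MXNet's reshape).

check((4,), (2, 2), (2, 2))          # plain reshape
check((2, 3, 4), (-1,), (24,))       # -1 infers the remaining size
check((2, 3, 4), (0, -1), (2, 12))   # 0 copies the corresponding input dim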
Example #15
def test_reshape():
    x = sym.Variable("x")
    y = sym.reshape(x, shape=(10, 20), name="y")
    assert(y.list_input_names() == ["x"])
Example #16
def _alter_conv2d_layout_arm(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}

    dilation = attrs.get_int_tuple("dilation")
    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if layout != 'NCHW' or groups != 1:
        return None
    if dilation != (1, 1):
        warnings.warn("Does not support weight pre-transform for dilated convolution.")
        return None

    data, kernel = tinfos[0:2]
    N, CI, H, W = get_const_tuple(data.shape)
    CO, _, KH, KW = get_const_tuple(kernel.shape)

    # query config of this workload
    workload = autotvm.task.args_to_workload(
        [data, kernel, strides, padding, dilation, layout, out_dtype], conv2d)
    target = tvm.target.current_target()
    dispatch_ctx = autotvm.DispatchContext.current
    cfg = dispatch_ctx.query(target, workload)

    if cfg.is_fallback:  # if is fallback, clear query cache and return None
        autotvm.task.clear_fallback_cache(target, workload)
        return None

    if cfg.template_key == 'direct':  # pack weight tensor
        VC = cfg['tile_co'].size[-1]
        new_attrs['kernel_layout'] = 'OIHW%do' % VC

        # Store the same config for the altered operator (workload)
        new_data = data
        new_kernel = tvm.placeholder((CO // VC, CI, KH, KW, VC), dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload(
            [new_data, new_kernel, strides, padding, dilation, 'NCHW', out_dtype], conv2d)
        dispatch_ctx.update(target, new_workload, cfg)

        return sym.conv2d(*copy_inputs, **new_attrs)
    else:  # pre-compute weight transformation in winograd
        if "-device=arm_cpu" in target.options:
            tile_size = 4
            VC = cfg['tile_k'].size[-1]
        else:
            from ..mali.conv2d import _pick_tile_size
            tile_size = _pick_tile_size(tinfos[0], tinfos[1])
            VC = cfg['tile_bna'].val

        weight = sym.contrib.conv2d_winograd_weight_transform(copy_inputs[1], tile_size=tile_size)
        weight = sym.reshape(weight,
                             shape=(KH + tile_size - 1, KW + tile_size - 1, CO // VC, VC, CI))
        weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size

        # Store the same config for the altered operator (workload)
        new_data = data
        new_weight = tvm.placeholder((KH + tile_size - 1, KW + tile_size - 1, CO // VC, CI, VC),
                                     kernel.dtype)
        new_workload = autotvm.task.args_to_workload(
            [new_data, new_weight, strides, padding, dilation,
             new_attrs['layout'], out_dtype, tile_size],
            conv2d_winograd_without_weight_transform)
        dispatch_ctx.update(target, new_workload, cfg)

        return sym.contrib.conv2d_winograd_without_weight_transform(*copy_inputs, **new_attrs)