def test_conv2d():
    def run_test_conv2d(sym, dtype, dshape, kshape, oshape, shape_dict, padding):
        for target, ctx in ctx_list():
            graph, lib, _ = nnvm.compiler.build(sym, target, shape_dict)
            m = graph_runtime.create(graph, lib, ctx)
            data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
            kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
            bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
            m.run(x=data, y_weight=kernel, y_bias=bias)
            out = m.get_output(0, tvm.nd.empty(oshape, dtype))
            c_np = topi.testing.conv2d_nchw_python(
                data.asnumpy(), kernel.asnumpy(), 1, padding)
            c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
            tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

    x = sym.Variable("x")
    y = sym.conv2d(x, channels=10, kernel_size=(3, 3), name="y", padding=(1, 1))
    dtype = "float32"
    dshape = (1, 3, 18, 18)
    kshape = (10, 3, 3, 3)
    oshape = (1, 10, 18, 18)
    shape_dict = {"x": dshape}
    run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (1, 1))

    x = sym.Variable("x")
    y = sym.conv2d(x, channels=10, kernel_size=(1, 3), name="y", padding=(0, 1))
    dtype = "float32"
    dshape = (1, 3, 224, 224)
    kshape = (10, 3, 1, 3)
    oshape = (1, 10, 224, 224)
    shape_dict = {"x": dshape}
    run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (0, 1))
def get_symbol(num_classes, version, **kwargs):
    """Get symbol of SqueezeNet

    Parameters
    ----------
    num_classes : int
        The number of classification results

    version : str, optional
        "1.0" or "1.1" of SqueezeNet
    """
    assert version == '1.1', ("Unsupported SqueezeNet version {version}: "
                              "1.1 expected".format(version=version))
    net = sym.Variable("data")
    net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2))
    net = sym.relu(net)
    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
    net = _make_fire(net, 16, 64, 64)
    net = _make_fire(net, 16, 64, 64)
    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
    net = _make_fire(net, 32, 128, 128)
    net = _make_fire(net, 32, 128, 128)
    net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2))
    net = _make_fire(net, 48, 192, 192)
    net = _make_fire(net, 48, 192, 192)
    net = _make_fire(net, 64, 256, 256)
    net = _make_fire(net, 64, 256, 256)
    net = sym.dropout(net, rate=0.5)
    net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1))
    net = sym.relu(net)
    net = sym.global_avg_pool2d(net)
    return sym.softmax(net, axis=1)
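# A minimal usage sketch for the symbol above: compile it with nnvm.compiler
# the same way the tests in this section do. The input shape, target, and
# num_classes here are illustrative assumptions, and _make_fire is assumed to
# be defined as in the SqueezeNet helpers (see the sketch near _make_fire_conv
# below).
import numpy as np
import tvm
import nnvm.compiler
from tvm.contrib import graph_runtime

net = get_symbol(num_classes=1000, version='1.1')
shape_dict = {"data": (1, 3, 224, 224)}
graph, lib, params = nnvm.compiler.build(net, "llvm", shape_dict)
m = graph_runtime.create(graph, lib, tvm.cpu(0))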
def nnvm_conv():
    x = sym.Variable("x")
    y = sym.Variable("y")
    z = sym.conv2d(x, y, channels=3, kernel_size=3)
    grad = graph_util.gradients([z], [x, y])
    print(grad)
    print(grad[0].debug_str())
def test_conv_ewise_injective():
    x = sym.Variable("x")
    y = sym.conv2d(x, channels=32, kernel_size=(3, 3), groups=32,
                   name="y", padding=(1, 1))
    y = sym.flatten(y + 1) + 1
    dtype = "float32"
    dshape = (1, 32, 18, 18)
    kshape = (32, 1, 3, 3)
    oshape = (1, 32 * 18 * 18)
    shape_dict = {"x": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
        m = graph_runtime.create(graph, lib, ctx)
        # print(graph.ir(join_entry_attrs=["shape"]))
        assert graph.index.num_nodes == 5
        # set input
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
        m.run(x=data, y_weight=kernel, y_bias=bias)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        c_np = topi.testing.depthwise_conv2d_python_nchw(
            data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
        c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + 1
        c_np = c_np.reshape(c_np.shape[0], np.prod(c_np.shape[1:])) + 1
        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
def test_mixed_precision():
    x = sym.Variable("x")
    dtype = "int8"
    out_dtype = "int32"
    y = sym.conv2d(x, channels=10, kernel_size=(3, 3), name="y",
                   padding=(1, 1), use_bias=False, out_dtype="int32")
    dshape = (1, 3, 18, 18)
    kshape = (10, 3, 3, 3)
    oshape = (1, 10, 18, 18)
    shape_dict = {"x": dshape}
    dtype_dict = {"x": dtype}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict)
        m = graph_runtime.create(graph, lib, ctx)
        data = tvm.nd.array(np.random.uniform(-127, 127, size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(-127, 127, size=kshape).astype(dtype))
        m.run(x=data, y_weight=kernel)
        out = m.get_output(0, tvm.nd.empty(oshape, out_dtype))
        c_np = topi.testing.conv2d_nchw_python(
            data.asnumpy().astype(out_dtype),
            kernel.asnumpy().astype(out_dtype), 1, 1)
        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
def test_injective_conv2d():
    channels = 16
    data = sym.Variable(name="data")
    pool = sym.global_avg_pool2d(data=data)
    weight = sym.reshape(pool, shape=[1, channels, 1, 1])
    residual = sym.conv2d(data=data, kernel_size=(3, 3), channels=channels,
                          padding=(1, 1), layout="NCHW", kernel_layout="OIHW",
                          use_bias=False, name="conv")
    net = weight * data + residual
    size = 56
    dtype = "float32"
    dshape = (1, channels, size, size)
    kshape = (channels, channels, 3, 3)
    oshape = dshape
    shape_dict = {"data": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
        # data, global_avg_pool, conv weight, conv op, fused elemwise add
        assert graph.index.num_nodes == 5
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        m = graph_runtime.create(graph, lib, ctx)
        m.run(data=data, conv_weight=kernel)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        residual = topi.testing.conv2d_nchw_python(
            data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
        weight = np.mean(data.asnumpy(), axis=(2, 3))
        c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual
        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
def get_sym(out_channel):
    data = sym.Variable(name="data")
    data = sym.conv2d(data=data, kernel_size=(3, 3), channels=out_channel,
                      padding=(1, 1), layout="NCHW", kernel_layout="OIHW",
                      use_bias=True)
    data = sym.batch_norm(data)
    data = elu(data)
    return data
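# elu is not defined in this snippet. Below is a minimal sketch of an ELU
# composed from nnvm primitives (the alpha = 0.5 form used elsewhere in the
# nnvm tests; treat the exact definition as an assumption).
def elu(data):
    # For x <= 0: -0.5 * relu(1 - exp(x)) == 0.5 * (exp(x) - 1);
    # for x > 0 the first term vanishes and relu(x) == x.
    return -0.5 * sym.relu(1 - sym.exp(data)) + sym.relu(data)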
def test_concatenate_conv2d():
    ch = 3
    size = 8
    data = sym.Variable(name="data")
    concat = sym.concatenate(data, data, axis=1)
    conv = sym.conv2d(data=concat, kernel_size=(1, 1), channels=ch * 2,
                      use_bias=False, name="conv")
    net = sym.elemwise_add(concat, conv)
    dtype = "float32"
    dshape = (1, ch, size, size)
    kshape = (ch * 2, ch * 2, 1, 1)
    oshape = (1, ch * 2, size, size)
    shape_dict = {"data": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
        # data, conv weight, conv op, concat
        assert graph.index.num_nodes == 4
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        m = graph_runtime.create(graph, lib, ctx)
        m.run(data=data, conv_weight=kernel)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        concat = np.concatenate((data.asnumpy(), data.asnumpy()), axis=1)
        conv = topi.testing.conv2d_nchw_python(
            concat, kernel.asnumpy(), (1, 1), 'SAME')
        ref = concat + conv
        tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
def test_json_pass():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2, 2))
    g = graph.create(y)
    ret = g.apply('SaveJSON')
    ret._set_json_attr('json', ret.json_attr('json'))
    g2 = ret.apply('LoadJSON')
    assert g2.apply('SaveJSON').json_attr('json') == ret.json_attr('json')
def before(x, scale, channels):
    y = sym.conv2d(x, channels=channels, kernel_size=(3, 3),
                   padding=(1, 1), name="conv")
    y = y * sym.expand_dims(scale, axis=1, num_newaxis=1)
    return y
def check(in_shape, out_shape, kernel_shape, **kwargs):
    x = sym.Variable("x", shape=in_shape)
    y = sym.conv2d(x, name="y", **kwargs)
    sdict = infer_shape(y)
    assert tuple(sdict["y"][0]) == tuple(out_shape)
    assert tuple(sdict["y_weight"][0]) == tuple(kernel_shape)
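# A hypothetical invocation of check (shapes chosen for illustration;
# infer_shape is assumed to come from the surrounding test utilities).
# A 3x3, 10-channel conv on a 1x3x224x224 input with no padding shrinks
# each spatial dim by 2: 224 - 3 + 1 = 222.
check((1, 3, 224, 224), (1, 10, 222, 222), (10, 3, 3, 3),
      channels=10, kernel_size=(3, 3))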
def test_print_graph_ir():
    x = sym.Variable("x", shape=(1, 1, 10, 20))
    y = sym.conv2d(x + 1, name="y", channels=10, kernel_size=(3, 3))
    g = graph.create(y)
    g = g.apply("InferShape")
    ir1 = g.ir()
    ir2 = g.ir(join_entry_attrs=["shape"])
    assert "y_bias" in ir1
    assert "shape=" in ir2
def test_default_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    assert y.list_inputs() == ['x', 'conv_weight']
    try:
        z = sym.add(x)
        assert False
    except NNVMError:
        pass
def test_list_args():
    x = sym.Variable('x')
    z = sym.Variable('z')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, z, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    assert z.list_inputs('read_only') == ['conv_weight', 'z']
    assert z.list_inputs('aux_state') == ['x']
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}
    assert attrs.get_int_tuple("dilation") == (1, 1), \
        "Does not support dilation when alter_op_layout is enabled"

    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = _conv_arg_to_workload(tinfos[0], tinfos[1], strides,
                                         padding, layout, out_dtype)
        cfg = autotvm.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.is_fallback:
            # if is fallback, clear query cache and return None
            context = autotvm.DispatchContext.current
            while not isinstance(context, autotvm.FallbackContext):
                context = context._old_ctx
            context.clear_cache(tvm.target.current_target(), workload)
            return None

        if cfg.template_key == 'direct':
            # packing weight tensor
            new_attrs['kernel_layout'] = 'OIHW%do' % (cfg['tile_co'].size[-1])
            return sym.conv2d(*copy_inputs, **new_attrs)
        else:
            # pre-compute weight transformation in winograd
            if "-device=arm_cpu" in tvm.target.current_target().options:
                tile_size = 4
                VC = cfg['tile_k'].size[-1]
            else:
                from ..mali.conv2d import _pick_tile_size
                tile_size = _pick_tile_size(tinfos[0], tinfos[1])
                VC = cfg['tile_bna'].val

            weight = sym.contrib.conv2d_winograd_weight_transform(
                copy_inputs[1], tile_size=tile_size)
            CO, CI, KH, KW = get_const_tuple(tinfos[1].shape)
            weight = sym.reshape(weight,
                                 shape=(KH + tile_size - 1, KW + tile_size - 1,
                                        CO // VC, VC, CI))
            weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

            copy_inputs[1] = weight
            new_attrs['tile_size'] = tile_size
            return sym.contrib.conv2d_winograd_without_weight_transform(
                *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
def test_list_args():
    x = sym.Variable('x')
    z = sym.Variable('z')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, z, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    assert z.list_input_names('read_only') == ['conv_weight', 'z']
    assert z.list_input_names('aux_state') == ['x']
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    if 'cudnn' in tvm.target.current_target().libs or \
            'miopen' in tvm.target.current_target().libs:
        return None

    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]

    new_attrs = {k: attrs[k] for k in attrs.keys()}
    assert attrs.get_int_tuple("dilation") == (1, 1), \
        "Does not support dilation when alter_op_layout is enabled"

    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    if groups == 1:
        # query config of this workload
        workload = ('conv2d',) + autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, layout, out_dtype])
        cfg = autotvm.DispatchContext.current.query(
            tvm.target.current_target(), workload)

        if cfg.is_fallback:
            # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(tvm.target.current_target(),
                                              workload)
            return None

        if cfg.template_key == 'direct':
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in tvm.target.current_target().keys
            new_attrs['layout'] = 'NCHW4c'
            new_attrs['out_layout'] = 'NCHW4c'
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            return sym.conv2d(*copy_inputs, **new_attrs)

        # pre-compute weight transformation in winograd
        tile_size = _infer_tile_size(tinfos[0], tinfos[1])

        weight = sym.contrib.conv2d_winograd_weight_transform(
            copy_inputs[1], tile_size=tile_size)
        weight = sym.transpose(weight, axes=[0, 1, 3, 2])
        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size
        return sym.contrib.conv2d_winograd_without_weight_transform(
            *copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None
def before(x, conv_weight, conv_bias, in_scale, out_scale, channels):
    x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
    y = sym.conv2d(x, conv_weight, conv_bias,
                   channels=channels, kernel_size=(3, 3),
                   padding=(1, 1), name="conv")
    y = sym.relu(y)
    y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2)
    return y
def conv2d(data, weight=None, strides=[1, 1, 1, 1], padding='VALID',
           data_format='NCHW', **kwargs):
    kwargs = kwargs.copy()
    kwargs['data'] = data
    if weight:
        kwargs['weight'] = weight
    return _sym.conv2d(strides=strides, padding=padding,
                       data_format=data_format, **kwargs)
def test_default_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    assert y.list_input_names() == ['x', 'conv_weight']
    tname = [z.list_output_names()[0] for z in y.list_input_variables()]
    assert tname == y.list_input_names()
    try:
        z = sym.add(x)
        assert False
    except NNVMError:
        pass
def test_mutate_input():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv')
    z = sym.assign(x, y)
    t = sym.add(z, x)
    try:
        z = sym.assign(z, z)
        assert False
    except NNVMError:
        pass
def test_json_pass_with_attr():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2, 2))
    g = graph.create(y)
    g._set_json_attr('version', '0.1.0')
    ret = g.apply('SaveJSON')
    json_str = ret.json_attr('json')
    print(json_str)
    ret._set_json_attr('json', json_str)
    g2 = ret.apply('LoadJSON')
    assert g2.json_attr('version') == '0.1.0'
def get_sym(layout, kernel_layout, channels):
    data = sym.Variable(name="data")
    data = sym.conv2d(data=data, kernel_size=(3, 3), channels=channels,
                      padding=(1, 1), layout=layout,
                      kernel_layout=kernel_layout, use_bias=True)
    data = sym.max_pool2d(data=data, pool_size=(2, 2), strides=(2, 2),
                          layout=layout)
    data = sym.upsampling(data=data, scale=2, layout=layout)
    softmax_axis = 1
    if layout == "NHWC":
        softmax_axis = 3
    data = sym.softmax(data=data, axis=softmax_axis)
    return data
def expected(x, conv_weight, conv_bias, scale, channels):
    conv_weight = conv_weight * sym.expand_dims(scale, axis=1, num_newaxis=3)
    conv_bias = conv_bias * scale
    y = sym.conv2d(x, conv_weight, conv_bias,
                   channels=channels, kernel_size=(3, 3),
                   padding=(1, 1), name="conv")
    y = sym.relu(y)
    return y
def compile_run_graph(device, target):
    if not tvm.runtime.enabled(device):
        print("Skip test because %s is not enabled." % device)
        return

    out_channels = 16
    data1 = symbol.Variable(name="data1")
    data2 = symbol.Variable(name="data2")
    simple_net1 = symbol.conv2d(data=data1, kernel_size=(3, 3),
                                channels=out_channels, padding=(1, 1),
                                use_bias=True)
    simple_net2 = symbol.conv2d(data=data2, kernel_size=(3, 3),
                                channels=out_channels, padding=(1, 1),
                                use_bias=True)
    ret = symbol.elemwise_add(simple_net1, simple_net2)
    ret = symbol.conv2d(ret, kernel_size=(3, 3), channels=out_channels,
                        padding=(1, 1), use_bias=True)

    batch_size = 1
    data_shape = (batch_size, 3, 224, 224)
    shape_dict = {"data1": data_shape, "data2": data_shape}
    params = {}
    params["data1"] = np.random.uniform(-1, 1, size=data_shape).astype("float32")
    params["data2"] = np.random.uniform(-1, 1, size=data_shape).astype("float32")

    op_name_device = {"elemwise_add": "cpu", "conv2d": device}
    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", device: target}
    # No op will be fused. 3 additional device copy nodes are required.
    check_annotated_graph(ret, target, op_name_device, 15,
                          fallback_device, shape_dict, params)
def forward(self, inputs):
    if self._use_bias:
        return sym.conv2d(data=inputs, weight=self.weight, bias=self.bias,
                          channels=self._out_channel,
                          kernel_size=self._kernel_size,
                          padding=self._padding,
                          strides=self._strides,
                          dilation=self._dilation,
                          groups=self._group,
                          use_bias=self._use_bias)
    else:
        return sym.conv2d(data=inputs, weight=self.weight,
                          channels=self._out_channel,
                          kernel_size=self._kernel_size,
                          padding=self._padding,
                          strides=self._strides,
                          dilation=self._dilation,
                          groups=self._group,
                          use_bias=self._use_bias)
def conv2d_block(data, name, channels, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1)):
    conv2d = sym.conv2d(data=data, channels=channels, kernel_size=kernel_size,
                        strides=strides, padding=padding, use_bias=False,
                        layout="NCHW", name=name + "_conv2d")
    # act = sym.relu(data=conv2d, name=name + "_relu")
    return conv2d
def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels):
    conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3)
    conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=2)
    conv_bias = conv_bias * out_scale
    y = sym.conv2d(x, conv_weight, conv_bias,
                   channels=channels, kernel_size=(3, 3),
                   padding=(1, 1), name="conv")
    y = sym.relu(y)
    return y
def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels):
    conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3)
    conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=3)
    conv_bias = conv_bias * out_scale
    y = sym.conv2d(x, conv_weight, conv_bias,
                   channels=channels, kernel_size=(3, 3),
                   padding=(1, 1), groups=54, name="depthiwise_conv")
    y = sym.relu(y)
    return y
def separable_conv_block(data, name, depthwise_channels, pointwise_channels,
                         kernel_size=(3, 3), downsample=False, padding=(1, 1),
                         epsilon=1e-5):
    """Helper function to get a separable conv block"""
    if downsample:
        strides = (2, 2)
    else:
        strides = (1, 1)
    # depthwise convolution + bn + relu
    conv1 = sym.conv2d(data=data, channels=depthwise_channels,
                       groups=depthwise_channels, kernel_size=kernel_size,
                       strides=strides, padding=padding, use_bias=False,
                       layout="NCHW", name=name + "_depthwise_conv1")
    bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1")
    act1 = sym.relu(data=bn1, name=name + "_relu1")
    # pointwise convolution + bn + relu
    conv2 = sym.conv2d(data=act1, channels=pointwise_channels,
                       kernel_size=(1, 1), strides=(1, 1), padding=(0, 0),
                       use_bias=False, layout="NCHW", name=name + "_conv2")
    bn2 = sym.batch_norm(data=conv2, epsilon=epsilon, name=name + "_bn2")
    act2 = sym.relu(data=bn2, name=name + "_relu2")
    return act2
def test_conv2d(): x = sym.Variable("data", shape=(1, 32, 512, 512)) y = sym.conv2d(x, name="conv", channels=12, kernel_size=(3, 3), padding=(1, 1), layout="NCHW") _, ldict = correct_layout(y) assert (ldict["data"][0] == "NCHW") assert (ldict["conv_weight"][0] == "OIHW") assert (ldict["conv_bias"][0] == "C") assert (ldict["conv"][0] == "NCHW") y = sym.conv2d(x, name="conv", channels=12, kernel_size=(3, 3), padding=(1, 1), layout="NCHW16c", kernel_layout="OIHW16i16o", out_layout="NCHW8c") _, ldict = correct_layout(y) assert (ldict["data"][0] == "NCHW16c") assert (ldict["conv_weight"][0] == "OIHW16i16o") assert (ldict["conv_bias"][0] == "C8c") assert (ldict["conv"][0] == "NCHW8c") y = sym.conv2d(x, name="conv", channels=12, kernel_size=(3, 3), padding=(1, 1), layout="N16cHWC") _, ldict = correct_layout(y) assert (ldict["data"][0] == "N16cHWC") assert (ldict["conv_weight"][0] == "OIHW") assert (ldict["conv_bias"][0] == "16cC") assert (ldict["conv"][0] == "N16cHWC")
def test_conv2d(): x = sym.Variable("data", shape=(1, 32, 512, 512)) y = sym.conv2d(x, name="conv", channels=12, kernel_size=(3,3), padding=(1,1), layout="NCHW") _, ldict = correct_layout(y) assert(ldict["data"][0] == "NCHW") assert(ldict["conv_weight"][0] == "OIHW") assert(ldict["conv_bias"][0] == "C") assert(ldict["conv"][0] == "NCHW") y = sym.conv2d(x, name="conv", channels=12, kernel_size=(3,3), padding=(1,1), layout="NCHW16c", kernel_layout="OIHW16i16o", out_layout="NCHW8c") _, ldict = correct_layout(y) assert(ldict["data"][0] == "NCHW16c") assert(ldict["conv_weight"][0] == "OIHW16i16o") assert(ldict["conv_bias"][0] == "C8c") assert(ldict["conv"][0] == "NCHW8c") y = sym.conv2d(x, name="conv", channels=12, kernel_size=(3,3), padding=(1,1), layout="N16cHWC") _, ldict = correct_layout(y) assert(ldict["data"][0] == "N16cHWC") assert(ldict["conv_weight"][0] == "OIHW") assert(ldict["conv_bias"][0] == "16cC") assert(ldict["conv"][0] == "N16cHWC")
def test_residual_block_layout_transform():
    ch = 16
    size = 32
    data = sym.Variable(name="data")
    conv1 = sym.conv2d(data=data, kernel_size=(3, 3), channels=ch,
                       padding=(1, 1), use_bias=False, name="conv1")
    layout_transform1 = sym.__layout_transform__(data=conv1, src_layout="NCHW",
                                                 dst_layout="NCHW8c")
    layout_transform2 = sym.__layout_transform__(data=layout_transform1,
                                                 src_layout="NCHW8c",
                                                 dst_layout="NCHW")
    conv2 = sym.conv2d(data=conv1, kernel_size=(3, 3), channels=ch,
                       padding=(1, 1), use_bias=False, name="conv2")
    elemwise_sum = sym.elemwise_add(layout_transform2, conv2)
    out = sym.relu(elemwise_sum)

    dtype = "float32"
    dshape = (1, ch, size, size)
    kshape = (ch, ch, 3, 3)
    oshape = (1, ch, size, size)
    shape_dict = {"data": dshape}

    target = "llvm"  # only test on llvm since it involves NCHW8c layout
    ctx = tvm.context(target, 0)
    graph, lib, _ = nnvm.compiler.build(out, target, shape_dict)
    # data, conv1 weight, conv1, layout transform + elemwise add + relu,
    # conv2 weight, conv2 op
    assert graph.index.num_nodes == 6

    data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
    kernel1 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
    kernel2 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
    m = graph_runtime.create(graph, lib, ctx)
    m.run(data=data, conv1_weight=kernel1, conv2_weight=kernel2)
    out = m.get_output(0, tvm.nd.empty(oshape, dtype))

    conv1 = topi.testing.conv2d_nchw_python(
        data.asnumpy(), kernel1.asnumpy(), (1, 1), 'SAME')
    conv2 = topi.testing.conv2d_nchw_python(
        conv1, kernel2.asnumpy(), (1, 1), 'SAME')
    ref = np.maximum(conv1 + conv2, 0)
    tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
def test_order_mutation_pass():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', dev='gpu')
    y = sym.add(y, x, name='add1')
    # write after read
    z = sym.assign(x, y, name='assign')
    # read after write
    t = sym.add(y, x, name='add2')
    g = graph.create(sym.Group([t, z]))
    jgraph = json.loads(g.apply(['OrderMutation', 'SaveJSON']).json_attr('json'))
    jnodes = jgraph['nodes']
    nindex = {n['name']: i for i, n in enumerate(jnodes)}
    assert nindex['assign'] in jnodes[nindex['add2']]['control_deps']
    assert nindex['conv'] in jnodes[nindex['assign']]['control_deps']
    assert nindex['add1'] in jnodes[nindex['assign']]['control_deps']
    assert jnodes[nindex['assign']]['inputs'][0][2] == 1
def conv_block(data, name, channels, kernel_size=(3, 3), strides=(1, 1),
               padding=(1, 1), epsilon=1e-5):
    """Helper function to construct conv-bn-relu"""
    # convolution + bn + relu
    conv = sym.conv2d(data=data, channels=channels, kernel_size=kernel_size,
                      strides=strides, padding=padding, use_bias=False,
                      layout="NCHW", name=name + "_conv")
    bn = sym.batch_norm(data=conv, epsilon=epsilon, name=name + "_bn")
    act = sym.relu(data=bn, name=name + "_relu")
    return act
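# conv_block and the separable_conv_block helper above compose into a
# MobileNet-style stem. A minimal sketch; the channel counts and block names
# here are illustrative, not taken from the original file.
data = sym.Variable("data")
body = conv_block(data, "conv_block_1", 32, strides=(2, 2))
body = separable_conv_block(body, "separable_conv_block_1", 32, 64)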
def test_grouped_conv2d_nchw():
    x = sym.Variable("x")
    y = sym.conv2d(x, channels=32, kernel_size=(3, 3), groups=32,
                   name="y", padding=(1, 1))
    dtype = "float32"
    dshape = (1, 32, 18, 18)
    kshape = (32, 1, 3, 3)
    oshape = (1, 32, 18, 18)
    shape_dict = {"x": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
        m = graph_runtime.create(graph, lib, ctx)
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype))
        m.run(x=data, y_weight=kernel, y_bias=bias)
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        c_np = topi.testing.depthwise_conv2d_python_nchw(
            data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
        c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
def test_grouped_conv2d_nhwc():
    x = sym.Variable("x")
    y = sym.conv2d(x, channels=32, kernel_size=(3, 3), groups=32,
                   name="y", padding=(1, 1), layout="NHWC",
                   kernel_layout='HWOI')
    dtype = "float32"
    dshape = (1, 18, 18, 32)
    kshape = (3, 3, 32, 1)
    oshape = (1, 18, 18, 32)
    shape_dict = {"x": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(y, target, shape_dict)
        m = graph_runtime.create(graph, lib, ctx)
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        bias = tvm.nd.array(np.random.uniform(size=kshape[2]).astype(dtype))
        m.run(x=data, y_weight=kernel, y_bias=bias)
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        c_np = topi.testing.depthwise_conv2d_python_nhwc(
            data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
        c_np = c_np + bias.asnumpy().reshape(1, 1, kshape[2])
        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
def get_feature(internel_layer, layers, filters, batch_norm=False):
    """Get VGG feature body as stacks of convolutions.

    For example, for VGG-11:
    layers  : [1, 1, 2, 2, 2]
    filters : [64, 128, 256, 512, 512]
    """
    for i, num in enumerate(layers):
        # e.g. with layers = [1, 1, 2, 2, 2]:
        # i = 0, num = 1; i = 1, num = 1; i = 2, num = 2; i = 3, num = 2; i = 4, num = 2
        for j in range(num):
            internel_layer = sym.pad(data=internel_layer,
                                     pad_width=((0, 0), (1, 1), (1, 1), (0, 0)))
            internel_layer = sym.conv2d(data=internel_layer, kernel_size=(3, 3),
                                        channels=filters[i], layout='NHWC',
                                        kernel_layout='HWOI',
                                        name="conv%s_%s" % (i + 1, j + 1))
            if batch_norm:
                internel_layer = sym.batch_norm(data=internel_layer, axis=3,
                                                name="bn%s_%s" % (i + 1, j + 1))
            internel_layer = sym.relu(data=internel_layer,
                                      name="relu%s_%s" % (i + 1, j + 1))
        internel_layer = sym.max_pool2d(data=internel_layer, pool_size=(2, 2),
                                        strides=(2, 2), layout="NHWC",
                                        name="pool%s" % (i + 1))
    return internel_layer
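# A hypothetical call wiring get_feature into a VGG-11 body (NHWC input
# assumed, matching the layouts above; the input shape is illustrative).
data = sym.Variable("data")  # expected shape: (batch, 224, 224, 3)
feature = get_feature(data, layers=[1, 1, 2, 2, 2],
                      filters=[64, 128, 256, 512, 512])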
def test_injective_conv2d():
    channels = 16
    data = sym.Variable(name="data")
    pool = sym.global_avg_pool2d(data=data)
    weight = sym.reshape(pool, shape=[1, channels, 1, 1])
    residual = sym.conv2d(data=data, kernel_size=(3, 3), channels=channels,
                          padding=(1, 1), layout="NCHW", kernel_layout="OIHW",
                          use_bias=False, name="conv")
    net = weight * data + residual
    size = 56
    dtype = "float32"
    dshape = (1, channels, size, size)
    kshape = (channels, channels, 3, 3)
    oshape = dshape
    shape_dict = {"data": dshape}
    for target, ctx in ctx_list():
        graph, lib, _ = nnvm.compiler.build(net, target, shape_dict)
        # data, global_avg_pool, conv weight, conv op, fused elemwise add
        assert graph.index.num_nodes == 5
        data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype))
        kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype))
        m = graph_runtime.create(graph, lib, ctx)
        m.run(data=data, conv_weight=kernel)
        # get output
        out = m.get_output(0, tvm.nd.empty(oshape, dtype))
        residual = topi.testing.conv2d_nchw_python(
            data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
        weight = np.mean(data.asnumpy(), axis=(2, 3))
        c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual
        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
def test_alter_conv2d_layout():
    data = sym.Variable("data", shape=(1, 32, 512, 512))
    conv = sym.conv2d(data, name="conv", channels=16,
                      kernel_size=(3, 3), padding=(1, 1),
                      use_bias=False, layout="NCHW")
    # split here
    convs = sym.split(conv, indices_or_sections=2)
    relus = [sym.relu(x, name="relu") for x in convs]
    relu = sym.concatenate(*relus)
    flatten = sym.flatten(relu, name="flatten")
    softmax = sym.softmax(flatten, name="softmax")
    g = graph.create(softmax)

    g = g.apply("CorrectLayout")
    g = graph_attr.set_dtype_inputs(g, "float32")
    g = g.apply(["InferShape", "InferType"])
    layouts_origin = get_layouts(g)

    @reg.register_alter_op_layout("conv2d", level=100)
    def alter_conv2d_layout(attrs, inputs, tinfos):
        new_attrs = {k: attrs[k] for k in attrs.keys()}
        new_attrs["layout"] = "NCHW16c"
        new_attrs["kernel_layout"] = "NCHW16c"
        new_attrs["name"] = "conv_alter"
        return sym.conv2d(inputs[0], inputs[1], **new_attrs)

    g = g.apply("AlterOpLayout")
    layouts = get_layouts(g)

    # check copy layouts
    for node in ["data", "relu", "flatten", "softmax", "conv_weight"]:
        assert layouts[node] == layouts_origin[node]
    assert layouts["conv_alter"] == layouts_origin["conv"]
def main(conv_config):
    # Define conv2d network.
    N, H, W, CO, CI, KH, KW, strides, padding = conv_configs[conv_config]
    batch_size = N
    data_shape = (N, CI, H, W)
    data = sym.Variable(name="data")
    simple_net = sym.conv2d(data=data, kernel_size=(KH, KW),
                            channels=CO, padding=padding)

    # Use cuDNN as conv2d backend.
    net, params = utils.create_workload(simple_net, batch_size, data_shape[1:])
    target = "cuda -libs=cudnn"
    graph, lib, params = nnvm.compiler.build(
        net, target, shape={"data": data_shape}, params=params)
    ctx = tvm.context(target, 0)
    data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
    module = runtime.create(graph, lib, ctx)
    module.set_input(**params)
    module.set_input("data", data)
    module.run()
    # Note: this assumes the convolution preserves the spatial dimensions.
    out_shape = (batch_size, CO, H, W)
    out = module.get_output(0, tvm.nd.empty(out_shape))
    out_cudnn = out.asnumpy()

    print('Time cost of cuDNN conv2d operator ({}):'.format(conv_config))
    costs = []
    for _ in range(10):
        evaluator = module.module.time_evaluator("run", ctx, number=1000)
        cost = evaluator().mean
        costs.append(cost)
        print('%.8f' % cost)
    print('Mean:', '%.8f' % np.mean(costs))
def test_conv2d():
    x = sym.Variable('x')
    y = sym.conv2d(x, channels=3, kernel_size=(3, 3), name="y", use_bias=False)
    assert y.list_input_names() == ["x", "y_weight"]
import tvm
import numpy as np
from tvm.contrib import graph_runtime as runtime
import nnvm.symbol as sym
import nnvm.compiler
from nnvm.testing import utils

######################################################################
# Create a simple network
# -----------------------
# Let's create a very simple network for demonstration.
# It consists of convolution, batch normalization, and ReLU activation.

out_channels = 16
data = sym.Variable(name="data")
simple_net = sym.conv2d(data=data, kernel_size=(3, 3), channels=out_channels,
                        padding=(1, 1), use_bias=True)
simple_net = sym.batch_norm(data=simple_net)
simple_net = sym.relu(data=simple_net)

batch_size = 1
data_shape = (batch_size, 3, 224, 224)
net, params = utils.create_workload(simple_net, batch_size, data_shape[1:])

######################################################################
# Build and run with cuda backend
# -------------------------------
# We build and run this network with cuda backend, as usual.
# By setting the logging level to DEBUG, the result of NNVM graph
# compilation will be dumped as pseudo code.
import logging
logging.basicConfig(level=logging.DEBUG)  # to dump TVM IR after fusion
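# The snippet above ends before the actual build step it describes. Below is a
# minimal sketch of that build-and-run step, assuming a CUDA-enabled TVM; the
# calls mirror the NNVM API used throughout this section.
target = "cuda"
graph, lib, params = nnvm.compiler.build(
    net, target, shape={"data": data_shape}, params=params)
ctx = tvm.gpu(0)
module = runtime.create(graph, lib, ctx)
module.set_input(**params)
module.set_input("data", np.random.uniform(size=data_shape).astype("float32"))
module.run()
# 3x3 conv with padding (1, 1) preserves the 224x224 spatial dims.
out = module.get_output(0, tvm.nd.empty((batch_size, out_channels, 224, 224)))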
def test_graph_json_attr():
    x = sym.Variable('x')
    y = sym.conv2d(data=x, name='conv', stride=(2, 2))
    g = graph.create(y)
    g._set_json_attr('ilist', [1, 2, 3], 'list_int')
    assert g.json_attr('ilist') == [1, 2, 3]
def _make_fire_conv(net, channels, kernel_size, padding=0):
    net = sym.conv2d(net, channels=channels,
                     kernel_size=(kernel_size, kernel_size),
                     padding=(padding, padding))
    net = sym.relu(net)
    return net
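# The _make_fire helper used by get_symbol above is not shown in this section.
# Below is a sketch of the standard SqueezeNet fire module built from
# _make_fire_conv; the concat axis assumes NCHW layout. Treat this as an
# assumption-based reconstruction, not the original source.
def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels):
    # 1x1 squeeze conv, then parallel 1x1 and 3x3 expand convs whose
    # outputs are concatenated along the channel axis.
    net = _make_fire_conv(net, squeeze_channels, 1, 0)
    left = _make_fire_conv(net, expand1x1_channels, 1, 0)
    right = _make_fire_conv(net, expand3x3_channels, 3, 1)
    return sym.concatenate(left, right, axis=1)  # channels are axis 1 in NCHW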
def _alter_conv2d_layout(attrs, inputs, tinfos):
    """Alter op layout for pre-computing kernel transformation"""
    if 'cudnn' in tvm.target.current_target().libs or \
            'miopen' in tvm.target.current_target().libs:
        return None

    import nnvm.symbol as sym
    copy_inputs = [s for s in inputs]
    new_attrs = {k: attrs[k] for k in attrs.keys()}

    strides = attrs.get_int_tuple("strides")
    padding = attrs.get_int_tuple("padding")
    dilation = attrs.get_int_tuple("dilation")
    groups = attrs.get_int('groups')
    layout = attrs["layout"]
    out_dtype = attrs["out_dtype"]
    out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

    data, kernel = tinfos[0:2]
    N, CI, H, W = get_const_tuple(data.shape)
    CO, _, KH, KW = get_const_tuple(kernel.shape)

    dispatch_ctx = autotvm.DispatchContext.current
    target = tvm.target.current_target()

    if groups == 1:
        # query config of this workload
        workload = autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, dilation, layout, out_dtype],
            conv2d)
        cfg = autotvm.DispatchContext.current.query(target, workload)

        if cfg.is_fallback:
            # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(target, workload)
            return None

        if cfg.template_key == 'direct':
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in target.keys
            new_layout = 'NCHW4c'
            new_attrs['layout'] = new_layout
            new_attrs['out_layout'] = new_layout
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            ic_block_factor = oc_block_factor = 4

            # Store the same config for the altered operator (workload)
            new_data = tvm.placeholder(
                (N, CI // ic_block_factor, H, W, ic_block_factor),
                dtype=data.dtype)
            new_kernel = tvm.placeholder(
                (CO // oc_block_factor, CI // ic_block_factor, KH, KW,
                 oc_block_factor, ic_block_factor),
                dtype=kernel.dtype)
            new_workload = autotvm.task.args_to_workload(
                [new_data, new_kernel, strides, padding, dilation,
                 new_layout, out_dtype], conv2d)
            dispatch_ctx.update(target, new_workload, cfg)
            return sym.conv2d(*copy_inputs, **new_attrs)

        if attrs.get_int_tuple("dilation") != (1, 1):
            warnings.warn(
                "Does not support weight pre-transform for dilated convolution.")
            return None

        # pre-compute weight transformation in winograd
        tile_size = _infer_tile_size(tinfos[0], tinfos[1])

        weight = sym.contrib.conv2d_winograd_weight_transform(
            copy_inputs[1], tile_size=tile_size)
        weight = sym.transpose(weight, axes=[0, 1, 3, 2])
        copy_inputs[1] = weight
        new_attrs['tile_size'] = tile_size

        # Store the same config for the altered operator (workload)
        new_data = data
        new_weight = tvm.placeholder(
            (KH + tile_size - 1, KW + tile_size - 1, CI, CO),
            dtype=kernel.dtype)
        new_workload = autotvm.task.args_to_workload(
            [new_data, new_weight, strides, padding, dilation, layout,
             out_dtype, tile_size],
            conv2d_winograd_without_weight_transform)
        dispatch_ctx.update(target, new_workload, cfg)
        return sym.contrib.conv2d_winograd_without_weight_transform(
            *copy_inputs, **new_attrs)

    elif groups != CI:
        workload = autotvm.task.args_to_workload(
            [tinfos[0], tinfos[1], strides, padding, dilation, groups, out_dtype],
            group_conv2d_nchw)
        cfg = autotvm.DispatchContext.current.query(target, workload)

        if cfg.is_fallback:
            # if is fallback, clear query cache and return None
            autotvm.task.clear_fallback_cache(target, workload)
            return None

        if cfg.template_key == 'int8':
            assert 'cuda' in target.keys
            new_layout = 'NCHW4c'
            new_attrs['layout'] = new_layout
            new_attrs['out_layout'] = new_layout
            new_attrs['kernel_layout'] = 'OIHW4o4i'
            ic_block_factor = oc_block_factor = 4

            # Store the same config for the altered operator (workload)
            new_data = tvm.placeholder(
                (N, CI // ic_block_factor, H, W, ic_block_factor),
                dtype=data.dtype)
            new_kernel = tvm.placeholder(
                (CO // oc_block_factor, CI // ic_block_factor // groups,
                 KH, KW, oc_block_factor, ic_block_factor),
                dtype=kernel.dtype)
            new_workload = autotvm.task.args_to_workload(
                [new_data, new_kernel, strides, padding, dilation, groups,
                 out_dtype], group_conv2d_nchw)
            dispatch_ctx.update(target, new_workload, cfg)
            return sym.conv2d(*copy_inputs, **new_attrs)

    # do nothing for depthwise convolution
    return None