# Layout-conversion tests for the Relay ConvertLayout pass. Module-level
# imports below are the standard TVM test preamble assumed by these tests.
import tvm
from tvm import relay
from tvm.relay import analysis, transform


def test_conv_nhwc_convert_layout():
    """Check that an NCHW conv2d is converted to NHWC."""

    def before():
        x = relay.var("x", shape=(1, 64, 56, 56))
        weight = relay.var("weight", shape=(64, 64, 3, 3))
        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NCHW",
            kernel_layout="OIHW",
        )
        y = relay.nn.relu(y)
        y = relay.Function([x, weight], y)
        return y

    def expected():
        x = relay.var("x", shape=(1, 64, 56, 56))
        weight = relay.var("weight", shape=(64, 64, 3, 3))
        x = relay.layout_transform(x, "NCHW", "NHWC")
        weight = relay.layout_transform(weight, "OIHW", "HWIO")
        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.nn.relu(y)
        y = relay.layout_transform(y, "NHWC", "NCHW")
        y = relay.Function(relay.analysis.free_vars(y), y)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["NHWC", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
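# The tests in this file rely on a `run_opt_pass` helper. A minimal sketch,
# assuming the usual pattern from TVM's Relay pass tests (treat the exact body
# as an assumption, not the canonical definition from the original file):
def run_opt_pass(expr, passes):
    """Wrap an expression in an IRModule, run the given passes, return main."""
    passes = passes if isinstance(passes, list) else [passes]
    mod = tvm.IRModule.from_expr(expr)
    seq = tvm.transform.Sequential(passes)
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)
    return mod["main"]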
def test_default_keyword():
    """Check that the default keyword selects correct TVM default layout."""

    def before():
        x = relay.var("x", shape=(1, 64, 56, 56))
        weight = relay.var("weight", shape=(64, 3, 3, 64))
        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NCHW",
            kernel_layout="OHWI",
        )
        y = relay.Function(analysis.free_vars(y), y)
        return y

    def expected():
        x = relay.var("x", shape=(1, 64, 56, 56))
        w = relay.var("weight", shape=(64, 3, 3, 64))
        w = relay.layout_transform(w, "OHWI", "OIHW")
        y = relay.nn.conv2d(
            x,
            w,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NCHW",
            kernel_layout="OIHW",
        )
        y = relay.Function(analysis.free_vars(y), y)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_conv_convert_kernel_layout():
    """Check that convolution kernel layout is correctly transformed."""

    def before():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight = relay.var("weight", shape=(3, 3, 64, 64))
        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.Function(analysis.free_vars(y), y)
        return y

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64))
        w = relay.var("weight", shape=(3, 3, 64, 64))
        w = relay.layout_transform(w, "HWIO", "OHWI")
        y = relay.nn.conv2d(
            x,
            w,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="OHWI",
        )
        y = relay.Function(analysis.free_vars(y), y)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["NHWC", "OHWI"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_no_convert_layout():
    """Check that a conv2d already in the desired layout is left unchanged."""

    def before():
        x = relay.var("x", shape=(1, 64, 56, 56))
        weight = relay.var("weight", shape=(64, 64, 3, 3))
        y = relay.nn.conv2d(x, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
        y = relay.nn.relu(y)
        y = relay.Function([x, weight], y)
        return y

    def expected():
        return before()

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_scalar_convert_layout():
    def before():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight = relay.var("weight", shape=(3, 3, 64, 64))
        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.add(y, relay.const(1, "float32"))
        y = relay.Function(analysis.free_vars(y), y)
        return y

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64))
        w = relay.var("weight", shape=(3, 3, 64, 64))
        x = relay.layout_transform(x, "NHWC", "NCHW")
        w = relay.layout_transform(w, "HWIO", "OIHW")
        y = relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1))
        y = relay.add(y, relay.const(1.0, "float32"))
        y = relay.layout_transform(y, "NCHW", "NHWC")
        y = relay.Function(analysis.free_vars(y), y)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_conv_transpose_convert_layout():
    def before():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight = relay.var("weight", shape=(3, 3, 64, 64))
        y = relay.nn.conv2d_transpose(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.nn.relu(y)
        y = relay.Function([x, weight], y)
        return y

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight = relay.var("weight", shape=(3, 3, 64, 64))
        x = relay.layout_transform(x, "NHWC", "NCHW")
        weight = relay.layout_transform(weight, "HWIO", "OIHW")
        y = relay.nn.conv2d_transpose(x, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
        y = relay.nn.relu(y)
        y = relay.layout_transform(y, "NCHW", "NHWC")
        y = relay.Function(relay.analysis.free_vars(y), y)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d_transpose": ["NCHW", "OIHW"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_deformable_conv_bias_pool_convert_layout():
    def before(N, CI, H, W, CO, KH, KW, layout):
        if layout == "NCHW":
            data_shape = (N, CI, H, W)
            weight_shape = (CO, CI, KH, KW)
            kernel_layout = "OIHW"
        else:
            data_shape = (N, H, W, CI)
            weight_shape = (KH, KW, CI, CO)
            kernel_layout = "HWIO"
        bias_shape = (CO,)

        data = relay.var("data", shape=data_shape, dtype="float32")
        offset = relay.var("offset")
        weight = relay.var("weight", shape=weight_shape, dtype="float32")
        bias = relay.var("bias", shape=bias_shape, dtype="float32")

        y = relay.nn.deformable_conv2d(
            data,
            offset,
            weight,
            kernel_size=(KH, KW),
            channels=CO,
            data_layout=layout,
            kernel_layout=kernel_layout,
        )
        y = relay.nn.bias_add(y, bias, axis=-1 if layout == "NHWC" else 1)
        y = relay.nn.relu(y)
        y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout=layout)
        y = relay.cast(y, "int32")
        y = relay.nn.batch_flatten(y)
        y = relay.Function(analysis.free_vars(y), y)
        return y

    def expected(N, CI, H, W, CO, KH, KW, OH, OW, src_layout, dst_layout):
        layout_map = {"src": {}, "dst": {}}
        if src_layout == "NCHW":
            nchw = layout_map["src"]
            nhwc = layout_map["dst"]
        else:
            nchw = layout_map["dst"]
            nhwc = layout_map["src"]

        nchw["data_layout"] = "NCHW"
        nchw["data_shape"] = (N, CI, H, W)
        nchw["offset_shape"] = (N, KH * KW * 2, OH, OW)
        nchw["weight_shape"] = (CO, CI, KH, KW)
        nchw["kernel_layout"] = "OIHW"

        nhwc["data_layout"] = "NHWC"
        nhwc["data_shape"] = (N, H, W, CI)
        nhwc["offset_shape"] = (N, OH, OW, KH * KW * 2)
        nhwc["weight_shape"] = (KH, KW, CI, CO)
        nhwc["kernel_layout"] = "HWIO"

        bias_shape = (CO,)

        data = relay.var("data", shape=layout_map["src"]["data_shape"], dtype="float32")
        offset = relay.var("offset", shape=layout_map["src"]["offset_shape"], dtype="float32")
        weight = relay.var("weight", shape=layout_map["src"]["weight_shape"], dtype="float32")
        bias = relay.var("bias", shape=bias_shape, dtype="float32")

        data = relay.layout_transform(
            data, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
        )
        offset = relay.layout_transform(
            offset, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
        )
        weight = relay.layout_transform(
            weight, layout_map["src"]["kernel_layout"], layout_map["dst"]["kernel_layout"]
        )
        y = relay.nn.deformable_conv2d(
            data,
            offset,
            weight,
            kernel_size=(KH, KW),
            channels=CO,
            data_layout=layout_map["dst"]["data_layout"],
            kernel_layout=layout_map["dst"]["kernel_layout"],
        )
        if layout_map["src"]["data_layout"] == "NHWC":
            bias = relay.expand_dims(bias, axis=0, num_newaxis=3)
        else:
            bias = relay.expand_dims(bias, axis=1, num_newaxis=2)
            bias = relay.expand_dims(bias, axis=0)
        bias = relay.layout_transform(
            bias, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
        )
        y = relay.add(y, bias)
        y = relay.nn.relu(y)
        y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout=layout_map["dst"]["data_layout"])
        y = relay.cast(y, "int32")
        y = relay.layout_transform(
            y, layout_map["dst"]["data_layout"], layout_map["src"]["data_layout"]
        )
        y = relay.nn.batch_flatten(y)
        y = relay.Function(analysis.free_vars(y), y)
        return y

    # NHWC -> NCHW
    a = before(1, 3, 224, 224, 32, 3, 3, "NHWC")
    a = run_opt_pass(a, transform.ConvertLayout({"nn.deformable_conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(
        expected(1, 3, 224, 224, 32, 3, 3, 222, 222, "NHWC", "NCHW"), transform.InferType()
    )
    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)

    # NCHW -> NHWC
    a = before(1, 3, 224, 224, 32, 3, 3, "NCHW")
    a = run_opt_pass(a, transform.ConvertLayout({"nn.deformable_conv2d": ["NHWC", "default"]}))
    b = run_opt_pass(
        expected(1, 3, 224, 224, 32, 3, 3, 222, 222, "NCHW", "NHWC"), transform.InferType()
    )
    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_qnn_conv_add_convert_layout():
    def before():
        x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8")
        weight1 = relay.var("weight1", shape=(3, 3, 64, 64), dtype="int8")
        weight2 = relay.var("weight2", shape=(3, 3, 64, 64), dtype="int8")
        y = relay.qnn.op.conv2d(
            x,
            weight1,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y1 = relay.qnn.op.conv2d(
            y,
            weight2,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.cast(y, "int8")
        # Cast the second branch itself (not `y` again) so both conv outputs
        # actually feed the qnn.add below.
        y1 = relay.cast(y1, "int8")
        ret = relay.qnn.op.add(
            y,
            y1,
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "int32"),
        )
        y = relay.Function(analysis.free_vars(ret), ret)
        return y

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8")
        weight1 = relay.var("weight1", shape=(3, 3, 64, 64), dtype="int8")
        weight2 = relay.var("weight2", shape=(3, 3, 64, 64), dtype="int8")
        weight1 = relay.layout_transform(weight1, "HWIO", "OIHW")
        weight2 = relay.layout_transform(weight2, "HWIO", "OIHW")
        y = relay.layout_transform(x, "NHWC", "NCHW")
        y = relay.qnn.op.conv2d(
            y,
            weight1,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
        )
        y1 = relay.qnn.op.conv2d(
            y,
            weight2,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
        )
        y = relay.cast(y, "int8")
        y1 = relay.cast(y1, "int8")
        ret = relay.qnn.op.add(
            y,
            y1,
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "int32"),
        )
        ret = relay.layout_transform(ret, "NCHW", "NHWC")
        y = relay.Function(analysis.free_vars(ret), ret)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"qnn.conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_qnn_conv_requantize_convert_layout():
    def before():
        x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8")
        weight = relay.var("weight", shape=(3, 3, 64, 64), dtype="int8")
        y = relay.qnn.op.conv2d(
            x,
            weight,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.qnn.op.requantize(
            y,
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            out_dtype="int32",
        )
        y = relay.nn.relu(y)
        y = relay.Function([x, weight], y)
        return y

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8")
        weight = relay.var("weight", shape=(3, 3, 64, 64), dtype="int8")
        x = relay.layout_transform(x, "NHWC", "NCHW")
        weight = relay.layout_transform(weight, "HWIO", "OIHW")
        y = relay.qnn.op.conv2d(
            x,
            weight,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
        )
        y = relay.qnn.op.requantize(
            y,
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "int32"),
            axis=1,
            out_dtype="int32",
        )
        y = relay.nn.relu(y)
        y = relay.layout_transform(y, "NCHW", "NHWC")
        y = relay.Function(relay.analysis.free_vars(y), y)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"qnn.conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_conv_bn_convert_layout():
    """Check that layout transforms are propagated through bn."""

    def before():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight = relay.var("weight", shape=(3, 3, 64, 64))
        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        dtype = "float32"
        beta = relay.var("beta", relay.TensorType((64,), dtype))
        gamma = relay.var("gamma", relay.TensorType((64,), dtype))
        moving_mean = relay.var("moving_mean", relay.TensorType((64,), dtype))
        moving_var = relay.var("moving_var", relay.TensorType((64,), dtype))
        y = relay.nn.batch_norm(y, gamma, beta, moving_mean, moving_var, axis=3)
        y = relay.nn.relu(y[0])
        y = relay.Function(analysis.free_vars(y), y)
        return y

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64))
        w = relay.var("weight", shape=(3, 3, 64, 64))
        x = relay.layout_transform(x, "NHWC", "NCHW")
        w = relay.layout_transform(w, "HWIO", "OIHW")
        y = relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1))
        dtype = "float32"
        beta = relay.var("beta", relay.TensorType((64,), dtype))
        gamma = relay.var("gamma", relay.TensorType((64,), dtype))
        moving_mean = relay.var("moving_mean", relay.TensorType((64,), dtype))
        moving_var = relay.var("moving_var", relay.TensorType((64,), dtype))
        y = relay.nn.batch_norm(y, gamma, beta, moving_mean, moving_var, axis=1)
        y = relay.nn.relu(y[0])
        y = relay.layout_transform(y, "NCHW", "NHWC")
        y = relay.Function(analysis.free_vars(y), y)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_dual_path_convert_layout():
    def before():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight1 = relay.var("weight1", shape=(3, 3, 64, 32))
        weight2 = relay.var("weight2", shape=(3, 3, 32, 32))
        y = relay.nn.conv2d(
            x,
            weight1,
            channels=32,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.nn.relu(y)
        y1 = relay.nn.conv2d(
            y,
            weight2,
            channels=32,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y1 = relay.nn.relu(y1)
        y2 = relay.nn.batch_flatten(y)
        ret = relay.Tuple([y1, y2])
        y = relay.Function(analysis.free_vars(ret), ret)
        return y

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight1 = relay.var("weight1", shape=(3, 3, 64, 32))
        weight2 = relay.var("weight2", shape=(3, 3, 32, 32))
        weight1 = relay.layout_transform(weight1, "HWIO", "OIHW")
        weight2 = relay.layout_transform(weight2, "HWIO", "OIHW")
        y = relay.layout_transform(x, "NHWC", "NCHW")
        y = relay.nn.conv2d(y, weight1, channels=32, kernel_size=(3, 3), padding=(1, 1))
        y = relay.nn.relu(y)
        y1 = relay.nn.conv2d(y, weight2, channels=32, kernel_size=(3, 3), padding=(1, 1))
        y1 = relay.nn.relu(y1)
        y1 = relay.layout_transform(y1, "NCHW", "NHWC")
        y2 = relay.layout_transform(y, "NCHW", "NHWC")
        y2 = relay.nn.batch_flatten(y2)
        ret = relay.Tuple([y1, y2])
        y = relay.Function(analysis.free_vars(ret), ret)
        return y

    a = before()
    a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["NCHW", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_convert_with_config():
    def before():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight = relay.var("weight", shape=(3, 3, 64, 64))
        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.nn.relu(y)

        weight2 = relay.var("weight2", shape=(3, 3, 64, 64))
        y2 = relay.nn.conv2d(
            y,
            weight2,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y2 = relay.nn.relu(y2)
        out = relay.Function([x, weight, weight2], y2)
        return out

    def expected():
        x = relay.var("x", shape=(1, 56, 56, 64))
        weight = relay.var("weight", shape=(3, 3, 64, 64))
        weight2 = relay.var("weight2", shape=(3, 3, 64, 64))
        weight2 = relay.layout_transform(weight2, "HWIO", "HWOI")

        y = relay.nn.conv2d(
            x,
            weight,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        y = relay.nn.relu(y)
        y = relay.layout_transform(y, "NHWC", "HWNC")

        y2 = relay.nn.conv2d(
            y,
            weight2,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="HWNC",
            kernel_layout="HWOI",
        )
        y2 = relay.nn.relu(y2)
        y2 = relay.layout_transform(y2, "HWNC", "NHWC")
        output = relay.Function(relay.analysis.free_vars(y2), y2)
        return output

    a = before()
    # skip_layers=[0]: leave the first conv2d untouched; only the second conv
    # should be converted to HWNC/HWOI.
    layout_config = relay.transform.LayoutConfig(skip_layers=[0])
    with layout_config:
        a = run_opt_pass(a, transform.ConvertLayout({"nn.conv2d": ["HWNC", "default"]}))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
def test_different_ops_convert_layout():
    """Check that convert layout correctly supports converting the layout of
    different ops in the same graph.
    """

    def before():
        x = relay.var("x", shape=(1, 64, 56, 56))
        weight1 = relay.var("weight1", shape=(64, 3, 3, 64))
        weight2 = relay.var("weight2", shape=(64, 3, 3, 64), dtype="int8")
        weight3 = relay.var("weight3", shape=(64, 3, 3, 64))
        out = relay.nn.conv2d(
            x,
            weight1,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NCHW",
            kernel_layout="OHWI",
        )
        out = relay.cast(out, "int8")
        out = relay.qnn.op.conv2d(
            out,
            weight2,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NCHW",
            kernel_layout="OHWI",
        )
        out = relay.cast(out, "float32")
        out = relay.nn.conv2d_transpose(
            out,
            weight3,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NCHW",
            kernel_layout="OHWI",
        )
        out = relay.Function(analysis.free_vars(out), out)
        return out

    def expected():
        x = relay.var("x", shape=(1, 64, 56, 56))
        weight1 = relay.var("weight1", shape=(64, 3, 3, 64))
        weight2 = relay.var("weight2", shape=(64, 3, 3, 64), dtype="int8")
        weight3 = relay.var("weight3", shape=(64, 3, 3, 64))
        x = relay.layout_transform(x, "NCHW", "NHWC")
        weight1 = relay.layout_transform(weight1, "OHWI", "HWIO")
        out = relay.nn.conv2d(
            x,
            weight1,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        out = relay.cast(out, "int8")
        out = relay.layout_transform(out, "NHWC", "NCHW")
        weight2 = relay.layout_transform(weight2, "OHWI", "OIHW")
        out = relay.qnn.op.conv2d(
            out,
            weight2,
            relay.const(1, "int32"),
            relay.const(1, "int32"),
            relay.const(1, "float32"),
            relay.const(1, "float32"),
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NCHW",
            kernel_layout="OIHW",
        )
        out = relay.cast(out, "float32")
        out = relay.layout_transform(out, "NCHW", "NHWC")
        weight3 = relay.layout_transform(weight3, "OHWI", "HWIO")
        out = relay.nn.conv2d_transpose(
            out,
            weight3,
            channels=64,
            kernel_size=(3, 3),
            padding=(1, 1),
            data_layout="NHWC",
            kernel_layout="HWIO",
        )
        out = relay.layout_transform(out, "NHWC", "NCHW")
        out = relay.Function(analysis.free_vars(out), out)
        return out

    a = before()
    desired_layouts = {
        "nn.conv2d": ["NHWC", "HWIO"],
        "qnn.conv2d": ["NCHW", "OIHW"],
        "nn.conv2d_transpose": ["NHWC", "HWIO"],
    }
    a = run_opt_pass(a, transform.ConvertLayout(desired_layouts))
    b = run_opt_pass(expected(), transform.InferType())

    assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a)
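# Hedged usage sketch: outside of these tests, ConvertLayout is typically run
# on a whole IRModule rather than a bare expression, wrapped in a Sequential
# pipeline. The helper name below is illustrative, not part of the original
# file; the pass names are the standard tvm.relay.transform APIs.
def _convert_module_layout(mod, desired_layouts=None):
    """Apply ConvertLayout to an imported module with a minimal pass pipeline."""
    if desired_layouts is None:
        desired_layouts = {"nn.conv2d": ["NHWC", "default"]}
    seq = tvm.transform.Sequential(
        [
            transform.RemoveUnusedFunctions(),
            transform.ConvertLayout(desired_layouts),
        ]
    )
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)
    return mod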
# The following function is from the TensorRT BYOC integration
# (python/tvm/relay/op/contrib/tensorrt.py); `logger`, `bind_params_by_name`,
# `RemoveDropoutPass`, and `prune_tensorrt_subgraphs` are assumed from that
# module's scope.
def partition_for_tensorrt(
    mod,
    params=None,
    version=None,
    use_implicit_batch=True,
    remove_no_mac_subgraphs=False,
    max_workspace_size=1 << 30,
):
    """Partition the graph greedily offloading supported operators to TensorRT.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    version : Optional[Tuple[int, int, int]]
        TensorRT version to target as a tuple of (major, minor, patch). If TVM is
        compiled with USE_TENSORRT_RUNTIME=ON, the linked TensorRT version will
        be used instead.
    use_implicit_batch : Optional[bool]
        Use TensorRT implicit batch mode (default true). Setting to false will
        enable explicit batch mode, which widens the set of supported operators
        to include those which modify the batch dimension, but may reduce
        performance for some models.
    remove_no_mac_subgraphs : Optional[bool]
        Removes subgraphs which have been partitioned for TensorRT if they do
        not have any multiply-accumulate operations. The removed subgraphs go
        through TVM's standard compilation instead. Can improve performance.
    max_workspace_size : Optional[int]
        How many bytes of workspace size to allow each subgraph to use for
        TensorRT engine creation. See the TensorRT documentation for more info.

    Returns
    -------
    mod_and_config : Tuple[Module, Dict[str, Any]]
        A tuple of 1) the annotated and partitioned module and 2) the
        "relay.ext.tensorrt.options" configuration which should be given to
        PassContext when building.
    """
    config = {
        "use_implicit_batch": use_implicit_batch,
        "max_workspace_size": max_workspace_size,
        "remove_no_mac_subgraphs": remove_no_mac_subgraphs,
    }
    if version:
        assert isinstance(version, tuple) and len(version) == 3
        config["tensorrt_version"] = version
    else:
        linked_version = tuple(tvm.get_global_func("relay.op.get_tensorrt_version")())
        if not linked_version:
            logger.warning(
                "TVM was not built against TensorRT and no version was provided to "
                "partition_for_tensorrt. Defaulting to 6.0.1"
            )
            linked_version = (6, 0, 1)
        config["tensorrt_version"] = linked_version

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            RemoveDropoutPass(),
            transform.RemoveUnusedFunctions(),
            transform.ConvertLayout(
                {
                    "nn.conv2d": ["NCHW", "default"],
                    "nn.conv3d": ["NCDHW", "default"],
                    "nn.conv2d_transpose": ["NCHW", "default"],
                }
            ),
            transform.FoldConstant(),
            transform.AnnotateTarget("tensorrt"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
            transform.InferType(),
        ]
    )
    with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
        mod = seq(mod)
        mod = prune_tensorrt_subgraphs(mod)
    return mod, config
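# Hedged usage sketch: feeding the returned config back into PassContext when
# building the partitioned module, as the docstring above describes. `mod` and
# `params` would come from a frontend importer; the helper name is illustrative
# and not part of the original module.
def _example_build_with_tensorrt(mod, params):
    """Partition a module for TensorRT, then build with the returned options."""
    mod, config = partition_for_tensorrt(mod, params)
    with tvm.transform.PassContext(
        opt_level=3, config={"relay.ext.tensorrt.options": config}
    ):
        lib = relay.build(mod, target="cuda", params=params)
    return lib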