def expected():
    """Reference graph: depthwise conv2d lowered to the blocked NCHW8c form."""
    data = relay.var("x", shape=(1, 32, 56, 56))
    kernel = relay.var("w", shape=(32, 1, 3, 3))
    # Pack activations and weights into the blocked layouts the nchwc op expects.
    data = relay.layout_transform(data, "NCHW", "NCHW8c")
    kernel = relay.layout_transform(kernel, "OIHW", "OIHW1i8o")
    out = relay.nn.contrib_depthwise_conv2d_nchwc(
        data,
        kernel,
        padding=(1, 1, 1, 1),
        channels=32,
        kernel_size=(3, 3),
        groups=32,
        data_layout="NCHW8c",
        kernel_layout="OIHW1i8o",
        out_layout="NCHW8c",
    )
    # Unpack the result back to canonical NCHW.
    out = relay.layout_transform(out, "NCHW8c", "NCHW")
    return relay.Function(analysis.free_vars(out), out)
def expected():
    """Reference graph: NCHW16c conv with broadcast add/multiply on both sides."""
    data = relay.var("x", shape=(1, 64, 56, 56))
    bias = relay.var("bias", shape=(64,))
    scale = relay.var("scale", shape=(64, 1, 1))
    kernel = relay.var("weight")
    data = relay.layout_transform(data, "NCHW", "NCHW16c")
    # Expand bias (64,) -> (1, 64, 1, 1) before packing it into NCHW16c.
    bias = relay.expand_dims(bias, 1, 2)
    bias = relay.expand_dims(bias, 0, 1)
    bias = relay.layout_transform(bias, "NCHW", "NCHW16c")
    # Expand scale (64, 1, 1) -> (1, 64, 1, 1) likewise.
    scale = relay.expand_dims(scale, 0, 1)
    scale = relay.layout_transform(scale, "NCHW", "NCHW16c")
    out = relay.nn.conv2d(
        data, kernel, channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout="NCHW16c"
    )
    out = relay.add(out, bias)  # test broadcasting to lhs
    out = relay.multiply(scale, out)  # test broadcasting to rhs
    out = relay.layout_transform(out, "NCHW16c", "NCHW")
    return relay.Function(analysis.free_vars(out), out)
def expected():
    """Reference graph: blocked NCHW16c conv + max-pool, then lrn in NCHW."""
    data = relay.var("x", shape=(1, 64, 56, 56))
    # Declared but never referenced in this graph (free_vars will not pick it up).
    bias = relay.var("bias", shape=(64,))
    kernel = relay.var("weight", shape=(64, 64, 3, 3))
    out = relay.layout_transform(data, "NCHW", "NCHW16c")
    packed_kernel = relay.layout_transform(kernel, "OIHW", "OIHW16i")
    out = relay.nn.conv2d(
        out,
        packed_kernel,
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        kernel_layout="OIHW16i",
        data_layout="NCHW16c",
    )
    out = relay.nn.max_pool2d(out, pool_size=(2, 2), layout="NCHW16c")
    # lrn has no NCHW16c support here, so convert back first.
    out = relay.layout_transform(out, "NCHW16c", "NCHW")
    out = relay.nn.lrn(out)
    return relay.Function(analysis.free_vars(out), out)
def expected():
    """Reference graph: two NCHW16c conv branches returned as a tuple."""
    data = relay.var("x", shape=(1, 64, 56, 56))
    w1 = relay.var("weight1")
    w2 = relay.var("weight2")
    mid = relay.layout_transform(data, "NCHW", "NCHW16c")
    mid = relay.nn.conv2d(
        mid, w1, channels=32, kernel_size=(3, 3), padding=(1, 1), data_layout="NCHW16c"
    )
    mid = relay.nn.relu(mid)
    # Branch 1: a second conv/relu, unpacked to NCHW.
    branch1 = relay.nn.conv2d(
        mid, w2, channels=32, kernel_size=(3, 3), padding=(1, 1), data_layout="NCHW16c"
    )
    branch1 = relay.nn.relu(branch1)
    branch1 = relay.layout_transform(branch1, "NCHW16c", "NCHW")
    # Branch 2: flatten the shared intermediate, also unpacked to NCHW first.
    branch2 = relay.layout_transform(mid, "NCHW16c", "NCHW")
    branch2 = relay.nn.batch_flatten(branch2)
    result = relay.Tuple([branch1, branch2])
    return relay.Function(analysis.free_vars(result), result)
def expected():
    """Reference graph: NHWC input converted to NCHW for two conv branches
    that are summed and globally max-pooled, then converted back."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    w1 = relay.var("weight1", shape=(3, 3, 64, 32))
    w2 = relay.var("weight2", shape=(1, 1, 64, 32))
    w1 = relay.layout_transform(w1, "HWIO", "OIHW")
    w2 = relay.layout_transform(w2, "HWIO", "OIHW")
    data = relay.layout_transform(data, "NHWC", "NCHW")
    branch1 = relay.nn.conv2d(data, w1, channels=32, kernel_size=(3, 3), padding=(1, 1))
    branch1 = relay.nn.relu(branch1)
    branch2 = relay.nn.conv2d(data, w2, channels=32, kernel_size=(1, 1))
    branch2 = relay.nn.relu(branch2)
    out = branch1 + branch2
    out = relay.nn.global_max_pool2d(out)
    out = relay.layout_transform(out, "NCHW", "NHWC")
    return relay.Function(analysis.free_vars(out), out)
def build_tvm(self, net, torch_inputs):
    """Convert a PyTorch module to a TVM relay graph and set up GPU inference.

    Parameters
    ----------
    net : torch.nn.Module
        The network to convert.
    torch_inputs : sequence of torch.Tensor
        Example inputs used for tracing/conversion.

    Side effects: populates self.graph_pth, self.refit_weight_dict,
    self.params, self.graph and self.ctx.
    """
    # NOTE(review): this GraphModule is immediately overwritten by the
    # torch2tvm result below; kept in case its construction has side
    # effects — confirm and drop the first assignment if it does not.
    self.graph_pth = torch2trt.GraphModule(net, torch_inputs)
    with torch2trt.core.tvm_network():
        _trace, graph_pth = torch2trt.core.torch2tvm(
            net,
            torch_inputs,
            input_names=self.input_names,
            verbose=self.verbose)
    self.refit_weight_dict = graph_pth.refit_weight_dict
    # Input names are re-derived from the forward signature rather than
    # taken from self.input_names — presumably to match argument order;
    # TODO confirm against TVMInference's expectations.
    input_names = get_torch_forward_name(net.forward)
    self.graph_pth = graph_pth
    outputs = graph_pth.get_resolved_outputs()
    tvm_weight_dict = graph_pth.context.tvm_weight_dict
    # Map TVM weight vars to tensors keyed by their name hints.
    self.params = {k.name_hint: v for k, v in tvm_weight_dict.items()}
    self.graph = expr.Function(analysis.free_vars(outputs), outputs)
    if self.graph_post_fn is not None:
        self.graph = self.graph_post_fn(self.graph)
    self.ctx = TVMInference(
        self.graph, self.params, input_names=input_names,
        ctx=tvm.gpu(0), cudnn=True)
def expected_nhwc_int8():
    """Reference graph: int8 NHWC conv2d lowered to the GEMM form with a
    separate weight-transform step."""
    data = relay.var("x", shape=(1, 56, 56, 73), dtype="int8")
    kernel = relay.var("weight1", shape=(3, 3, 73, 79), dtype="int8")
    # Tiling factors consumed by the GEMM weight transform.
    tile_rows, tile_cols = 4, 16
    packed_kernel = relay.nn.contrib_conv2d_gemm_weight_transform(
        kernel, tile_rows, tile_cols
    )
    out = relay.nn.contrib_conv2d_gemm_without_weight_transform(
        data,
        packed_kernel,
        channels=79,
        kernel_size=(3, 3),
        data_layout="NHWC",
        kernel_layout="HWIO",
        out_dtype="int32",
    )
    return relay.Function(analysis.free_vars(out), out)
def before_nhwc():
    """Input graph: two chained NHWC convolutions concatenated on channels."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    w1 = relay.var("weight1")
    w2 = relay.var("weight2")
    conv1 = relay.nn.conv2d(
        data, w1, channels=32, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC"
    )
    conv2 = relay.nn.conv2d(
        conv1, w2, channels=32, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC"
    )
    # Channel axis is 3 in NHWC.
    joined = relay.concatenate([conv1, conv2], axis=3)
    return relay.Function(analysis.free_vars(joined), joined)
def expected_nhwc():
    """Reference graph for before_nhwc after alteration to NCHW16c."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    w1 = relay.var("weight1")
    w2 = relay.var("weight2")
    packed = relay.layout_transform(data, "NHWC", "NCHW16c")
    conv1 = relay.nn.conv2d(
        packed, w1, channels=32, kernel_size=(3, 3), padding=(1, 1),
        data_layout="NCHW16c",
    )
    conv2 = relay.nn.conv2d(
        conv1, w2, channels=32, kernel_size=(3, 3), padding=(1, 1),
        data_layout="NCHW16c",
    )
    # Channel axis moves to 1 in the blocked layout.
    joined = relay.concatenate([conv1, conv2], axis=1)
    joined = relay.layout_transform(joined, "NCHW16c", "NHWC")
    return relay.Function(analysis.free_vars(joined), joined)
def before():
    """Input graph: NHWC conv followed by a bias add, relu and two
    broadcast multiplies of different ranks."""
    data = relay.var("x", shape=(1, 500, 500, 64))
    kernel = relay.var("kernel", shape=(3, 3, 64, 64), dtype="float32")
    bias = relay.var("bias", shape=(64,))
    multiplier1 = relay.var("multiplier1", shape=(1,), dtype="float32")
    multiplier2 = relay.var("multiplier2", shape=(1, 1), dtype="float32")
    out = relay.nn.conv2d(
        data, kernel, data_layout="NHWC", kernel_layout="HWIO", kernel_size=(3, 3)
    )
    # Bias broadcast from the lhs; multipliers from both sides.
    out = relay.add(bias, out)
    out = relay.nn.relu(out)
    out = relay.multiply(multiplier1, out)
    out = relay.multiply(out, multiplier2)
    return relay.Function(analysis.free_vars(out), out)
def expected():
    """Reference graph: the NHWC qnn conv/concat chain converted to NCHW,
    with layout transforms inserted at the graph boundaries."""
    x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8")
    weight1 = relay.var("weight1", shape=(3, 3, 64, 64), dtype="int8")
    weight2 = relay.var("weight2", shape=(3, 3, 64, 64), dtype="int8")
    # Weights converted HWIO -> OIHW to match the NCHW convolutions.
    weight1 = relay.layout_transform(weight1, "HWIO", "OIHW")
    weight2 = relay.layout_transform(weight2, "HWIO", "OIHW")
    y = relay.layout_transform(x, "NHWC", "NCHW")
    # qnn.conv2d positional args: data, weight, input_zero_point,
    # kernel_zero_point, input_scale, kernel_scale (all dummy constants here).
    y = relay.qnn.op.conv2d(
        y,
        weight1,
        relay.const(1, "int32"),
        relay.const(1, "int32"),
        relay.const(1, "float32"),
        relay.const(1, "float32"),
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
    )
    y1 = relay.qnn.op.conv2d(
        y,
        weight2,
        relay.const(1, "int32"),
        relay.const(1, "int32"),
        relay.const(1, "float32"),
        relay.const(1, "float32"),
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
    )
    y = relay.cast(y, "int8")
    # NOTE(review): casts `y`, not `y1` — this mirrors the identical pattern
    # in the paired `before` graph, so both sides stay structurally equal;
    # confirm it is intentional before changing either one.
    y1 = relay.cast(y, "int8")
    # qnn.concatenate takes per-input scales/zero-points plus output
    # scale/zero-point; channel axis is 1 after the NCHW conversion.
    ret = relay.qnn.op.concatenate(
        [y, y1],
        [relay.const(1, "float32"), relay.const(1, "float32")],
        [relay.const(1, "int32"), relay.const(1, "int32")],
        relay.const(1, "float32"),
        relay.const(1, "int32"),
        axis=1,
    )
    ret = relay.layout_transform(ret, "NCHW", "NHWC")
    y = relay.Function(analysis.free_vars(ret), ret)
    return y
def before():
    """Input graph: two NHWC/HWIO qnn convolutions whose int8 results are
    quantized-concatenated along the NHWC channel axis."""
    x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8")
    weight1 = relay.var("weight1", shape=(3, 3, 64, 64), dtype="int8")
    weight2 = relay.var("weight2", shape=(3, 3, 64, 64), dtype="int8")
    # qnn.conv2d positional args: data, weight, input_zero_point,
    # kernel_zero_point, input_scale, kernel_scale (dummy constants here).
    y = relay.qnn.op.conv2d(
        x,
        weight1,
        relay.const(1, "int32"),
        relay.const(1, "int32"),
        relay.const(1, "float32"),
        relay.const(1, "float32"),
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
    )
    y1 = relay.qnn.op.conv2d(
        y,
        weight2,
        relay.const(1, "int32"),
        relay.const(1, "int32"),
        relay.const(1, "float32"),
        relay.const(1, "float32"),
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
    )
    y = relay.cast(y, "int8")
    # NOTE(review): casts `y`, not `y1` — mirrored verbatim in the paired
    # `expected` graph, so the test still checks structural equality;
    # confirm intentional before changing either side.
    y1 = relay.cast(y, "int8")
    # Per-input scales/zero-points, then output scale/zero-point;
    # channel axis 3 in NHWC.
    ret = relay.qnn.op.concatenate(
        [y, y1],
        [relay.const(1, "float32"), relay.const(1, "float32")],
        [relay.const(1, "int32"), relay.const(1, "int32")],
        relay.const(1, "float32"),
        relay.const(1, "int32"),
        axis=3,
    )
    y = relay.Function(analysis.free_vars(ret), ret)
    return y
def expected():
    """Reference graph: NHWC conv chain converted to NCHW, with the bias
    expanded to rank 4 before its layout transform."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    bias = relay.var("bias", shape=(64,))
    kernel = relay.var("weight", shape=(3, 3, 64, 64))
    data = relay.layout_transform(data, "NHWC", "NCHW")
    kernel = relay.layout_transform(kernel, "HWIO", "OIHW")
    out = relay.nn.conv2d(data, kernel, channels=64, kernel_size=(3, 3), padding=(1, 1))
    # Lift bias (64,) to (1, 1, 1, 64) so it can be layout-transformed as NHWC.
    bias = relay.expand_dims(bias, axis=0, num_newaxis=3)
    bias = relay.layout_transform(bias, "NHWC", "NCHW")
    out = relay.add(out, bias)
    # A useless single-element tuple, expected to be eliminated by the pass.
    out = relay.Tuple([out])[0]
    out = relay.nn.relu(out)
    out = relay.nn.max_pool2d(out, pool_size=(2, 2))
    out = relay.cast(out, "int32")
    out = relay.layout_transform(out, "NCHW", "NHWC")
    out = relay.nn.batch_flatten(out)
    return relay.Function(analysis.free_vars(out), out)
def before():
    """Input graph: NCHW conv2d followed by ROI pooling."""
    data = relay.var("x", shape=(1, 64, 56, 56))
    kernel = relay.var("weight1", shape=(64, 64, 3, 3))
    conv = relay.nn.conv2d(
        data,
        kernel,
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NCHW",
        kernel_layout="OIHW",
    )
    rois = relay.var("rois", shape=(32, 5))
    pooled = relay.vision.roi_pool(
        conv, rois, pooled_size=(14, 14), spatial_scale=0.0625, layout="NCHW"
    )
    return relay.Function(analysis.free_vars(pooled), pooled)
def before():
    """Input graph: NHWC conv + bias_add + relu + max-pool + cast + flatten."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    bias = relay.var("bias", shape=(64,))
    kernel = relay.var("weight", shape=(3, 3, 64, 64))
    out = relay.nn.conv2d(
        data, kernel, channels=64, kernel_size=(3, 3), padding=(1, 1),
        data_layout="NHWC", kernel_layout="HWIO",
    )
    # Channel axis is 3 in NHWC.
    out = relay.nn.bias_add(out, bias, axis=3)
    # A useless single-element tuple, expected to be eliminated by the pass.
    out = relay.Tuple([out])[0]
    out = relay.nn.relu(out)
    out = relay.nn.max_pool2d(out, pool_size=(2, 2), layout="NHWC")
    out = relay.cast(out, "int32")
    out = relay.nn.batch_flatten(out)
    return relay.Function(analysis.free_vars(out), out)
def expected_nhwc():
    """Reference graph: the NHWC conv/pool/conv chain converted to NCHW."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    w1 = relay.var("weight1", shape=(3, 3, 64, 64))
    w2 = relay.var("weight2", shape=(3, 3, 64, 64))
    out = relay.layout_transform(data, "NHWC", "NCHW")
    w1 = relay.layout_transform(w1, "HWIO", "OIHW")
    w2 = relay.layout_transform(w2, "HWIO", "OIHW")
    out = relay.nn.conv2d(out, w1, channels=64, kernel_size=(3, 3))
    out = relay.nn.relu(out)
    out = relay.nn.avg_pool2d(out, pool_size=(1, 1))
    out = relay.nn.conv2d(out, w2, channels=64, kernel_size=(3, 3))
    out = relay.nn.relu(out)
    out = relay.layout_transform(out, "NCHW", "NHWC")
    return relay.Function(analysis.free_vars(out), out)
def before():
    """Input graph: conv/relu chain with a second branch flattening the
    shared intermediate; returns a 2-tuple."""
    data = relay.var("x", shape=(1, 64, 56, 56))
    w1 = relay.var("weight1")
    w2 = relay.var("weight2")
    mid = relay.nn.conv2d(data, w1, channels=32, kernel_size=(3, 3), padding=(1, 1))
    mid = relay.nn.relu(mid)
    branch1 = relay.nn.conv2d(mid, w2, channels=32, kernel_size=(3, 3), padding=(1, 1))
    branch1 = relay.nn.relu(branch1)
    branch2 = relay.nn.batch_flatten(mid)
    result = relay.Tuple([branch1, branch2])
    return relay.Function(analysis.free_vars(result), result)
def expected():
    """Reference graph: blocked NCHW4c conv followed by a strided slice whose
    channel bounds are expressed in the blocked coordinate space."""
    data = relay.var("x", shape=(1, 32, 28, 28))
    kernel = relay.var("weight", shape=(32, 32, 3, 3))
    kernel = relay.layout_transform(kernel, "OIHW", "OIHW4i4o")
    data = relay.layout_transform(data, "NCHW", "NCHW4c")
    out = relay.op.nn.contrib_conv2d_nchwc(
        data,
        kernel,
        channels=32,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NCHW4c",
    )
    # Slice over (batch, outer-channel) in the blocked layout.
    out = relay.strided_slice(
        out,
        begin=relay.const([0, 4], "int32"),
        end=relay.const([1, 21], "int32"),
        strides=relay.const([1, 1], "int32"),
    )
    out = relay.layout_transform(out, "NCHW4c", "NCHW")
    return relay.Function(analysis.free_vars(out), out)
def run_infer_type(expr, mod=None):
    """Run relay type inference on *expr*.

    With no module, the expression is wrapped in a fresh IRModule; the typed
    main function is returned for Function inputs, otherwise its body.
    With a module, the expression is installed (or looked up, for GlobalVar)
    under "main", inference is run, and the same Function/body convention
    applies.
    """
    if not mod:
        fresh = tvm.IRModule.from_expr(expr)
        fresh = transform.InferType()(fresh)
        entry = fresh["main"]
        return entry if isinstance(expr, relay.Function) else entry.body

    if isinstance(expr, relay.GlobalVar):
        gv = expr.name_hint
    else:
        # Wrap a bare expression into a Function over its free vars.
        func = expr if isinstance(expr, relay.Function) else relay.Function(
            analysis.free_vars(expr), expr
        )
        mod["main"] = func
        gv = "main"
    mod = transform.InferType()(mod)
    if isinstance(expr, (relay.GlobalVar, relay.Function)):
        return mod[gv]
    return mod[gv].body
def expected():
    """Reference graph: two NCHW16c conv branches summed and globally
    max-pooled in the blocked layout."""
    data = relay.var("x", shape=(1, 64, 56, 56))
    w1 = relay.var("weight1")
    w2 = relay.var("weight2")
    packed = relay.layout_transform(data, "NCHW", "NCHW16c")
    branch1 = relay.nn.conv2d(
        packed, w1, channels=32, kernel_size=(3, 3), padding=(1, 1),
        data_layout="NCHW16c",
    )
    branch1 = relay.nn.relu(branch1)
    branch2 = relay.nn.conv2d(
        packed, w2, channels=32, kernel_size=(1, 1), data_layout="NCHW16c"
    )
    branch2 = relay.nn.relu(branch2)
    out = branch1 + branch2
    out = relay.nn.global_max_pool2d(out, layout="NCHW16c")
    out = relay.layout_transform(out, "NCHW16c", "NCHW")
    return relay.Function(analysis.free_vars(out), out)
def expected():
    """Reference graph: float conv in NHWC/HWIO, qnn conv in NCHW/OIHW, and
    conv2d_transpose in NHWC/HWIO, with layout transforms inserted between
    each stage and around the OHWI weights."""
    x = relay.var("x", shape=(1, 64, 56, 56))
    weight1 = relay.var("weight1", shape=(64, 3, 3, 64))
    weight2 = relay.var("weight2", shape=(64, 3, 3, 64), dtype='int8')
    weight3 = relay.var("weight3", shape=(64, 3, 3, 64))
    # Stage 1: float conv runs in NHWC, so data and weight1 are converted.
    x = relay.layout_transform(x, 'NCHW', 'NHWC')
    weight1 = relay.layout_transform(weight1, 'OHWI', 'HWIO')
    out = relay.nn.conv2d(out if False else x, weight1, channels=64, kernel_size=(3, 3), padding=(1, 1),
                          data_layout='NHWC', kernel_layout='HWIO')
    out = relay.cast(out, 'int8')
    # Stage 2: qnn conv runs in NCHW/OIHW, so convert back.
    out = relay.layout_transform(out, 'NHWC', 'NCHW')
    weight2 = relay.layout_transform(weight2, 'OHWI', 'OIHW')
    out = relay.qnn.op.conv2d(out, weight2,
                              relay.const(1, 'int32'),
                              relay.const(1, 'int32'),
                              relay.const(1, 'float32'),
                              relay.const(1, 'float32'),
                              channels=64, kernel_size=(3, 3), padding=(1, 1),
                              data_layout='NCHW', kernel_layout='OIHW')
    out = relay.cast(out, 'float32')
    # Stage 3: conv2d_transpose runs in NHWC/HWIO.
    out = relay.layout_transform(out, 'NCHW', 'NHWC')
    weight3 = relay.layout_transform(weight3, 'OHWI', 'HWIO')
    out = relay.nn.conv2d_transpose(out, weight3, channels=64, kernel_size=(3, 3), padding=(1, 1),
                                    data_layout='NHWC', kernel_layout='HWIO')
    # Final result restored to the original NCHW layout.
    out = relay.layout_transform(out, 'NHWC', 'NCHW')
    out = relay.Function(analysis.free_vars(out), out)
    return out
def expected():
    """Reference graph: conv converted to NHWC/HWIO, with the result
    converted back to NCHW for roi_align."""
    data = relay.var("x", shape=(1, 64, 56, 56))
    kernel = relay.var("weight1", shape=(64, 64, 3, 3))
    data = relay.layout_transform(data, "NCHW", "NHWC")
    kernel = relay.layout_transform(kernel, "OIHW", "HWIO")
    conv = relay.nn.conv2d(
        data,
        kernel,
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
    )
    # roi_align stays in NCHW, so convert the conv output back.
    conv = relay.layout_transform(conv, "NHWC", "NCHW")
    rois = relay.var("rois", shape=(32, 5))
    pooled = relay.vision.roi_align(
        conv,
        rois,
        pooled_size=(14, 14),
        spatial_scale=0.0625,
        sample_ratio=2,
        layout="NCHW",
    )
    return relay.Function(analysis.free_vars(pooled), pooled)
def before():
    """Input graph: two NHWC conv branches (3x3 and 1x1) summed and
    globally max-pooled."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    w1 = relay.var("weight1", shape=(3, 3, 64, 32))
    w2 = relay.var("weight2", shape=(1, 1, 64, 32))
    branch1 = relay.nn.conv2d(
        data, w1, channels=32, kernel_size=(3, 3), padding=(1, 1),
        data_layout="NHWC", kernel_layout="HWIO",
    )
    branch1 = relay.nn.relu(branch1)
    branch2 = relay.nn.conv2d(
        data, w2, channels=32, kernel_size=(1, 1),
        data_layout="NHWC", kernel_layout="HWIO",
    )
    branch2 = relay.nn.relu(branch2)
    out = branch1 + branch2
    out = relay.nn.global_max_pool2d(out, layout="NHWC")
    return relay.Function(analysis.free_vars(out), out)
def expected():
    """Reference graph: chained convs converted to NCHW with the
    concatenation moved to channel axis 1."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    w1 = relay.var("weight1", shape=(3, 3, 64, 64))
    w2 = relay.var("weight2", shape=(3, 3, 64, 64))
    w1 = relay.layout_transform(w1, "HWIO", "OIHW")
    w2 = relay.layout_transform(w2, "HWIO", "OIHW")
    conv1 = relay.layout_transform(data, "NHWC", "NCHW")
    conv1 = relay.nn.conv2d(conv1, w1, channels=64, kernel_size=(3, 3), padding=(1, 1))
    conv2 = relay.nn.conv2d(conv1, w2, channels=64, kernel_size=(3, 3), padding=(1, 1))
    joined = relay.concatenate([conv1, conv2], axis=1)
    joined = relay.layout_transform(joined, "NCHW", "NHWC")
    return relay.Function(analysis.free_vars(joined), joined)
def before_nhwc():
    """Input graph: NHWC conv/relu, avg-pool, then a second conv/relu."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    w1 = relay.var("weight1", shape=(3, 3, 64, 64))
    w2 = relay.var("weight2", shape=(3, 3, 64, 64))
    out = relay.nn.conv2d(
        data, w1, channels=64, kernel_size=(3, 3),
        data_layout="NHWC", kernel_layout="HWIO",
    )
    out = relay.nn.relu(out)
    out = relay.nn.avg_pool2d(out, pool_size=(1, 1), layout="NHWC")
    out = relay.nn.conv2d(
        out, w2, channels=64, kernel_size=(3, 3),
        data_layout="NHWC", kernel_layout="HWIO",
    )
    out = relay.nn.relu(out)
    return relay.Function(analysis.free_vars(out), out)
def before():
    """Input graph: float conv, qnn conv, and conv2d_transpose all declared
    in NCHW with OHWI weights."""
    x = relay.var("x", shape=(1, 64, 56, 56))
    weight1 = relay.var("weight1", shape=(64, 3, 3, 64))
    weight2 = relay.var("weight2", shape=(64, 3, 3, 64), dtype="int8")
    weight3 = relay.var("weight3", shape=(64, 3, 3, 64))
    out = relay.nn.conv2d(
        x,
        weight1,
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NCHW",
        kernel_layout="OHWI",
    )
    out = relay.cast(out, "int8")
    # qnn.conv2d positional args: data, weight, input_zero_point,
    # kernel_zero_point, input_scale, kernel_scale (dummy constants here).
    out = relay.qnn.op.conv2d(
        out,
        weight2,
        relay.const(1, "int32"),
        relay.const(1, "int32"),
        relay.const(1, "float32"),
        relay.const(1, "float32"),
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NCHW",
        kernel_layout="OHWI",
    )
    out = relay.cast(out, "float32")
    out = relay.nn.conv2d_transpose(
        out,
        weight3,
        channels=64,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NCHW",
        kernel_layout="OHWI",
    )
    out = relay.Function(analysis.free_vars(out), out)
    return out
def before():
    """Input graph: NHWC conv + bias, then batch_norm fed by mean/variance
    computed from the activation itself."""
    data = relay.var("x", shape=(1, 56, 56, 64))
    kernel = relay.var("weight", shape=(3, 3, 64, 16))
    bias = relay.var("bias", shape=(1, 1, 1, 16))
    out = relay.nn.conv2d(
        data, kernel, channels=16, kernel_size=(3, 3), padding=(1, 1),
        data_layout="NHWC", kernel_layout="HWIO",
    )
    out = relay.add(out, bias)
    # exclude=True reduces over every axis except the channel axis (3),
    # giving per-channel statistics.
    mean = relay.mean(out, axis=3, exclude=True)
    var = relay.variance(out, axis=3, exclude=True)
    gamma = relay.var("gamma")
    beta = relay.var("beta")
    out = relay.nn.batch_norm(out, gamma, beta, mean, var, axis=3)
    # batch_norm returns a tuple; take the normalized output.
    out = out[0]
    return relay.Function(analysis.free_vars(out), out)
def expected():
    """Reference graph: the conv+batch_norm chain after layout alteration,
    with batch_norm decomposed into an explicit scale (denom) and shift
    (numerator) applied in the blocked NCHW16c layout."""
    x = relay.var("x", shape=(1, 56, 56, 64))
    weight = relay.var("weight", shape=(3, 3, 64, 16))
    bias = relay.var("bias", shape=(1, 1, 1, 16))
    # Data goes NHWC -> NCHW -> NCHW16c for the blocked convolution.
    x = relay.layout_transform(x, src_layout="NHWC", dst_layout="NCHW")
    x = relay.layout_transform(x, src_layout="NCHW", dst_layout="NCHW16c")
    weight = relay.layout_transform(weight, src_layout="HWIO", dst_layout="OIHW")
    y = relay.nn.conv2d(
        x, weight, channels=16, kernel_size=(3, 3), padding=(1, 1), data_layout="NCHW16c"
    )
    # Bias follows the same two-step packing as the data.
    bias = relay.layout_transform(bias, src_layout="NHWC", dst_layout="NCHW")
    bias = relay.layout_transform(bias, src_layout="NCHW", dst_layout="NCHW16c")
    add = relay.add(y, bias)
    # Statistics are computed back in NHWC over all axes except channel (3).
    y = relay.layout_transform(add, src_layout="NCHW16c", dst_layout="NCHW")
    y = relay.layout_transform(y, src_layout="NCHW", dst_layout="NHWC")
    mean = relay.mean(y, axis=3, exclude=True)
    var = relay.variance(y, axis=3, exclude=True)
    # denom = gamma / sqrt(var + eps): the per-channel scale term.
    denom = relay.const(1.0) / relay.sqrt(var + relay.const(1e-05))
    gamma = relay.var("gamma", shape=(16,))
    denom = denom * gamma
    # Lift denom (16,) -> (1, 16, 1, 1) so it can be packed to NCHW16c.
    denom_expand1 = relay.expand_dims(denom, axis=1, num_newaxis=2)
    denom_expand2 = relay.expand_dims(denom_expand1, axis=0)
    denom_nchwc16 = relay.layout_transform(
        denom_expand2, src_layout="NCHW", dst_layout="NCHW16c"
    )
    # Scale applied directly to the blocked conv+bias result.
    out = add * denom_nchwc16
    beta = relay.var("beta", shape=(16,))
    # numerator = beta - mean * denom: the per-channel shift term.
    numerator = (-mean) * denom + beta
    numerator_expand1 = relay.expand_dims(numerator, axis=1, num_newaxis=2)
    numerator_expand2 = relay.expand_dims(numerator_expand1, axis=0)
    numerator_nchwc16 = relay.layout_transform(
        numerator_expand2, src_layout="NCHW", dst_layout="NCHW16c"
    )
    out = out + numerator_nchwc16
    # Unpack the final result back to NHWC.
    out = relay.layout_transform(out, src_layout="NCHW16c", dst_layout="NCHW")
    y = relay.layout_transform(out, src_layout="NCHW", dst_layout="NHWC")
    y = relay.Function(analysis.free_vars(y), y)
    return y
def before():
    """Input graph: two NHWC/HWIO int8 qnn convolutions whose casts are
    quantized-concatenated along the NHWC channel axis."""
    x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
    weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
    weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
    # qnn.conv2d positional args: data, weight, input_zero_point,
    # kernel_zero_point, input_scale, kernel_scale (dummy constants here).
    y = relay.qnn.op.conv2d(x, weight1,
                            relay.const(1, 'int32'),
                            relay.const(1, 'int32'),
                            relay.const(1, 'float32'),
                            relay.const(1, 'float32'),
                            channels=64,
                            kernel_size=(3, 3),
                            padding=(1, 1),
                            data_layout='NHWC',
                            kernel_layout='HWIO')
    y1 = relay.qnn.op.conv2d(y, weight2,
                             relay.const(1, 'int32'),
                             relay.const(1, 'int32'),
                             relay.const(1, 'float32'),
                             relay.const(1, 'float32'),
                             channels=64,
                             kernel_size=(3, 3),
                             padding=(1, 1),
                             data_layout='NHWC',
                             kernel_layout='HWIO')
    y = relay.cast(y, 'int8')
    # NOTE(review): casts `y`, not `y1` — mirrored verbatim in the paired
    # `expected` graph, so structural equality still holds; confirm
    # intentional before changing either side.
    y1 = relay.cast(y, 'int8')
    # Per-input scales/zero-points, then output scale/zero-point;
    # channel axis 3 in NHWC.
    ret = relay.qnn.op.concatenate(
        [y, y1],
        [relay.const(1, 'float32'), relay.const(1, 'float32')],
        [relay.const(1, 'int32'), relay.const(1, 'int32')],
        relay.const(1, 'float32'),
        relay.const(1, 'int32'),
        axis=3)
    y = relay.Function(analysis.free_vars(ret), ret)
    return y
def expected():
    """Reference graph: the NHWC qnn conv/concat chain converted to NCHW,
    with weight and data layout transforms inserted at the boundaries."""
    x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
    weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
    weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
    # Weights converted HWIO -> OIHW for the NCHW convolutions.
    weight1 = relay.layout_transform(weight1, 'HWIO', 'OIHW')
    weight2 = relay.layout_transform(weight2, 'HWIO', 'OIHW')
    y = relay.layout_transform(x, "NHWC", "NCHW")
    # qnn.conv2d positional args: data, weight, input_zero_point,
    # kernel_zero_point, input_scale, kernel_scale (dummy constants here).
    y = relay.qnn.op.conv2d(y, weight1,
                            relay.const(1, 'int32'),
                            relay.const(1, 'int32'),
                            relay.const(1, 'float32'),
                            relay.const(1, 'float32'),
                            channels=64,
                            kernel_size=(3, 3),
                            padding=(1, 1))
    y1 = relay.qnn.op.conv2d(y, weight2,
                             relay.const(1, 'int32'),
                             relay.const(1, 'int32'),
                             relay.const(1, 'float32'),
                             relay.const(1, 'float32'),
                             channels=64,
                             kernel_size=(3, 3),
                             padding=(1, 1))
    y = relay.cast(y, 'int8')
    # NOTE(review): casts `y`, not `y1` — mirrors the identical pattern in
    # the paired `before` graph; confirm intentional before changing.
    y1 = relay.cast(y, 'int8')
    # Channel axis 1 after the NCHW conversion.
    ret = relay.qnn.op.concatenate(
        [y, y1],
        [relay.const(1, 'float32'), relay.const(1, 'float32')],
        [relay.const(1, 'int32'), relay.const(1, 'int32')],
        relay.const(1, 'float32'),
        relay.const(1, 'int32'),
        axis=1)
    ret = relay.layout_transform(ret, "NCHW", "NHWC")
    y = relay.Function(analysis.free_vars(ret), ret)
    return y