def expected(x, w, channels, repeat):
    args = [x, w]
    y = x
    for i in range(repeat):
        w_concat = relay.concatenate((w, w), axis=0)
        y = relay.nn.conv2d(y, w_concat, channels=channels * 2)
        y1 = relay.strided_slice(y, [0, 0], [None, channels])
        y2 = relay.strided_slice(y, [0, channels], [None, channels * 2])
        y = relay.concatenate((y1, y2), axis=1)
    return relay.Function(args, y)
def expected(x, w1, w2, scale1, scale2, bias, channels1, channels2):
    args = [x, w1, w2, scale1, scale2, bias]
    w = relay.concatenate((w1, w2), axis=0)
    scale = relay.concatenate((scale1, scale2), axis=0)
    y = relay.nn.conv2d(x, w, channels=channels1 + channels2)
    y = relay.multiply(y, scale)
    y = relay.nn.relu(y)
    y1 = relay.strided_slice(y, [0, 0], [None, channels1])
    y2 = relay.strided_slice(y, [0, channels1], [None, channels1 + channels2])
    y2 = relay.add(y2, bias)
    y = relay.Tuple((y1, y2))
    return relay.Function(args, y)
def gen_intermediate_tuple(x):
    y1 = relay.add(x, relay.const(1, "float32"))
    y2 = relay.add(x, relay.const(1, "float32"))
    y3 = relay.add(x, relay.const(1, "float32"))
    concat = relay.concatenate((y1, y2, y3), axis=1)
    out = relay.add(concat, relay.const(1, "float32"))
    return out
def Inception7C(data,
                num_1x1,
                num_d7_red, num_d7_1, num_d7_2,
                num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4,
                pool, proj,
                name):
    tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
    tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv')
    tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3),
                    name=('%s_tower' % name), suffix='_conv_1')
    tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0),
                    name=('%s_tower' % name), suffix='_conv_2')
    tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv')
    tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0),
                    name=('%s_tower_1' % name), suffix='_conv_1')
    tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3),
                    name=('%s_tower_1' % name), suffix='_conv_2')
    tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0),
                    name=('%s_tower_1' % name), suffix='_conv_3')
    tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3),
                    name=('%s_tower_1' % name), suffix='_conv_4')
    pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool,
                      name=('%s_pool_%s_pool' % (pool, name)))
    cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1),
                 name=('%s_tower_2' % name), suffix='_conv')
    # concat
    concat = relay.concatenate((tower_1x1, tower_d7, tower_q7, cproj), axis=1)
    return concat
def before(dshape):
    x = relay.var("x", shape=dshape)
    pooled = relay.nn.max_pool2d(x, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
    upsampled = relay.nn.upsampling(pooled, scale=2, layout="NCHW")
    concat = relay.concatenate((upsampled, x), axis=1)
    out = relay.add(concat, relay.const(1, "float32"))
    return relay.Function(relay.ir_pass.free_vars(out), out)
def Inception7E(data,
                num_1x1,
                num_d3_red, num_d3_1, num_d3_2,
                num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2,
                pool, proj,
                name):
    tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name))
    tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv')
    tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1),
                      name=('%s_tower' % name), suffix='_mixed_conv')
    tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0),
                      name=('%s_tower' % name), suffix='_mixed_conv_1')
    tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name),
                        suffix='_conv')
    tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1),
                        name=('%s_tower_1' % name), suffix='_conv_1')
    tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1),
                          name=('%s_tower_1' % name), suffix='_mixed_conv')
    tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0),
                          name=('%s_tower_1' % name), suffix='_mixed_conv_1')
    pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool,
                      name=('%s_pool_%s_pool' % (pool, name)))
    cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1),
                 name=('%s_tower_2' % name), suffix='_conv')
    # concat
    concat = relay.concatenate(
        (tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj), axis=1)
    return concat
def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels, prefix=""):
    net = _make_fire_conv(net, squeeze_channels, 1, 0, "%s/squeeze1x1" % prefix)
    left = _make_fire_conv(net, expand1x1_channels, 1, 0, "%s/expand1x1" % prefix)
    right = _make_fire_conv(net, expand3x3_channels, 3, 1, "%s/expand3x3" % prefix)
    # NOTE : Assume NCHW layout here
    net = relay.concatenate((left, right), axis=1)
    return net
def before(x, w, repeat):
    args = [x, w]
    y = x
    for i in range(repeat):
        y1 = relay.nn.conv2d(y, w)
        y2 = relay.nn.conv2d(y, w)
        y = relay.concatenate((y1, y2), axis=1)
    return relay.Function(args, y)
def test_split():
    x = relay.var('x', shape=(12,))
    y = relay.split(x, 3, axis=0).astuple()
    z = relay.concatenate([relay.TupleGetItem(y, 0)], axis=0)
    f = relay.Function([x], z)
    x_data = np.random.rand(12,).astype('float32')
    res = veval(f, x_data)
    tvm.testing.assert_allclose(res.asnumpy(), np.split(x_data, 3, axis=0)[0])
def before(x):
    inj = relay.squeeze(x)
    y1 = relay.add(inj, relay.const(1, "float32"))
    tmp = relay.squeeze(inj)
    tmp = relay.add(tmp, relay.const(1, "float32"))
    y2 = relay.add(tmp, relay.const(1, "float32"))
    y3 = relay.add(inj, relay.const(1, "float32"))
    concat = relay.concatenate((y1, y2, y3), axis=1)
    out_inj = relay.squeeze(concat)
    out = relay.add(out_inj, relay.const(1, "float32"))
    return relay.Function(relay.ir_pass.free_vars(out), out)
def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4):
    # use a fixed order of args so alpha equal check can pass
    args = [x, w1, w2, w3, w4]
    w = relay.concatenate((w1, w2, w4), axis=0)
    y = relay.nn.conv2d(x, w, channels=channels1 + channels2 + channels4)
    y1 = relay.strided_slice(y, [0, 0], [None, channels1])
    y2 = relay.strided_slice(y, [0, channels1], [None, channels1 + channels2])
    y3 = relay.nn.conv2d(x, w3)
    y4 = relay.strided_slice(y, [0, channels1 + channels2],
                             [None, channels1 + channels2 + channels4])
    y5 = relay.nn.max_pool2d(x)
    y = relay.Tuple((y1, y2, y3, y4, y5))
    return relay.Function(args, y)
def expected(dshape):
    p0 = relay.var("p0", shape=dshape)
    c = conv(p0)
    f0 = relay.Function(relay.ir_pass.free_vars(c), c)

    p01 = relay.var("p01", shape=dshape)
    c = conv(p01)
    f1 = relay.Function(relay.ir_pass.free_vars(c), c)

    p02 = relay.var("p02", shape=dshape)
    p12 = relay.var("p12", shape=dshape)
    concat1 = relay.concatenate((p02, p12), axis=1)
    f_concat1 = relay.Function([p02, p12], concat1)

    dshape2 = (dshape[0], dshape[1]*2, dshape[2], dshape[3])

    p03 = relay.var("p03", shape=dshape2)
    c = conv(p03)
    f2 = relay.Function(relay.ir_pass.free_vars(c), c)

    p04 = relay.var("p04", shape=dshape2)
    c = conv(p04)
    f3 = relay.Function(relay.ir_pass.free_vars(c), c)

    p05 = relay.var("p05", shape=dshape)
    p15 = relay.var("p15", shape=dshape)
    concat2 = relay.concatenate((p05, p15), axis=1)
    f_concat2 = relay.Function([p05, p15], concat2)

    x = relay.var("x", shape=dshape)
    c1 = relay.Call(f0, [x, relay.var("w1")])
    c2 = relay.Call(f1, [x, relay.var("w2")])
    concat = relay.Call(f_concat1, [c1, c2])
    c3 = relay.Call(f2, [concat, relay.var("w3")])
    c4 = relay.Call(f3, [concat, relay.var("w4")])
    out = relay.Call(f_concat2, [c3, c4])
    return relay.Function(relay.ir_pass.free_vars(out), out)
def test_concatenate():
    n, t, d = tvm.var("n"), tvm.var("t"), 100
    x = relay.var("x", shape=(n, t, d))
    y = relay.var("y", shape=(n, t, d))
    z = relay.concatenate((x, y), axis=-1)
    assert "axis=" in z.astext()
    zz = relay.ir_pass.infer_type(z)
    assert zz.checked_type == relay.TensorType((n, t, 200))

    x = relay.exp(x)
    z = relay.concatenate((x, y), axis=2)
    zz = relay.ir_pass.infer_type(z)
    assert zz.checked_type == relay.TensorType((n, t, 200))

    z = relay.concatenate((x, y), axis=1)
    zz = relay.ir_pass.infer_type(z)
    assert zz.checked_type == relay.TensorType((n, t + t, 100))

    x = relay.var("x", shape=(10, 5))
    y = relay.var("y", shape=(10, 5))
    t = relay.var("z", shape=())
    z = relay.concatenate((x, y), axis=1)
    z = relay.add(z, t)

    # Check result.
    func = relay.Function([x, y, t], z)
    x_data = np.random.rand(10, 5).astype('float32')
    y_data = np.random.rand(10, 5).astype('float32')
    t_data = np.random.uniform(size=()).astype('float32')
    ref_res = np.concatenate((x_data, y_data), axis=1) + t_data

    for target, ctx in ctx_list():
        intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
        intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
        op_res1 = intrp1.evaluate(func)(x_data, y_data, t_data)
        tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=0.01)
        op_res2 = intrp2.evaluate(func)(x_data, y_data, t_data)
        tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=0.01)
def before(): x = relay.var("x", shape=(1, 64, 56, 56)) weight1 = relay.var('weight1') weight2 = relay.var('weight2') y = relay.nn.conv2d(x, weight1, channels=32, kernel_size=(3, 3), padding=(1, 1)) y1 = relay.nn.conv2d(y, weight2, channels=32, kernel_size=(3, 3), padding=(1, 1)) ret = relay.concatenate([y, y1], axis=1) y = relay.Function(free_vars(ret), ret) return y
def expected(dshape):
    x = relay.var("x", shape=dshape)
    pooled = relay.nn.max_pool2d(x, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
    f0 = relay.Function([x], pooled)

    p0 = relay.var("p0", shape=(dshape[0], dshape[1], dshape[2]//2, dshape[3]//2))
    p1 = relay.var("p1", shape=dshape)
    upsampled = relay.nn.upsampling(p0, scale=2, layout="NCHW")
    concat = relay.concatenate((upsampled, p1), axis=1)
    out = relay.add(concat, relay.const(1, "float32"))
    f1 = relay.Function([p0, p1], out)

    x = relay.var("x", shape=dshape)
    y = relay.Call(f0, [x])
    z = relay.Call(f1, [y, x])
    return relay.Function([x], z)
def Inception7B(data, num_3x3, num_d3x3_red, num_d3x3_1, num_d3x3_2, pool, name):
    tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2),
                     name=('%s_conv' % name))
    tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv')
    tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                      name=('%s_tower' % name), suffix='_conv_1')
    tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2),
                      name=('%s_tower' % name), suffix='_conv_2')
    pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0, 0), pool_type="max",
                      name=('max_pool_%s_pool' % name))
    concat = relay.concatenate((tower_3x3, tower_d3x3, pooling), axis=1)
    return concat
def expected():
    x = relay.var("x", shape=(1, 64, 56, 56))
    weight1 = relay.var('weight1')
    weight2 = relay.var('weight2')
    y = relay.layout_transform(x, "NCHW", "NCHW16c")
    y = relay.nn.conv2d(y, weight1,
                        channels=32,
                        kernel_size=(3, 3),
                        padding=(1, 1),
                        data_layout="NCHW16c")
    y1 = relay.nn.conv2d(y, weight2,
                         channels=32,
                         kernel_size=(3, 3),
                         padding=(1, 1),
                         data_layout='NCHW16c')
    ret = relay.concatenate([y, y1], axis=1)
    ret = relay.layout_transform(ret, "NCHW16c", "NCHW")
    y = relay.Function(free_vars(ret), ret)
    return y
def Inception7A(data,
                num_1x1,
                num_3x3_red, num_3x3_1, num_3x3_2,
                num_5x5_red, num_5x5,
                pool, proj,
                name):
    tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name))
    tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv')
    tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2),
                     name=('%s_tower' % name), suffix='_conv_1')
    tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv')
    tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1),
                     name=('%s_tower_1' % name), suffix='_conv_1')
    tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1),
                     name=('%s_tower_1' % name), suffix='_conv_2')
    pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool,
                      name=('%s_pool_%s_pool' % (pool, name)))
    cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv')
    concat = relay.concatenate((tower_1x1, tower_5x5, tower_3x3, cproj), axis=1)
    return concat
def Inception7B(data, num_3x3, num_d3x3_red, num_d3x3_1, num_d3x3_2, pool, name):
    tower_3x3 = Conv(
        data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=("%s_conv" % name)
    )
    tower_d3x3 = Conv(data, num_d3x3_red, name=("%s_tower" % name), suffix="_conv")
    tower_d3x3 = Conv(
        tower_d3x3,
        num_d3x3_1,
        kernel=(3, 3),
        pad=(1, 1),
        stride=(1, 1),
        name=("%s_tower" % name),
        suffix="_conv_1",
    )
    tower_d3x3 = Conv(
        tower_d3x3,
        num_d3x3_2,
        kernel=(3, 3),
        pad=(0, 0),
        stride=(2, 2),
        name=("%s_tower" % name),
        suffix="_conv_2",
    )
    pooling = Pooling(
        data=data,
        kernel=(3, 3),
        stride=(2, 2),
        pad=(0, 0),
        pool_type="max",
        name=("max_pool_%s_pool" % name),
    )
    concat = relay.concatenate((tower_3x3, tower_d3x3, pooling), axis=1)
    return concat
def test_parallel_calls_with_non_ifm_input():
    """
    Test a graph that calls many different functions in parallel where
    the input is not the input to the function.

              y = f(x)
             /    |    \
      z0 = g0(y) ... zi = gi(y)
             \    |    /
               concat
    """

    def get_inner_func_1():
        x = relay.var("x", shape=(1, 4, 5, 6), dtype="int8")
        x = relay.tanh(x)
        x = _create_primitive_function(x)
        return x

    def get_inner_func_2():
        x = relay.var("x", shape=(1, 4, 5, 6), dtype="int8")
        x = relay.nn.max_pool2d(x, pool_size=(2, 2))
        x = _create_primitive_function(x)
        return x

    ifm = relay.var("input", shape=(1, 4, 5, 6), dtype="int8")
    y = relay.Call(get_inner_func_1(), [ifm])
    g = get_inner_func_2()

    no_calls = 20
    z = [relay.Call(g, [y]) for _ in range(0, no_calls)]
    out = relay.concatenate(z, axis=3)
    mod = tvm.IRModule.from_expr(out)

    expected_annotations = [
        [(1 * 4 * 5 * 6) + (1 * 4 * 5 * 6)],
        [(1 * 4 * 5 * 6) + (1 * 4 * 4 * 5) * i for i in range(1, no_calls + 1)],
    ]
    expected_io_annotation = (1 * 4 * 5 * 6) + (1 * 4 * 4 * (5 * no_calls))
    _check_used_memory_annotations(mod, expected_annotations, expected_io_annotation)
def get_graph(get_expected=False):
    exp_layout = "NHCWB16" if get_expected else "NHWC"
    x = relay.var("x", shape=(1, 2, 2, 2), dtype="int8")
    depthwise = infra.make_ethosu_depthwise_conv2d(
        x, 2, (1, 1), (0, 0), (1, 1), (0, 0), ofm_layout=exp_layout
    )
    conv = infra.make_ethosu_conv2d(
        depthwise,
        2,
        2,
        (1, 1),
        (0, 0),
        (1, 1),
        (0, 0),
        ifm_layout=exp_layout,
    )
    pool = infra.make_ethosu_pooling(conv, "MAX", (1, 1), 2, (1, 1), (0, 0))
    concat = relay.concatenate([conv, pool], axis=0)
    return relay.Function(relay.analysis.free_vars(concat), concat)
def block2relay(block: Block, batch_size):
    node2var = {}
    params = {}
    x = relay.var(name_hint=block.enter_node.name,
                  shape=(batch_size, *block.enter_node.output_shape))
    node2var[block.enter_node] = x
    for node in block.inner_nodes + [block.exit_node]:
        term_vars = []
        assert isinstance(node, (Pool, Conv, Identity))
        for term in node.inputs:
            value_vars = []
            for value in term:
                if value.begin == 0 and value.end == value.node.output_shape[0]:
                    var = node2var[value.node]
                else:
                    var = relay.strided_slice(
                        node2var[value.node],
                        begin=[0, value.begin, 0, 0],
                        end=[batch_size, value.end, *value.node.output_shape[1:]])
                value_vars.append(var)
            term_var = value_vars[0]
            for value_var in value_vars[1:]:
                term_var = term_var + value_var
            term_vars.append(term_var)
        if len(term_vars) > 1:
            x = relay.concatenate(term_vars, axis=1)
        else:
            x = term_vars[0]
        if isinstance(node, Conv):
            node2var[node] = conv2d(x, node, params)
        elif isinstance(node, Pool):
            node2var[node] = pool2d(x, node)
        elif isinstance(node, Identity):
            node2var[node] = x
        else:
            raise NotImplementedError()
    fn = relay.Function(relay.analysis.free_vars(x), x)
    if tvm_minor_version() <= 6:
        return relay.Module.from_expr(fn), params
    else:
        return tvm.ir.IRModule.from_expr(fn), params
def expected():
    def create_external_func1(mod_, compiler_name, symbol_name):
        x_int = relay.var("x_int", shape=(10, 10))

        p0 = relay.nn.relu(x_int)
        q0 = relay.tanh(x_int)

        # reshapes
        p0_reshaped = relay.reshape(p0, newshape=100)
        q0_reshaped = relay.reshape(q0, newshape=100)
        ofms = relay.concatenate((p0_reshaped, q0_reshaped), 0)

        f1 = relay.Function([x_int], ofms)
        f1 = set_func_attr(f1, compiler_name, symbol_name)
        glb_f1 = relay.GlobalVar(symbol_name)
        mod_[glb_f1] = f1
        mod_ = relay.transform.InferType()(mod_)
        return glb_f1, mod_

    mod = tvm.IRModule()
    x = relay.var("x", shape=(10, 10))

    glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0")
    ofms = relay.Call(glb_symbol_f1, [x])

    # splits
    (p0_flat, q0_flat) = relay.split(ofms, [100])

    # reshapes
    p0_flat_reshaped = relay.reshape(p0_flat, newshape=(10, 10))
    q0_flat_reshaped = relay.reshape(q0_flat, newshape=(10, 10))

    # original output
    tuple_out = relay.Tuple([p0_flat_reshaped, q0_flat_reshaped])

    p0 = relay.TupleGetItem(tuple_out, 0)
    q0 = relay.TupleGetItem(tuple_out, 1)
    r = relay.concatenate((p0, q0), axis=0)

    main = relay.Function([x], r)
    mod["main"] = main
    mod = relay.transform.InferType()(mod)
    return mod
def Inception7D(data,
                num_3x3_red, num_3x3,
                num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3,
                pool,
                name):
    tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name),
                     suffix='_conv')
    tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0, 0),
                     stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1')
    tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name),
                        suffix='_conv')
    tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3),
                        name=('%s_tower_1' % name), suffix='_conv_1')
    tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0),
                        name=('%s_tower_1' % name), suffix='_conv_2')
    tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3),
                        stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3')
    pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, pad=(0, 0),
                      name=('%s_pool_%s_pool' % (pool, name)))
    # concat
    concat = relay.concatenate((tower_3x3, tower_d7_3x3, pooling), axis=1)
    return concat
def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4):
    # use a fixed order of args so alpha equal check can pass
    args = [x, w1, w2, w3, w4]
    w = relay.concatenate((w1, w2, w4), axis=0)
    y = relay.nn.conv2d(x, w, channels=channels1 + channels2 + channels4)
    y1 = relay.strided_slice(
        y, begin=[0, 0], end=[-1, channels1], strides=[1, 1], slice_mode="size"
    )
    y2 = relay.strided_slice(
        y, begin=[0, channels1], end=[-1, channels2], strides=[1, 1], slice_mode="size"
    )
    y3 = relay.nn.conv2d(x, w3)
    y4 = relay.strided_slice(
        y,
        begin=[0, channels1 + channels2],
        end=[-1, channels4],
        strides=[1, 1],
        slice_mode="size",
    )
    y5 = relay.nn.max_pool2d(x)
    y = relay.Tuple((y1, y2, y3, y4, y5))
    return relay.Function(args, y)
def expected(x, w1, w2, w3, j):
    args = [x, w1, w2, w3]
    w_stacked = relay.concatenate((w1, w2, w3), axis=0)
    y = relay.nn.dense(x, w_stacked, units=6 * j)
    strides = [1, 1]
    y1 = relay.strided_slice(y, begin=[0, 0], end=[-1, j], strides=strides, slice_mode="size")
    y2 = relay.strided_slice(y, begin=[0, j], end=[-1, 2 * j], strides=strides, slice_mode="size")
    y3 = relay.strided_slice(
        y, begin=[0, 3 * j], end=[-1, 3 * j], strides=strides, slice_mode="size"
    )
    y = relay.Tuple((y1, y2, y3))
    return relay.Function(args, y)
def test_nested_branches():
    """
    Tests a graph with branches that also branch.

             input
            /     \
           /       \
    prim_func_1  prim_func_2
                   /     \
                  /       \
          prim_func_3  prim_func_4
    """

    def get_generic_inner_func():
        x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
        x = relay.nn.relu(x)
        return _create_primitive_function(x)

    ifm = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
    a = relay.Call(get_generic_inner_func(), [ifm])
    b = relay.Call(get_generic_inner_func(), [ifm])
    c = relay.Call(get_generic_inner_func(), [b])
    d = relay.Call(get_generic_inner_func(), [b])
    out = relay.concatenate([a, c, d], axis=3)
    mod = tvm.IRModule.from_expr(out)

    expected_annotations = [
        [(1 * 2 * 2 * 4) + (1 * 2 * 2 * 4)],
        # output from prim_func_1 is also still alive
        [(1 * 2 * 2 * 4) + (1 * 2 * 2 * 4) + (1 * 2 * 2 * 4)],
        # output from prim_func_1 is also still alive
        [(1 * 2 * 2 * 4) + (1 * 2 * 2 * 4) + (1 * 2 * 2 * 4)],
        # outputs from prim_func_1 and prim_func_3 are also still alive
        [(1 * 2 * 2 * 4) + (1 * 2 * 2 * 4) + (1 * 2 * 2 * 4) + (1 * 2 * 2 * 4)],
    ]
    expected_io_annotation = (1 * 2 * 2 * 4) + (1 * 2 * 2 * 12)
    _check_used_memory_annotations(mod, expected_annotations, expected_io_annotation)
def test_concatenate(interface_api, use_unpacked_api, test_runner):
    dtype = "float32"
    x = relay.var("x", shape=(10, 5), dtype=dtype)
    y = relay.var("y", shape=(10, 5), dtype=dtype)
    t = relay.var("z", shape=(), dtype=dtype)
    z = relay.concatenate((x, y), axis=1)
    z = relay.add(z, t)
    # Check result.
    func = relay.Function([x, y, t], z)
    x_data = np.random.rand(10, 5).astype(dtype)
    y_data = np.random.rand(10, 5).astype(dtype)
    t_data = np.random.uniform(size=()).astype(dtype)
    inputs = OrderedDict([("x", x_data), ("y", y_data), ("z", t_data)])

    output_list = generate_ref_data(func, inputs)
    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def _parallel_npu_functions():
    mod = tvm.IRModule({})

    # NPU function 1
    x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
    max_pool = relay.nn.max_pool2d(x)
    composite_func = relay.Function([x], max_pool)
    composite_func = composite_func.with_attr("Composite", "ethos-u.pooling")
    inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
    compiler_func = relay.Function([inp], composite_func)
    compiler_func = compiler_func.with_attr("used_memory", [32])
    npu_compiler_func1 = compiler_func.with_attr("Compiler", "ethos-u")
    g1 = relay.GlobalVar("g1")
    mod[g1] = npu_compiler_func1

    # NPU function 2
    x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
    abs_op = relay.abs(x)
    composite_func = relay.Function([x], abs_op)
    composite_func = composite_func.with_attr("Composite", "ethos-u.unary_elementwise")
    inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
    compiler_func = relay.Function([inp], composite_func)
    compiler_func = compiler_func.with_attr("used_memory", [32 + 16])
    npu_compiler_func2 = compiler_func.with_attr("Compiler", "ethos-u")
    g2 = relay.GlobalVar("g2")
    mod[g2] = npu_compiler_func2

    # Main
    inp = relay.var("main_input", shape=(1, 2, 2, 4), dtype="int8")
    call1 = relay.Call(g1, [inp])
    call2 = relay.Call(g2, [inp])
    concat = relay.concatenate([call1, call2], axis=3)
    main_func = relay.Function([inp], concat)
    main_func = main_func.with_attr("io_used_memory", 32)
    mod["main"] = main_func
    return mod
def get_network():
    # Get a list of modules representing subgraphs.
    mods = []
    dshape = (3, 3)
    data = relay.var("data_0", relay.TensorType(dshape, "float32"))
    data21 = relay.var("data_1", relay.TensorType(dshape, "float32"))
    data_net1_output_1 = relay.var("data_0", relay.TensorType(dshape, "float32"))
    data_net1_output_2 = relay.var("data_1", relay.TensorType(dshape, "float32"))
    data_net2_output_1 = relay.var("data_0", relay.TensorType(dshape, "float32"))
    mvalue1 = np.full((1), 1).astype("float32")
    mvalue2 = np.full((1), 2).astype("float32")
    mvalue3 = np.full((1), 3).astype("float32")
    mv1 = relay.Constant(tvm.nd.array(mvalue1))
    mv2 = relay.Constant(tvm.nd.array(mvalue2))
    mv3 = relay.Constant(tvm.nd.array(mvalue3))

    # There are three outputs in the first model.
    net1_output1 = relay.add(data, mv1)
    net1_output2 = relay.subtract(data, mv2)
    net1_output3 = relay.concatenate((net1_output1, net1_output2), axis=0)
    (net1_output3, _) = relay.split(net1_output3, indices_or_sections=2, axis=0)
    net1_output3 = relay.add(net1_output3, mv2)

    # The second model uses the output named net1_output3 of the first model as the first input,
    # the second input of the second model is data21.
    net2 = relay.add(net1_output3, mv2)
    net2 = relay.add(net2, data21)
    net2_output = relay.add(net2, mv3)

    # The third model uses the output named net2_output of the second model as the first input
    # and uses the output named net1_output2 of the first model as the second input.
    net3 = relay.multiply(net2_output, mv3)
    net3 = relay.add(net3, net1_output2)

    return tvm.IRModule.from_expr(
        relay.Function([data, data21], relay.Tuple([net3]))), dshape
def test_concatenate(interface_api, use_unpacked_api, use_calculated_workspaces):
    dtype = "float32"
    x = relay.var("x", shape=(10, 5), dtype=dtype)
    y = relay.var("y", shape=(10, 5), dtype=dtype)
    t = relay.var("z", shape=(), dtype=dtype)
    z = relay.concatenate((x, y), axis=1)
    z = relay.add(z, t)
    # Check result.
    func = relay.Function([x, y, t], z)
    x_data = np.random.rand(10, 5).astype(dtype)
    y_data = np.random.rand(10, 5).astype(dtype)
    t_data = np.random.uniform(size=()).astype(dtype)
    inputs = OrderedDict([("x", x_data), ("y", y_data), ("z", t_data)])

    output_list = generate_ref_data(func, inputs)
    compile_and_run(
        func,
        inputs,
        output_list,
        interface_api,
        use_unpacked_api,
        use_calculated_workspaces,
    )
def create_graph():
    def create_external_func1(mod_, compiler_name, symbol_name):
        x_int = relay.var("x_int", shape=(10, 10))
        w0_int = relay.var("w0_int", shape=(10, 10))
        w1_int = relay.var("w1_int", shape=(10, 10))
        w2_int = relay.var("w2_int", shape=(10, 10))

        z0 = relay.add(x_int, w0_int)
        p0 = relay.subtract(z0, w1_int)
        q0 = relay.multiply(z0, w2_int)
        f1_o_tuple = relay.Tuple([p0, q0])

        f1 = relay.Function([x_int, w0_int, w1_int, w2_int], f1_o_tuple)
        f1 = set_func_attr(f1, compiler_name, symbol_name)
        glb_f1 = relay.GlobalVar(symbol_name)
        mod_[glb_f1] = f1
        mod_ = relay.transform.InferType()(mod_)
        return glb_f1, mod_

    mod = tvm.IRModule()

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))

    glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0")
    pq_tuple = relay.Call(glb_symbol_f1, [x, w0, w1, w2])

    p0 = relay.TupleGetItem(pq_tuple, 0)
    q0 = relay.TupleGetItem(pq_tuple, 1)
    r = relay.concatenate((p0, q0), axis=0)

    main = relay.Function([x, w0, w1, w2], r)
    mod["main"] = main
    mod = relay.transform.InferType()(mod)
    return mod
def test_concatenate(interface_api, use_unpacked_api, test_runner):
    """Tests compilation of concatenate"""
    dtype = "float32"
    input_x = relay.var("x", shape=(10, 5), dtype=dtype)
    input_y = relay.var("y", shape=(10, 5), dtype=dtype)
    input_z = relay.var("z", shape=(), dtype=dtype)
    concat_inputs = relay.concatenate((input_x, input_y), axis=1)
    func_output = relay.add(input_z, concat_inputs)
    # Check result.
    func = relay.Function([input_x, input_y, input_z], func_output)
    x_data = np.random.rand(10, 5).astype(dtype)
    y_data = np.random.rand(10, 5).astype(dtype)
    t_data = np.random.uniform(size=()).astype(dtype)
    inputs = OrderedDict([("x", x_data), ("y", y_data), ("z", t_data)])

    output_list = generate_ref_data(func, inputs)
    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def expected(dshape):
    x = relay.var("x", shape=dshape)
    pooled = relay.nn.max_pool2d(x, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
    f0 = relay.Function([x], pooled)

    p0 = relay.var("p0", shape=(dshape[0], dshape[1], dshape[2] // 2, dshape[3] // 2))
    p1 = relay.var("p1", shape=dshape)
    upsampled = relay.nn.upsampling(p0, scale_h=2, scale_w=2, layout="NCHW")
    concat = relay.concatenate((upsampled, p1), axis=1)
    out = relay.add(concat, relay.const(1, "float32"))
    f1 = relay.Function([p0, p1], out)

    x = relay.var("x", shape=dshape)
    y = relay.Call(f0, [x])
    z = relay.Call(f1, [y, x])
    return relay.Function([x], z)
def expected():
    def create_external_func1(mod_, compiler_name, symbol_name):
        ifms_int = relay.var("ifms_int", shape=[200])

        # splits
        (x_int_flat, w0_int_flat) = relay.split(ifms_int, [100])

        # reshapes
        x_int = relay.reshape(x_int_flat, newshape=(10, 10))
        w0_int = relay.reshape(w0_int_flat, newshape=(10, 10))

        z0 = relay.add(x_int, w0_int)
        f1 = relay.Function([ifms_int], z0)
        f1 = set_func_attr(f1, compiler_name, symbol_name)
        glb_f1 = relay.GlobalVar(symbol_name)
        mod_[glb_f1] = f1
        mod_ = relay.transform.InferType()(mod_)
        return glb_f1, mod_

    mod = tvm.IRModule()

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))

    # reshapes
    x_reshaped = relay.reshape(x, newshape=100)
    w0_reshaped = relay.reshape(w0, newshape=100)

    # concat
    ifms = relay.concatenate((x_reshaped, w0_reshaped), 0)

    glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0")
    r = relay.Call(glb_symbol_f1, [ifms])
    main = relay.Function([x, w0], r)
    mod["main"] = main
    mod = relay.transform.InferType()(mod)
    return mod
def inception_like(data):
    c0 = conv(data)
    c1 = conv(data)
    return relay.concatenate((c0, c1), axis=1)
def gen_consecutive_tuple(x):
    y1 = gen_intermediate_tuple(x)
    y2 = gen_intermediate_tuple(x)
    y3 = gen_intermediate_tuple(x)
    concat = relay.concatenate((y1, y2, y3), axis=1)
    return concat
def test_byoc_microtvm(workspace_dir, board, west_cmd, microtvm_debug, use_fvp):
    """This is a simple test case to check BYOC capabilities of microTVM"""
    model = test_utils.ZEPHYR_BOARDS[board]
    build_config = {"debug": microtvm_debug}
    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # C compiler
    z0 = relay.add(x, w0)
    p0 = relay.subtract(z0, w1)
    q0 = relay.multiply(p0, w2)

    z1 = relay.add(x, w3)
    p1 = relay.subtract(z1, w4)
    q1 = relay.multiply(p1, w5)

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(f)
    mod = tvm.relay.transform.PartitionGraph()(mod)
    mod = tvm.relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))

    map_inputs = {"w{}".format(i): w_data[i] for i in range(8)}
    map_inputs["x"] = x_data

    check_result(
        temp_dir=workspace_dir,
        relay_mod=mod,
        map_inputs=map_inputs,
        out_shape=(30, 10),
        result=np.concatenate(
            (
                ((x_data + w_data[0]) - w_data[1]) * w_data[2],
                ((x_data + w_data[3]) - w_data[4]) * w_data[5],
                x_data + w_data[6] - w_data[7],
            ),
            axis=0,
        ),
        model=model,
        zephyr_board=board,
        west_cmd=west_cmd,
        build_config=build_config,
        use_fvp=use_fvp,
    )
def test_concatenate_grad():
    x = relay.var("x", shape=(2, 2, 5))
    y = relay.var("y", shape=(2, 1, 5))
    z = relay.var("z", shape=(2, 4, 5))
    fwd_func = relay.Function([x, y, z], relay.concatenate([x, y, z], axis=1))
    check_grad(fwd_func)
def test_tuple():
    target = "llvm"
    dtype = "float32"
    dshape = (1, 5, 32, 32)
    layout = "NCHW"
    target_ops = [relay.nn.conv2d]

    data = relay.var("data", shape=dshape, dtype=dtype)
    w0 = relay.var("w0_weight")
    conv0 = relay.nn.conv2d(data, w0, channels=2, kernel_size=(3, 3), padding=(1, 1))
    w1 = relay.var("w1_weight")
    conv1 = relay.nn.conv2d(data, w1, channels=3, kernel_size=(3, 3), padding=(1, 1))
    out = relay.concatenate([conv0, conv1], axis=1)
    net = relay.Function(relay.analysis.free_vars(out), out)
    net, params = relay.testing.create_workload(net)

    tasks = autotvm.task.extract_from_program(net["main"],
                                              target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d, ))
    wkl_list = [
        create_workload((1, 5, 32, 32), (2, 5, 3, 3), (1, 1), (1, 1), (1, 1),
                        layout, layout, dtype, dtype),
        create_workload((1, 5, 32, 32), (3, 5, 3, 3), (1, 1), (1, 1), (1, 1),
                        layout, layout, dtype, dtype),
    ]
    costs = [0.01, 0.012, 0.03, 0.04]
    config_list = []
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [1, 5]],
                      ["tile_oc", "sp", [1, 2]],
                      ["tile_ow", "sp", [4, 8]],
                      ["unroll_kw", "ot", True]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [1, 5]],
                      ["tile_oc", "sp", [1, 3]],
                      ["tile_ow", "sp", [2, 16]],
                      ["unroll_kw", "ot", False]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [1, 5]],
                      ["tile_oc", "sp", [2, 1]],
                      ["tile_ow", "sp", [4, 8]],
                      ["unroll_kw", "ot", True]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [1, 5]],
                      ["tile_oc", "sp", [3, 1]],
                      ["tile_ow", "sp", [2, 16]],
                      ["unroll_kw", "ot", False]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))

    records = []
    wkl_list = wkl_list + wkl_list
    tasks = tasks + tasks
    for wkl, cost, config, task in zip(wkl_list, costs, config_list, tasks):
        task.workload = wkl
        ms_input = MeasureInput(target=target, task=task, config=config)
        ms_output = MeasureResult(costs=(cost, ), error_no=0, all_cost=-1, timestamp=-1)
        records.append((ms_input, ms_output))

    ltf_records = []
    ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
    ltf_arg = autotvm.task.topi_integration.serialize_args(ltf_arg)
    ltf_wkl = ('layout_transform', ) + autotvm.task.args_to_workload(ltf_arg)
    ltf_task = copy.deepcopy(tasks[0])
    ltf_task.workload = ltf_wkl
    ms_input = MeasureInput(target=target, task=ltf_task, config=None)
    ms_output = MeasureResult(costs=(1.91224744e-05, ), error_no=0, all_cost=-1, timestamp=-1)
    ltf_records.append((ms_input, ms_output))

    executor = DPTuner(net, {"data": dshape}, records, target_ops, target)
    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
    executor.run()
    out = [record[0].config for record in executor.get_optimal_records()]
    expected_out = [records[2][0].config, records[1][0].config]
    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
                                % (str(expected_out), str(out))

    executor = PBQPTuner(net, {"data": dshape}, records, target_ops, target)
    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
    executor.run()
    out = [record[0].config for record in executor.get_optimal_records()]
    expected_out = [records[2][0].config, records[1][0].config]
    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
                                % (str(expected_out), str(out))
def test_multi_node_subgraph(check_result):
    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # subgraph0
    x0 = relay.var("x0", shape=(10, 10))
    w00 = relay.var("w00", shape=(10, 10))
    w01 = relay.var("w01", shape=(10, 10))
    w02 = relay.var("w02", shape=(10, 10))
    z00 = relay.add(x0, w00)
    p00 = relay.subtract(z00, w01)
    q00 = relay.multiply(p00, w02)
    subgraph0 = relay.Function([x0, w00, w01, w02], q00)
    subgraph0 = set_external_func_attr(subgraph0, "ccompiler", "ccompiler_0")
    call0 = relay.Call(subgraph0, [x, w0, w1, w2])

    # subgraph1
    x1 = relay.var("x1", shape=(10, 10))
    w10 = relay.var("w10", shape=(10, 10))
    w11 = relay.var("w11", shape=(10, 10))
    w12 = relay.var("w12", shape=(10, 10))
    z10 = relay.add(x1, w10)
    p10 = relay.subtract(z10, w11)
    q10 = relay.multiply(p10, w12)
    subgraph1 = relay.Function([x1, w10, w11, w12], q10)
    subgraph1 = set_external_func_attr(subgraph1, "ccompiler", "ccompiler_1")
    call1 = relay.Call(subgraph1, [x, w3, w4, w5])

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((call0, call1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    mod["main"] = f
    mod = relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))

    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)])
    check_result(
        mod,
        map_inputs,
        (30, 10),
        np.concatenate(
            (
                ((x_data + w_data[0]) - w_data[1]) * w_data[2],
                ((x_data + w_data[3]) - w_data[4]) * w_data[5],
                x_data + w_data[6] - w_data[7],
            ),
            axis=0,
        ),
    )
def before():
    shape = (tvm.tir.const(10, "int64"), tvm.tir.const(1, "int64"))
    x = relay.var("x", shape=shape)
    concat = relay.concatenate([x, x], axis=-1)
    out = relay.op.take(concat, indices=relay.const([0], dtype="int64"))
    return relay.Function(relay.analysis.free_vars(out), out)
def concatenate(data,
                input_scales,
                input_zero_points,
                output_scale,
                output_zero_point,
                axis):
    """Concatenate the quantized input tensors along the given axis.

    Parameters
    ----------
    data : Union(List[relay.Expr], Tuple[relay.Expr])
        The list of quantized tensors.

    input_scales : List[float32]
        The list of scales of input quantized tensors.

    input_zero_points : List[int32]
        The list of zero points of input quantized tensors.

    output_scale : float32
        The scale of the output quantized tensor.

    output_zero_point : int32
        The zero point of the output quantized tensor.

    axis : int
        The axis along which the tensors are concatenated.

    Returns
    -------
    result: relay.Expr
        The concatenated quantized tensor.
    """

    data = list(data)
    requantized_exprs = list(data)

    # Find the dtype of the input expr. This is required for the requantize op. Since, this is
    # concatenate op, the dtype of the input is same as dtype of the output.
    mod = relay.Module.from_expr(data[0])
    mod = relay.transform.InferType()(mod)
    entry = mod["main"]
    data0 = entry if isinstance(data[0], relay.Function) else entry.body
    in_dtype = data0.checked_type.dtype

    # First check if all the input qnn params match. If yes, we can call concatenate first,
    # followed by a requantize.
    if all(scale == input_scales[0] for scale in input_scales) \
            and all(zero_point == input_zero_points[0] for zero_point in input_zero_points):
        out = relay.concatenate(tuple(data), axis)
        input_scale = input_scales[0]
        input_zero_point = input_zero_points[0]
        if input_scale != output_scale or input_zero_point != output_zero_point:
            out = requantize(data=out,
                             input_scale=input_scales[0],
                             input_zero_point=input_zero_points[0],
                             output_scale=output_scale,
                             output_zero_point=output_zero_point,
                             out_dtype=in_dtype)
        return out

    # If the output qnn params do not match the input qnn params, we can call requantize on the
    # input expr first, followed by a concatenate on the requantized input exprs.
    for idx, quantized_expr in enumerate(data):
        input_scale = input_scales[idx]
        input_zero_point = input_zero_points[idx]
        if input_scale != output_scale or input_zero_point != output_zero_point:
            requantized_exprs[idx] = requantize(data=quantized_expr,
                                                input_scale=input_scale,
                                                input_zero_point=input_zero_point,
                                                output_scale=output_scale,
                                                output_zero_point=output_zero_point,
                                                out_dtype=in_dtype)
    return relay.concatenate(tuple(requantized_exprs), axis)
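# Usage sketch for the quantized concatenate helper above. This is illustrative only:
# the shapes, scales, and zero points are assumptions, not values taken from any test,
# and `relay` / `requantize` are assumed to be importable as in the helper itself.
# Because the two input scales differ, the helper requantizes each input to the output
# qnn params before emitting relay.concatenate.
from tvm import relay

a = relay.var("a", shape=(1, 4), dtype="int8")
b = relay.var("b", shape=(1, 4), dtype="int8")
out = concatenate(
    (a, b),
    input_scales=[0.25, 0.5],
    input_zero_points=[0, 0],
    output_scale=0.5,
    output_zero_point=0,
    axis=1,
)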
def test_triangle_block():
    target = "llvm"
    dtype = "float32"
    dshape = (1, 3, 8, 8)
    layout = "NCHW"
    conv2d = relay.op.get("nn.conv2d")
    target_ops = [conv2d]

    data = relay.var("data", shape=dshape, dtype=dtype)
    w0 = relay.var("w0_weight")
    conv0 = relay.nn.conv2d(data, w0, channels=16, kernel_size=(3, 3), padding=(1, 1))
    w1 = relay.var("w1_weight")
    conv1 = relay.nn.conv2d(conv0, w1, channels=32, kernel_size=(1, 1))
    w2 = relay.var("w2_weight")
    conv2 = relay.nn.conv2d(data, w2, channels=32, kernel_size=(3, 3), padding=(1, 1))
    out = relay.concatenate([conv0, conv1, conv2], axis=1)
    net = relay.Function(relay.analysis.free_vars(out), out)
    net, params = relay.testing.create_workload(net)

    tasks = autotvm.task.extract_from_program(net["main"],
                                              target=target,
                                              params=params,
                                              ops=(conv2d, ))
    costs = [0.04, 0.012, 0.03, 0.02, 0.02, 0.045]
    config_list = []
    cfg_dict = {"index": -1,
                "code_hash": None,
                "entity": [["tile_ic", "sp", [3, 1]],
                           ["tile_oc", "sp", [4, 4]],
                           ["tile_ow", "sp", [4, 2]],
                           ["unroll_kw", "ot", True]]}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"index": -1,
                "code_hash": None,
                "entity": [["tile_ic", "sp", [2, 8]],
                           ["tile_oc", "sp", [1, 32]],
                           ["tile_oh", "ot", 1],
                           ["tile_ow", "sp", [4, 2]]]}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"index": -1,
                "code_hash": None,
                "entity": [["tile_ic", "sp", [8, 4]],
                           ["tile_oc", "sp", [4, 8]],
                           ["tile_ow", "sp", [2, 4]],
                           ["unroll_kw", "ot", False]]}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"index": -1,
                "code_hash": None,
                "entity": [["tile_ic", "sp", [1, 3]],
                           ["tile_oc", "sp", [2, 8]],
                           ["tile_ow", "sp", [4, 2]],
                           ["unroll_kw", "ot", True]]}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"index": -1,
                "code_hash": None,
                "entity": [["tile_ic", "sp", [4, 4]],
                           ["tile_oc", "sp", [2, 16]],
                           ["tile_oh", "ot", 1],
                           ["tile_ow", "sp", [4, 2]]]}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"index": -1,
                "code_hash": None,
                "entity": [["tile_ic", "sp", [16, 2]],
                           ["tile_oc", "sp", [8, 4]],
                           ["tile_ow", "sp", [2, 4]],
                           ["unroll_kw", "ot", False]]}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))

    records = []
    tasks = tasks + tasks
    for cost, config, task in zip(costs, config_list, tasks):
        ms_input = MeasureInput(target=target, task=task, config=config)
        ms_output = MeasureResult(costs=(cost, ), error_no=0, all_cost=-1, timestamp=-1)
        records.append((ms_input, ms_output))

    ltf_records = []
    ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
    ltf_task = autotvm.task.create('layout_transform', ltf_arg, target)
    ms_input = MeasureInput(target=target, task=ltf_task, config=None)
    ms_output = MeasureResult(costs=(1.91224744e-05, ), error_no=0, all_cost=-1, timestamp=-1)
    ltf_records.append((ms_input, ms_output))

    executor = DPTuner(net, {"data": dshape}, records, target_ops, target)
    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
    executor.run()
    out = [record[0].config for record in executor.get_optimal_records()]
    expected_out = [records[3][0].config, records[1][0].config, records[2][0].config]
    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
                                % (str(expected_out), str(out))

    executor = PBQPTuner(net, {"data": dshape}, records, target_ops, target)
    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
    executor.run()
    out = [record[0].config for record in executor.get_optimal_records()]
    expected_out = [records[3][0].config, records[1][0].config, records[2][0].config]
    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
                                % (str(expected_out), str(out))
def before():
    shape = (tvm.tir.const(10, "int64"), tvm.tir.const(1, "int64"))
    x = relay.var("x", shape=shape)
    concat = relay.concatenate([x, x], axis=-1)
    out = relay.gather_nd(concat, indices=relay.expr.const([[0, 1], [1, 0]], dtype="int64"))
    return relay.Function(relay.analysis.free_vars(out), out)
def before():
    a = relay.const(c_data)
    b = relay.const(c_data)
    y = relay.concatenate((a, b), axis=0)
    return relay.Function([], y)