def verify_any_tile(dshape, reps, np_dshape, np_reps):
    # Check relay.tile on an input whose shape may contain dynamic (Any) dims,
    # comparing against np.tile on the concrete np_dshape/np_reps instantiation.
    mod = tvm.IRModule()
    x = relay.var("x", shape=dshape, dtype="float32")
    y = relay.tile(x, reps=reps)
    mod["main"] = relay.Function([x], y)
    x_data = np.random.uniform(size=np_dshape).astype("float32")
    ref_res = np.tile(x_data, reps=np_reps)
    check_result([x_data], mod, ref_res)

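# Hedged usage sketch (illustrative shapes and reps, not taken from the original test
# suite): dshape uses relay.Any() for the symbolic dims, while np_dshape and np_reps
# supply the concrete values used to build the NumPy reference.
def test_any_tile_example():
    verify_any_tile((relay.Any(), relay.Any(), relay.Any()), (3, 2, 1), (2, 3, 4), (3, 2, 1))
    verify_any_tile((relay.Any(), 3), (2, 1), (2, 3), (2, 1))
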
def verify_tile(dshape, reps):
    # Dynamic variant: reps is supplied at runtime as a 1-D tensor rather than
    # as a static attribute of the tile op.
    x = relay.var("x", relay.TensorType(dshape, "float32"))
    r = relay.var("reps", relay.TensorType((len(reps),), "float32"))
    z = relay.tile(x, r)
    func = relay.Function([x, r], z)
    x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
    ref_res = np.tile(x_data, reps=reps)
    verify_func(func, [x_data, np.array(reps).astype("float32")], ref_res)

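# Hedged usage sketch (illustrative values, not from the original source): each call
# exercises the dynamic tile path, since the reps tensor is passed as runtime data.
def test_dyn_tile_example():
    verify_tile((2, 3, 4), (3, 2, 1))
    verify_tile((1, 2, 3), (2, 2, 2))
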
def verify_tile(dshape, reps):
    x = relay.var("x", relay.TensorType(dshape, "float32"))
    z = relay.tile(x, reps=reps)
    func = relay.Function([x], z)
    x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
    ref_res = np.tile(x_data, reps=reps)
    for target, ctx in tvm.testing.enabled_targets():
        for kind in ["graph", "debug"]:
            intrp = relay.create_executor(kind, ctx=ctx, target=target)
            op_res = intrp.evaluate(func)(x_data)
            tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-05)

def verify_any_tile(dshape, reps, np_dshape, np_reps):
    mod = tvm.IRModule()
    x = relay.var("x", shape=dshape, dtype="float32")
    y = relay.tile(x, reps=reps)
    mod["main"] = relay.Function([x], y)
    x_data = np.random.uniform(size=np_dshape).astype("float32")
    ref_res = np.tile(x_data, reps=np_reps)
    for kind in ["debug", "vm"]:
        ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm")
        res = ex.evaluate()(x_data)
        tvm.testing.assert_allclose(res.asnumpy(), ref_res, rtol=1e-5)

def test_compile_nhwc_pack():
    data = relay.var("data", shape=(1, 1, 1, 1024), dtype="uint8")
    weight = relay.var("weight", shape=(1, 1, 1024, 1001), dtype="int8")
    p2 = relay.var("p2", shape=(1, 1, 1, 1), dtype="int32")
    conv = relay.nn.conv2d(
        data,
        weight,
        kernel_size=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
        out_dtype="int32",
    )
    multiply = relay.multiply(relay.const(-22, dtype="int32"), p2)
    tile = relay.tile(multiply, reps=(1, 1, 1, 1001))
    subtract = relay.subtract(conv, tile)
    func = subtract
    mod = relay.Function(relay.analysis.free_vars(func), func)
    relay.build(mod, target="llvm")

def verify_tile(dshape, reps):
    x = relay.var("x", relay.TensorType(dshape, "float32"))
    z = relay.tile(x, reps=reps)
    func = relay.Function([x], z)
    x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
    ref_res = np.tile(x_data, reps=reps)
    for target, ctx in ctx_list():
        for kind in ["graph", "debug"]:
            intrp = relay.create_executor(kind, ctx=ctx, target=target)
            op_res = intrp.evaluate(func)(x_data)
            tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)

def verify_tile(shape, reps, oshape):
    # DynamicToStatic check: tiling by shape_of(y) should be folded into a static
    # tile call whose checked type matches the expected output shape oshape.
    x = relay.var("x", relay.TensorType(shape, "float32"))
    y = relay.var("y", relay.TensorType(reps, "float32"))
    z = relay.tile(x, relay.shape_of(y))
    func = run_infer_type(relay.Function([x, y], z))
    func2 = run_opt_pass(run_opt_pass(func, transform.DynamicToStatic()), transform.InferType())
    zz = func2.body
    assert isinstance(zz, relay.Call)
    assert zz.op == relay.op.get("tile")
    assert zz.checked_type == relay.ty.TensorType(oshape, "float32")
    x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
    y_data = np.random.uniform(low=-1, high=1, size=reps).astype("float32")
    ref_res = np.tile(x_data, reps)
    verify_func(func2, [x_data, y_data], ref_res)

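# Hedged usage sketch (illustrative values): oshape must equal np.tile's output shape,
# e.g. tiling a (2, 3, 4) tensor with reps (2, 1, 5) yields (4, 3, 20).
def test_dynamic_to_static_tile_example():
    verify_tile((2, 3, 4), (2, 1, 5), (4, 3, 20))
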
def test_lower_to_tir():
    data = relay.var("data", shape=(1, 1, 1, 1024), dtype="uint8")
    weight = relay.var("weight", shape=(1, 1, 1024, 1001), dtype="int8")
    p2 = relay.var("p2", shape=(1, 1, 1, 1), dtype="int32")
    conv = relay.nn.conv2d(
        data,
        weight,
        kernel_size=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
        out_dtype="int32",
    )
    tile = relay.tile(p2, reps=(1, 1, 1, 1001))
    subtract = relay.subtract(conv, tile)
    func = subtract
    expr = relay.Function(relay.analysis.free_vars(func), func)
    mod = tvm.IRModule.from_expr(expr)
    mod = relay.transform.InferType()(mod)
    lower_to_tir(mod["main"])

def relay_conv2d_weight_grad(c, data, wsize, dout, stride, pad, dil, groups):
    assert wsize.is_constant(tuple)
    assert stride.is_constant(tuple)
    assert pad.is_constant(tuple)
    assert dil.is_constant(tuple)
    assert groups.is_constant(int)
    batch, in_channel, in_h, in_w = data.abstract.xshape()
    out_channel, _, filter_h, filter_w = wsize.value
    _, _, grad_h, grad_w = dout.abstract.xshape()
    pad_h, pad_w = pad.value
    data = c.ref(data)
    dout = c.ref(dout)
    fpad_h = pad_h * 2
    fpad_w = pad_w * 2
    fpad_top = (pad_h + 1) // 2
    fpad_left = (pad_w + 1) // 2
    fpad_bottom = fpad_h - fpad_top
    fpad_right = fpad_w - fpad_left
    padded_weight_grad_h = (
        in_h - (grad_h - 1) * stride.value[0] - 1 + fpad_top + fpad_bottom
    ) // dil.value[0] + 1
    padded_weight_grad_w = (
        in_w - (grad_w - 1) * stride.value[1] - 1 + fpad_left + fpad_right
    ) // dil.value[1] + 1
    dout = relay.tile(dout, [1, in_channel // groups.value, 1, 1])
    dout = relay.reshape(dout, [-1, 1, 0, 0])
    data = relay.reshape(data, [1, -1, 0, 0])
    d = relay.nn.conv2d(
        data,
        dout,
        strides=dil.value,
        padding=pad.value,
        dilation=stride.value,
        groups=batch * in_channel,
    )
    d = relay.reshape(
        d,
        [batch, in_channel // groups.value, out_channel, padded_weight_grad_h, padded_weight_grad_w],
    )
    d = relay.sum(d, axis=0)
    d = relay.transpose(d, [1, 0, 2, 3])
    if padded_weight_grad_h > filter_h or padded_weight_grad_w > filter_w:
        d = relay.strided_slice(d, begin=[0, 0, 0, 0], end=[None, None, filter_h, filter_w])
    return d

def test_compile_propogate_hash():
    data = relay.var("data", shape=(1, 1, 1, 1024), dtype="uint8")
    weight = relay.var("weight", shape=(1, 1, 1024, 1001), dtype="int8")
    p2 = relay.var("p2", shape=(1, 1, 1, 1), dtype="int32")
    conv = relay.nn.conv2d(
        data,
        weight,
        kernel_size=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
        out_dtype="int32",
    )
    multiply = relay.multiply(relay.const(-22, dtype="int32"), p2)
    tile = relay.tile(multiply, reps=(1, 1, 1, 1001))
    subtract = relay.subtract(conv, tile)
    func = subtract
    mod = tvm.IRModule.from_expr(relay.Function(relay.analysis.free_vars(func), func))
    vm = relay.vm.VMCompiler()
    opt_mod, _ = vm.optimize(mod, target="llvm")
    for f in opt_mod.functions.values():
        assert "hash" in f.attrs.keys()

def relay_conv2d_weight_grad(c, data, wsize, dout, stride, pad, dil, groups):
    # This implementation should match the one in the pytorch backend
    # (myia.compile.backends.pytorch_conv_grad.conv2d_weight)
    assert wsize.is_constant(tuple)
    assert stride.is_constant(tuple)
    assert pad.is_constant(tuple)
    assert dil.is_constant(tuple)
    assert groups.is_constant(int)
    batch, in_channel, in_h, in_w = data.abstract.xshape()
    out_channel, _, filter_h, filter_w = wsize.value
    grad_sh0, grad_sh1, grad_h, grad_w = dout.abstract.xshape()
    pad_h, pad_w = pad.value
    data = c.ref(data)
    dout = c.ref(dout)
    fpad_h = pad_h * 2
    fpad_w = pad_w * 2
    fpad_top = (pad_h + 1) // 2
    fpad_left = (pad_w + 1) // 2
    fpad_bottom = fpad_h - fpad_top
    fpad_right = fpad_w - fpad_left
    padded_weight_grad_h = (
        in_h - (grad_h - 1) * stride.value[0] - 1 + fpad_top + fpad_bottom
    ) // dil.value[0] + 1
    padded_weight_grad_w = (
        in_w - (grad_w - 1) * stride.value[1] - 1 + fpad_left + fpad_right
    ) // dil.value[1] + 1
    dout = relay.tile(dout, [1, in_channel // groups.value, 1, 1])
    dout = relay.reshape(dout, [-1, 1, 0, 0])
    data = relay.reshape(data, [1, -1, 0, 0])
    d = relay.nn.conv2d(
        data,
        dout,
        strides=dil.value,
        padding=pad.value,
        dilation=stride.value,
        groups=batch * in_channel,
    )
    conv_sh1 = grad_sh0 * grad_sh1 * (in_channel // groups.value)
    d = relay.reshape(
        d,
        [batch, conv_sh1 // batch, padded_weight_grad_h, padded_weight_grad_w],
    )
    d = relay.sum(d, axis=0)
    if groups.value > 1:
        d = relay.reshape(
            d,
            [
                grad_sh1,
                in_channel // groups.value,
                padded_weight_grad_h,
                padded_weight_grad_w,
            ],
        )
    else:
        d = relay.reshape(
            d,
            [
                in_channel // groups.value,
                grad_sh1,
                padded_weight_grad_h,
                padded_weight_grad_w,
            ],
        )
        d = relay.transpose(d, [1, 0, 2, 3])
    if padded_weight_grad_h > filter_h or padded_weight_grad_w > filter_w:
        d = relay.strided_slice(d, begin=[0, 0, 0, 0], end=[None, None, filter_h, filter_w])
    return d

def legalize_conv2d_backward_weight(attrs, inputs, types):
    """Legalize conv2d_backward_weight op.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of current op
    inputs : list of tvm.relay.Expr
        The args of the Relay expr to be legalized
    types : list of types
        List of input and output types

    Returns
    -------
    result : tvm.relay.Expr
        The legalized expr
    """
    grad, data = inputs
    data_shape = get_const_tuple(data.checked_type.shape)
    weight_shape = get_const_tuple(types[2].shape)
    _, out_channel, grad_h, grad_w = get_const_tuple(grad.checked_type.shape)
    batch, in_channel, in_h, in_w = data_shape
    _, _, filter_h, filter_w = weight_shape
    fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(
        get_const_tuple(attrs.padding), (filter_h, filter_w)
    )
    stride_h, stride_w = get_const_tuple(attrs.strides)
    dilation_h, dilation_w = get_const_tuple(attrs.dilation)

    grad = relay.tile(grad, [1, in_channel // attrs.groups, 1, 1])
    grad = relay.reshape(grad, [-1, 1, 0, 0])  # batch * oc * ic // groups, 1, oh, ow
    data = relay.reshape(data, [1, -1, 0, 0])  # 1, batch * ic, ih, iw

    backward_weight = relay.nn.conv2d(
        data,
        grad,
        strides=attrs.dilation,
        padding=attrs.padding,
        dilation=attrs.strides,
        groups=in_channel * batch,
        out_dtype=attrs.out_dtype,
    )

    # infer shape of backward_weight
    padded_weight_grad_h = (
        in_h - (grad_h - 1) * stride_h - 1 + fpad_top + fpad_bottom
    ) // dilation_h + 1
    padded_weight_grad_w = (
        in_w - (grad_w - 1) * stride_w - 1 + fpad_left + fpad_right
    ) // dilation_w + 1

    backward_weight = relay.reshape(
        backward_weight,
        [
            batch,
            in_channel // attrs.groups,
            out_channel,
            padded_weight_grad_h,
            padded_weight_grad_w,
        ],
    )
    backward_weight = relay.sum(backward_weight, axis=0)
    backward_weight = relay.transpose(backward_weight, [1, 0, 2, 3])

    assert padded_weight_grad_h >= filter_h
    assert padded_weight_grad_w >= filter_w

    if padded_weight_grad_h > filter_h or padded_weight_grad_w > filter_w:
        backward_weight = relay.strided_slice(
            backward_weight,
            begin=[0, 0, 0, 0],
            end=[out_channel, in_channel // attrs.groups, filter_h, filter_w],
        )

    return backward_weight
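

# Hedged worked example (illustrative numbers, not from the original source) for the
# padded_weight_grad_* shape arithmetic above: a 32x32 input with a 3x3 filter,
# stride 1, dilation 1 and no padding gives grad_h = 30, and the padded weight-gradient
# height reduces to the filter height, so the final strided_slice is skipped.
def _weight_grad_shape_example():
    in_h, filter_h, stride_h, dilation_h = 32, 3, 1, 1
    fpad_top = fpad_bottom = 0
    grad_h = (in_h - filter_h) // stride_h + 1  # 30
    padded_weight_grad_h = (
        in_h - (grad_h - 1) * stride_h - 1 + fpad_top + fpad_bottom
    ) // dilation_h + 1
    assert grad_h == 30 and padded_weight_grad_h == filter_h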