import numpy as np
import tvm
from tvm.contrib import miopen


def test_conv2d():
    in_channel = 3
    out_channel = 64
    filter_h = 3
    filter_w = 3
    pad_h = 1
    pad_w = 1
    stride_h = 1
    stride_w = 1
    dilation_h = 1
    dilation_w = 1
    xshape = [1, in_channel, 128, 128]
    if not tvm.module.enabled("rocm"):
        print("skip because rocm is not enabled...")
        return
    if not tvm.get_global_func("tvm.contrib.miopen.conv2d.setup", True):
        print("skip because miopen is not enabled...")
        return
    wshape = (out_channel, in_channel, filter_h, filter_w)

    X = tvm.placeholder(xshape, name='X')
    W = tvm.placeholder(wshape, name='W')
    Y = miopen.conv2d_forward(X, W, stride_h, stride_w, pad_h, pad_w,
                              dilation_h, dilation_w, conv_mode=0)
    yshape = [x.value for x in Y.shape]

    import topi
    with tvm.target.create("rocm -libs=miopen"):
        s = topi.generic.schedule_extern(Y)

    def verify():
        ctx = tvm.rocm(0)
        f = tvm.build(s, [X, W, Y], "rocm", target_host="llvm", name="conv2d")
        x = tvm.nd.array(np.random.uniform(-1, 1, xshape).astype(np.float32), ctx)
        w = tvm.nd.array(np.random.uniform(-1, 1, wshape).astype(np.float32), ctx)
        y = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), ctx)
        # run the MIOpen-backed kernel
        f(x, w, y)

        # reference result from the generic topi conv2d on the same device
        Y_ref = topi.nn.conv2d_nchw(X, W, (stride_h, stride_w), (pad_h, pad_w))
        with tvm.target.rocm():
            s_ref = topi.generic.schedule_conv2d_nchw([Y_ref])
        f_ref = tvm.build(s_ref, [X, W, Y_ref], "rocm")
        y_ref = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), ctx)
        f_ref(x, w, y_ref)

        print("Max abs diff:", np.max(np.abs(y.asnumpy() - y_ref.asnumpy())))
        tvm.testing.assert_allclose(y.asnumpy(), y_ref.asnumpy(), atol=1e-3)

    verify()
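
# Worked check of the shapes in the test above (illustrative, not part of the
# original test): with H = W = 128, a 3x3 kernel, padding 1 and stride 1 the
# convolution preserves spatial size, so yshape comes out as [1, 64, 128, 128].
def _expected_out_dim(size, kernel, pad, stride):
    # standard convolution output-size arithmetic; _expected_out_dim is a
    # hypothetical helper added here for illustration only
    return (size + 2 * pad - kernel) // stride + 1

assert _expected_out_dim(128, 3, 1, 1) == 128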
# conv2d_rocm below assumes the surrounding topi package for its helpers
# (get_const_tuple, get_pad_tuple, and the conv2d_cuda fallback), plus
# `import tvm` and `from tvm.contrib import miopen`.
def conv2d_rocm(cfg, data, kernel, strides, padding, dilation,
                layout='NCHW', out_dtype='float32'):
    """Conv2D operator for rocm backend.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width]

    strides : int or a list/tuple of two ints
        stride size, or [stride_height, stride_width]

    padding : int or a list/tuple of 2 or 4 ints
        padding size, or [pad_height, pad_width] for 2 ints,
        or [pad_top, pad_left, pad_bottom, pad_right] for 4 ints

    layout : str
        layout of data

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    target = tvm.target.Target.current()
    if "miopen" in target.libs:
        assert layout == 'NCHW', "Only NCHW layout is supported."
        CO, CI, KH, KW = get_const_tuple(kernel.shape)
        N, _, H, W = get_const_tuple(data.shape)

        # normalize strides/padding/dilation
        stride_h, stride_w = (strides, strides) if isinstance(strides, int) else strides
        pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
        # MIOpen takes one per-side pad value per axis, so the padding must be
        # symmetric; pass the per-side values pt/pl rather than the pt+pb total
        assert (pt == pb) and (pl == pr), "MIOpen only supports symmetric padding"
        pad_h, pad_w = pt, pl
        dilation_h, dilation_w = (dilation, dilation) if isinstance(dilation, int) else dilation

        OH = (H + 2 * pad_h - KH) // stride_h + 1
        OW = (W + 2 * pad_w - KW) // stride_w + 1
        cfg.add_flop(2 * N * OH * OW * CO * CI * ((KH - 1) * dilation_h + 1) *
                     ((KW - 1) * dilation_w + 1))

        return miopen.conv2d_forward(data, kernel,
                                     stride_h, stride_w,
                                     pad_h, pad_w,
                                     dilation_h, dilation_w,
                                     conv_mode=0, data_type=1)

    return conv2d_cuda(cfg, data, kernel, strides, padding, dilation, layout, out_dtype)
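
# A worked instance of the cfg.add_flop(...) formula above, plugged with the
# shapes the tests in this file use (N=1, H=W=128, CO=64, CI=3, 3x3 kernel,
# stride 1, dilation 1): each output element costs a multiply and an add per
# CI * KH * KW input tap, hence the leading factor of 2.
N, OH, OW, CO, CI, KH, KW = 1, 128, 128, 64, 3, 3, 3
flops = 2 * N * OH * OW * CO * CI * KH * KW
assert flops == 56_623_104  # roughly 56.6 MFLOPs per forward pass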
import numpy as np
import tvm
import tvm.testing
from tvm import te, topi
from tvm.contrib import miopen


def test_conv2d():
    in_channel = 3
    out_channel = 64
    filter_h = 3
    filter_w = 3
    pad_h = 1
    pad_w = 1
    stride_h = 1
    stride_w = 1
    dilation_h = 1
    dilation_w = 1
    xshape = [1, in_channel, 128, 128]
    if not tvm.get_global_func("tvm.contrib.miopen.conv2d.setup", True):
        print("skip because miopen is not enabled...")
        return
    wshape = (out_channel, in_channel, filter_h, filter_w)

    X = te.placeholder(xshape, name="X")
    W = te.placeholder(wshape, name="W")
    Y = miopen.conv2d_forward(X, W, stride_h, stride_w, pad_h, pad_w,
                              dilation_h, dilation_w, conv_mode=0, data_type=1)
    yshape = [x.value for x in Y.shape]
    s = te.create_schedule(Y.op)

    def verify():
        dev = tvm.rocm(0)
        f = tvm.build(s, [X, W, Y], "rocm --host=llvm", name="conv2d")
        x = tvm.nd.array(np.random.uniform(-1, 1, xshape).astype(np.float32), dev)
        w = tvm.nd.array(np.random.uniform(-1, 1, wshape).astype(np.float32), dev)
        y = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), dev)
        # run the MIOpen-backed kernel
        f(x, w, y)

        # reference result from the generic topi conv2d
        Y_ref = topi.nn.conv2d_nchw(X, W, (stride_h, stride_w), (pad_h, pad_w),
                                    (dilation_h, dilation_w))
        s_ref = te.create_schedule(Y_ref.op)
        f_ref = tvm.build(s_ref, [X, W, Y_ref], "rocm --host=llvm")
        y_ref = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), dev)
        f_ref(x, w, y_ref)

        print("Max abs diff:", np.max(np.abs(y.numpy() - y_ref.numpy())))
        tvm.testing.assert_allclose(y.numpy(), y_ref.numpy(), atol=1e-3)

    verify()
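
# In current TVM releases the manual get_global_func probe above is often
# paired with a device guard from tvm.testing; a minimal sketch (the wrapper
# name is hypothetical, the decorator itself ships with TVM):
@tvm.testing.requires_rocm
def test_conv2d_on_rocm():
    test_conv2d()  # runs only when a ROCm device and runtime are present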
# conv2d_rocm below assumes `import tvm`, `import topi`, and
# `from tvm.contrib import miopen` from the surrounding topi package.
def conv2d_rocm(data, kernel, stride, padding, layout='NCHW', out_dtype='float32'):
    """Conv2D operator for rocm backend.

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width]

    stride : int or a list/tuple of two ints
        stride size, or [stride_height, stride_width]

    padding : int or a list/tuple of two ints
        padding size, or [pad_height, pad_width]

    layout : str
        layout of data

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    assert layout == 'NCHW', "Only NCHW layout is supported."
    assert isinstance(stride, int) or len(stride) == 2
    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride

    if isinstance(padding, int):
        pad_h = pad_w = padding
    else:
        pad_h, pad_w = padding

    target = tvm.target.current_target()
    if "miopen" in target.libs:
        return miopen.conv2d_forward(data, kernel,
                                     stride_h, stride_w,
                                     pad_h, pad_w,
                                     1,  # dilation_h
                                     1,  # dilation_w
                                     conv_mode=0)

    return topi.nn.conv2d_nchw(data, kernel, stride, padding, out_dtype)
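
# Hypothetical usage sketch for the operator above: the MIOpen branch only
# fires when the active target carries -libs=miopen; otherwise the generic
# topi NCHW kernel is returned (X and W here are illustrative placeholders,
# and building/running still needs a MIOpen-enabled TVM).
with tvm.target.create("rocm -libs=miopen"):
    X = tvm.placeholder((1, 3, 128, 128), name='X')
    W = tvm.placeholder((64, 3, 3, 3), name='W')
    Y = conv2d_rocm(X, W, stride=1, padding=1)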
# conv2d_nchw_miopen below uses helpers from current TVM:
# from tvm.contrib import miopen
# from tvm.topi.nn.utils import get_pad_tuple
# from tvm.topi.utils import get_const_tuple
def conv2d_nchw_miopen(cfg, data, kernel, strides, padding, dilation,
                       layout="NCHW", out_dtype="float32"):
    """Conv2D operator for rocm backend.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    data : tvm.te.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.te.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width]

    strides : int or a list/tuple of two ints
        stride size, or [stride_height, stride_width]

    padding : int or a list/tuple of 2 or 4 ints
        padding size, or [pad_height, pad_width] for 2 ints,
        or [pad_top, pad_left, pad_bottom, pad_right] for 4 ints

    layout : str
        layout of data

    Returns
    -------
    output : tvm.te.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    CO, CI, KH, KW = get_const_tuple(kernel.shape)
    N, _, H, W = get_const_tuple(data.shape)
    assert layout == "NCHW"

    # normalize strides/padding/dilation
    stride_h, stride_w = (strides, strides) if isinstance(strides, int) else strides
    pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
    pad_h, pad_w = pt + pb, pl + pr
    dilation_h, dilation_w = (dilation, dilation) if isinstance(dilation, int) else dilation
    # MIOpen takes one per-side pad value per axis, so padding must be symmetric
    assert (pt == pb) and (pl == pr)

    # pad_h/pad_w hold the *total* (top+bottom, left+right) padding, so they
    # are added once in the output-size estimate
    OH = (H + pad_h - KH) // stride_h + 1
    OW = (W + pad_w - KW) // stride_w + 1
    cfg.add_flop(2 * N * OH * OW * CO * CI *
                 ((KH - 1) * dilation_h + 1) * ((KW - 1) * dilation_w + 1))

    return miopen.conv2d_forward(data, kernel, stride_h, stride_w, pt, pl,
                                 dilation_h, dilation_w, conv_mode=0, data_type=1)
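
# How the symmetric-padding assertion above plays out (a sketch assuming
# get_pad_tuple from tvm.topi.nn.utils): an int padding of 1 resolves to
# (pad_top, pad_left, pad_bottom, pad_right) = (1, 1, 1, 1), the assert
# holds, and (pt, pl) is what gets handed to MIOpen.
from tvm.topi.nn.utils import get_pad_tuple

assert get_pad_tuple(1, (3, 3)) == (1, 1, 1, 1)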
# conv2d_rocm below assumes the surrounding topi package for its helpers
# (get_const_tuple, get_const_int, and the conv2d_cuda fallback), plus
# `import tvm` and `from tvm.contrib import miopen`.
def conv2d_rocm(cfg, data, kernel, strides, padding, layout='NCHW', out_dtype='float32'):
    """Conv2D operator for rocm backend.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template

    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width]

    strides : int or a list/tuple of two ints
        stride size, or [stride_height, stride_width]

    padding : int or a list/tuple of two ints
        padding size, or [pad_height, pad_width]

    layout : str
        layout of data

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    target = tvm.target.current_target()
    if "miopen" in target.libs:
        assert layout == 'NCHW', "Only NCHW layout is supported."
        CO, CI, KH, KW = get_const_tuple(kernel.shape)
        N, _, H, W = get_const_tuple(data.shape)

        stride_h, stride_w = (strides, strides) if isinstance(strides, int) else strides
        pad_h, pad_w = (padding, padding) if isinstance(padding, int) else padding

        OH = (H + 2 * pad_h - KH) // stride_h + 1
        OW = (W + 2 * pad_w - KW) // stride_w + 1
        cfg.add_flop(2 * N * OH * OW * CO * CI * KH * KW)

        # handle dilation: recover the factor from the shape of the dilated
        # kernel relative to the kernel before dilation
        dilation_h = dilation_w = 1
        kernel_before_dilation = kernel
        if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
            kernel_before_dilation = kernel.op.input_tensors[0]
            if layout == 'NCHW':
                dilation_h = (get_const_int(kernel.shape[2]) +
                              get_const_int(kernel_before_dilation.shape[2]) - 1) \
                    // get_const_int(kernel_before_dilation.shape[2])
                # width dilation divides by the width axis (shape[3]) of the
                # pre-dilation kernel, so non-square kernels are handled
                dilation_w = (get_const_int(kernel.shape[3]) +
                              get_const_int(kernel_before_dilation.shape[3]) - 1) \
                    // get_const_int(kernel_before_dilation.shape[3])
            elif layout == 'NHWC':
                dilation_h = (get_const_int(kernel.shape[1]) +
                              get_const_int(kernel_before_dilation.shape[1]) - 1) \
                    // get_const_int(kernel_before_dilation.shape[1])
                dilation_w = (get_const_int(kernel.shape[2]) +
                              get_const_int(kernel_before_dilation.shape[2]) - 1) \
                    // get_const_int(kernel_before_dilation.shape[2])

        return miopen.conv2d_forward(data, kernel_before_dilation,
                                     stride_h, stride_w,
                                     pad_h, pad_w,
                                     dilation_h, dilation_w,
                                     conv_mode=0)

    return conv2d_cuda(cfg, data, kernel, strides, padding, layout, out_dtype)
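
# Worked example of the dilation-recovery arithmetic above: topi's dilate op
# expands a kernel of size k with dilation d to (k - 1) * d + 1 taps, so the
# factor can be recovered as (dilated + k - 1) // k.
k, d = 3, 2
dilated = (k - 1) * d + 1            # -> 5
assert (dilated + k - 1) // k == d   # 7 // 3 == 2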
# conv2d_rocm below assumes `import tvm`, `import topi`, get_const_int from
# topi's util helpers, and `from tvm.contrib import miopen`.
def conv2d_rocm(data, kernel, stride, padding, layout='NCHW', out_dtype='float32'):
    """Conv2D operator for rocm backend.

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width]

    stride : int or a list/tuple of two ints
        stride size, or [stride_height, stride_width]

    padding : int or a list/tuple of two ints
        padding size, or [pad_height, pad_width]

    layout : str
        layout of data

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    assert layout == 'NCHW', "Only NCHW layout is supported."
    assert isinstance(stride, int) or len(stride) == 2
    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride

    if isinstance(padding, int):
        pad_h = pad_w = padding
    else:
        pad_h, pad_w = padding

    # handle dilation: MIOpen (like cuDNN, which this was adapted from) wants
    # the undilated kernel plus an explicit dilation factor, while the topi
    # fallback consumes the dilated kernel directly
    dilation_h = dilation_w = 1
    kernel_tvm = kernel
    kernel_cudnn = kernel
    if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
        kernel_before_dilation = kernel.op.input_tensors[0]
        kernel_cudnn = kernel_before_dilation
        dilation_h = (get_const_int(kernel.shape[2]) +
                      get_const_int(kernel_before_dilation.shape[2]) - 1) \
            // get_const_int(kernel_before_dilation.shape[2])
        # width dilation divides by the width axis (shape[3]) of the
        # pre-dilation kernel, so non-square kernels are handled
        dilation_w = (get_const_int(kernel.shape[3]) +
                      get_const_int(kernel_before_dilation.shape[3]) - 1) \
            // get_const_int(kernel_before_dilation.shape[3])

    target = tvm.target.current_target()
    if "miopen" in target.libs:
        return miopen.conv2d_forward(data, kernel_cudnn,
                                     stride_h, stride_w,
                                     pad_h, pad_w,
                                     dilation_h, dilation_w,
                                     conv_mode=0)

    return topi.nn.conv2d_nchw(data, kernel_tvm, stride, padding, out_dtype)
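
# For reference (MIOpen enum values assumed by the conv2d_forward calls in
# this file, per miopen.h): conv_mode 0 is miopenConvolution (1 would be
# miopenTranspose), and data_type 1 is miopenFloat, i.e. fp32.
MIOPEN_CONVOLUTION, MIOPEN_TRANSPOSE = 0, 1  # miopenConvMode_t
MIOPEN_HALF, MIOPEN_FLOAT = 0, 1             # first two miopenDataType_t values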