import tvm
import topi
from tvm.contrib import cudnn


def conv2d_cuda(data, kernel, stride, padding, layout='NCHW', out_dtype='float32'):
    """Conv2D operator for cuda backend.

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width]

    stride : int or a list/tuple of two ints
        stride size, or [stride_height, stride_width]

    padding : int or a list/tuple of two ints
        padding size, or [pad_height, pad_width]

    layout : str
        layout of data

    out_dtype : str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    assert isinstance(stride, int) or len(stride) == 2
    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride

    if isinstance(padding, int):
        pad_h = pad_w = padding
    else:
        pad_h, pad_w = padding

    target = tvm.target.current_target()
    if "cudnn" in target.libs:
        assert layout != 'HWCN', "HWCN layout not supported with CUDNN."
        tensor_format = 0  # CUDNN_TENSOR_NCHW
        if layout == 'NHWC':
            tensor_format = 1  # CUDNN_TENSOR_NHWC
        return cudnn.conv2d_forward(data,
                                    kernel,
                                    stride_h,
                                    stride_w,
                                    pad_h,
                                    pad_w,
                                    1,  # dilation_h
                                    1,  # dilation_w
                                    conv_mode=1,
                                    tensor_format=tensor_format,
                                    algo=-1)  # let CUDNN choose the best algo
    elif layout == 'NCHW':
        return topi.nn.conv2d_nchw(data, kernel, stride, padding, out_dtype)
    elif layout == 'HWCN':
        return topi.nn.conv2d_hwcn(data, kernel, stride, padding, out_dtype)
    else:
        raise ValueError("Layout {} is not supported yet".format(layout))
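
# A minimal usage sketch for the compute above. It is illustrative only: the
# shapes, the helper name, and the "-libs=cudnn" target flag are assumptions,
# and it requires a CUDA-enabled (and, for the cuDNN branch, cuDNN-enabled) TVM
# build. With "-libs=cudnn" in the target the cuDNN branch is taken; otherwise
# the generic topi.nn.conv2d_nchw compute is returned.
def _example_conv2d_cuda_usage():
    data = tvm.placeholder((1, 64, 56, 56), name='data')
    kernel = tvm.placeholder((64, 64, 3, 3), name='kernel')
    # Entering the target makes tvm.target.current_target() return it inside
    # conv2d_cuda, which is how the cuDNN dispatch above is triggered.
    with tvm.target.create("cuda -libs=cudnn"):
        out = conv2d_cuda(data, kernel, stride=1, padding=1, layout='NCHW')
    return out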
import numpy as np

import tvm
from tvm.contrib import cudnn


def verify_conv2d(data_dtype, conv_dtype, tensor_format=0):
    in_channel = 4
    out_channel = 32
    filter_h = 3
    filter_w = 3
    pad_h = 1
    pad_w = 1
    stride_h = 1
    stride_w = 1
    dilation_h = 1
    dilation_w = 1
    batch = 3
    height = 32
    width = 32

    if not tvm.module.enabled("cuda"):
        print("skip because cuda is not enabled...")
        return
    if not tvm.get_global_func("tvm.contrib.cudnn.conv2d.output_shape", True):
        print("skip because cudnn is not enabled...")
        return

    xshape = [batch, in_channel, height, width]
    wshape = cudnn.conv2d_w_shape(in_channel,
                                  out_channel,
                                  filter_h,
                                  filter_w)

    X = tvm.placeholder(xshape, name='X', dtype=data_dtype)
    W = tvm.placeholder(wshape, name='W', dtype=data_dtype)
    Y = cudnn.conv2d_forward(X,
                             W,
                             stride_h,
                             stride_w,
                             pad_h,
                             pad_w,
                             dilation_h,
                             dilation_w,
                             conv_mode=1,
                             tensor_format=tensor_format,
                             conv_dtype=conv_dtype,
                             algo=-1)
    yshape = [x.value for x in Y.shape]
    s = tvm.create_schedule(Y.op)

    def verify():
        ctx = tvm.gpu(0)
        f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv2d")
        x = tvm.nd.array(np.random.uniform(-1, 1, xshape).astype(data_dtype), ctx)
        w = tvm.nd.array(np.random.uniform(-1, 1, wshape).astype(data_dtype), ctx)
        y = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(data_dtype), ctx)
        f(x, w, y)

    verify()
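
# A possible driver for the helper above. The entry-point name and the dtype
# combinations are assumptions, not part of the snippet: it exercises the plain
# fp32 case and an fp16-storage / fp32-accumulation case, assuming the local
# cuDNN build supports that mixed-precision configuration in NCHW
# (tensor_format=0, i.e. CUDNN_TENSOR_NCHW).
def _run_verify_conv2d_variants():
    # Plain fp32 convolution with fp32 compute.
    verify_conv2d("float32", "float32", tensor_format=0)
    # fp16 tensors with fp32 accumulation (mixed precision).
    verify_conv2d("float16", "float32", tensor_format=0)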
def test_conv2d():
    in_channel = 3
    out_channel = 32
    filter_h = 3
    filter_w = 3
    pad_h = 1
    pad_w = 1
    stride_h = 1
    stride_w = 1
    dilation_h = 1
    dilation_w = 1

    xshape = [4, 3, 32, 32]
    if not tvm.module.enabled("cuda"):
        print("skip because cuda is not enabled...")
        return
    if not tvm.get_global_func("tvm.contrib.cudnn.conv2d.output_shape", True):
        print("skip because cudnn is not enabled...")
        return
    wshape = cudnn.conv2d_w_shape(in_channel,
                                  out_channel,
                                  filter_h,
                                  filter_w)

    X = tvm.placeholder(xshape, name='X')
    W = tvm.placeholder(wshape, name='W')
    Y = cudnn.conv2d_forward(X,
                             W,
                             stride_h,
                             stride_w,
                             pad_h,
                             pad_w,
                             dilation_h,
                             dilation_w,
                             conv_mode=1,
                             tensor_format=0,
                             algo=1)
    yshape = [x.value for x in Y.shape]
    s = tvm.create_schedule(Y.op)

    def verify():
        ctx = tvm.gpu(0)
        f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv2d")
        x = tvm.nd.array(np.random.uniform(-1, 1, xshape).astype(np.float32), ctx)
        w = tvm.nd.array(np.random.uniform(-1, 1, wshape).astype(np.float32), ctx)
        y = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), ctx)
        f(x, w, y)

    verify()
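
# The verify() helpers above only launch the kernel; they do not check the
# result. A numerical check against the NumPy reference in topi.testing could
# look like the sketch below. The helper name is hypothetical; it assumes the
# NCHW layout and fp32 dtype used by test_conv2d, and that topi is importable.
import topi.testing


def _verify_against_numpy(f, ctx, xshape, wshape, yshape, stride, padding):
    x_np = np.random.uniform(-1, 1, xshape).astype(np.float32)
    w_np = np.random.uniform(-1, 1, wshape).astype(np.float32)
    # Reference result computed on the host.
    y_ref = topi.testing.conv2d_nchw_python(x_np, w_np, stride, padding)
    x = tvm.nd.array(x_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    y = tvm.nd.array(np.zeros(yshape, dtype=np.float32), ctx)
    f(x, w, y)
    np.testing.assert_allclose(y.asnumpy(), y_ref, rtol=1e-3)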
def conv2d_cuda(cfg, data, kernel, strides, padding, dilation, layout='NCHW', out_dtype='float32'):
    """Conv2D operator for cuda backend.

    Parameters
    ----------
    cfg : ConfigEntity
        The config for this template

    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width] or
        5-D with shape [batch, ic_chunk, in_height, in_width, ic_block]

    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width] or
        6-D with shape [num_filter_chunk, in_channel_chunk, filter_height,
        filter_width, num_filter_block, in_channel_block]

    strides : int or a list/tuple of two ints
        stride size, or [stride_height, stride_width]

    padding : int or a list/tuple of two ints
        padding size, or [pad_height, pad_width]

    dilation : int or a list/tuple of two ints
        dilation size, or [dilation_height, dilation_width]

    layout : str
        layout of data

    out_dtype : str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    target = tvm.target.current_target()

    if "cudnn" in target.libs:
        if layout == 'NCHW':
            tensor_format = 0  # CUDNN_TENSOR_NCHW
            N, _, H, W = get_const_tuple(data.shape)
        elif layout == 'NHWC':
            tensor_format = 1  # CUDNN_TENSOR_NHWC
            N, H, W, _ = get_const_tuple(data.shape)
        else:
            raise ValueError("Unsupported layout %s in cudnn" % layout)
        CO, CI, KH, KW = get_const_tuple(kernel.shape)

        # handle dilation
        stride_h, stride_w = (strides, strides) if isinstance(strides, int) else strides
        pad_h, pad_w = (padding, padding) if isinstance(padding, int) else padding
        dilation_h, dilation_w = (dilation, dilation) if isinstance(dilation, int) else dilation

        OH = (H + 2 * pad_h - KH) // stride_h + 1
        OW = (W + 2 * pad_w - KW) // stride_w + 1
        cfg.add_flop(2 * N * OH * OW * CO * CI * ((KH - 1) * dilation_h + 1) * \
                     ((KW - 1) * dilation_w + 1))

        return cudnn.conv2d_forward(data,
                                    kernel,
                                    stride_h,
                                    stride_w,
                                    pad_h,
                                    pad_w,
                                    dilation_h,
                                    dilation_w,
                                    conv_mode=1,
                                    tensor_format=tensor_format,
                                    algo=-1)  # let CUDNN choose the best algo

    if cfg.template_key == 'winograd':
        return winograd_cuda(cfg, data, kernel, strides, padding, dilation, layout,
                             out_dtype, pre_computed=False)
    if cfg.template_key == 'int8':
        return conv2d_NCHWc_int8(cfg, data, kernel, strides, padding, dilation,
                                  layout, out_dtype)

    if layout == 'NCHW':
        return nn.conv2d_nchw(data, kernel, strides, padding, dilation, out_dtype)
    elif layout == 'HWCN':
        return nn.conv2d_hwcn(data, kernel, strides, padding, dilation, out_dtype)
    else:
        raise ValueError("Layout {} is not supported yet".format(layout))
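
# A worked example of the output-shape and FLOP bookkeeping in the cuDNN branch
# above. The concrete shapes are illustrative assumptions, not taken from the
# code: a 1x3x224x224 input convolved with a 64x3x7x7 kernel at stride 2,
# padding 3, dilation 1.
def _example_flop_count():
    N, CI, H, W = 1, 3, 224, 224
    CO, KH, KW = 64, 7, 7
    stride_h = stride_w = 2
    pad_h = pad_w = 3
    dilation_h = dilation_w = 1

    OH = (H + 2 * pad_h - KH) // stride_h + 1   # (224 + 6 - 7) // 2 + 1 = 112
    OW = (W + 2 * pad_w - KW) // stride_w + 1   # 112
    flop = 2 * N * OH * OW * CO * CI * \
           ((KH - 1) * dilation_h + 1) * ((KW - 1) * dilation_w + 1)
    assert (OH, OW) == (112, 112)
    assert flop == 236027904  # ~0.24 GFLOP, the value cfg.add_flop would record
    return flop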