# Imports assumed by the excerpts below (pre-0.7 TVM package layout, where
# topi is a standalone package). Dispatch tables such as
# _conv3d_ncdhw_implement and _conv3d_ndhwc_tensorcore_implement are defined
# elsewhere in the original test files.
import numpy as np
import scipy.signal
import tvm
import tvm.testing
from tvm import te, autotvm
from tvm.contrib import nvcc
from tvm.contrib.pickle_memoize import memoize
import topi
import topi.testing
from topi.nn.pad import pad
from topi.nn.util import get_pad_tuple3d
from topi.util import get_const_tuple, simplify


def group_conv3d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtype=None):
    """Grouped 3D convolution operator in NCDHW layout."""
    if out_dtype is None:
        out_dtype = Input.dtype
    assert isinstance(stride, int) or len(stride) == 3
    assert isinstance(dilation, int) or len(dilation) == 3

    if isinstance(stride, int):
        stride_z = stride_h = stride_w = stride
    else:
        stride_z, stride_h, stride_w = stride

    if isinstance(dilation, int):
        dilation_z = dilation_h = dilation_w = dilation
    else:
        dilation_z, dilation_h, dilation_w = dilation

    batch, in_channel, in_z, in_height, in_width = get_const_tuple(Input.shape)
    num_filter, _, kernel_z, kernel_h, kernel_w = get_const_tuple(Filter.shape)

    assert in_channel % groups == 0, "input channels must be divisible by groups"
    assert num_filter % groups == 0, "output channels must be divisible by groups"

    pad_front, pad_top, pad_left, pad_back, pad_down, pad_right = get_pad_tuple3d(
        padding, (kernel_z, kernel_h, kernel_w))

    # compute the output shape
    out_channel = num_filter
    out_z = simplify(
        (in_z - (kernel_z - 1) * dilation_z - 1 + pad_front + pad_back) // stride_z + 1)
    out_height = simplify(
        (in_height - (kernel_h - 1) * dilation_h - 1 + pad_top + pad_down) // stride_h + 1)
    out_width = simplify(
        (in_width - (kernel_w - 1) * dilation_w - 1 + pad_left + pad_right) // stride_w + 1)

    # compute graph
    pad_before = [0, 0, pad_front, pad_top, pad_left]
    pad_after = [0, 0, pad_back, pad_down, pad_right]
    temp = pad(Input, pad_before, pad_after, name="pad_temp")
    rc = tvm.reduce_axis((0, in_channel // groups), name='rc')
    rz = tvm.reduce_axis((0, kernel_z), name='rz')
    ry = tvm.reduce_axis((0, kernel_h), name='ry')
    rx = tvm.reduce_axis((0, kernel_w), name='rx')
    return tvm.compute(
        (batch, out_channel, out_z, out_height, out_width),
        lambda nn, ff, zz, yy, xx: tvm.sum(
            temp[nn,
                 ff // (num_filter // groups) * (in_channel // groups) + rc,
                 zz * stride_z + rz * dilation_z,
                 yy * stride_h + ry * dilation_h,
                 xx * stride_w + rx * dilation_w].astype(out_dtype) *
            Filter[ff, rc, rz, ry, rx].astype(out_dtype),
            axis=[rc, rz, ry, rx]),
        tag='group_conv3d_nchw')
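
# Usage sketch (not from the original source): build the grouped conv3d
# compute for a small NCDHW workload and inspect the inferred output shape.
# The shapes and groups value are illustrative assumptions, and mixing
# te.placeholder with the tvm.compute-era definition above only works on
# TVM versions where both names are available.
def _example_group_conv3d_nchw():
    data = te.placeholder((1, 8, 16, 16, 16), name="data")    # NCDHW
    kernel = te.placeholder((16, 2, 3, 3, 3), name="kernel")  # [out_c, in_c // groups, kd, kh, kw]
    out = group_conv3d_nchw(data, kernel, stride=1, padding=1, dilation=1, groups=4)
    # padding=1 with a 3x3x3 kernel and stride 1 preserves the spatial dims
    print(get_const_tuple(out.shape))  # (1, 16, 16, 16, 16)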
def conv3d_ndhwc_python(a_np, w_np, stride, padding):
    """Convolution 3D operator in NDHWC layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_depth, in_height, in_width, in_channel]

    w_np : numpy.ndarray
        5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str or a list/tuple of three ints
        Padding size, or ['VALID', 'SAME'], or [pad_depth, pad_height, pad_width]

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_depth, out_height, out_width, out_channel]
    """
    batch, in_depth, in_height, in_width, in_channel = a_np.shape
    kernel_d, kernel_h, kernel_w, _, num_filter = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = \
        get_pad_tuple3d(padding, (kernel_d, kernel_h, kernel_w))
    pad_d = pad_front + pad_back
    pad_h = pad_top + pad_bottom
    pad_w = pad_left + pad_right

    # compute the output shape
    out_channel = num_filter
    out_depth = (in_depth - kernel_d + pad_d) // stride_d + 1
    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
    out_width = (in_width - kernel_w + pad_w) // stride_w + 1

    # change the layout from NDHWC to NCDHW
    at = a_np.transpose((0, 4, 1, 2, 3))
    wt = w_np.transpose((4, 3, 0, 1, 2))
    bt = np.zeros((batch, out_channel, out_depth, out_height, out_width))

    # computation
    for n in range(batch):
        for f in range(out_channel):
            for c in range(in_channel):
                if pad_d > 0 or pad_h > 0 or pad_w > 0:
                    apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w))
                    apad[pad_front:pad_front + in_depth,
                         pad_top:pad_top + in_height,
                         pad_left:pad_left + in_width] = at[n, c]
                else:
                    apad = at[n, c]
                out = scipy.signal.convolve(apad, np.flip(wt[f, c]), mode='valid')
                bt[n, f] += out[::stride_d, ::stride_h, ::stride_w]
    return bt.transpose((0, 2, 3, 4, 1))
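
# Quick shape check for the reference above (illustrative values, not from
# the original tests): with 'SAME' padding and unit stride the spatial dims
# are preserved and only the channel dim changes.
def _example_conv3d_ndhwc_shapes():
    a = np.random.uniform(size=(1, 8, 8, 8, 4)).astype("float32")   # NDHWC
    w = np.random.uniform(size=(3, 3, 3, 4, 16)).astype("float32")  # DHWIO
    b = conv3d_ndhwc_python(a, w, stride=1, padding="SAME")
    assert b.shape == (1, 8, 8, 8, 16)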
def _conv3d_ncdhw_python(a_np, w_np, stride, padding):
    batch, in_channel, in_depth, in_height, in_width = a_np.shape
    num_filter, _, kernel_d, kernel_h, kernel_w = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = \
        get_pad_tuple3d(padding, (kernel_d, kernel_h, kernel_w))
    pad_d = pad_front + pad_back
    pad_h = pad_top + pad_bottom
    pad_w = pad_left + pad_right

    # compute the output shape
    out_channel = num_filter
    out_depth = (in_depth - kernel_d + pad_d) // stride_d + 1
    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
    b_np = np.zeros((batch, out_channel, out_depth, out_height, out_width))

    # computation
    for n in range(batch):
        for f in range(out_channel):
            for c in range(in_channel):
                if pad_d > 0 or pad_h > 0 or pad_w > 0:
                    apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w))
                    apad[pad_front:pad_front + in_depth,
                         pad_top:pad_top + in_height,
                         pad_left:pad_left + in_width] = a_np[n, c]
                else:
                    apad = a_np[n, c]
                out = scipy.signal.convolve(apad, np.flip(w_np[f, c]), mode='valid')
                b_np[n, f] += out[::stride_d, ::stride_h, ::stride_w]
    return b_np
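
# Cross-check sketch (assumption: both references live in the same module):
# the NCDHW helper should agree with the NDHWC one after layout transposes,
# since both implement the same direct convolution.
def _example_ncdhw_vs_ndhwc():
    a = np.random.uniform(size=(1, 4, 6, 6, 6)).astype("float32")  # NCDHW
    w = np.random.uniform(size=(8, 4, 3, 3, 3)).astype("float32")  # OIDHW
    ref = _conv3d_ncdhw_python(a, w, stride=2, padding=1)
    alt = conv3d_ndhwc_python(a.transpose(0, 2, 3, 4, 1),          # -> NDHWC
                              w.transpose(2, 3, 4, 1, 0),          # -> DHWIO
                              stride=2, padding=1)
    np.testing.assert_allclose(ref, alt.transpose(0, 4, 1, 2, 3), rtol=1e-5)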
def verify_conv3d_ncdhw(batch, in_channel, in_size, num_filter, depth_kernel,
                        space_kernel, stride, padding, dilation=1,
                        add_bias=False, add_relu=False):
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (depth_kernel, space_kernel, space_kernel))
    padding_sum = pad_front + pad_back + pad_top + pad_left + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, space_kernel, stride,
           padding_sum, dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
    W = te.placeholder((num_filter, in_channel, depth_kernel, space_kernel, space_kernel),
                       name='W')
    bias = te.placeholder((num_filter, 1, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ncdhw.verify_conv3d_ncdhw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = topi.testing.dilate_python(w_np, (1, 1, dilation, dilation, dilation))
        c_np = topi.testing.conv3d_ncdhw_python(a_np, dw_np, stride, padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        fcompute, fschedule = topi.testing.dispatch(device, _conv3d_ncdhw_implement)
        with tvm.target.create(device):
            C = fcompute(A, W, (stride, stride, stride), padding,
                         (dilation, dilation, dilation), dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              space_kernel, stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              space_kernel, stride, padding_sum, dilation))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)

    for device in ["cuda"]:
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
            check_device(device)
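
# Example driver (workload values are illustrative assumptions; the original
# suite exercises many more configurations): a 3x3x3 kernel with stride 1
# and padding 1 preserves the spatial dims, the second call adds bias/relu.
def _example_test_conv3d_ncdhw():
    verify_conv3d_ncdhw(1, 16, 16, 32, 3, 3, 1, 1)
    verify_conv3d_ncdhw(1, 16, 16, 32, 3, 3, 2, 1, add_bias=True, add_relu=True)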
def verify_conv3d_ndhwc(batch, in_channel, in_size, num_filter, kernel, stride,
                        padding, dilation=1, add_bias=False, add_relu=False,
                        devices='cuda'):
    """Test the conv3d with tensorcore for ndhwc layout"""
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel, kernel, kernel))
    padding_sum = pad_front + pad_top + pad_left + pad_back + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, kernel, stride,
           padding_sum, dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A')
    W = te.placeholder((kernel, kernel, kernel, in_channel, num_filter), name='W')
    bias = te.placeholder((1, 1, 1, 1, num_filter), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ndhwc.verify_conv3d_ndhwc")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        # dilate the depth/height/width axes of the DHWIO kernel
        dw_np = topi.testing.dilate_python(w_np, (dilation, dilation, dilation, 1, 1))
        c_np = topi.testing.conv3d_ndhwc_python(a_np, dw_np, stride, padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if not nvcc.have_tensorcore(ctx.compute_version):
            print("skip because gpu does not support Tensor Cores")
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            fcompute, fschedule = topi.testing.dispatch(
                device, _conv3d_ndhwc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, 'float32')
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              kernel, stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              kernel, stride, padding_sum, dilation))
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=rtol)

    check_device(devices)
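
# Example driver for the tensorcore path. Tensor core wmma fragments need
# matrix dims that are multiples of 16, so batch/in_channel/num_filter are
# chosen accordingly (illustrative values, assuming the same constraint the
# 2-D tensorcore schedules impose).
def _example_test_conv3d_ndhwc_tensorcore():
    verify_conv3d_ndhwc(16, 16, 14, 16, 3, 1, 1)
    verify_conv3d_ndhwc(16, 64, 12, 32, 3, 1, 1, add_bias=True, add_relu=True)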
def conv3d_transpose_ncdhw_python(a_np, w_np, stride, padding):
    """Transposed 3d convolution operator in NCDHW layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    w_np : numpy.ndarray
        5-D with shape [in_channel, num_filter, filter_depth, filter_height, filter_width]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str
        Padding size

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]
    """
    batch, in_c, in_d, in_h, in_w = a_np.shape
    _, out_c, filter_d, filter_h, filter_w = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    # dilate stage
    dilated_a_np = topi.testing.dilate_python(a_np, [1, 1, stride_d, stride_h, stride_w])

    # padding stage
    fpad_front, fpad_top, fpad_left, fpad_back, fpad_bottom, fpad_right = get_pad_tuple3d(
        padding, (filter_d, filter_h, filter_w))
    bpad_front = filter_d - 1 - fpad_front
    bpad_back = filter_d - 1 - fpad_back
    bpad_top = filter_h - 1 - fpad_top
    bpad_bottom = filter_h - 1 - fpad_bottom
    bpad_left = filter_w - 1 - fpad_left
    bpad_right = filter_w - 1 - fpad_right
    padded_a_np = np.zeros((batch,
                            in_c,
                            dilated_a_np.shape[2] + bpad_front + bpad_back,
                            dilated_a_np.shape[3] + bpad_top + bpad_bottom,
                            dilated_a_np.shape[4] + bpad_left + bpad_right))
    padded_a_np[:, :,
                bpad_front:dilated_a_np.shape[2] + bpad_front,
                bpad_top:dilated_a_np.shape[3] + bpad_top,
                bpad_left:dilated_a_np.shape[4] + bpad_left] = dilated_a_np

    # convolution stage
    out_d = (in_d - 1) * stride_d - fpad_front - fpad_back + filter_d
    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
    w_np = np.flip(w_np, axis=[2, 3, 4]).transpose((1, 0, 2, 3, 4))
    b_np = topi.testing.conv3d_ncdhw_python(padded_a_np, w_np,
                                            stride=(1, 1, 1), padding=(0, 0, 0))
    return b_np
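
# Shape sanity check (illustrative values, not from the original tests):
# for a transposed conv, out = (in - 1) * stride - 2 * pad + kernel per
# spatial dim, so in=8, stride=2, pad=1, kernel=3 gives (8-1)*2 - 2 + 3 = 15.
def _example_conv3d_transpose_shape():
    a = np.random.uniform(size=(1, 3, 8, 8, 8)).astype("float32")  # NCDHW
    w = np.random.uniform(size=(3, 6, 3, 3, 3)).astype("float32")  # IODHW
    b = conv3d_transpose_ncdhw_python(a, w, stride=2, padding=1)
    assert b.shape == (1, 6, 15, 15, 15)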