Example 1
def group_conv3d_nchw(Input,
                      Filter,
                      stride,
                      padding,
                      dilation,
                      groups,
                      out_dtype=None):
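    """Group 3D convolution operator in NCDHW layout.

    Parameters
    ----------
    Input : tvm.te.Tensor
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    Filter : tvm.te.Tensor
        5-D with shape [num_filter, in_channel // groups, filter_depth,
        filter_height, filter_width]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str or a list/tuple of three ints
        Padding size, or ['VALID', 'SAME'], or [pad_depth, pad_height, pad_width]

    dilation : int or a list/tuple of three ints
        Dilation size, or [dilation_depth, dilation_height, dilation_width]

    groups : int
        Number of groups

    out_dtype : str, optional
        Output data type; defaults to the input dtype

    Returns
    -------
    Output : tvm.te.Tensor
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]
    """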
    if out_dtype is None:
        out_dtype = Input.dtype
    assert isinstance(stride, int) or len(stride) == 3
    assert isinstance(dilation, int) or len(dilation) == 3
    if isinstance(stride, int):
        stride_z = stride_h = stride_w = stride
    else:
        stride_z, stride_h, stride_w = stride

    if isinstance(dilation, int):
        dilation_z = dilation_h = dilation_w = dilation
    else:
        dilation_z, dilation_h, dilation_w = dilation

    batch, in_channel, in_z, in_height, in_width = get_const_tuple(Input.shape)
    num_filter, _, kernel_z, kernel_h, kernel_w = get_const_tuple(Filter.shape)

    assert in_channel % groups == 0, "input channels must be divisible by groups"
    assert num_filter % groups == 0, "output channels must be divisible by groups"

    pad_front, pad_top, pad_left, pad_back, pad_down, pad_right = get_pad_tuple3d(
        padding, (kernel_z, kernel_h, kernel_w))

    # compute the output shape
    out_channel = num_filter
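    # Each spatial extent follows the dilated-convolution size rule:
    #   out = (in + pad_total - dilation * (kernel - 1) - 1) // stride + 1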
    out_z = simplify(
        (in_z -
         (kernel_z - 1) * dilation_z - 1 + pad_front + pad_back) // stride_z +
        1)
    out_height = simplify(
        (in_height -
         (kernel_h - 1) * dilation_h - 1 + pad_top + pad_down) // stride_h + 1)
    out_width = simplify(
        (in_width -
         (kernel_w - 1) * dilation_w - 1 + pad_left + pad_right) // stride_w +
        1)
    # compute graph
    pad_before = [0, 0, pad_front, pad_top, pad_left]
    pad_after = [0, 0, pad_back, pad_down, pad_right]
    temp = pad(Input, pad_before, pad_after, name="pad_temp")
    rc = tvm.reduce_axis((0, in_channel // groups), name='rc')
    rz = tvm.reduce_axis((0, kernel_z), name='rz')
    ry = tvm.reduce_axis((0, kernel_h), name='ry')
    rx = tvm.reduce_axis((0, kernel_w), name='rx')
    return tvm.compute(
        (batch, out_channel, out_z, out_height, out_width),
        lambda nn, ff, zz, yy, xx: tvm.sum(
            temp[nn,
                 ff // (num_filter // groups) * (in_channel // groups) + rc,
                 zz * stride_z + rz * dilation_z,
                 yy * stride_h + ry * dilation_h,
                 xx * stride_w + rx * dilation_w].astype(out_dtype) *
            Filter[ff, rc, rz, ry, rx].astype(out_dtype),
            axis=[rc, rz, ry, rx]),
        tag='group_conv3d_nchw')
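A minimal usage sketch (new, illustrative code; it assumes a TVM build where both the tvm.* compute API used above and tvm.te placeholders are available, plus the TOPI helpers get_pad_tuple3d, pad, simplify and get_const_tuple already imported by the module):

from tvm import te

# NCDHW input: batch 1, 4 channels split into 2 groups, 8x8x8 volume.
Input = te.placeholder((1, 4, 8, 8, 8), name="Input")
# Filter layout: (num_filter, in_channel // groups, kd, kh, kw).
Filter = te.placeholder((8, 2, 3, 3, 3), name="Filter")

Output = group_conv3d_nchw(Input, Filter,
                           stride=1, padding=1, dilation=1, groups=2)
print(Output.shape)  # expected (1, 8, 8, 8, 8) with stride 1 and padding 1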
def conv3d_ndhwc_python(a_np, w_np, stride, padding):
    """Convolution 3D operator in NDHWC layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_depth, in_height, in_width, in_channel]

    w_np : numpy.ndarray
        5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str or a list/tuple of three ints
        Padding size, or ['VALID', 'SAME'], or [pad_depth, pad_height, pad_width]

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_depth, out_height, out_width, out_channel]
    """
    batch, in_depth, in_height, in_width, in_channel = a_np.shape
    kernel_d, kernel_h, kernel_w, _, num_filter = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = \
        get_pad_tuple3d(padding, (kernel_d, kernel_h, kernel_w))
    pad_d = pad_front + pad_back
    pad_h = pad_top + pad_bottom
    pad_w = pad_left + pad_right
    # compute the output shape
    out_channel = num_filter
    out_depth = (in_depth - kernel_d + pad_d) // stride_d + 1
    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
    # change the layout from NDHWC to NCDHW
    at = a_np.transpose((0, 4, 1, 2, 3))
    wt = w_np.transpose((4, 3, 0, 1, 2))
    bt = np.zeros((batch, out_channel, out_depth, out_height, out_width))
    # computation
    for n in range(batch):
        for f in range(out_channel):
            for c in range(in_channel):
                if pad_d > 0 or pad_h > 0 or pad_w > 0:
                    apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w))
                    apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\
                         pad_left:pad_left + in_width] = at[n, c]
                else:
                    apad = at[n, c]
                out = scipy.signal.convolve(
                    apad, np.flip(wt[f, c]), mode='valid')
                bt[n, f] += out[::stride_d, ::stride_h, ::stride_w]
    return bt.transpose((0, 2, 3, 4, 1))
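A quick, illustrative shape check for the reference kernel above (assumes numpy/scipy and get_pad_tuple3d are importable, as in the original test utilities):

import numpy as np

a = np.random.uniform(size=(1, 8, 8, 8, 4)).astype("float32")   # NDHWC
w = np.random.uniform(size=(3, 3, 3, 4, 16)).astype("float32")  # DHWIO
out = conv3d_ndhwc_python(a, w, stride=1, padding="SAME")
print(out.shape)  # (1, 8, 8, 8, 16): 'SAME' padding keeps the spatial size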
Example 3
def _conv3d_ncdhw_python(a_np, w_np, stride, padding):
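    """Reference 3D convolution in NCDHW layout, computed with scipy.

    a_np is [batch, in_channel, in_depth, in_height, in_width] and w_np is
    [num_filter, in_channel, filter_depth, filter_height, filter_width].
    """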
    batch, in_channel, in_depth, in_height, in_width = a_np.shape
    num_filter, _, kernel_d, kernel_h, kernel_w = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = \
        get_pad_tuple3d(padding, (kernel_d, kernel_h, kernel_w))
    pad_d = pad_front + pad_back
    pad_h = pad_top + pad_bottom
    pad_w = pad_left + pad_right

    # compute the output shape
    out_channel = num_filter
    out_depth = (in_depth - kernel_d + pad_d) // stride_d + 1
    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
    b_np = np.zeros((batch, out_channel, out_depth, out_height, out_width))
    # computation
    for n in range(batch):
        for f in range(out_channel):
            for c in range(in_channel):
                if pad_d > 0 or pad_h > 0 or pad_w > 0:
                    apad = np.zeros((in_depth + pad_d, in_height + pad_h,
                                     in_width + pad_w))
                    apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\
                         pad_left:pad_left + in_width] = a_np[n, c]
                else:
                    apad = a_np[n, c]
                out = scipy.signal.convolve(apad,
                                            np.flip(w_np[f, c]),
                                            mode='valid')
                b_np[n, f] += out[::stride_d, ::stride_h, ::stride_w]
    return b_np
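The two reference kernels should agree up to a layout transpose; a small illustrative cross-check (assuming both functions and numpy are in scope):

import numpy as np

a_ncdhw = np.random.uniform(size=(1, 4, 8, 8, 8)).astype("float32")
w_oidhw = np.random.uniform(size=(6, 4, 3, 3, 3)).astype("float32")

ref_ncdhw = _conv3d_ncdhw_python(a_ncdhw, w_oidhw, stride=1, padding=1)

# Same data rearranged into NDHWC activations and DHWIO weights.
a_ndhwc = a_ncdhw.transpose((0, 2, 3, 4, 1))
w_dhwio = w_oidhw.transpose((2, 3, 4, 1, 0))
ref_ndhwc = conv3d_ndhwc_python(a_ndhwc, w_dhwio, stride=1, padding=1)

np.testing.assert_allclose(ref_ncdhw,
                           ref_ndhwc.transpose((0, 4, 1, 2, 3)),
                           rtol=1e-4)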
Example 4
def verify_conv3d_ncdhw(batch,
                        in_channel,
                        in_size,
                        num_filter,
                        depth_kernel,
                        space_kernel,
                        stride,
                        padding,
                        dilation=1,
                        add_bias=False,
                        add_relu=False):
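    """Check the NCDHW conv3d compute/schedule against the reference result."""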
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (depth_kernel, space_kernel, space_kernel))
    padding_sum = pad_front + pad_back + pad_top + pad_left + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, space_kernel, stride,
           padding_sum, dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_channel, in_depth, in_height, in_width),
                       name='A')
    W = te.placeholder(
        (num_filter, in_channel, depth_kernel, space_kernel, space_kernel),
        name='W')
    bias = te.placeholder((num_filter, 1, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ncdhw.verify_conv3d_ncdhw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = topi.testing.dilate_python(
            w_np, (1, 1, dilation, dilation, dilation))
        c_np = topi.testing.conv3d_ncdhw_python(a_np, dw_np, stride, padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        fcompute, fschedule = topi.testing.dispatch(device,
                                                    _conv3d_ncdhw_implement)
        with tvm.target.create(device):
            C = fcompute(A, W, (stride, stride, stride), padding,
                         (dilation, dilation, dilation), dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              space_kernel, stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              space_kernel, stride, padding_sum, dilation))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)

    for device in ["cuda"]:
        with autotvm.tophub.context(
                device):  # load tophub pre-tuned parameters
            check_device(device)
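Illustrative invocations with hypothetical workload parameters (a test file would typically drive this helper with a list of such tuples):

# (batch, in_channel, in_size, num_filter, depth_kernel, space_kernel,
#  stride, padding)
verify_conv3d_ncdhw(1, 32, 32, 5, 1, 1, 1, 0)
verify_conv3d_ncdhw(1, 32, 32, 64, 3, 3, 1, 1, add_bias=True, add_relu=True)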
def verify_conv3d_ndhwc(batch,
                        in_channel,
                        in_size,
                        num_filter,
                        kernel,
                        stride,
                        padding,
                        dilation=1,
                        add_bias=False,
                        add_relu=False,
                        devices='cuda'):
    """Test the conv3d with tensorcore for ndhwc layout"""
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel, kernel, kernel))
    padding_sum = pad_front + pad_top + pad_left + pad_back + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, kernel, stride, padding_sum,
           dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_depth, in_height, in_width, in_channel),
                       name='A')
    W = te.placeholder((kernel, kernel, kernel, in_channel, num_filter),
                       name='W')
    bias = te.placeholder((1, 1, 1, 1, num_filter), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ndhwc.verify_conv3d_ndhwc")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        # Weights are DHWIO, so the dilation applies to the three spatial axes.
        dw_np = topi.testing.dilate_python(w_np, (dilation, dilation, dilation, 1, 1))
        c_np = topi.testing.conv3d_ndhwc_python(a_np, dw_np, stride, padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if not nvcc.have_tensorcore(ctx.compute_version):
            print("skip because gpu does not support Tensor Cores")
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            fcompute, fschedule = topi.testing.dispatch(
                device, _conv3d_ndhwc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, 'float32')
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=rtol)

    check_device(devices)
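An illustrative call with hypothetical Tensor Core friendly sizes (batch and channel counts chosen as multiples of 16, which the wmma-based schedules generally expect):

verify_conv3d_ndhwc(16, 16, 14, 16, 3, 1, 1)
verify_conv3d_ndhwc(16, 32, 14, 32, 3, 1, 1, add_bias=True, add_relu=True)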
def conv3d_transpose_ncdhw_python(a_np, w_np, stride, padding):
    """Transposed 3d convolution operator in NCDHW layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    w_np : numpy.ndarray
        5-D with shape [in_channel, num_filter, filter_depth, filter_height, filter_width]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str
        Padding size

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]
    """
    batch, in_c, in_d, in_h, in_w = a_np.shape
    _, out_c, filter_d, filter_h, filter_w = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    # dilate stage
    dilated_a_np = topi.testing.dilate_python(
        a_np, [1, 1, stride_d, stride_h, stride_w])

    # padding stage
    fpad_front, fpad_top, fpad_left, fpad_back, fpad_bottom, fpad_right = get_pad_tuple3d(
        padding, (filter_d, filter_h, filter_w))

    bpad_front = filter_d - 1 - fpad_front
    bpad_back = filter_d - 1 - fpad_back
    bpad_top = filter_h - 1 - fpad_top
    bpad_bottom = filter_h - 1 - fpad_bottom
    bpad_left = filter_w - 1 - fpad_left
    bpad_right = filter_w - 1 - fpad_right
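    # These "back" pads convert the forward convolution's padding into the
    # padding needed by the equivalent stride-1 convolution on the dilated
    # input: each spatial edge receives (kernel - 1 - forward_pad) zeros.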

    padded_a_np = np.zeros(
        (batch, in_c, dilated_a_np.shape[2] + bpad_front + bpad_back,
         dilated_a_np.shape[3] + bpad_top + bpad_bottom,
         dilated_a_np.shape[4] + bpad_left + bpad_right))

    padded_a_np[:, :, bpad_front:dilated_a_np.shape[2] + bpad_front,
                bpad_top:dilated_a_np.shape[3] + bpad_top,
                bpad_left:dilated_a_np.shape[4] + bpad_left] = dilated_a_np

    # convolution stage
    out_d = (in_d - 1) * stride_d - fpad_front - fpad_back + filter_d
    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w

    w_np = np.flip(w_np, axis=[2, 3, 4]).transpose((1, 0, 2, 3, 4))
    b_np = topi.testing.conv3d_ncdhw_python(padded_a_np,
                                            w_np,
                                            stride=(1, 1, 1),
                                            padding=(0, 0, 0))

    return b_np
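A small illustrative run of the transposed-convolution reference (assumes numpy and the topi.testing helpers used above are importable):

import numpy as np

# NCDHW activations and IODHW weights, as documented above.
a = np.random.uniform(size=(1, 6, 4, 4, 4)).astype("float32")
w = np.random.uniform(size=(6, 8, 3, 3, 3)).astype("float32")
out = conv3d_transpose_ncdhw_python(a, w, stride=2, padding=1)
# Each spatial dim: (in - 1) * stride - 2 * pad + kernel = (4 - 1) * 2 - 2 + 3 = 7
print(out.shape)  # expected (1, 8, 7, 7, 7)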