Example 1
def group_conv3d_nchw(Input,
                      Filter,
                      stride,
                      padding,
                      dilation,
                      groups,
                      out_dtype=None):
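    """Group convolution operator in NCDHW layout.

    Parameters
    ----------
    Input : tvm.Tensor
        5-D with shape [batch, in_channel, in_z, in_height, in_width]

    Filter : tvm.Tensor
        5-D with shape [num_filter, in_channel // groups, kernel_z, kernel_h, kernel_w]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_z, stride_height, stride_width]

    padding : int or str
        Padding size

    dilation : int or a list/tuple of three ints
        dilation size, or [dilation_z, dilation_height, dilation_width]

    groups : int
        number of groups

    out_dtype : str, optional
        Output data type

    Returns
    -------
    Output : tvm.Tensor
        5-D with shape [batch, out_channel, out_z, out_height, out_width]
    """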
    if out_dtype is None:
        out_dtype = Input.dtype
    assert isinstance(stride, int) or len(stride) == 3
    assert isinstance(dilation, int) or len(dilation) == 3
    if isinstance(stride, int):
        stride_z = stride_h = stride_w = stride
    else:
        stride_z, stride_h, stride_w = stride

    if isinstance(dilation, int):
        dilation_z = dilation_h = dilation_w = dilation
    else:
        dilation_z, dilation_h, dilation_w = dilation

    batch, in_channel, in_z, in_height, in_width = get_const_tuple(Input.shape)
    num_filter, _, kernel_z, kernel_h, kernel_w = get_const_tuple(Filter.shape)

    assert in_channel % groups == 0, "input channels must be divisible by groups"
    assert num_filter % groups == 0, "output channels must be divisible by groups"

    pad_front, pad_top, pad_left, pad_back, pad_down, pad_right = get_pad_tuple3d(
        padding, (kernel_z, kernel_h, kernel_w))

    # compute the output shape
    out_channel = num_filter
    out_z = simplify(
        (in_z -
         (kernel_z - 1) * dilation_z - 1 + pad_front + pad_back) // stride_z +
        1)
    out_height = simplify(
        (in_height -
         (kernel_h - 1) * dilation_h - 1 + pad_top + pad_down) // stride_h + 1)
    out_width = simplify(
        (in_width -
         (kernel_w - 1) * dilation_w - 1 + pad_left + pad_right) // stride_w +
        1)
    # compute graph
    pad_before = [0, 0, pad_front, pad_top, pad_left]
    pad_after = [0, 0, pad_back, pad_down, pad_right]
    temp = pad(Input, pad_before, pad_after, name="pad_temp")
    rc = tvm.reduce_axis((0, in_channel // groups), name='rc')
    rz = tvm.reduce_axis((0, kernel_z), name='rz')
    ry = tvm.reduce_axis((0, kernel_h), name='ry')
    rx = tvm.reduce_axis((0, kernel_w), name='rx')
    return tvm.compute(
        (batch, out_channel, out_z, out_height, out_width),
        lambda nn, ff, zz, yy, xx: tvm.sum(
            temp[nn,
                 ff // (num_filter // groups) * (in_channel // groups) + rc,
                 zz * stride_z + rz * dilation_z,
                 yy * stride_h + ry * dilation_h,
                 xx * stride_w + rx * dilation_w].astype(out_dtype) *
            Filter[ff, rc, rz, ry, rx].astype(out_dtype),
            axis=[rc, rz, ry, rx]),
        tag='group_conv3d_nchw')
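A minimal usage sketch (shapes are illustrative; it assumes the same pre-te tvm.placeholder / tvm.create_schedule API the snippet itself uses, with the topi helpers above in scope):

import tvm

# 2 groups over 8 input channels: each of the 16 filters sees 8 // 2 = 4 channels.
Input = tvm.placeholder((1, 8, 16, 16, 16), name="Input")
Filter = tvm.placeholder((16, 4, 3, 3, 3), name="Filter")
Output = group_conv3d_nchw(Input, Filter, stride=1, padding=1,
                           dilation=1, groups=2)
s = tvm.create_schedule(Output.op)
func = tvm.build(s, [Input, Filter, Output], target="llvm")

Example 2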
def dilation2d_nhwc(input, filter, stride, padding, dilations, out_dtype=None):
    """Morphological 2d dilation NHWC layout.

    Parameters
    ----------
    input : tvm.Tensor
        4-D with shape [batch, in_height, in_width, in_channel]

    filter : tvm.Tensor
        3-D with shape [filter_height, filter_width, in_channel]

    stride : int or a list/tuple of two ints
        Stride size, or [stride_height, stride_width]

    padding : int or str
        Padding size, or ['VALID', 'SAME']

    dilations: int or a list/tuple of two ints
        dilation size, or [dilation_height, dilation_width]

    out_dtype : Optional[str]
        Specifies the output data type.

    Returns
    -------
    Output : tvm.Tensor
        4-D with shape [batch, out_height, out_width, in_channel]
    """
    if out_dtype is None:
        out_dtype = input.dtype
    assert isinstance(stride, int) or len(stride) == 2
    assert isinstance(dilations, int) or len(dilations) == 2
    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride

    if isinstance(dilations, int):
        dilation_h = dilation_w = dilations
    else:
        dilation_h, dilation_w = dilations

    batch, in_height, in_width, in_channel = input.shape
    kernel_h, kernel_w, channel = filter.shape
    assert in_channel.value == channel.value, \
        "For Dilation2D input and filter channels should be same."

    # compute the output shape
    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w))

    out_height = simplify(
        (in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
    out_width = simplify(
        (in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)
    pad_before = [0, pad_top, pad_left, 0]
    pad_after = [0, pad_down, pad_right, 0]
    padded_input = pad(input, pad_before, pad_after, name="padded_input")
    ry = te.reduce_axis((0, kernel_h), name='ry')
    rx = te.reduce_axis((0, kernel_w), name='rx')

    return te.compute(
        (batch, out_height, out_width, in_channel),
        lambda nn, yy, xx, ff: te.max(
            padded_input[nn,
                         yy * stride_h + ry * dilation_h,
                         xx * stride_w + rx * dilation_w,
                         ff].astype(out_dtype) +
            filter[ry, rx, ff].astype(out_dtype),
            axis=[ry, rx]),
        tag="dilation2d_nhwc")
Example 3
def _depthwise_conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None):
    """Depthwise convolution nchw forward operator.

    Parameters
    ----------
    Input : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]

    Filter : tvm.Tensor
        4-D with shape [in_channel, channel_multiplier, filter_height, filter_width]

    stride : int or a tuple of two ints
        The spatial stride along height and width

    padding : int or str
        Padding size, or ['VALID', 'SAME']

    dilation: int or a list/tuple of two ints
        dilation size, or [dilation_height, dilation_width]

    out_dtype: str, optional
        Output data type

    Returns
    -------
    Output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    out_dtype = Input.dtype if out_dtype is None else out_dtype

    if isinstance(stride, int):
        stride_h = stride_w = stride
    else:
        stride_h, stride_w = stride

    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation

    batch, in_channel, in_height, in_width = Input.shape
    # shape of dilated kernel
    filter_channel, channel_multiplier, filter_height, filter_width = Filter.shape

    dilated_kernel_h = (filter_height - 1) * dilation_h + 1
    dilated_kernel_w = (filter_width - 1) * dilation_w + 1
    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w))
    out_channel = simplify(in_channel * channel_multiplier)
    out_height = simplify((in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
    out_width = simplify((in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

    # padding stage
    pad_before = [0, 0, pad_top, pad_left]
    pad_after = [0, 0, pad_down, pad_right]
    PaddedInput = topi.nn.pad(Input, pad_before, pad_after, name="PaddedInput")
    # depthconv stage
    di = tvm.te.reduce_axis((0, filter_height), name='di')
    dj = tvm.te.reduce_axis((0, filter_width), name='dj')
    Output = tvm.te.compute(
        (batch, out_channel, out_height, out_width),
        lambda b, c, i, j: tvm.te.sum(
            (PaddedInput[b, c // channel_multiplier, i*stride_h+di*dilation_h,
                         j*stride_w+dj*dilation_w].astype(out_dtype) *
             Filter[c // channel_multiplier, c % channel_multiplier, di, dj].astype(out_dtype)),
            axis=[di, dj]),
        name='DepthwiseConv2d', tag="depthwise_conv2d_nchw")
    return Output
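A minimal usage sketch (shapes are illustrative; assumes tvm, tvm.te, and topi are importable):

import tvm
from tvm import te

# 8 input channels with channel multiplier 2 -> 16 output channels.
Input = te.placeholder((1, 8, 32, 32), name="Input")
Filter = te.placeholder((8, 2, 3, 3), name="Filter")
Output = _depthwise_conv2d_nchw(Input, Filter, stride=(1, 1),
                                padding="SAME", dilation=1)
s = te.create_schedule(Output.op)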
Example 4
def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, out_dtype):
    """Compute function for Cortex-M7 SIMD implementation of conv2d."""
    assert isinstance(strides, int) or len(strides) == 2
    assert isinstance(dilation, int) or len(dilation) == 2

    if isinstance(strides, int):
        stride_h = stride_w = strides
    else:
        stride_h, stride_w = strides

    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation

    batch_size, in_height, in_width, in_channels = data.shape
    kernel_h, kernel_w, out_channels, _ = kernel.shape

    # compute the output shape
    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w))
    out_height = simplify((in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
    out_width = simplify((in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

    pad_before = [0, pad_top, pad_left, 0]
    pad_after = [0, pad_down, pad_right, 0]
    padded_data = pad(data, pad_before, pad_after, name='padded_data')

    rc = te.reduce_axis((0, in_channels), name='rc')
    ry = te.reduce_axis((0, kernel_h), name='ry')
    rx = te.reduce_axis((0, kernel_w), name='rx')

    conv = te.compute(
        (batch_size, out_height, out_width, out_channels),
        lambda nn, yy, xx, ff: te.sum(
            padded_data[nn, yy * stride_h + ry * dilation_h,
                        xx * stride_w + rx * dilation_w, rc].astype(out_dtype) *
            kernel[ry, rx, ff, rc].astype(out_dtype), axis=[ry, rx, rc]),
        name='conv2d', tag='conv2d_nhwc')

    ###########################
    # Config Space Definition #
    ###########################
    n, oh, ow, co = (cfg.axis(batch_size.value),
                     cfg.axis(out_height.value),
                     cfg.axis(out_width.value),
                     cfg.axis(out_channels.value))
    kh, kw, ci = (cfg.reduce_axis(kernel_h.value),
                  cfg.reduce_axis(kernel_w.value),
                  cfg.reduce_axis(in_channels.value))

    assert in_channels.value % 4 == 0
    owo, owi = cfg.define_split('tile_ow', ow, policy='factors', num_outputs=2)
    cio, cii = cfg.define_split('tile_ci', ci, policy='factors', num_outputs=2,
                                filter=lambda x: x.size[-1] % 4 == 0)
    coo, coi = cfg.define_split('tile_co', co, policy='factors', num_outputs=2)

    cfg.define_reorder('reorder_0_simd',
                       [n, oh, owo, owi, coo, coi, kh, kw, cio, cii],
                       policy='candidate', candidate=[
                           [n, oh, kh, kw, owo, coo, cio, owi, coi, cii],
                           [n, oh, kh, kw, coo, owo, cio, owi, coi, cii],
                           [n, kh, kw, oh, owo, coo, cio, owi, coi, cii],
                           [n, kh, kw, oh, coo, owo, cio, owi, coi, cii]])

    cfg.define_knob('auto_unroll_max_step', [0, 2, 4, 8, 16, 32])
    cfg.define_knob('unroll_explicit', [0, 1])

    return conv
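A sketch of exercising this template (shapes are illustrative; it assumes autotvm.get_config() is called under autotvm's default fallback context, and that in_channels is a multiple of 4 as the assert requires):

from tvm import autotvm, te

cfg = autotvm.get_config()  # fallback config outside a real tuning session
data = te.placeholder((1, 16, 16, 8), name="data", dtype="int8")
kernel = te.placeholder((3, 3, 4, 8), name="kernel", dtype="int8")  # HWOI
conv = conv2d_direct_simd_compute(cfg, data, kernel, strides=1,
                                  padding=1, dilation=1, out_dtype="int32")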
Example 5
def depth_1by1_fused(Input,
                     Filter_d,
                     Filter_1,
                     stride_d,
                     padding_d='SAME',
                     dilation_d=1,
                     out_dtype=None,
                     layout="NCHW"):
    """Fused depthwise convolution + 1x1 convolution forward operator (NCHW & NHWC).

    Parameters
    ----------
    Input : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width] (NCHW)
                    or [batch, in_height, in_width, in_channel] (NHWC)

    Filter_d : tvm.Tensor
        4-D with shape [in_channel, in_channel * channel_multiplier, filter_height, filter_width]
                    or [filter_height, filter_width, in_channel, in_channel * channel_multiplier]

    Filter_1 : tvm.Tensor
        4-D with shape [out_channel, in_channel * channel_multiplier, 1, 1]
                    or [1, 1, out_channel, in_channel * channel_multiplier]

    stride_d : int or a tuple of two ints
        The spatial stride along height and width

    padding_d : int or str
        Padding size, or ['VALID', 'SAME']

    dilation_d: int or a list/tuple of two ints
        dilation size, or [dilation_height, dilation_width]

    out_dtype: str, optional
        Output data type

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width] (NCHW)
                    or [batch, out_height, out_width, out_channel] (NHWC)
    """

    assert layout in ["NCHW", "NHWC"]

    out_dtype = Input.dtype if out_dtype is None else out_dtype

    if isinstance(stride_d, int):
        stride_h_d = stride_w_d = stride_d
    else:
        stride_h_d, stride_w_d = stride_d

    if isinstance(dilation_d, int):
        dilation_h_d = dilation_w_d = dilation_d
    else:
        dilation_h_d, dilation_w_d = dilation_d

    if layout == "NCHW":
        if dilation_h_d != 1 or dilation_w_d != 1:
            Filter_d = dilate(Filter_d, (1, 1, dilation_h_d, dilation_w_d))
        batch, in_channel_d, in_height_d, in_width_d = Input.shape
        filter_channel, _, filter_height, filter_width = Filter_d.shape
        num_filter, channel, _, _ = Filter_1.shape
    else:  # NHWC
        if dilation_h_d != 1 or dilation_w_d != 1:
            Filter_d = dilate(Filter_d, (dilation_h_d, dilation_w_d, 1, 1))
        batch, in_height_d, in_width_d, in_channel_d = Input.shape
        filter_height, filter_width, filter_channel, _ = Filter_d.shape
        _, _, num_filter, channel = Filter_1.shape

    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding_d, (filter_height, filter_width))
    out_height = simplify((in_height_d - filter_height + pad_top + pad_down) //
                          stride_h_d + 1)
    out_width = simplify((in_width_d - filter_width + pad_left + pad_right) //
                         stride_w_d + 1)
    out_channel = num_filter

    # padding stage
    if layout == "NCHW":
        pad_before = [0, 0, pad_top, pad_left]
        pad_after = [0, 0, pad_down, pad_right]
    else:  # NHWC
        pad_before = [0, pad_top, pad_left, 0]
        pad_after = [0, pad_down, pad_right, 0]

    PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")

    # depthconv stage
    di = tvm.reduce_axis((0, filter_height), name='di')
    dj = tvm.reduce_axis((0, filter_width), name='dj')
    # 1by1 stage
    c = tvm.reduce_axis((0, in_channel_d), name='c')  # reduce over the depthwise output channels

    if layout == "NCHW":
        Output = tvm.compute(
            (batch, out_channel, out_height, out_width),
            lambda b, f, i, j: tvm.sum(
                PaddedInput[b, c, i * stride_h_d + di,
                            j * stride_w_d + dj].astype(out_dtype) *
                Filter_d[c, 0, di, dj].astype(out_dtype) *
                Filter_1[f, c, 0, 0].astype(out_dtype),
                axis=[di, dj, c]),
            name='Depthwise1by1Fused',
            tag="depthwise_1by1_fused_nchw")
    else:  # NHWC
        Output = tvm.compute(
            (batch, out_height, out_width, out_channel),
            lambda b, i, j, f: tvm.sum(
                PaddedInput[b, i * stride_h_d + di,
                            j * stride_w_d + dj, c].astype(out_dtype) *
                Filter_d[di, dj, c, 0].astype(out_dtype) *
                Filter_1[0, 0, c, f].astype(out_dtype),
                axis=[di, dj, c]),
            name='Depthwise1by1Fused',
            tag="depthwise_1by1_fused_nhwc")
    return Output
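A minimal usage sketch (shapes are illustrative; assumes the pre-te tvm.placeholder API the snippet itself uses):

import tvm

# Depthwise 3x3 over 8 channels, then a 1x1 projection to 16 channels (NCHW).
Input = tvm.placeholder((1, 8, 32, 32), name="Input")
Filter_d = tvm.placeholder((8, 1, 3, 3), name="Filter_d")
Filter_1 = tvm.placeholder((16, 8, 1, 1), name="Filter_1")
Output = depth_1by1_fused(Input, Filter_d, Filter_1, stride_d=1,
                          padding_d="SAME", layout="NCHW")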
Example 6
def fused_convs(input_data, filters, resnet_block=False):

	out_dtype = input_data.dtype

	Input = None
	nodes = [input_data]
	params = [input_data]

	# Stage counters must persist across filters so generated op names stay unique.
	padded_count = 0
	conv_count = 0
	depthwise_count = 0

	for f in filters:
		Input = nodes[-1]
		Filter = f.placeholder
		layout = f.layout
		depthwise = f.depthwise
		kernel = f.kernel
		stride = f.stride
		padding = f.padding
		dilation = f.dilation

		assert not (depthwise and kernel == 1) # Don't consider 1by1 depthwise

		if isinstance(stride, int):
			stride_h = stride_w = stride
		else:
			stride_h, stride_w = stride

		if isinstance(dilation, int):
			dilation_h = dilation_w = dilation
		else:
			dilation_h, dilation_w = dilation

		batch, in_height, in_width, in_channel = Input.shape
		if f.NHWC_transpose: # HWOI
			kernel_h, kernel_w, tmp, kernel_channel = Filter.shape
		else: # HWIO
			kernel_h, kernel_w, kernel_channel, tmp = Filter.shape
		if depthwise:
			channel_multiplier = tmp
		else:
			num_filter = tmp

		# compute the output shape
		dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
		dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
		pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
			padding, (dilated_kernel_h, dilated_kernel_w))

		out_channel = simplify(in_channel * channel_multiplier) if depthwise else num_filter
		out_height = simplify((in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
		out_width = simplify((in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

		if f.kernel > 1:
			print("Padding is needed!")

			pad_before = [0, pad_top, pad_left, 0]
			pad_after = [0, pad_down, pad_right, 0]

			PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput_{}".format(padded_count))
			padded_count += 1
			nodes.append(PaddedInput)

			# Update Input
			Input = PaddedInput
			batch, in_height, in_width, in_channel = Input.shape

		if not depthwise:
			rc = tvm.reduce_axis((0, in_channel), name='rc')
		if kernel > 1:
			ry = tvm.reduce_axis((0, kernel_h), name='ry')
			rx = tvm.reduce_axis((0, kernel_w), name='rx')

		if not depthwise: # Normal convolution
			if kernel > 1:
				Output = tvm.compute(
				(batch, out_height, out_width, out_channel),
				lambda nn, yy, xx, ff: tvm.sum(
					Input[nn, yy * stride_h + ry * dilation_h,
								xx * stride_w + rx * dilation_w, rc].astype(out_dtype) *
					(Filter[ry, rx, ff, rc] if f.NHWC_transpose else Filter[ry, rx, rc, ff]).astype(out_dtype), axis=[ry, rx, rc]),
					name="Conv2dOutput_{}".format(conv_count), tag="conv2d_nhwc")
			else: # Only reduce rc axis
				Output = tvm.compute(
				(batch, out_height, out_width, out_channel),
				lambda nn, yy, xx, ff: tvm.sum(
					Input[nn, yy * stride_h, xx * stride_w, rc].astype(out_dtype) *
					(Filter[0, 0, ff, rc] if f.NHWC_transpose else Filter[0, 0, rc, ff]).astype(out_dtype), axis=[rc]),
					name="Conv2dOutput_{}".format(conv_count), tag="conv2d_nhwc")
			conv_count += 1
		else: # Depthwise convolution (kernel > 1)
			Output = tvm.compute(
			(batch, out_height, out_width, out_channel),
			lambda b, i, j, c: tvm.sum(
				(Input[b, i*stride_h + ry*dilation_h, j*stride_w + rx*dilation_w,
							 tvm.indexdiv(c, channel_multiplier)].astype(out_dtype) *
				(Filter[ry, rx, tvm.indexmod(c, channel_multiplier),
						tvm.indexdiv(c, channel_multiplier)]
				 if f.NHWC_transpose else
				 Filter[ry, rx, tvm.indexdiv(c, channel_multiplier),
						tvm.indexmod(c, channel_multiplier)]).astype(out_dtype)),
				axis=[ry, rx]),
			name='DepthwiseConv2dOutput_{}'.format(depthwise_count), tag="depthwise_nhwc")
			depthwise_count += 1

		nodes.append(Output)
		params.append(Filter)

	if resnet_block:
		First = nodes[0]
		Last = nodes[-1]
		assert get_const_tuple(First.shape) == get_const_tuple(Last.shape)
		Output = tvm.compute(
			(batch, out_height, out_width, out_channel),
			lambda b, i, j, c: (First[b, i, j, c].astype(out_dtype) +
			                    Last[b, i, j, c].astype(out_dtype)),
			name='ElementwiseAddOutput_{}'.format(depthwise_count), tag="elem_nhwc")
		nodes.append(Output)

	params.append(nodes[-1]) # Final output
	return nodes, params
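fused_convs expects each entry in filters to expose placeholder, layout, depthwise, kernel, stride, padding, dilation, and NHWC_transpose attributes. A hypothetical descriptor along these lines would satisfy those accesses (the FilterParams name and the shapes are illustrative, not from the original source):

from collections import namedtuple
import tvm

FilterParams = namedtuple('FilterParams', [
	'placeholder', 'layout', 'depthwise', 'kernel', 'stride',
	'padding', 'dilation', 'NHWC_transpose'])

data = tvm.placeholder((1, 32, 32, 16), name='data')
w_dw = tvm.placeholder((3, 3, 16, 1), name='w_dw')      # HWIO depthwise
w_1x1 = tvm.placeholder((1, 1, 16, 32), name='w_1x1')   # HWIO 1x1

filters = [
	FilterParams(w_dw, 'NHWC', True, 3, 1, 'SAME', 1, False),
	FilterParams(w_1x1, 'NHWC', False, 1, 1, 'SAME', 1, False),
]
nodes, params = fused_convs(data, filters)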