Example #1
# Imports assume the pre-0.7 TVM/topi package layout this snippet targets.
import tvm
from topi import nn
from topi.util import get_const_tuple


def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype):
    """Transposed 1D convolution ncw forward operator.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template
    data : tvm.Tensor
        3-D with shape [batch, in_channel, inp_width]
    kernel : tvm.Tensor
        3-D with shape [in_channel, num_filter, kernel_size]
    stride : int or tuple of one int
        The spatial stride along width
    padding : int, tuple, or string
        int: padding size
        tuple of 2 ints: (pad_left, pad_right) for left and right padding
        string: ['VALID', 'SAME']
    out_dtype: str
        The output type. This is used in mixed precision

    Returns
    -------
    Output : tvm.Tensor
        3-D with shape [batch, out_channel, out_width]
    """
    if isinstance(stride, (tuple, list)):
        stride = stride[0]
    cfg.stride = stride
    batch, inp_channels, inp_width = get_const_tuple(data.shape)
    _, out_channels, kernel_size = get_const_tuple(kernel.shape)
    pad_left, pad_right = nn.get_pad_tuple1d(padding, kernel_size)
    # Output width of the transposed convolution.
    out_width = (inp_width - 1) * stride + kernel_size - pad_left - pad_right
    # Rewrite the op as a direct convolution over a stride-dilated input
    # padded with (kernel_size - 1 - pad) on each side.
    pad_left = kernel_size - 1 - pad_left
    pad_right = kernel_size - 1 - pad_right
    dilated_width = stride * (inp_width - 1) + 1
    data = tvm.compute(
        (batch, inp_channels, pad_left + dilated_width + pad_right),
        lambda n, c, x: tvm.if_then_else(
            tvm.all(x >= pad_left,
                    x < pad_left + dilated_width,
                    tvm.indexmod(x - pad_left, stride).equal(0)),
            data[n, c, tvm.indexdiv(x - pad_left, stride)],
            tvm.const(0., data.dtype)),  # zero fill matches the input dtype
        name='data_pad')

    dc = tvm.reduce_axis((0, inp_channels), name='dc')
    dw = tvm.reduce_axis((0, kernel_size), name='dw')
    data_out = tvm.compute(
        (batch, out_channels, out_width),
        lambda b, c, w: tvm.sum(
            data[b, dc, w + dw].astype(out_dtype) *
            kernel[dc, c, kernel_size - 1 - dw].astype(out_dtype),
            axis=[dc, dw]),
        tag="conv1d_transpose_ncw")

    return data_out
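
A minimal invocation sketch for the operator above, assuming the pre-0.7 TVM API this snippet is written against. Since the template only assigns cfg.stride, a plain SimpleNamespace stands in for the AutoTVM ConfigEntity here; the shapes are illustrative, not from the original.

import tvm
from types import SimpleNamespace

data = tvm.placeholder((1, 8, 32), name="data")      # [batch, in_channel, inp_width]
kernel = tvm.placeholder((8, 16, 3), name="kernel")  # [in_channel, num_filter, kernel_size]

cfg = SimpleNamespace()  # stand-in for ConfigEntity; only cfg.stride is written
out = conv1d_transpose_ncw(cfg, data, kernel, (2,), 0, "float32")

# out_width = (32 - 1) * 2 + 3 - 0 - 0 = 65
print(out.shape)  # [1, 16, 65]
s = tvm.create_schedule(out.op)
print(tvm.lower(s, [data, kernel, out], simple_mode=True))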
Example #2
# Imports assume the pre-0.7 TVM/topi package layout this snippet targets.
import tvm
from topi import nn
from topi.util import get_const_tuple


def conv2d_transpose_nchw_cuda(cfg, data, kernel, stride, padding, out_dtype):
    """Transposed 2D convolution nchw forward operator.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template
    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]
    kernel : tvm.Tensor
        4-D with shape [in_channel, num_filter, filter_height, filter_width]
    stride : tuple of two ints
        The spatial stride along height and width
    padding : int, tuple of two ints, or str
        Padding size, or ['VALID', 'SAME']
    out_dtype: str
        The output type. This is used in mixed precision

    Returns
    -------
    Output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    batch, inp_channels, inp_height, inp_width = get_const_tuple(data.shape)
    _, out_channels, kernel_height, kernel_width = get_const_tuple(kernel.shape)
    stride_height, stride_width = stride
    cfg.stride = stride
    pad_top, pad_left, pad_bottom, pad_right = nn.get_pad_tuple(
        padding, (kernel_height, kernel_width))

    # Output sizes, plus the padding/dilation that turn the transposed
    # convolution into a direct convolution over a dilated input.
    out_width = (inp_width - 1) * stride_width + \
                kernel_width - pad_left - pad_right
    pad_left = kernel_width - 1 - pad_left
    pad_right = kernel_width - 1 - pad_right
    dilated_width = stride_width * (inp_width - 1) + 1

    out_height = (inp_height - 1) * stride_height + \
                 kernel_height - pad_top - pad_bottom
    pad_top = kernel_height - 1 - pad_top
    pad_bottom = kernel_height - 1 - pad_bottom
    dilated_height = stride_height * (inp_height - 1) + 1

    # compute pad
    data = tvm.compute(
        (batch, inp_channels,
         pad_top + dilated_height + pad_bottom,
         pad_left + dilated_width + pad_right),
        lambda n, c, y, x: tvm.if_then_else(
            tvm.all(x >= pad_left,
                    x < pad_left + dilated_width,
                    tvm.indexmod(x - pad_left, stride_width).equal(0),
                    y >= pad_top,
                    y < pad_top + dilated_height,
                    tvm.indexmod(y - pad_top, stride_height).equal(0)),
            data[n, c,
                 tvm.indexdiv(y - pad_top, stride_height),
                 tvm.indexdiv(x - pad_left, stride_width)],
            tvm.const(0., data.dtype)),  # zero fill matches the input dtype
        name='data_pad')

    # compute transposed conv
    dc = tvm.reduce_axis((0, inp_channels), name='dc')
    dh = tvm.reduce_axis((0, kernel_height), name='dh')
    dw = tvm.reduce_axis((0, kernel_width), name='dw')
    data_out = tvm.compute(
        (batch, out_channels, out_height, out_width),
        lambda b, c, h, w: tvm.sum(
            data[b, dc, h + dh, w + dw].astype(out_dtype) *
            kernel[dc,
                   c,
                   kernel_height - 1 - dh,
                   kernel_width - 1 - dw].astype(out_dtype),
            axis=[dc, dh, dw]), tag="conv2d_transpose_nchw")

    return data_out
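
The 2-D variant can be exercised the same way, under the same pre-0.7 TVM assumptions (SimpleNamespace as a config stand-in, illustrative shapes); the comment spells out the output-size arithmetic from the code above.

import tvm
from types import SimpleNamespace

data = tvm.placeholder((1, 8, 16, 16), name="data")     # NCHW
kernel = tvm.placeholder((8, 16, 4, 4), name="kernel")  # [in_channel, num_filter, kh, kw]

cfg = SimpleNamespace()  # only cfg.stride is written by the template
out = conv2d_transpose_nchw_cuda(cfg, data, kernel, (2, 2), (1, 1), "float32")

# out_height = out_width = (16 - 1) * 2 + 4 - 1 - 1 = 32
print(out.shape)  # [1, 16, 32, 32]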
Example #3
# Imports assume the pre-0.7 TVM/topi package layout this snippet targets.
import tvm
from topi.nn import pad
from topi.nn.util import get_pad_tuple
from topi.util import simplify, get_const_tuple


def fused_convs(input_data, filters, resnet_block=False):

	out_dtype = input_data.dtype

	Input = None
	nodes = [input_data]
	params = [input_data]

	# Stage counters live outside the loop so successive stages get unique names.
	padded_count = 0
	conv_count = 0
	depthwise_count = 0

	for f in filters:
		Input = nodes[-1]
		Filter = f.placeholder
		layout = f.layout
		depthwise = f.depthwise
		kernel = f.kernel
		stride = f.stride
		padding = f.padding
		dilation = f.dilation

		assert not (depthwise and kernel == 1)  # don't consider 1x1 depthwise

		if isinstance(stride, int):
			stride_h = stride_w = stride
		else:
			stride_h, stride_w = stride

		if isinstance(dilation, int):
			dilation_h = dilation_w = dilation
		else:
			dilation_h, dilation_w = dilation

		batch, in_height, in_width, in_channel = Input.shape
		if f.NHWC_transpose: # HWOI
			kernel_h, kernel_w, tmp, kernel_channel = Filter.shape
		else: # HWIO
			kernel_h, kernel_w, kernel_channel, tmp = Filter.shape
		if depthwise:
			channel_multiplier = tmp
		else:
			num_filter = tmp

		# compute the output shape
		dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
		dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
		pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
			padding, (dilated_kernel_h, dilated_kernel_w))

		out_channel = simplify(in_channel * channel_multiplier) if depthwise else num_filter
		out_height = simplify((in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
		out_width = simplify((in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

		if f.kernel > 1:
			print("Padding is needed!")

			pad_before = [0, pad_top, pad_left, 0]
			pad_after = [0, pad_down, pad_right, 0]

			PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput_{}".format(padded_count))
			padded_count += 1
			nodes.append(PaddedInput)

			# Update Input
			Input = PaddedInput
			batch, in_height, in_width, in_channel = Input.shape

		if not depthwise:
			rc = tvm.reduce_axis((0, in_channel), name='rc')
		if kernel > 1:
			ry = tvm.reduce_axis((0, kernel_h), name='ry')
			rx = tvm.reduce_axis((0, kernel_w), name='rx')

		if not depthwise: # Normal convolution
			if kernel > 1:
				Output = tvm.compute(
					(batch, out_height, out_width, out_channel),
					lambda nn, yy, xx, ff: tvm.sum(
						Input[nn, yy * stride_h + ry * dilation_h,
						      xx * stride_w + rx * dilation_w, rc].astype(out_dtype) *
						(Filter[ry, rx, ff, rc] if f.NHWC_transpose
						 else Filter[ry, rx, rc, ff]).astype(out_dtype),
						axis=[ry, rx, rc]),
					name="Conv2dOutput_{}".format(conv_count), tag="conv2d_nhwc")
			else: # 1x1 kernel: only the rc axis is reduced
				Output = tvm.compute(
					(batch, out_height, out_width, out_channel),
					lambda nn, yy, xx, ff: tvm.sum(
						Input[nn, yy * stride_h, xx * stride_w, rc].astype(out_dtype) *
						(Filter[0, 0, ff, rc] if f.NHWC_transpose
						 else Filter[0, 0, rc, ff]).astype(out_dtype),
						axis=[rc]),
					name="Conv2dOutput_{}".format(conv_count), tag="conv2d_nhwc")
			conv_count += 1
		else: # Depthwise convolution (kernel > 1 guaranteed by the assert above)
			Output = tvm.compute(
				(batch, out_height, out_width, out_channel),
				lambda b, i, j, c: tvm.sum(
					Input[b, i * stride_h + ry * dilation_h,
					      j * stride_w + rx * dilation_w,
					      tvm.indexdiv(c, channel_multiplier)].astype(out_dtype) *
					(Filter[ry, rx, tvm.indexmod(c, channel_multiplier),
					        tvm.indexdiv(c, channel_multiplier)]
					 if f.NHWC_transpose else
					 Filter[ry, rx, tvm.indexdiv(c, channel_multiplier),
					        tvm.indexmod(c, channel_multiplier)]).astype(out_dtype),
					axis=[ry, rx]),
				name='DepthwiseConv2dOutput_{}'.format(depthwise_count), tag="depthwise_nhwc")
			depthwise_count += 1

		nodes.append(Output)
		params.append(Filter)

	if resnet_block:
		First = nodes[0]
		Last = nodes[-1]
		assert get_const_tuple(First.shape) == get_const_tuple(Last.shape)
		# Elementwise residual add: no reduction axis, so no tvm.sum here.
		Output = tvm.compute(
			(batch, out_height, out_width, out_channel),
			lambda b, i, j, c: (First[b, i, j, c].astype(out_dtype) +
			                    Last[b, i, j, c].astype(out_dtype)),
			name='ElementwiseAddOutput_{}'.format(depthwise_count), tag="elem_nhwc")
		nodes.append(Output)

	params.append(nodes[-1]) # Final output
	return nodes, params
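
A sketch of how fused_convs might be driven, assuming a hypothetical FilterParams record that carries exactly the attributes the function reads; this descriptor is not part of the original snippet. The example chains a 3x3 depthwise stage with a 1x1 pointwise stage, i.e. a depthwise-separable block in NHWC layout.

import tvm
from collections import namedtuple

# Hypothetical descriptor mirroring the attributes fused_convs reads.
FilterParams = namedtuple(
    "FilterParams",
    ["placeholder", "layout", "depthwise", "kernel",
     "stride", "padding", "dilation", "NHWC_transpose"])

input_data = tvm.placeholder((1, 56, 56, 32), name="input")  # NHWC

# 3x3 depthwise (HWIO, channel_multiplier = 1), then 1x1 pointwise.
dw = FilterParams(tvm.placeholder((3, 3, 32, 1), name="dw_filter"),
                  "NHWC", True, 3, 1, "SAME", 1, False)
pw = FilterParams(tvm.placeholder((1, 1, 32, 64), name="pw_filter"),
                  "NHWC", False, 1, 1, "SAME", 1, False)

nodes, params = fused_convs(input_data, [dw, pw])
print(nodes[-1].shape)  # [1, 56, 56, 64]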