# Assumed module-level imports (TVM 0.x TOPI helpers) for the operators below.
import tvm
from topi import nn
from topi.nn.pad import pad
from topi.nn.util import get_pad_tuple
from topi.util import get_const_tuple, simplify


def conv1d_transpose_ncw(cfg, data, kernel, stride, padding, out_dtype):
    """Transposed 1D convolution ncw forward operator.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template
    Input : tvm.Tensor
        3-D with shape [batch, in_channel, inp_width]
    Filter : tvm.Tensor
        3-D with shape [in_channel, num_filter, kernel_size]
    stride : tuple of one int
        The spatial stride along width
    padding : int, tuple, or string
        int: padding size
        tuple of 2 ints: (pad_left, pad_right) for left and right padding
        string: ['VALID', 'SAME']
    out_dtype: str
        The output type. This is used in mixed precision

    Returns
    -------
    Output : tvm.Tensor
        3-D with shape [batch, out_channel, out_width]
    """
    if isinstance(stride, (tuple, list)):
        stride = stride[0]
    cfg.stride = stride
    batch, inp_channels, inp_width = get_const_tuple(data.shape)
    _, out_channels, kernel_size = get_const_tuple(kernel.shape)
    pad_left, pad_right = nn.get_pad_tuple1d(padding, kernel_size)
    out_width = (inp_width - 1) * stride + kernel_size - pad_left - pad_right
    pad_left = kernel_size - 1 - pad_left
    pad_right = kernel_size - 1 - pad_right
    dilated_width = stride * (inp_width - 1) + 1

    # Dilate the input with zeros between elements and pad it, so the
    # transposed convolution can be computed as a direct convolution.
    data = tvm.compute(
        (batch, inp_channels, pad_left + dilated_width + pad_right),
        lambda n, c, x: tvm.if_then_else(
            tvm.all(x >= pad_left,
                    x < pad_left + dilated_width,
                    tvm.indexmod(x - pad_left, stride).equal(0)),
            data[n, c, tvm.indexdiv(x - pad_left, stride)],
            tvm.const(0., "float32")),
        name='data_pad')

    dc = tvm.reduce_axis((0, inp_channels), name='dc')
    dw = tvm.reduce_axis((0, kernel_size), name='dw')
    data_out = tvm.compute(
        (batch, out_channels, out_width),
        lambda b, c, w: tvm.sum(
            data[b, dc, w + dw].astype(out_dtype) *
            kernel[dc, c, kernel_size - 1 - dw].astype(out_dtype),
            axis=[dc, dw]),
        tag="conv1d_transpose_ncw")

    return data_out
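
# Usage sketch (illustrative only, not part of the operator above). The
# SimpleNamespace is a hypothetical stand-in for the autotvm config object,
# which is only used here to record cfg.stride; shapes and dtypes are
# arbitrary example values.
def _example_conv1d_transpose():
    from types import SimpleNamespace
    data = tvm.placeholder((1, 16, 32), name="data", dtype="float32")
    kernel = tvm.placeholder((16, 32, 3), name="kernel", dtype="float32")
    out = conv1d_transpose_ncw(SimpleNamespace(), data, kernel,
                               stride=(2,), padding=1, out_dtype="float32")
    # out_width = (32 - 1) * 2 + 3 - 1 - 1 = 63, so out.shape is (1, 32, 63)
    return out
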
def conv2d_transpose_nchw_cuda(cfg, data, kernel, stride, padding, out_dtype):
    """Transposed 2D convolution nchw forward operator.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template
    Input : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]
    Filter : tvm.Tensor
        4-D with shape [in_channel, num_filter, filter_height, filter_width]
    stride : tuple of two ints
        The spatial stride along height and width
    padding : int or str
        Padding size, or ['VALID', 'SAME']
    out_dtype: str
        The output type. This is used in mixed precision

    Returns
    -------
    Output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    batch, inp_channels, inp_height, inp_width = get_const_tuple(data.shape)
    _, out_channels, kernel_height, kernel_width = get_const_tuple(kernel.shape)
    stride_height, stride_width = stride
    cfg.stride = stride
    pad_top, pad_left, pad_bottom, pad_right = nn.get_pad_tuple(
        padding, (kernel_height, kernel_width))

    out_width = (inp_width - 1) * stride_width + \
                kernel_width - pad_left - pad_right
    pad_left = kernel_width - 1 - pad_left
    pad_right = kernel_width - 1 - pad_right
    dilated_width = stride_width * (inp_width - 1) + 1

    out_height = (inp_height - 1) * stride_height + \
                 kernel_height - pad_top - pad_bottom
    pad_top = kernel_height - 1 - pad_top
    pad_bottom = kernel_height - 1 - pad_bottom
    dilated_height = stride_height * (inp_height - 1) + 1

    # compute pad
    data = tvm.compute(
        (batch, inp_channels,
         pad_top + dilated_height + pad_bottom,
         pad_left + dilated_width + pad_right),
        lambda n, c, y, x: tvm.if_then_else(
            tvm.all(x >= pad_left,
                    x < pad_left + dilated_width,
                    tvm.indexmod(x - pad_left, stride_width).equal(0),
                    y >= pad_top,
                    y < pad_top + dilated_height,
                    tvm.indexmod(y - pad_top, stride_height).equal(0)),
            data[n, c,
                 tvm.indexdiv(y - pad_top, stride_height),
                 tvm.indexdiv(x - pad_left, stride_width)],
            tvm.const(0., "float32")),
        name='data_pad')

    # compute transposed conv
    dc = tvm.reduce_axis((0, inp_channels), name='dc')
    dh = tvm.reduce_axis((0, kernel_height), name='dh')
    dw = tvm.reduce_axis((0, kernel_width), name='dw')
    data_out = tvm.compute(
        (batch, out_channels, out_height, out_width),
        lambda b, c, h, w: tvm.sum(
            data[b, dc, h + dh, w + dw].astype(out_dtype) *
            kernel[dc, c,
                   kernel_height - 1 - dh,
                   kernel_width - 1 - dw].astype(out_dtype),
            axis=[dc, dh, dw]),
        tag="conv2d_transpose_nchw")

    return data_out
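
# Usage sketch (illustrative only). As above, a SimpleNamespace stands in for
# the autotvm config, since the operator only assigns cfg.stride; the shapes
# below are arbitrary example values.
def _example_conv2d_transpose():
    from types import SimpleNamespace
    data = tvm.placeholder((1, 16, 8, 8), name="data", dtype="float32")
    kernel = tvm.placeholder((16, 32, 4, 4), name="kernel", dtype="float32")
    out = conv2d_transpose_nchw_cuda(SimpleNamespace(), data, kernel,
                                     stride=(2, 2), padding=1,
                                     out_dtype="float32")
    # out_height = out_width = (8 - 1) * 2 + 4 - 1 - 1 = 16,
    # so out.shape is (1, 32, 16, 16)
    return out
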
def fused_convs(input_data, filters, resnet_block=False):
    out_dtype = input_data.dtype

    Input = None
    nodes = [input_data]
    params = [input_data]

    # Counters used to give each intermediate stage a distinct name.
    padded_count = 0
    conv_count = 0
    depthwise_count = 0

    for f in filters:
        Input = nodes[-1]
        Filter = f.placeholder
        layout = f.layout
        depthwise = f.depthwise
        kernel = f.kernel
        stride = f.stride
        padding = f.padding
        dilation = f.dilation

        assert not (depthwise and kernel == 1)  # Don't consider 1x1 depthwise

        if isinstance(stride, int):
            stride_h = stride_w = stride
        else:
            stride_h, stride_w = stride

        if isinstance(dilation, int):
            dilation_h = dilation_w = dilation
        else:
            dilation_h, dilation_w = dilation

        batch, in_height, in_width, in_channel = Input.shape
        if f.NHWC_transpose:  # HWOI
            kernel_h, kernel_w, tmp, kernel_channel = Filter.shape
        else:  # HWIO
            kernel_h, kernel_w, kernel_channel, tmp = Filter.shape
        if depthwise:
            channel_multiplier = tmp
        else:
            num_filter = tmp

        # compute the output shape
        dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
        dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
        pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
            padding, (dilated_kernel_h, dilated_kernel_w))
        out_channel = simplify(in_channel * channel_multiplier) if depthwise else num_filter
        out_height = simplify((in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1)
        out_width = simplify((in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1)

        if f.kernel > 1:
            print("Padding is needed!")
            pad_before = [0, pad_top, pad_left, 0]
            pad_after = [0, pad_down, pad_right, 0]
            PaddedInput = pad(Input, pad_before, pad_after,
                              name="PaddedInput_{}".format(padded_count))
            padded_count += 1
            nodes.append(PaddedInput)

            # Update Input
            Input = PaddedInput
            batch, in_height, in_width, in_channel = Input.shape

        if not depthwise:
            rc = tvm.reduce_axis((0, in_channel), name='rc')
        if kernel > 1:
            ry = tvm.reduce_axis((0, kernel_h), name='ry')
            rx = tvm.reduce_axis((0, kernel_w), name='rx')

        if not depthwise:  # Normal convolution
            if kernel > 1:
                Output = tvm.compute(
                    (batch, out_height, out_width, out_channel),
                    lambda nn, yy, xx, ff: tvm.sum(
                        Input[nn, yy * stride_h + ry * dilation_h,
                              xx * stride_w + rx * dilation_w, rc].astype(out_dtype) *
                        (Filter[ry, rx, ff, rc] if f.NHWC_transpose
                         else Filter[ry, rx, rc, ff]).astype(out_dtype),
                        axis=[ry, rx, rc]),
                    name="Conv2dOutput_{}".format(conv_count),
                    tag="conv2d_nhwc")
            else:  # Only reduce rc axis
                Output = tvm.compute(
                    (batch, out_height, out_width, out_channel),
                    lambda nn, yy, xx, ff: tvm.sum(
                        Input[nn, yy * stride_h, xx * stride_w, rc].astype(out_dtype) *
                        (Filter[0, 0, ff, rc] if f.NHWC_transpose
                         else Filter[0, 0, rc, ff]).astype(out_dtype),
                        axis=[rc]),
                    name="Conv2dOutput_{}".format(conv_count),
                    tag="conv2d_nhwc")
            conv_count += 1
        else:  # Depthwise convolution (kernel > 1)
            Output = tvm.compute(
                (batch, out_height, out_width, out_channel),
                lambda b, i, j, c: tvm.sum(
                    (Input[b, i * stride_h + ry * dilation_h,
                           j * stride_w + rx * dilation_w,
                           tvm.indexdiv(c, channel_multiplier)].astype(out_dtype) *
                     (Filter[ry, rx,
                             tvm.indexmod(c, channel_multiplier),
                             tvm.indexdiv(c, channel_multiplier)]
                      if f.NHWC_transpose else
                      Filter[ry, rx,
                             tvm.indexdiv(c, channel_multiplier),
                             tvm.indexmod(c, channel_multiplier)]).astype(out_dtype)),
                    axis=[ry, rx]),
                name='DepthwiseConv2dOutput_{}'.format(depthwise_count),
                tag="depthwise_nhwc")
            depthwise_count += 1

        nodes.append(Output)
        params.append(Filter)

    if resnet_block:
        First = nodes[0]
        Last = nodes[-1]
        assert First.shape == Last.shape

        # Elementwise residual add: there is no reduction axis, so this is a
        # plain addition rather than a tvm.sum.
        Output = tvm.compute(
            (batch, out_height, out_width, out_channel),
            lambda b, i, j, c:
                First[b, i, j, c].astype(out_dtype) +
                Last[b, i, j, c].astype(out_dtype),
            name='ElementwiseAddOutput_{}'.format(depthwise_count),
            tag="elem_nhwc")
        nodes.append(Output)

    params.append(nodes[-1])  # Final output
    return nodes, params
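
# Usage sketch (illustrative only). The original filter-descriptor type is not
# shown in this file; the namedtuple below is a hypothetical stand-in exposing
# exactly the attributes fused_convs reads (placeholder, layout, depthwise,
# kernel, stride, padding, dilation, NHWC_transpose). Shapes are arbitrary.
def _example_fused_convs():
    from collections import namedtuple
    FilterSpec = namedtuple(
        "FilterSpec",
        ["placeholder", "layout", "depthwise", "kernel",
         "stride", "padding", "dilation", "NHWC_transpose"])

    data = tvm.placeholder((1, 56, 56, 32), name="input", dtype="float32")
    # A 3x3 depthwise stage (HWIO, channel_multiplier = 1) followed by a
    # 1x1 pointwise convolution, as in a depthwise-separable block.
    dw_filter = FilterSpec(
        placeholder=tvm.placeholder((3, 3, 32, 1), name="dw_kernel"),
        layout="NHWC", depthwise=True, kernel=3,
        stride=1, padding="SAME", dilation=1, NHWC_transpose=False)
    pw_filter = FilterSpec(
        placeholder=tvm.placeholder((1, 1, 32, 64), name="pw_kernel"),
        layout="NHWC", depthwise=False, kernel=1,
        stride=1, padding="SAME", dilation=1, NHWC_transpose=False)

    nodes, params = fused_convs(data, [dw_filter, pw_filter])
    return nodes[-1]  # (1, 56, 56, 64) output of the fused pair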