def _compute_correlation(n, q, i, j):
    """Compute one output element (batch n, displacement q, row i, col j) of a correlation op.

    NOTE(review): this def is a fragment of a larger compute definition -- it
    relies on names from the enclosing scope (stride1, stride2,
    max_displacement, displacement_size, displacement_radius, corr_func,
    padded_data1, padded_data2 and the reduce axes rc, ry, rx). Presumably
    padded_data1/2 are NCHW tensors padded by max_displacement -- confirm
    against the enclosing function.
    """
    # location in data1
    y1 = i * stride1 + max_displacement
    x1 = j * stride1 + max_displacement
    # location in data2: q enumerates the displacement grid, and div/mod by
    # displacement_size split it into vertical/horizontal offsets centred on
    # displacement_radius
    y2 = y1 + (te.indexdiv(q, displacement_size) - displacement_radius) * stride2
    x2 = x1 + (te.indexmod(q, displacement_size) - displacement_radius) * stride2
    # reduce corr_func over channels (rc) and the patch window (ry, rx)
    return te.sum(
        corr_func(padded_data1[n, rc, y1 + ry, x1 + rx],
                  padded_data2[n, rc, y2 + ry, x2 + rx]),
        axis=[rc, ry, rx])
def convert_to_nhwc_compute(tensor: te.Tensor, layout: str, channels: int) -> te.Tensor:
    """Converts a tensor into NHWC layout if it's in NHWCB16 layout.

    When the current layout is NHCWB16, a reduce sum operation is inserted
    to ensure that the whole of the input tensor has a data dependency on
    the copy operation. Without this, TVM removes compute that is deemed to
    be unnecessary, which causes strides for the NPU to be calculated
    incorrectly.

    Parameters
    ----------
    tensor : te.Tensor
        The tensor to convert.
    layout : str
        The layout of the tensor, either NHWC or NHCWB16.
    channels : int
        The number of valid channels for the tensor.

    Returns
    -------
    te.Tensor
        The converted tensor in NHWC layout.
    """
    assert layout in {"NHWC", "NHCWB16"}
    attrs = {
        "op": "ethosu_convert_to_nhwc",
        "layout": layout,
    }

    if layout == "NHWC":
        # Already NHWC: emit an identity copy so the op is still materialised.
        return te.compute(
            tensor.shape,
            lambda *idx: tensor(*idx),
            name="ethosu_convert_to_nhwc",
            attrs=attrs,
        )

    # NHCWB16 -> NHWC: output channel cc lives in brick cc // 16 of the input.
    # The te.sum over the dummy reduce axis keeps a data dependency on the
    # whole input brick (see docstring for why this is needed).
    rc = te.reduce_axis((0, 16), name="rc")

    def gather(nn, hh, ww, cc):
        return te.sum(tensor(nn, hh, te.indexdiv(cc, 16), ww, te.indexmod(rc, 16)), axis=rc)

    nhwc_shape = (tensor.shape[0], tensor.shape[1], tensor.shape[3], channels)
    return te.compute(
        nhwc_shape,
        gather,
        name="ethosu_convert_to_nhwc",
        attrs=attrs,
    )
def convert_to_nhwc_compute(tensor: te.Tensor, layout: str, channels: int) -> te.Tensor:
    """Converts a tensor into NHWC layout if it's in NHWCB16 layout.

    NOTE(review): this is a second, identically-named definition in this
    file; at import time it shadows the earlier one. Unlike that variant it
    reads a single lane of the 16-wide brick directly (indexmod(cc, 16)) and
    inserts no reduce-sum data dependency, which the earlier definition's
    docstring says is needed to stop TVM eliding the copy -- confirm which
    definition is the intended one.

    Parameters
    ----------
    tensor : te.Tensor
        The tensor to convert.
    layout : str
        The layout of the tensor, either NHWC or NHCWB16.
    channels : int
        The number of valid channels for the tensor.

    Returns
    -------
    te.Tensor
        The converted tensor in NHWC layout.
    """
    assert layout in {"NHWC", "NHCWB16"}
    convert_to_nhwc_attrs = {
        "op": "ethosu_convert_to_nhwc",
        "layout": layout,
    }
    if layout == "NHCWB16":
        # NHCWB16 -> NHWC: channel cc maps to brick cc // 16, lane cc % 16.
        return te.compute(
            (tensor.shape[0], tensor.shape[1], tensor.shape[3], channels),
            lambda nn, hh, ww, cc: tensor(nn, hh, te.indexdiv(cc, 16), ww, te.indexmod(cc, 16)),
            name="ethosu_convert_to_nhwc",
            attrs=convert_to_nhwc_attrs,
        )
    # Already NHWC: identity copy.
    return te.compute(
        tensor.shape,
        lambda *i: tensor(*i),
        name="ethosu_convert_to_nhwc",
        attrs=convert_to_nhwc_attrs,
    )