def _get_pixel(n, c, y, x):
    """Read the input element that feeds output position (n, c, y, x).

    The output channel ``c`` is split with truncating division by ``in_c``:
    the remainder is the source channel and the quotient is a flat offset
    inside a ``block_size`` x ``block_size`` spatial block.

    Uses ``data``, ``in_c``, ``block_size`` and ``layout`` from the
    surrounding scope. Any ``layout`` other than 'NCHW' is indexed as
    channels-last.
    """
    offset_in_block = tvm.truncdiv(c, in_c)
    src_channel = tvm.truncmod(c, in_c)
    # Unflatten the in-block offset into its column / row components.
    col_in_block = tvm.truncmod(offset_in_block, block_size)
    row_in_block = tvm.truncdiv(offset_in_block, block_size)

    src_y = row_in_block + (y * block_size)
    src_x = col_in_block + (x * block_size)
    if layout == 'NCHW':
        return data(n, src_channel, src_y, src_x)
    return data(n, src_y, src_x, src_channel)
def _get_pixel(n, c, y, x):
    """Read the input element that feeds output position (n, c, y, x).

    The output spatial coordinates are split with truncating division by
    ``block_size`` into a source pixel (quotient) and a position inside the
    block (remainder). The in-block position then selects the source channel:

    * ``mode == "DCR"``: ``channel_factor * (block_size * idx_y + idx_x) + c``
    * any other mode:    ``c * block_size**2 + (block_size * idx_y + idx_x)``

    Uses ``data``, ``block_size``, ``channel_factor``, ``mode`` and
    ``layout`` from the surrounding scope. Any ``layout`` other than 'NCHW'
    is indexed as channels-last.
    """
    src_x = tvm.truncdiv(x, block_size)
    src_y = tvm.truncdiv(y, block_size)
    col_in_block = tvm.truncmod(x, block_size)
    row_in_block = tvm.truncmod(y, block_size)

    # Flat row-major position of (row, col) inside one block.
    offset_in_block = (block_size * row_in_block) + col_in_block
    if mode == "DCR":
        src_channel = channel_factor * offset_in_block + c
    else:
        src_channel = (c * block_size * block_size) + offset_in_block

    if layout == 'NCHW':
        return data(n, src_channel, src_y, src_x)
    return data(n, src_y, src_x, src_channel)
def test_everything_during_deduction():
    """Check LoopPartition on a guard that divides one loop var by another.

    Builds a two-level loop nest whose body is wrapped in a ``likely``
    condition ``truncdiv(i, j) < m``, runs LoopPartition and Simplify, and
    asserts that the statement two ``.body`` levels down is an IfThenElse.
    """
    m = tvm.size_var('m')
    n = tvm.size_var('n')
    builder = tvm.ir_builder.create()
    with builder.for_range(0, n, 'i') as outer, \
            builder.for_range(0, 32, 'j') as inner:
        # this guard will produce everything during deduction
        guard = builder.likely(tvm.truncdiv(outer, inner) < m)
        with builder.if_scope(guard):
            builder.emit(tvm.make.Evaluate(m))

    partitioned = tvm.ir_pass.LoopPartition(builder.get(), False)
    simplified = tvm.ir_pass.Simplify(partitioned)
    assert isinstance(simplified.body.body, tvm.stmt.IfThenElse)
def test_condition():
    """Check that no Select survives in the first partitioned statement.

    Builds an ``i`` loop of extent ``truncdiv(n + 3, 4)`` around a ``j`` loop
    of extent 4 whose body evaluates ``Select(likely(i * 4 + j < n), m, n)``,
    runs LoopPartition and Simplify, and asserts that visiting ``stmt[0]``
    finds no ``tvm.expr.Select`` node.
    """
    builder = tvm.ir_builder.create()
    m = tvm.size_var('m')
    n = tvm.size_var('n')
    outer_extent = tvm.truncdiv(n + 3, 4)
    with builder.for_range(0, outer_extent, 'i') as i, \
            builder.for_range(0, 4, 'j') as j:
        in_bounds = builder.likely(i * 4 + j < n)
        builder.emit(tvm.make.Evaluate(tvm.make.Select(in_bounds, m, n)))

    partitioned = tvm.ir_pass.LoopPartition(builder.get(), False)
    simplified = tvm.ir_pass.Simplify(partitioned)
    # NOTE(review): collect_visit is defined elsewhere in this file; it
    # presumably applies the callback to every visited node - confirm.
    select_hits = collect_visit(
        simplified[0], lambda node: isinstance(node, tvm.expr.Select))
    assert not any(select_hits)
def space_to_depth(data, block_size, layout='NCHW'):
    """Rearrange spatial blocks of the input into the channel dimension.

    Parameters
    ----------
    data : tvm.Tensor
        4-D tensor in either NCHW or NHWC layout.

    block_size : int
        Edge length of the spatial blocks folded into the channel dimension.

    layout : string
        Either NCHW or NHWC, indicating data layout.

    Returns
    -------
    output : tvm.Tensor
        Output of shape [N, C * block_size**2, H / block_size, W / block_size]
        for NCHW; for NHWC the channel extent is placed last instead.
        The spatial extents use truncating division.

    Raises
    ------
    ValueError
        If layout is neither 'NCHW' nor 'NHWC'.
    """
    if layout == 'NCHW':
        batch, channels, height, width = data.shape
    elif layout == 'NHWC':
        batch, height, width, channels = data.shape
    else:
        raise ValueError("Only NCHW and NHWC layouts are currently supported.")

    channels_first = layout == 'NCHW'
    out_channels = channels * block_size * block_size
    out_height = tvm.truncdiv(height, block_size)
    out_width = tvm.truncdiv(width, block_size)
    if channels_first:
        output_shape = [batch, out_channels, out_height, out_width]
    else:
        output_shape = [batch, out_height, out_width, out_channels]

    def _compute(*indices):
        # Normalise the output index order to (n, c, y, x).
        if channels_first:
            n, c, y, x = indices
        else:
            n, y, x, c = indices

        # Output channel = in-block offset * input channels + source channel.
        offset_in_block = tvm.truncdiv(c, channels)
        src_channel = tvm.truncmod(c, channels)
        col_in_block = tvm.truncmod(offset_in_block, block_size)
        row_in_block = tvm.truncdiv(offset_in_block, block_size)

        src_y = row_in_block + (y * block_size)
        src_x = col_in_block + (x * block_size)
        if channels_first:
            return data(n, src_channel, src_y, src_x)
        return data(n, src_y, src_x, src_channel)

    return tvm.compute(output_shape,
                       _compute,
                       name='space_to_depth',
                       tag=tag.INJECTIVE)
def depth_to_space(data, block_size, layout='NCHW', mode='DCR'):
    """Rearrange channel data of the input into spatial blocks.

    Parameters
    ----------
    data : tvm.Tensor
        4-D tensor in either NCHW or NHWC layout.

    block_size : int
        Edge length of the spatial blocks built from the channel dimension.

    layout : string
        Either NCHW or NHWC, indicating data layout.

    mode : string
        Either DCR or CDR, indicates how channels should be accessed.
        In DCR, channels are interwoven in the Tensorflow style while
        in CDR channels are accessed sequentially as in Pytorch.
        Only the exact value 'DCR' selects the interwoven ordering; every
        other value takes the sequential branch.

    Returns
    -------
    output : tvm.Tensor
        Output of shape [N, C / block_size**2, H * block_size, W * block_size]
        for NCHW; for NHWC the channel extent is placed last instead.
        The channel extent uses truncating division.

    Raises
    ------
    ValueError
        If layout is neither 'NCHW' nor 'NHWC'.
    """
    if layout == 'NCHW':
        batch, channels, height, width = data.shape
    elif layout == 'NHWC':
        batch, height, width, channels = data.shape
    else:
        raise ValueError("Only NCHW and NHWC layouts are currently supported.")

    channels_first = layout == 'NCHW'
    # Number of output channels; also the channel stride used in DCR mode.
    channel_factor = tvm.truncdiv(channels, (block_size * block_size))
    out_height = height * block_size
    out_width = width * block_size
    if channels_first:
        output_shape = [batch, channel_factor, out_height, out_width]
    else:
        output_shape = [batch, out_height, out_width, channel_factor]

    def _compute(*indices):
        # Normalise the output index order to (n, c, y, x).
        if channels_first:
            n, c, y, x = indices
        else:
            n, y, x, c = indices

        src_x = tvm.truncdiv(x, block_size)
        src_y = tvm.truncdiv(y, block_size)
        col_in_block = tvm.truncmod(x, block_size)
        row_in_block = tvm.truncmod(y, block_size)

        # Flat row-major position of (row, col) inside one block.
        offset_in_block = (block_size * row_in_block) + col_in_block
        if mode == "DCR":
            src_channel = channel_factor * offset_in_block + c
        else:
            src_channel = (c * block_size * block_size) + offset_in_block

        if channels_first:
            return data(n, src_channel, src_y, src_x)
        return data(n, src_y, src_x, src_channel)

    return tvm.compute(output_shape,
                       _compute,
                       name='depth_to_space',
                       tag=tag.INJECTIVE)
def func2():
    """Build ``exp(truncdiv((x + y + 1) * y, 4))`` from the outer ``x``, ``y``."""
    numerator = (x + y + 1) * y
    quotient = tvm.truncdiv(numerator, 4)
    return tvm.exp(quotient)