def _get_pixel_value(n, c, h, w):
    # Read data[n, c, h, w]; h/w may fall outside the input, so handle
    # out-of-bounds reads according to padding_mode.
    if padding_mode == "zeros":
        # Out-of-bounds reads produce 0.
        return te.if_then_else(
            te.all(h >= 0, w >= 0, h < in_height, w < in_width),
            data[n, c, h, w],
            tir.const(0.0, dtype=data.dtype),
        )
    if padding_mode == "border":
        # Out-of-bounds reads are clamped to the nearest edge pixel.
        h_b = te.max(te.min(h, in_height - 1), 0)
        w_b = te.max(te.min(w, in_width - 1), 0)
        return data[n, c, h_b, w_b]
    raise AssertionError("unsupported padding_mode")
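
# Hypothetical usage sketch (not TVM's actual grid_sample): an enclosing
# scope that supplies the names `_get_pixel_value` closes over, then reads
# shifted off the edge of the input so the padding logic is exercised.
# All shapes and names below are illustrative assumptions.
import tvm
from tvm import te, tir

in_height, in_width = 8, 8
padding_mode = "border"  # or "zeros"
data = te.placeholder((1, 3, in_height, in_width), name="data")

# With `_get_pixel_value` defined as above in this scope:
padded = te.compute(
    (1, 3, in_height + 2, in_width + 2),
    lambda n, c, h, w: _get_pixel_value(n, c, h - 1, w - 1),
    name="padded_sample",
)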
def _compute_intn(dtype, value, *indices):
    assert output_scale is not None and output_zero_point is not None
    # Clamp the requantized value into the representable range of `dtype`.
    const_min = tvm.tir.min_value(dtype)
    const_max = tvm.tir.max_value(dtype)
    # Use indexmod to handle both scalar and per-channel QNN parameters.
    scale_idx = tir.indexmod(indices[axis], topi.shape(output_scale)[0])
    zp_idx = tir.indexmod(indices[axis], topi.shape(output_zero_point)[0])
    return te.max(
        te.min(
            te.round(value[indices] / output_scale[scale_idx]) + output_zero_point[zp_idx],
            const_max,
        ),
        const_min,
    )
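
# Hypothetical NumPy reference for the same requantization rule used above:
# q = clamp(round(x / scale) + zero_point, int_min, int_max). Broadcasting
# plays the role of the indexmod trick for scalar vs. per-channel
# parameters. Names and defaults here are illustrative assumptions.
import numpy as np

def quantize_ref(x, scale, zero_point, axis=1, dtype=np.int8):
    info = np.iinfo(dtype)
    scale = np.asarray(scale, dtype=np.float32)
    zero_point = np.asarray(zero_point, dtype=np.int32)
    if scale.ndim == 1:
        # Per-channel parameters: reshape so they broadcast along `axis`.
        shape = [1] * x.ndim
        shape[axis] = -1
        scale = scale.reshape(shape)
        zero_point = zero_point.reshape(shape)
    q = np.round(x / scale) + zero_point
    return np.clip(q, info.min, info.max).astype(dtype)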
def test_basic_operation():
    np.random.seed(0)
    shape = (10, 10)
    x = te.var("x", dtype='float32')
    k = te.reduce_axis((0, 10), name="k")
    l = te.reduce_axis((0, 10), name="l")
    A0 = te.placeholder(shape, name='A0')
    A1 = te.placeholder(shape, name='A1')
    zeros = np.zeros(shape)

    B = te.compute(shape, lambda i, j: A0[i, j], name='B')
    check_grad(B, [A0])

    B = te.compute(shape, lambda i, j: A0[i, j] + A1[i, j], name='B')
    check_grad(B, [A0, A1])

    B = te.compute(shape, lambda i, j: A0[i, j] + A0[j, i], name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.floor(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.ceil(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.trunc(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.round(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: A0[i, j] + te.exp(A0[j, i]), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.log(0.1 + te.abs(A0[i, j] + te.exp(A0[j, i]))), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sigmoid(A0[i, j] * A0[i, j] * A0[j, i]), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.tanh(A0[i, j] * A0[i, j] * A0[j, i]), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sqrt(A0[i, j] * A0[i, j] * A0[j, i]), name='B')
    check_grad(B, A0, data_range=(0.1, 10))

    B = te.compute(shape, lambda i, j: te.power(te.abs(A0[i, j]), A0[j, i]), name='B')
    check_grad(B, A0, data_range=(-4, 4))

    B = te.compute(shape, lambda i, j: A0[i, j] * A0[j, i], name='B')
    check_grad(B, A0)

    B = te.compute((10,), lambda i: te.sum(A0[i, k] * A0[k, i], axis=k), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sum(A0[i, k] * A0[k, i] + 5, axis=k), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.max(A0[i, k] * A0[k, j] + 5, axis=k), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: A0[i, j] * (A1[j, i] + A0[j, i]), name='B')
    check_grad(B, [A0, A1])

    B = te.compute(shape, lambda i, j: te.sum(A0[k, k] - A0[te.min(j + k, 9), j] * A0[i, k], axis=k), name='B')
    check_grad(B, A0)

    def fcombine(x, y):
        return x * y

    def fidentity(t0):
        return tvm.tir.const(1, t0)

    prod = te.comm_reducer(fcombine, fidentity, name='prod')
    B = te.compute((10, 10), lambda i, j: prod(A0[i, k] + A0[k, i], axis=k), name='B')
    check_grad(B, A0)

    X = te.placeholder((10,), name='X')
    A = te.compute((10,), lambda i: X[i] + X[9 - i])
    B = te.compute((10,), lambda i: X[i] * X[9 - i])
    Y = topi.tensordot(A, B, 1)
    check_grad(Y, X)
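
# Hypothetical sketch of the `check_grad` helper used throughout this test
# (the real helper lives in TVM's test utilities and also compares against
# numerical differentiation): differentiate with te.gradient, compile the
# output and its gradients together, and run them on random inputs.
import numpy as np
import tvm
from tvm import te

def check_grad_sketch(out, inputs, data_range=(-1, 1)):
    inputs = inputs if isinstance(inputs, list) else [inputs]
    grads = te.gradient(out, inputs)  # reverse-mode AD on the TE graph
    sched = te.create_schedule([t.op for t in [out] + grads])
    func = tvm.build(sched, inputs + [out] + grads)

    lo, hi = data_range
    args = [tvm.nd.array(np.random.uniform(lo, hi, [int(d) for d in t.shape]).astype(t.dtype))
            for t in inputs]
    outs = [tvm.nd.empty([int(d) for d in t.shape], t.dtype) for t in [out] + grads]
    func(*(args + outs))
    return [g.numpy() for g in outs[1:]]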
def _clip_coordinates(x, size):
    # Clamp coordinate x into the valid index range [0, size - 1].
    return te.min(te.max(x, 0), size - 1)
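
# NumPy sanity check: _clip_coordinates clamps an index into [0, size - 1],
# matching the "border" branch of _get_pixel_value above.
import numpy as np

def clip_coordinates_ref(x, size):
    return np.minimum(np.maximum(x, 0), size - 1)

assert clip_coordinates_ref(-3, 8) == 0   # below range -> 0
assert clip_coordinates_ref(11, 8) == 7   # above range -> size - 1
assert clip_coordinates_ref(5, 8) == 5    # in range -> unchanged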
def process_post_ops(layer_idx, Input, Bias, post_op, pack=False, out_dtype="float32"):
    if pack:
        _, _, _, _, OC_vec = Input.shape
        BiasAdd = te.compute(
            Input.shape,
            lambda n, c_chunk, h, w, c_vec: Input[n, c_chunk, h, w, c_vec] + Bias[c_chunk * OC_vec + c_vec],
            name='FusedConv2D_BiasAdd_{}'.format(layer_idx),
            tag='biasadd')
    else:
        BiasAdd = te.compute(
            Input.shape,
            lambda n, h, w, c: Input[n, h, w, c] + Bias[c],
            name='FusedConv2D_BiasAdd_{}'.format(layer_idx),
            tag='biasadd')

    # TODO: Recover this
    # if block_input is not None:
    #     inputs = block_input if isinstance(block_input, list) else [block_input]
    #     First = inputs[0]  # TODO: Support multiple branches addition later
    #     Last = self.stages[-1][-1]  # Output if post_op is None, BiasAdd if it's not None
    #     assert sorted(get_const_tuple(First.shape)) == sorted(get_const_tuple(Last.shape)), \
    #         '{} is not the same as {}'.format(First.shape, Last.shape)
    #     if self.pack:
    #         Output = te.compute(self.output_shape,
    #                             lambda n, c_chunk, h, w, c_vec: First[n, c_chunk, h, w, c_vec] + Last[n, c_chunk, h, w, c_vec],
    #                             name='ElementwiseAddOutput_{}'.format(self.layer_idx),
    #                             tag='elem_{}'.format(tag_suffix))
    #     else:
    #         Output = te.compute(self.output_shape,
    #                             lambda n, h, w, c: First[n, h, w, c] + Last[n, h, w, c],
    #                             name='ElementwiseAddOutput_{}'.format(self.layer_idx),
    #                             tag='elem_{}'.format(tag_suffix))
    #     self.stages[-1].append(Output)
    #     Last = self.stages[-1][-1]  # BiasAdd if it's not a block, Output if it's a block
    # Else: only bias_add
    Last = BiasAdd

    if post_op == 'relu':
        Last = te.compute(
            Last.shape,
            lambda *i: te.max(Last(*i), tvm.runtime.const(0, Last.dtype)),
            name='FusedConv2D_ReLU_{}'.format(layer_idx),
            tag='relu')
    elif post_op == 'sigmoid':
        Last = te.compute(
            Last.shape,
            lambda *i: te.sigmoid(Last(*i)),
            name='FusedConv2D_Sigmoid_{}'.format(layer_idx),
            tag='sigmoid')
    elif post_op == 'relu6':
        Last = te.compute(
            Last.shape,
            lambda *i: te.min(te.max(Last(*i), tvm.runtime.const(0, Last.dtype)),
                              tvm.runtime.const(6, Last.dtype)),
            name='FusedConv2D_ReLU6_{}'.format(layer_idx),
            tag='relu6')
    return Last
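
# Hypothetical usage sketch: fuse a bias-add and ReLU6 onto an NHWC conv
# output. `Conv`, `Bias`, and the shapes are illustrative assumptions;
# this assumes `tvm` and `te` are imported as in the rest of the module.
Conv = te.placeholder((1, 56, 56, 64), name='Conv')  # NHWC conv output
Bias = te.placeholder((64,), name='Bias')
Out = process_post_ops(0, Conv, Bias, post_op='relu6', pack=False)
s = te.create_schedule(Out.op)
mod = tvm.build(s, [Conv, Bias, Out])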