def quantize(expr, type_map):
    """Turn a quantize op into requantize or remove it"""
    out = expr.args[0]
    t = type_map[out]
    in_scale = fold_constant(t.scale)
    in_zero_point = fold_constant(t.zero_point)
    if not (
        approx_equal(in_scale, expr.args[1])
        and approx_equal(in_zero_point, expr.args[2])
        and tvm.ir.structural_equal(t.dtype, expr.attrs.out_dtype)
    ):
        # The incoming affine type doesn't match the requested one, so insert
        # a requantize to convert between the two parameter sets.
        out = relay.qnn.op.requantize(
            out,
            in_scale,
            in_zero_point,
            expr.args[1],
            expr.args[2],
            out_dtype=expr.attrs.out_dtype,
            axis=t.axis,
        )

    return [
        out,
        TensorAffineType(expr.args[1], expr.args[2], expr.attrs.out_dtype, expr.attrs.axis),
    ]
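# Requantize converts between affine types. With the convention
# real ~= scale * (quantized - zero_point), mapping (s_in, z_in) to
# (s_out, z_out) amounts to q_out = round(s_in / s_out * (q_in - z_in)) + z_out.
# A minimal standalone sketch of that arithmetic (illustrative values only,
# not part of this pass; the real qnn.requantize op additionally handles
# rounding modes, saturation, and per-channel axes):
#
#     import numpy as np
#
#     s_in, z_in, s_out, z_out = 0.5, 10, 0.25, 0
#     q_in = np.array([12, 14, 20])
#     q_out = np.round(s_in / s_out * (q_in - z_in)) + z_out
#     # Both parameter sets decode to the same real values:
#     assert np.allclose(s_in * (q_in - z_in), s_out * (q_out - z_out))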
def batch_matmul(expr, type_map):
    """Rewrite a batch_matmul op"""
    x, y = expr.args
    x_t = type_map[x]
    y_t = type_map[y]
    matmul_scale = fold_constant(x_t.scale * y_t.scale)
    matmul_zp = relay.const(0)
    out = relay.qnn.op.batch_matmul(x, y, x_t.zero_point, y_t.zero_point, x_t.scale, y_t.scale)
    return [out, TensorAffineType(matmul_scale, matmul_zp, out.attrs.out_dtype, x_t.axis)]
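# Why the output affine type has scale s_x * s_y and zero point 0: with
# x ~= s_x * (q_x - z_x) and y ~= s_y * (q_y - z_y), the real product is
# x @ y ~= s_x * s_y * ((q_x - z_x) @ (q_y - z_y)), so the integer
# accumulator already lives at scale s_x * s_y with no offset. A numpy
# sanity check of that identity (made-up values, not part of the pass):
#
#     import numpy as np
#
#     s_x, z_x, s_y, z_y = 0.5, 3, 0.25, 1
#     q_x = np.array([[4, 5]])
#     q_y = np.array([[2], [6]])
#     real = (s_x * (q_x - z_x)) @ (s_y * (q_y - z_y))
#     integer = (q_x - z_x) @ (q_y - z_y)
#     assert np.allclose(real, s_x * s_y * integer)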
def dense(expr, type_map):
    """Rewrite a dense op"""
    attrs = {**expr.attrs}
    # Drop the floating-point out_dtype so qnn.dense falls back to its
    # default int32 accumulator.
    attrs.pop("out_dtype")
    x, weight = expr.args
    x_t = type_map[x]
    w_t = type_map[weight]
    dense_scale = fold_constant(x_t.scale * w_t.scale)
    dense_zp = relay.const(0)
    out = relay.qnn.op.dense(
        x, weight, x_t.zero_point, w_t.zero_point, x_t.scale, w_t.scale, **attrs
    )
    return [out, TensorAffineType(dense_scale, dense_zp, out.attrs.out_dtype)]
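# A hedged end-to-end sketch of how these rewrites get exercised: build a
# "fake quantized" dense (dequantize -> nn.dense -> quantize) and run the
# FakeQuantizationToInteger pass over it. Shapes and quantization parameters
# below are made up for illustration.
#
#     import tvm
#     from tvm import relay
#
#     x = relay.var("x", shape=(1, 4), dtype="int8")
#     w = relay.var("w", shape=(8, 4), dtype="int8")
#     zero = relay.const(0)
#     dq_x = relay.qnn.op.dequantize(x, relay.const(0.5), zero)
#     dq_w = relay.qnn.op.dequantize(w, relay.const(0.25), zero)
#     fp_out = relay.nn.dense(dq_x, dq_w)
#     q_out = relay.qnn.op.quantize(fp_out, relay.const(0.125), zero, out_dtype="int8")
#     mod = tvm.IRModule.from_expr(q_out)
#     mod = tvm.relay.transform.InferType()(mod)
#     # After the pass, the dense runs as qnn.dense on integers end to end:
#     mod = tvm.relay.transform.FakeQuantizationToInteger()(mod)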
def conv2d(expr, type_map):
    """Rewrite a conv2d op"""
    attrs = {**expr.attrs}
    attrs.pop("out_dtype")
    x, weight = expr.args
    x_t = type_map[x]
    w_t = type_map[weight]
    conv_scale = fold_constant(x_t.scale * w_t.scale)
    conv_zp = get_zeros(conv_scale)
    out = relay.qnn.op.conv2d(
        x, weight, x_t.zero_point, w_t.zero_point, x_t.scale, w_t.scale, **attrs
    )
    # Locate the channel axis (C in NCHW terms) of the output tensor so the
    # per-channel scale in the affine type lines up with the output layout.
    out_layout = attrs["out_layout"] if attrs["out_layout"] != "" else attrs["data_layout"]
    out_axis = bijective_layout(out_layout, "NCHW").backward_index(list(range(4)))[1]
    return [out, TensorAffineType(conv_scale, conv_zp, out.attrs.out_dtype, out_axis.value)]
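# A small sketch of the index mapping used above: bijective_layout's
# backward_index converts an index in the dst layout ("NCHW" here) into the
# equivalent index in the src layout, so feeding the identity index exposes
# the axis permutation between the two layouts (illustrative only, assuming
# tvm.tir.bijective_layout):
#
#     from tvm.tir import bijective_layout
#
#     # The element at NCHW index (n, c, h, w) = (0, 1, 2, 3) lives at
#     # NHWC index (n, h, w, c) = (0, 2, 3, 1):
#     idx = bijective_layout("NHWC", "NCHW").backward_index(list(range(4)))
#     print([i.value for i in idx])  # [0, 2, 3, 1]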