def quantize(data, shift_bits, target_bits=relay.const(7, dtype='int32')):
    """Build the Relay expressions that scale ``data`` down and cast it to int8.

    The shift amount is computed inside the graph from the largest absolute
    value of ``data``::

        shift = max(ceil(ln(max(|data|)) / ln(2)) - target_bits, 0)

    ``data`` is then divided by ``1 << shift``, clipped to [-128, 127] and
    cast to ``int8``. The scheme is kept consistent with @yx's source code.

    Open question (Longtao.Wang): should the shift amount take part in the
    network control flow? In the MXNet quantization written with truman's
    code, the bit count of ``max_v`` is turned into a plain Python integer
    with ``asscalar()``; no equivalent was found in Relay. It is therefore
    unclear whether the ``shift_bits == -1`` condition belongs in the model
    graph or should stay in the Python code flow.

    Parameters
    ----------
    data : tvm.relay.Expr
        Expression to quantize; it is fed directly to ``relay.abs``,
        ``relay.min`` and ``relay.divide``.
    shift_bits : tvm.relay.Expr
        Never read by this function. According to @yx's source code the
        caller always passes the constant Expr(-1); the shift is recomputed
        from ``data`` instead.
    target_bits : tvm.relay.Expr
        Bit width subtracted from the computed bit count. Defaults to the
        int32 constant 7.

    Returns
    -------
    tuple
        ``(out, max_v, min_v, shift_bits)``: the int8 expression, the maximum
        of ``|data|``, the minimum of ``data`` and the computed shift amount.
    """
    abs_peak = relay.max(relay.abs(data))
    lowest = relay.min(data)

    # Bit count of the peak magnitude: ceil(ln(peak) / ln(2)).
    peak_f32 = relay.cast(abs_peak, 'float32')
    log2_peak = relay.divide(relay.log(peak_f32), relay.log(relay.const(2.)))
    n_bits = relay.ceil(log2_peak).astype('int32')

    # Only the bits above the target width are shifted out; the shift is
    # floored at zero.
    shift = relay.maximum(relay.subtract(n_bits, target_bits), relay.const(0))

    # Following @yx's code, divide by 2**shift instead of using right_shift,
    # because of how possibly negative numbers are rounded.
    divisor = relay.left_shift(relay.const(1), relay.cast(shift, 'int32'))
    scaled = relay.divide(data, divisor)

    clipped = relay.clip(scaled, a_min=-128, a_max=127)
    return relay.cast(clipped, 'int8'), abs_peak, lowest, shift
def expected():
    """Build a Relay function that calls a primitive-tagged ``min(p0 < 10)``.

    An inner function over the scalar int32 variable ``p0`` computes
    ``min(p0 < 10)`` and is given the "Primitive" attribute with the int32
    immediate 1. The returned outer function takes a scalar int32 variable
    ``x`` and calls the inner function on it.

    NOTE(review): judging by its name this is presumably the reference graph
    a test compares a transformed graph against; the comparison itself is not
    visible in this block.
    """
    # Inner function holding the comparison and the reduction.
    param = relay.var("p0", shape=(), dtype="int32")
    threshold = relay.const(10, dtype="int32")
    inner_body = relay.min(relay.less(param, threshold))
    primitive_fn = relay.Function([param], inner_body).with_attr(
        "Primitive", tvm.tir.IntImm("int32", 1)
    )

    # Outer function that only forwards its argument to the inner one.
    arg = relay.var("x", shape=(), dtype="int32")
    return relay.Function([arg], relay.Call(primitive_fn, [arg]))
def before():
    """Build a Relay function computing ``min(x < 10)``.

    ``x`` is a scalar (shape ``()``) int32 variable; it is compared against
    the int32 constant 10 and the comparison result is reduced with
    ``relay.min``.
    """
    arg = relay.var("x", shape=(), dtype="int32")
    threshold = relay.const(10, dtype="int32")
    is_below = relay.less(arg, threshold)
    return relay.Function([arg], relay.min(is_below))