def quantize(data, shift_bits, target_bits=relay.const(7, dtype='int32')):
    """Quantize the output of a layer, consistent with the source code.

    @yx Question: should shift_bits participate in the network control flow?
    In the MXNet quantization from Truman's code, the bit width of max_v is
    converted to a plain integer using the function `asscalar()`. However, I
    cannot find a related function in Relay. I am unsure about the
    control-flow logic in the model network: should the condition
    `shift_bits == -1` be part of the model network, or should it stay in the
    Python-side control flow?

    By Longtao.Wang

    Parameters
    ----------
    shift_bits: tvm.relay.Expr
        The shift_bits parameter is never used; according to @yx's source
        code it is always the constant Expr(-1).
    """
    max_v = relay.max(relay.abs(data))
    min_v = relay.min(data)

    ln_max_v = relay.log(relay.cast(max_v, 'float32'))
    ln_2 = relay.log(relay.const(2.))
    total_bits = relay.ceil(relay.divide(ln_max_v, ln_2))  # ceil( ln(max_v) / ln(2) )

    shift_bits = relay.subtract(total_bits.astype('int32'), target_bits)
    shift_bits = relay.maximum(shift_bits, relay.const(0))

    denominator = relay.left_shift(relay.const(1),
                                   relay.cast(shift_bits, 'int32'))
    out = relay.divide(data, denominator)
    # According to @yx's code, use a divide operation instead of a shift op
    # for correct rounding of possibly negative numbers.
    # out = relay.right_shift(data, shift_bits)

    out = relay.cast(relay.clip(out, a_min=-128, a_max=127), 'int8')
    return out, max_v, min_v, shift_bits
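# A minimal usage sketch (not part of the original code): build a graph that
# quantizes an int32 tensor down to int8 with `quantize` above. The input
# shape and dtype here are illustrative assumptions.
def _example_quantize_usage():
    data = relay.var("data", shape=(1, 32, 28, 28), dtype="int32")
    out, max_v, min_v, sb = quantize(data, shift_bits=relay.const(-1, dtype='int32'))
    func = relay.Function([data], out)
    print(func)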
def expected(x, conv_weight, in_bias, in_scale, in_channels, channels, blocking):
    # use a fixed order of args so alpha equal check can pass
    args = [x, conv_weight, in_bias]
    if blocking:
        squeezed_scale = relay.squeeze(in_scale, axis=[0, 2, 3])
        x = relay.nn.relu(x)
        in_bias = relay.divide(
            in_bias,
            relay.reshape(
                squeezed_scale, (1, in_channels // blocking[0], 1, 1, blocking[0])
            ),
        )  # NCHWc
        x = relay.add(x, in_bias)
        conv_weight = relay.multiply(
            conv_weight, relay.reshape(squeezed_scale, (1, in_channels // 2, 1, 1, 2, 1))
        )  # OIHWio
    else:
        squeezed_scale = relay.squeeze(in_scale, axis=[1, 2])
        x = relay.nn.relu(x)
        in_bias = relay.divide(
            in_bias, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)
        )
        x = relay.add(x, in_bias)
        conv_weight = relay.multiply(
            conv_weight, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)
        )

    y = relay.nn.conv2d(
        x,
        conv_weight,
        channels=channels,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NCHW{}c".format(blocking[0]) if blocking else "NCHW",
        kernel_layout="OIHW2i{}o".format(blocking[1]) if blocking else "OIHW",
    )
    return relay.Function(args, y)
def expected(x, weight, in_bias, in_scale):
    # use a fixed order of args so alpha equal check can pass
    args = [x, weight, in_bias]
    x = relay.nn.relu(x)
    in_bias = relay.divide(in_bias, in_scale)
    x = relay.add(x, in_bias)
    weight = relay.multiply(weight, in_scale)
    y = relay.nn.dense(x, weight)
    return relay.Function(args, y)
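# A hedged sketch (not in the original tests) of how an `expected` fixture
# like the one above is typically checked: run FoldScaleAxis on the unfolded
# graph built by a matching `before` helper (assumed to be defined alongside
# `expected` in the real test) and compare the two results structurally.
def _example_fold_scale_check(x, weight, in_bias, in_scale):
    y = before(x, weight, in_bias, in_scale)  # unfolded graph; assumed helper
    y = run_opt_pass(y, transform.InferType())
    y_folded = run_opt_pass(y, transform.FoldScaleAxis())
    y_expected = expected(x, weight, in_bias, in_scale)
    y_expected = run_opt_pass(y_expected, transform.InferType())
    assert tvm.ir.structural_equal(y_folded, y_expected)  # alpha_equal on older TVM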
def make_mnist_graph():
    data = relay.var("data", relay.TensorType((1, 1, 28, 28), "int8"))
    out, _, _, sb0 = make_conv_relu(data, (3, 3), (1, 1), (1, 1), 32, "cv0")
    out, max_v, min_v, sb1 = make_conv_relu(out, (3, 3), (1, 1), (1, 1), 32, "cv1")
    mp = make_max_pool(out)
    out, _, _, _ = make_conv_relu(mp, (1, 1), (0, 0), (1, 1), 32, "cv2")
    out, _, _, _ = make_conv_relu(out, (3, 3), (1, 1), (1, 1), 32, "cv3")
    out = relay.add(relay.divide(out, relay.const(2, dtype='int8')),
                    relay.divide(mp, relay.const(2, dtype='int8')))  # shortcut layer
    out = make_max_pool(out)
    out = relay.nn.batch_flatten(out).astype('int8')
    out, _, _, _ = make_dense(out, 256, "dense0")
    out = relay.nn.relu(out)
    out, max_v, min_v, sb = make_dense(out, 10, "dense1")
    print("Free vars: ", relay.ir_pass.free_vars(out))
    out = relay.Function(relay.ir_pass.free_vars(out), out)
    return out
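# A minimal sketch (illustrative only) of wrapping the MNIST graph into a
# module for inspection or compilation; `make_conv_relu`, `make_max_pool`,
# and `make_dense` are assumed to be defined elsewhere in this file.
def _example_mnist_module():
    func = make_mnist_graph()
    mod = tvm.IRModule.from_expr(func)  # relay.Module.from_expr on older TVM
    print(mod["main"])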
def expected(x, conv_weight, in_bias, in_scale, channels):
    # use a fixed order of args so alpha equal check can pass
    args = [x, conv_weight, in_bias]
    in_bias = relay.expand_dims(in_bias, axis=1, num_newaxis=2)
    squeezed_scale = relay.squeeze(in_scale, axis=[1, 2])
    x = relay.nn.relu(x)
    in_bias = relay.divide(
        in_bias, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)
    )
    x = relay.add(x, in_bias)
    conv_weight = relay.multiply(
        conv_weight, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2)
    )
    y = relay.nn.conv2d(
        x, conv_weight, channels=channels, kernel_size=(3, 3), padding=(1, 1)
    )
    return relay.Function(args, y)
def expected(x, conv_weight, in_bias, in_scale, channels, blocking):
    args = [x, conv_weight, in_bias]
    x = relay.nn.relu(x)
    if blocking:
        _in_scale = relay.reshape(
            in_scale, (1, 1, 1, channels // blocking[0], blocking[0])
        )  # NHWCc
    else:
        _in_scale = in_scale
    in_bias = relay.divide(in_bias, _in_scale)
    x = relay.subtract(x, in_bias)
    if blocking:
        _in_scale = relay.reshape(
            in_scale, (1, 1, 1, channels // blocking[0], 1, blocking[0])
        )  # HWIOio
    y1 = relay.nn.conv2d(
        x,
        relay.multiply(conv_weight, _in_scale),
        channels=channels,
        kernel_size=(3, 3),
        data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC",
        kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO",
        groups=channels,
        padding=(1, 1),
    )
    if blocking:
        _in_scale = relay.reshape(
            in_scale, (1, 1, 1, channels // blocking[0], 1, blocking[0])
        )  # HWIOio
    y2 = relay.nn.conv2d(
        x,
        relay.multiply(conv_weight, _in_scale),
        channels=channels,
        kernel_size=(3, 3),
        data_layout="NHWC{}c".format(blocking[0]) if blocking else "NHWC",
        kernel_layout="HWIO1i{}o".format(blocking[1]) if blocking else "HWIO",
        groups=channels,
        padding=(1, 1),
    )
    z = relay.add(y1, y2)
    return relay.Function(args, z)
def expected(x, conv_weight, in_bias, in_scale, channels):
    args = [x, conv_weight, in_bias]
    x = relay.nn.relu(x)
    in_bias = relay.divide(in_bias, in_scale)
    x = relay.subtract(x, in_bias)
    y1 = relay.nn.conv2d(
        x,
        relay.multiply(conv_weight, in_scale),
        channels=channels,
        kernel_size=(3, 3),
        data_layout="NHWC",
        kernel_layout="HWIO",
        groups=channels,
        padding=(1, 1),
    )
    y2 = relay.nn.conv2d(
        x,
        relay.multiply(conv_weight, in_scale),
        channels=channels,
        kernel_size=(3, 3),
        data_layout="NHWC",
        kernel_layout="HWIO",
        groups=channels,
        padding=(1, 1),
    )
    z = relay.add(y1, y2)
    return relay.Function(args, z)
def expected(x, conv_weight, in_bias, in_scale, channels):
    args = [x, conv_weight, in_bias, in_scale]
    x = relay.nn.relu(x)
    in_bias = relay.divide(in_bias, in_scale)
    x = relay.subtract(x, in_bias)
    # note: the conv2d kwarg is kernel_layout; the older name weight_layout
    # was removed from relay.nn.conv2d
    y1 = relay.nn.conv2d(
        x,
        relay.multiply(conv_weight, in_scale),
        channels=channels,
        kernel_size=(3, 3),
        data_layout="NHWC",
        kernel_layout="HWIO",
        groups=channels,
        padding=(1, 1),
    )
    y2 = relay.nn.conv2d(
        x,
        relay.multiply(conv_weight, in_scale),
        channels=channels,
        kernel_size=(3, 3),
        data_layout="NHWC",
        kernel_layout="HWIO",
        groups=channels,
        padding=(1, 1),
    )
    z = relay.add(y1, y2)
    return relay.Function(args, z)
def merge_transform_to_mxnet_model(mod):
    """ Add Image Transform Logic Into Model """
    svalue = np.array([123., 117., 104.])
    sub_data = relay.Constant(tvm.nd.array(svalue)).astype("float32")
    dvalue = np.array([58.395, 57.12, 57.37])
    divide_data = relay.Constant(tvm.nd.array(dvalue)).astype("float32")

    data_shape = (224, 224, 3)
    data = relay.var("data", relay.TensorType(data_shape, "float32"))

    simple_net = relay.expand_dims(data, axis=0, num_newaxis=1)
    # TODO: Relay does not support dynamic shapes yet; resize logic needs to
    # be added here in the future.
    # simple_net = relay.image.resize(simple_net, (224, 224), "NHWC", "bilinear", "align_corners")
    simple_net = relay.subtract(simple_net, sub_data)
    simple_net = relay.divide(simple_net, divide_data)
    simple_net = relay.transpose(simple_net, ((0, 3, 1, 2)))

    # merge the transform into the pretrained model network
    entry = mod["main"]
    anf = run_opt_pass(entry.body, transform.ToANormalForm())
    call = anf.value
    data, weights = call.args
    first_op = op.nn.conv2d(
        simple_net,
        weights,
        strides=call.attrs.strides,
        padding=call.attrs.padding,
        dilation=call.attrs.dilation,
        groups=call.attrs.groups,
        channels=call.attrs.channels,
        kernel_size=call.attrs.kernel_size,
        out_dtype=call.attrs.out_dtype,
    )
    net = relay.expr.Let(anf.var, first_op, anf.body)
    net = run_opt_pass(net, transform.ToGraphNormalForm())
    mod['main'] = net
    return mod
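# A hedged usage sketch: import a pretrained MXNet model through the Relay
# frontend, then prepend the transform logic above. The model choice
# (resnet18_v1) and the input shape are illustrative assumptions.
def _example_merge_transform():
    from mxnet.gluon.model_zoo import vision
    block = vision.resnet18_v1(pretrained=True)
    mod, params = relay.frontend.from_mxnet(block, shape={"data": (1, 3, 224, 224)})
    mod = merge_transform_to_mxnet_model(mod)
    print(mod["main"])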