def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    """Emit the INFOS constant global for a fully-connected layer or a
    linear+activation fusion.

    Returns True when the node was handled, False so another generator
    can try.  Also emits per-channel scale globals via gen_scales and,
    when the filter quantization is flagged for NE16, appends the NE16
    pre-normalization / pad / weight-offset bytes to the infos blob.
    """
    if isinstance(pnode, FcParameters):
        # Plain FC layer: no fused activation, so activation infos are zeros.
        gen_scales(gen, pnode, pnode, qrec)
        infos = np.array([0, 0, 0, 0, 0])
        infos_comment = "no activation"
        fnode = pnode
        filt_q = qrec
    elif (isinstance(pnode, LinearFusionParameters)
          and isinstance(fnode, FcParameters)
          and pnode.fusion_type == "linear_active"):
        # linear + activation fusion: filter quantization comes from the
        # first contained node, activation infos from the second.
        cnodes = pnode.contained_nodes()
        quants = [gen.G.quantization[NodeId(pnode, cnode)] for cnode in cnodes]
        filt_q = quants[0]
        gen_scales(gen, pnode, cnodes[0], quants[0])
        infos, infos_comment = gen_act_infos(cnodes[1], quants[1])
    else:
        return False

    infos = np.append(infos, [0, 0, 0, 0])
    comment = "BiasQ: {}".format(0) + infos_comment
    infos[5] = 0  # BiasQ

    if filt_q.cache.get('ne16'):
        # NE16 accelerator path: pre-normalization plus the input pad value
        # and weight offset split into individual bytes.
        mul_bias_q = filt_q.cache.get('mul_biases_q')
        prenorm = (mul_bias_q.pre_normalization
                   if isinstance(mul_bias_q, MultMulBiasScaleQType) else 0)
        pad_value = np.array(filt_q.in_qs[0].zero_point).astype(np.int16)
        pad_lo = np.bitwise_and(pad_value, 0xFF)
        pad_hi = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(filt_q.in_qs[1].zero_point).astype(np.int32)
        w_off_bytes = [
            np.bitwise_and(w_offset, 0xFF),
            np.bitwise_and(w_offset, 0xFF00) >> 8,
            np.bitwise_and(w_offset, 0xFF0000) >> 16,
            np.bitwise_and(w_offset, 0xFF000000) >> 24,
        ]
        infos = np.append(
            infos,
            verify_scalar([prenorm if prenorm else 0, pad_lo, pad_hi] + w_off_bytes))

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(
        file_name, QType.Pow2(bits=8, q=0, signed=True), contents=infos)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True
def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    """Emit the INFOS constant global for a MatMul node or a
    MatMul+activation fusion.

    Returns True when the node was handled, False so another generator
    can try.  Per-channel scales are emitted as a separate global via
    gen_scales; scalar scales are packed inline into the infos blob.
    NE16-flagged quantization additionally appends the NE16
    pre-normalization / pad / weight-offset bytes.
    """
    if isinstance(pnode, MatMulOpParameters):
        # Plain matmul: no fused activation, so activation infos are zeros.
        mul_node = pnode
        mul_qrec = qrec
        fnode = pnode
        infos = np.array([0, 0, 0, 0, 0])
        comment = "no activation"
    elif (isinstance(pnode, MatMulOpFusionParameters)
          and isinstance(fnode, MatMulOpParameters)):
        # matmul + activation fusion: matmul quantization from the first
        # contained node, activation infos from the second.
        cnodes = pnode.contained_nodes()
        quants = [gen.G.quantization[NodeId(pnode, cnode)] for cnode in cnodes]
        mul_node = cnodes[0]
        mul_qrec = quants[0]
        infos, comment = gen_act_infos(cnodes[1], quants[1])
    else:
        return False

    if len(mul_qrec.in_qs[1].scale) > 1:
        # Per-channel scaling: emitted as its own global, nothing inline.
        gen_scales(gen, pnode, mul_node, mul_qrec)
        pl_scale = 0
        pl_scalen = 0
    else:
        pl_scale = mul_qrec.cache['mul_biases_q'].qbiases[0]
        pl_scalen = mul_qrec.cache['mul_biases_q'].qnorms[0]
    infos = np.append(infos, [0, 0, pl_scale, pl_scalen])

    if mul_qrec.cache.get('ne16'):
        # NE16 accelerator path: pre-normalization plus the input pad value
        # and weight offset split into individual bytes.
        mul_bias_q = mul_qrec.cache.get('mul_biases_q')
        prenorm = (mul_bias_q.pre_normalization
                   if isinstance(mul_bias_q, MultMulBiasScaleQType) else 0)
        pad_value = np.array(mul_qrec.in_qs[0].zero_point).astype(np.int16)
        pad_lo = np.bitwise_and(pad_value, 0xFF)
        pad_hi = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(mul_qrec.in_qs[1].zero_point).astype(np.int32)
        w_off_bytes = [
            np.bitwise_and(w_offset, 0xFF),
            np.bitwise_and(w_offset, 0xFF00) >> 8,
            np.bitwise_and(w_offset, 0xFF0000) >> 16,
            np.bitwise_and(w_offset, 0xFF000000) >> 24,
        ]
        infos = np.append(
            infos,
            verify_scalar([prenorm if prenorm else 0, pad_lo, pad_hi] + w_off_bytes))

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(
        file_name, QType.Pow2(bits=8, q=0, signed=True), contents=infos)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True
def mult8_filter_globals_generator(gen, node, qrec, pnode, fnode) -> bool:
    """Emit scale globals for a filter node or a supported conv fusion.

    Only fires for top-level nodes (fnode must be None); returns True
    when scales were generated, False otherwise.
    """
    if fnode is not None:
        return False
    if isinstance(pnode, FilterParameters):
        gen_scales(gen, pnode, pnode, qrec)
        return True
    if isinstance(pnode, ConvFusionParameters):
        # NOTE(review): this branch reads `node` where the others use
        # `pnode` — presumably they are the same object at this call
        # site; confirm against the generator framework.
        cnodes = node.contained_nodes()
        quants = [gen.G.quantization[NodeId(node, cnode)] for cnode in cnodes]
        if node.fusion_type in ("conv_active_pool", "conv_active",
                                "linear_active", "conv_pool_active",
                                "conv_pool"):
            # Scales come from the leading filter node of the fusion.
            gen_scales(gen, pnode, cnodes[0], quants[0])
            return True
        return False
    return False
def mult8_infos_generator(gen, node, qrec, pnode, fnode) -> bool:
    """Emit activation/infos globals for mult8-quantized nodes.

    Dispatches on the node type: activations, element-wise multiply,
    softmax, matmul (plain or fused with an activation) and
    quantize/convert nodes.  Only fires for top-level nodes (fnode must
    be None); returns True when handled, False so another generator can
    try.  Large blocks of commented-out legacy handling (Conv2D, pooling
    and conv-fusion variants) from the original were removed here.
    """
    if fnode is not None:
        return False
    if isinstance(pnode, ActivationParameters):
        act_infos(gen, pnode, pnode, pnode, gen.G.quantization[NodeId(pnode)])
    elif isinstance(pnode, MatrixMulParameters):
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        scale_q = qrec.cache['scale_mul_biases_q']
        act_infos(gen, pnode, pnode, None, None,
                  extra1=scale_q.qbiases[0],
                  extra2=scale_q.qnorms[0])
    elif isinstance(pnode, SoftMaxParameters):
        act_infos(gen, pnode, pnode, pnode, qrec)
    elif isinstance(pnode, (MatMulOpParameters, MatMulOpFusionParameters)):
        if isinstance(pnode, MatMulOpFusionParameters):
            # Fusion: matmul node first, activation node second.
            cnodes = node.contained_nodes()
            quants = [gen.G.quantization[NodeId(node, cnode)]
                      for cnode in cnodes]
            mul_node, act_node = cnodes[0], cnodes[1]
            mul_qrec, act_qrec = quants[0], quants[1]
        else:
            mul_node, act_node = pnode, None
            mul_qrec, act_qrec = qrec, None
        if len(pnode.in_dims) == 3 and len(mul_qrec.in_qs[0].scale) > 1:
            # Per-channel scaling: emitted as its own global.
            gen_scales(gen, pnode, mul_node, mul_qrec)
            extra3 = extra4 = 0
        else:
            extra3 = mul_qrec.cache['mul_biases_q'].qbiases[0]
            extra4 = mul_qrec.cache['mul_biases_q'].qnorms[0]
        act_infos(gen, pnode, mul_node, act_node, act_qrec,
                  extra3=extra3, extra4=extra4)
    elif isinstance(pnode, QuantizeParameters):
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        comment = f'in q: {in_q} out_q: {out_q}'
        if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_ZEROPOINT':
            # Zero-point-only conversion: the infos blob carries the
            # zero-point delta reduced to the type's range.
            bits = 8 if in_q.dtype == np.int8 else 16
            modulus = int(math.pow(2, bits))
            if in_q.signed:
                contents = ((modulus + in_q.zero_point[0] -
                             out_q.zero_point[0]) % modulus).astype(np.uint8)
            else:
                contents = (modulus - in_q.zero_point[0] +
                            out_q.zero_point[0]).astype(np.uint8)
        elif in_q.dtype == np.int8 and out_q.dtype == np.int16:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                # Plain width conversion needs no infos global.
                return True
            raise NotImplementedError()
        elif in_q.dtype == np.int16 and out_q.dtype == np.int8:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        else:
            raise ValueError(f"strange dtype change in {pnode.name}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(
            file_name, QType.Pow2(bits=8, q=0, signed=True), contents=contents)
        gen.globals.append(
            GlobalArgInfo("int8", cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
    else:
        return False
    return True