def hsigmoid_mult_gen_factors(params, qrec):
    in_q = qrec.in_qs[0]
    fac_1 = in_q.quantize(np.array([params.offset]))
    compute_in_out_scale(qrec, extra_scale=1 / 6)
    upper_bound = in_q.quantize([6.])
    lower_bound = in_q.quantize([0.])
    return fac_1, upper_bound, lower_bound
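# Reference sketch (not part of the source above): hard sigmoid is
# y = clip(x + offset, 0, 6) / 6. The factors returned by
# hsigmoid_mult_gen_factors are that offset and the two clip bounds,
# quantized in the input scale; the 1/6 is folded into the output rescale
# via extra_scale. A minimal float model of the same function (the default
# offset of 3.0 here is illustrative; the kernel uses params.offset):
import numpy as np

def hsigmoid_float_reference(x, offset=3.0):
    return np.minimum(np.maximum(x + offset, 0.0), 6.0) / 6.0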
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    if isinstance(params, Broadcastable) and params.is_broadcasted:
        in_tensors = params.broadcast_inputs(in_tensors)
    func = PIECEWISE_OPS[params.__class__]
    op = func['op']
    if func['is_mult']:
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
        i1 = in_tensors[0].astype(np.int32)
        i2 = in_tensors[1].astype(np.int32)
        out_tensor = scale_mul_biases_q.apply_scales(op(i1, i2, np.int32))
    else:
        # the input with the larger scale is rescaled to match the other
        # before the integer add/sub
        set_add_in_scale(qrec)
        scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
        if qrec.cache['scaled_idx']:
            i1 = in_tensors[0].astype(np.int32)
            i2 = qrec.cache['scale_in_mul_biases_q'].apply_scales(in_tensors[1])
        else:
            i1 = qrec.cache['scale_in_mul_biases_q'].apply_scales(in_tensors[0])
            i2 = in_tensors[1].astype(np.int32)
        out_tensor = scale_mul_biases_q.apply_scales(op(i1, i2, None))
    return qrec.get_outputs(params,
                            [qrec.out_qs[0].clip(out_tensor)],
                            ktype="symmetric")
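# Reference sketch (not part of the source above): quantized addition needs
# both inputs expressed in one scale, so set_add_in_scale picks one input
# (scaled_idx) to pre-multiply before the int32 add, and a single output
# rescale follows. A minimal numpy model of that flow, assuming per-tensor
# float scales:
import numpy as np

def quantized_add_reference(q1, s1, q2, s2, s_out):
    if s1 >= s2:
        # express the larger-scale input in the smaller scale
        q1 = np.round(q1.astype(np.int64) * (s1 / s2)).astype(np.int32)
        common = s2
    else:
        q2 = np.round(q2.astype(np.int64) * (s2 / s1)).astype(np.int32)
        common = s1
    acc = q1.astype(np.int32) + q2.astype(np.int32)
    return np.round(acc * (common / s_out)).astype(np.int32)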
def _quantize(cls, params, in_qs, stats, **kwargs):
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    in_q = in_qs[0]
    if params.lower_bound != 0:
        raise NotImplementedError(
            'relu with non zero lower bound is not implemented for NE16 quantizer')
    cls.check_valid_ranges(params, stats, idx=0, dirs='out')
    if force_out_q:
        # since the relu is done by setting a zero zero-point and scaling to
        # the upper bound, we cannot be forced to something that does not
        # meet this requirement
        if not force_out_q.zero_point_asymmetric_zero:
            return None
        if params.upper_bound is not None and not np.isclose(
                force_out_q.max, params.upper_bound, atol=0.01):
            return None
        # if the output has been forced then propagate it to the input
        in_q = force_out_q
    else:
        upper = (params.upper_bound if params.upper_bound is not None
                 else stats['range_out'][0]['max'])
        in_q = QType.from_min_max_sq(0, upper,
                                     dtype=in_q.dtype,
                                     asymmetric=True,
                                     ne16=True,
                                     dont_copy_attr=['ne16'])
    o_q = deepcopy(in_q)
    o_q.set_forced()
    qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True)
    compute_in_out_scale(qrec)
    return qrec
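# Reference sketch (not part of the source above): choosing an asymmetric
# QType with zero_point 0 and max equal to the relu upper bound makes the
# clipping implicit in the representation — every code is already >= 0 and
# the top code saturates at the bound — so the NE16 kernel only rescales.
# Worked example with uint8 codes:
import numpy as np

def relu_qtype_reference(x, upper=6.0, bits=8):
    scale = upper / (2**bits - 1)  # zero_point = 0 -> codes cover [0, upper]
    q = np.clip(np.round(x / scale), 0, 2**bits - 1).astype(np.uint8)
    return q, scale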
def hswish_mult_gen_factors(qrec):
    in_q = qrec.in_qs[0]
    fac_1 = in_q.quantize(np.array([3.]))
    # the scale of the result is actually in_scale * in_scale since the
    # input is multiplied by itself
    compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1 / 6)
    upper_bound = in_q.quantize([6.])
    lower_bound = in_q.quantize([0.])
    return fac_1, upper_bound, lower_bound
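# Reference sketch (not part of the source above): hard swish is
# y = x * clip(x + 3, 0, 6) / 6. The input multiplies the clipped term, so
# the integer product carries in_scale twice, which is why extra_scale above
# includes qrec.in_qs[0].scale as well as the 1/6. Float model:
import numpy as np

def hswish_float_reference(x):
    return x * np.minimum(np.maximum(x + 3.0, 0.0), 6.0) / 6.0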
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    out_q15 = tanh_lut(in_tensor.astype(np.int32) << 8)
    compute_in_out_scale(
        qrec,
        extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale / qrec.in_qs[0].scale)
    scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
    output = scale_mul_biases_q.apply_scales(out_q15 >> 8)
    return qrec.get_outputs(params, [output], ktype="symmetric")
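# Reference sketch (not part of the source above): tanh_lut takes the int8
# codes shifted up by 8 and returns Q15 values; dropping 8 bits leaves a Q7
# result, and the extra_scale of 2**-7 / in_scale (QType.Pow2(bits=32, q=7)
# has scale 2**-7) maps that Q7 result into the output QType. A float
# stand-in for the LUT path, returning Q15 as the real table does:
import numpy as np

def tanh_q15_reference(q_in, in_scale):
    y = np.tanh(q_in.astype(np.float32) * in_scale)  # dequantize, apply tanh
    return np.round(y * (1 << 15)).astype(np.int32)  # requantize to Q15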
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    compute_in_out_scale(qrec)
    neg_in = at_norm(in_tensor * leak_mult_gen_factor_q7(params), 7)
    in_tensor = in_tensor * (in_tensor > 0) + neg_in * (in_tensor < 0)
    scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
    in_tensor = scale_mul_biases_q.apply_scales(in_tensor)
    if qrec.out_qs[0] != qrec.in_qs[0]:
        return qrec.get_outputs(
            params,
            [qrec.out_qs[0].reduce_from(in_tensor, qrec.in_qs[0])],
            ktype="symmetric")
    return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
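# Reference sketch (not part of the source above): the leak factor is
# presumably held in Q7 (leak * 2**7), so the negative branch is
# (x * leak_q7) >> 7, assuming at_norm(x, n) is an arithmetic right shift
# by n. Minimal integer model with a hypothetical leak of 0.1:
import numpy as np

def leaky_q7_reference(q_in, leak=0.1):
    leak_q7 = int(round(leak * (1 << 7)))         # Q7 leak factor
    neg = (q_in.astype(np.int32) * leak_q7) >> 7  # scale negative side down
    return np.where(q_in > 0, q_in.astype(np.int32),
                    np.where(q_in < 0, neg, 0))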
def max_execute(cls, params, in_tensors, qrec: QRec, details=None):
    del details
    # prepare the inputs at this step's quantization levels
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    if qrec.ktype == 'scaled':
        compute_in_out_scale(qrec, in_idx=0, out_idx=0)
    return qrec.get_outputs(
        params,
        [np.max(in_tensor, axis=tuple(params.axis), keepdims=params.keep_dims)],
        ktype="symmetric")
def sum_execute(cls, params, in_tensors, qrec: QRec):
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    if qrec.ktype == 'scaled':
        compute_in_out_scale(qrec, in_idx=0, out_idx=0)
    res = np.sum(in_tensor,
                 axis=tuple(params.axis),
                 keepdims=params.keep_dims,
                 dtype=np.int32)
    if qrec.ktype.startswith('scaled'):
        res = qrec.cache['scale_mul_biases_q'].apply_scales(res)
    elif qrec.ktype.startswith('symmetric'):
        res = qrec.out_qs[0].reduce_from(res, qrec.in_qs[0])
    return qrec.get_outputs(params, [res], ktype="symmetric")
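# Reference sketch (not part of the source above): summing codes that share
# one scale keeps the scale but widens the range, hence the int32
# accumulator and the single rescale afterwards (apply_scales for the
# 'scaled' kernels, reduce_from for the pow2 'symmetric' ones). Minimal
# model:
import numpy as np

def quantized_sum_reference(q_in, axis, s_in, s_out):
    acc = np.sum(q_in.astype(np.int32), axis=axis)  # same scale, wider range
    return np.round(acc * (s_in / s_out)).astype(np.int32)  # to output scale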
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    compute_in_out_scale(qrec)
    relu_lb = qrec.in_qs[0].quantize(params.lower_bound)
    in_tensor = np.maximum(in_tensor, relu_lb)
    if params.upper_bound is not None and not NNForceRelu.FORCE_RELU:
        relu_ub = qrec.in_qs[0].quantize(params.upper_bound)
        in_tensor = np.minimum(in_tensor, relu_ub)
    scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
    in_tensor = scale_mul_biases_q.apply_scales(in_tensor)
    if qrec.out_qs[0] != qrec.in_qs[0]:
        return qrec.get_outputs(
            params,
            [qrec.out_qs[0].reduce_from(in_tensor, qrec.in_qs[0])],
            ktype="symmetric")
    return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
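# Reference sketch (not part of the source above): the relu bounds are
# quantized in the input scale so the clamp happens directly on raw codes;
# the output rescale then maps the clamped codes to the output QType.
# Minimal model with per-tensor float scales:
import numpy as np

def relu_codes_reference(q_in, s_in, s_out, lower=0.0, upper=None):
    q = np.maximum(q_in, int(round(lower / s_in)))   # clamp in code space
    if upper is not None:
        q = np.minimum(q, int(round(upper / s_in)))
    return np.round(q * (s_in / s_out)).astype(np.int32)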
def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    cnodes = node.contained_nodes()
    quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    for qrec in quants:
        compute_in_out_scale(qrec)
    act_node = [cnode for cnode in cnodes
                if isinstance(cnode, ActivationParameters)]
    act_node = act_node[0] if act_node else None
    act_qrec = quants[-1] if act_node else None
    set_add_in_scale(quants[1])
    act_infos(gen, pnode, pnode, act_node, act_qrec,
              extra1=quants[1].cache['scale_in_mul_biases_q'].qbiases[0],
              extra2=quants[1].cache['scale_in_mul_biases_q'].qnorms[0],
              extra3=quants[1].cache['scale_mul_biases_q'].qbiases[0],
              extra4=quants[1].cache['scale_mul_biases_q'].qnorms[0])
    act_infos(gen, pnode, cnodes[0], act_node, act_qrec, extra_name="Pad",
              extra1=quants[1].cache['scale_mul_biases_q'].qbiases[0],
              extra2=quants[1].cache['scale_mul_biases_q'].qnorms[0])
    return True
def average_execute_mult(cls, params, in_tensors, qrec: QRec):
    # prepare the inputs at this step's quantization levels
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    out_dims = params.out_dims[0]
    compute_in_out_scale(qrec, in_idx=0, out_idx=0)
    sum_by_chan = np.sum(in_tensor,
                         dtype=np.int32,
                         axis=tuple(params.axis),
                         keepdims=params.keep_dims)
    sz = reduce(lambda x, y: x * y,
                [i for idx, i in enumerate(in_tensor.shape)
                 if idx in params.axis])
    res = at_norm(((sum_by_chan << 7) / sz).astype(np.int32), 7)
    scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
    out_tensor = scale_mul_biases_q.apply_scales(res)
    return qrec.get_outputs(params,
                            [out_tensor.reshape(out_dims.shape)],
                            ktype="symmetric")
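# Reference sketch (not part of the source above): the average divides the
# channel sum in Q7 fixed point — promote the sum by 7 bits, divide by the
# pool size, then drop the 7 fractional bits again (assuming at_norm is an
# arithmetic right shift; the code above uses float division then
# truncation, this sketch uses floor division). Minimal model:
import numpy as np

def fixed_point_average_reference(sum_by_chan, sz):
    q7 = ((sum_by_chan.astype(np.int64) << 7) // sz).astype(np.int32)  # Q7 mean
    return q7 >> 7  # back to integer codes, as at_norm(..., 7) does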
def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
    if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters,
                          GlobalSumPoolParameters)):
        compute_in_out_scale(qrec)
        infos, comment = np.array([
            qrec.cache['scale_mul_biases_q'].qbiases[0],
            qrec.cache['scale_mul_biases_q'].qnorms[0],
            0, 0, 0
        ]), "no activation"
        fnode = pnode
        pool_q = qrec
    elif isinstance(pnode, ActivationFusion) and isinstance(
            fnode, (GlobalPoolingParameters, PoolingParameters)):
        cnodes = pnode.contained_nodes()
        quants = [gen.G.quantization[NodeId(pnode, fnode)]
                  for fnode in cnodes]
        pool_q = quants[0]
        infos, comment = gen_act_infos(cnodes[1], quants[1])
    else:
        return False
    infos = np.append(infos, [0, 0, 0, 0])
    if isinstance(fnode, GlobalSumPoolParameters):
        compute_in_out_scale(pool_q, in_idx=0, out_idx=0)
        infos[0] = 0
        infos[1] = 0
        infos[5] = pool_q.cache['scale_mul_biases_q'].qbiases[0]
        infos[6] = pool_q.cache['scale_mul_biases_q'].qnorms[0]
    cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
    const_info = ConstantInfo(file_name,
                              QType.Pow2(bits=8, q=0, signed=True),
                              contents=infos)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
    return True
def compute_scales(cls, params, qrec):
    if isinstance(params, (SigmoidScaledSymmetricMult, TanHActivationParameters)):
        compute_in_out_scale(
            qrec,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale / qrec.in_qs[0].scale)
    elif isinstance(params, HSwishActivationParameters):
        compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1 / 6)
    else:
        compute_in_out_scale(qrec)
    return qrec
def _quantize(cls, params, in_qs, stats, **kwargs):
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    fusion = kwargs.get('fusion', None)
    in_q = in_qs[0]
    if not fusion and in_q.dtype == np.int32:
        return None
    if isinstance(params, (HSwishActivationParameters,
                           HSigmoidActivationParameters)):
        max_val = in_q.scale * pow(2, in_q.bits - 1)
        if max_val < 6:
            in_q = QType.from_min_max_sq(-6, 6, dtype=in_q.dtype, forced=True)
    elif isinstance(params, SigmoidActivationParameters):
        in_q = QType.from_min_max_sq(-8, 8, dtype=in_q.dtype, forced=True)
    if force_out_q:
        if force_out_q.signed != in_q.signed:
            return None
        if fusion and fusion.fusion_type in ['conv_active_pool', 'conv_active']:
            if not isinstance(params, (SigmoidActivationParameters,
                                       HTanHActivationParameters,
                                       HSwishActivationParameters,
                                       HSigmoidActivationParameters)):
                in_q = deepcopy(force_out_q)
        o_q = deepcopy(force_out_q)
        # the activation cannot move the zero point unless it is a reduction step
        if o_q.zero_point != in_q.zero_point and in_q.dtype != np.int32:
            return None
    else:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        zero_point = in_q.zero_point if in_q.zero_point != 0 else None
        o_q = QType.from_min_max_sq(stats['range_out'][0]['min'],
                                    stats['range_out'][0]['max'],
                                    dtype=in_q.dtype,
                                    zero_point=zero_point)
    qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q])
    if isinstance(params, (SigmoidScaledSymmetricMult, TanHActivationParameters)):
        compute_in_out_scale(
            qrec,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale / qrec.in_qs[0].scale)
    elif isinstance(params, HSwishActivationParameters):
        compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1 / 6)
    else:
        compute_in_out_scale(qrec)
    return qrec
def gen_act_infos(act_params, act_q):
    comment = ""
    if isinstance(act_params, ReluActivationParameters):
        compute_in_out_scale(act_q)
        actscale = act_q.cache['scale_mul_biases_q'].qbiases[0]
        actscalen = act_q.cache['scale_mul_biases_q'].qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            if act_q.in_qs[0].zero_point == 0:
                contents = np.array([actscale, actscalen, 0, 0, 0],
                                    dtype=np.int8)
                if len(comment) == 0:
                    comment = "all 0"
            else:
                fac_1 = act_q.in_qs[0].zero_point
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0],
                    act_q.out_qs[0].scale[0],
                    fac_1[0])
        else:
            if act_q.in_qs[0].zero_point == 0:
                fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0],
                    act_q.out_qs[0].scale[0],
                    fac_1[0])
            else:
                fac_1 = act_q.in_qs[0].zero_point
                fac_2 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, fac_2, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: {} C0: 0",
                    act_q.in_qs[0].scale[0],
                    act_q.out_qs[0].scale[0],
                    fac_1[0], fac_2[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound, fac_1, 1
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound[0], fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound, fac_1, 1
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound[0], fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        compute_in_out_scale(act_q)
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            leak_factor_quant, 0, 0
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            leak_factor_quant)
    elif isinstance(act_params, (SigmoidActivationParameters,
                                 TanHActivationParameters)):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        compute_in_out_scale(
            act_q,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale / act_q.in_qs[0].scale)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            0, 0, 0
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: x B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0])
    else:
        raise NotImplementedError("activation type not implemented")
    return contents, comment
def act_infos(gen, pnode, fnode, act_params, act_q,
              extra1=0, extra2=0, extra3=0, extra4=0,
              extra5=None, extra6=None, prenorm=0, extra_name='',
              for_ne16=False, in_zero_point=0):
    if isinstance(pnode, FilterParameters):
        comment = str.format("BiasQ: {}", extra1)
    elif isinstance(pnode, MatrixAddParameters):
        comment = str.format(
            "In1Scale: {} In1ScaleN: {} OutScale: {} OutScaleN: {}",
            extra1, extra2, extra3, extra4)
    else:
        comment = ""

    if act_params is None:
        contents = np.array([0, 0, 0, 0, 0], dtype=np.int8)
    elif isinstance(act_params, ReluActivationParameters):
        compute_in_out_scale(act_q)
        actscale = act_q.cache['scale_mul_biases_q'].qbiases[0]
        actscalen = act_q.cache['scale_mul_biases_q'].qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            if act_q.in_qs[0].zero_point == 0:
                contents = np.array([actscale, actscalen, 0, 0, 0],
                                    dtype=np.int8)
                if len(comment) == 0:
                    comment = "all 0"
            else:
                fac_1 = act_q.in_qs[0].zero_point
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0],
                    act_q.out_qs[0].scale[0],
                    fac_1[0])
        else:
            if act_q.in_qs[0].zero_point == 0:
                fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0],
                    act_q.out_qs[0].scale[0],
                    fac_1[0])
            else:
                fac_1 = act_q.in_qs[0].zero_point
                fac_2 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, fac_2, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: {} C0: 0",
                    act_q.in_qs[0].scale[0],
                    act_q.out_qs[0].scale[0],
                    fac_1[0], fac_2[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound, fac_1, 1
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound[0], fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound, fac_1, 1
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            upper_bound[0], fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0],
                              int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        compute_in_out_scale(act_q)
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            leak_factor_quant, 0, 0
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            leak_factor_quant)
    elif isinstance(act_params, (SigmoidActivationParameters,
                                 TanHActivationParameters)):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[0].zero_point == 0, \
            "asymmetric not supported"
        compute_in_out_scale(
            act_q,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale / act_q.in_qs[0].scale)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            0, 0, 0
        ], dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: x B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0])
    else:
        raise NotImplementedError("activation type not implemented")

    if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0],
            0, 0, 0
        ], dtype=np.int8)
    contents = np.append(contents, [extra1, extra2, extra3, extra4])
    if extra5 is not None:
        contents = np.append(contents, [extra5])
    if extra6 is not None:
        contents = np.append(contents, [extra6])

    if for_ne16:
        # append weights_offset and pad_val for ne16
        # TODO - default config maybe in future
        if isinstance(pnode, (ConvFusionParameters, LinearFusionParameters)):
            filt_q = gen.G.quantization[NodeId(pnode, fnode)]
        else:
            filt_q = gen.G.quantization[NodeId(pnode)]
        pad_value = np.array(in_zero_point).astype(np.int16)
        pad_value1 = np.bitwise_and(pad_value, 0xFF)
        pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(filt_q.in_qs[1].zero_point).astype(np.int32)
        w_offset1 = np.bitwise_and(w_offset, 0xFF)
        w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
        w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
        w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24
        contents = np.append(
            contents,
            [[prenorm] if prenorm else [0], pad_value1, pad_value2,
             w_offset1, w_offset2, w_offset3, w_offset4])

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS, extra_name)
    const_info = ConstantInfo(file_name,
                              QType.Pow2(bits=8, q=0, signed=True),
                              contents=contents)
    gen.globals.append(
        GlobalArgInfo("int8", cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
def mult8_infos_generator(gen, node, qrec, pnode, fnode) -> bool:
    if fnode is not None:
        return False
    # if isinstance(pnode, Conv2DParameters):
    #     for_ne16 = qrec.cache.get('ne16')
    #     in_zero_point = qrec.in_qs[0].zero_point
    #     conv_mul_bias = qrec.cache.get('mul_biases_q')
    #     prenorm = conv_mul_bias.pre_normalization if isinstance(
    #         conv_mul_bias, MultMulBiasScaleQType) else 0
    #     act_infos(gen, pnode, pnode, None, None, prenorm=prenorm, extra1=0,
    #               for_ne16=for_ne16, in_zero_point=in_zero_point)
    # elif isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
    #     compute_in_out_scale(qrec)
    #     act_infos(gen, pnode, pnode, None, qrec)
    if isinstance(pnode, ActivationParameters):
        act_infos(gen, pnode, pnode, pnode,
                  gen.G.quantization[NodeId(pnode)])
    # elif isinstance(pnode, ConvFusionParameters):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for_ne16 = any([qrec.cache.get('ne16') for qrec in quants])
    #     in_zero_point = quants[0].in_qs[0].zero_point
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if node.fusion_type.startswith('linear') or node.fusion_type.startswith('conv') or node.fusion_type.startswith('pool'):
    #         if node.fusion_type in ("pool_active"):
    #             act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1],
    #                       extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #         else:
    #             conv_mul_bias = quants[0].cache.get('mul_biases_q')
    #             prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
    #             if node.fusion_type in ("conv_active_pool", "conv_active", "linear_active"):
    #                 act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool_active":
    #                 act_infos(gen, pnode, cnodes[0], cnodes[2], quants[2], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool":
    #                 act_infos(gen, pnode, cnodes[0], None, None, prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16)
    elif isinstance(pnode, MatrixMulParameters):
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        act_infos(gen, pnode, pnode, None, None,
                  extra1=qrec.cache['scale_mul_biases_q'].qbiases[0],
                  extra2=qrec.cache['scale_mul_biases_q'].qnorms[0])
    elif isinstance(pnode, SoftMaxParameters):
        act_infos(gen, pnode, pnode, pnode, qrec)
    # elif isinstance(pnode, ActivationFusionBase):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if isinstance(cnodes[0], (GlobalPoolingParameters, PoolingParameters)):
    #         act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1])
    #     else:
    #         return False
    #     return True
    elif isinstance(pnode, (MatMulOpParameters, MatMulOpFusionParameters)):
        if isinstance(pnode, MatMulOpFusionParameters):
            cnodes = node.contained_nodes()
            quants = [gen.G.quantization[NodeId(node, fnode)]
                      for fnode in cnodes]
            mul_node = cnodes[0]
            mul_qrec = quants[0]
            act_node = cnodes[1]
            act_qrec = quants[1]
        else:
            mul_node = pnode
            mul_qrec = qrec
            act_node = None
            act_qrec = None
        if len(pnode.in_dims) == 3 and len(mul_qrec.in_qs[0].scale) > 1:
            gen_scales(gen, pnode, mul_node, mul_qrec)
            extra3 = 0
            extra4 = 0
        else:
            extra3 = mul_qrec.cache['mul_biases_q'].qbiases[0]
            extra4 = mul_qrec.cache['mul_biases_q'].qnorms[0]
        act_infos(gen, pnode, mul_node, act_node, act_qrec,
                  extra3=extra3, extra4=extra4)
    elif isinstance(pnode, QuantizeParameters):
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        comment = f'in q: {in_q} out_q: {out_q}'
        if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_ZEROPOINT':
            bits = 8 if in_q.dtype == np.int8 else 16
            if in_q.signed:
                contents = (
                    (int(math.pow(2, bits)) + in_q.zero_point[0] -
                     out_q.zero_point[0]) % int(math.pow(2, bits))).astype(np.uint8)
            else:
                contents = (int(math.pow(2, bits)) - in_q.zero_point[0] +
                            out_q.zero_point[0]).astype(np.uint8)
        # if in_q.dtype == np.int8 and out_q.dtype == np.uint8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         (256 + in_q.zero_point[0] - out_q.zero_point[0]) % 256).astype(np.uint8)
        # elif in_q.dtype == np.uint8 and out_q.dtype == np.int8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         256 - in_q.zero_point[0] + out_q.zero_point[0]).astype(np.uint8)
        elif in_q.dtype == np.int8 and out_q.dtype == np.int16:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        elif in_q.dtype == np.int16 and out_q.dtype == np.int8:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        else:
            raise ValueError(f"strange dtype change in {pnode.name}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=contents)
        gen.globals.append(
            GlobalArgInfo("int8", cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
    else:
        return False
    return True
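# Reference sketch (not part of the source above): for
# KOP_CONVERT_FP_FP_ZEROPOINT the scales match and only the zero point
# moves, so the generated info byte is the zero-point delta wrapped into an
# unsigned value. Worked example, int8 (zero_point 0) -> uint8
# (zero_point 128): offset = (256 + 0 - 128) % 256 = 128, and adding 128
# modulo 256 to the int8 bit pattern yields the uint8 code (e.g. -5, bit
# pattern 251, becomes 123 = -5 + 128).
def convert_zeropoint_offset(zp_in, zp_out, signed_in, bits=8):
    # mirrors the two branches in the generator above
    if signed_in:
        return ((1 << bits) + zp_in - zp_out) % (1 << bits)
    return (1 << bits) - zp_in + zp_out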