Example 1
def hsigmoid_mult_gen_factors(params, qrec):
    in_q = qrec.in_qs[0]
    fac_1 = in_q.quantize(np.array([params.offset]))
    compute_in_out_scale(qrec, extra_scale=1 / 6)
    upper_bound = in_q.quantize([6.])
    lower_bound = in_q.quantize([0.])
    return fac_1, upper_bound, lower_bound
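The factors above implement hsigmoid(x) = clip(x + offset, 0, 6) / 6 in integer arithmetic: the offset and both bounds are quantized at the input scale, and the division by 6 is folded into the in/out rescale through extra_scale=1/6. A minimal end-to-end sketch, assuming symmetric int8 quantization, illustrative scales, and apply_scales modelled by a plain float multiply:

import numpy as np

# hsigmoid(x) = clip(x + 3, 0, 6) / 6 on int8 data (illustrative, not the nntool kernel)
in_scale = 6.0 / 127           # assumed input scale, chosen so 6.0 is representable
out_scale = 1.0 / 127          # assumed output scale for a [0, 1] result

x = np.array([-4.0, 0.0, 2.5])
x_q = np.clip(np.round(x / in_scale), -128, 127).astype(np.int32)

fac_1 = round(3.0 / in_scale)              # quantized offset, cf. fac_1 above
upper = round(6.0 / in_scale)              # quantized upper bound
acc = np.clip(x_q + fac_1, 0, upper)       # integer clip(x + 3, 0, 6)
y_q = np.round(acc * (in_scale / 6.0) / out_scale)   # combined rescale, cf. extra_scale=1/6
print(y_q * out_scale, np.clip((x + 3) / 6, 0, 1))   # integer path vs float reference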
Example 2
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
        if isinstance(params, Broadcastable) and params.is_broadcasted:
            in_tensors = params.broadcast_inputs(in_tensors)
        func = PIECEWISE_OPS[params.__class__]
        op = func['op']

        if func['is_mult']:
            compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
            scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
            i1 = in_tensors[0].astype(np.int32)
            i2 = in_tensors[1].astype(np.int32)
            out_tensor = scale_mul_biases_q.apply_scales(op(i1, i2, np.int32))
        else:
            # the input with the larger scale is rescaled onto the other's scale
            set_add_in_scale(qrec)
            scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
            if qrec.cache['scaled_idx']:
                i1 = in_tensors[0].astype(np.int32)
                i2 = qrec.cache['scale_in_mul_biases_q'].apply_scales(in_tensors[1])
            else:
                i1 = qrec.cache['scale_in_mul_biases_q'].apply_scales(in_tensors[0])
                i2 = in_tensors[1].astype(np.int32)

            out_tensor = scale_mul_biases_q.apply_scales(op(i1, i2, None))
        return qrec.get_outputs(params, [qrec.out_qs[0].clip(out_tensor)], ktype="symmetric")
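In the mult branch one combined rescale, in_scale0 * in_scale1 / out_scale, is applied after the int32 product. In the add branch set_add_in_scale first rescales the input with the larger scale onto the other input's scale so the integer addition is meaningful, then the shared scale is corrected to the output scale. A minimal sketch of that bookkeeping, assuming per-tensor scales and with both cached apply_scales steps modelled by float multiplies:

import numpy as np

s0, s1, s_out = 0.02, 0.05, 0.04           # illustrative input/output scales
a_q, b_q = np.int32(50), np.int32(-20)

# mult: a single rescale s0 * s1 / s_out after the int32 product
prod = np.round((a_q * b_q) * (s0 * s1 / s_out))

# add: input 1 has the larger scale here, so it is pre-scaled onto s0
# (cf. 'scale_in_mul_biases_q'), then the sum is corrected to s_out
# (cf. 'scale_mul_biases_q')
b_on_s0 = np.round(b_q * (s1 / s0))
total = np.round((a_q + b_on_s0) * (s0 / s_out))
print(prod, total)                          # -25.0 0.0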
Example 3
 def _quantize(cls, params, in_qs, stats, **kwargs):
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     force_out_q = force_out_qs and force_out_qs[0]
     in_q = in_qs[0]
     if params.lower_bound != 0:
         raise NotImplementedError(
             'relu with a non-zero lower bound is not implemented for the NE16 quantizer'
         )
     cls.check_valid_ranges(params, stats, idx=0, dirs='out')
     if force_out_q:
         # since the relu is implemented with a zero zero-point and a scale chosen
         # to reach the upper bound, a forced output q that does not meet these
         # requirements cannot be honoured
         if not force_out_q.zero_point_asymmetric_zero:
             return None
         if params.upper_bound is not None and not np.isclose(
                 force_out_q.max, params.upper_bound, atol=0.01):
             return None
         # if the output has been forced then propagate it
         in_q = force_out_q
     else:
         upper = params.upper_bound if params.upper_bound is not None else stats[
             'range_out'][0]['max']
         in_q = QType.from_min_max_sq(0,
                                      upper,
                                      dtype=in_q.dtype,
                                      asymmetric=True,
                                      ne16=True,
                                      dont_copy_attr=['ne16'])
     o_q = deepcopy(in_q)
     o_q.set_forced()
     qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True)
     compute_in_out_scale(qrec)
     return qrec
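The constraints above follow from how the NE16 relu is realised: the output is asymmetric with zero point 0, so its scale has to map [0, upper_bound] onto the full quantized range, and a forced output q with a different zero point or max cannot be honoured. A minimal sketch of that scale choice, assuming a uint8 output (QType.from_min_max_sq handles the dtype and rounding details in nntool):

import numpy as np

upper = 6.0
scale = upper / 255.0        # zero_point = 0, so qmax = 255 must land on upper
x = np.array([-1.0, 3.0, 7.0])
x_q = np.clip(np.round(x / scale), 0, 255).astype(np.uint8)   # the relu comes for free
print(x_q * scale)           # ~[0. 3. 6.]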
Example 4
def hswish_mult_gen_factors(qrec):
    in_q = qrec.in_qs[0]
    fac_1 = in_q.quantize(np.array([3.]))
    # The scale of the product is actually in_scale * in_scale since the input is multiplied by itself
    compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1 / 6)
    upper_bound = in_q.quantize([6.])
    lower_bound = in_q.quantize([0.])
    return fac_1, upper_bound, lower_bound
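The extra factor of in_qs[0].scale comes from hswish(x) = x * clip(x + 3, 0, 6) / 6: the integer kernel multiplies two values that both carry in_scale, so the product carries in_scale ** 2, and compute_in_out_scale supplies the other factor together with the 1/6. A minimal sketch, assuming symmetric int8 and illustrative scales:

import numpy as np

in_scale = 8.0 / 127
out_scale = 6.0 / 127

x = np.array([-4.0, 1.0, 3.0])
x_q = np.round(x / in_scale).astype(np.int32)
fac_1 = round(3.0 / in_scale)                  # cf. fac_1 above
upper = round(6.0 / in_scale)                  # cf. upper_bound

acc = x_q * np.clip(x_q + fac_1, 0, upper)     # product scale: in_scale ** 2
y_q = np.round(acc * (in_scale * in_scale / 6.0) / out_scale)  # cf. extra_scale
print(y_q * out_scale, x * np.clip(x + 3, 0, 6) / 6)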
Example 5
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        in_tensor = qrec.prepare_inputs(params, in_tensors,
                                        ktype="symmetric")[0]
        out_q15 = tanh_lut(in_tensor.astype(np.int32) << 8)
        compute_in_out_scale(
            qrec,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale /
            qrec.in_qs[0].scale)
        scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
        output = scale_mul_biases_q.apply_scales(out_q15 >> 8)

        return qrec.get_outputs(params, [output], ktype="symmetric")
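After out_q15 >> 8 the LUT result is Q7, i.e. it has scale 2 ** -7, so the combined rescale in_scale * extra_scale / out_scale collapses to 2 ** -7 / out_scale, independent of the input scale. A minimal sketch of that algebra, with tanh_lut modelled by float tanh quantized to Q15 (an assumption; the real kernel is table-driven):

import numpy as np

in_scale = 4.0 / 127
out_scale = 1.0 / 127

x_q = np.array([-64, 0, 32], dtype=np.int32)
q15 = np.round(np.tanh(x_q * in_scale) * (1 << 15)).astype(np.int32)  # stand-in LUT
q7 = q15 >> 8                                  # cf. out_q15 >> 8, now scale 2 ** -7
y_q = np.round(q7 * (2.0 ** -7 / out_scale))   # cf. extra_scale = Pow2(q=7).scale / in_scale
print(y_q * out_scale, np.tanh(x_q * in_scale))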
Example 6
 def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
     in_tensor = qrec.prepare_inputs(params, in_tensors,
                                     ktype="symmetric")[0]
     compute_in_out_scale(qrec)
     neg_in = at_norm(in_tensor * leak_mult_gen_factor_q7(params), 7)
     in_tensor = in_tensor * (in_tensor > 0) + neg_in * (in_tensor < 0)
     scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
     in_tensor = scale_mul_biases_q.apply_scales(in_tensor)
     if qrec.out_qs[0] != qrec.in_qs[0]:
         return qrec.get_outputs(
             params, [qrec.out_qs[0].reduce_from(in_tensor, qrec.in_qs[0])],
             ktype="symmetric")
     return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
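A minimal sketch of the negative branch above, assuming leak_mult_gen_factor_q7 returns round(leak_factor * 2 ** 7) and at_norm(v, n) is a rounding arithmetic right shift by n:

import numpy as np

leak_q7 = round(0.1 * (1 << 7))                      # Q7 leak factor, here for alpha = 0.1
x_q = np.array([-100, -5, 0, 40], dtype=np.int32)
neg = (x_q * leak_q7 + (1 << 6)) >> 7                # at_norm(x * leak, 7)
y_q = x_q * (x_q > 0) + neg * (x_q < 0)              # keep positives, leak negatives
print(y_q)                                           # [-10 -1 0 40], before the final rescale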
Example 7
    def max_execute(cls, params, in_tensors, qrec: QRec, details=None):

        del details

        # Prepare the input at the kernel's expected quantization
        in_tensor = qrec.prepare_inputs(params, in_tensors,
                                        ktype="symmetric")[0]
        if qrec.ktype == 'scaled':
            compute_in_out_scale(qrec, in_idx=0, out_idx=0)
        return qrec.get_outputs(params, [
            np.max(
                in_tensor, axis=tuple(params.axis), keepdims=params.keep_dims)
        ],
                                ktype="symmetric")
Example 8
    def sum_execute(cls, params, in_tensors, qrec: QRec):
        in_tensor = qrec.prepare_inputs(params, in_tensors,
                                        ktype="symmetric")[0]
        if qrec.ktype == 'scaled':
            compute_in_out_scale(qrec, in_idx=0, out_idx=0)
        res = np.sum(in_tensor,
                     axis=tuple(params.axis),
                     keepdims=params.keep_dims,
                     dtype=np.int32)

        if qrec.ktype.startswith('scaled'):
            res = qrec.cache['scale_mul_biases_q'].apply_scales(res)
        elif qrec.ktype.startswith('symmetric'):
            res = qrec.out_qs[0].reduce_from(res, qrec.in_qs[0])

        return qrec.get_outputs(params, [res], ktype="symmetric")
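Integer sums preserve the input scale, so after the int32 accumulation a single in_scale / out_scale correction is enough. A minimal sketch with illustrative scales and apply_scales modelled by a float multiply:

import numpy as np

in_scale, out_scale = 0.02, 0.5
x_q = np.arange(12, dtype=np.int8).reshape(3, 4)
res = np.sum(x_q, axis=(1,), dtype=np.int32)         # scale is still in_scale
print(np.round(res * (in_scale / out_scale)))        # [0. 1. 2.]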
Example 9
 def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
     in_tensor = qrec.prepare_inputs(params, in_tensors,
                                     ktype="symmetric")[0]
     compute_in_out_scale(qrec)
     relu_lb = qrec.in_qs[0].quantize(params.lower_bound)
     in_tensor = np.maximum(in_tensor, relu_lb)
     if params.upper_bound is not None and not NNForceRelu.FORCE_RELU:
         relu_ub = qrec.in_qs[0].quantize(params.upper_bound)
         in_tensor = np.minimum(in_tensor, relu_ub)
     scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
     in_tensor = scale_mul_biases_q.apply_scales(in_tensor)
     if qrec.out_qs[0] != qrec.in_qs[0]:
         return qrec.get_outputs(
             params, [qrec.out_qs[0].reduce_from(in_tensor, qrec.in_qs[0])],
             ktype="symmetric")
     return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
Example 10
 def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
     cnodes = node.contained_nodes()
     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
     for qrec in quants:
         compute_in_out_scale(qrec)
     act_node = [cnode for cnode in cnodes if isinstance(
         cnode, ActivationParameters)]
     act_node = act_node[0] if act_node else None
     act_qrec = quants[-1] if act_node else None
     set_add_in_scale(quants[1])
     act_infos(gen, pnode, pnode, act_node, act_qrec,
               extra1=quants[1].cache['scale_in_mul_biases_q'].qbiases[0],
               extra2=quants[1].cache['scale_in_mul_biases_q'].qnorms[0],
               extra3=quants[1].cache['scale_mul_biases_q'].qbiases[0],
               extra4=quants[1].cache['scale_mul_biases_q'].qnorms[0])
     act_infos(gen, pnode, cnodes[0], act_node, act_qrec, extra_name="Pad",
               extra1=quants[1].cache['scale_mul_biases_q'].qbiases[0],
               extra2=quants[1].cache['scale_mul_biases_q'].qnorms[0])
     return True
Example 11
    def average_execute_mult(cls, params, in_tensors, qrec: QRec):

        # Prepare the input at the kernel's expected quantization
        in_tensor = qrec.prepare_inputs(params, in_tensors,
                                        ktype="symmetric")[0]
        out_dims = params.out_dims[0]
        compute_in_out_scale(qrec, in_idx=0, out_idx=0)

        sum_by_chan = np.sum(in_tensor,
                             dtype=np.int32,
                             axis=tuple(params.axis),
                             keepdims=params.keep_dims)
        sz = reduce(
            lambda x, y: x * y,
            [i for idx, i in enumerate(in_tensor.shape) if idx in params.axis])
        res = at_norm(((sum_by_chan << 7) / sz).astype(np.int32), 7)
        scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
        out_tensor = scale_mul_biases_q.apply_scales(res)
        return qrec.get_outputs(params, [out_tensor.reshape(out_dims.shape)],
                                ktype="symmetric")
Example 12
    def globals_generator(cls, gen, node, qrec, pnode, fnode) -> bool:
        if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters,
                              GlobalSumPoolParameters)):
            compute_in_out_scale(qrec)
            infos, comment = np.array([
                qrec.cache['scale_mul_biases_q'].qbiases[0],
                qrec.cache['scale_mul_biases_q'].qnorms[0], 0, 0, 0
            ]), "no activation"
            fnode = pnode
            pool_q = qrec
        elif isinstance(pnode, ActivationFusion) and isinstance(
                fnode, (GlobalPoolingParameters, PoolingParameters)):
            cnodes = pnode.contained_nodes()
            quants = [
                gen.G.quantization[NodeId(pnode, fnode)] for fnode in cnodes
            ]
            pool_q = quants[0]
            infos, comment = gen_act_infos(cnodes[1], quants[1])
        else:
            return False
        infos = np.append(infos, [0, 0, 0, 0])
        if isinstance(fnode, GlobalSumPoolParameters):
            compute_in_out_scale(pool_q, in_idx=0, out_idx=0)
            infos[0] = 0
            infos[1] = 0
            infos[5] = pool_q.cache['scale_mul_biases_q'].qbiases[0]
            infos[6] = pool_q.cache['scale_mul_biases_q'].qnorms[0]

        cname, file_name = gen_constant(gen, pnode, fnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=infos)
        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
        return True
Example 13
 def compute_scales(cls, params, qrec):
     if isinstance(params,
                   (SigmoidScaledSymmetricMult, TanHActivationParameters)):
         compute_in_out_scale(
             qrec,
             extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale /
             qrec.in_qs[0].scale)
     elif isinstance(params, HSwishActivationParameters):
         compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1 / 6)
     else:
         compute_in_out_scale(qrec)
     return qrec
Example 14
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        fusion = kwargs.get('fusion', None)
        in_q = in_qs[0]
        if not fusion and in_q.dtype == np.int32:
            return None

        if isinstance(params, (HSwishActivationParameters, HSigmoidActivationParameters)):
            max_val = in_q.scale * pow(2, in_q.bits - 1)
            if max_val < 6:
                in_q = QType.from_min_max_sq(-6, 6, dtype=in_q.dtype, forced=True)
        elif isinstance(params, SigmoidActivationParameters):
            in_q = QType.from_min_max_sq(-8, 8, dtype=in_q.dtype, forced=True)

        if force_out_q:
            if force_out_q.signed != in_q.signed:
                return None
            if fusion and fusion.fusion_type in ['conv_active_pool', 'conv_active']:
                if not isinstance(params, (SigmoidActivationParameters, HTanHActivationParameters,
                                           HSwishActivationParameters, HSigmoidActivationParameters)):
                    in_q = deepcopy(force_out_q)
            o_q = deepcopy(force_out_q)
            # the activation cannot move the zero point unless this is a reduction step
            if o_q.zero_point != in_q.zero_point and in_q.dtype != np.int32:
                return None
        else:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            zero_point = in_q.zero_point if in_q.zero_point != 0 else None
            o_q = QType.from_min_max_sq(stats['range_out'][0]['min'],
                                        stats['range_out'][0]['max'],
                                        dtype=in_q.dtype,
                                        zero_point=zero_point)

        qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q])
        if isinstance(params, (SigmoidScaledSymmetricMult, TanHActivationParameters)):
            compute_in_out_scale(qrec, extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale/qrec.in_qs[0].scale)
        elif isinstance(params, HSwishActivationParameters):
            compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1/6)
        else:
            compute_in_out_scale(qrec)
        return qrec
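The range widening above exists because a symmetric input with scale s only reaches about s * 2 ** (bits - 1): hswish and hsigmoid must be able to represent the constant 6, and sigmoid is pinned to a fixed [-8, 8] domain. A minimal sketch of the check, with an illustrative scale:

in_scale, bits = 0.03, 8
max_val = in_scale * 2 ** (bits - 1)       # 3.84, too small to represent 6
if max_val < 6:
    in_scale = 6.0 / 2 ** (bits - 1)       # cf. QType.from_min_max_sq(-6, 6, ...)
print(max_val, in_scale)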
Example 15
def gen_act_infos(act_params, act_q):
    comment = ""
    if isinstance(act_params, ReluActivationParameters):
        compute_in_out_scale(act_q)
        actscale = act_q.cache['scale_mul_biases_q'].qbiases[0]
        actscalen = act_q.cache['scale_mul_biases_q'].qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            if act_q.in_qs[0].zero_point == 0:
                contents = np.array([actscale, actscalen, 0, 0, 0],
                                    dtype=np.int8)
                if len(comment) == 0:
                    comment = "all 0"
            else:
                fac_1 = act_q.in_qs[0].zero_point
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0])
        else:
            if act_q.in_qs[0].zero_point == 0:
                fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0])
            else:
                fac_1 = act_q.in_qs[0].zero_point
                fac_2 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, fac_2, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: {} C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0], fac_2[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound, fac_1, 1
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound[0],
            fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound, fac_1, 1
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound[0],
            fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0], int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        compute_in_out_scale(act_q)
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], leak_factor_quant, 0,
            0
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], leak_factor_quant)
    elif isinstance(act_params,
                    (SigmoidActivationParameters, TanHActivationParameters)):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        compute_in_out_scale(
            act_q,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale /
            act_q.in_qs[0].scale)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], 0, 0, 0
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: x B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0])
    else:
        raise NotImplementedError("activation type not implemented")

    return contents, comment
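Every branch above emits the same 5-byte record, which reads as [qbias, qnorm, A0, B0, C0]: the fused in/out rescale followed by up to three activation constants (zero point, quantized bounds, leak factor), with C0 = 1 flagging the hswish/hsigmoid forms. An illustrative record for a relu6 with zero_point == 0 (all values made up):

import numpy as np

qbias, qnorm = 103, 8          # hypothetical fused scale multiplier and shift
relu_ub_q = 127                # quantized upper bound, lands in A0
infos = np.array([qbias, qnorm, relu_ub_q, 0, 0], dtype=np.int8)
print(infos)                   # [103 8 127 0 0]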
Example 16
def act_infos(gen,
              pnode,
              fnode,
              act_params,
              act_q,
              extra1=0,
              extra2=0,
              extra3=0,
              extra4=0,
              extra5=None,
              extra6=None,
              prenorm=0,
              extra_name='',
              for_ne16=False,
              in_zero_point=0):
    if isinstance(pnode, FilterParameters):
        comment = str.format("BiasQ: {}", extra1)
    elif isinstance(pnode, MatrixAddParameters):
        comment = str.format(
            "In1Scale: {} In1ScaleN: {} OutScale: {} OutScaleN: {}", extra1,
            extra2, extra3, extra4)
    else:
        comment = ""

    if act_params is None:
        contents = np.array([0, 0, 0, 0, 0], dtype=np.int8)
    elif isinstance(act_params, ReluActivationParameters):
        compute_in_out_scale(act_q)
        actscale = act_q.cache['scale_mul_biases_q'].qbiases[0]
        actscalen = act_q.cache['scale_mul_biases_q'].qnorms[0]
        if act_params.upper_bound is None:  # or fnode is not None:
            if act_q.in_qs[0].zero_point == 0:
                contents = np.array([actscale, actscalen, 0, 0, 0],
                                    dtype=np.int8)
                if len(comment) == 0:
                    comment = "all 0"
            else:
                fac_1 = act_q.in_qs[0].zero_point
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0])
        else:
            if act_q.in_qs[0].zero_point == 0:
                fac_1 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, 0, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: 0 C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0])
            else:
                fac_1 = act_q.in_qs[0].zero_point
                fac_2 = act_q.in_qs[0].quantize(act_params.upper_bound)
                contents = np.array([actscale, actscalen, fac_1, fac_2, 0],
                                    dtype=np.int8)
                comment += str.format(
                    "in: {:05f} out: {:05f} A0: {} B0: {} C0: 0",
                    act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
                    fac_1[0], fac_2[0])
    elif isinstance(act_params, HSigmoidActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        fac_1, upper_bound, _ = hsigmoid_mult_gen_factors(act_params, act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound, fac_1, 1
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound[0],
            fac_1[0])
    elif isinstance(act_params, HSwishActivationParameters):
        # currently combines all scaling factors into one scale and shift
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        fac_1, upper_bound, _ = hswish_mult_gen_factors(act_q)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound, fac_1, 1
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: {} C0: 1",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], upper_bound[0],
            fac_1[0])
    elif isinstance(act_params, SoftMaxParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        norm = 15 + np.ceil(np.log2(act_q.in_qs[0].scale))
        contents = np.array([norm, 0, 0, 0, 0], dtype=np.int8)
        comment += str.format("in: {:05f} out: {:05f} NORM: {}",
                              act_q.in_qs[0].scale[0],
                              act_q.out_qs[0].scale[0], int(norm[0]))
    elif isinstance(act_params, LeakyActivationParameters):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        compute_in_out_scale(act_q)
        leak_factor_quant = leak_mult_gen_factor_q7(act_params)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], leak_factor_quant, 0,
            0
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: {} B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], leak_factor_quant)
    elif isinstance(act_params,
                    (SigmoidActivationParameters, TanHActivationParameters)):
        assert act_q.in_qs[0].zero_point == 0 and act_q.out_qs[
            0].zero_point == 0, "asymmetric not supported"
        compute_in_out_scale(
            act_q,
            extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale /
            act_q.in_qs[0].scale)
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], 0, 0, 0
        ],
                            dtype=np.int8)
        comment += str.format(
            "in: {:05f} out: {:05f} qbias: {} qnorm: {} A0: x B0: x C0: x",
            act_q.in_qs[0].scale[0], act_q.out_qs[0].scale[0],
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0])
    else:
        raise NotImplementedError("activation type not implemented")

    if isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
        contents = np.array([
            act_q.cache['scale_mul_biases_q'].qbiases[0],
            act_q.cache['scale_mul_biases_q'].qnorms[0], 0, 0, 0
        ],
                            dtype=np.int8)

    contents = np.append(contents, [extra1, extra2, extra3, extra4])
    if extra5 is not None:
        contents = np.append(contents, [extra5])
    if extra6 is not None:
        contents = np.append(contents, [extra6])

    if for_ne16:
        # append weights_offset and pad_val for ne16
        # TODO - default config maybe in future
        if isinstance(pnode, (ConvFusionParameters, LinearFusionParameters)):
            filt_q = gen.G.quantization[NodeId(pnode, fnode)]
        else:
            filt_q = gen.G.quantization[NodeId(pnode)]
        pad_value = np.array(in_zero_point).astype(np.int16)
        pad_value1 = np.bitwise_and(pad_value, 0xFF)
        pad_value2 = np.bitwise_and(pad_value, 0xFF00) >> 8
        w_offset = -np.array(filt_q.in_qs[1].zero_point).astype(np.int32)
        w_offset1 = np.bitwise_and(w_offset, 0xFF)
        w_offset2 = np.bitwise_and(w_offset, 0xFF00) >> 8
        w_offset3 = np.bitwise_and(w_offset, 0xFF0000) >> 16
        w_offset4 = np.bitwise_and(w_offset, 0xFF000000) >> 24

        contents = np.append(
            contents, [[prenorm] if prenorm else [0], pad_value1, pad_value2,
                       w_offset1, w_offset2, w_offset3, w_offset4])

    cname, file_name = gen_constant(gen, pnode, fnode, INFOS, extra_name)
    const_info = ConstantInfo(file_name,
                              QType.Pow2(bits=8, q=0, signed=True),
                              contents=contents)

    gen.globals.append(
        GlobalArgInfo("int8",
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
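A minimal sketch of the NE16 tail appended above: the 16-bit pad value and the 32-bit weights offset are split into little-endian bytes so the record can stay an int8 array. Illustrative zero points:

import numpy as np

pad = np.int16(128)                        # assumed input zero point
w_off = np.int32(-137)                     # minus the weights zero point
tail = np.array([pad & 0xFF, (pad >> 8) & 0xFF,
                 w_off & 0xFF, (w_off >> 8) & 0xFF,
                 (w_off >> 16) & 0xFF, (w_off >> 24) & 0xFF], dtype=np.uint8)
print(tail)                                # [128 0 119 255 255 255]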
Example 17
def mult8_infos_generator(gen, node, qrec, pnode, fnode) -> bool:
    if fnode is not None:
        return False
    # if isinstance(pnode, Conv2DParameters):
    #     for_ne16 = qrec.cache.get('ne16')
    #     in_zero_point = qrec.in_qs[0].zero_point
    #     conv_mul_bias = qrec.cache.get('mul_biases_q')
    #     prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
    #     act_infos(gen, pnode, pnode, None, None, prenorm=prenorm, extra1=0,
    #               for_ne16=for_ne16, in_zero_point=in_zero_point)
    # elif isinstance(pnode, (GlobalPoolingParameters, PoolingParameters)):
    #     compute_in_out_scale(qrec)
    #     act_infos(gen, pnode, pnode, None, qrec)
    elif isinstance(pnode, ActivationParameters):
        act_infos(gen, pnode, pnode, pnode, gen.G.quantization[NodeId(pnode)])
    # elif isinstance(pnode, ConvFusionParameters):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for_ne16 = any([qrec.cache.get('ne16') for qrec in quants])
    #     in_zero_point = quants[0].in_qs[0].zero_point
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if node.fusion_type.startswith('linear') or node.fusion_type.startswith('conv') or node.fusion_type.startswith('pool'):
    #         if node.fusion_type in ("pool_active"):
    #             act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1],
    #                       extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #         else:
    #             conv_mul_bias = quants[0].cache.get('mul_biases_q')
    #             prenorm = conv_mul_bias.pre_normalization if isinstance(conv_mul_bias, MultMulBiasScaleQType) else 0
    #             if node.fusion_type in ("conv_active_pool", "conv_active", "linear_active"):
    #                 act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool_active":
    #                 act_infos(gen, pnode, cnodes[0], cnodes[2], quants[2], prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16, in_zero_point=in_zero_point)
    #             elif node.fusion_type == "conv_pool":
    #                 act_infos(gen, pnode, cnodes[0], None, None, prenorm=prenorm,
    #                           extra1=0, for_ne16=for_ne16)
    elif isinstance(pnode, MatrixMulParameters):
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        act_infos(gen,
                  pnode,
                  pnode,
                  None,
                  None,
                  extra1=qrec.cache['scale_mul_biases_q'].qbiases[0],
                  extra2=qrec.cache['scale_mul_biases_q'].qnorms[0])
    elif isinstance(pnode, SoftMaxParameters):
        act_infos(gen, pnode, pnode, pnode, qrec)
    # elif isinstance(pnode, ActivationFusionBase):
    #     cnodes = node.contained_nodes()
    #     quants = [gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes]
    #     for qrec in quants:
    #         compute_in_out_scale(qrec)
    #     if isinstance(cnodes[0], (GlobalPoolingParameters, PoolingParameters)):
    #         act_infos(gen, pnode, cnodes[0], cnodes[1], quants[1])
    #     else:
    #         return False
    #     return True
    elif isinstance(pnode, (MatMulOpParameters, MatMulOpFusionParameters)):
        if isinstance(pnode, MatMulOpFusionParameters):
            cnodes = node.contained_nodes()
            quants = [
                gen.G.quantization[NodeId(node, fnode)] for fnode in cnodes
            ]
            mul_node = cnodes[0]
            mul_qrec = quants[0]
            act_node = cnodes[1]
            act_qrec = quants[1]
        else:
            mul_node = pnode
            mul_qrec = qrec
            act_node = None
            act_qrec = None

        if len(pnode.in_dims) == 3 and len(mul_qrec.in_qs[0].scale) > 1:
            gen_scales(gen, pnode, mul_node, mul_qrec)
            extra3 = 0
            extra4 = 0
        else:
            extra3 = mul_qrec.cache['mul_biases_q'].qbiases[0]
            extra4 = mul_qrec.cache['mul_biases_q'].qnorms[0]

        act_infos(gen,
                  pnode,
                  mul_node,
                  act_node,
                  act_qrec,
                  extra3=extra3,
                  extra4=extra4)
    elif isinstance(pnode, QuantizeParameters):
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        comment = f'in q: {in_q} out_q: {out_q}'
        if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP_ZEROPOINT':
            bits = 8 if in_q.dtype == np.int8 else 16
            if in_q.signed:
                contents = ((int(math.pow(2, bits)) + in_q.zero_point[0] -
                             out_q.zero_point[0]) %
                            int(math.pow(2, bits))).astype(np.uint8)
            else:
                contents = (int(math.pow(2, bits)) - in_q.zero_point[0] +
                            out_q.zero_point[0]).astype(np.uint8)
        # if in_q.dtype == np.int8 and out_q.dtype == np.uint8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         (256 + in_q.zero_point[0] - out_q.zero_point[0]) % 256).astype(np.uint8)
        # elif in_q.dtype == np.uint8 and out_q.dtype == np.int8:
        #     if not np.allclose(in_q.scale, out_q.scale):
        #         return False
        #     if not np.all(in_q.zero_point == (out_q.zero_point - 128)):
        #         return False
        #     contents = (
        #         256 - in_q.zero_point[0] + out_q.zero_point[0]).astype(np.uint8)
        elif in_q.dtype == np.int8 and out_q.dtype == np.int16:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        elif in_q.dtype == np.int16 and out_q.dtype == np.int8:
            if qrec.cache['kernel_type'] == 'KOP_CONVERT_FP_FP':
                return True
            raise NotImplementedError()
        else:
            raise ValueError(f"strange dtype change in {pnode.name}")
        cname, file_name = gen_constant(gen, pnode, pnode, INFOS)
        const_info = ConstantInfo(file_name,
                                  QType.Pow2(bits=8, q=0, signed=True),
                                  contents=contents)

        gen.globals.append(
            GlobalArgInfo("int8",
                          cname,
                          gen.opts['default_global_home_location'],
                          gen.opts['default_global_exec_location'],
                          const_info=const_info,
                          comment=comment))
    else:
        return False
    return True
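The KOP_CONVERT_FP_FP_ZEROPOINT constants work because, at equal scales, re-quantizing is x_out = x_in - zp_in + zp_out modulo 2 ** bits, a single byte-sized offset. A minimal check, assuming an int8 to uint8 conversion with zero points 0 and 128 (where the offset is 128 under either sign convention, since 128 == -128 mod 256):

import numpy as np

zp_in, zp_out = 0, 128
offset = np.uint8((256 - zp_in + zp_out) % 256)       # 128
x_q = np.array([-128, 0, 127], dtype=np.int8)
converted = (x_q.astype(np.int16) + offset).astype(np.uint8)
print(offset, converted)                              # 128 [0 128 255]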