Example 1
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        if params.activation == "relu6":
            int_bits = calc_bits(6)
        elif params.activation == "relun":
            relun = params.activation_params
            if isinstance(relun, list):
                relun = max(relun)
            int_bits = calc_bits(relun)
        elif params.activation == "relu" or params.activation == "hswish" or params.activation == "hsigmoid" or params.activation == "leaky":
            int_bits = bits(stats['range_out'][0]['max'],
                            stats['range_out'][0]['min'])
        else:
            raise ValueError(
                f'no support for activation {params.activation} in POW2 quantizer'
            )

        in_q = in_qs[0]
        if force_out_q is None:
            q = max(cls.get_pow2_bits(**kwargs) - int_bits, 0)
            out_q = QType(q=q, dtype=out_dtype)
        else:
            if force_out_q.bits - force_out_q.q < int_bits:
                LOG.warning(
                    'quantization is forcing node %s to have an output that may clip',
                    params.name)
            out_q = force_out_q
        return SymmetricQuantizationRecord(in_qs=[in_q], out_qs=[out_q])
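
Every branch above boils down to the same relationship: the activation's integer bits are subtracted from the available bits to obtain the fractional Q. A minimal sketch of that arithmetic, assuming a hypothetical calc_bits_sketch helper (a stand-in for illustration, not nntool's calc_bits):

import math

def calc_bits_sketch(max_val, min_val=0):
    """Hypothetical stand-in: integer bits (including sign) needed for the range."""
    mag = max(abs(max_val), abs(min_val))
    if mag == 0:
        return 1
    return int(math.ceil(math.log2(mag))) + 1

# relu6 clamps its output to [0, 6]; with a 16-bit pow2 container the fractional
# Q is whatever is left after the integer part, floored at zero like q = max(..., 0)
int_bits = calc_bits_sketch(6)   # 4
q = max(16 - int_bits, 0)        # Q12
print(f"int_bits={int_bits} -> Q{q}")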
Example 2
def generate_tanh(var, scaling):
    if scaling:
        # what is the current maximum value of the input?
        # We want to: (a) represent 1 precisely
        #             (b) make sure that scaling to this rep does not overflow
        # Find the closest power of 2 greater than the current scale
        closest_repr = math.log2(var.scale)
        closest_repr = min(math.floor(closest_repr), -7)
        new_scale = pow(2, closest_repr)
        cur_max_val = math.ceil(pow(2, var.ibits) * var.scale)
        new_scaled_max_val = math.ceil(cur_max_val / new_scale)
        assert calc_bits(
            new_scaled_max_val) + var.q <= 31, "risk of overflow in htanh"
        new_q = 0
        return ExprState(HTanh(
            ATScale.from_scales(var.expr,
                                var.scale,
                                new_scale,
                                28 - var.length,
                                to_q=new_q,
                                from_q=var.q), new_q, new_scale),
                         abs(closest_repr) + 1,
                         q=new_q,
                         scale=new_scale)
    return ExprState(HTanh(var.expr, None, None), var.ibits)
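Example 3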
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        if not fusion and in_qs[0].dtype == np.int32:
            return None

        if params.activation == "relu6":
            int_bits = calc_bits(6)
        elif params.activation == "relun":
            relun = params.activation_params
            if isinstance(relun, list):
                relun = max(relun)
            int_bits = calc_bits(relun)
        elif params.activation in [
                "relu", "hswish", "hsigmoid", "leaky", "htanh"
        ]:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            int_bits = calc_bits(stats['range_out'][0]['max'],
                                 stats['range_out'][0]['min'])
        elif params.activation == "sigmoid" or params.activation == "tanh":
            if force_out_q is None:
                q = 7 if out_dtype == np.int8 else 15
                return QRec.symmetric(in_qs=[in_qs[0]],
                                      out_qs=[QType(q=q, dtype=out_dtype)])
            else:
                q = 7 if force_out_q.dtype == np.int8 else 15
                if force_out_q.q != q:
                    return None
                return QRec.symmetric(in_qs=[in_qs[0]], out_qs=[force_out_q])
        else:
            LOG.error(
                f'no support for activation {params.activation} in POW2 quantizer'
            )
            return None

        in_q = in_qs[0]
        if force_out_q is None:
            q = max(cls.get_pow2_bits(**kwargs) - int_bits, 0)
            out_q = QType(q=q, dtype=out_dtype)
        else:
            if force_out_q.bits - force_out_q.q < int_bits:
                return None
            out_q = force_out_q
        return QRec.symmetric(in_qs=[in_q], out_qs=[out_q])
Example 4
def compute_activation_out_maxq(node, num_bits):
    relun = None
    if node.activation == "relu6":
        relun = 6
    elif node.activation == "relun":
        relun = node.activation_params
        if isinstance(relun, list):
            relun = max(relun)
    if relun is None:
        return None
    relu_bits = calc_bits(relun)
    return num_bits - relu_bits
Example 5
def astats(size, do_bits=True):
    """Extracts statistics from a tensor
    """
    ret = {
        'mean': 0,
        'std': 0.25,
        'min': -0.9,
        'max': 0.9,
        'size': size,
        'wols': 0,
        'sols': 0,
        'min_out': 0,
        'max_out': 0,
    }
    if do_bits:
        ret['ibits'] = calc_bits(0.9, -0.9)
    return ret
Example 6
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        in_qs = cls.force_symmetric_and_dtype(in_qs, idx=0)
        if in_qs is None:
            return None
        in_qs = deepcopy(in_qs)
        G = kwargs['G']
        opts = kwargs['opts']

        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        o_q = QType.from_min_max_sq(min_val=stats['range_out'][0]['min'],
                                    max_val=stats['range_out'][0]['max'],
                                    dtype=out_dtype)
        if force_out_qs and force_out_qs[0]:
            LOG.warning(
                'on node %s output is being forced from scale %s -> %s',
                params.name, o_q.scale, force_out_qs[0].scale)
            o_q = force_out_qs[0]

        names = {
            val: idx
            for idx, val in enumerate(LSTMParameters.INPUT_NAMES)
        }
        cell_range = stats.get('range_cell')
        if cell_range is None:
            raise ValueError(
                f'cell range not present in stats for {params.name}')
        cell_stat = max(abs(cell_range[var]) for var in ['min', 'max'])
        if params.cell_clip and not params.quant_c_state_with_stat:
            cell_max = params.cell_clip
            ratio_c = cell_max / cell_stat
            if not (ratio_c > 0.9 and ratio_c < 1.1):
                LOG.warning(
                    f"C state is forced to a range [-{cell_max}:{cell_max}] different to the one calulated "
                    f"from the inference statistic [-{cell_stat}:{cell_stat}], consider using nodeoption {params.name} "
                    "QUANT_C_STATE_WITH_STAT 1 to force it to be the one calculated"
                )
        else:
            cell_max = cell_stat

        cell_int_bits = calc_bits(cell_max)

        in_qs[names['c_state']].recalculate_scale(-cell_max, cell_max)
        LOG.debug("cell bits %d max %d cell range %d", cell_int_bits, cell_max,
                  in_qs[names['c_state']].range)
        int2_scale = int3_scale = out_tanh_sig_scale = None
        if params.hard_act:
            # worst case is (internal_q * 3) + 2 = 32 (1 for 1 and 1 for sign) i.e. 10
            # but also (internal_q * 2) + cell_bits = 32
            int_q = min((16 - cell_int_bits), 10)
            int2_scale = math.pow(2, -(int_q * 2))
            int3_scale = math.pow(2, -(int_q * 3))
        else:
            int_q = 12
            # output of LUT activations are always Q15
            out_tanh_sig_scale = math.pow(2, -15)
        int_scale = math.pow(2, -int_q)

        scale_pairs = {
            chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
            for chan in ['i', 'o', 'c', 'f']
        }
        for weight_name in [
                weight_name for scale_pair in scale_pairs.values()
                for weight_name in scale_pair
        ]:
            in_qs[names[weight_name]] = deepcopy(in_qs[names[weight_name]])
            in_qs[names[weight_name]].dtype = np.int8
            in_qs[names[weight_name]].bits = opts['weight_bits']

        w_scales = [(in_qs[names[namei]].scale, in_qs[names[namer]].scale)
                    for k, (namei, namer) in scale_pairs.items()]
        if (abs(1 - in_qs[0].scale / o_q.scale) < 0.1) and \
                all([(abs(1 - w_scale[0] / w_scale[1]) < 0.2) for w_scale in w_scales]):
            LOG.info(
                "node %s has similar input and i_state scales --> "
                "will be generated the same_scale kernel with better performances",
                params.name)
            params.rnn_same_inout_scale = True
            G.node_options[NodeId(params)] = params.at_options

        if params.rnn_same_inout_scale:
            if not (abs(1 - in_qs[0].scale / o_q.scale) < 0.1) and \
               not all([(abs(1 - w_scale[0] / w_scale[1]) < 0.1) for w_scale in w_scales]):
                LOG.warning(
                    "node %s has different input and i_state scales consider using the "
                    "LSTM kernel with rnn_same_inout_scale=False (better accuracy)",
                    params.name)
            # in and out and state are all in the same scale
            in_and_out_scale = np.maximum(in_qs[0].scale, o_q.scale)
            # i_state scale may be 1 since the value is 0
            # np.maximum(in_and_out_scale, in_qs[names['i_state']].scale)
            i_state_scale = in_scale = in_and_out_scale
            in_qs[0].scale = in_scale
            o_q.scale = in_scale
            scales = {
                k: np.maximum(in_qs[names[namei]].scale,
                              in_qs[names[namer]].scale)
                for k, (namei, namer) in scale_pairs.items()
            }
            for k, (namei, namer) in scale_pairs.items():
                in_qs[names[namei]].scale = scales[k]
                in_qs[names[namer]].scale = scales[k]
        else:
            in_scale = in_qs[0].scale
            i_state_scale = o_q.scale
            o_q.scale = i_state_scale

        if not params.rnn_states_as_inputs:
            in_qs[names['i_state']].scale = i_state_scale

        # compute scales for perceptrons
        r_pscales = {
            k: in_qs[names["r_2_%s_w" % k]].scale * i_state_scale
            for k in ['i', 'o', 'c', 'f']
        }
        scale_qtypes = {
            "r_2_%s_q" % k: MultMulBiasScaleQType(scale=r_pscale / int_scale)
            for k, r_pscale in r_pscales.items()
        }

        i_pscales = {
            k: in_qs[names["i_2_%s_w" % k]].scale * in_scale
            for k in ['i', 'o', 'c', 'f']
        }
        # if input and i_state have different scales -> scale the inputs before sum
        # otherwise do nothing and these scales will be ignored
        scale_qtypes.update({
            "i_2_%s_q" % k: MultMulBiasScaleQType(scale=i_pscale / r_pscale)
            for (k, i_pscale
                 ), r_pscale in zip(i_pscales.items(), r_pscales.values())
        })

        if params.hard_act:
            cell_in_scale = in_qs[names['c_state']].scale / int_scale
            cell_out_scale = int2_scale / in_qs[names['c_state']].scale
            state_out_scale = int3_scale / i_state_scale
        else:
            cell_in_scale = in_qs[
                names['c_state']].scale * out_tanh_sig_scale / int_scale
            cell_out_scale = int_scale / in_qs[names['c_state']].scale
            state_out_scale = out_tanh_sig_scale / i_state_scale

        scale_qtypes['cell_in_q'] = MultMulBiasScaleQType(scale=cell_in_scale)
        # TODO - Check cell clip here
        scale_qtypes['cell_out_q'] = MultMulBiasScaleQType(
            scale=cell_out_scale)
        scale_qtypes['state_out_q'] = MultMulBiasScaleQType(
            scale=state_out_scale)
        # set internal scale
        scale_qtypes['i_qtype'] = QType(q=int_q, bits=32, signed=True)
        # set biases to output of perceptron
        for gate in ['i', 'o', 'c', 'f']:
            in_qs[names[f"{gate}_b"]].scale = r_pscales[gate]
            in_qs[names[f"{gate}_b"]].dtype = np.int32
        if params.lstm_output_c_state:
            out_qs = [o_q, in_qs[names['c_state']]]
        else:
            out_qs = [o_q]
        return QRec.scaled(
            in_qs=in_qs,
            out_qs=out_qs,
            **scale_qtypes,
        )
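
The hard_act branch above derives its internal Q from the cell's integer bits (the int_q = min(16 - cell_int_bits, 10) line, motivated by the 32-bit accumulator comments). A standalone replay of that selection on invented cell widths, just to show how a wider cell range shrinks the internal precision:

import math

# replay of the hard_act internal-Q selection with invented cell_int_bits values
for cell_int_bits in (4, 8, 12):
    int_q = min(16 - cell_int_bits, 10)
    int_scale = math.pow(2, -int_q)
    int2_scale = math.pow(2, -(int_q * 2))
    int3_scale = math.pow(2, -(int_q * 3))
    print(cell_int_bits, int_q, int_scale, int2_scale, int3_scale)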
Example 7
 def from_min_max(cls,
                  min_val,
                  max_val,
                  dtype=None,
                  bits=None,
                  scaled=False,
                  asymmetric=False,
                  narrow_range=False,
                  quantized_dimension=None,
                  scale_zero_as_one=False,
                  forced=False,
                  zero_point=None,
                  **kwargs):
     min_val = cls.init_array(min_val)
     max_val = cls.init_array(max_val)
     # check for scalar
     min_max_equal = np.isclose(min_val, max_val)
     #min_max_equal = min_val == max_val
     max_val = np.where(np.logical_and(min_max_equal, min_val < 0),
                        -(min_val), max_val)
     min_val = np.where(np.logical_and(min_max_equal, min_val > 0),
                        -(max_val), min_val)
     max_val = np.where(np.logical_and(min_max_equal, min_val == 0), 1,
                        max_val)
     min_val = np.where(np.logical_and(min_max_equal, min_val == 0), 1,
                        min_val)
     # zero must be representable
     min_val = np.where(min_val > 0, 0, min_val)
     max_val = np.where(max_val < 0, 0, max_val)
     # work out container
     if dtype is None:
         dtype = np.int8 if scaled else np.int16
     dtype_bits, signed = DTYPES[dtype]
     if bits is None:
         bits = dtype_bits
     elif bits > dtype_bits:
         raise ValueError(f'bits {bits} do not fit in dtype {dtype}')
     if scaled:
         qmin, qmax = cls.calculate_quantized_range(
             bits, narrow_range=narrow_range, signed=signed)
         scale, zero_point = cls.calculate_scale(
             min_val,
             max_val,
             qmin,
             qmax,
             dtype,
             asymmetric=asymmetric,
             scale_zero_as_one=scale_zero_as_one,
             narrow_range=narrow_range,
             zero_point=zero_point)
         if len(scale) == 1:
             quantized_dimension = None
         return cls(bits=bits,
                    signed=signed,
                    dtype=dtype,
                    scale=scale,
                    zero_point=zero_point,
                    quantized_dimension=quantized_dimension,
                    min_val=min_val,
                    max_val=max_val,
                    narrow_range=narrow_range,
                    forced=forced,
                    asymmetric=asymmetric,
                    **kwargs)
     else:
         if asymmetric:
             raise ValueError(
                  'asymmetric is not supported in unscaled mode')
         if quantized_dimension is not None:
             raise ValueError(
                  'quantized dimension is not supported in unscaled mode')
         int_bits = calc_bits(max_val, min_val, signed=signed)
         if int_bits > bits:
             raise ValueError(
                 f"{max_val}, {min_val} number cannot be represented with this many bits"
             )
         return cls(bits=bits,
                    q=bits - int_bits,
                    signed=signed,
                    dtype=dtype,
                    min_val=min_val,
                    max_val=max_val,
                    narrow_range=narrow_range,
                    forced=forced,
                    asymmetric=asymmetric,
                    **kwargs)
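
The min/max preprocessing at the top of from_min_max is easy to misread, so here is a self-contained numpy replay of the same symmetrisation and zero-inclusion rules on invented values (the helper name and sample numbers are ours, not from the source):

import numpy as np

def normalize_min_max(min_val, max_val):
    """Replay of the from_min_max preprocessing on plain arrays (illustrative only)."""
    min_val = np.atleast_1d(np.asarray(min_val, dtype=np.float32))
    max_val = np.atleast_1d(np.asarray(max_val, dtype=np.float32))
    eq = np.isclose(min_val, max_val)
    # a degenerate range collapses onto a single value: mirror it around zero
    max_val = np.where(np.logical_and(eq, min_val < 0), -min_val, max_val)
    min_val = np.where(np.logical_and(eq, min_val > 0), -max_val, min_val)
    # an all-zero range is widened to 1 before the zero-inclusion step
    max_val = np.where(np.logical_and(eq, min_val == 0), 1, max_val)
    min_val = np.where(np.logical_and(eq, min_val == 0), 1, min_val)
    # zero must always be representable
    min_val = np.where(min_val > 0, 0, min_val)
    max_val = np.where(max_val < 0, 0, max_val)
    return min_val, max_val

# degenerate and ordinary ranges side by side
print(normalize_min_max([-0.5, 0.3, 0.0, 0.1], [-0.5, 0.3, 0.0, 0.9]))
# -> min [-0.5, -0.3, 0.0, 0.0], max [0.5, 0.3, 1.0, 0.9]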
Example 8
    def calculate_q(self, G, node, astats, in_qs, dtype, out_dtype=None):
        if out_dtype is None:
            out_dtype = dtype
        if isinstance(node, (PoolingParameters, OutputParameters, SplitParameters)):
            o_q = in_qs[0]
        elif isinstance(node, SoftMaxParameters):
            o_q = SymmetricMultQType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
        else:
            o_q = SymmetricMultQType.from_min_max(min_val=astats['range_out'][0]['min'],
                                                  max_val=astats['range_out'][0]['max'],
                                                  dtype=out_dtype)

        if isinstance(node, (MatrixAddParameters, MatrixSubParameters)):
            qrec = MultAddQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
        elif isinstance(node, ExpressionFusionParameters):
            o_qs = [SymmetricMultQType.from_min_max(min_val=orange['min'],
                                                    max_val=orange['max'],
                                                    dtype=out_dtype)
                    for orange in astats['range_out']]
            fusion_inputs = sorted([n for n in node.subgraph.inputs()
                                    if isinstance(n, FusionInputParameters)],
                                   key=lambda x: x.idx)
            fusion_outputs = sorted([n for n in node.subgraph.outputs()
                                     if isinstance(n, FusionOutputParameters)],
                                    key=lambda x: x.idx)

            node_scale_map = {fnode: in_qs[idx].scale
                              for idx, fnode in enumerate(fusion_inputs)}
            for idx, fnode in enumerate(fusion_outputs):
                node_scale_map[fnode] = o_qs[idx].scale
            inp, outp, expr = node.decompose(node_scale_map=node_scale_map)

            qrec = MultExpressionQuantizationRecord(in_qs=in_qs,
                                                    out_qs=o_qs,
                                                    inputs=inp,
                                                    output_exprs=outp,
                                                    intermediate_exprs=expr)
        elif isinstance(node, (MatrixBroadcastedLinearOpParameters, MatScaleFusionParameters, GlobalPoolParameters)):
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

        elif isinstance(node, SplitParameters):
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q]*node.num_splits)

        elif isinstance(node, ConstantInputParameters):
            if node.value_quantization:
                qrec = MultConstantQuantizationRecord(out_qs=[node.value_quantization],
                                                      constants_are_quantized=True)
            else:
                qrec = MultConstantQuantizationRecord(out_qs=[o_q],
                                                      constants_are_quantized=False)

        elif isinstance(node, (FcParameters, Conv2DParameters)):
            weights_q = SymmetricMultQType.from_array(arr=node.weights,
                                                      quantized_dimension=self.get_quantized_dimension(
                                                          node),
                                                      dtype=dtype, narrow_range=self._narrow_weights)
            if node.has_bias:
                biases_q = SymmetricMultBiasesQType(
                    dtype=np.int32, scale=weights_q.scale * in_qs[0].scale)
            else:
                biases_q = SymmetricMultBiasesQType(
                    dtype=np.int32, scale=np.array([1], dtype=np.int32))
            mul_biases_q = MultMulBiasScaleQType.from_filter(in_qs[0], weights_q, o_q, node)
            qrec = MultScalableFilterQuantizationRecord(in_qs=[in_qs[0]],
                                                        out_qs=[o_q],
                                                        weights_q=weights_q,
                                                        biases_q=biases_q,
                                                        mul_biases_q=mul_biases_q,
                                                        constants_are_quantized=False)
            LOG.debug("filter %s qrec %s", node.name, qrec)
        elif isinstance(node, RNNParameters):
            input_nodes = {RNNParameters.INPUT_NAMES[edge.to_idx]: edge.from_node
                           for edge in G.in_edges(node.name)
                           if isinstance(edge.from_node, ConstantInputParameters)}
            names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
            # quantization_mode: extended, autotiler
            # state_width: 16bit or 8bit
            opts = self.get_options(node)
            if opts['mode'] == "extended":
                in_w_scale = in_qs[names['i_2_i_w']].scale * in_qs[0].scale
                state_w_scale = in_qs[names['r_2_i_w']].scale
                i_2_a_q = MultMulBiasScaleQType(scale=in_w_scale/state_w_scale)
                s_2_s_q = MultMulBiasScaleQType(scale=state_w_scale)
                s_2_o_q = MultMulBiasScaleQType(scale=1/o_q.scale)
                self.rescale_constant(input_nodes['i_b'], state_w_scale, dtype=np.int32)
                qrec = MultScalableRnnQuantizationRecord(
                    in_qs=in_qs,
                    out_qs=[o_q],
                    i_2_a_q=i_2_a_q,
                    s_2_s_q=s_2_s_q,
                    s_2_o_q=s_2_o_q
                )
            elif opts['mode'] == 'autotiler':
                in_and_state_scale = np.maximum(in_qs[0].scale, o_q.scale)
                in_and_state_w_scale = np.maximum(
                    in_qs[names['i_2_i_w']].scale, in_qs[names['r_2_i_w']].scale)
                in_qs[0].scale = in_and_state_scale
                o_q.scale = in_and_state_scale
                self.rescale_constant(input_nodes['i_state'], in_and_state_scale)
                self.rescale_constant(input_nodes['i_2_i_w'], in_and_state_w_scale)
                self.rescale_constant(input_nodes['r_2_i_w'], in_and_state_w_scale)
                state_w_scale = in_and_state_scale * in_and_state_w_scale
                self.rescale_constant(input_nodes['i_b'], state_w_scale, dtype=np.int32)
                s_2_s_q = MultMulBiasScaleQType(scale=state_w_scale/in_and_state_scale)
                qrec = MultScalableRnnQuantizationRecord(
                    in_qs=in_qs,
                    out_qs=[o_q],
                    s_2_s_q=s_2_s_q,
                )
        elif isinstance(node, LSTMParameters):
            input_nodes = {LSTMParameters.INPUT_NAMES[edge.to_idx]: edge.from_node
                           for edge in G.in_edges(node.name)
                           if isinstance(edge.from_node, ConstantInputParameters)}
            names = {val: idx for idx, val in enumerate(LSTMParameters.INPUT_NAMES)}
            if node.cell_clip:
                cell_max = node.cell_clip
            else:
                cell_max = max(abs(astats['range_cell'][var]) for var in ['min', 'max'])

            cell_int_bits = calc_bits(cell_max)

            in_qs[names['c_state']].recalculate_scale(-cell_max,
                                                      cell_max)
            LOG.debug("cell bits %d max %d cell range %d",
                      cell_int_bits,
                      cell_max,
                      in_qs[names['c_state']].range)
            # worst case is (internal_q * 3) + 2 = 32 (1 for 1 and 1 for sign) i.e. 10
            # but also (internal_q * 2) + cell_bits = 32
            int_q = min((32-cell_int_bits)//2, 10)
            # in and out and state are all in the same scale
            in_and_out_scale = np.maximum(in_qs[0].scale, o_q.scale)
            in_and_state_scale = np.maximum(in_and_out_scale, in_qs[names['i_state']].scale)
            in_qs[0].scale = in_and_state_scale
            o_q.scale = in_and_state_scale
            self.rescale_constant(input_nodes['i_state'], in_and_state_scale)
            scale_pairs = {chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
                           for chan in ['i', 'o', 'c', 'f']}
            scales = {k: np.maximum(in_qs[names[namei]].scale, in_qs[names[namer]].scale)
                      for k, (namei, namer) in scale_pairs.items()}
            for k, (namei, namer) in scale_pairs.items():
                self.rescale_constant(input_nodes[namei], scales[k])
                self.rescale_constant(input_nodes[namer], scales[k])
            int_scale = pow(2, -int_q)
            int2_scale = pow(2, -(int_q*2))
            int3_scale = pow(2, -(int_q*3))
            # compute scales for perceptrons
            pscales = {k: scales[k] * in_and_state_scale for k in ['i', 'o', 'c', 'f']}
            scale_qtypes = {"r_2_%s_q" % k: MultMulBiasScaleQType(
                scale=pscale/int_scale) for k, pscale in pscales.items()}
            scale_qtypes['cell_in_q'] = MultMulBiasScaleQType(
                scale=in_qs[names['c_state']].scale/int_scale)
            # TODO - Check cell clip here
            scale_qtypes['cell_out_q'] = MultMulBiasScaleQType(
                scale=int2_scale/in_qs[names['c_state']].scale)
            scale_qtypes['state_out_q'] = MultMulBiasScaleQType(scale=int3_scale/in_and_state_scale)
            # set internal scale
            scale_qtypes['i_qtype'] = QType(q=int_q, bits=32, signed=True)
            # set biases to output of perceptron
            for k in ['i', 'o', 'c', 'f']:
                self.rescale_constant(input_nodes["%s_b" % k], pscales[k], dtype=np.int32)
            qrec = MultScalableLstmQuantizationRecord(
                in_qs=in_qs,
                out_qs=[o_q],
                **scale_qtypes,
            )
        else:
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
        return qrec
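Example 9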
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        pow2_biases = kwargs.get('opts')['pow2_biases']
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        range_acc = stats.get('range_acc', stats['range_out'][0])
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = np.int32
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            out_dtype = params_dtype
            range_out = stats['range_out'][0]

        in_q = deepcopy(in_qs[0]).scale_to_pow2()
        calc_width = 31

        o_q = QType.from_min_max_pow2(range_out['min'],
                                      range_out['max'],
                                      dtype=out_dtype)
        if force_out_q:
            if o_q.scale > force_out_q.scale:
                return None

        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=params_dtype)
        calc_q = in_q.q + weights_q.q

        acc_bits = calc_bits(range_acc['max'], range_acc['min'])
        act_bits = calc_bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights and input
            missing_bits = act_acc_bits - calc_int_bits
            if missing_bits > calc_q * 0.75:
                raise ValueError(
                    f'Quantizing {params.name} at this precision will lose more than 75% of the fractional part'
                )

            prec_inp = min(math.floor(0.5 + missing_bits * in_q.q / calc_q),
                           in_q.q)
            prec_w = min(math.floor(0.5 + missing_bits * weights_q.q / calc_q),
                         weights_q.q)
            left = missing_bits - prec_inp - prec_w
            if left > 0:
                prec_w += left
            LOG.warning(
                'reducing weight and input precision (%s, %s) in %s to satisfy quantization constraints',
                prec_w, prec_inp, params.name)
            weights_q.q -= prec_w
            in_q.q -= prec_inp
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            o_q = c_q

        if pow2_biases == 0:
            biases_dtype = params_dtype
        elif pow2_biases == 8:
            biases_dtype = np.int8
        elif pow2_biases == 16:
            biases_dtype = np.int16
        else:
            biases_dtype = np.int32

        biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                         dtype=biases_dtype)
        # make sure that the biases are not stored more precisely than the accumulator. It's pointless and will
        # cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params,
                      MultiplicativeBiasParameters) and params.has_mul_bias:
            mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                         dtype=int_dtype)
        else:
            mb_q = None
        return QRec.symmetric(in_qs=[in_q, weights_q, biases_q],
                              out_qs=[o_q],
                              calc_q=c_q,
                              acc_q=acc_q,
                              mul_biases_q=mb_q)
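
When the accumulator is short of integer headroom, the code above spreads the missing bits over the input and the weights in proportion to their current Q. A standalone replay of that split with invented numbers (the helper name is ours, not from the source):

import math

def split_missing_bits(in_q, weights_q, missing_bits):
    """Replay of the proportional precision reduction, illustrative numbers only."""
    calc_q = in_q + weights_q
    prec_inp = min(math.floor(0.5 + missing_bits * in_q / calc_q), in_q)
    prec_w = min(math.floor(0.5 + missing_bits * weights_q / calc_q), weights_q)
    left = missing_bits - prec_inp - prec_w
    if left > 0:
        # rounding left a bit unassigned: take it from the weights
        prec_w += left
    return in_q - prec_inp, weights_q - prec_w

# e.g. a Q12 input and Q14 weights that are 5 bits short of integer headroom
print(split_missing_bits(12, 14, 5))   # -> (10, 11)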
Example 10
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        pow2_biases = kwargs.get('opts')['pow2_biases']
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        range_acc = stats['range_acc']
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = np.int32
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            out_dtype = params_dtype
            range_out = stats['range_out'][0]

        in_q = deepcopy(in_qs[0]).scale_to_pow2()
        calc_width = 32

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(range_out['min'],
                                          range_out['max'],
                                          dtype=out_dtype)
        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=params_dtype)
        calc_q = in_q.q + weights_q.q

        acc_bits = calc_bits(range_acc['max'], range_acc['min'])
        act_bits = calc_bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert weights_q.q >= missing_bits, "no space in weights to reduce precision"
            LOG.warning(
                'reducing weight precision in %s to satisfy quantization constraints',
                params.name)
            weights_q.q = weights_q.q - missing_bits
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            o_q = c_q

        if pow2_biases == 0:
            biases_dtype = params_dtype
        elif pow2_biases == 8:
            biases_dtype = np.int8
        elif pow2_biases == 16:
            biases_dtype = np.int16
        else:
            biases_dtype = np.int32

        biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                         dtype=biases_dtype)
        # make sure that the biases are not stored more precisely than the accumulator. It's pointless and will
        # cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params,
                      MultiplicativeBiasParameters) and params.has_mul_bias:
            mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                         dtype=int_dtype)
        else:
            mb_q = None
        return QRec.symmetric(in_qs=[in_q, weights_q, biases_q],
                              out_qs=[o_q],
                              calc_q=c_q,
                              acc_q=acc_q,
                              mul_biases_q=mb_q)
Example 11
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        in_qs = cls.force_symmetric_and_dtype(in_qs, idx=0, dtype=np.int16)
        if in_qs is None:
            return None
        in_qs = deepcopy(in_qs)
        G = kwargs['G']
        opts = kwargs.get('opts', {})

        cls.check_valid_ranges(params, stats, idx=0, dirs='out')

        names = {
            val: idx
            for idx, val in enumerate(LSTMParameters.INPUT_NAMES)
        }

        o_q = in_qs[names['i_state']] = QType.from_min_max_sq(
            min_val=stats['range_out'][0]['min'],
            max_val=stats['range_out'][0]['max'],
            dtype=np.int16)
        if force_out_q:
            if force_out_q.zero_point != 0:
                return None
            LOG.warning(
                'on node %s output is being forced from scale %s -> %s',
                params.name, o_q.scale, force_out_qs[0].scale)
            o_q = force_out_qs[0]

        cell_range = stats.get('range_cell')
        if cell_range is None:
            raise ValueError(
                f'cell range not present in stats for {params.name}')

        # cell range is at least 1.0
        cell_stat = max(1.0, *[abs(cell_range[var]) for var in ['min', 'max']])
        if params.cell_clip and not params.quant_c_state_with_stat:
            cell_max = params.cell_clip
            ratio_c = cell_max / cell_stat
            if not (ratio_c > 0.9 and ratio_c < 1.1):
                msg = (
                    f"C state is forced to a range [-{cell_max}:{cell_max}] different to the one calulated "
                    f"from the inference statistic [-{cell_stat}:{cell_stat}], consider using nodeoption {params.name} "
                    "QUANT_C_STATE_WITH_STAT 1 to force it to be the one calculated"
                )
                LOG.warning('%s', msg)
        else:
            cell_max = cell_stat

        # this limit is driven by the c_in * f + c * i calculation
        # c * i will be in Q24 and we want c_in * f to be scaled to the same
        # abs(f) will be <=1 so the cell int bits cannot exceed 31 - 1 (overflow) - 24 = 6
        cell_limit = pow(2, 6)
        if cell_max > cell_limit:
            LOG.warning('Cell state exceeds %s and will be clipped',
                        cell_limit)
            cell_max = cell_limit

        cell_int_bits = calc_bits(cell_max)
        in_qs[names['c_state']] = QType.from_min_max_sq(-cell_max,
                                                        cell_max,
                                                        dtype=np.int16)

        LOG.debug("cell bits %d max %d cell range %d", cell_int_bits, cell_max,
                  in_qs[names['c_state']].range)

        # set weight qtypes
        edges = kwargs['G'].indexed_in_edges(params.name)
        scale_pairs = {
            chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
            for chan in ['i', 'o', 'c', 'f']
        }
        for scale_pair in scale_pairs.values():
            in_q = in_qs[names[scale_pair[0]]]
            in_qs[names[scale_pair[0]]] = QType.from_min_max_sq(
                in_q.min_val,
                in_q.max_val,
                dtype=np.int8,
                narrow_range=opts.get('narrow_weights'),
                dont_generate_value=True)
            in_qs[names[scale_pair[0]]].bits = opts['weight_bits']
            in_q = in_qs[names[scale_pair[1]]]
            in_qs[names[scale_pair[1]]] = QType.from_min_max_sq(
                in_q.min_val,
                in_q.max_val,
                dtype=np.int8,
                narrow_range=opts.get('narrow_weights'),
                concatenated_nodes=[
                    edges[names[scale_pair[0]]].from_node.name
                ])
            in_qs[names[scale_pair[1]]].bits = opts['weight_bits']

        # get weight scales
        w_scales = [(in_qs[names[namei]].scale, in_qs[names[namer]].scale)
                    for k, (namei, namer) in scale_pairs.items()]

        gate_sum_max = [(get_max(stats[f'range_{gate}_gate_i']),
                         get_max(stats[f'range_{gate}_gate_r']))
                        for gate in ['i', 'o', 'c', 'f']]

        gate_sum_max_bits = [
            (np.ceil(np.log2(gsm_i / (in_qs[0].scale * i_w))),
             np.ceil(np.log2(gsm_r / (o_q.scale * r_w))))
            for (gsm_i, gsm_r), (i_w, r_w) in zip(gate_sum_max, w_scales)
        ]

        for gate, (max_i, max_r) in zip(['i', 'o', 'c', 'f'],
                                        gate_sum_max_bits):
            if max_i > 30:
                LOG.warning(
                    'max bits in accumulation input %s gate %s - there may be errors',
                    max_i, gate)
            if max_r > 30:
                LOG.warning(
                    'max bits in accumulation state %s gate %s - there may be errors',
                    max_r, gate)

        # LUT activations Q12 -> Q15
        act_in_q = 12
        act_out_q = 15
        int_scale = math.pow(2, -act_in_q)
        out_tanh_sig_scale = math.pow(2, -act_out_q)

        scale_qtypes = {}
        r_pscales = {}
        i_pscales = {}
        scale_qtypes['r_pscales'] = r_pscales
        scale_qtypes['i_pscales'] = i_pscales
        for gate, w_scale, max_bits in zip(['i', 'o', 'c', 'f'], w_scales,
                                           gate_sum_max_bits):
            weight_scale_ratio = w_scale[0] / w_scale[1]
            # TODO - decide whether to scale weights equally
            in_qs[names[f"{gate}_b"]] = QType(scale=int_scale, dtype=np.int32)
            i_pscales[gate] = w_scale[0] * in_qs[0].scale
            scale_qtypes[f"i_2_{gate}_q"] = qscale = MultMulBiasScaleQType(
                scale=i_pscales[gate] / int_scale)
            qscale.pre_normalization = int(max(8 - (31 - max_bits[0]), 0))
            r_pscales[gate] = w_scale[1] * o_q.scale
            scale_qtypes[f"r_2_{gate}_q"] = qscale = MultMulBiasScaleQType(
                scale=r_pscales[gate] / int_scale)
            qscale.pre_normalization = int(max(8 - (31 - max_bits[1]), 0))

        r_pscales['state_out_scale'] = o_q.scale
        r_pscales['int_scale'] = int_scale

        # ct = c_in * f + c * i
        # c * i = Q15 * Q15 -> Q30 -> norm(18) -> Q12
        # scale(c_in * f) = Q15 * Q15 prenorm 8 and scale -> Q12
        # ((c_in * f) + (c * i)) in Q12
        # scale -> cell_out
        # tanh(ct) -> Q15
        # o * tanh(ct) -> Q30
        # prenorm and scale

        # cell in to Q12
        cell_in_scale = (in_qs[names['c_state']].scale * out_tanh_sig_scale /
                         int_scale)
        # cell_out from Q12
        cell_out_scale = int_scale / in_qs[names['c_state']].scale
        # state out from Q30
        state_out_scale = math.pow(2, -(2 * act_out_q)) / o_q.scale

        r_pscales['act_out_scale'] = out_tanh_sig_scale
        r_pscales['c_before_scale'] = int_scale

        scale_qtypes['cell_in_q'] = MultMulBiasScaleQType(scale=cell_in_scale)
        # for 16 bit pre-normalize the scales to give us room
        scale_qtypes['cell_in_q'].pre_normalization = 8
        scale_qtypes['cell_out_q'] = MultMulBiasScaleQType(
            scale=cell_out_scale)
        scale_qtypes['state_out_q'] = MultMulBiasScaleQType(
            scale=state_out_scale)
        scale_qtypes['state_out_q'].pre_normalization = 8
        scale_qtypes['i_qtype'] = QType(q=act_in_q, dtype=np.int32)
        if params.lstm_output_c_state:
            out_qs = [o_q, in_qs[names['c_state']]]
        else:
            out_qs = [o_q]

        return QRec.scaled(
            in_qs=in_qs,
            out_qs=out_qs,
            **scale_qtypes,
        )
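
The gate_sum_max_bits expression above estimates how wide the raw gate accumulation gets: the observed floating-point maximum divided by the product of the activation and weight scales is the largest integer the accumulator will hold, and its log2 is the bit count checked against 30. A small numeric replay with invented scales:

import numpy as np

def acc_bits(stat_max, act_scale, w_scale):
    # the largest integer the accumulator holds is stat_max / (act_scale * w_scale);
    # its bit width is the ceiling of log2 of that value
    return np.ceil(np.log2(stat_max / (act_scale * w_scale)))

# a gate sum peaking at 12.0 with a 2**-12 activation scale and 2**-9 weight scale
print(acc_bits(12.0, 2**-12, 2**-9))    # -> 25.0, comfortably under the 30-bit warning
print(acc_bits(12.0, 2**-15, 2**-12))   # -> 31.0, would trigger the warning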
Example 12
    def _quantize(cls, params, in_qs, out_dtype, stats, **kwargs):
        qrecs = kwargs['qrecs']
        G = kwargs['G']

        o_q = SymmetricMultQType.from_min_max(
            min_val=stats['range_out'][0]['min'],
            max_val=stats['range_out'][0]['max'],
            dtype=out_dtype)
        input_nodes = {
            LSTMParameters.INPUT_NAMES[edge.to_idx]: edge.from_node
            for edge in G.in_edges(params.name)
            if isinstance(edge.from_node, ConstantInputParameters)
        }
        names = {
            val: idx
            for idx, val in enumerate(LSTMParameters.INPUT_NAMES)
        }
        if params.cell_clip:
            cell_max = params.cell_clip
        else:
            cell_max = max(
                abs(stats['range_cell'][var]) for var in ['min', 'max'])

        cell_int_bits = calc_bits(cell_max)

        in_qs[names['c_state']].recalculate_scale(-cell_max, cell_max)
        LOG.debug("cell bits %d max %d cell range %d", cell_int_bits, cell_max,
                  in_qs[names['c_state']].range)
        int2_scale = int3_scale = out_tanh_sig_scale = None
        if params.hard_act:
            # worst case is (internal_q * 3) + 2 = 32 (1 for 1 and 1 for sign) i.e. 10
            # but also (internal_q * 2) + cell_bits = 32
            int_q = min((16 - cell_int_bits), 10)
            int2_scale = math.pow(2, -(int_q * 2))
            int3_scale = math.pow(2, -(int_q * 3))
        else:
            int_q = 12
            out_tanh_sig_scale = math.pow(
                2, -15)  # output of LUT activations are always Q15
        int_scale = math.pow(2, -int_q)

        if np.isclose(in_qs[0].scale, o_q.scale, atol=1e-2):
            LOG.info(
                "node %s has similar input and i_state scales --> "
                "will be generated the same_scale kernel with better performances",
                params.name)
            params.rnn_same_inout_scale = True
            G.node_options[NodeId(params)] = params.at_options

        if params.rnn_same_inout_scale:
            if not np.isclose(in_qs[0].scale, o_q.scale, atol=1e-2):
                LOG.warning(
                    "node %s has different input and i_state scales consider using the "
                    "LSTM kernel with rnn_same_inout_scale=False (better accuracy)",
                    params.name)
            # in and out and state are all in the same scale
            in_and_out_scale = np.maximum(in_qs[0].scale, o_q.scale)
            i_state_scale = in_scale = np.maximum(
                in_and_out_scale, in_qs[names['i_state']].scale)
            in_qs[0].scale = in_scale
            o_q.scale = in_scale
            cls.rescale_constant(input_nodes['i_state'], i_state_scale, qrecs)
        else:
            in_scale = in_qs[0].scale
            i_state_scale = np.maximum(o_q.scale,
                                       in_qs[names['i_state']].scale)
            o_q.scale = i_state_scale
        scale_pairs = {
            chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
            for chan in ['i', 'o', 'c', 'f']
        }
        scales = {
            k: np.maximum(in_qs[names[namei]].scale, in_qs[names[namer]].scale)
            for k, (namei, namer) in scale_pairs.items()
        }
        for k, (namei, namer) in scale_pairs.items():
            cls.rescale_constant(input_nodes[namei], scales[k], qrecs)
            cls.rescale_constant(input_nodes[namer], scales[k], qrecs)
        # compute scales for perceptrons
        pscales = {k: scales[k] * i_state_scale for k in ['i', 'o', 'c', 'f']}
        scale_qtypes = {
            "r_2_%s_q" % k: MultMulBiasScaleQType(scale=pscale / int_scale)
            for k, pscale in pscales.items()
        }

        # if input and i_state have different scales -> scale the inputs before sum
        # otherwise do nothing and these scales will be ignored
        scale_qtypes.update({
            "i_2_%s_q" % k:
            MultMulBiasScaleQType(scale=in_scale / i_state_scale)
            for k in ['i', 'o', 'c', 'f']
        })

        if params.hard_act:
            cell_in_scale = in_qs[names['c_state']].scale / int_scale
            cell_out_scale = int2_scale / in_qs[names['c_state']].scale
            state_out_scale = int3_scale / i_state_scale
        else:
            cell_in_scale = in_qs[
                names['c_state']].scale * out_tanh_sig_scale / int_scale
            cell_out_scale = int_scale / in_qs[names['c_state']].scale
            state_out_scale = out_tanh_sig_scale / i_state_scale

        scale_qtypes['cell_in_q'] = MultMulBiasScaleQType(scale=cell_in_scale)
        # TODO - Check cell clip here
        scale_qtypes['cell_out_q'] = MultMulBiasScaleQType(
            scale=cell_out_scale)
        scale_qtypes['state_out_q'] = MultMulBiasScaleQType(
            scale=state_out_scale)
        # set internal scale
        scale_qtypes['i_qtype'] = QType(q=int_q, bits=32, signed=True)
        # set biases to output of perceptron
        for gate in ['i', 'o', 'c', 'f']:
            cls.rescale_constant(input_nodes["%s_b" % gate],
                                 pscales[gate],
                                 qrecs,
                                 dtype=np.int32)
        return MultScalableLstmQuantizationRecord(
            in_qs=in_qs,
            out_qs=[o_q],
            **scale_qtypes,
        )
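Example 13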
    def _quantize_lstm(cls, params, in_qs, stats, input_bits, **kwargs):
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        if force_out_qs and any(force_out_q is not None
                                for force_out_q in force_out_qs):
            return None

        opts = kwargs.get('opts', {})

        if input_bits == 16:
            in_out_dtype = np.uint16
        else:
            in_out_dtype = np.uint8

        if in_qs is None:
            return None
        in_qs = deepcopy(in_qs)
        G = kwargs['G']

        in_q = in_qs[0]

        cls.check_valid_ranges(params, stats, idx=0, dirs='out')

        in_edges = G.indexed_in_edges(params.name)

        names = {
            val: idx
            for idx, val in enumerate(LSTMParameters.INPUT_NAMES)
        }

        o_q = in_qs[names['i_state']] = QType.from_min_max_sq(
            min_val=stats['range_out'][0]['min'],
            max_val=stats['range_out'][0]['max'],
            dtype=in_out_dtype,
            narrow_range=opts['narrow_state'])

        cell_range = stats.get('range_cell')
        if cell_range is None:
            raise ValueError(
                f'cell range not present in stats for {params.name}')

        # cell range is at least 1.0
        cell_stat = max(1.0, *[abs(cell_range[var]) for var in ['min', 'max']])

        if params.cell_clip and not params.quant_c_state_with_stat:
            cell_max = params.cell_clip
            ratio_c = cell_max / cell_stat
            if not (ratio_c > 0.9 and ratio_c < 1.1):
                msg = (
                    f"C state is forced to a range [-{cell_max}:{cell_max}] different to the one calulated "
                    f"from the inference statistic [-{cell_stat}:{cell_stat}], consider using nodeoption {params.name} "
                    "QUANT_C_STATE_WITH_STAT 1 to force it to be the one calculated"
                )
                LOG.warning('%s', msg)
        else:
            cell_max = cell_stat

        # this limit is driven by the c_in * f + c * i calculation
        # c * i will be in Q24 and we want c_in * f to be scaled to the same
        # abs(f) will be <=1 so the cell int bits cannot exceed 31 - 1 (overflow) - 24 = 6
        cell_limit = pow(2, 6)
        if cell_max > cell_limit:
            LOG.warning('Cell state exceeds %s and will be clipped',
                        cell_limit)
            cell_max = cell_limit

        cell_int_bits = calc_bits(cell_max)
        # cell stays signed since it is used in a Hadamard product with the int32 streamout
        # in NE16
        in_qs[names['c_state']] = QType.from_min_max_sq(
            -cell_max,
            cell_max,
            dtype=np.int16 if input_bits == 16 else np.int8)

        LOG.debug("cell bits %d max %d cell range %d", cell_int_bits, cell_max,
                  in_qs[names['c_state']].range)

        # set weight qtypes
        int_num_inp = roundup(params.n_inputs, input_bits == 16)
        int_num_states = roundup(params.n_states, input_bits == 16)
        woffs = {}

        in_q = limit_input_precision(params, input_bits, in_q, int_num_inp,
                                     opts['narrow_weights'],
                                     opts['weight_bits'])

        o_q = limit_input_precision(
            params,
            input_bits,
            o_q,
            int_num_states,
            opts['narrow_weights'],
            opts['weight_bits'],
            extra_correction=-1 if opts.get('narrow_state') else 0)

        for gate in ['i', 'o', 'c', 'f']:
            i_idx = names[f'i_2_{gate}_w']
            r_idx = names[f'r_2_{gate}_w']

            woffs[gate] = woff_gate = [None, None]
            woff_gate[0] = calculatate_weight_q(
                in_qs, in_edges, i_idx, in_q.zero_point[0],
                (params.n_states, params.n_inputs),
                (params.n_states, int_num_inp), opts['weight_bits'],
                opts.get('narrow_weights'))

            woff_gate[1] = calculatate_weight_q(
                in_qs, in_edges, r_idx, o_q.zero_point[0],
                (params.n_states, params.n_states),
                (params.n_states, int_num_states), opts['weight_bits'],
                opts.get('narrow_weights'))

        # get weight scales
        scale_pairs = {
            chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
            for chan in ['i', 'o', 'c', 'f']
        }
        w_scales = [(in_qs[names[namei]].scale, in_qs[names[namer]].scale)
                    for k, (namei, namer) in scale_pairs.items()]

        gate_sum_max = [(get_max_or_one(stats[f'range_{gate}_gate_i']),
                         get_max_or_one(stats[f'range_{gate}_gate_r']))
                        for gate in ['i', 'o', 'c', 'f']]

        gate_sum_max_bits = [
            (np.ceil(np.log2(gsm_i / (in_qs[0].scale * i_w))),
             np.ceil(np.log2(gsm_r / (o_q.scale * r_w))))
            for (gsm_i, gsm_r), (i_w, r_w) in zip(gate_sum_max, w_scales)
        ]

        for gate, (max_i, max_r) in zip(['i', 'o', 'c', 'f'],
                                        gate_sum_max_bits):
            if np.max(max_i) > 30:
                LOG.warning(
                    'max bits in accumulation input %s gate %s - there may be errors',
                    max_i, gate)
            if np.max(max_r) > 30:
                LOG.warning(
                    'max bits in accumulation state %s gate %s - there may be errors',
                    max_r, gate)

        # LUT activations Q12 -> Q15
        act_in_q = 12
        act_out_q = 15
        int_scale = math.pow(2, -act_in_q)
        out_tanh_sig_scale = math.pow(2, -act_out_q)

        scale_qtypes = {}
        r_pscales = {}
        i_pscales = {}
        scale_qtypes['r_pscales'] = r_pscales
        scale_qtypes['i_pscales'] = i_pscales
        for gate, w_scale, max_bits in zip(['i', 'o', 'c', 'f'], w_scales,
                                           gate_sum_max_bits):
            weight_scale_ratio = w_scale[0] / w_scale[1]
            # TODO - decide whether to scale weights equally

            i_pscales[gate] = w_scale[0] * in_q.scale
            r_pscales[gate] = w_scale[1] * o_q.scale
            if input_bits == 16:
                scale_qtypes[f"i_2_{gate}_q"] = qscale = MultMulBiasScaleQType(
                    scale=i_pscales[gate] / int_scale)
            else:
                scale_qtypes[f"i_2_{gate}_q"] = qscale = MultMulBiasScaleQType(
                    scale=i_pscales[gate] / r_pscales[gate])
            if input_bits == 16:
                i_zp_b = woffs[gate][0]
            else:
                i_zp_b = woffs[gate][0] * qscale.qbiases.astype(np.int32) + (
                    1 << (qscale.qnorms.astype(np.int32) - 1))

            scale_qtypes[f"r_2_{gate}_q"] = qscale = MultMulBiasScaleQType(
                scale=r_pscales[gate] / int_scale)
            if input_bits == 16:
                r_zp_b = woffs[gate][1]
                in_qs[names[f'{gate}_b']] = QType(dtype=np.int32,
                                                  scale=r_pscales[gate],
                                                  offset=r_zp_b,
                                                  interleaved_values=[i_zp_b])
            else:
                r_zp_b = woffs[gate][1] * qscale.qbiases.astype(np.int32) + (
                    1 << (qscale.qnorms.astype(np.int32) - 1))
                in_qs[names[f'{gate}_b']] = QType(dtype=np.int32,
                                                  scale=r_pscales[gate] /
                                                  qscale.qbiases,
                                                  offset=r_zp_b,
                                                  interleaved_values=[i_zp_b])

        # NOTE - for 16 bit pre-normalize the scales to give us room but make sure it isn't negative
        if input_bits == 16:
            gate_prenorm = min(
                np.min([
                    np.min(scale_qtypes[f"{inp}_2_{gate}_q"].qnorms)
                    for gate in ['i', 'o', 'c', 'f'] for inp in ['i', 'r']
                ]), 8)
            for gate in ['i', 'o', 'c', 'f']:
                for inp in ['i', 'r']:
                    scale_qtypes[
                        f"{inp}_2_{gate}_q"].pre_normalization = gate_prenorm
        else:
            gate_prenorm = 0

        r_pscales['state_out_scale'] = o_q.scale
        r_pscales['int_scale'] = int_scale

        # ct = c_in * f + c * i
        # c * i = Q15 * Q15 -> Q30 -> norm(18) -> Q12
        # scale(c_in * f) = Qcell * Q15 (prenorm if 16bit) and scale -> Q12
        # ((c_in * f) + (c * i)) in Q12
        # scale -> cell_out
        # tanh(ct) -> Q15
        # o * tanh(ct) -> Q30
        # prenorm and scale

        # scale result of c_state_1 * f_gate -> Q15
        cell_in_scale = (in_qs[names['c_state']].scale * out_tanh_sig_scale /
                         out_tanh_sig_scale)

        # cell_out from Q15 -> Q7/Q15 scaled
        cell_out_scale = out_tanh_sig_scale / in_qs[names['c_state']].scale

        state_out_scale = out_tanh_sig_scale / o_q.scale

        r_pscales['act_out_scale'] = out_tanh_sig_scale
        r_pscales['c_before_scale'] = int_scale

        scale_qtypes['cell_in_q'] = MultMulBiasScaleQType(scale=cell_in_scale)
        # NOTE - for 16 bit pre-normalize the scales to give us room
        if input_bits == 16:
            scale_qtypes['cell_in_q'].pre_normalization = 8
        scale_qtypes['cell_out_q'] = MultMulBiasScaleQType(
            scale=cell_out_scale)
        scale_qtypes['state_out_q'] = MultMulBiasScaleQType(
            scale=state_out_scale)
        scale_qtypes['i_qtype'] = QType(q=act_in_q, dtype=np.int32)
        if params.lstm_output_c_state:
            out_qs = [o_q, in_qs[names['c_state']]]
        else:
            out_qs = [o_q]

        return QRec.scaled(
            in_qs=in_qs,
            out_qs=out_qs,
            ne16=True,
            gate_prenorm=gate_prenorm,
            **scale_qtypes,
        )
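
The cell_limit of pow(2, 6) used in the last two examples follows the bit budget spelled out in the comments: the c * i product sits in Q24, abs(f) is at most 1, and of the 32 accumulator bits one is the sign while another is kept as overflow headroom, leaving 31 - 1 - 24 = 6 integer bits for the cell state. The same budget written out:

ACC_BITS = 31      # usable magnitude bits of a signed 32-bit accumulator
HEADROOM = 1       # guard bit kept against overflow
PROD_Q = 24        # the c * i product is in Q24
cell_int_bits_max = ACC_BITS - HEADROOM - PROD_Q   # -> 6
cell_limit = 2 ** cell_int_bits_max                # -> 64, i.e. pow(2, 6)
print(cell_int_bits_max, cell_limit)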