Example 1
    @classmethod
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        range_acc = stats['range_acc']
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = out_dtype
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            range_out = stats['range_out'][0]

        in_q = in_qs[0]
        calc_width = 32

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(range_out['min'],
                                          range_out['max'],
                                          dtype=out_dtype)
        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=int_dtype)
        # fixed-point multiply: fractional bits add, so the product carries
        # in_q.q + weights_q.q fractional bits
        calc_q = in_q.q + weights_q.q

        # integer bits needed to hold the accumulator and output activation ranges
        acc_bits = bits(range_acc['max'], range_acc['min'])
        act_bits = bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert weights_q.q >= missing_bits, "no space in weights to reduce precision"
            LOG.warning(
                'reducing weight precision in %s to satisfy quantization constraints',
                params.name)
            weights_q.q = weights_q.q - missing_bits
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            o_q = c_q

        if not params.has_bias or np.all(biases_node.dqvalue == 0):
            biases_q = o_q
        else:
            biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                             dtype=int_dtype)
        # make sure that the biases are not stored more precisely than the accumulator.
        # It's pointless and will cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params, MultiplicativeBiasParameters):
            if params.has_mul_bias:
                mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                             dtype=int_dtype)
            else:
                mb_q = None
            return SymmetricScalableFilterQuantizationRecord(
                in_qs=[in_q, weights_q, biases_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q,
                mul_biases_q=mb_q)
        else:
            return SymmetricFilterQuantizationRecord(
                in_qs=[in_q, weights_q, biases_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q)
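
The headroom check above is standard fixed-point bookkeeping: multiplying an input with in_q.q fractional bits by weights with weights_q.q fractional bits gives a product with in_q.q + weights_q.q fractional bits, which leaves calc_width - calc_q bits for the integer part of the accumulator. If the accumulator or activation range needs more integer bits than that, fractional bits have to be given up somewhere. A minimal self-contained sketch of the check, where bits_needed is a hypothetical stand-in for the module's bits() helper:

import math

def bits_needed(vmax, vmin):
    # integer bits needed to cover [vmin, vmax], sign bit excluded
    # (hypothetical stand-in for the bits()/calc_bits() helper used above)
    amax = max(abs(vmax), abs(vmin))
    return max(math.ceil(math.log2(amax)), 1) if amax > 0 else 1

calc_width = 32
in_q, weights_q = 7, 5               # e.g. Q7 input, Q5 weights
calc_q = in_q + weights_q            # fractional bits add on multiply -> Q12
calc_int_bits = calc_width - calc_q  # 20 integer bits of headroom

act_acc_bits = bits_needed(450.0, -300.0)  # 9 bits for this range
assert calc_int_bits >= act_acc_bits       # otherwise drop weight precision

The same arithmetic explains the bias clamp at the end of the example: biases stored with more fractional bits than the accumulator would need a negative shift to be aligned, so biases_q.q is capped at acc_q.q.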
Example 2
    @classmethod
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        pow2_biases = kwargs.get('opts')['pow2_biases']
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        range_acc = stats.get('range_acc', stats['range_out'][0])
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = np.int32
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            out_dtype = params_dtype
            range_out = stats['range_out'][0]

        in_q = deepcopy(in_qs[0]).scale_to_pow2()
        calc_width = 31

        o_q = QType.from_min_max_pow2(range_out['min'],
                                      range_out['max'],
                                      dtype=out_dtype)
        if force_out_q:
            if o_q.scale > force_out_q.scale:
                return None

        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=params_dtype)
        calc_q = in_q.q + weights_q.q

        acc_bits = calc_bits(range_acc['max'], range_acc['min'])
        act_bits = calc_bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights and input
            missing_bits = act_acc_bits - calc_int_bits
            if missing_bits > calc_q * 0.75:
                raise ValueError(
                    f'Quantizing {params.name} at this precision will lose more than 75% of the fractional part'
                )

            # split the bits to drop between the input and the weights in
            # proportion to their fractional bits, rounding to nearest
            prec_inp = min(math.floor(0.5 + missing_bits * in_q.q / calc_q),
                           in_q.q)
            prec_w = min(math.floor(0.5 + missing_bits * weights_q.q / calc_q),
                         weights_q.q)
            # any rounding remainder comes out of the weights
            left = missing_bits - prec_inp - prec_w
            if left > 0:
                prec_w += left
            LOG.warning(
                'reducing weight and input precision (%s, %s) in %s to satisfy quantization constraints',
                prec_w, prec_inp, params.name)
            weights_q.q -= prec_w
            in_q.q -= prec_inp
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            o_q = c_q

        if pow2_biases == 0:
            biases_dtype = params_dtype
        elif pow2_biases == 8:
            biases_dtype = np.int8
        elif pow2_biases == 16:
            biases_dtype = np.int16
        else:
            biases_dtype = np.int32

        biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                         dtype=biases_dtype)
        # make sure that the biases are not stored more precisely than the accumulator.
        # It's pointless and will cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params,
                      MultiplicativeBiasParameters) and params.has_mul_bias:
            mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                         dtype=int_dtype)
        else:
            mb_q = None
        return QRec.symmetric(in_qs=[in_q, weights_q, biases_q],
                              out_qs=[o_q],
                              calc_q=c_q,
                              acc_q=acc_q,
                              mul_biases_q=mb_q)
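
Where Example 1 takes all of the missing integer bits out of the weights, Example 2 splits the loss between the input and the weights in proportion to their fractional bits, and refuses outright when more than 75% of the fractional part would be lost. A standalone sketch of that split (the function name and return shape are illustrative, not the module's API):

import math

def split_missing_bits(missing_bits, in_q, weights_q):
    # distribute the bits to drop proportionally to each operand's
    # fractional bits; any rounding remainder comes out of the weights
    calc_q = in_q + weights_q
    if missing_bits > calc_q * 0.75:
        raise ValueError('would lose more than 75% of the fractional part')
    prec_inp = min(math.floor(0.5 + missing_bits * in_q / calc_q), in_q)
    prec_w = min(math.floor(0.5 + missing_bits * weights_q / calc_q), weights_q)
    left = missing_bits - prec_inp - prec_w
    if left > 0:
        prec_w += left
    return in_q - prec_inp, weights_q - prec_w

print(split_missing_bits(3, 7, 5))  # Q7 input, Q5 weights -> (5, 4)

Taking 2 bits from the input and 1 from the weights here mirrors the snippet's rounding: each operand gives up bits roughly in proportion to how many fractional bits it holds.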