Example #1
0
def calculate_filter_q(node,
                       in_q,
                       min_qsnr,
                       force_width,
                       acc_as_calc=False,
                       bias_as_out=True):
    """Compute the quantization of a filter node and store it on the node.

    The weight and output quantizations are obtained from ``get_quantization``
    using ``node.stats['weights']`` and ``node.activation_stats``. The
    calculation type is sized from the input and weight widths; if it does
    not leave enough integer bits for the accumulator/activation range, the
    weight ``q`` is reduced to make room.

    Args:
        node: Filter node. ``node.stats`` and ``node.activation_stats`` are
            read and ``node.quantization`` is overwritten with a dict holding
            ``input_q``, ``weights_q``, ``biases_q``, ``norm``, ``calc_q``
            and ``acc_q``.
        in_q: Quantization of the input; its ``bits`` and ``q`` are read.
        min_qsnr: Passed through to ``get_quantization``.
        force_width: Passed through to ``get_quantization``.
        acc_as_calc: When true the accumulator always uses the calculation
            type.
        bias_as_out: When true and the node has bias statistics, the biases
            and the output share the smaller of their two ``q`` values.

    Returns:
        The output quantization.

    Raises:
        AssertionError: If the weights do not have enough fractional bits to
            give up for the missing integer bits.
    """
    fstats = node.stats
    act_stats = node.activation_stats
    w_q = get_quantization(fstats['weights'], min_qsnr, force_width)
    o_q = get_quantization(act_stats, min_qsnr, force_width)
    calc_width = closest_greater(in_q.bits + w_q.bits)
    calc_q = in_q.q + w_q.q

    # Integer bits needed by the larger of the accumulator and output ranges.
    acc_bits = bits(act_stats['max_acc'], act_stats['min_acc'])
    act_bits = bits(act_stats['max'], act_stats['min'])
    act_acc_bits = max(acc_bits, act_bits)

    calc_int_bits = calc_width - calc_q
    if calc_int_bits < act_acc_bits:
        # Not enough room for the integer part: trade weight precision for it.
        missing_bits = act_acc_bits - calc_int_bits
        assert w_q.q >= missing_bits, "no space in weights to reduce precision"
        w_q.q = w_q.q - missing_bits
        calc_q = in_q.q + w_q.q

    c_q = QType(bits=calc_width, q=calc_q, signed=True)

    if 'biases' in fstats:
        b_q = get_quantization(fstats['biases'], min_qsnr, force_width)
        if bias_as_out:
            o_q.q = min(b_q.q, o_q.q)
            b_q.q = o_q.q
    else:
        # Without bias statistics reuse the output quantization itself (not
        # just its integer ``q``) so "biases_q" always has the same type.
        b_q = o_q

    # Use the calculation type for the accumulator when asked to, or when the
    # accumulator range does not fit in the output's integer bits.
    if acc_as_calc or acc_bits > o_q.bits - o_q.q:
        acc_q = c_q
    else:
        acc_q = o_q

    norm = c_q.q - o_q.q
    node.quantization = {
        "input_q": in_q,
        "weights_q": w_q,
        "biases_q": b_q,
        "norm": norm,
        "calc_q": c_q,
        "acc_q": acc_q
    }
    return o_q
Example #2
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the POW2 quantization record for an activation node.

        The number of integer bits required by the output is derived from the
        activation type: a fixed bound for ``relu6``, the (largest) configured
        bound for ``relun``, and the recorded output range for ``relu``,
        ``hswish``, ``hsigmoid`` and ``leaky``.

        Args:
            params: Activation parameters; ``activation``, ``name`` and, for
                ``relun``, ``activation_params`` are read.
            in_qs: Input quantizations; only ``in_qs[0]`` is used.
            stats: Node statistics; ``stats['range_out'][0]`` is read for the
                range-based activations.
            **kwargs: Forwarded to ``cls.get_pow2_opts`` and
                ``cls.get_pow2_bits``.

        Returns:
            A ``SymmetricQuantizationRecord`` with one input and one output
            quantization.

        Raises:
            ValueError: If the activation type is not supported.
        """
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        # A missing *or empty* list of forced outputs means nothing is forced.
        # (``force_out_qs and force_out_qs[0]`` would yield ``[]`` for an empty
        # list, which is not None and would be treated as a forced type.)
        force_out_q = force_out_qs[0] if force_out_qs else None

        activation = params.activation
        if activation == "relu6":
            int_bits = calc_bits(6)
        elif activation == "relun":
            relun = params.activation_params
            if isinstance(relun, list):
                relun = max(relun)
            int_bits = calc_bits(relun)
        elif activation in ("relu", "hswish", "hsigmoid", "leaky"):
            range_out = stats['range_out'][0]
            int_bits = bits(range_out['max'], range_out['min'])
        else:
            raise ValueError(
                f'no support for activation {params.activation} in POW2 quantizer'
            )

        in_q = in_qs[0]
        if force_out_q is None:
            # Give the fractional part whatever is left after the integer bits.
            q = max(cls.get_pow2_bits(**kwargs) - int_bits, 0)
            out_q = QType(q=q, dtype=out_dtype)
        else:
            if force_out_q.bits - force_out_q.q < int_bits:
                LOG.warning(
                    'quantization is forcing node %s to have an output that may clip',
                    params.name)
            out_q = force_out_q
        return SymmetricQuantizationRecord(in_qs=[in_q], out_qs=[out_q])
Example #3
0
def test_bits():
    """Check ``bits`` against a table of argument pairs and expected results."""
    cases = (
        ((0.081599854, -0.07628916), 1),
        ((1.081599854, -0.07628916), 2),
        ((2.081599854, -0.07628916), 3),
        ((256.081599854, -0.07628916), 10),
        ((75, -9), 8),
        ((0, 0), 1),
        ((-1, 0), 1),
        ((-1, 1), 2),
    )
    for (first, second), expected in cases:
        assert bits(first, second) == expected
Example #4
0
    def calculate_filter_q(self, node: Parameters, astats, fstats,
                           in_q: QType, min_qsnr, force_width,
                           acc_as_calc=False, bias_as_out=True):
        """Compute the quantization record of a filter node.

        Weight and output quantizations come from ``self.get_quantization``.
        The calculation type is sized from the input and weight widths; if it
        lacks integer bits for the accumulator/activation range the weight
        ``q`` is reduced. The result is also stored in ``node.quantization``.

        Args:
            node: Filter node; ``node.stats`` is read and
                ``node.quantization`` is overwritten.
            astats: Activation statistics with ``max``, ``min``, ``max_acc``
                and ``min_acc`` entries.
            fstats: Ignored; it is replaced by ``node.stats`` on entry.
            in_q: Quantization of the input.
            min_qsnr: Passed through to ``self.get_quantization``.
            force_width: Passed through to ``self.get_quantization``.
            acc_as_calc: When true the accumulator always uses the calculation
                type.
            bias_as_out: When true and bias statistics exist, biases and
                output share the smaller of their two ``q`` values.

        Returns:
            A ``FilterQuantizationRecord`` describing the node.

        Raises:
            AssertionError: If the weights do not have enough fractional bits
                to give up for the missing integer bits.
        """
        # NOTE(review): the ``fstats`` argument is discarded here - confirm
        # whether callers are expected to pass something other than node.stats.
        fstats = node.stats
        w_q = self.get_quantization(fstats['weights'], min_qsnr, force_width)
        o_q = self.get_quantization(astats, min_qsnr, force_width)
        calc_width = closest_greater(in_q.bits + w_q.bits)
        calc_q = in_q.q + w_q.q

        acc_bits = bits(astats['max_acc'], astats['min_acc'])
        act_bits = bits(astats['max'], astats['min'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the
            # precision of the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert w_q.q >= missing_bits, "no space in weights to reduce precision"
            w_q.q = w_q.q - missing_bits
            calc_q = in_q.q + w_q.q

        c_q = QType(bits=calc_width, q=calc_q, signed=True)

        if 'biases' in fstats:
            b_q = self.get_quantization(fstats['biases'], min_qsnr, force_width)
            if bias_as_out:
                o_q.q = min(b_q.q, o_q.q)
                b_q.q = o_q.q
        else:
            # No bias statistics: reuse the output quantization object (not its
            # integer ``q``) so biases_q is always a quantization type.
            b_q = o_q

        # Accumulate in the calculation type when requested or when the
        # accumulator range does not fit the output's integer bits.
        if acc_as_calc or acc_bits > o_q.bits - o_q.q:
            acc_q = c_q
        else:
            acc_q = o_q

        norm = c_q.q - o_q.q

        node.quantization = {"input_q": in_q, "weights_q": w_q,
                             "biases_q": b_q, "norm": norm, "calc_q": c_q,
                             "acc_q": acc_q}
        return FilterQuantizationRecord(in_qs=[in_q], out_qs=[o_q], calc_q=c_q,
                                        acc_q=acc_q, biases_q=b_q, weights_q=w_q)
Example #5
0
def astats(size, do_bits=True):
    """Return a fixed statistics dictionary for a tensor of the given size.

    All entries are constants except ``size``, which echoes the argument.
    When ``do_bits`` is true an ``ibits`` entry computed by ``bits(0.9, -0.9)``
    is added.
    """
    stats = dict(
        mean=0,
        std=0.25,
        min=-0.9,
        max=0.9,
        size=size,
        wols=0,
        sols=0,
        min_out=0,
        max_out=0,
    )
    if not do_bits:
        return stats
    stats['ibits'] = bits(0.9, -0.9)
    return stats
Example #6
0
    def calculate_filter_q(self,
                           node: Parameters,
                           astats,
                           in_q: QType,
                           force_width=None,
                           force_out=None,
                           out_as_acc=False):
        """Compute the symmetric quantization record of a filter node.

        The weights are quantized from ``node.weights``. Calculation and
        accumulation use a 32 bit signed type whose ``q`` is the sum of the
        input and weight ``q``; the weight ``q`` is reduced if that leaves too
        few integer bits for the accumulator/output range.

        Args:
            node: Filter node; ``weights``, ``has_bias``, ``biases``, ``name``
                and, for ``MultiplicativeBiasParameters``, ``has_mul_bias`` and
                ``mul_biases`` are read.
            astats: Activation statistics with ``range_acc`` and
                ``range_out[0]`` entries, each holding ``max`` and ``min``.
            in_q: Quantization of the input.
            force_width: Bit width passed to the ``QType`` constructors for
                weights, biases, multiplicative biases and the unforced output.
            force_out: Optional forced output type; its ``bits`` and ``q`` are
                honoured when set (truthy).
            out_as_acc: When true the output uses the accumulator type.

        Returns:
            A ``SymmetricScalableFilterQuantizationRecord`` for
            ``MultiplicativeBiasParameters`` nodes, otherwise a
            ``SymmetricFilterQuantizationRecord``.

        Raises:
            AssertionError: If the weights do not have enough fractional bits
                to give up for the missing integer bits.
            NotImplementedError: If a forced output ``q`` exceeds the
                calculation ``q`` (would need a left shift in the kernel).
        """
        w_q = QType.from_array(arr=node.weights, bits=force_width, signed=True)

        calc_width = 32
        calc_q = in_q.q + w_q.q

        range_acc = astats['range_acc']
        range_out = astats['range_out'][0]
        acc_bits = bits(range_acc['max'], range_acc['min'])
        act_bits = bits(range_out['max'], range_out['min'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the
            # precision of the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert w_q.q >= missing_bits, "no space in weights to reduce precision"
            w_q.q = w_q.q - missing_bits
            calc_q = in_q.q + w_q.q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if out_as_acc:
            o_q = c_q
            if node.has_bias:
                b_q = QType.from_array(node.biases,
                                       bits=force_width,
                                       signed=True)
            else:
                # No biases: fall back to the output type so b_q is always
                # bound before it is used below.
                b_q = o_q
        else:
            # The output size is requested to be force_out_width size
            if force_out and force_out.bits:
                # The output fixed point position is also forced
                if force_out.q:
                    if (force_out.bits - force_out.q) < act_acc_bits:
                        # clipping so cannot completely satisfy
                        o_q = QType(bits=force_out.bits,
                                    q=force_out.bits - act_acc_bits,
                                    signed=True)
                    else:
                        if force_out.q > calc_q:
                            # We cannot shift left in the kernel
                            # TODO - This should try to increase the input q
                            # Unlikely to happen
                            raise NotImplementedError()
                        # We can satisfy the force
                        o_q = QType(bits=force_out.bits,
                                    q=force_out.q,
                                    signed=True)
                else:
                    # Only the width is forced
                    o_q = QType.from_min_max(range_out['max'],
                                             range_out['min'],
                                             bits=force_out.bits)
            else:
                # The output width is not forced so calculate the output q normally
                o_q = QType.from_min_max(range_out['max'],
                                         range_out['min'],
                                         bits=force_width)
                if force_out and force_out.q:
                    # The output fixed point position is forced
                    if force_out.q > calc_q:
                        # We cannot shift left in the kernel
                        # TODO - This should try to increase the input q
                        # Unlikely to happen
                        raise NotImplementedError()
                    o_q.q = force_out.q

            # Biases share the output type whether or not the node has any;
            # binding it unconditionally avoids an unbound b_q for bias-less
            # nodes.
            b_q = o_q

        # make sure that the biases are not stored more precisely than the
        # accumulator. It's pointless and will cause a negative shift
        if b_q.q > acc_q.q:
            b_q.q = acc_q.q

        if isinstance(node, MultiplicativeBiasParameters):
            if node.has_mul_bias:
                mb_q = QType.from_array(node.mul_biases, bits=force_width)
            else:
                mb_q = None
            qrec = SymmetricScalableFilterQuantizationRecord(
                in_qs=[in_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q,
                biases_q=b_q,
                weights_q=w_q,
                mul_biases_q=mb_q,
                constants_are_quantized=False)
        else:
            qrec = SymmetricFilterQuantizationRecord(
                in_qs=[in_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q,
                biases_q=b_q,
                weights_q=w_q,
                constants_are_quantized=False)

        LOG.debug("filter %s qrec %s", node.name, qrec)
        return qrec
Example #7
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the POW2 quantization record for a filter node.

        Calculation and accumulation use a 32 bit signed type whose ``q`` is
        the sum of the input and weight ``q``; the weight ``q`` is reduced
        (with a warning) when that leaves too few integer bits for the
        accumulator/output range. For ``conv_active`` / ``conv_active_pool``
        fusions the output range is taken from the statistics of the second
        contained node and the output uses the accumulator type.

        Args:
            params: Filter parameters; ``name``, ``has_bias`` and, for
                ``MultiplicativeBiasParameters``, ``has_mul_bias`` and
                ``mul_biases`` are read.
            in_qs: Input quantizations; only ``in_qs[0]`` is used.
            stats: Node statistics; ``range_acc`` is always read and
                ``range_out[0]`` is read unless a conv+activation fusion
                supplies the range.
            **kwargs: Must contain ``G``; may contain ``fusion``; must contain
                ``all_stats`` for conv+activation fusions. Also forwarded to
                ``cls.get_pow2_opts``.

        Returns:
            A ``SymmetricScalableFilterQuantizationRecord`` for
            ``MultiplicativeBiasParameters`` nodes, otherwise a
            ``SymmetricFilterQuantizationRecord``; both carry the input,
            weight and bias types in ``in_qs``.

        Raises:
            AssertionError: If the weights do not have enough fractional bits
                to give up for the missing integer bits.
        """
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        range_acc = stats['range_acc']
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        # Weights and biases keep the requested dtype even when the output
        # dtype is widened for a fused activation below.
        int_dtype = out_dtype
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            range_out = stats['range_out'][0]

        in_q = in_qs[0]
        calc_width = 32

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(range_out['min'],
                                          range_out['max'],
                                          dtype=out_dtype)
        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=int_dtype)
        calc_q = in_q.q + weights_q.q

        acc_bits = bits(range_acc['max'], range_acc['min'])
        # bits() is called as (max, min) here, matching the accumulator call
        # above; the previous (min, max) order was inconsistent.
        act_bits = bits(range_out['max'], range_out['min'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the
            # precision of the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert weights_q.q >= missing_bits, "no space in weights to reduce precision"
            LOG.warning(
                'reducing weight precision in %s to satisfy quantization constraints',
                params.name)
            weights_q.q = weights_q.q - missing_bits
            calc_q = in_q.q + weights_q.q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            # The fused activation consumes the accumulator directly.
            o_q = c_q

        if not params.has_bias or np.all(biases_node.dqvalue == 0):
            biases_q = o_q
        else:
            biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                             dtype=int_dtype)
        # make sure that the biases are not stored more precisely than the
        # accumulator. It's pointless and will cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params, MultiplicativeBiasParameters):
            if params.has_mul_bias:
                mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                             dtype=int_dtype)
            else:
                mb_q = None
            return SymmetricScalableFilterQuantizationRecord(
                in_qs=[in_q, weights_q, biases_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q,
                mul_biases_q=mb_q)

        return SymmetricFilterQuantizationRecord(
            in_qs=[in_q, weights_q, biases_q],
            out_qs=[o_q],
            calc_q=c_q,
            acc_q=acc_q)
0
    def calculate_filter_q(self,
                           node: Parameters,
                           astats,
                           fstats,
                           in_q: QType,
                           min_qsnr=None,
                           force_width=None,
                           force_out=None,
                           out_as_acc=False,
                           biases_bits_as_acc=False):
        """Compute the quantization record of a filter node from statistics.

        The weights are quantized via ``self.get_quantization``. Calculation
        and accumulation use a 32 bit signed type whose ``q`` is the sum of
        the input and weight ``q``; the weight ``q`` is reduced if that leaves
        too few integer bits for the accumulator/output range.

        Args:
            node: Filter node; only ``node.name`` is read (for logging).
            astats: Activation statistics with ``max``, ``min``, ``max_acc``
                and ``min_acc`` entries.
            fstats: Filter statistics with a ``weights`` entry and an optional
                ``biases`` entry.
            in_q: Quantization of the input.
            min_qsnr: Passed through to ``self.get_quantization``.
            force_width: Passed through to ``self.get_quantization``.
            force_out: Optional forced output type; its ``bits`` and ``q`` are
                honoured when set (truthy).
            out_as_acc: When true the output uses the accumulator type.
            biases_bits_as_acc: When true (and ``out_as_acc`` is false) the
                biases are quantized at the calculation width instead of the
                output width.

        Returns:
            A ``FilterQuantizationRecord`` describing the node.

        Raises:
            AssertionError: If the weights do not have enough fractional bits
                to give up for the missing integer bits.
            NotImplementedError: If a forced output ``q`` exceeds the
                calculation ``q`` (would need a left shift in the kernel).
        """
        w_q = self.get_quantization(fstats['weights'], min_qsnr, force_width)

        calc_width = 32
        calc_q = in_q.q + w_q.q

        acc_bits = bits(astats['max_acc'], astats['min_acc'])
        act_bits = bits(astats['max'], astats['min'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the
            # precision of the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert w_q.q >= missing_bits, "no space in weights to reduce precision"
            w_q.q = w_q.q - missing_bits
            calc_q = in_q.q + w_q.q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if out_as_acc:
            o_q = c_q
            if 'biases' in fstats:
                # NOTE(review): unlike the branch below, this bias type is not
                # clamped to the accumulator q - confirm that is intended.
                b_q = self.get_quantization(fstats['biases'], min_qsnr,
                                            force_width)
            else:
                # No bias statistics: fall back to the output type so b_q is
                # always bound when the record is built.
                b_q = o_q
        else:
            # The output size is requested to be force_out_width size
            if force_out and force_out.bits:
                # The output fixed point position is also forced
                if force_out.q:
                    if (force_out.bits - force_out.q) < act_acc_bits:
                        # clipping so cannot completely satisfy
                        o_q = QType(bits=force_out.bits,
                                    q=force_out.bits - act_acc_bits,
                                    signed=True)
                    else:
                        if force_out.q > calc_q:
                            # We cannot shift left in the kernel
                            # TODO - This should try to increase the input q
                            # Unlikely to happen
                            raise NotImplementedError()
                        # We can satisfy the force
                        o_q = QType(bits=force_out.bits,
                                    q=force_out.q,
                                    signed=True)
                else:
                    # Only the width is forced
                    o_q = self.get_quantization(astats, None, force_out.bits)
            else:
                # The output width is not forced so calculate the output q normally
                o_q = self.get_quantization(astats, min_qsnr, force_width)
                if force_out and force_out.q:
                    # The output fixed point position is forced
                    if force_out.q > calc_q:
                        # We cannot shift left in the kernel
                        # TODO - This should try to increase the input q
                        # Unlikely to happen
                        raise NotImplementedError()
                    o_q.q = force_out.q

            if 'biases' in fstats:
                if biases_bits_as_acc:
                    b_q = self.get_quantization(fstats['biases'], None,
                                                calc_width)
                else:
                    # if we are forcing width then match the output size which might
                    # have been promoted if the activation didn't fit
                    b_q = self.get_quantization(fstats['biases'], None,
                                                o_q.bits)
            else:
                b_q = o_q
            # make sure that the biases are not stored more precisely than the
            # accumulator. It's pointless and will cause a negative shift
            if b_q.q > acc_q.q:
                b_q.q = acc_q.q

        qrec = FilterQuantizationRecord(in_qs=[in_q],
                                        out_qs=[o_q],
                                        calc_q=c_q,
                                        acc_q=acc_q,
                                        biases_q=b_q,
                                        weights_q=w_q)
        LOG.debug("filter %s qrec %s", node.name, qrec)
        return qrec