Example 1
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):

        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
        func = PIECEWISE_OPS[params.__class__]
        op = func['op']
        if func['is_mult']:
            i1 = in_tensors[0].astype(np.int32)
            i2 = in_tensors[1].astype(np.int32)
            res = op(i1, i2, np.int32)
            q_calc = QType.Pow2(
                bits=32, q=qrec.in_qs[0].q+qrec.in_qs[1].q, signed=True)
            res = qrec.out_qs[0].reduce_from(res, q_calc)
        else:
            off_in = abs(qrec.in_qs[0].q - qrec.in_qs[1].q)
            if qrec.in_qs[0].q > qrec.in_qs[1].q:
                i1 = at_norm(in_tensors[0].astype(np.int32), off_in)
                i2 = in_tensors[1].astype(np.int32)
            else:
                i1 = in_tensors[0].astype(np.int32)
                i2 = at_norm(in_tensors[1].astype(np.int32), off_in)
            res = op(i1, i2, None)
            q_calc = QType.Pow2(bits=32, q=min(qrec.in_qs[0].q, qrec.in_qs[1].q), signed=True)
            res = qrec.out_qs[0].reduce_from(res, q_calc)
        return qrec.get_outputs(params, [res], ktype="symmetric")
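A quick sketch of the Pow2 bookkeeping used above, with a stand-in for at_norm (assumed here to be a plain arithmetic right shift, which may not match the library's rounding): multiplying two fixed-point values adds their Q factors, while addition first aligns both operands to the smaller Q, which is exactly what the two branches of the kernel do.

import numpy as np

def at_norm_sketch(x, n):
    return x >> n  # assumption: rounding-free arithmetic shift

# two Q12 operands: 1.5 and 2.25
a = np.int32(round(1.5 * (1 << 12)))
b = np.int32(round(2.25 * (1 << 12)))

# multiplication: Q12 * Q12 -> Q24, matching q_calc = in_qs[0].q + in_qs[1].q
prod = a.astype(np.int64) * b
print(prod / (1 << 24))                               # ~3.375

# addition: bring the larger Q down to the smaller one first
c = np.int32(round(2.25 * (1 << 10)))                 # a Q10 operand
print((at_norm_sketch(a, 12 - 10) + c) / (1 << 10))   # ~3.75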
Example 2
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):

        qname = kwargs['qname']
        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype=qname)
        if qrec:
            in_q = qrec.in_qs[0]
            out_q = qrec.out_qs[0]
            float_conversion = in_q.is_floating or out_q.is_floating
            bit_conversion = in_q.bits != out_q.bits
            if not float_conversion:
                same_sign = in_q.signed == out_q.signed
                if in_q.bits > out_q.bits:
                    bit_diff = in_q.bits - out_q.bits
                    same_scale = np.allclose(in_q.scale *
                                             np.power(2, bit_diff),
                                             out_q.scale,
                                             atol=0.0001)
                    same_zeropoint = np.all(
                        in_q.zero_point >> bit_diff == out_q.zero_point)
                elif out_q.bits > in_q.bits:
                    bit_diff = out_q.bits - in_q.bits
                    same_scale = np.allclose(out_q.scale *
                                             np.power(2, bit_diff),
                                             in_q.scale,
                                             atol=0.0001)
                    same_zeropoint = np.all(
                        in_q.zero_point == out_q.zero_point >> bit_diff)
                else:
                    same_scale = np.allclose(out_q.scale,
                                             in_q.scale,
                                             atol=0.0001)
                    same_zeropoint = np.all(
                        in_q.zero_point == out_q.zero_point)

                if same_scale and same_sign and bit_conversion and same_zeropoint:
                    if in_q.bits > out_q.bits:
                        if in_q.signed:
                            out_tensor = out_q.clip(
                                at_norm(in_tensors[0].astype(np.int32),
                                        in_q.bits - out_q.bits))
                        else:
                            out_tensor = out_q.clip(
                                at_norm(in_tensors[0].astype(np.uint32),
                                        in_q.bits - out_q.bits))
                    else:
                        out_tensor = in_tensors[0].astype(
                            out_q.dtype) << (out_q.bits - in_q.bits)
                    return qrec.get_outputs(params, [out_tensor], ktype=qname)
            # all other conversions should be numerically equivalent to this (within 1 bit)
            out_tensor = qrec.out_qs[0].quantize_from(in_tensors[0],
                                                      qrec.in_qs[0])
        else:
            out_tensor = in_tensors[0]

        return qrec.get_outputs(params, [out_tensor], ktype=qname)
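The fast path above applies when the two Pow2 formats have the same sign and differ only by a power-of-two factor tied to their bit widths, so the conversion reduces to a shift (plus a clip when narrowing). A minimal numeric sketch of the narrowing case, with the 8-bit shift amount chosen purely for illustration:

import numpy as np

x16 = np.array([-3000, 0, 12345], dtype=np.int16)
bit_diff = 8                                   # in_q.bits - out_q.bits
x8 = np.clip(x16.astype(np.int32) >> bit_diff, -128, 127).astype(np.int8)
print(x8)                                      # [-12   0  48]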
Example 3
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        in_tensor = qrec.prepare_inputs(params, in_tensors,
                                        ktype="symmetric")[0]
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        in_tensor = in_tensor.astype(np.int32)
        if in_q.q < 12:
            in_tensor <<= 12 - in_q.q
        elif in_q.q > 12:
            in_tensor = at_norm(in_tensor, in_q.q - 12)
        out_tensor = tanh_lut(in_tensor)
        if out_q.q < 15:
            out_tensor = at_norm(out_tensor, 15 - out_q.q)

        return qrec.get_outputs(params, [out_tensor], ktype="symmetric")
Example 4
def av_global_pool(params,
                   in_tensors,
                   qrec: QuantizationRecordBase,
                   details=None):

    if isinstance(qrec, MultQuantizationRecord):
        return av_global_pool_mult(params, in_tensors, qrec, details=details)

    # Prepare the quantization levels
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]

    sum_by_chan = np.sum(in_tensor,
                         dtype=np.int32,
                         axis=(in_dims.get_order_idx('w'),
                               in_dims.get_order_idx('h')))

    norm = (np.array([31], dtype=np.int32) - gap_clb(sum_by_chan)).astype(
        np.int32)
    inv_wh = (1 << norm) // (in_dims.h * in_dims.w)
    out_tensor = at_norm((inv_wh * sum_by_chan), norm)
    return qrec.get_outputs(
        params, [qrec.out_qs[0].clip(out_tensor).reshape(out_dims.shape)],
        ktype="symmetric")
Example 5
 def apply_multiplicative_bias(self,
                               params: Conv2DParameters,
                               input_tensor: np.ndarray,
                               axis: int,
                               ktype: str = None):
     if ktype == "symmetric":
         if params.has_mul_bias:
             mul_biases = self.quantize_as(params.mul_biases,
                                           'mul_biases_q')
             shape = [
                 params.filter.out_c if idx == axis else 1
                 for idx in range(3)
             ]
             input_tensor *= mul_biases.reshape(shape)
             input_tensor = at_norm(input_tensor, self.mul_biases_q.q)
         return input_tensor
     if ktype == "float32":
         if params.has_mul_bias:
             shape = [
                 params.filter.out_c if idx == axis else 1
                 for idx in range(3)
             ]
             input_tensor *= params.mul_biases.reshape(shape)
         return input_tensor
     raise NotImplementedError()
Example 6
def postprocess(img_in, h, w, c, **kwargs):
    if kwargs.get('transpose'):
        if c == 1:
            img_in = img_in.transpose((1, 0)).reshape((c, h, w))
        else:
            img_in = img_in.transpose((2, 0, 1)).copy()
    elif c == 1:
        img_in = img_in.reshape((c, w, h))

    divisor = kwargs.get('divisor') or 1
    offset = kwargs.get('offset') or 0
    shift = kwargs.get('shift') or 0

    if shift:
        if shift < 0:
            img_in = at_norm(img_in, int(-shift))
        else:
            img_in = img_in << int(shift)

    img_in = np.array(img_in)

    norm_func = kwargs.get('norm_func')
    if norm_func:
        g_env = dict(np.__dict__)
        # pylint: disable=eval-used
        compiled_norm_func = eval('lambda ' + norm_func, g_env)
        img_in = compiled_norm_func(img_in)
        img_in = np.array(img_in, dtype=np.float64)
    else:
        img_in = (img_in.astype(np.float64) / divisor) + offset

    return img_in
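The norm_func string is spliced into 'lambda ' + norm_func, so it is expected to carry the parameter list and body of a lambda, e.g. "x: x / 128.0 - 1.0" (that value is purely illustrative). A small standalone sketch of the mechanism:

import numpy as np

g_env = dict(np.__dict__)                  # expose numpy names to the expression
norm_func = "x: x / 128.0 - 1.0"
# pylint: disable=eval-used
compiled_norm_func = eval('lambda ' + norm_func, g_env)
print(compiled_norm_func(np.array([0, 128, 255], dtype=np.float64)))   # [-1. 0. ~0.99]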
Example 7
def apply_multiplicative_bias(qrec,
                              params: FilterParameters,
                              input_tensor: np.ndarray,
                              axis: int,
                              ktype: str = None):
    if ktype == 'float':
        if hasattr(params, 'has_mul_bias') and params.has_mul_bias:
            shape = [
                params.filter.out_c if idx == axis else 1 for idx in range(3)
            ]
            input_tensor *= params.mul_biases.reshape(shape)
        return input_tensor
    if ktype == 'symmetric' and qrec.ktype.startswith('scaled'):
        mul_biases_q = qrec.cache.get('mul_biases_q')
        if isinstance(mul_biases_q, MultMulBiasScaleQType):
            input_tensor = mul_biases_q.apply_scales(input_tensor, axis)
    elif ktype == 'symmetric' and qrec.ktype.startswith('symmetric'):
        if params.has_mul_bias:
            mul_biases_q = qrec.cache.get('mul_biases_q')
            mul_biases = mul_biases_q.quantize(params.mul_biases)
            shape = [
                params.filter.out_c if idx == axis else 1 for idx in range(3)
            ]
            input_tensor *= mul_biases.reshape(shape)
            input_tensor = at_norm(input_tensor, mul_biases_q.q)
    return input_tensor.astype(np.int32)
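The shape list built above is the usual trick for broadcasting a per-channel vector along one axis of a (C, H, W) tensor; a tiny standalone illustration:

import numpy as np

out_c, axis = 4, 0
tensor = np.ones((out_c, 3, 3), dtype=np.int32)
mul_biases = np.arange(1, out_c + 1, dtype=np.int32)

shape = [out_c if idx == axis else 1 for idx in range(tensor.ndim)]
print((tensor * mul_biases.reshape(shape))[:, 0, 0])   # [1 2 3 4]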
Example 8
 def apply_scales(self, arr: np.ndarray, axis: int = None):
     if self.pre_normalization > 0:
         arr = at_norm(arr, self.pre_normalization)
     if not self.has_scale:
         return arr
     return apply_scales(self.qbiases,
                         self.qnorms,
                         arr,
                         axis=axis,
                         calc_dtype=self._calc_dtype)
Example 9
 def apply_scales(self, arr: np.ndarray, axis: int = None):
     if self.pre_normalization > 0:
         arr = at_norm(arr, self.pre_normalization)
     if not self.has_scale:
         return arr
     if axis is None:
         mul_biases = self.qbiases
         mul_biases_norm = self.qnorms
         assert len(mul_biases) == 1 and len(
             mul_biases_norm) == 1, "no axis set. should have single scale"
     else:
         shape = [
             len(self.qbiases) if idx == axis else 1
             for idx in range(len(arr.shape))
         ]
         mul_biases = self.qbiases.reshape(shape)
         mul_biases_norm = self.qnorms.reshape(shape)
     return at_norm(np.multiply(arr, mul_biases, dtype=np.int32),
                    mul_biases_norm)
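qbiases and qnorms together encode a float rescale factor as an integer multiplier plus a right shift. A hedged sketch of how such a pair could be derived (the 8-bit multiplier width and the helper name are assumptions for illustration, not necessarily what the library's scale types use):

import numpy as np

def split_scale(scale, bits=8):
    qnorm = 0
    while scale * 2 < (1 << (bits - 1)) and qnorm < 31:
        scale *= 2
        qnorm += 1
    return int(round(scale)), qnorm          # (qbias, qnorm)

scale = 0.00392                              # e.g. in_scale * w_scale / out_scale
qbias, qnorm = split_scale(scale)
acc = 12345                                  # a 32-bit accumulator value
print((acc * qbias) >> qnorm, acc * scale)   # 48 vs 48.39...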
Example 10
    def execute(cls, params, in_tensors, qrec: QuantizationRecordBase,
                **kwargs):

        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
        func = PIECEWISE_OPS[params.__class__]
        op = func['op']
        if func['is_mult']:
            i1 = in_tensors[0].astype(np.int32)
            i2 = in_tensors[1].astype(np.int32)
            res = op(i1, i2, np.int32)
        else:
            off_in = abs(qrec.in_qs[0].q - qrec.in_qs[1].q)
            if qrec.in_qs[0].q > qrec.in_qs[1].q:
                i1 = at_norm(in_tensors[0].astype(np.int32), off_in)
                i2 = in_tensors[1].astype(np.int32)
            else:
                i1 = in_tensors[0].astype(np.int32)
                i2 = at_norm(in_tensors[1].astype(np.int32), off_in)
            res = op(i1, i2, None)
        return qrec.get_outputs(params, [res], ktype="symmetric")
Example 11
    def log_step(cls, params, in_data, mel_coeff_q, shift_buff, fft_out_q,
                 shift, norm):
        if params.log_offset:
            raise NotImplementedError()

        # if params.magsquared:
        #     qformat = mel_coeff_q - 2 - shift_buff + 2*fft_out_q + 2*shift
        # else:
        qformat = 30 - shift_buff

        if params.log_type == "db":
            return np.clip(
                at_norm(
                    10 * ((logn_17_15(in_data, True) * LN_10_INV_Q10 >> 10) -
                          (qformat - 15) * LOG10_2), norm), -(1 << 15),
                (1 << 15) - 1).astype(np.int16)
        return np.clip(
            at_norm(
                logn_17_15(in_data, True) - (qformat - 15) * LN_2_1F15, norm),
            -(1 << 15), (1 << 15) - 1).astype(np.int16)
Example 12
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        fft_twiddles = np.stack([in_tensors[2][::2], in_tensors[2][1::2]],
                                axis=0)
        swap_table = in_tensors[3]
        rfft_twiddles = np.stack([in_tensors[4][::2], in_tensors[4][1::2]],
                                 axis=0)

        mel_filterbank_sparsity_mat = in_tensors[5]
        mel_filterbank_coeff = in_tensors[6]
        if params.n_dct:
            dct_matrix = in_tensors[7]

        result = []
        for frame_idx in range(params.n_frames):
            in_data = in_tensors[0][params.frame_step *
                                    frame_idx:params.frame_step * frame_idx +
                                    params.frame_size]
            in_data, shift = cls.preemphasis(params, in_data,
                                             12 if params.is_radix4() else 13)
            if params.win_fn:
                win_lut = in_tensors[1]
                in_data = cls.windowing(params, in_data, win_lut,
                                        qrec.in_qs[1].q)

            in_cfft = np.stack([in_data[::2], in_data[1::2]], axis=0)
            out_cfft = cls.fft_step(params, in_cfft, fft_twiddles, swap_table)
            out_data = RFFT_Step_Fix16(out_cfft, rfft_twiddles, params.n_fft)

            out_data = out_data[0] + 1j * out_data[1]
            spectrogram = cls.spectrogram_step(params, out_data, shift,
                                               qrec.cache['fft_out_q'].q)

            melspect, shift_buff = cls.melspectrogram_step(
                params, spectrogram, mel_filterbank_sparsity_mat,
                mel_filterbank_coeff, qrec.in_qs[6].q)
            if params.mel_type == "melspectrogram":
                result.append(
                    cls.norm_clip_32_melspect(params, melspect, shift_buff))
                continue

            logmelspect = cls.log_step(params, melspect, qrec.in_qs[6].q,
                                       shift_buff, qrec.cache["fft_out_q"],
                                       shift, params.quant_norm)
            if params.mel_type == "logmelspectrogram":
                result.append(logmelspect)
                continue

            if params.n_dct:
                mfcc = np.clip(at_norm(np.dot(dct_matrix, logmelspect), 14),
                               -(1 << 15), (1 << 15) - 1)
                result.append(mfcc)

        return [np.array(result)]
Example 13
    def average_execute(cls, params, in_tensors, qrec: QuantizationRecordBase):

        # Prepare the quantization levels

        in_tensor = qrec.prepare_inputs(params, in_tensors,
                                        ktype="symmetric")[0]
        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        filter_sz = params.filter.h * params.filter.w

        pool_factor = (1 << 16) // filter_sz

        out_tensor = np.zeros(out_dims.shape, dtype=np.int32)

        if params.padding.h + params.padding.w > 0:
            in_tensor = np.pad(in_tensor,
                               params.padding.numpy_pad_shape(in_dims),
                               mode='constant',
                               constant_values=qrec.in_qs[0].pad_zero_point)
            pad_w = params.padding.w
            pad_h = params.padding.h
        else:
            pad_w = pad_h = 0

        out_h = 0
        for h_idx in range(0, in_dims.h - params.filter.h + pad_h + 1,
                           params.stride.h):
            out_w = 0
            for w_idx in range(0, in_dims.w - params.filter.w + pad_w + 1,
                               params.stride.w):
                # accumulate - potentially with different Q
                out_slice_args = out_dims.srange(h=out_h, w=out_w)
                in_slice_args = in_dims.srange(
                    c=[0, out_dims.c, 1],
                    h=[h_idx, h_idx + params.filter.h, 1],
                    w=[w_idx, w_idx + params.filter.w, 1])

                res_shape = out_tensor[out_slice_args].shape
                sum_filter = np.sum(
                    in_tensor[in_slice_args],
                    dtype=qrec.dtype(ktype="float32"),
                    axis=(out_dims.keys.index('h'),
                          out_dims.keys.index('w'))).reshape(res_shape)
                sum_filter = np.multiply(sum_filter, pool_factor)
                out_tensor[out_slice_args] = sum_filter
                out_w += 1
            out_h += 1

        return qrec.get_outputs(params, [
            qrec.out_qs[0].clip(at_norm(out_tensor, 16), qrec.out_qs[0].dtype)
        ],
                                ktype="symmetric")
Example 14
def piecewise(params, in_tensors, qrec: QuantizationRecordBase, details=None):

    if isinstance(qrec, (MultQuantizationRecord, MultAddQuantizationRecord)):
        return piecewise_mult(params, in_tensors, qrec, details=details)

    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    func = PIECEWISE_OPS[params.__class__]
    op = func['op']
    if func['is_mult']:
        i1 = in_tensors[0].astype(np.int32)
        i2 = in_tensors[1].astype(np.int32)
        res = op(i1, i2, np.int32)
    else:
        off_in = abs(qrec.in_qs[0].q - qrec.in_qs[1].q)
        if qrec.in_qs[0].q > qrec.in_qs[1].q:
            i1 = at_norm(in_tensors[0].astype(np.int32), off_in)
            i2 = in_tensors[1].astype(np.int32)
        else:
            i1 = in_tensors[0].astype(np.int32)
            i2 = at_norm(in_tensors[1].astype(np.int32), off_in)
        res = op(i1, i2, None)
    return qrec.get_outputs(params, [res], ktype="symmetric")
Example 15
    def execute(cls, params,
                in_tensors,
                qrec: QuantizationRecordBase,
                **kwargs):
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
        qrec.set_scale()
        neg_in = at_norm(in_tensor * leak_mult_gen_factor_q7(params), 7)
        in_tensor = in_tensor * (in_tensor > 0) + neg_in * (in_tensor < 0)

        in_tensor = qrec.scale_mul_biases_q.apply_scales(in_tensor)
        if qrec.out_qs[0] != qrec.in_qs[0]:
            return qrec.get_outputs(params, [qrec.out_qs[0].reduce_from(in_tensor, qrec.in_qs[0])], ktype="symmetric")
        return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
Example 16
 def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
     in_tensor = qrec.prepare_inputs(params, in_tensors,
                                     ktype="symmetric")[0]
     compute_in_out_scale(qrec)
     neg_in = at_norm(in_tensor * leak_mult_gen_factor_q7(params), 7)
     in_tensor = in_tensor * (in_tensor > 0) + neg_in * (in_tensor < 0)
     scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
     in_tensor = scale_mul_biases_q.apply_scales(in_tensor)
     if qrec.out_qs[0] != qrec.in_qs[0]:
         return qrec.get_outputs(
             params, [qrec.out_qs[0].reduce_from(in_tensor, qrec.in_qs[0])],
             ktype="symmetric")
     return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
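The float operation approximated by the two kernels above is an ordinary leaky ReLU; leak_mult_gen_factor_q7 supplies the negative-slope factor in Q7 (the 0.1 below is an illustrative value, not necessarily the library's):

import numpy as np

def leaky_relu_ref(x, leak=0.1):           # leak factor assumed for illustration
    return np.where(x > 0, x, leak * x)

print(leaky_relu_ref(np.array([-2.0, -0.5, 0.0, 1.5])))   # [-0.2  -0.05  0.  1.5]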
Example 17
def av_pool(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    del details
    # Prepare the quantization levels

    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]
    filter_sz = params.filter.h * params.filter.w

    pool_factor = (1 << 16) // filter_sz

    out_tensor = np.zeros(out_dims.shape, dtype=np.int32)

    if params.padding.h + params.padding.w > 0:
        in_tensor = np.pad(in_tensor,
                           params.padding.numpy_pad_shape(in_dims),
                           mode='constant',
                           constant_values=qrec.in_qs[0].pad_zero_point)
        pad_w = params.padding.w
        pad_h = params.padding.h
    else:
        pad_w = pad_h = 0

    for in_c in range(out_dims.c):

        out_h = 0
        for h_idx in range(0, in_dims.h - params.filter.h + pad_h + 1,
                           params.stride.h):
            out_w = 0
            for w_idx in range(0, in_dims.w - params.filter.w + pad_w + 1,
                               params.stride.w):
                # accumulate - potentially with different Q
                in_slice_args = in_dims.srange(
                    c=[in_c, in_c + 1, 1],
                    h=[h_idx, h_idx + params.filter.h, 1],
                    w=[w_idx, w_idx + params.filter.w, 1])

                sum_filter = np.sum(in_tensor[in_slice_args], dtype=np.int32)
                sum_filter = np.multiply(sum_filter,
                                         pool_factor,
                                         dtype=np.int32)
                out_tensor[out_dims.srange(c=in_c, h=out_h,
                                           w=out_w)] = sum_filter
                out_w += 1
            out_h += 1

    return qrec.get_outputs(
        params,
        [qrec.out_qs[0].clip(at_norm(out_tensor, 16), qrec.out_qs[0].dtype)],
        ktype="symmetric")
Example 18
def apply_scales(qbiases, qnorms, arr: np.ndarray, axis: int = None):
    if axis is None:
        mul_biases = qbiases
        mul_biases_norm = qnorms
        assert len(mul_biases) == 1 and len(
            mul_biases_norm) == 1, "no axis set. should have single scale"
    else:
        shape = [
            len(qbiases) if idx == axis else 1 for idx in range(len(arr.shape))
        ]
        mul_biases = qbiases.reshape(shape)
        mul_biases_norm = qnorms.reshape(shape)
    return at_norm(np.multiply(arr, mul_biases, dtype=np.int32),
                   mul_biases_norm)
Example 19
    def average_execute_mult(cls, params,
                             in_tensors,
                             qrec: MultQuantizationRecord):

        # Prepare the quantization levels
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
        out_dims = params.out_dims[0]
        qrec.set_scale(in_idx=0, out_idx=0)

        sum_by_chan = np.sum(in_tensor, dtype=np.int32, axis=tuple(
            params.axis), keepdims=params.keep_dims)
        sz = reduce(lambda x, y: x * y, [i for idx,
                                         i in enumerate(in_tensor.shape) if idx in params.axis])
        res = at_norm(((sum_by_chan << 7) / sz).astype(np.int32), 7)
        res = out_tensor = qrec.scale_mul_biases_q.apply_scales(res)
        return qrec.get_outputs(params,
                                [out_tensor.reshape(out_dims.shape)],
                                ktype="symmetric")
Example 20
    def average_execute(cls, params,
                        in_tensors,
                        qrec: MultQuantizationRecord):

        # Prepare the quantization levels
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
        out_dims = params.out_dims[0]

        sum_by_chan = np.sum(in_tensor, dtype=np.int32, axis=tuple(
            params.axis), keepdims=params.keep_dims)

        norm = (np.array([31], dtype=np.int32) - gap_clb(sum_by_chan.flatten())).astype(np.int32)
        sz = reduce(lambda x, y: x * y, [i for idx,
                                         i in enumerate(in_tensor.shape) if idx in params.axis])
        inv_wh = ((1 << norm) // sz).reshape(sum_by_chan.shape)
        out_tensor = at_norm((inv_wh * sum_by_chan), norm.reshape(sum_by_chan.shape))
        return qrec.get_outputs(params,
                                [qrec.out_qs[0].clip(out_tensor).reshape(out_dims.shape)],
                                ktype="symmetric")
Example 21
def postprocess(img_in, h, w, c, **kwargs):
    if kwargs.get('transpose'):
        if c == 1:
            img_in = img_in.transpose((1, 0)).reshape((c, h, w))
        else:
            img_in = img_in.transpose((2, 0, 1)).copy()
    elif c == 1:
        img_in = img_in.reshape((c, w, h))

    divisor = kwargs.get('divisor') or 1
    offset = kwargs.get('offset') or 0
    shift = kwargs.get('shift') or 0

    if shift:
        if shift < 0:
            img_in = at_norm(img_in, int(-shift))
        else:
            img_in = img_in << int(shift)

    img_in = np.array(img_in)

    norm_func = kwargs.get('norm_func')
    if norm_func:
        g_env = dict(np.__dict__)
        # pylint: disable=eval-used
        compiled_norm_func = eval('lambda ' + norm_func, g_env)
        img_in = compiled_norm_func(img_in)
        img_in = np.array(img_in, dtype=np.float64)
    else:
        img_in = (img_in.astype(np.float64) / divisor) + offset

    if kwargs.get('rgb888_rgb565'):
        r = np.bitwise_and(img_in[:, :, 0].flatten().astype(np.int16),
                           0xf8) << 8
        g = np.bitwise_and(img_in[:, :, 1].flatten().astype(np.int16),
                           0xfc) << 3
        b = np.bitwise_and(img_in[:, :, 2].flatten().astype(np.int16),
                           0xf8) >> 3
        img_565 = r + g + b
        img_in = np.array(img_565, dtype=np.int16)

    return img_in
Example 22
    def execute(cls, params,
                in_tensors,
                qrec: QuantizationRecordBase,
                **kwargs):

        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]

        calc_q = QType.Pow2(bits=32, q=qrec.in_qs[0].q + 15, signed=True)
        fac_1 = qrec.in_qs[0].quantize(np.array([3.]))
        fac_2 = (1 << 15) // 6
        upper_bound = qrec.in_qs[0].quantize([6.])
        lower_bound = qrec.in_qs[0].quantize([0.])
        in_tensor = in_tensor.astype(np.int32)
        in_tensor = at_norm(np.multiply(np.minimum(np.maximum(in_tensor + fac_1, lower_bound), upper_bound),
                                        in_tensor,
                                        dtype=np.int32), qrec.in_qs[0].q)
        return qrec.get_outputs(params,
                                [qrec.out_qs[0].reduce_from(np.multiply(
                                    in_tensor, fac_2, dtype=np.int32), calc_q)],
                                ktype="symmetric")
Example 23
def av_global_pool_mult(params,
                        in_tensors,
                        qrec: MultQuantizationRecord,
                        details=None):

    # Prepare the quantization levels
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]
    qrec.set_scale(in_idx=0, out_idx=0)

    sum_by_chan = np.sum(in_tensor,
                         dtype=np.int32,
                         axis=(in_dims.get_order_idx('w'),
                               in_dims.get_order_idx('h')))

    res = at_norm((sum_by_chan << 7) // (in_dims.h * in_dims.w), 7)
    res = out_tensor = qrec.scale_mul_biases_q.apply_scales(res)
    return qrec.get_outputs(params, [out_tensor.reshape(out_dims.shape)],
                            ktype="symmetric")
Example 24
def hswish(params,
           in_tensors,
           qrec: QuantizationRecordBase,
           details=None):

    if isinstance(qrec, MultQuantizationRecord):
        return hswish_mult(params, in_tensors, qrec, details=details)

    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]

    calc_q = QType(bits=32, q=qrec.in_qs[0].q + 15, signed=True)
    fac_1 = qrec.in_qs[0].quantize(np.array([3.]))
    fac_2 = (1 << 15) // 6
    upper_bound = qrec.in_qs[0].quantize([6.])
    lower_bound = qrec.in_qs[0].quantize([0.])
    in_tensor = in_tensor.astype(np.int32)
    in_tensor = at_norm(np.multiply(np.minimum(np.maximum(in_tensor + fac_1, lower_bound), upper_bound),
                                    in_tensor,
                                    dtype=np.int32), qrec.in_qs[0].q)
    return qrec.get_outputs(params,
                            [qrec.out_qs[0].reduce_from(np.multiply(
                                in_tensor, fac_2, dtype=np.int32), calc_q)],
                            ktype="symmetric")
Example 25
 def windowing(cls, params, in_data, win_lut, win_q):
     return at_norm(np.multiply(in_data, win_lut, dtype=np.int32), win_q)
Example 26
    def step_kernel(cls, params: GRUParameters, args: Mapping[str, np.ndarray],
                    idx: int, input_tensor: np.ndarray, qrec):

        z_gate_scratch = args['w_z_b'][0]
        hr_gate_scratch = args['w_r_b'][0]

        if idx < params.n_input_cells:
            # calculate z gate on input
            z_gate_scratch += args['w_2_z_w'][0].astype(np.int32).dot(
                input_tensor[idx])
            # calculate r gate on input
            hr_gate_scratch += args['w_2_r_w'][0].astype(np.int32).dot(
                input_tensor[idx])
            # scale to recurrent * state scale if input scale is different
            if not params.rnn_same_inout_scale:
                z_gate_scratch = qrec.scale_z_input2_z_HtxW(z_gate_scratch,
                                                            0,
                                                            ktype='symmetric')
                hr_gate_scratch = qrec.scale_r_input2_r_HtxW(hr_gate_scratch,
                                                             0,
                                                             ktype='symmetric')

        # calculate z gate on recurrent
        z_gate_scratch += args['r_2_z_w'][0].astype(np.int32).dot(
            args['h_state'][0]) + args['r_z_b'][0]
        # if not hard_act, the scaling brings the value up to Q15
        z_gate_scratch = get_activation(params.activation_zr, params.hard_act)(
            qrec.scale_z_internal(z_gate_scratch, 0,
                                  ktype='symmetric'), qrec.internal_qtype)
        # normalise to internal Q
        if not params.hard_act and qrec.internal_qtype.q != 15:
            z_gate_scratch = at_norm(z_gate_scratch,
                                     15 - qrec.internal_qtype.q)

        # same as above on r gate
        hr_gate_scratch += args['r_2_r_w'][0].astype(np.int32).dot(
            args['h_state'][0]) + args['r_r_b'][0]
        hr_gate_scratch = get_activation(
            params.activation_zr,
            params.hard_act)(qrec.scale_r_internal(hr_gate_scratch,
                                                   0,
                                                   ktype='symmetric'),
                             qrec.internal_qtype)
        if not params.hard_act and qrec.internal_qtype.q != 15:
            hr_gate_scratch = at_norm(hr_gate_scratch,
                                      15 - qrec.internal_qtype.q)

        if params.linear_before_reset:
            # Hadamard after linear
            # r_gate_scratch = (rt (.) (Ht-1*(Rh^T) + Rbh))
            h_gate_recurrent = args['r_2_h_w'][0].astype(np.int32).dot(
                args['h_state'][0]) + args['r_h_b'][0]
            # this is int_q_scale * state_q_scale * h_recurrent_weights_scale
            hr_gate_scratch = hr_gate_scratch * h_gate_recurrent
            # normalize to state_q_scale * h_recurrent_weights_scale
            hr_gate_scratch = at_norm(hr_gate_scratch, qrec.internal_qtype.q)

            # ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh) # when linear_before_reset != 0
            if idx < params.n_input_cells:
                if not params.rnn_same_inout_scale:
                    # scale input_scale * h_input_weights_scale to state_q_scale * h_recurrent_weights_scale
                    hr_gate_scratch += qrec.scale_h_input2_h_HtxW(
                        (args['w_2_h_w'][0].astype(np.int32).dot(
                            input_tensor[idx]) + args['w_h_b'][0]),
                        0,
                        ktype='symmetric')
                else:
                    # since input_scale == state scale and h_input_weights_scale == h_recurrent_weights_scale
                    # no scaling is necessary
                    hr_gate_scratch += args['w_2_h_w'][0].astype(np.int32).dot(
                        input_tensor[idx]) + args['w_h_b'][0]
            else:
                # Is this correct if there is no input (and below)? This is not a mode that
                # exists in any framework and will not ever be used at present
                if not params.rnn_same_inout_scale:
                    hr_gate_scratch += qrec.scale_h_input2_h_HtxW(
                        args['w_h_b'][0], 0, ktype='symmetric')
                else:
                    hr_gate_scratch += args['w_h_b'][0]
        else:
            # Hadamard on state before linear
            # r_gate_scratch = (rt (.) Ht-1)*(Rh^T) + Rbh + Wbh

            # this is int_q_scale * state_q_scale * h_recurrent_weights_scale
            # normalize to state_q_scale * h_recurrent_weights_scale
            hr_gate_scratch = at_norm(
                args['r_2_h_w'][0].astype(np.int32).dot(
                    args['h_state'][0] * hr_gate_scratch),
                qrec.internal_qtype.q) + args['r_h_b'][0]

            if idx < params.n_input_cells:
                if not params.rnn_same_inout_scale:
                    # scale input_scale * h_input_weights_scale to state_q_scale * h_recurrent_weights_scale
                    hr_gate_scratch += qrec.scale_h_input_2_h_HtxW(
                        args['w_2_h_w'][0].dot(input_tensor[idx]) +
                        args['w_h_b'][0],
                        0,
                        ktype='symmetric')
                else:
                    hr_gate_scratch += args['w_2_h_w'][0].astype(np.int32).dot(
                        input_tensor[idx]) + args['w_h_b'][0]
            else:
                if not params.rnn_same_inout_scale:
                    hr_gate_scratch += qrec.scale_h_input2_h_HtxW(
                        args['w_h_b'][0], 0, ktype='symmetric')
                else:
                    hr_gate_scratch += args['w_h_b'][0]

        # scale to q15 or internal Q depending on activation type
        hr_gate_scratch = get_activation(params.activation, params.hard_act)(
            qrec.scale_h_internal(hr_gate_scratch, 0,
                                  ktype='symmetric'), qrec.internal_qtype)
        # if not hard then go from Q15 -> int_q
        if not params.hard_act and qrec.internal_qtype.q != 15:
            hr_gate_scratch = at_norm(hr_gate_scratch,
                                      15 - qrec.internal_qtype.q)

        # ----------- SCALE Q7 -----------

        # Ht = (1 - zt) (.) ht + zt (.) Ht-1
        # zt = (1 - int_q) * Q7 + Q7 * Q7 = INT_Q * 2
        # >> and clip

        h_state = (args['h_state'][0].copy()).astype(
            np.int32) << (qrec.internal_qtype.q - 7)
        h_state = qrec.out_qs[0].clip(
            at_norm(
                (qrec.internal_qtype.quantize(1) - z_gate_scratch) *
                hr_gate_scratch + z_gate_scratch * h_state,
                (qrec.internal_qtype.q * 2) - 7)).astype(qrec.out_qs[0].dtype)
        args['h_state'][0] = h_state.copy()
        return h_state
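For orientation, the step above is a fixed-point GRU cell with the linear_before_reset variant of the candidate state; a float reference using the standard gate equations (all names and the small random shapes below are illustrative only):

import numpy as np

def gru_step_ref(x, h, w_z, r_z, b_z, w_r, r_r, b_r, w_h, r_h, w_b_h, r_b_h):
    sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
    z = sigmoid(w_z @ x + r_z @ h + b_z)                        # update gate
    r = sigmoid(w_r @ x + r_r @ h + b_r)                        # reset gate
    h_cand = np.tanh(w_h @ x + r * (r_h @ h + r_b_h) + w_b_h)   # linear_before_reset
    return (1 - z) * h_cand + z * h                             # new hidden state

n_in, n_state = 2, 3
rng = np.random.default_rng(0)
w = lambda *s: rng.standard_normal(s) * 0.1
print(gru_step_ref(w(n_in), np.zeros(n_state),
                   w(n_state, n_in), w(n_state, n_state), w(n_state),
                   w(n_state, n_in), w(n_state, n_state), w(n_state),
                   w(n_state, n_in), w(n_state, n_state), w(n_state), w(n_state)))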
Example 27
def normalize(obj, n_bits):
    if n_bits == 0:
        return obj
    if n_bits < 0:
        return obj << -n_bits
    return at_norm(obj, n_bits)
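normalize simply dispatches between a left shift and at_norm depending on the sign of n_bits. A self-contained copy with a stand-in at_norm (assumed to be a rounding-free arithmetic right shift, which may not match the library's exact rounding) makes the behaviour easy to check:

import numpy as np

def at_norm(obj, n_bits):          # stand-in: assumed plain arithmetic right shift
    return obj >> n_bits

def normalize(obj, n_bits):        # same logic as the example above
    if n_bits == 0:
        return obj
    if n_bits < 0:
        return obj << -n_bits
    return at_norm(obj, n_bits)

print(normalize(np.int32(1000), 3), normalize(np.int32(1000), -3))   # 125 8000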
Example 28
 def _imp_(arr: np.ndarray, scale, scalen):
     return at_norm(arr.astype(np.int32) * scale, scalen)
Example 29
 def _imp_(arr: np.ndarray, norm):
     return at_norm(arr.astype(np.int32), norm)
Example 30
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
        offsets = in_tensors[0]
        scores = in_tensors[1]
        anchors = in_tensors[2]
        # decoded_bboxes: Q14
        # valid_scores: Q7
        anchors_type = "centers"
        if anchors_type == 'centers':
            anchors_cnts = anchors
        else:
            anchors_cnts = convert_cors2cnts(anchors)
        set_ssd_scales(qrec, params)
        scores_q = qrec.in_qs[1]
        score_threshold = scores_q.quantize(params.nms_score_threshold)
        decoded_bboxes = []
        for i in range(scores.shape[0]):
            for j in range(scores.shape[1]):
                if len(decoded_bboxes) > params.max_bb_before_nms:
                    break
                if scores[i, j] <= score_threshold:
                    continue
                offset = offsets[i]
                anchor = anchors[i]
                #  xcnt, ycnt --> Q14
                #  xcnt = (So*O * Sa*Aw)/params.x_scale + Sa*Ax = So*Sa/params.x_scale (O*Aw + x_scale/So * Ax) =
                #           (scale_x * (O*Aw + (scale_x_anc*Ax)>>scale_x_ancNorm))>>scale_xNorm =
                #           at_norm(scale_x*(O*Aw + at_norm(scale_x_anc*Ax, scale_x_ancNorm)), scale_xNorm)
                xcenter = qrec.cache['scale_x_q'].apply_scales(
                    np.multiply(
                        offset[CNTX_IDX], anchor[W_IDX], dtype=np.int32) +
                    qrec.cache['scale_x_anc_q'].apply_scales(anchor[CNTX_IDX]))
                ycenter = qrec.cache['scale_y_q'].apply_scales(
                    np.multiply(
                        offset[CNTY_IDX], anchor[H_IDX], dtype=np.int32) +
                    qrec.cache['scale_y_anc_q'].apply_scales(anchor[CNTY_IDX]))

                #  half_h, half_w --> Q14
                #  half_h = exp(So*Off / params.h_scale) * Sa*A = Sa/So * exp(So/params.h_scale *O) * A =
                #           (scale_ao * (A* exp17.15(scale_h*O<<15-scale_hNorm))>>scale_aoNorm) =
                #           at_norm(scale_ao*(A*exp17.15(scale_h*O<<15-scale_hNorm)), scale_aoNorm)
                norm_h = 15 - qrec.cache['scale_h_q'].qnorms
                norm_w = 15 - qrec.cache['scale_w_q'].qnorms
                exp_h = exp_fp_17_15(
                    np.multiply(offset[H_IDX],
                                int(qrec.cache['scale_h_q'].qbiases),
                                dtype=np.int32) << norm_h)
                exp_w = exp_fp_17_15(
                    np.multiply(offset[W_IDX],
                                int(qrec.cache['scale_w_q'].qbiases),
                                dtype=np.int32) << norm_w)
                half_h = qrec.cache['scale_ao_q'].apply_scales(
                    np.multiply(exp_h, anchor[H_IDX], dtype=np.int32)) >> 1
                half_w = qrec.cache['scale_ao_q'].apply_scales(
                    np.multiply(exp_w, anchor[W_IDX], dtype=np.int32)) >> 1

                decoded_bboxes.append({
                    "bbox": [
                        ycenter - half_h, xcenter - half_w, ycenter + half_h,
                        xcenter + half_w
                    ],
                    "score":
                    scores[i, j],
                    "class":
                    j,
                    "alive":
                    True
                })

        # Bubble sort to sort the scores
        changed = True
        while changed:
            changed = False
            for i in range(len(decoded_bboxes) - 1):
                if decoded_bboxes[i]['score'] < decoded_bboxes[i + 1]['score']:
                    temp = decoded_bboxes[i]
                    decoded_bboxes[i] = decoded_bboxes[i + 1]
                    decoded_bboxes[i + 1] = temp
                    changed = True

        # NMS
        for idx in range(len(decoded_bboxes)):
            for idx_int in range(idx + 1, len(decoded_bboxes)):
                if (not decoded_bboxes[idx_int]['alive']) or (
                        decoded_bboxes[idx]['class'] !=
                        decoded_bboxes[idx_int]['class']):
                    continue
                intersection = rect_intersect_area(
                    decoded_bboxes[idx]['bbox'],
                    decoded_bboxes[idx_int]['bbox'])
                union = rect_union_area(decoded_bboxes[idx]['bbox'],
                                        decoded_bboxes[idx_int]['bbox'])
                if intersection >= at_norm(
                        scores_q.quantize(params.nms_iou_threshold) * union,
                        7):
                    decoded_bboxes[idx_int]['alive'] = False

        out_boxes = np.zeros((params.max_detections, 4),
                             dtype=qrec.out_qs[0].dtype)
        out_classes = np.zeros(params.max_detections,
                               dtype=qrec.out_qs[1].dtype)
        out_scores = np.zeros(params.max_detections,
                              dtype=qrec.out_qs[2].dtype)
        out_idx = 0
        for i in range(len(decoded_bboxes)):
            if out_idx >= params.max_detections:
                break
            bbox = decoded_bboxes[i]
            if bbox['alive']:
                out_boxes[out_idx] = bbox['bbox']
                out_classes[out_idx] = bbox['class']
                out_scores[out_idx] = bbox['score']
                out_idx += 1
        # decoded_bboxes, valid_scores = cls.decoder(
        #     params, qrec, offsets, anchors, scores, anchors_type='centers')
        # out_boxes, out_scores, out_classes = cls.nms(params, qrec, decoded_bboxes, valid_scores)
        # out_count = np.array([sum(out_classes != 0)])
        return qrec.get_outputs(params, [out_boxes, out_classes, out_scores],
                                ktype="symmetric")