Code example #1
    def _quantize(cls, params, in_qs, stats, **kwargs):
        _, dtype = cls.get_float_opts(**kwargs)
        names = {
            val: idx
            for idx, val in enumerate(LSTMParameters.INPUT_NAMES)
        }
        edges = kwargs['G'].indexed_in_edges(params.name)
        in_qs = deepcopy(in_qs)
        scale_pairs = {
            chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
            for chan in ['i', 'o', 'c', 'f']
        }
        for scale_pair in scale_pairs.values():
            w_q = in_qs[names[scale_pair[0]]]
            in_qs[names[scale_pair[0]]] = QType(min_val=w_q.min_val,
                                                max_val=w_q.max_val,
                                                dtype=dtype,
                                                dont_generate_value=True)
            w_q = in_qs[names[scale_pair[1]]]
            in_qs[names[scale_pair[1]]] = QType(
                min_val=w_q.min_val,
                max_val=w_q.max_val,
                dtype=dtype,
                concatenated_nodes=[
                    edges[names[scale_pair[0]]].from_node.name
                ])

        if params.lstm_output_c_state:
            out_qs = [QType(dtype=dtype), QType(dtype=dtype)]
        else:
            out_qs = [QType(dtype=dtype)]

        return QRec.float(in_qs=in_qs, out_qs=out_qs, float_dtype=dtype)
Code example #2
 def _quantize(cls, params, in_qs, stats, **kwargs):
     force_out_qs, dtype = cls.get_float_opts(**kwargs)
     if force_out_qs and all(qtype.dtype != dtype for qtype in force_out_qs if qtype is not None):
         return None
     return QRec.float(in_qs=[QType(dtype=dtype), QType(dtype=dtype),
                              QType(dtype=dtype), QType(dtype=np.int16),
                              QType(dtype=dtype), QType(dtype=np.int16),
                              QType(dtype=dtype), QType(dtype=dtype)],
                       out_qs=[QType(dtype=dtype)],
                       float_dtype=dtype)
Code example #3
 def matscale3(cls, in_tensors, qrec):
     assert qrec.in_qs[0].bits == qrec.in_qs[1].bits
     assert qrec.in_qs[1].bits == qrec.in_qs[2].bits
     if qrec.in_qs[0].bits == 8:
         q_calc = QType(bits=32,
                        q=qrec.in_qs[0].q + qrec.in_qs[1].q +
                        qrec.in_qs[2].q,
                        signed=True)
         res = np.multiply(np.multiply(in_tensors[0],
                                       in_tensors[1],
                                       dtype=np.int32),
                           in_tensors[2],
                           dtype=np.int32)
         res = qrec.out_qs[0].reduce_from(res, q_calc)
     elif qrec.in_qs[0].bits == 16:
         q_calc = QType(bits=32,
                        q=qrec.in_qs[0].q + qrec.in_qs[1].q,
                        signed=True)
         res = np.multiply(in_tensors[0], in_tensors[1], dtype=np.int32)
         res = qrec.out_qs[0].reduce_from(res, q_calc)
         q_calc = QType(bits=32,
                        q=qrec.in_qs[2].q + qrec.out_qs[0].q,
                        signed=True)
         res = np.multiply(res, in_tensors[2], dtype=np.int32)
         res = qrec.out_qs[0].reduce_from(res, q_calc)
     else:
         raise ValueError("only 8 and 16 bits supported")
     return res
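The Q-format arithmetic that matscale3 leans on can be shown without the gap_sdk classes. The sketch below is a standalone illustration (helper names and values are made up, and any rounding or saturation that QType.reduce_from may apply is ignored): multiplying three Q5 operands gives a Q15 product held in 32 bits, which is then shifted back down to the output format.

import numpy as np

def to_q(x, q):
    # quantize a float array to an integer with q fractional bits
    return np.round(x * (1 << q)).astype(np.int32)

def from_q(x, q):
    # convert a fixed-point integer back to float
    return x.astype(np.float64) / (1 << q)

# three Q5 operands, as in the 8-bit path above
a = to_q(np.array([0.5, 1.5]), 5)
b = to_q(np.array([2.0, 0.25]), 5)
c = to_q(np.array([1.0, 4.0]), 5)

# Q5 * Q5 * Q5 accumulates to Q15, held in 32 bits
prod = np.multiply(np.multiply(a, b, dtype=np.int32), c, dtype=np.int32)

# reducing to a Q5 output drops the extra fractional bits,
# roughly what reduce_from does with q_calc
out = prod >> (15 - 5)
print(from_q(out, 5))  # ~[1.0, 1.5]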
Code example #4
File: test_operations.py Project: hasetz/gap_sdk
def test_conf2d_q2(caplog):
    caplog.set_level(logging.INFO)
    weights_q = QType(16, 1, True)
    weights = weights_q.quantize(np.full([1, 1, 2, 2], 1.0))
    filt = Conv2DFilterDim(2, 2, 1, 1)
    stride = StrideDim(1)
    pad = PadDim.valid()
    dilation = DilationDim(1)
    params = Conv2DParameters("test",
                              filt=filt,
                              stride=stride,
                              padding=pad,
                              dilation=dilation,
                              in_dims_hint=[['c', 'h', 'w']],
                              out_dims_hint=[['c', 'h', 'w']])
    in_q = QType(16, 0, True)
    calc_q = QType(weights_q.bits + in_q.bits, weights_q.q + in_q.q, True)
    qrec = FilterQuantizationRecord(in_qs=[in_q],
                                    out_qs=[in_q],
                                    weights_q=weights_q,
                                    acc_q=calc_q,
                                    calc_q=calc_q)
    input_ = in_q.quantize(np.full([1, 2, 2], 1.0))
    in_dims = Dim.named(c=1, h=2, w=2).impose_order(['c', 'h', 'w'])
    out_dims = params.get_output_size([in_dims])
    output_ = conv2d(params,
                     in_dims,
                     out_dims[0],
                     input_,
                     weights,
                     None,
                     qrec=qrec)
    output_ = in_q.dequantize(output_)
    assert np.array_equal(output_, [[[4.]]])
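The expected value follows directly from the fixture: both the 2x2 input and the 2x2 weights are all 1.0, so the single valid convolution position sums four 1.0 * 1.0 products, i.e. 4.0. In fixed point, the input is Q0 and the weights Q1 (both 16 bits), products accumulate in the 32-bit Q1 calc_q, and the result is reduced back to the Q0 output format before being dequantized to 4.0.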
Code example #5
File: test_operations.py Project: hasetz/gap_sdk
def test_fc():
    filt = FcFilterDim(3, 3, 3, 1)
    params = FcParameters("test", filt=filt)
    weights_q = QType(16, 2, True)
    in_q = QType(16, 2, True)
    acc_q = QType(16, 4, True)
    calc_q = QType(16, 4, True)
    qrec = FilterQuantizationRecord(in_qs=[in_q],
                                    out_qs=[in_q],
                                    calc_q=calc_q,
                                    acc_q=acc_q,
                                    biases_q=None,
                                    weights_q=weights_q)
    weights = weights_q.quantize(np.full([3, 1, 3, 3], 1.0))
    input_ = in_q.quantize(np.arange(9)).reshape([1, 3, 3])
    in_dims = Dim.named(c=1, h=3, w=3).impose_order(['c', 'h', 'w'])
    out_dims = params.get_output_size([in_dims])

    output_ = linear(params,
                     in_dims,
                     out_dims[0],
                     input_,
                     weights,
                     None,
                     qrec=qrec)
    output_ = in_q.dequantize(output_)
    assert np.array_equal(output_, [[[36]], [[36]], [[36]]])
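With every weight equal to 1.0 and the input set to np.arange(9), each of the three fully connected outputs is just 0 + 1 + ... + 8 = 36. The Q2 inputs and weights multiply into Q4, which matches acc_q and calc_q, and 36 in Q4 (576) fits comfortably in 16 bits, so the dequantized result is exact.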
Code example #6
def matscale3(in_tensors, qrec):
    assert qrec.in_qs[0].bits == qrec.in_qs[1].bits
    assert qrec.in_qs[1].bits == qrec.in_qs[2].bits
    if qrec.in_qs[0].bits == 8:
        q_calc = QType(bits=32,
                       q=qrec.in_qs[0].q + qrec.in_qs[1].q + qrec.in_qs[2].q,
                       signed=True)
        res = np.multiply(np.multiply(in_tensors[0],
                                      in_tensors[1],
                                      dtype=np.int32),
                          in_tensors[2],
                          dtype=np.int32)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    elif qrec.in_qs[0].bits == 16:
        q_calc = QType(bits=32,
                       q=qrec.in_qs[0].q + qrec.in_qs[1].q,
                       signed=True)
        res = np.multiply(in_tensors[0], in_tensors[1], dtype=np.int32)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
        q_calc = QType(bits=32,
                       q=qrec.in_qs[2].q + qrec.out_qs[0].q,
                       signed=True)
        res = np.multiply(res, in_tensors[2], dtype=np.int32)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    else:
        raise ValueError("only 8 and 16 bits supported")
    return res
Code example #7
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        opts = kwargs['opts']
        if force_out_q:
            if force_out_q.forced_scale or force_out_q.forced_zero_point:
                return None
            if in_qs[0].dtype == np.int8:
                dtypes = [np.int8, np.int16]
            else:
                dtypes = [np.int16]
            if force_out_q.forced_dtype and force_out_q.dtype not in dtypes:
                return None

        in_qs = cls.force_symmetric_and_dtype(in_qs)
        if in_qs is None:
            return None
        # force the input to be POW2 scaled
        pow2_scale = np.power(2, np.ceil(np.log2(in_qs[0].scale)))
        in_q = QType(min_val=in_qs[0].min_val,
                     max_val=in_qs[0].max_val,
                     dtype=in_qs[0].dtype,
                     scale=pow2_scale,
                     forced=True)
        if in_q.dtype == np.int8 and (opts.get('softmax_out_8bits', None) or
                                      (force_out_q
                                       and force_out_q.dtype == np.int8)):
            # params.at_options.softmax_out_8bits = 1
            o_q = QType(min_val=-1, max_val=1, dtype=np.int8, scale=2**(-7))
        else:
            o_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
            if in_q.dtype == np.int16 and o_q.dtype == np.int16:
                return QRec.symmetric(in_qs=[in_q], out_qs=[o_q])

        return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
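Because softmax produces values in [0, 1], the output scale does not depend on the observed statistics: the code above always uses a fixed Q7 scale (2**-7) for an int8 output or a fixed Q15 scale (2**-15) for an int16 output. The input dtype, the softmax_out_8bits option and any forced output dtype only decide which of the two variants applies, and the input itself is forced onto a power-of-two scale.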
Code example #8
 def get_outputs(self,
                 params: Parameters,
                 output_tensors: Sequence[np.ndarray],
                 ktype: str = None) -> Sequence[np.ndarray]:
     if ktype == "symmetric":
         if isinstance(params, (MatrixAddParameters, MatrixSubParameters)):
             q_calc = QType(bits=32,
                            q=min(self.in_qs[0].q, self.in_qs[1].q),
                            signed=True)
             output_tensors = [
                 self.out_qs[0].reduce_from(output_tensors[0], q_calc)
             ]
         elif isinstance(params,
                         (MatrixMulParameters, MatrixDivParameters)):
             q_calc = QType(bits=32,
                            q=self.in_qs[0].q + self.in_qs[1].q,
                            signed=True)
             output_tensors = [
                 self.out_qs[0].reduce_from(output_tensors[0], q_calc)
             ]
         elif isinstance(
                 params,
                 GlobalPoolParameters) and params.pool_type == "sum":
             output_tensors = [
                 self.out_qs[0].reduce_from(output_tensors[0],
                                            self.in_qs[0])
             ]
         if self._auto_dequantize_outputs:
             return [
                 self.out_qs[idx].dequantize(output_tensor)
                 for idx, output_tensor in enumerate(output_tensors)
             ]
     return output_tensors
Code example #9
    def _common(cls, node: TFLiteNode, **kwargs):
        custom_opts = node.get_custom_options()
        G = kwargs['G']
        opts = kwargs['opts']
        all_nodes = kwargs['all_nodes']
        importer = kwargs['importer']

        inputs = [all_nodes[t] for t in node.input]
        outputs = [
            all_nodes.get(node.output[idx]) if idx < len(node.output) else None
            for idx in range(4)
        ]
        # inp_shapes = [input[2].shape for input in inputs]

        if 'max_bb_before_nms' not in custom_opts:
            custom_opts['max_bb_before_nms'] = 300

        params = SSDDetectorParameters(node.name, parameters=custom_opts)

        overriden_outputs = []
        for idx, output in enumerate(outputs):
            if output:
                overriden_outputs.append(node.output[idx])
                continue
            oparams = G.add_output()
            otensor = TensorBase("Detect_%s" % idx)
            overriden_outputs.append(otensor)
            importer.provisional_outputs[otensor] = (oparams, 0, None)
        # covers the case where not all outputs are generated by the conversion tool
        node.override_outputs(overriden_outputs)

        for idx, inp in enumerate(inputs):
            G.add_edge(
                NNEdge(from_node=inp[0],
                       to_node=params,
                       from_idx=inp[1],
                       to_idx=idx))

        if opts.get('load_quantization'):
            in_qtypes = [
                QType.from_min_max_sq(tensor.qtype.min_val,
                                      tensor.qtype.max_val) if
                (tensor.qtype.is_asymmetric
                 or not tensor.qtype.signed) else tensor.qtype
                for tensor in node.input
            ]
            o_boxes_qtype = QType(min_val=-2,
                                  max_val=2,
                                  dtype=np.int16,
                                  scale=2**(-14))
            o_scores_qtype = node.input[1].qtype
            o_class_qtype = QType(scale=1, dtype=np.int8)
            qrec = QRec.scaled(in_qs=in_qtypes,
                               out_qs=[
                                   o_boxes_qtype, o_class_qtype,
                                   o_scores_qtype, o_class_qtype
                               ])
            G.quantization[NodeId(params)] = qrec

        return params
Code example #10
def get_closest_qtype(constraint, qtype):
    if 'dtype' in constraint:
        dtype_constraint = constraint['dtype']
        if isinstance(dtype_constraint, set):
            return QType(dtype=next(dtype_constraint))
        return QType(dtype=dtype_constraint)
    return None
Code example #11
def get_quantization(stats, min_qsnr, force_width):
    qstats = stats['qstats']
    if force_width is not None:
        return QType(bits=force_width, q=qstats[force_width]['q'], signed=True)
    for width in STATS_BITS:
        if qstats[width]['qsnr'] > min_qsnr:
            return QType(bits=width, q=qstats[width]['q'], signed=True)
    raise ValueError("no solution for this QSNR could be found")
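get_quantization walks the candidate widths from narrowest to widest and returns the first one whose measured QSNR clears min_qsnr. Below is a standalone sketch of the same selection, with a made-up qstats dictionary and a plain (bits, q) tuple standing in for QType (the STATS_BITS values are assumed, not the gap_sdk definitions).

STATS_BITS = [8, 16, 32]  # assumed candidate widths, narrowest first

def pick_q(qstats, min_qsnr, force_width=None):
    # same shape of logic as get_quantization above
    if force_width is not None:
        return force_width, qstats[force_width]['q']
    for width in STATS_BITS:
        if qstats[width]['qsnr'] > min_qsnr:
            return width, qstats[width]['q']
    raise ValueError("no solution for this QSNR could be found")

# hypothetical per-width statistics for one tensor
qstats = {
    8:  {'q': 5,  'qsnr': 28.0},
    16: {'q': 13, 'qsnr': 55.0},
    32: {'q': 29, 'qsnr': 110.0},
}
print(pick_q(qstats, min_qsnr=50))  # -> (16, 13): 8 bits is too noisy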
Code example #12
 def _quantize(cls, params, in_qs, stats, **kwargs):
     force_out_qs, dtype = cls.get_float_opts(**kwargs)
     if force_out_qs and all(qtype.dtype != dtype for qtype in force_out_qs if qtype is not None):
         return None
     # all inputs and outputs are set to the required float type
     return QRec.float(in_qs=[QType(dtype=dtype)
                              for _ in range(3)],
                       out_qs=[QType(dtype=dtype)],
                       float_dtype=dtype)
Code example #13
 def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs):
     float_type = kwargs['opts']['float_type']
     dtype = FLOAT_DTYPES.get(float_type)
     if dtype is None:
         raise ValueError(f'invalid float_type {float_type}')
     if stats:
         return [QType(min_val=stats['range_in'][idx]['min'],
                     max_val=stats['range_in'][idx]['max'],
                     dtype=dtype) if dim is not None else None
                 for idx, dim in enumerate(params.in_dims)]
     return [QType(dtype=dtype) if dim is not None else None
             for idx, dim in enumerate(params.in_dims)]
Code example #14
def gen_ssd_globals(gen, node, qrec):
    qrec.set_scales(node)
    scores_q = qrec.in_qs[1]
    scores_scale, scores_norm = compute_mul_bias(scores_q.scale)

    cname_scales, file_name_scales = gen_constant(gen, node, node, SSD_SCALES)
    contents = np.array([qrec.scale_x_q.qbiases,
                         qrec.scale_x_anc_q.qbiases,
                         qrec.scale_y_q.qbiases,
                         qrec.scale_y_anc_q.qbiases,
                         qrec.scale_h_q.qbiases,
                         qrec.scale_w_q.qbiases,
                         qrec.scale_ao_q.qbiases,
                         scores_scale], dtype=np.int8)
    scale_info = ConstantInfo(file_name_scales, QType(bits=8, q=0, signed=True), contents=contents)

    cname_norms, file_name_norms = gen_constant(gen, node, node, SSD_NORMS)
    contents = np.array([qrec.scale_x_q.qnorms,
                         qrec.scale_x_anc_q.qnorms,
                         qrec.scale_y_q.qnorms,
                         qrec.scale_y_anc_q.qnorms,
                         qrec.scale_h_q.qnorms,
                         qrec.scale_w_q.qnorms,
                         qrec.scale_ao_q.qnorms,
                         scores_norm], dtype=np.int8)
    norms_info = ConstantInfo(file_name_norms, QType(bits=8, q=0, signed=True), contents=contents)

    score_threshold = scores_q.quantize(node.nms_score_threshold)
    cname_infos, file_name_infos = gen_constant(gen, node, node, INFOS)
    contents = np.array([round(node.nms_iou_threshold * 2**7),     # Q7
                         score_threshold,                          # Q0 [0:255]
                         node.max_detections,                      # Q0 [0:255]
                         node.max_classes_per_detection,           # Q0 [0:255]
                         node.max_bb_before_nms >> 8,
                         node.max_bb_before_nms], dtype=np.int8)   # max_bb = Infos[4]<<8 + Infos[5]
    ssd_infos = ConstantInfo(file_name_infos, QType(bits=8, q=0, signed=True), contents=contents)

    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.ctype, cname_scales,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=scale_info))

    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.shift_ctype, cname_norms,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=norms_info))

    gen.globals.append(GlobalArgInfo('uint8', cname_infos,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=ssd_infos))
Code example #15
File: default_float.py Project: mfkiwl/gap_sdk
 def _quantize(cls, params, in_qs, stats, **kwargs):
     force_out_qs, dtype = cls.get_float_opts(**kwargs)
     if force_out_qs and all(qtype.dtype != dtype for qtype in force_out_qs
                             if qtype is not None):
         return None
     # use cur_G not G here since this may be called inside a fusion
     # cur_G == G or fusion subgraph if inside fusion
     G = kwargs['cur_G']
     in_len = len(G.indexed_in_edges(params.name))
     out_len = len(G.indexed_out_edges(params.name))
     # all inputs and outputs are set to the required float type
     return QRec.float(in_qs=[QType(dtype=dtype) for _ in range(in_len)],
                       out_qs=[QType(dtype=dtype) for _ in range(out_len)],
                       float_dtype=dtype)
Code example #16
 def _quantize(cls, params, in_qs, stats, **kwargs):
     force_out_qs, dtype = cls.get_float_opts(**kwargs)
     if force_out_qs and any(qtype.dtype != dtype for qtype in force_out_qs
                             if qtype is not None):
         return None
     # all inputs and outputs are set to the required float type
     opts = kwargs['opts']
     if opts['hwc']:
         cls.check_order(params, AT_HWC_KER_IN_ORDER, AT_HWC_KER_OUT_ORDER)
     else:
         cls.check_order(params, AT_CHW_KER_IN_ORDER, AT_CHW_KER_OUT_ORDER)
     return QRec.float(in_qs=[QType(dtype=dtype) for _ in range(3)],
                       out_qs=[QType(dtype=dtype)],
                       float_dtype=dtype)
Code example #17
 def _quantize(cls, params, in_qs, stats, **kwargs):
     force_out_qs, dtype = cls.get_float_opts(**kwargs)
     if force_out_qs and any(qtype.dtype != dtype for qtype in force_out_qs
                             if qtype is not None):
         return None
     opts = kwargs['opts']
     if opts['hwc']:
         cls.check_order(params, [['h', 'w', 'c']], [['h', 'w', 'c']])
     elif params.in_dims_hint:
         cls.check_order(params, [['c', 'h', 'w']], [['c', 'h', 'w']])
     # all inputs and outputs are set to the required float type
     return QRec.float(in_qs=[QType(dtype=dtype)],
                       out_qs=[QType(dtype=dtype)],
                       float_dtype=dtype)
Code example #18
    def calculate_q(self, node, astats, in_qs, force_width, force_out=None):

        if isinstance(node,
                      (InputParameters, MatrixBroadcastedLinearOpParameters,
                       ConstantInputParameters, MatScaleFusionParameters)):
            qrec = self.calculate_output_q(node,
                                           astats,
                                           in_qs,
                                           force_width=force_width,
                                           force_out=force_out)
        elif isinstance(node, Conv2DParameters):
            qrec = self.calculate_filter_q(node,
                                           astats,
                                           in_q=in_qs[0],
                                           force_width=force_width,
                                           force_out=force_out)
        elif isinstance(node, FcParameters):
            qrec = self.calculate_filter_q(node,
                                           astats,
                                           in_q=in_qs[0],
                                           force_width=force_width,
                                           force_out=force_out)
        elif isinstance(node, SoftMaxParameters):
            # softmax always outputs Q15
            qrec = SymmetricQuantizationRecord(in_qs=in_qs,
                                               out_qs=[QType(16, 15, True)])
        elif isinstance(node, ActivationParameters):
            qrec = SymmetricQuantizationRecord(
                in_qs=in_qs,
                out_qs=[self.compute_activation_out_qtype(node, in_qs[0])])
        else:
            qrec = SymmetricQuantizationRecord(in_qs=in_qs, out_qs=in_qs)
        return qrec
Code example #19
 def _get_common_q(cls, in_qs):
     max_int_bits_idx = max(in_q.bits - in_q.q for in_q in in_qs)
     max_bits = max([in_q.bits for in_q in in_qs])
     common_q = QType(bits=max_bits,
                      q=max_bits - max_int_bits_idx,
                      signed=True)
     return common_q
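The helper lines every input up on one Q format: it keeps the widest container present and reserves enough integer bits for the input with the largest integer part, giving whatever is left to the fraction. A plain-Python sketch of the same arithmetic, with (bits, q) tuples standing in for QType:

def common_q(in_qs):
    # integer bits (sign included) needed by the worst-case input
    max_int_bits = max(bits - q for bits, q in in_qs)
    max_bits = max(bits for bits, _ in in_qs)
    return max_bits, max_bits - max_int_bits

# an 8-bit Q5 input and a 16-bit Q12 input
print(common_q([(8, 5), (16, 12)]))  # -> (16, 12): 4 integer bits, 12 fractional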
Code example #20
 def _common(cls, node, **kwargs):
     all_nodes = kwargs['all_nodes']
     G = kwargs['G']
     valid_name = kwargs['valid_name']
     inputs = [all_nodes[inp] for inp in node.input]
     x = inputs[0]
     x_shape = x[2].shape
     to_dtype = node.attrs['to']
     if cls.is_constant(x):
         x_val = cls.get_constant(x)
         x_val = x_val.astype(to_dtype)
         if x_val.size < 10:
             logger.info("reducing %s to a constant %s", valid_name, x_val)
         else:
             logger.info("reducing %s to a constant", valid_name)
         params = ConstantInputParameters(valid_name,
                                          dims=Dim.unnamed(x_val.shape),
                                          value=x_val)
     else:
         params = QuantizeParameters(valid_name,
                                     to_qtype=QType(dtype=to_dtype))
         G.add_edge(
             NNEdge(from_node=x[0], to_node=params, from_idx=x[1],
                    to_idx=0))
     all_nodes[node.output[0]] = (params, 0, ProvisionalDim(x_shape), None)
     return params
Code example #21
def rnn_infos(gen, node, qrec):
    i_state_q = qrec.in_qs[node.INPUT_NAMES.index('i_state')]

    contents = []
    comments = []

    # info for activation (scale the act input to the proper scale)
    info, comment = INFOS_FUNCS[node.activation]("f", qrec.s_2_s_q, i_state_q)
    contents.append(info)
    comments.append(comment)

    # info for input scaling (only used with non SameInputStateScale kernels)
    info, comment = scale_infos("f", getattr(qrec, "i_2_a_q"))
    contents.append(info)
    comments.append(comment)

    # info for scaling the activation out to out scale (only used for non Hard activations kernels)
    info, comment = scale_infos("f", getattr(qrec, "s_2_o_q"))
    contents.append(info)
    comments.append(comment)

    cname, file_name = gen_constant(gen, node, node, INFOS)
    const_info = ConstantInfo(file_name,
                              QType(bits=8, q=0, signed=True),
                              contents=np.hstack(tuple(contents)))

    gen.globals.append(
        GlobalArgInfo("int8",
                      cname,
                      gen.opts['default_global_home_location'],
                      gen.opts['default_global_exec_location'],
                      const_info=const_info,
                      comment=comment))
Code example #22
File: activation_pow2.py Project: brupa9/gap_sdk
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        if params.activation == "relu6":
            int_bits = calc_bits(6)
        elif params.activation == "relun":
            relun = params.activation_params
            if isinstance(relun, list):
                relun = max(relun)
            int_bits = calc_bits(relun)
        elif params.activation == "relu" or params.activation == "hswish" or params.activation == "hsigmoid" or params.activation == "leaky":
            int_bits = bits(stats['range_out'][0]['max'],
                            stats['range_out'][0]['min'])
        else:
            raise ValueError(
                f'no support for activation {params.activation} in POW2 quantizer'
            )

        in_q = in_qs[0]
        if force_out_q is None:
            q = max(cls.get_pow2_bits(**kwargs) - int_bits, 0)
            out_q = QType(q=q, dtype=out_dtype)
        else:
            if force_out_q.bits - force_out_q.q < int_bits:
                LOG.warning(
                    'quantization is forcing node %s to have an output that may clip',
                    params.name)
            out_q = force_out_q
        return SymmetricQuantizationRecord(in_qs=[in_q], out_qs=[out_q])
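The warning branch is plain fixed-point bookkeeping: a signed Q(bits, q) value keeps bits - q integer bits (sign included), so it can represent magnitudes up to roughly 2**(bits - q - 1). int_bits is the number of integer bits the activation's range needs (calc_bits(6) for relu6, calc_bits(relun) for relun, otherwise derived from the observed output range), so if a forced output type reserves fewer integer bits than that, large activations will saturate, hence the "may clip" warning. In the unforced case all remaining bits simply go to the fraction: q = total pow2 bits - int_bits.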
Code example #23
 def calculate_output_q(self,
                        node: Parameters,
                        astats,
                        in_qs,
                        force_width=None,
                        force_out=None):
     del node
     if force_out:
         if force_out.bits:
             if force_out.q:
                 o_q = QType(bits=force_out.bits,
                             q=force_out.q,
                             signed=True)
             else:
                 o_q = QType.from_min_max(
                     max_val=astats['range_out'][0]['max'],
                     min_val=astats['range_out'][0]['min'],
                     bits=force_out.bits)
         elif force_out.q:
             o_q = QType.from_min_max(max_val=astats['range_out'][0]['max'],
                                      min_val=astats['range_out'][0]['min'],
                                      bits=force_width)
             o_q.q = force_out.q
     else:
         o_q = QType.from_min_max(max_val=astats['range_out'][0]['max'],
                                  min_val=astats['range_out'][0]['min'],
                                  bits=force_width)
     return SymmetricQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
Code example #24
    def __init__(self,
                 *args,
                 i_2_z_WR_q: ScalingQType = None,
                 i_2_h_WR_q: ScalingQType = None,
                 i_2_r_WR_q: ScalingQType = None,
                 z_WR_2_int_q: ScalingQType = None,
                 h_WR_2_int_q: ScalingQType = None,
                 r_WR_2_int_q: ScalingQType = None,
                 i_qtype: QType = None,
                 scales=None,
                 info=None,
                 **kwargs):

        super(MultScalableGRUQuantizationRecord, self).__init__(*args,
                                                                info=info,
                                                                **kwargs)
        if info is None:
            # scale applied to input after weights to recurrent after weights
            self._info['i_2_z_WR_q'] = i_2_z_WR_q
            self._info['i_2_h_WR_q'] = i_2_h_WR_q
            self._info['i_2_r_WR_q'] = i_2_r_WR_q
            # scale applied to gate before activation to internal q
            self._info['z_WR_2_int_q'] = z_WR_2_int_q
            self._info['h_WR_2_int_q'] = h_WR_2_int_q
            self._info['r_WR_2_int_q'] = r_WR_2_int_q
            # internal qtype which is also the output scale
            self._info['i_qtype'] = i_qtype or QType(bits=8, q=7, signed=True)
            self._info['scales'] = scales
Code example #25
 def matscale2(cls, in_tensors, qrec=None):
     assert qrec.in_qs[0].bits == qrec.in_qs[1].bits
     q_calc = QType(bits=32,
                    q=qrec.in_qs[0].q + qrec.in_qs[1].q,
                    signed=True)
     res = np.multiply(in_tensors[0], in_tensors[1], dtype=np.int32)
     res = qrec.out_qs[0].reduce_from(res, q_calc)
     return res
Code example #26
File: test_operations.py Project: hasetz/gap_sdk
def test_conf2d_depth_q():
    calc_q = QType(32, 9, True)
    biases_q = acc_q = out_q = QType(16, 4, True)
    weights_q = QType(16, 4, True)
    in_q = QType(16, 5, True)
    # TF Lite depthwise convolution
    biases = np.full([2], 0.5)
    qbiases = biases_q.quantize(biases)
    weights = np.full([3, 3], 0.5)
    weights = np.repeat(weights, 2).reshape([1, 3, 3, 2])
    qweights = weights_q.quantize(weights)
    filt = Conv2DFilterDim(3, 3, 2,
                           1).impose_order(["in_c", "h", "w", "out_c"])
    stride = StrideDim(1)
    pad = PadDim(0)
    dilation = DilationDim(1)
    params = Conv2DParameters("test",
                              filt=filt,
                              stride=stride,
                              padding=pad,
                              dilation=dilation,
                              groups=1,
                              multiplier=2,
                              tf_depthwise=True,
                              in_dims_hint=[['c', 'h', 'w']],
                              out_dims_hint=[['c', 'h', 'w']])
    qrec = FilterQuantizationRecord(in_qs=[in_q],
                                    out_qs=[out_q],
                                    weights_q=weights_q,
                                    biases_q=biases_q,
                                    acc_q=acc_q,
                                    calc_q=calc_q)
    input_ = np.full([1, 4, 4], 2)
    qinput_ = in_q.quantize(input_)
    in_dims = Dim.named(c=1, h=4, w=4).impose_order(['c', 'h', 'w'])
    out_dims = params.get_output_size([in_dims])
    output_ = conv2d(params, in_dims, out_dims[0], input_, weights, biases)
    qoutput_ = conv2d(params,
                      in_dims,
                      out_dims[0],
                      qinput_,
                      qweights,
                      qbiases,
                      qrec=qrec)
    dqoutput_ = out_q.dequantize(qoutput_)
    assert np.array_equal(output_, dqoutput_)
Code example #27
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        if not fusion and in_qs[0].dtype == np.int32:
            return None

        if params.activation == "relu6":
            int_bits = calc_bits(6)
        elif params.activation == "relun":
            relun = params.activation_params
            if isinstance(relun, list):
                relun = max(relun)
            int_bits = calc_bits(relun)
        elif params.activation in [
                "relu", "hswish", "hsigmoid", "leaky", "htanh"
        ]:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            int_bits = calc_bits(stats['range_out'][0]['max'],
                                 stats['range_out'][0]['min'])
        elif params.activation == "sigmoid" or params.activation == "tanh":
            if force_out_q is None:
                q = 7 if out_dtype == np.int8 else 15
                return QRec.symmetric(in_qs=[in_qs[0]],
                                      out_qs=[QType(q=q, dtype=out_dtype)])
            else:
                q = 7 if force_out_q.dtype == np.int8 else 15
                if force_out_q.q != q:
                    return None
                return QRec.symmetric(in_qs=[in_qs[0]], out_qs=[force_out_q])
        else:
            LOG.error(
                f'no support for activation {params.activation} in POW2 quantizer'
            )
            return None

        in_q = in_qs[0]
        if force_out_q is None:
            q = max(cls.get_pow2_bits(**kwargs) - int_bits, 0)
            out_q = QType(q=q, dtype=out_dtype)
        else:
            if force_out_q.bits - force_out_q.q < int_bits:
                return None
            out_q = force_out_q
        return QRec.symmetric(in_qs=[in_q], out_qs=[out_q])
Code example #28
File: filter_mult.py Project: brupa9/gap_sdk
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        opts = kwargs['opts']
        fusion = kwargs.get('fusion', None)
        G = kwargs['G']
        weights_node = cls.get_weights_node(G, fusion if fusion else params)
        min_val, max_val = None, None
        weights_q = QType.from_array_sq(
            arr=weights_node.dqvalue,
            quantized_dimension=cls.get_quantized_dimension(params, opts),
            dtype=np.int8,
            narrow_range=opts['narrow_weights'])
        if fusion and fusion.fusion_type in [
                'conv_active_pool', 'conv_active'
        ]:
            stats = kwargs['all_stats'][NodeId(fusion,
                                               fusion.contained_nodes()[0])]

            if isinstance(
                    fusion.contained_nodes()[1],
                (SigmoidActivationParameters, TanHActivationParameters,
                 HSwishActivationParameters)):
                stats = kwargs['all_stats'][NodeId(
                    fusion,
                    fusion.contained_nodes()[0])]
            elif fusion and isinstance(fusion.contained_nodes()[1],
                                       HSigmoidActivationParameters):
                # Hard sigmoid implements a RELU, be sure 6 can be representable
                min_val, max_val = 0, 6
            else:
                # Take stats from activation after the convolution
                stats = kwargs['all_stats'][NodeId(
                    fusion,
                    fusion.contained_nodes()[1])]

        if min_val is None or max_val is None:
            min_val, max_val = stats['range_out'][0]['min'], stats[
                'range_out'][0]['max']

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_sq(min_val=min_val,
                                        max_val=max_val,
                                        dtype=out_dtype)
        biases_q = QType(dtype=np.int32,
                         scale=weights_q.scale * in_qs[0].scale)
        mul_biases_q = MultMulBiasScaleQType.from_filter(
            in_qs[0], weights_q, o_q, params)
        # returning the new weights and biases qs will force backprop
        # TODO - ACC_Q LOOKS WRONG AFTER THIS
        return MultScalableFilterQuantizationRecord(
            in_qs=[in_qs[0], weights_q, biases_q],
            out_qs=[o_q],
            acc_q=biases_q,
            calc_q=biases_q,
            mul_biases_q=mul_biases_q)
Code example #29
 def quantize_fusion(self,
                     G: NNGraph,
                     node: ConvFusionParameters,
                     in_qs,
                     force_out=None) -> SymmetricQuantizationRecord:
     if node.fusion_type == 'conv_active':
         result = OrderedDict()
         nodes = node.contained_nodes()
         conv_node = nodes[0]
         conv_astats = self._activation_stats.get(NodeId(node, conv_node))
         conv_qrec = self.calculate_filter_q(conv_node,
                                             conv_astats,
                                             in_q=in_qs[0],
                                             force_width=self._force_width,
                                             out_as_acc=True)
         result[NodeId(node, conv_node)] = conv_qrec
         act_node = nodes[1]
         act_astats = self._activation_stats.get(NodeId(node, act_node))
         if force_out and force_out.bits:
             act_max_q = self.compute_activation_out_maxq(
                 act_node, force_out.bits)
             if force_out.q is not None:
                 if (act_max_q is not None and force_out.q > act_max_q
                     ) or force_out.q > conv_qrec.out_qs[0].q:
                     # We cannot shift left in the kernel
                     # TODO - This should try to increase the input q and perhaps the width
                     # Unlikely to happen
                     raise NotImplementedError()
                 act_o_q = QType(bits=force_out.bits,
                                 q=force_out.q,
                                 signed=True)
             else:
                 if act_max_q is not None:
                     act_o_q.q = min(act_max_q, act_o_q.q)
         else:
             act_o_q = QType.from_min_max(
                 max_val=act_astats['range_out'][0]['max'],
                 min_val=act_astats['range_out'][0]['min'],
                 bits=self._force_width)
             act_o_q.q = min(act_o_q.q, conv_qrec.out_qs[0].q)
             if force_out and force_out.q:
                 if force_out.q > act_max_q or force_out.q > conv_qrec.out_qs[
                         0].q:
                     # We cannot shift left in the kernel
                     # TODO - This should try to increase the input q and perhaps the width
                     # Unlikely to happen
                     raise NotImplementedError()
                 act_o_q.q = force_out.q
         act_qrec = SymmetricQuantizationRecord(in_qs=conv_qrec.out_qs,
                                                out_qs=[act_o_q])
         result[NodeId(node, act_node)] = act_qrec
         return SymmetricQuantizationRecord(in_qs=in_qs,
                                            out_qs=act_qrec.out_qs), result
     else:
         return self.default_quantize_fusion(G,
                                             node,
                                             in_qs,
                                             force_out=force_out)
Code example #30
File: ssd_postprocess.py Project: brupa9/gap_sdk
 def _quantize(cls, params, in_qs, stats, **kwargs):
     force_out_qs, _ = cls.get_mult_opts(**kwargs)
     force_out_q = force_out_qs and force_out_qs[0]
     if force_out_q:
         return None
     o_boxes_qtype = QType(min_val=-2,
                           max_val=2,
                           dtype=np.int16,
                           scale=2**(-14))
     o_scores_qtype = in_qs[1]
     o_class_qtype = QType(scale=1, dtype=np.int8)
     return MultSSDDetectorQuantizationRecord(in_qs=in_qs,
                                              out_qs=[
                                                  o_boxes_qtype,
                                                  o_scores_qtype,
                                                  o_class_qtype,
                                                  o_class_qtype
                                              ])