Ejemplo n.º 1
0
def overwrite_range(in_ranges, out_ranges, in_dtypes=None, out_dtypes=None):
    """Build a quantization handler that supplies fixed input/output qtypes.

    Args:
        in_ranges: Sequence of argument tuples (e.g. ``(min, max)``), one per
            input, unpacked into ``QType.from_min_max_sq``.
        out_ranges: Same as ``in_ranges`` but for the outputs.
        in_dtypes: Optional dtypes paired with ``in_ranges``. ``np.int8`` is
            used for every input when omitted.
        out_dtypes: Optional dtypes paired with ``out_ranges``. ``np.int8``
            is used for every output when omitted.

    Returns:
        A ``handler(G, qrec, node, in_qs=None, out_qs=None)`` callable that
        returns a scaled ``QRec``.
    """
    in_dtypes = [np.int8] * len(in_ranges) if in_dtypes is None else in_dtypes
    out_dtypes = ([np.int8] * len(out_ranges)
                  if out_dtypes is None else out_dtypes)

    def build_prototypes(ranges, dtypes):
        # one prototype qtype per (range, dtype) pair
        return [
            QType.from_min_max_sq(*bounds, dtype=dtype)
            for bounds, dtype in zip(ranges, dtypes)
        ]

    proto_in_qs = build_prototypes(in_ranges, in_dtypes)
    proto_out_qs = build_prototypes(out_ranges, out_dtypes)

    def merge(existing, prototypes, fallback):
        # When the record carries no qtypes, hand back the caller's list.
        if not existing:
            return fallback
        # Otherwise keep every qtype that is already set and fill the
        # unset/missing positions from the prototypes.
        return [
            current or proto
            for current, proto in zip_longest(existing, prototypes)
        ]

    def handler(G, qrec, node, in_qs=None, out_qs=None):
        if qrec is None:
            # no record yet: start from copies of the prototype lists
            return QRec.scaled(in_qs=list(proto_in_qs),
                               out_qs=list(proto_out_qs))
        return QRec.scaled(in_qs=merge(qrec.in_qs, proto_in_qs, in_qs),
                           out_qs=merge(qrec.out_qs, proto_out_qs, out_qs))

    return handler
Ejemplo n.º 2
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the quantization record for an activation node.

        For hard swish / hard sigmoid the input is re-ranged to [-6, 6] when
        ``scale * 2**(bits - 1)`` of the first input qtype is below 6. The
        output qtype is a copy of the forced output qtype when one is given,
        otherwise it is derived from ``stats['range_out'][0]``.

        Returns:
            A ``MultQuantizationRecord`` holding the input qtypes and one
            output qtype.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        hard_types = (HSwishActivationParameters, HSigmoidActivationParameters)
        if isinstance(params, hard_types):
            first_q = in_qs[0]
            if first_q.scale * pow(2, first_q.bits - 1) < 6:
                in_qs = [QType.from_min_max_sq(-6, 6, dtype=first_q.dtype)]

        if not force_out_q:
            out_range = stats['range_out'][0]
            out_q = QType.from_min_max_sq(out_range['min'],
                                          out_range['max'],
                                          dtype=out_dtype)
            return MultQuantizationRecord(in_qs=in_qs, out_qs=[out_q])

        fusion = kwargs.get('fusion', None)
        in_conv_fusion = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        # Inside a convolution fusion the forced output qtype is also used
        # as the input qtype, except for the activation types listed here.
        if in_conv_fusion and not isinstance(
                params,
            (SigmoidActivationParameters, TanHActivationParameters,
             HSwishActivationParameters, HSigmoidActivationParameters)):
            in_qs = [deepcopy(force_out_q)]
        out_q = deepcopy(force_out_q)
        return MultQuantizationRecord(in_qs=in_qs, out_qs=[out_q])
Ejemplo n.º 3
0
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Return a scaled record with a fixed [-8, 8] int8 input and [-1, 1] output.

     Both qtypes are created with ``forced=True``.

     Returns:
         The scaled ``QRec``, or ``None`` when an output qtype is being
         forced on this node.
     """
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     if force_out_qs and force_out_qs[0]:
         return None
     fixed_in_q = QType.from_min_max_sq(-8, 8, dtype=np.int8, forced=True)
     fixed_out_q = QType.from_min_max_sq(min_val=-1.0,
                                         max_val=1.0,
                                         dtype=out_dtype,
                                         forced=True)
     return QRec.scaled(in_qs=[fixed_in_q], out_qs=[fixed_out_q])
Ejemplo n.º 4
0
 def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs):
     """Derive int8 input qtypes from the recorded input ranges.

     Returns:
         A list with one entry per element of ``params.in_dims``: ``None``
         where the dimension is ``None``, otherwise a qtype built from
         ``stats['range_in'][idx]``.
     """
     qtypes = []
     for idx, dim in enumerate(params.in_dims):
         if dim is None:
             qtypes.append(None)
             continue
         in_range = stats['range_in'][idx]
         qtypes.append(
             QType.from_min_max_sq(in_range['min'],
                                   in_range['max'],
                                   dtype=np.int8))
     return qtypes
Ejemplo n.º 5
0
    def _common(cls, node: TFLiteNode, **kwargs):
        """Import an SSD detector node into the graph.

        Creates the ``SSDDetectorParameters`` node, adds a graph output for
        each of the four detector outputs that has no entry in
        ``all_nodes``, connects the input edges and, when
        ``opts['load_quantization']`` is set, stores a scaled quantization
        record for the new node in ``G.quantization``.

        Returns:
            The created ``SSDDetectorParameters`` instance.
        """
        custom_opts = node.get_custom_options()
        G = kwargs['G']
        opts = kwargs['opts']
        all_nodes = kwargs['all_nodes']
        importer = kwargs['importer']

        inputs = [all_nodes[t] for t in node.input]
        # always consider four outputs; missing ones are represented by None
        outputs = []
        for out_idx in range(4):
            if out_idx < len(node.output):
                outputs.append(all_nodes.get(node.output[out_idx]))
            else:
                outputs.append(None)

        if 'max_bb_before_nms' not in custom_opts:
            custom_opts['max_bb_before_nms'] = 300

        params = SSDDetectorParameters(node.name, parameters=custom_opts)

        # covers the case where not all outputs are generated by the conversion tool
        overriden_outputs = []
        for out_idx, existing in enumerate(outputs):
            if existing:
                overriden_outputs.append(node.output[out_idx])
            else:
                oparams = G.add_output()
                otensor = TensorBase("Detect_%s" % out_idx)
                overriden_outputs.append(otensor)
                importer.provisional_outputs[otensor] = (oparams, 0, None)
        node.override_outputs(overriden_outputs)

        for to_idx, source in enumerate(inputs):
            edge = NNEdge(from_node=source[0],
                          to_node=params,
                          from_idx=source[1],
                          to_idx=to_idx)
            G.add_edge(edge)

        if not opts.get('load_quantization'):
            return params

        # asymmetric or unsigned input qtypes are replaced by a qtype built
        # from the same min/max; all others are used as they are
        in_qtypes = []
        for tensor in node.input:
            tensor_q = tensor.qtype
            if tensor_q.is_asymmetric or not tensor_q.signed:
                tensor_q = QType.from_min_max_sq(tensor_q.min_val,
                                                 tensor_q.max_val)
            in_qtypes.append(tensor_q)

        o_boxes_qtype = QType(min_val=-2,
                              max_val=2,
                              dtype=np.int16,
                              scale=2**(-14))
        o_scores_qtype = node.input[1].qtype
        o_class_qtype = QType(scale=1, dtype=np.int8)
        out_qtypes = [
            o_boxes_qtype, o_class_qtype, o_scores_qtype, o_class_qtype
        ]
        G.quantization[NodeId(params)] = QRec.scaled(in_qs=in_qtypes,
                                                     out_qs=out_qtypes)
        return params
Ejemplo n.º 6
0
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Return a record whose single output is quantized over [-1, 1].

     The input qtypes are passed through unchanged.

     Returns:
         A ``MultQuantizationRecord``, or ``None`` when an output qtype is
         being forced on this node.
     """
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     if force_out_qs and force_out_qs[0]:
         return None
     unit_range_q = QType.from_min_max_sq(min_val=-1.0,
                                          max_val=1.0,
                                          dtype=out_dtype)
     return MultQuantizationRecord(in_qs=in_qs, out_qs=[unit_range_q])
    def get_weights_qtype_by_channel(cls, filter_shape, out_idx, weights_node):
        """Compute a per-channel qtype for the weights held by ``weights_node``.

        The minimum and maximum are reduced over every axis except
        ``out_idx`` and clamped so that each range includes zero. Channels
        whose resulting scale is below ``QType.kInt8NearZeroTolerance`` have
        their weights set to zero and their scale set to 1; in that case the
        zeroed array is written back to ``weights_node.value`` and
        ``weights_node.qtype`` is cleared.

        Args:
            filter_shape: Shape of the filter; must have 2 or 4 dimensions.
            out_idx: Index of the axis the scales are computed along.
            weights_node: Node exposing ``dqvalue`` (read) and ``value`` /
                ``qtype`` (written only when tiny scales are found).

        Returns:
            The computed qtype. The weights themselves are not quantized
            here.
        """
        assert len(filter_shape) == 4 or len(filter_shape) == 2
        dqweights = weights_node.dqvalue
        filter_axis = tuple(idx for idx in range(len(filter_shape))
                            if idx != out_idx)
        # get the minimums and maximums above and below 0
        w_mins = np.minimum(np.min(dqweights, axis=filter_axis), 0)
        w_maxes = np.maximum(np.max(dqweights, axis=filter_axis), 0)
        wqtype = QType.from_min_max_sq(w_mins,
                                       w_maxes,
                                       quantized_dimension=out_idx,
                                       narrow_range=True,
                                       scale_zero_as_one=True)
        tiny_weight_scales = wqtype.scale < QType.kInt8NearZeroTolerance
        if np.count_nonzero(tiny_weight_scales):
            # Sets weights scaled under a very small value to zero to avoid
            # silly mult biases.
            # The selector mixes slice objects with a boolean mask, so it is
            # used for indexing only. It must not be handed to np.any():
            # converting such a ragged tuple to an array raises ValueError on
            # recent NumPy, and count_nonzero above already guarantees that
            # at least one channel is selected.
            selector = tuple(
                slice(None) if idx != out_idx else tiny_weight_scales
                for idx, _ in enumerate(dqweights.shape))
            dqweights[selector] = 0
            wqtype.scale = np.where(tiny_weight_scales, 1, wqtype.scale)
            weights_node.value = dqweights
            weights_node.qtype = None

        return wqtype
Ejemplo n.º 8
0
    def _update_qrecs(self, G, qrecs, all_nodes, ranges_dict):
        """Write output qtypes into ``qrecs`` for the entries of ``all_nodes``.

        Args:
            G: Graph; only nodes whose name is contained in it are updated.
            qrecs: Mapping from ``NodeId`` to quantization record. Missing
                records are created with all qtypes set to ``None``.
            all_nodes: Mapping whose values are 4-tuples of
                ``(node, output index, <unused>, qtype)``.
            ranges_dict: Per node name overrides. An entry provides
                ``"range"`` (min, max) and optionally ``"dtype"``,
                ``"n_bits"`` and ``"per_channel"``; when present it replaces
                the qtype from ``all_nodes``.
        """
        for node, idx, _, qtype in all_nodes.values():
            has_override = node.name in ranges_dict
            # nothing to record for this output
            if qtype is None and not has_override:
                continue
            if node.name not in G:
                continue

            nid = NodeId(node)
            qrec = qrecs.get(nid)
            if not qrec:
                qrec = QRec.scaled(
                    in_qs=[None] * G.num_in_edges(node),
                    out_qs=[None] * len(G.indexed_out_edges(node)))
                qrecs[nid] = qrec

            if has_override:
                entry = ranges_dict[node.name]
                out_min, out_max = entry["range"]
                qtype = QType.from_min_max_sq(
                    out_min,
                    out_max,
                    dtype=entry.get("dtype", np.int8),
                    bits=entry.get("n_bits", 8),
                    quantized_dimension=entry.get("per_channel", None))
            qrec.out_qs[idx] = qtype
Ejemplo n.º 9
0
    def match(self, G: GraphView, set_identity: bool = True):
        """Propagate adjusted input qtypes up from sigmoid / swish activations.

        For every sigmoid, hard sigmoid and hard swish node a new input
        qtype is built (range 0..6 for the sigmoids, 0..``max_val * 6`` for
        hard swish, keeping the dtype of the current first input qtype) and
        passed to ``propagate_qtype_up`` along the edge feeding input 0.

        Returns:
            ``None`` when ``G.quantization`` is empty, otherwise ``False``.
        """
        if not G.quantization:
            return
        activation_types = (SigmoidActivationParameters,
                            HSigmoidActivationParameters,
                            HSwishActivationParameters)
        targets = [n for n in G.nodes() if isinstance(n, activation_types)]
        # every record is looked up before any propagation takes place
        target_qrecs = [G.quantization[NodeId(n)] for n in targets]
        for act_node, act_qrec in zip(targets, target_qrecs):
            first_in_edge = [
                e for e in G.in_edges(act_node.name) if e.to_idx == 0
            ][0]
            in_q = act_qrec.in_qs[0]
            lower, upper = in_q.min_val, in_q.max_val
            if isinstance(
                    act_node,
                (HSigmoidActivationParameters, SigmoidActivationParameters)):
                # Hard sigmoid implements a RELU, be sure 6 can be representable
                lower, upper = 0, 6
            elif isinstance(act_node, HSwishActivationParameters):
                lower, upper = 0, in_q.max_val * 6

            widened_q = QType.from_min_max_sq(min_val=lower,
                                              max_val=upper,
                                              dtype=in_q.dtype)
            propagate_qtype_up(G, widened_q, first_in_edge)

        if set_identity:
            self.set_identity(G)

        return False
Ejemplo n.º 10
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build a scaled record with symmetric inputs and a forced dtype/zero point.

        Returns:
            The scaled ``QRec``, or ``None`` when the inputs cannot be made
            symmetric or the forced output qtype is asymmetric.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        # NOTE: The autotiler kernel scales and clips after the operation and before the
        # activation so there is no change if this is in a fusion or not
        if isinstance(params, MatrixAddParameters):
            scaled_idx = params.force_quantized_index
        else:
            scaled_idx = None

        in_qs = cls.force_symmetric_and_dtype(in_qs)
        if in_qs is None:
            return None

        if not force_out_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            out_range = stats['range_out'][0]
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=out_dtype)
        else:
            o_q = deepcopy(force_out_q)
            if o_q.is_asymmetric:
                return None

        o_q.set_forced(flags=['dtype', 'zero_point'])
        forced_in_qs = []
        for in_q in in_qs:
            forced_in_qs.append(in_q.set_forced(flags=['dtype', 'zero_point']))
        return QRec.scaled(in_qs=forced_in_qs,
                           out_qs=[o_q],
                           scaled_idx=scaled_idx)
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Build the NE16 scaled quantization record for a relu.

     Input and output share one qtype: either the forced output qtype or an
     asymmetric qtype spanning 0 to the relu upper bound (the recorded
     output maximum when the relu has no upper bound).

     Returns:
         The scaled ``QRec``, or ``None`` when the forced output qtype
         cannot be used.

     Raises:
         NotImplementedError: if ``params.lower_bound`` is not zero.
     """
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     force_out_q = force_out_qs and force_out_qs[0]
     in_q = in_qs[0]
     if params.lower_bound != 0:
         raise NotImplementedError(
             'relu with non zero lower bound is not implemented for NE16 quantizer'
         )
     cls.check_valid_ranges(params, stats, idx=0, dirs='out')
     if not force_out_q:
         if params.upper_bound is not None:
             upper = params.upper_bound
         else:
             upper = stats['range_out'][0]['max']
         in_q = QType.from_min_max_sq(0,
                                      upper,
                                      dtype=in_q.dtype,
                                      asymmetric=True,
                                      ne16=True,
                                      dont_copy_attr=['ne16'])
     else:
         # since the relu is done by setting 0 zero point and scaling to the upper bound
         # we cannot be forced to something that does not meet this requirement
         if not force_out_q.zero_point_asymmetric_zero:
             return None
         bound_mismatch = params.upper_bound is not None and not np.isclose(
             force_out_q.max, params.upper_bound, atol=0.01)
         if bound_mismatch:
             return None
         # if the output has been forced then propagate it
         in_q = force_out_q
     o_q = deepcopy(in_q)
     o_q.set_forced()
     qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True)
     compute_in_out_scale(qrec)
     return qrec
Ejemplo n.º 12
0
 def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs):
     """Derive int8 input qtypes from the recorded input ranges.

     An input is quantized asymmetrically only when ``in_qs`` is given, its
     current qtype is asymmetric and
     ``cls.can_handle_asymmetric_input`` accepts the node.

     Returns:
         A list with one entry per element of ``params.in_dims``; ``None``
         where the dimension is ``None``.
     """
     qtypes = []
     for idx, dim in enumerate(params.in_dims):
         if dim is None:
             qtypes.append(None)
             continue
         range_min = stats['range_in'][idx]['min']
         range_max = stats['range_in'][idx]['max']
         use_asymmetric = (in_qs and in_qs[idx].asymmetric
                           and cls.can_handle_asymmetric_input(
                               params, **kwargs))
         qtypes.append(
             QType.from_min_max_sq(range_min,
                                   range_max,
                                   dtype=np.int8,
                                   asymmetric=use_asymmetric))
     return qtypes
Ejemplo n.º 13
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the quantization record for a filter, possibly inside a fusion.

        The weights qtype is computed from the weights node value. Inside a
        ``conv_active`` / ``conv_active_pool`` fusion the output range comes
        from the statistics of the filter (sigmoid, tanh, hard swish), is
        fixed to 0..6 (hard sigmoid), or comes from the statistics of the
        activation (anything else).

        Returns:
            A ``MultScalableFilterQuantizationRecord`` with input, weights
            and bias qtypes, the output qtype and the multiplicative bias
            qtype.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        opts = kwargs['opts']
        fusion = kwargs.get('fusion', None)
        G = kwargs['G']
        weights_node = cls.get_weights_node(G, fusion if fusion else params)
        min_val, max_val = None, None
        weights_q = QType.from_array_sq(
            arr=weights_node.dqvalue,
            quantized_dimension=cls.get_quantized_dimension(params, opts),
            dtype=np.int8,
            narrow_range=opts['narrow_weights'])

        if fusion and fusion.fusion_type in [
                'conv_active_pool', 'conv_active'
        ]:
            all_stats = kwargs['all_stats']
            contained = fusion.contained_nodes()
            activation = contained[1]
            # default: statistics of the first node in the fusion
            stats = all_stats[NodeId(fusion, contained[0])]

            if isinstance(
                    activation,
                (SigmoidActivationParameters, TanHActivationParameters,
                 HSwishActivationParameters)):
                # these keep the statistics selected above
                pass
            elif isinstance(activation, HSigmoidActivationParameters):
                # Hard sigmoid implements a RELU, be sure 6 can be representable
                min_val, max_val = 0, 6
            else:
                # Take stats from activation after the convolution
                stats = all_stats[NodeId(fusion, activation)]

        if min_val is None or max_val is None:
            out_range = stats['range_out'][0]
            min_val, max_val = out_range['min'], out_range['max']

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_sq(min_val=min_val,
                                        max_val=max_val,
                                        dtype=out_dtype)
        in_q = in_qs[0]
        biases_q = QType(dtype=np.int32, scale=weights_q.scale * in_q.scale)
        mul_biases_q = MultMulBiasScaleQType.from_filter(
            in_q, weights_q, o_q, params)
        # returning the new weights and biases qs will force backprop
        # TODO - ACC_Q LOOKS WRONG AFTER THIS
        return MultScalableFilterQuantizationRecord(
            in_qs=[in_q, weights_q, biases_q],
            out_qs=[o_q],
            acc_q=biases_q,
            calc_q=biases_q,
            mul_biases_q=mul_biases_q)
Ejemplo n.º 14
0
 def _common(cls, node, **kwargs):
     """Import a logistic node as a sigmoid or hard sigmoid activation.

     When quantization is being loaded and the hard sigmoid is not
     requested, the input qtype is set to int8 over [-8, 8] before
     delegating to the parent ``_common``.
     """
     opts = kwargs['opts']
     use_hard = opts.get('use_hard_sigmoid')
     if opts.get('load_quantization') and not use_hard:
         kwargs['in_qs'] = [QType.from_min_max_sq(-8, 8, dtype=np.int8)]
     if use_hard:
         params_class = HSigmoidActivationParameters
     else:
         params_class = SigmoidActivationParameters
     return super(Logistic, cls)._common(node,
                                         params_class=params_class,
                                         **kwargs)
Ejemplo n.º 15
0
 def _common(cls, node, **kwargs):
     """Import a tanh node as a tanh or hard tanh activation.

     When quantization is being loaded and ``use_lut_tanh`` is set, the
     input qtype is set to int8 over [-8, 8] before delegating to the
     parent ``_common``.
     """
     opts = kwargs['opts']
     if opts.get('load_quantization') and opts.get('use_lut_tanh'):
         kwargs['in_qs'] = [QType.from_min_max_sq(-8, 8, dtype=np.int8)]
     if opts.get('use_lut_tanh'):
         params_class = TanHActivationParameters
     else:
         params_class = HTanHActivationParameters
     return super(Tanh, cls)._common(node,
                                     params_class=params_class,
                                     **kwargs)
Ejemplo n.º 16
0
 def _load_quantization(qrecs, node_recs):
     """Store a constant quantization record for every tensor that has a qtype.

     Args:
         qrecs: Mapping that receives the records, keyed by the ``NodeId``
             of the first element of each ``node_recs`` value.
         node_recs: Mapping from tensor to a sequence whose first element
             is the node the tensor belongs to.
     """
     for tensor, rec in node_recs.items():
         qtype = tensor.qtype
         if not qtype:
             continue
         # asymmetric scaled qtypes are rebuilt from their min/max
         if qtype.is_sq and qtype.is_asymmetric:
             qtype = QType.from_min_max_sq(
                 qtype.min_val,
                 qtype.max_val,
                 quantized_dimension=qtype.quantized_dimension)
         record = MultConstantQuantizationRecord(in_qs=[qtype],
                                                 out_qs=[qtype])
         qrecs[NodeId(rec[0])] = record
Ejemplo n.º 17
0
 def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs):
     """Derive unsigned input qtypes from the recorded input ranges.

     The dtype is ``np.uint8`` when ``opts['force_input_size']`` is 8 (the
     default) and ``np.uint16`` otherwise.

     Returns:
         A list with one entry per element of ``params.in_dims``; ``None``
         where the dimension is ``None`` or no range was recorded.
     """
     opts = kwargs['opts']
     if opts.get('force_input_size', 8) == 8:
         in_dtype = np.uint8
     else:
         in_dtype = np.uint16

     qtypes = []
     for idx, dim in enumerate(params.in_dims):
         if dim is None:
             qtypes.append(None)
             continue
         in_range = stats['range_in'][idx]
         if not in_range:
             qtypes.append(None)
             continue
         qtypes.append(
             QType.from_min_max_sq(in_range['min'],
                                   in_range['max'],
                                   dtype=in_dtype,
                                   asymmetric=len(in_range) == 1))
     return qtypes
Ejemplo n.º 18
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build a float quantization record for an RNN node.

        Works on a deep copy of ``in_qs``. The ``i_2_i_w`` and ``r_2_i_w``
        input qtypes are rebuilt in the float dtype from their current
        min/max; the latter records the node feeding ``i_2_i_w`` in
        ``concatenated_nodes``.

        Returns:
            A float ``QRec`` with the updated input qtypes and one output
            qtype of the float dtype.
        """
        _, dtype = cls.get_float_opts(**kwargs)
        names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
        edges = kwargs['G'].indexed_in_edges(params.name)
        in_qs = deepcopy(in_qs)

        i2i_idx = names['i_2_i_w']
        current_q = in_qs[i2i_idx]
        in_qs[i2i_idx] = QType.from_min_max_sq(current_q.min_val,
                                               current_q.max_val,
                                               dtype=dtype,
                                               dont_generate_value=True)

        r2i_idx = names['r_2_i_w']
        current_q = in_qs[r2i_idx]
        i2i_source_name = edges[names['i_2_i_w']].from_node.name
        in_qs[r2i_idx] = QType.from_min_max_sq(
            current_q.min_val,
            current_q.max_val,
            dtype=dtype,
            concatenated_nodes=[i2i_source_name])

        out_q = QType(dtype=dtype)
        return QRec.float(in_qs=in_qs, out_qs=[out_q], float_dtype=dtype)
Ejemplo n.º 19
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the scaled quantization record for an activation node.

        Returns:
            The scaled ``QRec`` with its in/out scale computed, or ``None``
            when the node is not fused and has an int32 input, or when the
            forced output qtype differs from the input in signedness or (for
            non int32 inputs) zero point.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        fusion = kwargs.get('fusion', None)
        in_q = in_qs[0]
        if not fusion and in_q.dtype == np.int32:
            return None

        if isinstance(params, (HSwishActivationParameters, HSigmoidActivationParameters)):
            # re-range the input when scale * 2**(bits - 1) is below 6
            if in_q.scale * pow(2, in_q.bits - 1) < 6:
                in_q = QType.from_min_max_sq(-6, 6, dtype=in_q.dtype, forced=True)
        elif isinstance(params, SigmoidActivationParameters):
            in_q = QType.from_min_max_sq(-8, 8, dtype=in_q.dtype, forced=True)

        if not force_out_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            zero_point = in_q.zero_point if in_q.zero_point != 0 else None
            out_range = stats['range_out'][0]
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=in_q.dtype,
                                        zero_point=zero_point)
        else:
            if force_out_q.signed != in_q.signed:
                return None
            in_conv_fusion = fusion and fusion.fusion_type in [
                'conv_active_pool', 'conv_active']
            if in_conv_fusion and not isinstance(
                    params, (SigmoidActivationParameters, HTanHActivationParameters,
                             HSwishActivationParameters, HSigmoidActivationParameters)):
                in_q = deepcopy(force_out_q)
            o_q = deepcopy(force_out_q)
            # activation cannot move zeropoint unless it is a reduction step
            if o_q.zero_point != in_q.zero_point and in_q.dtype != np.int32:
                return None

        qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q])
        # NOTE(review): SigmoidScaledSymmetricMult does not follow the
        # *Parameters naming of the other types tested in this method -
        # confirm it is the intended class.
        if isinstance(params, (SigmoidScaledSymmetricMult, TanHActivationParameters)):
            extra_scale = QType.Pow2(bits=32, q=7, signed=True).scale/qrec.in_qs[0].scale
            compute_in_out_scale(qrec, extra_scale=extra_scale)
        elif isinstance(params, HSwishActivationParameters):
            compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1/6)
        else:
            compute_in_out_scale(qrec)
        return qrec
Ejemplo n.º 20
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Return a record that passes the inputs through and sets one output.

        The output qtype is a copy of the forced output qtype when one is
        given, otherwise it is built from ``stats['range_out'][0]``.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        if not force_out_q:
            out_range = stats['range_out'][0]
            out_q = QType.from_min_max_sq(out_range['min'],
                                          out_range['max'],
                                          dtype=out_dtype)
        else:
            out_q = deepcopy(force_out_q)
        return MultQuantizationRecord(in_qs=in_qs, out_qs=[out_q])
Ejemplo n.º 21
0
 def get_weights_qtype_by_tensor(cls, weights_node):
     """Compute one qtype covering all weights held by ``weights_node``.

     The range is taken over the whole ``dqvalue`` array and clamped so
     that it always includes zero. ``weights_node`` is only read.

     Returns:
         The qtype built with ``narrow_range`` and ``scale_zero_as_one``.
     """
     values = weights_node.dqvalue
     range_min = np.minimum(np.min(values), 0)
     range_max = np.maximum(np.max(values), 0)
     return QType.from_min_max_sq(range_min,
                                  range_max,
                                  narrow_range=True,
                                  scale_zero_as_one=True)
Ejemplo n.º 22
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the scaled quantization record for a two (or three) input multiply.

        When ``cls.move_constant`` reports a constant second input, its
        qtype is computed per channel from the constant value and the graph
        update is flagged as requiring adjust. A third input, when present,
        receives an int32 qtype scaled by the product of the two input
        scales.

        Returns:
            The scaled ``QRec``, or ``None`` when the forced output qtype is
            not symmetric int8.
        """
        # copy in_qs because we may modify it
        in_qs = in_qs.copy()
        # not used further; the lookup is kept so that a missing 'opts'
        # entry still raises KeyError
        opts = kwargs['opts']
        fusion = kwargs.get('fusion', None)

        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        G = kwargs['G']
        # only attempt channel scaling if the second input is constant
        in2_node, in_qs = cls.move_constant(G, fusion or params, in_qs)
        if not in2_node:
            in_q2 = in_qs[1].make_symmetric_signed()
        else:
            kwargs['graph_update']['requires_adjust'] = True
            in_q2 = QType.from_array_sq(arr=in2_node.dqvalue,
                                        quantized_dimension=0,
                                        dtype=np.int8,
                                        narrow_range=True,
                                        bits=8)

        in_q1 = in_qs[0].make_symmetric_signed()

        min_val, max_val = cls.get_min_max(fusion, stats, kwargs['all_stats'],
                                           params)

        if not force_out_q:
            o_q = QType.from_min_max_sq(min_val=min_val,
                                        max_val=max_val,
                                        dtype=out_dtype)
        else:
            o_q = force_out_q
            # can't be forced to something not np.int8
            if o_q.dtype != np.int8 or o_q.asymmetric:
                return None
            LOG.warning(
                'node %s output forced to range %s/%s - actual range %s/%s %s',
                params.name, o_q.min, o_q.max, min_val, max_val,
                "asymmetric" if o_q.asymmetric else "symmetric")

        out_in_qs = [in_q1, in_q2]
        if len(in_qs) == 3:
            out_in_qs.append(
                QType(dtype=np.int32, scale=in_q1.scale * in_q2.scale))

        mul_biases_q = MultMulBiasScaleQType()
        mul_biases_q.scale = in_q1.scale * in_q2.scale / o_q.scale

        return QRec.scaled(in_qs=out_in_qs,
                           out_qs=[o_q],
                           mul_biases_q=mul_biases_q)
Ejemplo n.º 23
0
 def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs):
     """Derive input qtypes from the recorded input ranges.

     The dtype is ``np.uint8`` when ``cls.can_ne16`` accepts the node and
     ``np.int8`` otherwise. An input is quantized asymmetrically when its
     current qtype is asymmetric and ``cls.can_handle_asymmetric_input``
     accepts the node.

     Returns:
         A list with one entry per element of ``params.in_dims``; ``None``
         where the dimension is ``None``.
     """
     opts = kwargs['opts']
     fusion = kwargs.get('fusion', None)
     qtypes = []
     for idx, dim in enumerate(params.in_dims):
         if dim is None:
             qtypes.append(None)
             continue
         range_min = stats['range_in'][idx]['min']
         range_max = stats['range_in'][idx]['max']
         if cls.can_ne16(params, opts, fusion):
             in_dtype = np.uint8
         else:
             in_dtype = np.int8
         use_asymmetric = (in_qs[idx].is_asymmetric
                           and cls.can_handle_asymmetric_input(
                               params, **kwargs))
         qtypes.append(
             QType.from_min_max_sq(range_min,
                                   range_max,
                                   dtype=in_dtype,
                                   asymmetric=use_asymmetric))
     return qtypes
Ejemplo n.º 24
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the NE16 scaled record for a relu; input and output share one qtype.

        The shared qtype is the forced output qtype when one is given,
        otherwise an asymmetric uint8 qtype from 0 to the relu upper bound
        (the recorded output maximum when the relu has no upper bound).
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        # always replaced below; the indexing is kept so that an empty
        # in_qs still raises
        shared_q = in_qs[0]

        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if not force_out_q:
            if params.upper_bound is not None:
                upper = params.upper_bound
            else:
                upper = stats['range_out'][0]['max']
            shared_q = QType.from_min_max_sq(0,
                                             upper,
                                             dtype=np.uint8,
                                             asymmetric=True)
        else:
            # if the output has been forced then propagate it
            shared_q = force_out_q

        return QRec.scaled(in_qs=[shared_q], out_qs=[shared_q], ne16=True)
Ejemplo n.º 25
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Return a scaled record that passes the inputs through and sets one output.

        The output qtype is a copy of the forced output qtype when one is
        given. Otherwise it is built from ``stats['range_out'][0]`` and is
        asymmetric when ``opts['allow_asymmetric']`` is set.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        opts = kwargs['opts']
        if not force_out_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            out_range = stats['range_out'][0]
            out_q = QType.from_min_max_sq(out_range['min'],
                                          out_range['max'],
                                          dtype=out_dtype,
                                          asymmetric=opts['allow_asymmetric'])
        else:
            out_q = deepcopy(force_out_q)

        return QRec.scaled(in_qs=in_qs, out_qs=[out_q])
Ejemplo n.º 26
0
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Build the quantization record for a constant.

     The output qtype is chosen in this order: a copy of the forced output
     qtype, a copy of the qtype the constant already has, or a qtype built
     from ``stats['range_out'][0]``.
     """
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     force_out_q = force_out_qs and force_out_qs[0]
     # if forced set what we are forced to
     if force_out_q:
         return MultConstantQuantizationRecord(
             out_qs=[deepcopy(force_out_q)])
     # if value is already quantized then keep the same quantization
     if params.qtype:
         return MultConstantQuantizationRecord(
             out_qs=[deepcopy(params.qtype)])
     # derive quantization from statistics
     out_range = stats['range_out'][0]
     stats_q = QType.from_min_max_sq(min_val=out_range['min'],
                                     max_val=out_range['max'],
                                     dtype=out_dtype)
     return MultConstantQuantizationRecord(out_qs=[stats_q])
Ejemplo n.º 27
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build a record from the node's own input and output dtypes.

        The input qtype has scale 1 in ``params.input_dtype``; the output
        qtype covers [-1, 1] with narrow range in ``params.output_dtype``.

        Returns:
            A ``MultQuantizationRecord``, or ``None`` when an output qtype
            is being forced on this node.
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        if force_out_qs and force_out_qs[0]:
            return None

        out_dtype = params.output_dtype
        in_dtype = params.input_dtype
        unit_scale_in_q = QType(scale=1, dtype=in_dtype)
        unit_range_out_q = QType.from_min_max_sq(-1,
                                                 1,
                                                 dtype=out_dtype,
                                                 narrow_range=True)
        return MultQuantizationRecord(in_qs=[unit_scale_in_q],
                                      out_qs=[unit_range_out_q])
Ejemplo n.º 28
0
    def _quantize_sw(cls,
                     params,
                     in_qs,
                     stats,
                     inout_dtype,
                     asym=False,
                     **kwargs):
        """Build a scaled record with a fixed in/out dtype, symmetric or asymmetric.

        Args:
            inout_dtype: dtype required of the output qtype.
            asym: When false the inputs are made symmetric and both dtype and
                zero point are forced; when true only the dtype is forced and
                the record is flagged ``ne16``.

        Returns:
            The scaled ``QRec``, or ``None`` when the inputs cannot be made
            symmetric or the forced output qtype has the wrong dtype or is
            asymmetric while ``asym`` is false.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        # NOTE: The autotiler kernel scales and clips after the operation and before the
        # activation so there is no change if this is in a fusion or not
        if isinstance(params, MatrixAddParameters):
            scaled_idx = params.force_quantized_index
        else:
            scaled_idx = None
        if not asym:
            in_qs = cls.force_symmetric_and_dtype(in_qs)
        if in_qs is None:
            return None

        if not force_out_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            out_range = stats['range_out'][0]
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=inout_dtype,
                                        asymmetric=asym,
                                        dont_copy_attr=['ne16'],
                                        ne16=asym)
        else:
            o_q = deepcopy(force_out_q)
            if (o_q.asymmetric and not asym) or o_q.dtype != inout_dtype:
                return None
            # important to set ne16 here so the o_q matches the force_out_q since
            # this attribute is not copied by deepcopy
            if force_out_q.attr.ne16:
                o_q.attr.ne16 = True

        if not asym:
            o_q.set_forced(flags=['dtype', 'zero_point'])
            in_qs = [
                in_q.set_forced(flags=['dtype', 'zero_point'])
                for in_q in in_qs
            ]
        else:
            o_q.set_forced(flags=['dtype'])
            in_qs = [in_q.set_forced(flags=['dtype']) for in_q in in_qs]

        return QRec.scaled(in_qs=in_qs,
                           out_qs=[o_q],
                           scaled_idx=scaled_idx,
                           ne16=asym)
Ejemplo n.º 29
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the add quantization record, passing the input qtypes through.

        The output qtype is a copy of the forced output qtype when one is
        given, otherwise it is built from ``stats['range_out'][0]``. For
        ``MatrixAddParameters`` the node's ``force_quantized_index`` is
        recorded as ``scaled_idx``.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        # NOTE: The autotiler kernel scales and clips after the operation and before the
        # activation so there is no change if this is in a fusion or not
        if isinstance(params, MatrixAddParameters):
            scaled_idx = params.force_quantized_index
        else:
            scaled_idx = None

        if not force_out_q:
            out_range = stats['range_out'][0]
            out_q = QType.from_min_max_sq(out_range['min'],
                                          out_range['max'],
                                          dtype=out_dtype)
        else:
            out_q = deepcopy(force_out_q)
        return MultAddQuantizationRecord(in_qs=in_qs,
                                         out_qs=[out_q],
                                         scaled_idx=scaled_idx)
Ejemplo n.º 30
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build a scaled record with symmetric int8 inputs.

        Returns:
            The scaled ``QRec``, or ``None`` when the inputs cannot be made
            symmetric int8 or the forced output qtype is asymmetric.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8)
        if in_qs is None:
            return None

        if not force_out_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            out_range = stats['range_out'][0]
            out_q = QType.from_min_max_sq(out_range['min'],
                                          out_range['max'],
                                          dtype=out_dtype)
        else:
            out_q = deepcopy(force_out_q)
            if out_q.is_asymmetric:
                return None

        return QRec.scaled(in_qs=in_qs, out_qs=[out_q])