Beispiel #1
0
def get_qrec_pass(G, qrec, node, copy_qs):
    """Return a scaled QRec whose missing sides are filled from ``copy_qs``.

    Args:
        G: Not referenced by this function.
        qrec: Existing quantization record, or None.
        node: Not referenced by this function.
        copy_qs: Quantization types used for any side of ``qrec`` that is
            empty or None, and for both sides when ``qrec`` is None.

    Returns:
        A new ``QRec.scaled`` record, or None when there is neither a
        record nor anything to copy.
    """
    if qrec is not None:
        # a truthy side is kept as is; an empty or None side falls back to copy_qs
        return QRec.scaled(in_qs=qrec.in_qs or copy_qs,
                           out_qs=qrec.out_qs or copy_qs)
    if not copy_qs:
        return None
    return QRec.scaled(in_qs=copy_qs, out_qs=copy_qs)
Beispiel #2
0
def pass_qtype(G, qrec, node, in_qs=None, out_qs=None):
    """Propagate quantization types through a node into a scaled QRec.

    Args:
        G: Not referenced by this function.
        qrec: Existing quantization record, or None.
        node: Not referenced by this function.
        in_qs: Quantization types to copy; takes precedence over ``out_qs``
            whenever it is not None (even if empty).
        out_qs: Quantization types to copy when ``in_qs`` is None.

    Returns:
        ``qrec`` unchanged when there is nothing to copy; otherwise a new
        ``QRec.scaled`` record where each side of ``qrec`` that is empty, or
        whose first entry is None, is replaced by the copied types.
    """
    copy_qs = in_qs if in_qs is not None else out_qs
    if not copy_qs:
        # nothing to propagate - leave whatever record exists untouched
        return qrec
    if qrec is None:
        # copy_qs is known to be non-empty here
        return QRec.scaled(in_qs=copy_qs, out_qs=copy_qs)
    new_in_qs = (copy_qs if not qrec.in_qs or qrec.in_qs[0] is None
                 else qrec.in_qs)
    new_out_qs = (copy_qs if not qrec.out_qs or qrec.out_qs[0] is None
                  else qrec.out_qs)
    return QRec.scaled(in_qs=new_in_qs, out_qs=new_out_qs)
Beispiel #3
0
 def handler(G, qrec, node, in_qs=None, out_qs=None):
     nonlocal proto_in_qs, proto_out_qs
     if qrec is None:
         return QRec.scaled(in_qs=list(proto_in_qs),
                            out_qs=list(proto_out_qs))
     new_in_qs = [
         q1 if q1 else q2 for q1, q2 in zip_longest(qrec.in_qs, proto_in_qs)
     ] if qrec.in_qs else in_qs
     new_out_qs = [
         q1 if q1 else q2
         for q1, q2 in zip_longest(qrec.out_qs, proto_out_qs)
     ] if qrec.out_qs else out_qs
     return QRec.scaled(in_qs=new_in_qs, out_qs=new_out_qs)
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize with a power-of-two input scale and a fixed [-1, 1] output.

        Returns None when a forced output has a forced scale or zero point,
        when its forced dtype is not reachable from the input dtype, or when
        the inputs cannot be made symmetric. An int16 input yields a
        ``QRec.symmetric`` record; every other case yields ``QRec.scaled``.
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        forced_q = force_out_qs and force_out_qs[0]
        opts = kwargs['opts']
        if forced_q:
            if forced_q.forced_scale or forced_q.forced_zero_point:
                return None
            # an int8 input may produce int8 or int16, anything else only int16
            allowed_dtypes = ([np.int8, np.int16]
                              if in_qs[0].dtype == np.int8 else [np.int16])
            if forced_q.forced_dtype and forced_q.dtype not in allowed_dtypes:
                return None

        in_qs = cls.force_symmetric_and_dtype(in_qs)
        if in_qs is None:
            return None

        # round the input scale up to the next power of two and force it
        src_q = in_qs[0]
        pow2_scale = np.power(2, np.ceil(np.log2(src_q.scale)))
        in_q = QType(min_val=src_q.min_val,
                     max_val=src_q.max_val,
                     dtype=src_q.dtype,
                     scale=pow2_scale,
                     forced=True)

        wants_8bit_out = in_q.dtype == np.int8 and (
            opts.get('softmax_out_8bits', None)
            or (forced_q and forced_q.dtype == np.int8))
        if wants_8bit_out:
            out_q8 = QType(min_val=-1, max_val=1, dtype=np.int8, scale=2**(-7))
            return QRec.scaled(in_qs=[in_q], out_qs=[out_q8])

        out_q16 = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
        if in_q.dtype == np.int16 and out_q16.dtype == np.int16:
            return QRec.symmetric(in_qs=[in_q], out_qs=[out_q16])
        return QRec.scaled(in_qs=[in_q], out_qs=[out_q16])
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Quantize a relu for NE16 with a shared zero-based asymmetric qtype.

     Raises:
         NotImplementedError: if ``params.lower_bound`` is not 0.

     Returns:
         A scaled QRec flagged ``ne16=True`` whose output qtype is a forced
         copy of the input qtype, or None when a forced output cannot
         represent the relu.
     """
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     forced_q = force_out_qs and force_out_qs[0]
     in_q = in_qs[0]
     if params.lower_bound != 0:
         raise NotImplementedError(
             'relu with non zero lower bound is not implemented for NE16 quantizer'
         )
     cls.check_valid_ranges(params, stats, idx=0, dirs='out')

     if not forced_q:
         if params.upper_bound is not None:
             top = params.upper_bound
         else:
             top = stats['range_out'][0]['max']
         in_q = QType.from_min_max_sq(0,
                                      top,
                                      dtype=in_q.dtype,
                                      asymmetric=True,
                                      ne16=True,
                                      dont_copy_attr=['ne16'])
     else:
         # the relu is realised through a zero zero-point and scaling to the
         # upper bound, so a forced output must satisfy both to be usable
         if not forced_q.zero_point_asymmetric_zero:
             return None
         bound_mismatch = params.upper_bound is not None and not np.isclose(
             forced_q.max, params.upper_bound, atol=0.01)
         if bound_mismatch:
             return None
         # propagate the forced output back to the input
         in_q = forced_q

     o_q = deepcopy(in_q)
     o_q.set_forced()
     qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True)
     compute_in_out_scale(qrec)
     return qrec
Beispiel #6
0
    def _import_nodes(self, G, graph, handlers, all_nodes, outputs, opts):
        """Run the import handler of every graph node and wire known outputs.

        For each node the handler registered under ``node.op_name`` is used
        (for custom nodes, the entry under ``node.custom_op_name`` inside
        it). When the handler returns parameters, an edge is added to every
        destination recorded in ``outputs`` for the node's output tensors
        and, if ``opts['load_quantization']`` is set, the destination gets a
        QRec copied from the producer's output qtype.

        Raises:
            ValueError: if no handler is registered for a node.
        """
        for node in graph.nodes:
            handler = handlers.get(node.op_name, None)
            if not handler:
                raise ValueError("no handler found for %s" % node.op_type)
            if node.is_custom:
                # for custom operations the first lookup yields a table keyed by op name
                handler = handler.get(node.custom_op_name, None)
                if not handler:
                    raise ValueError(
                        "no handler found for custom operation %s" %
                        node.custom_op_name)

            params = handler.handle(node,
                                    all_nodes=all_nodes,
                                    G=G,
                                    opts=opts,
                                    importer=self)
            if params is None:
                continue

            for out_idx, out_tensor in enumerate(node.output):
                dest = outputs.get(out_tensor)
                if not dest:
                    continue
                dest_node = dest[0]
                G.add_edge(
                    NNEdge(from_node=params,
                           to_node=dest_node,
                           from_idx=out_idx,
                           to_idx=dest[1]))
                if opts.get('load_quantization'):
                    out_q = deepcopy(
                        G.quantization[NodeId(params)].out_qs[out_idx])
                    G.quantization[NodeId(dest_node)] = QRec.scaled(
                        in_qs=[out_q], out_qs=[out_q])
Beispiel #7
0
    def _common(cls, node: TFLiteNode, **kwargs):
        """Import an SSD detector node and return its parameters object.

        Reads ``G``, ``opts``, ``all_nodes`` and ``importer`` from ``kwargs``.
        Creates ``SSDDetectorParameters`` from the node's custom options
        (``max_bb_before_nms`` defaults to 300), makes sure four outputs
        exist by adding graph outputs for missing ones, connects every input
        and, when ``opts['load_quantization']`` is set, records a QRec for
        the new node in ``G.quantization``.
        """
        custom_opts = node.get_custom_options()
        G = kwargs['G']
        opts = kwargs['opts']
        all_nodes = kwargs['all_nodes']
        importer = kwargs['importer']

        inputs = [all_nodes[t] for t in node.input]
        # four output slots are always considered; a slot is None when the
        # node has fewer outputs or the tensor is not present in all_nodes
        outputs = [
            all_nodes.get(node.output[idx]) if idx < len(node.output) else None
            for idx in range(4)
        ]
        # inp_shapes = [input[2].shape for input in inputs]

        if 'max_bb_before_nms' not in custom_opts:
            custom_opts['max_bb_before_nms'] = 300

        params = SSDDetectorParameters(node.name, parameters=custom_opts)

        overriden_outputs = []
        for idx, output in enumerate(outputs):
            if output:
                # slot already known: keep the node's own output tensor
                overriden_outputs.append(node.output[idx])
                continue
            # missing slot: create a graph output and a placeholder tensor,
            # and register it with the importer as a provisional output
            oparams = G.add_output()
            otensor = TensorBase("Detect_%s" % idx)
            overriden_outputs.append(otensor)
            importer.provisional_outputs[otensor] = (oparams, 0, None)
        # covers the case where not all outputs are generated by the conversion tool
        node.override_outputs(overriden_outputs)

        # each input entry is indexed as (producer node, producer output index, ...)
        for idx, inp in enumerate(inputs):
            G.add_edge(
                NNEdge(from_node=inp[0],
                       to_node=params,
                       from_idx=inp[1],
                       to_idx=idx))

        if opts.get('load_quantization'):
            # asymmetric or unsigned input qtypes are rebuilt from their
            # min/max range; others are used unchanged
            in_qtypes = [
                QType.from_min_max_sq(tensor.qtype.min_val,
                                      tensor.qtype.max_val) if
                (tensor.qtype.is_asymmetric
                 or not tensor.qtype.signed) else tensor.qtype
                for tensor in node.input
            ]
            o_boxes_qtype = QType(min_val=-2,
                                  max_val=2,
                                  dtype=np.int16,
                                  scale=2**(-14))
            # scores output reuses the qtype of the second input tensor
            o_scores_qtype = node.input[1].qtype
            o_class_qtype = QType(scale=1, dtype=np.int8)
            qrec = QRec.scaled(in_qs=in_qtypes,
                               out_qs=[
                                   o_boxes_qtype, o_class_qtype,
                                   o_scores_qtype, o_class_qtype
                               ])
            G.quantization[NodeId(params)] = qrec

        return params
Beispiel #8
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize an NE16 pooling node so input and output share a qtype.

        Returns None when the output is forced but neither ``use_ne16`` nor
        ``force_ne16`` is enabled, or when a forced input has a different
        zero point than the forced output. Otherwise returns a scaled QRec
        flagged ``ne16=True`` whose output qtype has ``attr.ne16`` set.
        """
        # work on a shallow copy so the caller's list is never touched
        in_qs = in_qs.copy()
        opts = kwargs['opts']
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        forced_q = force_out_qs and force_out_qs[0]
        G = kwargs['G']
        in_q = in_qs[0]

        cls.check_valid_ranges(params, stats, idx=0, dirs='in')
        in_range = stats['range_in'][0]
        min_val = in_range['min']
        max_val = in_range['max']

        if not forced_q:
            o_q = deepcopy(in_q)
        else:
            # a forced output is only honoured when ne16 mode is selected
            ne16_selected = opts.get('use_ne16') or opts.get('force_ne16')
            if not ne16_selected:
                LOG.info('%s ne16 max pool possible but ne16 mode not enabled',
                         params.name)
                return None
            o_q = forced_q
            if in_q.forced and in_q.zero_point != o_q.zero_point:
                return None
            in_q = deepcopy(o_q)

            LOG.warning(
                'node %s output forced to range %s/%s - actual range %s/%s %s',
                params.name, o_q.min, o_q.max, min_val, max_val,
                "asymmetric" if o_q.asymmetric else "symmetric")

        o_q.attr.ne16 = True
        cls.check_order(params, [['h', 'w', 'c']], [['h', 'w', 'c']])
        return QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True)
Beispiel #9
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize an element-wise matrix operation with symmetric types.

        Returns None when the inputs cannot be made symmetric or when the
        forced output is asymmetric. Otherwise returns a scaled QRec whose
        input and output qtypes have dtype and zero point forced.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        forced_q = force_out_qs and force_out_qs[0]
        # NOTE: The autotiler kernel scales and clips after the operation and
        # before the activation so fusion makes no difference here
        scaled_idx = None
        if isinstance(params, MatrixAddParameters):
            scaled_idx = params.force_quantized_index

        in_qs = cls.force_symmetric_and_dtype(in_qs)
        if in_qs is None:
            return None

        if not forced_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            out_range = stats['range_out'][0]
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=out_dtype)
        else:
            o_q = deepcopy(forced_q)
            if o_q.is_asymmetric:
                return None

        o_q.set_forced(flags=['dtype', 'zero_point'])
        forced_in_qs = []
        for in_q in in_qs:
            forced_in_qs.append(
                in_q.set_forced(flags=['dtype', 'zero_point']))
        return QRec.scaled(in_qs=forced_in_qs,
                           out_qs=[o_q],
                           scaled_idx=scaled_idx)
Beispiel #10
0
    def _update_qrecs(self, G, qrecs, all_nodes, ranges_dict):
        """Write the output qtype of every imported node into ``qrecs``.

        Entries of ``all_nodes`` are unpacked as (node, output index, unused,
        qtype). Nodes with no qtype and no entry in ``ranges_dict``, and
        nodes that are not in ``G``, are skipped. A missing QRec is created
        with None placeholders sized from the graph edges. An entry in
        ``ranges_dict`` overrides the node's qtype.
        """
        for node, idx, _, qtype in all_nodes.values():
            if qtype is None and node.name not in ranges_dict:
                continue
            if node.name not in G:
                continue

            nid = NodeId(node)
            qrec = qrecs.get(nid)
            if not qrec:
                qrec = QRec.scaled(
                    in_qs=[None] * G.num_in_edges(node),
                    out_qs=[None] * len(G.indexed_out_edges(node)))
                qrecs[nid] = qrec

            if node.name in ranges_dict:
                # explicit range information wins over the imported qtype
                entry = ranges_dict[node.name]
                out_min, out_max = entry["range"]
                qtype = QType.from_min_max_sq(
                    out_min,
                    out_max,
                    dtype=entry.get("dtype", np.int8),
                    bits=entry.get("n_bits", 8),
                    quantized_dimension=entry.get("per_channel", None))
            qrec.out_qs[idx] = qtype
Beispiel #11
0
    def common_quantize(cls, in_qtype, out_qtype, node, **kwargs):
        """Import a (de)quantize node, simplifying it where possible.

        Both qtypes are first converted with ``make_symmetric_signed``. A
        constant input is folded into a ``ConstantInputParameters`` node.
        Otherwise the node becomes a ``NoOPParameters`` when the qtypes are
        equal or share a dtype, and a ``QuantizeParameters`` in all other
        cases. The result is registered in ``all_nodes`` under the node's
        first output and returned.
        """
        all_nodes = kwargs['all_nodes']
        opts = kwargs['opts']
        G = kwargs['G']
        inputs = [all_nodes[t] for t in node.input]
        # only the first input is used; it is indexed below as
        # (producer node, producer output index, shape-like third element)
        x = inputs[0]
        in_qtype = in_qtype.make_symmetric_signed()
        out_qtype = out_qtype.make_symmetric_signed()
        if cls.is_constant(x):
            LOG.info("reducing %s to a constant", node.name)
            if out_qtype:
                val = x[0].value_as(out_qtype)
            else:
                val = cls.get_constant(x)
            params = ConstantInputParameters(node.name,
                                             value=val,
                                             dims=Dim.unnamed(val.shape),
                                             qtype=out_qtype,
                                             constant_store=G.constant_store)
            # no edge is added in this branch: the constant replaces its producer
            if opts.get('load_quantization'):
                G.quantization[NodeId(params)] = QRec.scaled(
                    in_qs=[out_qtype], out_qs=[out_qtype])
        else:
            if in_qtype == out_qtype:
                LOG.info('removing (de)quantize node %s with no effect',
                         node.name)
                params = NoOPParameters(node.name,
                                        desc="quantize with no effect")
            elif in_qtype.dtype == out_qtype.dtype:
                LOG.info('removing (de)quantize node %s with scale change',
                         node.name)
                params = NoOPParameters(node.name,
                                        desc="quantize with scale change")
                # the no-op passes the input qtype through, so the recorded
                # output qtype becomes the input qtype
                out_qtype = in_qtype
            else:
                params = QuantizeParameters(node.name,
                                            from_qtype=in_qtype,
                                            to_qtype=out_qtype)
            G.add_edge(
                NNEdge(from_node=x[0], to_node=params, from_idx=x[1],
                       to_idx=0))

            if opts.get('load_quantization'):
                G.quantization[NodeId(params)] = QRec.scaled(
                    in_qs=[in_qtype], out_qs=[out_qtype])
        # the output entry gets its own copy of the input's third element
        all_nodes[node.output[0]] = (params, 0, deepcopy(x[2]))
        return params
    def _common(cls, node, copy_qtype=False, quantized_args=None, **kwargs):
        """Import a two-operand node, folding it when both operands are constant.

        Args:
            node: Node being imported; ``node.input`` and ``node.output`` are
                keys into ``kwargs['all_nodes']``.
            copy_qtype: When true, the recorded output qtype is taken from
                the first input entry, or from the second if the first has
                none.
            quantized_args: Optional index description used to pick the two
                operands and to build the input/output qtypes through
                ``cls.get_qtype``.
            **kwargs: Must contain ``all_nodes``, ``valid_name`` and ``G``.
                ``params_class`` is required when the node is not folded and
                ``qrecs`` when ``quantized_args`` is given. Optional:
                ``constant_operation``, ``constant_int_operation``,
                ``params_args``.

        Returns:
            The created parameters object, which is also stored in
            ``all_nodes`` under the node's first output.
        """
        all_nodes = kwargs['all_nodes']
        valid_name = kwargs['valid_name']
        G = kwargs['G']
        constant_operation = kwargs.get('constant_operation')
        constant_int_operation = kwargs.get('constant_int_operation')
        # entries are indexed below as (node, output index, shape holder, qtype)
        inputs = [all_nodes[inp] for inp in node.input]
        if quantized_args:
            # the first two entries of quantized_args each start with the
            # operand index followed by two indexes handed to get_qtype; the
            # third entry holds the two indexes for the output qtype
            args = [inputs[quantized_args[0][0]], inputs[quantized_args[1][0]]]
            inp_qtypes = [
                cls.get_qtype(inputs[quantized_args[0][1]], inputs[quantized_args[0][2]]),
                cls.get_qtype(inputs[quantized_args[1][1]], inputs[quantized_args[1][2]])
            ]
            out_qtype = cls.get_qtype(inputs[quantized_args[2][0]], inputs[quantized_args[2][1]])
        else:
            args = inputs
            assert len(args) == 2
            out_qtype = None

        if all(cls.is_constant(inp) for inp in args) and constant_operation:
            # constant folding: evaluate the operation now
            values = [cls.get_constant(inp) for inp in args]
            if quantized_args:
                # compute on real values, then requantize the result below
                values = [inp_qtype.dequantize(val) for inp_qtype, val in zip(inp_qtypes, values)]
            outputs = cls.implied_broadcast(inputs)
            # the integer variant is used only when every operand is an integer array
            if constant_int_operation and all(np.issubdtype(val.dtype, np.integer) for val in values):
                res = constant_int_operation(*values)
            else:
                res = constant_operation(*values)
            if quantized_args:
                res = out_qtype.quantize(res)
            # small results are included in the log message
            if res.size < 10:
                logger.info("reducing %s to a constant %s", valid_name, res)
            else:
                logger.info("reducing %s to a constant", valid_name)
            params = ConstantInputParameters(valid_name, value=res,
                                             dims=Dim.unnamed(outputs[0].known_shape),
                                             qtype=out_qtype)
        else:
            params_args = kwargs.get('params_args', {})
            params = kwargs['params_class'](valid_name, **params_args)
            outputs = cls.implied_broadcast(inputs)
            shapes = []
            for idx, inp in enumerate(args):
                G.add_edge(NNEdge(from_node=inp[0], to_node=params, from_idx=inp[1], to_idx=idx))
                shapes.append(inp[2].known_shape)
            if isinstance(params, Broadcastable):
                params.set_broadcast(shapes)
            if quantized_args:
                # constant operands get their qtype set on the constant node itself
                for qtype, inp in zip(inp_qtypes, args):
                    if cls.is_constant(inp):
                        inp[0].qtype = qtype
                qrecs = kwargs['qrecs']
                qrecs[NodeId(params)] = QRec.scaled(in_qs=inp_qtypes, out_qs=[out_qtype])

        if copy_qtype:
            # overrides any out_qtype computed from quantized_args above
            out_qtype = inputs[0][3] if inputs[0][3] is not None else inputs[1][3]

        all_nodes[node.output[0]] = (params, 0, outputs[0], out_qtype)
        return params
Beispiel #13
0
 def _load_quantization(qrecs, node_recs):
     """Record a QRec for every tensor in ``node_recs`` that has a qtype.

     The qtype is converted with ``make_symmetric_signed``, tagged with
     ``is_input = True`` and used as both the input and the output of the
     QRec stored under the first element of the tensor's ``node_recs``
     entry.
     """
     for tensor, rec in node_recs.items():
         qtype = tensor.qtype
         if not qtype:
             continue
         sym_q = qtype.make_symmetric_signed()
         sym_q.is_input = True
         qrecs[NodeId(rec[0])] = QRec.scaled(in_qs=[sym_q], out_qs=[sym_q])
 def record_constant_qrec(cls, inp, cnode, **kwargs):
     """Store an output-only QRec for ``cnode`` when a qtype is available.

     Nothing is recorded when the fourth element of ``inp`` is None or when
     no ``qrecs`` mapping was passed in ``kwargs``.
     """
     qrecs = kwargs.get('qrecs')
     const_q = inp[3]
     if const_q is None or qrecs is None:
         return
     qrecs[NodeId(cnode)] = QRec.scaled(out_qs=[const_q])
Beispiel #15
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize with symmetric int8 inputs and a scale-1 int16 output.

        Returns None when an output qtype is being forced or when the inputs
        cannot be made symmetric int8.
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        if force_out_qs and force_out_qs[0]:
            # the output type is fixed, so no forced output can be honoured
            return None

        sym_in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8)
        if sym_in_qs is None:
            return None
        return QRec.scaled(in_qs=sym_in_qs,
                           out_qs=[QType(scale=1, dtype=np.int16)])
Beispiel #16
0
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Quantize with a fixed [-8, 8] int8 input and a fixed [-1, 1] output.

     Both qtypes are created as forced. Returns None when an output qtype is
     being forced on this node.
     """
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     if force_out_qs and force_out_qs[0]:
         return None
     fixed_in_q = QType.from_min_max_sq(-8, 8, dtype=np.int8, forced=True)
     fixed_out_q = QType.from_min_max_sq(min_val=-1.0,
                                         max_val=1.0,
                                         dtype=out_dtype,
                                         forced=True)
     return QRec.scaled(in_qs=[fixed_in_q], out_qs=[fixed_out_q])
Beispiel #17
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize a two- or three-input node, with an optional int32 bias.

        Reads ``opts``, ``G``, ``graph_update`` and ``all_stats`` from
        ``kwargs`` (``fusion`` is optional). Returns None when a forced
        output is not symmetric int8. Otherwise returns a scaled QRec
        carrying a ``mul_biases_q`` whose scale is
        ``in1.scale * in2.scale / out.scale``.
        """
        # copy in_qs because we may modify it
        in_qs = in_qs.copy()
        opts = kwargs['opts']
        fusion = kwargs.get('fusion', None)

        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        G = kwargs['G']
        # only attempt channel scaling if the second input is constant
        # if len(in_qs) > 2:
        in2_node, in_qs = cls.move_constant(G, fusion if fusion else params,
                                            in_qs)
        if in2_node:
            # flag the graph as needing adjustment after this change
            kwargs['graph_update']['requires_adjust'] = True
            # quantize the constant along dimension 0 as narrow-range int8
            in_q2 = QType.from_array_sq(arr=in2_node.dqvalue,
                                        quantized_dimension=0,
                                        dtype=np.int8,
                                        narrow_range=True,
                                        bits=8)
        else:
            in_q2 = in_qs[1].make_symmetric_signed()

        in_q1 = in_qs[0].make_symmetric_signed()

        min_val, max_val = cls.get_min_max(fusion, stats, kwargs['all_stats'],
                                           params)

        if force_out_q:
            o_q = force_out_q
            # can't be forced to something not np.int8
            if o_q.dtype != np.int8 or o_q.asymmetric:
                return None
            # NOTE(review): o_q.asymmetric is falsy here because of the
            # return above, so this message always reports "symmetric"
            LOG.warning(
                'node %s output forced to range %s/%s - actual range %s/%s %s',
                params.name, o_q.min, o_q.max, min_val, max_val,
                "asymmetric" if o_q.asymmetric else "symmetric")
        else:
            o_q = QType.from_min_max_sq(min_val=min_val,
                                        max_val=max_val,
                                        dtype=out_dtype)
        if len(in_qs) == 3:
            # the third input gets an int32 qtype scaled by the product of
            # the two input scales
            biases_q = QType(dtype=np.int32, scale=in_q1.scale * in_q2.scale)
            out_in_qs = [in_q1, in_q2, biases_q]
        else:
            out_in_qs = [in_q1, in_q2]

        mul_biases_q = MultMulBiasScaleQType()
        mul_biases_q.scale = in_q1.scale * in_q2.scale / o_q.scale

        return QRec.scaled(in_qs=out_in_qs,
                           out_qs=[o_q],
                           mul_biases_q=mul_biases_q)
Beispiel #18
0
    def set_c_state_as_output(self, G):
        """Expose this node's ``c_state`` through a new graph output.

        A graph output is added and connected to output index 1 of this
        node. When the node has a QRec in ``G.quantization``, the qtype of
        its ``c_state`` input is appended to the QRec outputs and used for
        the new output node's QRec. Graph dimensions are recomputed last.
        """
        c_state_out = G.add_output()
        own_qrec = G.quantization and G.quantization.get(NodeId(self))
        if own_qrec:
            c_state_idx = self.INPUT_NAMES.index('c_state')
            c_state_qtype = own_qrec.in_qs[c_state_idx]
            own_qrec.out_qs.append(c_state_qtype)
            G.quantization[NodeId(c_state_out)] = QRec.scaled(
                in_qs=[c_state_qtype], out_qs=[c_state_qtype])

        G.add_edge(NNEdge(self, c_state_out, from_idx=1))
        G.add_dimensions()
Beispiel #19
0
 def load_tf_quantization(cls, input_tensors, output_tensors, in_qs=None, out_qs=None, qrec_class=None):
     """Build a quantization record from tensor qtypes or explicit lists.

     Args:
         input_tensors: Tensors whose ``qtype`` is used when ``in_qs`` is None.
         output_tensors: Tensors whose ``qtype`` is used when ``out_qs`` is None.
         in_qs: Optional explicit input qtypes (an empty list is honoured).
         out_qs: Optional explicit output qtypes (an empty list is honoured).
         qrec_class: Optional callable accepting ``in_qs`` and ``out_qs``
             keywords; ``QRec.scaled`` is used when it is None.

     Returns:
         The record produced by the selected constructor, with both qtype
         lists passed through ``cls.convert_to_symmetric``.
     """
     # both constructors take the same keywords, so only the callable differs
     make_qrec = QRec.scaled if qrec_class is None else qrec_class
     sym_in_qs = cls.convert_to_symmetric(
         in_qs if in_qs is not None else [tensor.qtype for tensor in input_tensors])
     sym_out_qs = cls.convert_to_symmetric(
         out_qs if out_qs is not None else [tensor.qtype for tensor in output_tensors])
     return make_qrec(in_qs=sym_in_qs, out_qs=sym_out_qs)
Beispiel #20
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize by keeping the inputs and deriving the output qtype.

        The output is a copy of the forced qtype when there is one;
        otherwise it is built from the recorded output range, asymmetric if
        ``opts['allow_asymmetric']`` is set.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        forced_q = force_out_qs and force_out_qs[0]
        opts = kwargs['opts']

        if not forced_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            out_range = stats['range_out'][0]
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=out_dtype,
                                        asymmetric=opts['allow_asymmetric'])
        else:
            o_q = deepcopy(forced_q)

        return QRec.scaled(in_qs=in_qs, out_qs=[o_q])
Beispiel #21
0
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Pick the output qtype for a node that carries its own value.

     Priority: a copy of the forced output qtype, then a copy of the qtype
     already on ``params``, then one derived from ``params.value``. The
     chosen qtype is marked ``is_constant`` and returned in an output-only
     scaled QRec.
     """
     force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
     forced_q = force_out_qs and force_out_qs[0]
     # a forced qtype wins; otherwise reuse the existing quantization if any
     existing_q = forced_q or params.qtype
     if existing_q:
         o_q = deepcopy(existing_q)
     else:
         # nothing to reuse: derive the quantization from the value itself
         o_q = QType.from_array_sq(params.value, dtype=out_dtype)
     o_q.is_constant = True
     return QRec.scaled(out_qs=[o_q])
Beispiel #22
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize with one qtype shared by input and output (``ne16=True``).

        A forced output qtype is used directly. Otherwise an asymmetric
        uint8 qtype is built from 0 up to ``params.upper_bound`` or, when
        that is None, the recorded output maximum.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        forced_q = force_out_qs and force_out_qs[0]
        in_q = in_qs[0]

        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if not forced_q:
            if params.upper_bound is not None:
                upper = params.upper_bound
            else:
                upper = stats['range_out'][0]['max']
            in_q = QType.from_min_max_sq(0,
                                         upper,
                                         dtype=np.uint8,
                                         asymmetric=True)
        else:
            # a forced output is propagated back to the input
            in_q = forced_q

        return QRec.scaled(in_qs=[in_q], out_qs=[in_q], ne16=True)
Beispiel #23
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize using the dtypes declared on ``params``.

        The input is a scale-1 qtype of ``params.input_dtype``; the output
        is a narrow-range [-1, 1] qtype of ``params.output_dtype``. Returns
        None when an output qtype is being forced.
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        if force_out_qs and force_out_qs[0]:
            return None

        out_dtype = params.output_dtype
        in_q = QType(scale=1, dtype=params.input_dtype)
        out_q = QType.from_min_max_sq(-1,
                                      1,
                                      dtype=out_dtype,
                                      narrow_range=True)
        return QRec.scaled(in_qs=[in_q], out_qs=[out_q])
Beispiel #24
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize a pooling-style node whose output shares the input qtype.

        Reads ``opts`` and ``G`` from ``kwargs``. An asymmetric input to a
        padded ``PoolingParameters`` node is first made symmetric.

        Returns:
            A scaled QRec with one input and one output qtype, or None when
            * the input cannot be made symmetric,
            * the forced output is asymmetric but ``allow_asymmetric`` is off,
            * the forced output dtype differs from the input dtype, or
            * the input qtype is forced and the forced output has a
              different, non-zero zero point.
        """
        # copy in_qs because we may modify it
        in_qs = in_qs.copy()
        opts = kwargs['opts']

        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        G = kwargs['G']
        in_q = in_qs[0]

        if (in_q.asymmetric and isinstance(params, PoolingParameters)
                and params.padding.has_padding):
            in_qs = cls.force_symmetric(in_qs)
            if in_qs is None:
                return None
            in_q = in_qs[0]

        cls.check_valid_ranges(params, stats, idx=0, dirs='in')
        min_val = stats['range_in'][0]['min']
        max_val = stats['range_in'][0]['max']

        if force_out_q:
            if force_out_q.asymmetric and not opts.get('allow_asymmetric'):
                LOG.warning(
                    '%s could be asymmetricaly quantized but allow_asymmetric option not selected',
                    params.name)
                return None
            if force_out_q.dtype != in_q.dtype:
                return None
            # This compatibility check must run against the ORIGINAL input
            # qtype. Previously it ran after in_q had been replaced by a copy
            # of force_out_q, so it compared the forced output with itself
            # and could never reject anything. The dtype is already known to
            # match at this point, so only the zero point is compared.
            if (force_out_q.zero_point != in_q.zero_point and in_q.forced
                    and force_out_q.zero_point != 0):
                return None
            o_q = force_out_q
            # propagate the forced output back onto the input
            in_q = deepcopy(force_out_q)
            LOG.warning(
                'node %s output forced to range %s/%s  %s - actual range %s/%s',
                params.name, o_q.min, o_q.max,
                "asymmetric" if o_q.asymmetric else "symmetric", min_val,
                max_val)
        else:
            o_q = deepcopy(in_q)

        if opts['hwc']:
            cls.check_order(params, [['h', 'w', 'c']], [['h', 'w', 'c']])
        else:
            cls.check_order(params, [['c', 'h', 'w']], [['c', 'h', 'w']])
        return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
    def _quantize_sw(cls,
                     params,
                     in_qs,
                     stats,
                     inout_dtype,
                     asym=False,
                     **kwargs):
        """Quantize an element-wise matrix operation for a given in/out dtype.

        Args:
            params: Node parameters; ``force_quantized_index`` is read when
                it is a ``MatrixAddParameters``.
            in_qs: Input qtypes; made symmetric unless ``asym`` is true.
            stats: Statistics providing ``range_out`` when no output is forced.
            inout_dtype: dtype required of a forced output and used for a
                derived output qtype.
            asym: Selects asymmetric quantization; the same flag is passed
                as ``ne16`` to the derived output qtype and to the QRec.
            **kwargs: Forwarded to ``cls.get_mult_opts``.

        Returns:
            A scaled QRec, or None when the inputs cannot be made symmetric
            or the forced output qtype is incompatible.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        # NOTE: The autotiler kernel scales and clips after the operation and before the
        # activation so there is no change if this is in a fusion or not
        scaled_idx = params.force_quantized_index if isinstance(
            params, MatrixAddParameters) else None
        if not asym:
            in_qs = cls.force_symmetric_and_dtype(in_qs)
        if in_qs is None:
            return None

        if force_out_q:
            o_q = deepcopy(force_out_q)
            # reject a forced output that is asymmetric in symmetric mode or
            # that has the wrong dtype
            if (o_q.asymmetric and not asym) or o_q.dtype != inout_dtype:
                return None
            # important to set ne16 here so the o_q matches the force_out_q since
            # this attribute is not copied by deepcopy
            if force_out_q.attr.ne16:
                o_q.attr.ne16 = True
        else:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            o_q = QType.from_min_max_sq(stats['range_out'][0]['min'],
                                        stats['range_out'][0]['max'],
                                        dtype=inout_dtype,
                                        asymmetric=asym,
                                        dont_copy_attr=['ne16'],
                                        ne16=asym)
        if asym:
            # asymmetric mode forces only the dtype
            o_q.set_forced(flags=['dtype'])
            in_qs = [in_q.set_forced(flags=['dtype']) for in_q in in_qs]
        else:
            # symmetric mode forces the zero point as well
            o_q.set_forced(flags=['dtype', 'zero_point'])
            in_qs = [
                in_q.set_forced(flags=['dtype', 'zero_point'])
                for in_q in in_qs
            ]
        return QRec.scaled(in_qs=in_qs,
                           out_qs=[o_q],
                           scaled_idx=scaled_idx,
                           ne16=asym)
Beispiel #26
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize with symmetric int8 inputs and a symmetric output.

        Returns None when the inputs cannot be made symmetric int8 or when
        the forced output is asymmetric. Without a forced output the output
        qtype is built from the recorded output range.
        """
        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        forced_q = force_out_qs and force_out_qs[0]

        in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8)
        if in_qs is None:
            return None

        if not forced_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            out_range = stats['range_out'][0]
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=out_dtype)
        else:
            o_q = deepcopy(forced_q)
            if o_q.is_asymmetric:
                return None

        return QRec.scaled(in_qs=in_qs, out_qs=[o_q])
Beispiel #27
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize a global pooling node.

        Output qtype selection, in order:
        * inside a fusion: a copy of the input qtype with dtype int32;
        * forced output: used as is, or None is returned when its zero point
          differs from the input's;
        * global average / sum pooling: built from the recorded output range;
        * otherwise: a copy of the input qtype.
        """
        # shallow copy so the caller's list is left untouched
        in_qs = in_qs.copy()
        opts = kwargs['opts']
        fusion = kwargs.get('fusion', None)

        force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
        forced_q = force_out_qs and force_out_qs[0]
        G = kwargs['G']
        in_q = in_qs[0]

        cls.check_valid_ranges(params, stats, idx=0, dirs='in')
        in_range = stats['range_in'][0]
        min_val = in_range['min']
        max_val = in_range['max']

        if fusion:
            # fused with an activation: only the activation scale applies
            o_q = deepcopy(in_q)
            o_q.dtype = np.int32
        elif forced_q:
            if forced_q.zero_point != in_q.zero_point:
                return None
            o_q = forced_q
            LOG.warning(
                'node %s output forced to range %s/%s  %s - actual range %s/%s',
                params.name, o_q.min, o_q.max,
                "asymmetric" if o_q.asymmetric else "symmetric", min_val,
                max_val)
        elif isinstance(params, (GlobalAveragePoolParameters,
                                 GlobalSumPoolParameters)):
            # scaling has to follow the output statistics and the zero point
            out_range = stats['range_out'][0]
            keep_asymmetric = (out_range['min'] == 0
                               and in_q.zero_point == -128)
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=out_dtype,
                                        asymmetric=keep_asymmetric)
        else:
            o_q = deepcopy(in_q)

        if opts['hwc']:
            expected_order = 'hwc'
        elif params.in_dims_hint:
            expected_order = 'chw'
        else:
            expected_order = None
        if expected_order is not None:
            cls.check_order(params, [list(expected_order)],
                            [list(expected_order)])
        return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
Beispiel #28
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the scaled quantization record for an activation node.

        Args:
            params: Activation parameters; the concrete type selects the
                input range override and the extra scale applied at the end.
            in_qs: Input quantization types; only the first entry is used.
            stats: Range statistics; ``range_out`` is read when the output
                type is not forced.
            **kwargs: Handler options. ``fusion`` is optional; everything is
                forwarded to ``cls.get_mult_opts``.

        Returns:
            A ``QRec.scaled`` record after ``compute_in_out_scale`` has been
            applied to it, or ``None`` when the requested types cannot be
            satisfied.
        """
        forced_list, _ = cls.get_mult_opts(**kwargs)
        forced_out = forced_list and forced_list[0]
        fusion = kwargs.get('fusion', None)
        in_q = in_qs[0]

        # an int32 input is only accepted when the activation is fused
        if in_q.dtype == np.int32 and not fusion:
            return None

        hard_types = (HSwishActivationParameters, HSigmoidActivationParameters)
        if isinstance(params, hard_types):
            # widen the input to +/-6 when its current range falls short
            if in_q.scale * pow(2, in_q.bits - 1) < 6:
                in_q = QType.from_min_max_sq(
                    -6, 6, dtype=in_q.dtype, forced=True)
        elif isinstance(params, SigmoidActivationParameters):
            in_q = QType.from_min_max_sq(-8, 8, dtype=in_q.dtype, forced=True)

        if not forced_out:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            zero_point = None
            if in_q.zero_point != 0:
                zero_point = in_q.zero_point
            out_range = stats['range_out'][0]
            o_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=in_q.dtype,
                                        zero_point=zero_point)
        else:
            if force_out_mismatch := (forced_out.signed != in_q.signed):
                return None
            keeps_own_input = (
                SigmoidActivationParameters, HTanHActivationParameters,
                HSwishActivationParameters, HSigmoidActivationParameters)
            fused_with_conv = fusion and fusion.fusion_type in [
                'conv_active_pool', 'conv_active']
            if fused_with_conv and not isinstance(params, keeps_own_input):
                in_q = deepcopy(forced_out)
            o_q = deepcopy(forced_out)
            # activation cannot move zeropoint unless it is a reduction step
            if in_q.dtype != np.int32 and o_q.zero_point != in_q.zero_point:
                return None

        qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q])
        scale_args = {}
        # NOTE(review): SigmoidScaledSymmetricMult is the only entry here that
        # is not a *Parameters type - confirm it is the intended class.
        if isinstance(params, (SigmoidScaledSymmetricMult,
                               TanHActivationParameters)):
            q7_scale = QType.Pow2(bits=32, q=7, signed=True).scale
            scale_args['extra_scale'] = q7_scale/qrec.in_qs[0].scale
        elif isinstance(params, HSwishActivationParameters):
            scale_args['extra_scale'] = qrec.in_qs[0].scale * 1/6
        compute_in_out_scale(qrec, **scale_args)
        return qrec
Beispiel #29
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build the scaled quantization record for a four-output node.

        Forced output types are not supported: if the first forced output is
        set the handler declines by returning ``None``.

        Args:
            params: Node parameters (not read here).
            in_qs: Input quantization types; they are converted through
                ``cls.force_symmetric_and_dtype`` with ``dtype=np.int8``.
            stats: Range statistics (not read here).
            **kwargs: Handler options forwarded to ``cls.get_mult_opts``.

        Returns:
            A ``QRec.scaled`` record, or ``None`` when an output is forced or
            the inputs cannot be converted.
        """
        forced_list, _ = cls.get_mult_opts(**kwargs)
        if forced_list and forced_list[0]:
            return None

        sym_in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8)
        if sym_in_qs is None:
            return None

        # boxes: fixed int16 type over [-2, 2] with scale 2**-14
        boxes_q = QType(min_val=-2, max_val=2, dtype=np.int16, scale=2**(-14))
        # scores: reuse the type of the second input as-is
        scores_q = sym_in_qs[1]
        # classes: int8 with unit scale; the same object is used for the
        # second and fourth outputs
        class_q = QType(scale=1, dtype=np.int8)

        out_qs = [boxes_q, class_q, scores_q, class_q]
        return QRec.scaled(in_qs=sym_in_qs, out_qs=out_qs)
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build a scaled record with a power-of-two input scale.

        The first input is converted to a symmetric int8 type whose scale is
        rounded up to the next power of two; the output is a fixed int16
        type over [-1, 1] with scale 2**-15.

        Args:
            params: Node parameters (not read here).
            in_qs: Input quantization types; only the first entry is used
                after ``cls.force_symmetric_and_dtype``.
            stats: Range statistics (not read here).
            **kwargs: Handler options forwarded to ``cls.get_mult_opts``.

        Returns:
            A ``QRec.scaled`` record, or ``None`` when the first forced
            output has a forced scale or zero point, or when the inputs
            cannot be converted to symmetric int8.
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        # the output type is fixed, so a forced scale or zero point cannot
        # be honoured
        if force_out_q and (force_out_q.forced_scale
                            or force_out_q.forced_zero_point):
            return None

        in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8)
        if in_qs is None:
            return None

        # force the input to be POW2 scaled
        src_q = in_qs[0]
        pow2_scale = np.power(2, np.ceil(np.log2(src_q.scale)))
        in_q = QType(min_val=src_q.min_val,
                     max_val=src_q.max_val,
                     dtype=np.int8,
                     scale=pow2_scale,
                     forced=True)
        o_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
        return QRec.scaled(in_qs=[in_q], out_qs=[o_q])