Example 1
    def load_filter_parameters(cls, G, params, input_tensors, output_tensors, opts, converted_to_conv=False):
        """Load weight/bias tensors onto *params* and, when requested, attach a
        multiplicative-scheme quantization record for the filter to *G*.

        ``input_tensors[1]`` holds the weights, ``input_tensors[2]`` the biases
        (only read when ``params.has_bias``).  When *converted_to_conv* is set
        the weights are re-laid-out with ``cls.TF_LITE_DW_FILTER_TRANSPOSE``.
        Raises ``NoQuantizationError`` if quantization is requested but absent.
        """
        load_tensors = opts.get('load_tensors')
        load_quant = opts.get('load_quantization')

        if load_tensors or load_quant:
            weights = input_tensors[1].value
            if converted_to_conv:
                # depthwise filter converted to a plain conv: permute axes
                weights = weights.transpose(cls.TF_LITE_DW_FILTER_TRANSPOSE)
            params.weights = weights
            if params.has_bias:
                params.biases = input_tensors[2].value

        if not load_quant:
            return
        if input_tensors[0].qtype is None:
            raise NoQuantizationError("quantization not present in tflite file")

        weights_scales, biases_scales, w_mins, w_maxes = cls.fix_weights_and_biases(
            params, input_tensors, opts)
        in_q = input_tensors[0].qtype
        out_q = output_tensors[0].qtype
        weights_q = SymmetricMultQType(
            dtype=np.int8, narrow_range=True, scale=weights_scales.flatten(),
            min_val=w_mins, max_val=w_maxes)
        biases_q = SymmetricMultBiasesQType(dtype=np.int32, scale=biases_scales.flatten())
        mulbiases_q = MultMulBiasScaleQType.from_filter(in_q, weights_q, out_q, params)
        G.quantization[NodeId(params)] = MultScalableFilterQuantizationRecord(
            in_qs=[in_q],
            out_qs=[out_q],
            mul_biases_q=mulbiases_q,
            weights_q=weights_q,
            biases_q=biases_q)
Example 2
    def calculate_q(self, G, node, astats, in_qs, dtype, out_dtype=None):
        """Return the quantization record for *node*.

        Selects an output qtype from the node type / activation stats
        (*astats*), then dispatches on the node class to build the matching
        record.  *out_dtype* defaults to *dtype*.
        """
        del G  # graph not needed in this version
        out_dtype = dtype if out_dtype is None else out_dtype

        # Output qtype: pooling/output nodes pass the input qtype through;
        # softmax output is fixed signed Q15 over [-1, 1]; everything else is
        # fitted to the observed activation range.
        if isinstance(node, (PoolingParameters, OutputParameters)):
            o_q = in_qs[0]
        elif isinstance(node, SoftMaxParameters):
            o_q = SymmetricMultQType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
        else:
            o_q = SymmetricMultQType.from_min_max(min_val=astats['min'],
                                                  max_val=astats['max'],
                                                  dtype=out_dtype)

        if isinstance(node, (MatrixAddParameters, MatrixSubParameters)):
            return MultAddQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

        if isinstance(node, (MatrixBroadcastedLinearOpParameters, MatScaleFusionParameters, GlobalPoolParameters)):
            return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

        if isinstance(node, ConstantInputParameters):
            return MultConstantQuantizationRecord(out_qs=[o_q],
                                                  constants_are_quantized=False)

        if isinstance(node, (FcParameters, Conv2DParameters)):
            weights_q = SymmetricMultQType.from_array(
                arr=node.weights,
                quantized_dimension=self.get_quantized_dimension(node),
                dtype=dtype, narrow_range=self._narrow_weights)
            if node.has_bias:
                # standard scheme: bias scale = weight scale * input scale
                bias_scale = weights_q.scale * in_qs[0].scale
            else:
                bias_scale = np.array([1], dtype=np.int32)
            biases_q = SymmetricMultBiasesQType(dtype=np.int32, scale=bias_scale)
            mul_biases_q = MultMulBiasScaleQType.from_filter(in_qs[0], weights_q, o_q, node)
            qrec = MultScalableFilterQuantizationRecord(
                in_qs=[in_qs[0]],
                out_qs=[o_q],
                weights_q=weights_q,
                biases_q=biases_q,
                mul_biases_q=mul_biases_q,
                constants_are_quantized=False)
            LOG.debug("filter %s qrec %s", node.name, qrec)
            return qrec

        return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
Example 3
    def calculate_q(self, G, node, astats, in_qs, dtype, out_dtype=None):
        """Return a quantization record (qrec) for *node*.

        First picks the output qtype ``o_q`` from the node type and the
        activation statistics *astats*, then dispatches on the node class to
        construct the matching record type.

        Side effects: the RNN/LSTM branches mutate entries of ``in_qs`` and
        ``o_q`` in place (scale harmonisation) and call
        ``self.rescale_constant`` on constant input nodes, so this is not a
        pure function of its arguments.
        """
        if out_dtype is None:
            out_dtype = dtype
        # Output qtype selection: pooling/output/split pass the input qtype
        # straight through; softmax output is fixed signed Q15 over [-1, 1];
        # everything else is fitted to the recorded output activation range.
        if isinstance(node, (PoolingParameters, OutputParameters, SplitParameters)):
            o_q = in_qs[0]
        elif isinstance(node, SoftMaxParameters):
            o_q = SymmetricMultQType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
        else:
            o_q = SymmetricMultQType.from_min_max(min_val=astats['range_out'][0]['min'],
                                                  max_val=astats['range_out'][0]['max'],
                                                  dtype=out_dtype)

        if isinstance(node, (MatrixAddParameters, MatrixSubParameters)):
            qrec = MultAddQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
        elif isinstance(node, ExpressionFusionParameters):
            # One output qtype per recorded output range of the fused expression.
            o_qs = [SymmetricMultQType.from_min_max(min_val=orange['min'],
                                                    max_val=orange['max'],
                                                    dtype=out_dtype)
                    for orange in astats['range_out']]
            # Order fusion inputs/outputs by their declared index so they line
            # up positionally with in_qs / o_qs.
            fusion_inputs = sorted([n for n in node.subgraph.inputs()
                                    if isinstance(n, FusionInputParameters)],
                                   key=lambda x: x.idx)
            fusion_outputs = sorted([n for n in node.subgraph.outputs()
                                     if isinstance(n, FusionOutputParameters)],
                                    key=lambda x: x.idx)

            node_scale_map = {fnode: in_qs[idx].scale
                              for idx, fnode in enumerate(fusion_inputs)}
            for idx, fnode in enumerate(fusion_outputs):
                node_scale_map[fnode] = o_qs[idx].scale
            inp, outp, expr = node.decompose(node_scale_map=node_scale_map)

            qrec = MultExpressionQuantizationRecord(in_qs=in_qs,
                                                    out_qs=o_qs,
                                                    inputs=inp,
                                                    output_exprs=outp,
                                                    intermediate_exprs=expr)
        elif isinstance(node, (MatrixBroadcastedLinearOpParameters, MatScaleFusionParameters, GlobalPoolParameters)):
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

        elif isinstance(node, SplitParameters):
            # The pass-through qtype is replicated once per split output.
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q]*node.num_splits)

        elif isinstance(node, ConstantInputParameters):
            # Keep the constant's existing value quantization if it has one.
            if node.value_quantization:
                qrec = MultConstantQuantizationRecord(out_qs=[node.value_quantization],
                                                      constants_are_quantized=True)
            else:
                qrec = MultConstantQuantizationRecord(out_qs=[o_q],
                                                      constants_are_quantized=False)

        elif isinstance(node, (FcParameters, Conv2DParameters)):
            weights_q = SymmetricMultQType.from_array(arr=node.weights,
                                                      quantized_dimension=self.get_quantized_dimension(
                                                          node),
                                                      dtype=dtype, narrow_range=self._narrow_weights)
            if node.has_bias:
                # Standard scheme: bias scale = weight scale * input scale.
                biases_q = SymmetricMultBiasesQType(
                    dtype=np.int32, scale=weights_q.scale * in_qs[0].scale)
            else:
                biases_q = SymmetricMultBiasesQType(
                    dtype=np.int32, scale=np.array([1], dtype=np.int32))
            mul_biases_q = MultMulBiasScaleQType.from_filter(in_qs[0], weights_q, o_q, node)
            qrec = MultScalableFilterQuantizationRecord(in_qs=[in_qs[0]],
                                                        out_qs=[o_q],
                                                        weights_q=weights_q,
                                                        biases_q=biases_q,
                                                        mul_biases_q=mul_biases_q,
                                                        constants_are_quantized=False)
            LOG.debug("filter %s qrec %s", node.name, qrec)
        elif isinstance(node, RNNParameters):
            # Map each constant input edge to its named RNN input slot.
            input_nodes = {RNNParameters.INPUT_NAMES[edge.to_idx]: edge.from_node
                           for edge in G.in_edges(node.name)
                           if isinstance(edge.from_node, ConstantInputParameters)}
            names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
            # quantization_mode: extended, autotiler
            # state_width: 16bit or 8bit
            opts = self.get_options(node)
            # NOTE(review): if opts['mode'] is neither 'extended' nor
            # 'autotiler', qrec is never bound and the final return raises
            # UnboundLocalError — confirm the option is validated upstream.
            if opts['mode'] == "extended":
                in_w_scale = in_qs[names['i_2_i_w']].scale * in_qs[0].scale
                state_w_scale = in_qs[names['r_2_i_w']].scale
                i_2_a_q = MultMulBiasScaleQType(scale=in_w_scale/state_w_scale)
                s_2_s_q = MultMulBiasScaleQType(scale=state_w_scale)
                s_2_o_q = MultMulBiasScaleQType(scale=1/o_q.scale)
                self.rescale_constant(input_nodes['i_b'], state_w_scale, dtype=np.int32)
                qrec = MultScalableRnnQuantizationRecord(
                    in_qs=in_qs,
                    out_qs=[o_q],
                    i_2_a_q=i_2_a_q,
                    s_2_s_q=s_2_s_q,
                    s_2_o_q=s_2_o_q
                )
            elif opts['mode'] == 'autotiler':
                # Force input, state and output onto one shared scale, and the
                # two weight tensors onto one shared scale (mutates in place).
                in_and_state_scale = np.maximum(in_qs[0].scale, o_q.scale)
                in_and_state_w_scale = np.maximum(
                    in_qs[names['i_2_i_w']].scale, in_qs[names['r_2_i_w']].scale)
                in_qs[0].scale = in_and_state_scale
                o_q.scale = in_and_state_scale
                self.rescale_constant(input_nodes['i_state'], in_and_state_scale)
                self.rescale_constant(input_nodes['i_2_i_w'], in_and_state_w_scale)
                self.rescale_constant(input_nodes['r_2_i_w'], in_and_state_w_scale)
                state_w_scale = in_and_state_scale * in_and_state_w_scale
                self.rescale_constant(input_nodes['i_b'], state_w_scale, dtype=np.int32)
                s_2_s_q = MultMulBiasScaleQType(scale=state_w_scale/in_and_state_scale)
                qrec = MultScalableRnnQuantizationRecord(
                    in_qs=in_qs,
                    out_qs=[o_q],
                    s_2_s_q=s_2_s_q,
                )
        elif isinstance(node, LSTMParameters):
            input_nodes = {LSTMParameters.INPUT_NAMES[edge.to_idx]: edge.from_node
                           for edge in G.in_edges(node.name)
                           if isinstance(edge.from_node, ConstantInputParameters)}
            names = {val: idx for idx, val in enumerate(LSTMParameters.INPUT_NAMES)}
            # Cell range: explicit clip wins; otherwise taken from the stats.
            if node.cell_clip:
                cell_max = node.cell_clip
            else:
                cell_max = max(abs(astats['range_cell'][var]) for var in ['min', 'max'])

            cell_int_bits = calc_bits(cell_max)

            in_qs[names['c_state']].recalculate_scale(-cell_max,
                                                      cell_max)
            LOG.debug("cell bits %d max %d cell range %d",
                      cell_int_bits,
                      cell_max,
                      in_qs[names['c_state']].range)
            # worst case is (internal_q * 3) + 2 = 32 (1 for 1 and 1 for sign) i.e. 10
            # but also (internal_q * 2) + cell_bits = 32
            int_q = min((32-cell_int_bits)//2, 10)
            # in and out and state are all in the same scale
            in_and_out_scale = np.maximum(in_qs[0].scale, o_q.scale)
            in_and_state_scale = np.maximum(in_and_out_scale, in_qs[names['i_state']].scale)
            in_qs[0].scale = in_and_state_scale
            o_q.scale = in_and_state_scale
            self.rescale_constant(input_nodes['i_state'], in_and_state_scale)
            # Each gate's input weight and recurrent weight share one scale.
            scale_pairs = {chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
                           for chan in ['i', 'o', 'c', 'f']}
            scales = {k: np.maximum(in_qs[names[namei]].scale, in_qs[names[namer]].scale)
                      for k, (namei, namer) in scale_pairs.items()}
            for k, (namei, namer) in scale_pairs.items():
                self.rescale_constant(input_nodes[namei], scales[k])
                self.rescale_constant(input_nodes[namer], scales[k])
            int_scale = pow(2, -int_q)
            int2_scale = pow(2, -(int_q*2))
            int3_scale = pow(2, -(int_q*3))
            # compute scales for perceptrons
            pscales = {k: scales[k] * in_and_state_scale for k in ['i', 'o', 'c', 'f']}
            scale_qtypes = {"r_2_%s_q" % k: MultMulBiasScaleQType(
                scale=pscale/int_scale) for k, pscale in pscales.items()}
            scale_qtypes['cell_in_q'] = MultMulBiasScaleQType(
                scale=in_qs[names['c_state']].scale/int_scale)
            # TODO - Check cell clip here
            scale_qtypes['cell_out_q'] = MultMulBiasScaleQType(
                scale=int2_scale/in_qs[names['c_state']].scale)
            scale_qtypes['state_out_q'] = MultMulBiasScaleQType(scale=int3_scale/in_and_state_scale)
            # set internal scale
            scale_qtypes['i_qtype'] = QType(q=int_q, bits=32, signed=True)
            # set biases to output of perceptron
            for k in ['i', 'o', 'c', 'f']:
                self.rescale_constant(input_nodes["%s_b" % k], pscales[k], dtype=np.int32)
            qrec = MultScalableLstmQuantizationRecord(
                in_qs=in_qs,
                out_qs=[o_q],
                **scale_qtypes,
            )
        else:
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
        return qrec