Beispiel #1
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build a record in which every input and the output share one qtype.

        Returns a ``MultQuantizationRecord``, or ``None`` when going forwards
        with a forced output qtype that differs from one of the inputs.
        """
        forced_list, _ = cls.get_mult_opts(**kwargs)
        forced = forced_list and forced_list[0]

        if kwargs.get('backwards'):
            # a backwards pass only makes sense when the output is forced;
            # the forced qtype is then pushed onto every input
            assert forced, f'going backwards at {params.name} but output is not forced'
            return MultQuantizationRecord(
                in_qs=[forced_list[0]] * len(in_qs),
                out_qs=[deepcopy(forced_list[0])])

        # forwards with a forced output: every input must already match it
        if forced and any(not (in_q == forced) for in_q in in_qs):
            return None

        # identical inputs pass straight through to the output
        first_q = in_qs[0]
        if all(first_q == other_q for other_q in in_qs[1:]):
            return MultQuantizationRecord(in_qs=in_qs, out_qs=[deepcopy(first_q)])

        # the output is not forced here: align everything on the input with
        # the largest scale (first one wins on ties)
        _, widest_q = max(enumerate(in_qs), key=lambda pair: pair[1].scale)
        return MultQuantizationRecord(
            in_qs=[widest_q] * len(in_qs),
            out_qs=[deepcopy(widest_q)])
Beispiel #2
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize a split: each of ``params.num_splits`` outputs copies the input qtype.

        Returns ``None`` (after logging an error) when the forced output
        qtypes disagree with each other or, going forwards, with the input.
        """
        in_q = in_qs[0]
        forced_qs, _ = cls.get_mult_opts(**kwargs)
        # first forced output qtype that is actually set, if any
        forced_q = forced_qs and next(
            (q for q in forced_qs if q is not None), None)

        if forced_q:
            if any(not (q == forced_q) for q in forced_qs if q is not None):
                LOG.error(
                    'split %s is being forced to have different output qtypes',
                    params.name)
                return None
            if kwargs.get('backwards', None):
                # going backwards: the forced qtype becomes the input qtype
                return MultQuantizationRecord(
                    in_qs=[forced_q],
                    out_qs=[deepcopy(forced_q) for _ in range(params.num_splits)])
            if in_q != forced_q:
                LOG.error(
                    'split %s is being forced to have different output to input',
                    params.name)
                return None
            # forced and equal to the input: fall through to the normal case

        split_out_qs = [deepcopy(in_q) for _ in range(params.num_splits)]
        return MultQuantizationRecord(in_qs=in_qs, out_qs=split_out_qs)
Beispiel #3
0
    def common_quantize(cls, in_qtype, out_qtype, node, **kwargs):
        """Import a quantize-style node as a constant or a ``QuantizeParameters`` node.

        A constant input is folded into a ``ConstantInputParameters`` node;
        otherwise a ``QuantizeParameters`` node is created and wired to the
        producer of the first input. The result is registered in
        ``all_nodes`` under the node's first output name and returned.
        """
        all_nodes = kwargs['all_nodes']
        opts = kwargs['opts']
        G = kwargs['G']
        producers = [all_nodes[tensor] for tensor in node.input]
        source = producers[0]

        if cls.is_constant(source):
            LOG.info("reducing %s to a constant", node.name)
            value = source[0].value_as(out_qtype) if out_qtype else cls.get_constant(source)
            params = ConstantInputParameters(node.name,
                                             value=value,
                                             dims=Dim.unnamed(value.shape),
                                             qtype=out_qtype,
                                             constant_store=G.constant_store)
            # a folded constant is already in the output qtype on both sides
            record_in_q = out_qtype
        else:
            params = QuantizeParameters(node.name, from_qtype=in_qtype, to_qtype=out_qtype)
            G.add_edge(NNEdge(from_node=source[0], to_node=params,
                              from_idx=source[1], to_idx=0))
            record_in_q = in_qtype

        if opts.get('load_quantization'):
            G.quantization[NodeId(params)] = MultQuantizationRecord(
                in_qs=[record_in_q], out_qs=[out_qtype])

        all_nodes[node.output[0]] = (params, 0, deepcopy(source[2]))
        return params
    def match(self, G: GraphView, set_identity: bool = True):
        """Create quantization records for nodes whose record is missing or incomplete.

        Visits the entries of ``G.quantization`` whose record is ``None`` or
        has empty ``in_qs``/``out_qs`` and builds a ``MultQuantizationRecord``
        for each from the records of its neighbouring nodes.

        Args:
            G: graph whose ``quantization`` mapping is updated in place.
            set_identity: when true, ``self.set_identity(G)`` is called at the end.

        Returns:
            ``None`` when the graph has no quantization, otherwise always ``False``.

        Raises:
            NotImplementedError: when the input (or output) qtypes could not be
                obtained for a node that does have predecessors (or successors).
        """
        if not G.quantization:
            return
        # the ids are collected into a list first, so the assignments to
        # G.quantization below do not happen while sorted_iterator is running
        for nid in [nid for nid, qrec in G.quantization.sorted_iterator(G) if qrec is None or not (qrec.in_qs and qrec.out_qs)]:
            if nid.fnode_name:
                LOG.warning("can't add quantization to fused node %s", nid.fnode_name)
                continue
            if nid.node_name not in G:
                # previous fusions may have removed nodes from the graph
                continue

            node = nid.get_node(G)
            predecessors = [NodeId(pred) for pred in G.predecessors(node.name)]
            successors = [NodeId(succ) for succs in G.successors(node.name) for succ in succs]
            # go_back: take the input qtypes from the predecessors; chosen when the
            # node has no successors or every predecessor has an entry in G.quantization
            go_back = not successors or (predecessors and all(pred in G.quantization for pred in predecessors))
            # go_forward: take the output qtypes from the successors; chosen when the
            # node has no predecessors or every successor has an entry in G.quantization
            go_forward = not predecessors or (successors and all(succ in G.quantization for succ in successors))

            if not (go_back or go_forward):
                LOG.warning("node %s is not connected to anything and has no quantization", node.name)
                continue

            if go_forward:
                # the generator's `nid` is local to the generator and does not
                # rebind the loop variable
                out_qrecs = set(G.quantization[nid] for nid in successors)
                # only MultQuantizationRecord neighbours are supported; otherwise skip the node
                if not all(isinstance(out_qrec, MultQuantizationRecord) for out_qrec in out_qrecs):
                    continue
                # pairs of (our output index, qtype the consumer expects on that edge)
                # NOTE(review): reduce_qtypes presumably collapses these pairs into a
                # per-index list of qtypes - confirm against its definition
                out_qtypes = reduce_qtypes([(edge.from_idx, G.quantization[NodeId(edge.to_node)].in_qs[edge.to_idx])
                                            for edge in G.out_edges(node.name)])
            else:
                out_qtypes = None
            if go_back:
                in_qrecs = set(G.quantization[nid] for nid in predecessors)
                if not all(isinstance(in_qrec, MultQuantizationRecord) for in_qrec in in_qrecs):
                    continue
                # pairs of (our input index, qtype the producer emits on that edge)
                in_qtypes = reduce_qtypes([(edge.to_idx, G.quantization[NodeId(edge.from_node)].out_qs[edge.from_idx])
                                           for edge in G.in_edges(node.name)])
            else:
                in_qtypes = None

            if not in_qtypes:
                if not predecessors:
                    # graph input: inputs mirror the outputs
                    LOG.info("setting quantization on input node %s", node.name)
                    qrec = MultQuantizationRecord(in_qs=deepcopy(out_qtypes), out_qs=deepcopy(out_qtypes))
                else:
                    raise NotImplementedError("propagating qrecs not implemented")
            elif not out_qtypes:
                if not successors:
                    # graph output: outputs mirror the inputs
                    LOG.info("setting quantization on output node %s", node.name)
                    qrec = MultQuantizationRecord(in_qs=deepcopy(in_qtypes), out_qs=deepcopy(in_qtypes))
                else:
                    raise NotImplementedError("propagating qrecs not implemented")
            else:
                # both sides known: use the neighbours' qtypes as they are
                LOG.info("setting quantization on node %s", node.name)
                qrec = MultQuantizationRecord(in_qs=deepcopy(in_qtypes), out_qs=deepcopy(out_qtypes))

            G.quantization[nid] = qrec

        if set_identity:
            self.set_identity(G)

        # always reports False, even when records were added
        return False
def hsigmoid_mult(params,
                  in_tensors,
                  qrec: MultQuantizationRecord,
                  details=None):
    """Integer hard-sigmoid kernel: offset, clamp, then rescale the input.

    The offset and clamp bounds come from ``hsigmoid_mult_gen_factors``.
    ``details`` is accepted but not used.
    """
    del details
    prepared = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    offset, upper, lower = hsigmoid_mult_gen_factors(params, qrec)
    # widen to int32 before adding the offset
    shifted = prepared.astype(np.int32) + offset
    # clamp into [lower, upper]
    clamped = np.minimum(np.maximum(shifted, lower), upper)
    rescaled = qrec.scale_mul_biases_q.apply_scales(clamped)
    return qrec.get_outputs(params, [rescaled], ktype="symmetric")
Beispiel #6
0
 def replace_function(self, G: GraphView, subgraph: GraphView):
     """Build the fused node that replaces a matched convolution subgraph.

     Args:
         G: graph being rewritten; its ``quantization`` is updated when present.
         subgraph: the matched nodes to fuse.

     Returns:
         A ``(pnode, None, None)`` tuple where ``pnode`` is the new
         ``ConvFusionParameters`` node.

     Raises:
         DontReplaceError: when ``self.validate_match`` rejects the subgraph.
         NotImplementedError: when the first contained node's quantization
             record is not a symmetric, mult or float32 record. This is
             raised before any record is moved, so ``G.quantization`` is
             left untouched.
     """
     if not self.validate_match(subgraph):
         raise DontReplaceError()
     # Number the nodes up to and including the first convolution; its name
     # seeds the name of the fused node.
     # NOTE(review): the break means nodes after the first convolution never
     # get a step_idx, and conv_name stays unbound when the subgraph holds no
     # convolution - presumably validate_match guarantees one; confirm.
     for step, node in enumerate(subgraph.nodes()):
         node.step_idx = step
         if isinstance(node, Conv2DParameters):
             conv_name = node.name + "_fusion"
             break
     LOG.debug("fused nodes %s", ",".join((node.name for node in subgraph.nodes())))
     pnode = ConvFusionParameters(conv_name, fusion_type=self.fusion_type, subgraph=subgraph)
     if G.quantization:
         qrecs = G.quantization.get_all(pnode.contained_nodes())
         if qrecs:
             first_qrec, last_qrec = qrecs[0], qrecs[-1]
             # the fused record family follows the first contained node
             if isinstance(first_qrec, (SymmetricQuantizationRecord, SymmetricScalableFilterQuantizationRecord)):
                 fused_cls = SymmetricQuantizationRecord
             elif isinstance(first_qrec, (MultQuantizationRecord, MultScalableFilterQuantizationRecord)):
                 fused_cls = MultQuantizationRecord
             elif isinstance(first_qrec, (Float32QuantizationRecord, Float32ScalableFilterQuantizationRecord)):
                 fused_cls = Float32QuantizationRecord
             else:
                 # previously this fell through and hit an unbound `prec`
                 # only after the contained records had already been moved
                 raise NotImplementedError(
                     f"cannot fuse quantization record of type {type(first_qrec).__name__}")
             # the fusion takes the first node's inputs and the last node's outputs
             prec = fused_cls(in_qs=first_qrec.in_qs, out_qs=last_qrec.out_qs)
             for node in pnode.contained_nodes():
                 G.quantization.move_to_fusion(node, pnode)
             G.quantization[NodeId(pnode)] = prec
     return pnode, None, None
Beispiel #7
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize an activation node.

        The output qtype is the forced one when given, otherwise it is
        derived from ``stats['range_out'][0]``. For hard-swish/hard-sigmoid
        the input is re-quantized to [-6, 6] when its current range does not
        reach 6.
        """
        forced_list, out_dtype = cls.get_mult_opts(**kwargs)
        forced = forced_list and forced_list[0]

        if isinstance(params, (HSwishActivationParameters, HSigmoidActivationParameters)):
            first_in_q = in_qs[0]
            # largest magnitude representable with the current input qtype
            if first_in_q.scale * 2 ** (first_in_q.bits - 1) < 6:
                in_qs = [QType.from_min_max_sq(-6, 6, dtype=first_in_q.dtype)]

        if not forced:
            out_range = stats['range_out'][0]
            out_q = QType.from_min_max_sq(out_range['min'],
                                          out_range['max'],
                                          dtype=out_dtype)
            return MultQuantizationRecord(in_qs=in_qs, out_qs=[out_q])

        fusion = kwargs.get('fusion', None)
        in_conv_fusion = fusion and fusion.fusion_type in ['conv_active_pool', 'conv_active']
        # inside a conv fusion the input takes the forced qtype as well, except
        # for sigmoid, tanh, hard-swish and hard-sigmoid
        if in_conv_fusion and not isinstance(
                params,
                (SigmoidActivationParameters, TanHActivationParameters,
                 HSwishActivationParameters, HSigmoidActivationParameters)):
            in_qs = [deepcopy(forced)]
        return MultQuantizationRecord(in_qs=in_qs, out_qs=[deepcopy(forced)])
Beispiel #8
0
    def replace_function(self, G: GraphView, subgraph: GraphView):
        """Replace a relu / constant / multiply subgraph with one hard-sigmoid node.

        Returns ``(activation, None, None)``. When the graph is quantized, the
        constant's record is deleted and the new node gets a record with the
        relu's inputs and the multiply's outputs.

        Raises:
            NotImplementedError: when the relu's record is not a symmetric,
                mult or float32 record.
        """
        relu_node = constant_node = mul_node = None
        # the last node of each kind wins
        for candidate in subgraph.nodes():
            if isinstance(candidate, ReluActivationParameters):
                relu_node = candidate
                continue
            if isinstance(candidate, ConstantInputParameters):
                constant_node = candidate
                continue
            if isinstance(candidate, MatrixMulParameters):
                mul_node = candidate

        activation = HSigmoidActivationParameters(
            mul_node.name + "_fused_close_hsigmoid", offset=0)

        if not G.quantization:
            return activation, None, None

        relu_qrec = G.quantization[NodeId(relu_node)]
        mul_qrec = G.quantization[NodeId(mul_node)]
        del G.quantization[NodeId(constant_node)]
        # the new record is of the same family as the relu's record
        for record_cls in (SymmetricQuantizationRecord,
                           MultQuantizationRecord,
                           Float32QuantizationRecord):
            if isinstance(relu_qrec, record_cls):
                fused_qrec = record_cls(in_qs=relu_qrec.in_qs,
                                        out_qs=mul_qrec.out_qs)
                break
        else:
            raise NotImplementedError()
        G.quantization[NodeId(activation)] = fused_qrec
        return activation, None, None
Beispiel #9
0
    def _import_nodes(self, G, graph, handlers, all_nodes, outputs, opts):
        """Run the matching handler on every node of ``graph`` and wire up graph outputs.

        Raises:
            ValueError: when no handler (or no custom-operation handler) is
                registered for a node.
        """
        for node in graph.nodes:
            handler = handlers.get(node.op_name, None)
            if not handler:
                # NOTE(review): the lookup uses op_name but the message uses
                # op_type - confirm both attributes exist on the node
                raise ValueError("no handler found for %s" % node.op_type)
            if node.is_custom:
                # custom operations are looked up a second time by custom name
                handler = handler.get(node.custom_op_name, None)
                if not handler:
                    raise ValueError("no handler found for custom operation %s" %
                                     node.custom_op_name)

            params = handler.handle(node, all_nodes=all_nodes, G=G, opts=opts, importer=self)
            if params is None:
                continue

            load_quantization = opts.get
            for out_idx, tensor_name in enumerate(node.output):
                sink = outputs.get(tensor_name)
                if not sink:
                    # this tensor is not a graph output
                    continue
                G.add_edge(NNEdge(from_node=params,
                                  to_node=sink[0], from_idx=out_idx, to_idx=sink[1]))
                if load_quantization('load_quantization'):
                    # the output node uses the producer's output qtype on both sides
                    out_q = deepcopy(G.quantization[NodeId(params)].out_qs[out_idx])
                    G.quantization[NodeId(sink[0])] = MultQuantizationRecord(
                        in_qs=[out_q],
                        out_qs=[out_q]
                    )
Beispiel #10
0
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Quantize with a fixed [-1.0, 1.0] output range.

     Returns ``None`` when an output qtype is forced, since the output range
     here is fixed.
     """
     forced_qs, out_dtype = cls.get_mult_opts(**kwargs)
     if forced_qs and forced_qs[0]:
         return None
     unit_range_q = QType.from_min_max_sq(min_val=-1.0, max_val=1.0, dtype=out_dtype)
     return MultQuantizationRecord(in_qs=in_qs, out_qs=[unit_range_q])
Beispiel #11
0
 def replace_function(self, G: NNGraph, subgraph: GraphView):
     """Build the fused node that replaces a matched linear + activation subgraph.

     Args:
         G: graph being rewritten; its ``quantization`` is updated when present.
         subgraph: the matched nodes to fuse.

     Returns:
         A ``(pnode, None, None)`` tuple where ``pnode`` is the new
         ``ConvFusionParameters`` node of fusion type ``"linear_active"``.

     Raises:
         NotImplementedError: when the first contained node's quantization
             record is not a symmetric, mult or float32 record. This is
             raised before any record is moved, so ``G.quantization`` is
             left untouched.
     """
     # Number the nodes up to and including the first linear layer; its name
     # seeds the name of the fused node.
     # NOTE(review): the break means nodes after the first FcParameters never
     # get a step_idx, and linear_name stays unbound when the subgraph holds
     # no FcParameters - presumably the matcher guarantees one; confirm.
     for step, node in enumerate(subgraph.nodes()):
         node.step_idx = step
         if isinstance(node, FcParameters):
             linear_name = node.name + "_fusion"
             break
     LOG.info("fusing nodes %s", ",".join(
         (node.name for node in subgraph.nodes())))
     pnode = ConvFusionParameters(linear_name, fusion_type="linear_active", subgraph=subgraph)
     if G.quantization:
         qrecs = G.quantization.get_all(pnode.contained_nodes())
         if qrecs:
             first_qrec, last_qrec = qrecs[0], qrecs[-1]
             # the fused record family follows the first contained node
             if isinstance(first_qrec, (SymmetricQuantizationRecord, SymmetricScalableFilterQuantizationRecord)):
                 fused_cls = SymmetricQuantizationRecord
             elif isinstance(first_qrec, (MultQuantizationRecord, MultScalableFilterQuantizationRecord)):
                 fused_cls = MultQuantizationRecord
             elif isinstance(first_qrec, (Float32QuantizationRecord, Float32ScalableFilterQuantizationRecord)):
                 fused_cls = Float32QuantizationRecord
             else:
                 # previously this fell through and hit an unbound `prec`
                 # only after the contained records had already been moved
                 raise NotImplementedError(
                     f"cannot fuse quantization record of type {type(first_qrec).__name__}")
             # the fusion takes the first node's inputs and the last node's outputs
             prec = fused_cls(in_qs=first_qrec.in_qs, out_qs=last_qrec.out_qs)
             for node in pnode.contained_nodes():
                 G.quantization.move_to_fusion(node, pnode)
             G.quantization[NodeId(pnode)] = prec
     return pnode, None, None
Beispiel #12
0
 def replace_function(self, G: NNGraph, subgraph: GraphView):
     """Build the ``ActivationFusion`` node that replaces a matched two-node subgraph.

     Args:
         G: graph being rewritten; its ``quantization`` is updated when present.
         subgraph: the matched nodes; the first two returned by
             ``subgraph.nodes()`` get step indexes 0 and 1.

     Returns:
         The new ``ActivationFusion`` node.

     Raises:
         NotImplementedError: when the first node's quantization record is
             not a symmetric, mult or float32 record. This is raised before
             any record is moved, so ``G.quantization`` is left untouched.
     """
     nodes = list(subgraph.nodes())
     pnode = ActivationFusion(nodes[0].name + "fusion",
                              nodes[0].op_name + "_active", subgraph)
     nodes[0].step_idx = 0
     nodes[1].step_idx = 1
     LOG.debug("fused nodes %s", ",".join((node.name for node in nodes)))
     if G.quantization:
         qrecs = G.quantization.get_all(subgraph.nodes())
         if qrecs:
             first_qrec, last_qrec = qrecs[0], qrecs[-1]
             # the fused record family follows the first node
             if isinstance(first_qrec,
                           (SymmetricQuantizationRecord,
                            SymmetricScalableFilterQuantizationRecord)):
                 fused_cls = SymmetricQuantizationRecord
             elif isinstance(first_qrec,
                             (MultQuantizationRecord,
                              MultScalableFilterQuantizationRecord)):
                 fused_cls = MultQuantizationRecord
             elif isinstance(first_qrec,
                             (Float32QuantizationRecord,
                              Float32ScalableFilterQuantizationRecord)):
                 fused_cls = Float32QuantizationRecord
             else:
                 # previously this fell through and hit an unbound `prec`
                 # only after the node records had already been moved
                 raise NotImplementedError(
                     f"cannot fuse quantization record of type {type(first_qrec).__name__}")
             # the fusion takes the first node's inputs and the last node's outputs
             prec = fused_cls(in_qs=first_qrec.in_qs, out_qs=last_qrec.out_qs)
             for node in subgraph.nodes():
                 G.quantization.move_to_fusion(node, pnode)
             G.quantization[NodeId(pnode)] = prec
     return pnode
Beispiel #13
0
 def quantize_fusion(self, G, node, in_qs, dtype):
     """Quantize each node inside a fusion, chaining outputs into the next node's inputs.

     Every contained node's record is stored in ``self.qrecs`` under
     ``NodeId(node, contained_node)``. Returns a ``MultQuantizationRecord``
     from the fusion's original inputs to the last contained node's outputs.
     """
     remaining = node.contained_nodes()
     current_qs = in_qs
     if node.fusion_type in ['conv_active_pool', 'conv_active']:
         conv_node, act_node = remaining[0], remaining[1]
         # the convolution is quantized with the statistics of the activation
         # that follows it, and an int8 output
         act_stats = self._activation_stats.get(NodeId(node, act_node))
         conv_qrec = self.calculate_q(G,
                                      conv_node,
                                      act_stats,
                                      current_qs,
                                      dtype,
                                      out_dtype=np.int8)
         self.qrecs[NodeId(node, conv_node)] = conv_qrec
         current_qs = conv_qrec.out_qs
         remaining = remaining[1:]
     for contained in remaining:
         contained_stats = self._activation_stats.get(NodeId(node, contained))
         contained_qrec = self.calculate_q(G, contained, contained_stats, current_qs, dtype)
         self.qrecs[NodeId(node, contained)] = contained_qrec
         current_qs = contained_qrec.out_qs
     return MultQuantizationRecord(in_qs=in_qs, out_qs=current_qs)
Beispiel #14
0
    def match(self, G: GraphView, set_identity: bool = True):
        """Fuse each convolution with the activation/pooling nodes that follow it.

        For every ``Conv2DParameters`` node, ``self.get_node_list`` supplies the
        candidate chain. The chain is replaced in ``G`` by a single
        ``ConvFusionParameters`` node, and any quantization records are moved
        onto the fusion.

        Args:
            G: graph that is modified in place.
            set_identity: when true, ``self.set_identity(G)`` is called at the end.

        Returns:
            ``True`` when at least one fusion was created, otherwise ``False``.
        """
        has_modified_graph = False
        # the convolutions are collected up front because the loop body removes nodes from G
        for conv_node in [params for params in G.nodes() if isinstance(params, Conv2DParameters)]:
            node_list = self.get_node_list(G, conv_node)
            # nothing to fuse unless at least one node follows the convolution
            if node_list is None or len(node_list.order) < 2:
                continue
            if node_list.fusion_type == 'conv_active_pool':
                if node_list.pool.pool_type == "average":
                    # drop the average pool: only the first two nodes are fused
                    # NOTE(review): fusion_type is left as 'conv_active_pool' here
                    # although the pool is no longer part of the fusion - confirm intended
                    node_list.order = node_list.order[:2:]
                    node_list.pool = None
            elif node_list.fusion_type == 'conv_pool_active':
                # an average pool followed by a non-relu activation is not fused
                if node_list.pool.pool_type == "average" and node_list.active.activation != "relu":
                    continue
            LOG.info("fusing nodes %s", ",".join((node.name for node in node_list.order)))
            has_modified_graph = True
            # build the internal subgraph as a simple chain in fusion order
            subgraph = GraphView()
            last_node = None
            for node in node_list.order:
                if last_node is not None:
                    subgraph.add_edge(NNEdge(from_node=last_node, to_node=node))
                last_node = node
            # the fusion's three inputs map onto the convolution's inputs 0..2,
            # and its single output onto output 0 of the last node in the chain
            input_mapping = [[(node_list.conv, idx)] for idx in range(3)]
            output_mapping = [(last_node, 0)]
            pnode = ConvFusionParameters(
                node_list.conv.name + '_fusion',
                fusion_type=node_list.fusion_type,
                subgraph=subgraph,
                in_dims_hint=node_list.conv.in_dims_hint,
                out_dims_hint=node_list.conv.out_dims_hint,
                input_mapping=input_mapping,
                output_mapping=output_mapping)
            if G.quantization:
                qrecs = G.quantization.get_all(pnode.contained_nodes())
                if qrecs:
                    # the fusion record takes the first node's inputs and the last
                    # node's outputs; it stays None when the first record is of an
                    # unrecognised type
                    prec = None
                    if isinstance(qrecs[0], (SymmetricQuantizationRecord, SymmetricScalableFilterQuantizationRecord)):
                        prec = SymmetricQuantizationRecord(
                            in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
                    elif isinstance(qrecs[0], (MultQuantizationRecord, MultScalableFilterQuantizationRecord)):
                        prec = MultQuantizationRecord(in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
                    elif isinstance(qrecs[0], (Float32QuantizationRecord, Float32ScalableFilterQuantizationRecord)):
                        prec = Float32QuantizationRecord(
                            in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
                    for node in pnode.contained_nodes():
                        G.quantization.move_to_fusion(node, pnode)
                    G.quantization[NodeId(pnode)] = prec
            # capture the boundary edges before the fused nodes are removed
            in_edges = G.in_edges(node_list.conv.name)
            out_edges = G.out_edges(last_node.name)
            for node in node_list.order:
                G.remove(node)
            # reconnect the captured edges to the fusion node, keeping the same indexes
            for edge in in_edges:
                G.add_edge(NNEdge(edge.from_node, pnode, from_idx=edge.from_idx, to_idx=edge.to_idx))
            for edge in out_edges:
                G.add_edge(NNEdge(pnode, edge.to_node, from_idx=edge.from_idx, to_idx=edge.to_idx))

        if set_identity:
            self.set_identity(G)

        return has_modified_graph
Beispiel #15
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize a node whose output copies the qtype of its first input.

        Returns ``None`` when going forwards with a forced output qtype that
        differs from one of the inputs.
        """
        forced_qs, _ = cls.get_mult_opts(**kwargs)
        forced_q = forced_qs and forced_qs[0]

        if kwargs.get('backwards'):
            # a backwards pass needs a forced output, which is then used for
            # every input as well
            assert forced_q, f'going backwards at {params.name} but output is not forced'
            forced_inputs = [deepcopy(forced_q)] * len(in_qs)
            return MultQuantizationRecord(in_qs=forced_inputs,
                                          out_qs=[deepcopy(forced_q)])

        # forwards with a forced output: every input must already match it
        if forced_q and any(not (in_q == forced_q) for in_q in in_qs):
            return None

        passthrough_q = deepcopy(in_qs[0])
        return MultQuantizationRecord(in_qs=in_qs, out_qs=[passthrough_q])
Beispiel #16
0
    def average_execute_mult(cls, params,
                             in_tensors,
                             qrec: MultQuantizationRecord):
        """Integer mean over ``params.axis`` followed by the record's output scaling."""
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
        out_dims = params.out_dims[0]
        qrec.set_scale(in_idx=0, out_idx=0)

        reduced_sum = np.sum(in_tensor,
                             dtype=np.int32,
                             axis=tuple(params.axis),
                             keepdims=params.keep_dims)
        # number of elements folded into each sum
        reduced_dims = [dim for axis_idx, dim in enumerate(in_tensor.shape)
                        if axis_idx in params.axis]
        count = reduce(lambda acc, dim: acc * dim, reduced_dims)
        # divide with 7 extra bits, truncate to int32, then normalise by 7
        widened_mean = ((reduced_sum << 7) / count).astype(np.int32)
        mean = at_norm(widened_mean, 7)
        scaled = qrec.scale_mul_biases_q.apply_scales(mean)
        return qrec.get_outputs(params,
                                [scaled.reshape(out_dims.shape)],
                                ktype="symmetric")
Beispiel #17
0
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Quantize with a fixed int16 output covering [-1, 1] at scale 2**-15.

     Returns ``None`` when an output qtype is forced, since the output here
     is fixed.
     """
     forced_qs, _ = cls.get_mult_opts(**kwargs)
     if forced_qs and forced_qs[0]:
         return None
     # the input qtype is copied unchanged; no power-of-two rescaling is applied
     in_q_copy = deepcopy(in_qs[0])
     fixed_out_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
     return MultQuantizationRecord(in_qs=[in_q_copy], out_qs=[fixed_out_q])
Beispiel #18
0
    def average_execute(cls, params,
                        in_tensors,
                        qrec: MultQuantizationRecord):
        """Integer mean over ``params.axis`` using a per-element normalised reciprocal."""
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
        out_dims = params.out_dims[0]

        reduced_sum = np.sum(in_tensor,
                             dtype=np.int32,
                             axis=tuple(params.axis),
                             keepdims=params.keep_dims)
        sum_shape = reduced_sum.shape

        # per-element shift: 31 minus gap_clb of each sum
        thirty_one = np.array([31], dtype=np.int32)
        norm = (thirty_one - gap_clb(reduced_sum.flatten())).astype(np.int32)
        # number of elements folded into each sum
        reduced_dims = [dim for axis_idx, dim in enumerate(in_tensor.shape)
                        if axis_idx in params.axis]
        count = reduce(lambda acc, dim: acc * dim, reduced_dims)
        # reciprocal of the count in the per-element fixed-point format
        inv_count = ((1 << norm) // count).reshape(sum_shape)
        mean = at_norm((inv_count * reduced_sum), norm.reshape(sum_shape))
        clipped = qrec.out_qs[0].clip(mean)
        return qrec.get_outputs(params,
                                [clipped.reshape(out_dims.shape)],
                                ktype="symmetric")
Beispiel #19
0
    def calculate_q(self, G, node, astats, in_qs, dtype, out_dtype=None):
        """Build the quantization record for a single node.

        Args:
            G: unused.
            node: node to quantize; its class selects the record type.
            astats: mapping with ``'min'`` and ``'max'``; not read for
                pooling, output and softmax nodes.
            in_qs: qtypes of the node's inputs.
            dtype: dtype used for filter weights and, by default, the output.
            out_dtype: output dtype; falls back to ``dtype`` when ``None``.
        """
        del G
        if out_dtype is None:
            out_dtype = dtype

        # output qtype first
        if isinstance(node, (PoolingParameters, OutputParameters)):
            # these nodes keep the qtype of their first input
            o_q = in_qs[0]
        elif isinstance(node, SoftMaxParameters):
            # fixed int16 output over [-1, 1]
            o_q = SymmetricMultQType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
        else:
            o_q = SymmetricMultQType.from_min_max(min_val=astats['min'],
                                                  max_val=astats['max'],
                                                  dtype=out_dtype)

        # then the record type; the order of these checks is significant
        if isinstance(node, (MatrixAddParameters, MatrixSubParameters)):
            return MultAddQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

        if isinstance(node, (MatrixBroadcastedLinearOpParameters, MatScaleFusionParameters, GlobalPoolParameters)):
            return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

        if isinstance(node, ConstantInputParameters):
            return MultConstantQuantizationRecord(out_qs=[o_q],
                                                  constants_are_quantized=False)

        if isinstance(node, (FcParameters, Conv2DParameters)):
            filter_in_q = in_qs[0]
            weights_q = SymmetricMultQType.from_array(arr=node.weights,
                                                      quantized_dimension=self.get_quantized_dimension(node),
                                                      dtype=dtype, narrow_range=self._narrow_weights)
            # biases use the product of weight and input scales; without a
            # bias a scale of one is used
            if node.has_bias:
                bias_scale = weights_q.scale * filter_in_q.scale
            else:
                bias_scale = np.array([1], dtype=np.int32)
            biases_q = SymmetricMultBiasesQType(dtype=np.int32, scale=bias_scale)
            mul_biases_q = MultMulBiasScaleQType.from_filter(filter_in_q, weights_q, o_q, node)
            filter_qrec = MultScalableFilterQuantizationRecord(in_qs=[filter_in_q],
                                                               out_qs=[o_q],
                                                               weights_q=weights_q,
                                                               biases_q=biases_q,
                                                               mul_biases_q=mul_biases_q,
                                                               constants_are_quantized=False)
            LOG.debug("filter %s qrec %s", node.name, filter_qrec)
            return filter_qrec

        return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
Beispiel #20
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantize with the forced output qtype, or one derived from ``stats['range_out'][0]``."""
        forced_qs, out_dtype = cls.get_mult_opts(**kwargs)
        forced_q = forced_qs and forced_qs[0]
        if forced_q:
            return MultQuantizationRecord(in_qs=in_qs, out_qs=[deepcopy(forced_q)])

        out_range = stats['range_out'][0]
        measured_q = QType.from_min_max_sq(out_range['min'],
                                           out_range['max'],
                                           dtype=out_dtype)
        return MultQuantizationRecord(in_qs=in_qs, out_qs=[measured_q])
Beispiel #21
0
def av_global_pool_mult(params,
                        in_tensors,
                        qrec: MultQuantizationRecord,
                        details=None):
    """Integer global average pool over the 'w' and 'h' axes, then output scaling.

    ``details`` is accepted but not used.
    """
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]
    qrec.set_scale(in_idx=0, out_idx=0)

    spatial_axes = (in_dims.get_order_idx('w'), in_dims.get_order_idx('h'))
    channel_sums = np.sum(in_tensor, dtype=np.int32, axis=spatial_axes)

    # divide with 7 extra bits (floor division), then normalise by 7
    window_size = in_dims.h * in_dims.w
    channel_means = at_norm((channel_sums << 7) // window_size, 7)
    scaled = qrec.scale_mul_biases_q.apply_scales(channel_means)
    return qrec.get_outputs(params, [scaled.reshape(out_dims.shape)],
                            ktype="symmetric")
Beispiel #22
0
    def set_c_state_as_output(self, G):
        """Add a graph output fed from this node's output index 1.

        When this node has a quantization record, the qtype of its
        ``c_state`` input is appended to the record's outputs and used for
        the new output node's record.
        """
        c_state_output = G.add_output()
        own_qrec = G.quantization and G.quantization.get(NodeId(self))
        if own_qrec:
            c_state_q = own_qrec.in_qs[self.INPUT_NAMES.index('c_state')]
            # the same qtype object is used for the new output and its record
            own_qrec.out_qs.append(c_state_q)
            G.quantization[NodeId(c_state_output)] = MultQuantizationRecord(
                in_qs=[c_state_q], out_qs=[c_state_q])

        G.add_edge(NNEdge(self, c_state_output, from_idx=1))
        G.add_dimensions()
Beispiel #23
0
def piecewise_mult(params,
                   in_tensors,
                   qrec: MultQuantizationRecord,
                   details=None):
    """Run a two-input piecewise operator on quantized tensors.

    The operator and its kind are looked up in ``PIECEWISE_OPS`` by the class
    of ``params``. Multiplicative operators combine both inputs as int32 and
    pass ``np.int32`` as the third operator argument. The other operators
    first rescale one of the inputs (selected by ``qrec.scaled_idx``) with
    ``qrec.scale_in_mul_biases_q`` and pass ``None`` as the third argument.
    The result is rescaled with ``qrec.scale_mul_biases_q`` and clipped to
    the first output qtype.

    Args:
        params: node parameters; their class selects the operator.
        in_tensors: the two input tensors.
        qrec: quantization record of the node.
        details: unused.

    Returns:
        Whatever ``qrec.get_outputs`` returns for the clipped result.
    """
    del details
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    op_entry = PIECEWISE_OPS[params.__class__]
    operation = op_entry['op']
    if op_entry['is_mult']:
        qrec.set_scale(in_idx=(0, 1), out_idx=0)
        lhs = in_tensors[0].astype(np.int32)
        rhs = in_tensors[1].astype(np.int32)
        combined = operation(lhs, rhs, np.int32)
    else:
        # larger scale should be scaled
        qrec.set_add_scale()
        if qrec.scaled_idx:
            lhs = in_tensors[0].astype(np.int32)
            rhs = qrec.scale_in_mul_biases_q.apply_scales(in_tensors[1])
        else:
            lhs = qrec.scale_in_mul_biases_q.apply_scales(in_tensors[0])
            rhs = in_tensors[1].astype(np.int32)
        combined = operation(lhs, rhs, None)

    rescaled = qrec.scale_mul_biases_q.apply_scales(combined)
    clipped = qrec.out_qs[0].clip(rescaled)
    return qrec.get_outputs(params, [clipped], ktype="symmetric")
Beispiel #24
0
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Build a fixed-range record for a node that cannot have its output forced.

        Returns ``None`` when the first forced output qtype reported by
        ``cls.get_mult_opts`` is truthy. Otherwise the input qtype has scale 1
        with ``params.input_dtype`` and the output qtype covers [-1, 1] in
        ``params.output_dtype`` with ``narrow_range=True``. The supplied
        ``in_qs`` and ``stats`` are not used.
        """
        forced_qs, _ignored = cls.get_mult_opts(**kwargs)
        if forced_qs and forced_qs[0]:
            # a forced output qtype cannot be honoured by this handler
            return None

        dtype_out = params.output_dtype
        dtype_in = params.input_dtype
        unit_in_q = QType(scale=1, dtype=dtype_in)
        unit_range_out_q = QType.from_min_max_sq(
            -1, 1, dtype=dtype_out, narrow_range=True)
        return MultQuantizationRecord(in_qs=[unit_in_q],
                                      out_qs=[unit_range_out_q])
Beispiel #25
0
 def quantize_forward_fusion(self, pparams, in_qs, out_dtype, **kwargs):
     """Quantize the nodes contained in a fusion, chaining qtypes forward.

     Each contained node is quantized by the handler registered for its class
     in ``self.handlers[0]`` (falling back to the ``'__default__'`` entry).
     The output qtypes of one node become the input qtypes of the next, and
     every resulting record is stored in ``self.qrecs`` under the
     (fusion, node) id.

     Returns:
         A ``MultQuantizationRecord`` from the fusion inputs ``in_qs`` to the
         output qtypes of the last contained node.
     """
     running_qs = in_qs
     for inner in pparams.contained_nodes():
         inner_handler = self.handlers[0].get(
             inner.__class__, self.handlers[0]['__default__'])
         inner_id = NodeId(pparams, inner)
         inner_stats = kwargs['all_stats'].get(inner_id)
         inner_qrec = self.handle(inner_handler,
                                  inner,
                                  running_qs,
                                  out_dtype,
                                  stats=inner_stats,
                                  fusion=pparams,
                                  **kwargs)
         assert inner_qrec, "handler did not return a result"
         self.qrecs[NodeId(pparams, inner)] = inner_qrec
         running_qs = inner_qrec.out_qs
     return MultQuantizationRecord(in_qs=in_qs, out_qs=running_qs)
Beispiel #26
0
    def fuse_activation(cls, tfl_opts, name, params, **kwargs):
        """Append the activation fused into a TFLite operator as its own node.

        When the operator declares a fused activation, the matching
        activation node is created from ``cls.TF_ACTIVATIONS``. When it
        declares none but a loaded multiplicative quantization record shows
        an output minimum of 0, an omitted relu is assumed: 'relu6' if the
        rounded output maximum is 6, 'relu' otherwise.

        Args:
            tfl_opts: TFLite operator options exposing
                ``FusedActivationFunction()``.
            name: base name; the activation is called ``name + "_activation"``.
            params: node the activation is attached to.
            **kwargs: must contain ``'G'`` (the graph) and ``'opts'`` (import
                options, ``'load_quantization'`` is read).

        Returns:
            The activation node when one was added, otherwise ``params``.
        """
        G = kwargs['G']
        opts = kwargs['opts']
        if opts.get('load_quantization') and NodeId(params) in G.quantization:
            node_qrec = G.quantization[NodeId(params)]
        else:
            node_qrec = None
        aparams = None
        if tfl_opts.FusedActivationFunction() == ActivationFunctionType.NONE:
            if node_qrec is not None and isinstance(
                    node_qrec, MultQuantizationRecordBase):
                # here we have no activation in an asymmetric qtype -> may be an omitted relu
                if node_qrec.out_qs[0].min_val == 0:
                    if np.all(np.round(node_qrec.out_qs[0].max_val) == 6):
                        aparams = ActivationParameters.get_activation(
                            'relu6', name + "_activation")
                    else:
                        aparams = ActivationParameters.get_activation(
                            'relu', name + "_activation")
        else:
            aparams = ActivationParameters.get_activation(
                cls.TF_ACTIVATIONS[tfl_opts.FusedActivationFunction()],
                name + "_activation")
        if aparams:
            G.add_edge(NNEdge(from_node=params, to_node=aparams))

            # Reuse the guarded lookup above: node_qrec is only set when
            # quantization is being loaded and params actually has a record,
            # so a node without a record no longer raises KeyError here.
            if node_qrec is not None:
                # In between the fused operation and activation the
                # transfer is in int32 representation
                ina_qtype = deepcopy(node_qrec.out_qs[0])
                outa_qtype = deepcopy(ina_qtype)
                G.quantization[NodeId(aparams)] = MultQuantizationRecord(
                    in_qs=[ina_qtype], out_qs=[outa_qtype])
            params = aparams
        return params
    def match(self, G: GraphView, set_identity: bool = True):
        """Fuse pad nodes with the add (and optional activation) they feed.

        For every ``PadParameters`` node for which ``self.get_node_list``
        returns at least two nodes, the matched nodes are moved into a
        subgraph wrapped by a ``PaddedAddFusionParameters`` node, their
        quantization records (if any) are moved to the fusion, and the
        surrounding edges are reconnected to the fusion node.

        Args:
            G: graph to rewrite in place.
            set_identity: when true, ``self.set_identity(G)`` is called at the
                end.

        Returns:
            True if at least one fusion was created.
        """
        graph_changed = False
        pad_candidates = [
            candidate for candidate in G.nodes()
            if isinstance(candidate, PadParameters)
        ]
        for pad_node in pad_candidates:
            node_list = self.get_node_list(G, pad_node)
            if node_list is None or len(node_list.order) < 2:
                continue
            fused_names = ",".join(member.name for member in node_list.order)
            LOG.info("fusing nodes %s", fused_names)
            graph_changed = True

            # build the subgraph held by the fusion node
            subgraph = GraphView()
            # input index of the add that is fed by the pad
            padded_input_idx = G.out_edges(node_list.pad.name)[0].to_idx
            subgraph.add_edge(
                NNEdge(from_node=node_list.pad,
                       to_node=node_list.add,
                       to_idx=padded_input_idx))
            node_list.add.force_quantized_index = 0
            if node_list.active:
                subgraph.add_edge(
                    NNEdge(from_node=node_list.add, to_node=node_list.active))
                last_node = node_list.active
            else:
                last_node = node_list.add

            pad_feeds_first_input = padded_input_idx == 0
            if pad_feeds_first_input:
                input_mapping = [[(node_list.pad, 0)], [(node_list.add, 1)]]
            else:
                input_mapping = [[(node_list.add, 0)], [(node_list.pad, 1)]]

            pnode = PaddedAddFusionParameters(
                "PADDED_" + node_list.add.name,
                fusion_type=node_list.fusion_type,
                subgraph=subgraph,
                input_mapping=input_mapping,
                output_mapping=[(last_node, 0)])

            if G.quantization:
                contained_qrecs = G.quantization.get_all(
                    pnode.contained_nodes())
                if contained_qrecs:
                    # the fusion record takes the family of the first
                    # contained record; it stays None for unknown families
                    record_families = (
                        ((SymmetricQuantizationRecord,
                          SymmetricScalableFilterQuantizationRecord),
                         SymmetricQuantizationRecord),
                        ((MultQuantizationRecord,
                          MultScalableFilterQuantizationRecord),
                         MultQuantizationRecord),
                        ((Float32QuantizationRecord,
                          Float32ScalableFilterQuantizationRecord),
                         Float32QuantizationRecord),
                    )
                    fusion_qrec = None
                    for family, record_cls in record_families:
                        if isinstance(contained_qrecs[0], family):
                            fusion_qrec = record_cls(
                                in_qs=contained_qrecs[0].in_qs,
                                out_qs=contained_qrecs[-1].out_qs)
                            break
                    for contained in pnode.contained_nodes():
                        G.quantization.move_to_fusion(contained, pnode)
                    G.quantization[NodeId(pnode)] = fusion_qrec

            # collect the external edges before the matched nodes are removed
            # NOTE(review): each incoming edge keeps its original to_idx when
            # reattached below - confirm that is the intended input index on
            # the fused node when the pad does not feed input 0 of the add
            if pad_feeds_first_input:
                in_edges = (G.in_edges(node_list.pad.name) +
                            G.indexed_in_edges(node_list.add.name)[1:])
            else:
                in_edges = (G.indexed_in_edges(node_list.add.name)[0:1] +
                            G.in_edges(node_list.pad.name))
            out_edges = G.out_edges(last_node.name)

            for matched in node_list.order:
                G.remove(matched)
            for incoming in in_edges:
                G.add_edge(
                    NNEdge(incoming.from_node,
                           pnode,
                           from_idx=incoming.from_idx,
                           to_idx=incoming.to_idx))
            for outgoing in out_edges:
                G.add_edge(
                    NNEdge(pnode,
                           outgoing.to_node,
                           from_idx=outgoing.from_idx,
                           to_idx=outgoing.to_idx))

        if set_identity:
            self.set_identity(G)

        return graph_changed
Beispiel #28
0
    def match(self, G: GraphView, set_identity: bool = True):
        """Fuse matmul nodes with a following bias add and/or activation.

        For every ``MatMulOpParameters`` node for which
        ``self.get_node_list`` returns at least two nodes, the matched nodes
        are replaced by a ``MatMulOpFusionParameters`` node. Quantization
        records of the contained nodes (if any) are moved to the fusion. The
        matmul inputs are reattached to the fusion and, when an add was
        matched, its constant input is connected to fusion input 2.

        Args:
            G: graph to rewrite in place.
            set_identity: when true, ``self.set_identity(G)`` is called at the
                end.

        Returns:
            True if at least one fusion was created.
        """
        graph_changed = False
        matmul_candidates = [
            candidate for candidate in G.nodes()
            if isinstance(candidate, MatMulOpParameters)
        ]
        for matmul_node in matmul_candidates:
            node_list = self.get_node_list(G, matmul_node)
            if node_list is None or len(node_list.order) < 2:
                continue
            fused_names = ",".join(member.name for member in node_list.order)
            LOG.info("fusing nodes %s", fused_names)
            graph_changed = True

            subgraph = GraphView()
            if node_list.active is not None:
                subgraph.add_edge(
                    NNEdge(from_node=node_list.matmul,
                           to_node=node_list.active))

            # the two matmul operands, plus the bias when an add was matched
            input_mapping = [[(node_list.matmul, 0)], [(node_list.matmul, 1)]]
            if node_list.add:
                input_mapping.append([(node_list.matmul, 2)])
            if node_list.active:
                output_mapping = [(node_list.active, 0)]
            else:
                output_mapping = [(node_list.matmul, 0)]

            pnode = MatMulOpFusionParameters(node_list.matmul.name + '_fusion',
                                             fusion_type=node_list.fusion_type,
                                             subgraph=subgraph,
                                             input_mapping=input_mapping,
                                             output_mapping=output_mapping)

            if G.quantization:
                contained_qrecs = G.quantization.get_all(
                    pnode.contained_nodes())
                if contained_qrecs:
                    # the fusion record takes the family of the first
                    # contained record; it stays None for unknown families
                    record_families = (
                        ((SymmetricQuantizationRecord,
                          SymmetricScalableFilterQuantizationRecord),
                         SymmetricQuantizationRecord),
                        ((MultQuantizationRecord,
                          MultScalableFilterQuantizationRecord),
                         MultQuantizationRecord),
                        ((Float32QuantizationRecord,
                          Float32ScalableFilterQuantizationRecord),
                         Float32QuantizationRecord),
                    )
                    fusion_qrec = None
                    for family, record_cls in record_families:
                        if isinstance(contained_qrecs[0], family):
                            fusion_qrec = record_cls(
                                in_qs=contained_qrecs[0].in_qs,
                                out_qs=contained_qrecs[-1].out_qs)
                            break
                    for contained in pnode.contained_nodes():
                        G.quantization.move_to_fusion(contained, pnode)
                    G.quantization[NodeId(pnode)] = fusion_qrec

            # collect the external edges before the matched nodes are removed
            in_edges = G.in_edges(node_list.matmul.name)
            if node_list.add:
                constant_edges = [
                    candidate_edge
                    for candidate_edge in G.in_edges(node_list.add.name)
                    if isinstance(candidate_edge.from_node,
                                  ConstantInputParameters)
                ]
                bias_edge = constant_edges[0]
            out_edges = G.out_edges(node_list.order[-1].name)

            for matched in node_list.order:
                G.remove(matched)
            for incoming in in_edges:
                G.add_edge(
                    NNEdge(incoming.from_node,
                           pnode,
                           from_idx=incoming.from_idx,
                           to_idx=incoming.to_idx))
            if node_list.add:
                G.add_edge(
                    NNEdge(bias_edge.from_node,
                           pnode,
                           from_idx=bias_edge.from_idx,
                           to_idx=2))
            for outgoing in out_edges:
                G.add_edge(
                    NNEdge(pnode,
                           outgoing.to_node,
                           from_idx=outgoing.from_idx,
                           to_idx=outgoing.to_idx))

        if set_identity:
            self.set_identity(G)

        return graph_changed
Beispiel #29
0
 def _quantize(cls, params, in_qs, out_dtype, stats, **kwargs):
     """Pass the input qtypes through and size the output from its observed range.

     The single output qtype is built by ``SymmetricMultQType.from_min_max``
     from the min/max in ``stats['range_out'][0]`` with dtype ``out_dtype``.
     """
     out_range = stats['range_out'][0]
     ranged_q = SymmetricMultQType.from_min_max(min_val=out_range['min'],
                                                max_val=out_range['max'],
                                                dtype=out_dtype)
     return MultQuantizationRecord(in_qs=in_qs, out_qs=[ranged_q])
Beispiel #30
0
    def calculate_q(self, G, node, astats, in_qs, dtype, out_dtype=None):
        """Build the quantization record for ``node``.

        The output qtype is chosen first: pooling, output and split nodes
        reuse the first input qtype, softmax uses a fixed int16 qtype with
        scale 2**-15, and every other node derives it from the min/max in
        ``astats['range_out'][0]``. A record class matching the node type is
        then created.

        Side effects visible in this method: for RNN nodes in 'autotiler'
        mode and for LSTM nodes, ``in_qs[0].scale`` and the output scale are
        overwritten with a common scale; for RNN and LSTM nodes constant
        inputs are rescaled through ``self.rescale_constant``; for LSTM nodes
        the 'c_state' input qtype has its scale recalculated.

        Args:
            G: graph; used to find the constant inputs of RNN/LSTM nodes.
            node: node to quantize.
            astats: activation statistics of the node (``'range_out'`` and,
                for LSTM nodes without ``cell_clip``, ``'range_cell'``).
            in_qs: qtypes of the node inputs.
            dtype: dtype used for filter weights and, by default, outputs.
            out_dtype: output dtype; defaults to ``dtype``.

        Returns:
            The quantization record created for ``node``.

        Raises:
            ValueError: if an RNN node has a quantization mode other than
                'extended' or 'autotiler'.
        """
        if out_dtype is None:
            out_dtype = dtype
        if isinstance(node, (PoolingParameters, OutputParameters, SplitParameters)):
            o_q = in_qs[0]
        elif isinstance(node, SoftMaxParameters):
            o_q = SymmetricMultQType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
        else:
            o_q = SymmetricMultQType.from_min_max(min_val=astats['range_out'][0]['min'],
                                                  max_val=astats['range_out'][0]['max'],
                                                  dtype=out_dtype)

        if isinstance(node, (MatrixAddParameters, MatrixSubParameters)):
            qrec = MultAddQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
        elif isinstance(node, ExpressionFusionParameters):
            # one output qtype per recorded output range
            o_qs = [SymmetricMultQType.from_min_max(min_val=orange['min'],
                                                    max_val=orange['max'],
                                                    dtype=out_dtype)
                    for orange in astats['range_out']]
            fusion_inputs = sorted([n for n in node.subgraph.inputs()
                                    if isinstance(n, FusionInputParameters)],
                                   key=lambda x: x.idx)
            fusion_outputs = sorted([n for n in node.subgraph.outputs()
                                     if isinstance(n, FusionOutputParameters)],
                                    key=lambda x: x.idx)

            # scale of every fusion input and output node, keyed by node
            node_scale_map = {fnode: in_qs[idx].scale
                              for idx, fnode in enumerate(fusion_inputs)}
            for idx, fnode in enumerate(fusion_outputs):
                node_scale_map[fnode] = o_qs[idx].scale
            inp, outp, expr = node.decompose(node_scale_map=node_scale_map)

            qrec = MultExpressionQuantizationRecord(in_qs=in_qs,
                                                    out_qs=o_qs,
                                                    inputs=inp,
                                                    output_exprs=outp,
                                                    intermediate_exprs=expr)
        elif isinstance(node, (MatrixBroadcastedLinearOpParameters, MatScaleFusionParameters, GlobalPoolParameters)):
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

        elif isinstance(node, SplitParameters):
            # every split output shares the input qtype
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q]*node.num_splits)

        elif isinstance(node, ConstantInputParameters):
            if node.value_quantization:
                qrec = MultConstantQuantizationRecord(out_qs=[node.value_quantization],
                                                      constants_are_quantized=True)
            else:
                qrec = MultConstantQuantizationRecord(out_qs=[o_q],
                                                      constants_are_quantized=False)

        elif isinstance(node, (FcParameters, Conv2DParameters)):
            weights_q = SymmetricMultQType.from_array(arr=node.weights,
                                                      quantized_dimension=self.get_quantized_dimension(
                                                          node),
                                                      dtype=dtype, narrow_range=self._narrow_weights)
            if node.has_bias:
                # biases live at the scale of the accumulator
                biases_q = SymmetricMultBiasesQType(
                    dtype=np.int32, scale=weights_q.scale * in_qs[0].scale)
            else:
                biases_q = SymmetricMultBiasesQType(
                    dtype=np.int32, scale=np.array([1], dtype=np.int32))
            mul_biases_q = MultMulBiasScaleQType.from_filter(in_qs[0], weights_q, o_q, node)
            qrec = MultScalableFilterQuantizationRecord(in_qs=[in_qs[0]],
                                                        out_qs=[o_q],
                                                        weights_q=weights_q,
                                                        biases_q=biases_q,
                                                        mul_biases_q=mul_biases_q,
                                                        constants_are_quantized=False)
            LOG.debug("filter %s qrec %s", node.name, qrec)
        elif isinstance(node, RNNParameters):
            # constant inputs of the node keyed by their input name
            input_nodes = {RNNParameters.INPUT_NAMES[edge.to_idx]: edge.from_node
                           for edge in G.in_edges(node.name)
                           if isinstance(edge.from_node, ConstantInputParameters)}
            names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
            # quantization_mode: extended, autotiler
            # state_width: 16bit or 8bit
            opts = self.get_options(node)
            if opts['mode'] == "extended":
                in_w_scale = in_qs[names['i_2_i_w']].scale * in_qs[0].scale
                state_w_scale = in_qs[names['r_2_i_w']].scale
                i_2_a_q = MultMulBiasScaleQType(scale=in_w_scale/state_w_scale)
                s_2_s_q = MultMulBiasScaleQType(scale=state_w_scale)
                s_2_o_q = MultMulBiasScaleQType(scale=1/o_q.scale)
                self.rescale_constant(input_nodes['i_b'], state_w_scale, dtype=np.int32)
                qrec = MultScalableRnnQuantizationRecord(
                    in_qs=in_qs,
                    out_qs=[o_q],
                    i_2_a_q=i_2_a_q,
                    s_2_s_q=s_2_s_q,
                    s_2_o_q=s_2_o_q
                )
            elif opts['mode'] == 'autotiler':
                # input, state and output share one scale; both weight
                # tensors share another
                in_and_state_scale = np.maximum(in_qs[0].scale, o_q.scale)
                in_and_state_w_scale = np.maximum(
                    in_qs[names['i_2_i_w']].scale, in_qs[names['r_2_i_w']].scale)
                in_qs[0].scale = in_and_state_scale
                o_q.scale = in_and_state_scale
                self.rescale_constant(input_nodes['i_state'], in_and_state_scale)
                self.rescale_constant(input_nodes['i_2_i_w'], in_and_state_w_scale)
                self.rescale_constant(input_nodes['r_2_i_w'], in_and_state_w_scale)
                state_w_scale = in_and_state_scale * in_and_state_w_scale
                self.rescale_constant(input_nodes['i_b'], state_w_scale, dtype=np.int32)
                s_2_s_q = MultMulBiasScaleQType(scale=state_w_scale/in_and_state_scale)
                qrec = MultScalableRnnQuantizationRecord(
                    in_qs=in_qs,
                    out_qs=[o_q],
                    s_2_s_q=s_2_s_q,
                )
            else:
                # without this branch qrec would be unbound and the method
                # would fail with an UnboundLocalError at the return
                raise ValueError(
                    f"unknown RNN quantization mode {opts['mode']!r} for node {node.name}")
        elif isinstance(node, LSTMParameters):
            # constant inputs of the node keyed by their input name
            input_nodes = {LSTMParameters.INPUT_NAMES[edge.to_idx]: edge.from_node
                           for edge in G.in_edges(node.name)
                           if isinstance(edge.from_node, ConstantInputParameters)}
            names = {val: idx for idx, val in enumerate(LSTMParameters.INPUT_NAMES)}
            if node.cell_clip:
                cell_max = node.cell_clip
            else:
                cell_max = max(abs(astats['range_cell'][var]) for var in ['min', 'max'])

            cell_int_bits = calc_bits(cell_max)

            in_qs[names['c_state']].recalculate_scale(-cell_max,
                                                      cell_max)
            LOG.debug("cell bits %d max %d cell range %d",
                      cell_int_bits,
                      cell_max,
                      in_qs[names['c_state']].range)
            # worst case is (internal_q * 3) + 2 = 32 (1 for 1 and 1 for sign) i.e. 10
            # but also (internal_q * 2) + cell_bits = 32
            int_q = min((32-cell_int_bits)//2, 10)
            # in and out and state are all in the same scale
            in_and_out_scale = np.maximum(in_qs[0].scale, o_q.scale)
            in_and_state_scale = np.maximum(in_and_out_scale, in_qs[names['i_state']].scale)
            in_qs[0].scale = in_and_state_scale
            o_q.scale = in_and_state_scale
            self.rescale_constant(input_nodes['i_state'], in_and_state_scale)
            # input and recurrent weights of each gate share one scale
            scale_pairs = {chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
                           for chan in ['i', 'o', 'c', 'f']}
            scales = {k: np.maximum(in_qs[names[namei]].scale, in_qs[names[namer]].scale)
                      for k, (namei, namer) in scale_pairs.items()}
            for k, (namei, namer) in scale_pairs.items():
                self.rescale_constant(input_nodes[namei], scales[k])
                self.rescale_constant(input_nodes[namer], scales[k])
            int_scale = pow(2, -int_q)
            int2_scale = pow(2, -(int_q*2))
            int3_scale = pow(2, -(int_q*3))
            # compute scales for perceptrons
            pscales = {k: scales[k] * in_and_state_scale for k in ['i', 'o', 'c', 'f']}
            scale_qtypes = {"r_2_%s_q" % k: MultMulBiasScaleQType(
                scale=pscale/int_scale) for k, pscale in pscales.items()}
            scale_qtypes['cell_in_q'] = MultMulBiasScaleQType(
                scale=in_qs[names['c_state']].scale/int_scale)
            # TODO - Check cell clip here
            scale_qtypes['cell_out_q'] = MultMulBiasScaleQType(
                scale=int2_scale/in_qs[names['c_state']].scale)
            scale_qtypes['state_out_q'] = MultMulBiasScaleQType(scale=int3_scale/in_and_state_scale)
            # set internal scale
            scale_qtypes['i_qtype'] = QType(q=int_q, bits=32, signed=True)
            # set biases to output of perceptron
            for k in ['i', 'o', 'c', 'f']:
                self.rescale_constant(input_nodes["%s_b" % k], pscales[k], dtype=np.int32)
            qrec = MultScalableLstmQuantizationRecord(
                in_qs=in_qs,
                out_qs=[o_q],
                **scale_qtypes,
            )
        else:
            qrec = MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
        return qrec