def get_qrec_pass(G, qrec, node, copy_qs):
    """Build a scaled quantization record, borrowing ``copy_qs`` where needed.

    Args:
        G: not used by this function.
        qrec: existing quantization record, or ``None``.
        node: not used by this function.
        copy_qs: qtypes used for every side that ``qrec`` leaves empty.

    Returns:
        ``None`` when there is no record and nothing to copy, otherwise the
        result of ``QRec.scaled``.
    """
    if qrec is not None:
        # A falsy value covers both "empty" and ``None``, so one truthiness
        # test per side is enough.
        side_in = qrec.in_qs if qrec.in_qs else copy_qs
        side_out = qrec.out_qs if qrec.out_qs else copy_qs
        return QRec.scaled(in_qs=side_in, out_qs=side_out)
    if not copy_qs:
        return None
    return QRec.scaled(in_qs=copy_qs, out_qs=copy_qs)
def pass_qtype(G, qrec, node, in_qs=None, out_qs=None):
    """Propagate qtypes through a node, keeping whatever ``qrec`` already has.

    ``in_qs`` is the preferred source to copy; ``out_qs`` is used only when
    ``in_qs`` is ``None``.

    Args:
        G: not used by this function.
        qrec: existing quantization record, or ``None``.
        node: not used by this function.
        in_qs: qtypes proposed from the input side.
        out_qs: qtypes proposed from the output side.

    Returns:
        ``qrec`` unchanged (possibly ``None``) when there is nothing to copy,
        otherwise a new record built with ``QRec.scaled``.
    """
    copy_qs = in_qs if in_qs is not None else out_qs
    if not copy_qs:
        return qrec
    if qrec is None:
        # copy_qs is known to be non-empty past the guard above, so the
        # record can be created unconditionally.
        return QRec.scaled(in_qs=copy_qs, out_qs=copy_qs)
    # A side is replaced when it is empty or its first entry is unset.
    new_in_qs = copy_qs if not qrec.in_qs or qrec.in_qs[0] is None else qrec.in_qs
    new_out_qs = copy_qs if not qrec.out_qs or qrec.out_qs[0] is None else qrec.out_qs
    return QRec.scaled(in_qs=new_in_qs, out_qs=new_out_qs)
def handler(G, qrec, node, in_qs=None, out_qs=None):
    """Complete ``qrec`` using the prototype qtypes from the enclosing scope.

    With no record, a new one is built from copies of the prototype lists.
    Otherwise each falsy entry of a non-empty side is replaced by the
    prototype entry at the same position; an empty side falls back to the
    ``in_qs`` / ``out_qs`` argument.
    """
    # proto_in_qs / proto_out_qs are only read here, never rebound, so they
    # resolve through the closure without a ``nonlocal`` declaration.
    if qrec is None:
        return QRec.scaled(in_qs=list(proto_in_qs), out_qs=list(proto_out_qs))

    if qrec.in_qs:
        new_in_qs = [
            own if own else proto
            for own, proto in zip_longest(qrec.in_qs, proto_in_qs)
        ]
    else:
        new_in_qs = in_qs

    if qrec.out_qs:
        new_out_qs = [
            own if own else proto
            for own, proto in zip_longest(qrec.out_qs, proto_out_qs)
        ]
    else:
        new_out_qs = out_qs

    return QRec.scaled(in_qs=new_in_qs, out_qs=new_out_qs)
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, _ = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] opts = kwargs['opts'] if force_out_q: if force_out_q.forced_scale or force_out_q.forced_zero_point: return None if in_qs[0].dtype == np.int8: dtypes = [np.int8, np.int16] else: dtypes = [np.int16] if force_out_q.forced_dtype and force_out_q.dtype not in dtypes: return None in_qs = cls.force_symmetric_and_dtype(in_qs) if in_qs is None: return None # force the input to be POW2 scaled pow2_scale = np.power(2, np.ceil(np.log2(in_qs[0].scale))) in_q = QType(min_val=in_qs[0].min_val, max_val=in_qs[0].max_val, dtype=in_qs[0].dtype, scale=pow2_scale, forced=True) if in_q.dtype == np.int8 and (opts.get('softmax_out_8bits', None) or (force_out_q and force_out_q.dtype == np.int8)): # params.at_options.softmax_out_8bits = 1 o_q = QType(min_val=-1, max_val=1, dtype=np.int8, scale=2**(-7)) else: o_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15)) if in_q.dtype == np.int16 and o_q.dtype == np.int16: return QRec.symmetric(in_qs=[in_q], out_qs=[o_q]) return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, out_dtype = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] in_q = in_qs[0] if params.lower_bound != 0: raise NotImplementedError( 'relu with non zero lower bound is not implemented for NE16 quantizer' ) cls.check_valid_ranges(params, stats, idx=0, dirs='out') if force_out_q: # since the relu is done by setting 0 zero point and scaling to the upper bound # we cannot be forced to something that does not meet this requirement if not force_out_q.zero_point_asymmetric_zero: return None if params.upper_bound is not None and not np.isclose( force_out_q.max, params.upper_bound, atol=0.01): return None # if the output has been forced then propagate it in_q = force_out_q else: upper = params.upper_bound if params.upper_bound is not None else stats[ 'range_out'][0]['max'] in_q = QType.from_min_max_sq(0, upper, dtype=in_q.dtype, asymmetric=True, ne16=True, dont_copy_attr=['ne16']) o_q = deepcopy(in_q) o_q.set_forced() qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True) compute_in_out_scale(qrec) return qrec
def _import_nodes(self, G, graph, handlers, all_nodes, outputs, opts): for node in graph.nodes: handler = handlers.get(node.op_name, None) if not handler: raise ValueError("no handler found for %s" % node.op_type) if node.is_custom and handler: handler = handler.get(node.custom_op_name, None) if not handler: raise ValueError( "no handler found for custom operation %s" % node.custom_op_name) params = handler.handle(node, all_nodes=all_nodes, G=G, opts=opts, importer=self) if params is None: continue for idx, out_tensor in enumerate(node.output): output = outputs.get(out_tensor) if not output: continue G.add_edge( NNEdge(from_node=params, to_node=output[0], from_idx=idx, to_idx=output[1])) if opts.get('load_quantization'): qtype = deepcopy( G.quantization[NodeId(params)].out_qs[idx]) G.quantization[NodeId(output[0])] = QRec.scaled( in_qs=[qtype], out_qs=[qtype])
def _common(cls, node: TFLiteNode, **kwargs):
    """Import an SSD detector node and connect its inputs.

    Up to four output slots are inspected. Every slot without a known
    consumer gets a new graph output and a ``Detect_<idx>`` tensor that is
    registered in ``importer.provisional_outputs``. With the
    ``load_quantization`` option a scaled record is stored for the node.

    Returns:
        The created ``SSDDetectorParameters``.
    """
    custom_opts = node.get_custom_options()
    G = kwargs['G']
    opts = kwargs['opts']
    all_nodes = kwargs['all_nodes']
    importer = kwargs['importer']

    inputs = [all_nodes[t] for t in node.input]
    outputs = []
    for slot in range(4):
        known = all_nodes.get(node.output[slot]) if slot < len(node.output) else None
        outputs.append(known)

    if 'max_bb_before_nms' not in custom_opts:
        custom_opts['max_bb_before_nms'] = 300
    params = SSDDetectorParameters(node.name, parameters=custom_opts)

    overriden_outputs = []
    for idx, output in enumerate(outputs):
        if not output:
            oparams = G.add_output()
            otensor = TensorBase("Detect_%s" % idx)
            overriden_outputs.append(otensor)
            importer.provisional_outputs[otensor] = (oparams, 0, None)
        else:
            overriden_outputs.append(node.output[idx])
    # covers the case where not all outputs are generated by the conversion tool
    node.override_outputs(overriden_outputs)

    for idx, inp in enumerate(inputs):
        G.add_edge(
            NNEdge(from_node=inp[0], to_node=params, from_idx=inp[1], to_idx=idx))

    if opts.get('load_quantization'):
        in_qtypes = []
        for tensor in node.input:
            tq = tensor.qtype
            if tq.is_asymmetric or not tq.signed:
                # rebuild asymmetric or unsigned qtypes from their range
                in_qtypes.append(QType.from_min_max_sq(tq.min_val, tq.max_val))
            else:
                in_qtypes.append(tq)
        o_boxes_qtype = QType(min_val=-2, max_val=2, dtype=np.int16, scale=2**(-14))
        o_scores_qtype = node.input[1].qtype
        o_class_qtype = QType(scale=1, dtype=np.int8)
        G.quantization[NodeId(params)] = QRec.scaled(
            in_qs=in_qtypes,
            out_qs=[o_boxes_qtype, o_class_qtype, o_scores_qtype, o_class_qtype])

    return params
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a max pool for NE16 so input and output share one qtype.

    Returns:
        ``None`` when the output is forced but neither ``use_ne16`` nor
        ``force_ne16`` is set, or when a forced input has a different zero
        point from the forced output; otherwise a scaled record flagged
        ``ne16=True``.
    """
    # work on a copy so the caller's list is left alone
    in_qs = in_qs.copy()
    opts = kwargs['opts']
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    G = kwargs['G']
    in_q = in_qs[0]

    cls.check_valid_ranges(params, stats, idx=0, dirs='in')
    in_range = stats['range_in'][0]
    min_val = in_range['min']
    max_val = in_range['max']

    if not force_out_q:
        o_q = deepcopy(in_q)
        o_q.attr.ne16 = True
    else:
        # a forced output is only accepted when ne16 mode is selected
        ne16_selected = opts.get('use_ne16') or opts.get('force_ne16')
        if not ne16_selected:
            LOG.info('%s ne16 max pool possible but ne16 mode not enabled',
                     params.name)
            return None
        o_q = force_out_q
        if in_q.forced and in_q.zero_point != o_q.zero_point:
            return None
        in_q = deepcopy(o_q)
        LOG.warning(
            'node %s output forced to range %s/%s - actual range %s/%s %s',
            params.name, o_q.min, o_q.max, min_val, max_val,
            "asymmetric" if o_q.asymmetric else "symmetric")

    cls.check_order(params, [['h', 'w', 'c']], [['h', 'w', 'c']])
    return QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize an element-wise matrix operation with symmetric qtypes.

    Inputs and output are marked forced on ``dtype`` and ``zero_point``.

    Returns:
        ``None`` when the inputs cannot be made symmetric or a forced output
        is asymmetric, otherwise a scaled record carrying ``scaled_idx``.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]

    # NOTE: The autotiler kernel scales and clips after the operation and
    # before the activation so there is no change if this is in a fusion or not
    if isinstance(params, MatrixAddParameters):
        scaled_idx = params.force_quantized_index
    else:
        scaled_idx = None

    in_qs = cls.force_symmetric_and_dtype(in_qs)
    if in_qs is None:
        return None

    if not force_out_q:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        out_range = stats['range_out'][0]
        o_q = QType.from_min_max_sq(out_range['min'],
                                    out_range['max'],
                                    dtype=out_dtype)
    else:
        o_q = deepcopy(force_out_q)
        if o_q.is_asymmetric:
            return None

    o_q.set_forced(flags=['dtype', 'zero_point'])
    forced_in_qs = []
    for in_q in in_qs:
        forced_in_qs.append(in_q.set_forced(flags=['dtype', 'zero_point']))
    return QRec.scaled(in_qs=forced_in_qs, out_qs=[o_q], scaled_idx=scaled_idx)
def _update_qrecs(self, G, qrecs, all_nodes, ranges_dict): for node, idx, _, qtype in all_nodes.values(): if qtype is None and node.name not in ranges_dict.keys(): continue if node.name not in G: continue nid = NodeId(node) qrec = qrecs.get(nid) if not qrec: in_qs = [None] * G.num_in_edges(node) out_qs = [None] * len(G.indexed_out_edges(node)) qrec = QRec.scaled(in_qs=in_qs, out_qs=out_qs) qrecs[nid] = qrec if node.name in ranges_dict.keys(): out_min, out_max = ranges_dict[node.name]["range"] dtype = ranges_dict[node.name].get("dtype", np.int8) bits = ranges_dict[node.name].get("n_bits", 8) channel = ranges_dict[node.name].get("per_channel", None) qtype = QType.from_min_max_sq(out_min, out_max, dtype=dtype, bits=bits, quantized_dimension=channel) qrec.out_qs[idx] = qtype
def common_quantize(cls, in_qtype, out_qtype, node, **kwargs):
    """Import a quantize/dequantize node.

    Both qtypes are first converted with ``make_symmetric_signed``. A constant
    input is folded into a ``ConstantInputParameters``. Otherwise the node
    becomes a no-op (same qtype, or same dtype with a scale change) or a
    ``QuantizeParameters`` and is connected to its producer.

    Returns:
        The created parameters, also recorded in ``all_nodes`` under the
        node's first output.
    """
    all_nodes = kwargs['all_nodes']
    opts = kwargs['opts']
    G = kwargs['G']
    inputs = [all_nodes[t] for t in node.input]
    x = inputs[0]
    in_qtype = in_qtype.make_symmetric_signed()
    out_qtype = out_qtype.make_symmetric_signed()

    if not cls.is_constant(x):
        if in_qtype == out_qtype:
            LOG.info('removing (de)quantize node %s with no effect', node.name)
            params = NoOPParameters(node.name, desc="quantize with no effect")
        elif in_qtype.dtype == out_qtype.dtype:
            LOG.info('removing (de)quantize node %s with scale change', node.name)
            params = NoOPParameters(node.name, desc="quantize with scale change")
            # the node is dropped, so the input qtype carries through
            out_qtype = in_qtype
        else:
            params = QuantizeParameters(node.name,
                                        from_qtype=in_qtype,
                                        to_qtype=out_qtype)
        G.add_edge(
            NNEdge(from_node=x[0], to_node=params, from_idx=x[1], to_idx=0))
        if opts.get('load_quantization'):
            G.quantization[NodeId(params)] = QRec.scaled(in_qs=[in_qtype],
                                                         out_qs=[out_qtype])
    else:
        LOG.info("reducing %s to a constant", node.name)
        val = x[0].value_as(out_qtype) if out_qtype else cls.get_constant(x)
        params = ConstantInputParameters(node.name,
                                         value=val,
                                         dims=Dim.unnamed(val.shape),
                                         qtype=out_qtype,
                                         constant_store=G.constant_store)
        if opts.get('load_quantization'):
            G.quantization[NodeId(params)] = QRec.scaled(in_qs=[out_qtype],
                                                         out_qs=[out_qtype])

    all_nodes[node.output[0]] = (params, 0, deepcopy(x[2]))
    return params
def _common(cls, node, copy_qtype=False, quantized_args=None, **kwargs):
    """Import a two-operand node, folding it to a constant when possible.

    Args:
        node: node being imported; ``node.input`` / ``node.output`` are used
            as keys into ``kwargs['all_nodes']``.
        copy_qtype: when true, the recorded output qtype is taken from the
            first input (index 3 of its record), or from the second input if
            the first has none.
        quantized_args: optional index triples selecting which inputs are the
            operands and which inputs describe the operand and output qtypes
            (passed to ``cls.get_qtype``).
        **kwargs: reads ``all_nodes``, ``valid_name``, ``G`` and, optionally,
            ``constant_operation``, ``constant_int_operation``,
            ``params_args``, ``params_class`` and ``qrecs``.

    Returns:
        The created parameters object.
    """
    all_nodes = kwargs['all_nodes']
    valid_name = kwargs['valid_name']
    G = kwargs['G']
    constant_operation = kwargs.get('constant_operation')
    constant_int_operation = kwargs.get('constant_int_operation')
    inputs = [all_nodes[inp] for inp in node.input]
    if quantized_args:
        # operands and their qtype descriptors are picked out of the inputs
        args = [inputs[quantized_args[0][0]], inputs[quantized_args[1][0]]]
        inp_qtypes = [
            cls.get_qtype(inputs[quantized_args[0][1]], inputs[quantized_args[0][2]]),
            cls.get_qtype(inputs[quantized_args[1][1]], inputs[quantized_args[1][2]])
        ]
        out_qtype = cls.get_qtype(inputs[quantized_args[2][0]], inputs[quantized_args[2][1]])
    else:
        args = inputs
        assert len(args) == 2
        out_qtype = None
    if all(cls.is_constant(inp) for inp in args) and constant_operation:
        # both operands are constant: evaluate now and emit a constant node
        values = [cls.get_constant(inp) for inp in args]
        if quantized_args:
            values = [inp_qtype.dequantize(val) for inp_qtype, val in zip(inp_qtypes, values)]
        outputs = cls.implied_broadcast(inputs)
        # the integer variant is used only when every operand has an integer dtype
        if constant_int_operation and all(np.issubdtype(val.dtype, np.integer) for val in values):
            res = constant_int_operation(*values)
        else:
            res = constant_operation(*values)
        if quantized_args:
            res = out_qtype.quantize(res)
        # small results are included in the log message
        if res.size < 10:
            logger.info("reducing %s to a constant %s", valid_name, res)
        else:
            logger.info("reducing %s to a constant", valid_name)
        params = ConstantInputParameters(valid_name, value=res, dims=Dim.unnamed(outputs[0].known_shape), qtype=out_qtype)
    else:
        params_args = kwargs.get('params_args', {})
        params = kwargs['params_class'](valid_name, **params_args)
        outputs = cls.implied_broadcast(inputs)
        shapes = []
        for idx, inp in enumerate(args):
            G.add_edge(NNEdge(from_node=inp[0], to_node=params, from_idx=inp[1], to_idx=idx))
            shapes.append(inp[2].known_shape)
        if isinstance(params, Broadcastable):
            params.set_broadcast(shapes)
        if quantized_args:
            # constant operands get their qtype attached to the producing node
            for qtype, inp in zip(inp_qtypes, args):
                if cls.is_constant(inp):
                    inp[0].qtype = qtype
            qrecs = kwargs['qrecs']
            qrecs[NodeId(params)] = QRec.scaled(in_qs=inp_qtypes, out_qs=[out_qtype])
    if copy_qtype:
        out_qtype = inputs[0][3] if inputs[0][3] is not None else inputs[1][3]
    all_nodes[node.output[0]] = (params, 0, outputs[0], out_qtype)
    return params
def _load_quantization(qrecs, node_recs): for tensor in node_recs: qtype = tensor.qtype if qtype: qtype = qtype.make_symmetric_signed() setattr(qtype, 'is_input', True) qrecs[NodeId(node_recs[tensor][0])] = QRec.scaled( in_qs=[qtype], out_qs=[qtype])
def record_constant_qrec(cls, inp, cnode, **kwargs):
    """Store an output-only scaled record for ``cnode``.

    Nothing is stored when the input record has no qtype (index 3) or when
    no ``qrecs`` mapping was supplied in ``kwargs``.
    """
    qtype = inp[3]
    qrecs = kwargs.get('qrecs') if qtype is not None else None
    if qrecs is not None:
        qrecs[NodeId(cnode)] = QRec.scaled(out_qs=[qtype])
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, _ = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] if force_out_q: return None in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8) if in_qs is None: return None return QRec.scaled(in_qs=in_qs, out_qs=[QType(scale=1, dtype=np.int16)])
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, out_dtype = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] if force_out_q: return None in_qs = [QType.from_min_max_sq(-8, 8, dtype=np.int8, forced=True)] o_q = QType.from_min_max_sq(min_val=-1.0, max_val=1.0, dtype=out_dtype, forced=True) return QRec.scaled(in_qs=in_qs, out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a two-operand multiply-accumulate node with an optional bias.

    When ``cls.move_constant`` returns a constant second operand, it gets a
    narrow-range int8 qtype derived from its values along dimension 0 and the
    graph is flagged as needing adjustment. A third input, if present, gets
    an int32 qtype scaled by the product of the two operand scales.

    Returns:
        ``None`` when a forced output is not symmetric int8, otherwise a
        scaled record including ``mul_biases_q``.
    """
    # work on a copy so the caller's list is left alone
    in_qs = in_qs.copy()
    opts = kwargs['opts']
    fusion = kwargs.get('fusion', None)
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    G = kwargs['G']

    # only attempt channel scaling if the second input is constant
    in2_node, in_qs = cls.move_constant(G, fusion or params, in_qs)
    if not in2_node:
        in_q2 = in_qs[1].make_symmetric_signed()
    else:
        kwargs['graph_update']['requires_adjust'] = True
        in_q2 = QType.from_array_sq(arr=in2_node.dqvalue,
                                    quantized_dimension=0,
                                    dtype=np.int8,
                                    narrow_range=True,
                                    bits=8)
    in_q1 = in_qs[0].make_symmetric_signed()

    min_val, max_val = cls.get_min_max(fusion, stats, kwargs['all_stats'], params)

    if not force_out_q:
        o_q = QType.from_min_max_sq(min_val=min_val,
                                    max_val=max_val,
                                    dtype=out_dtype)
    else:
        o_q = force_out_q
        # can't be forced to something not np.int8
        if o_q.dtype != np.int8 or o_q.asymmetric:
            return None
        LOG.warning(
            'node %s output forced to range %s/%s - actual range %s/%s %s',
            params.name, o_q.min, o_q.max, min_val, max_val,
            "asymmetric" if o_q.asymmetric else "symmetric")

    out_in_qs = [in_q1, in_q2]
    if len(in_qs) == 3:
        out_in_qs.append(QType(dtype=np.int32, scale=in_q1.scale * in_q2.scale))

    mul_biases_q = MultMulBiasScaleQType()
    mul_biases_q.scale = in_q1.scale * in_q2.scale / o_q.scale
    return QRec.scaled(in_qs=out_in_qs, out_qs=[o_q], mul_biases_q=mul_biases_q)
def set_c_state_as_output(self, G):
    """Expose the cell state through a new graph output.

    The new output node is connected to output index 1 of this node. If a
    quantization record exists for this node, the ``c_state`` input qtype is
    appended to its outputs and also used for the new output node's record.
    """
    output_c_state = G.add_output()
    lstm_qrec = G.quantization and G.quantization.get(NodeId(self))
    if lstm_qrec:
        in_q = lstm_qrec.in_qs[self.INPUT_NAMES.index('c_state')]
        lstm_qrec.out_qs.append(in_q)
        G.quantization[NodeId(output_c_state)] = QRec.scaled(in_qs=[in_q],
                                                             out_qs=[in_q])
    c_state_edge = NNEdge(self, output_c_state, from_idx=1)
    G.add_edge(c_state_edge)
    G.add_dimensions()
def load_tf_quantization(cls, input_tensors, output_tensors, in_qs=None, out_qs=None, qrec_class=None):
    """Build a quantization record from tensor qtypes.

    Args:
        input_tensors: tensors whose ``qtype`` is used when ``in_qs`` is None.
        output_tensors: tensors whose ``qtype`` is used when ``out_qs`` is None.
        in_qs: explicit input qtypes, overriding ``input_tensors``.
        out_qs: explicit output qtypes, overriding ``output_tensors``.
        qrec_class: callable used to build the record; ``QRec.scaled`` when
            omitted.

    Returns:
        The record created from both sides after each has passed through
        ``cls.convert_to_symmetric``.
    """
    make_qrec = QRec.scaled if qrec_class is None else qrec_class
    sym_in_qs = cls.convert_to_symmetric(
        [tensor.qtype for tensor in input_tensors] if in_qs is None else in_qs)
    sym_out_qs = cls.convert_to_symmetric(
        [tensor.qtype for tensor in output_tensors] if out_qs is None else out_qs)
    return make_qrec(in_qs=sym_in_qs, out_qs=sym_out_qs)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Keep the inputs as they are and derive the output qtype.

    A forced output qtype is deep-copied. Otherwise the output qtype is built
    from the recorded output range, asymmetric when the ``allow_asymmetric``
    option says so.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    opts = kwargs['opts']
    if not force_out_q:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        out_range = stats['range_out'][0]
        o_q = QType.from_min_max_sq(out_range['min'],
                                    out_range['max'],
                                    dtype=out_dtype,
                                    asymmetric=opts['allow_asymmetric'])
    else:
        o_q = deepcopy(force_out_q)
    return QRec.scaled(in_qs=in_qs, out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Pick the output qtype of a constant node.

    Priority: a forced output qtype, then the qtype already stored on
    ``params``, then one derived from ``params.value``. The chosen qtype is
    flagged ``is_constant`` and returned in an output-only scaled record.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    if force_out_q:
        # forced by the consumer
        chosen = deepcopy(force_out_q)
    elif params.qtype:
        # the value is already quantized, keep that quantization
        chosen = deepcopy(params.qtype)
    else:
        # fall back to the value itself
        chosen = QType.from_array_sq(params.value, dtype=out_dtype)
    chosen.is_constant = True
    return QRec.scaled(out_qs=[chosen])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a relu for NE16 with one shared uint8 asymmetric qtype.

    A forced output qtype is used as is. Otherwise the qtype spans
    ``[0, upper]``, where ``upper`` is ``params.upper_bound`` or, if that is
    ``None``, the recorded output maximum.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    # the incoming qtype is looked up but both branches below replace it
    in_q = in_qs[0]
    cls.check_valid_ranges(params, stats, idx=0, dirs='out')
    if not force_out_q:
        if params.upper_bound is None:
            upper = stats['range_out'][0]['max']
        else:
            upper = params.upper_bound
        in_q = QType.from_min_max_sq(0, upper, dtype=np.uint8, asymmetric=True)
    else:
        # if the output has been forced then propagate it
        in_q = force_out_q
    # NOTE(review): the same qtype object is used for input and output
    return QRec.scaled(in_qs=[in_q], out_qs=[in_q], ne16=True)
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, _ = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] if force_out_q: return None out_dtype = params.output_dtype in_dtype = params.input_dtype in_q = QType(scale=1, dtype=in_dtype) out_q = QType.from_min_max_sq(-1, 1, dtype=out_dtype, narrow_range=True) return QRec.scaled(in_qs=[in_q], out_qs=[out_q])
def _quantize(cls, params, in_qs, stats, **kwargs): # copy in_qs because we may modify it in_qs = in_qs.copy() opts = kwargs['opts'] force_out_qs, _ = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] G = kwargs['G'] in_q = in_qs[0] if (in_q.asymmetric and isinstance(params, PoolingParameters) and params.padding.has_padding): in_qs = cls.force_symmetric(in_qs) if in_qs is None: return None in_q = in_qs[0] cls.check_valid_ranges(params, stats, idx=0, dirs='in') min_val = stats['range_in'][0]['min'] max_val = stats['range_in'][0]['max'] if force_out_q: if force_out_q.asymmetric and not opts.get('allow_asymmetric'): LOG.warning( '%s could be asymmetricaly quantized but allow_asymmetric option not selected', params.name) return None if force_out_q.dtype != in_q.dtype: return None o_q = force_out_q in_q = deepcopy(force_out_q) if force_out_q.dtype != in_q.dtype or force_out_q.zero_point != in_q.zero_point: if in_q.forced and force_out_q.zero_point != 0: return None LOG.warning( 'node %s output forced to range %s/%s %s - actual range %s/%s', params.name, o_q.min, o_q.max, "asymmetric" if o_q.asymmetric else "symmetric", min_val, max_val) else: o_q = deepcopy(in_q) if opts['hwc']: cls.check_order(params, [['h', 'w', 'c']], [['h', 'w', 'c']]) else: cls.check_order(params, [['c', 'h', 'w']], [['c', 'h', 'w']]) return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
def _quantize_sw(cls, params, in_qs, stats, inout_dtype, asym=False, **kwargs):
    """Quantize an element-wise matrix operation for a given in/out dtype.

    Args:
        inout_dtype: dtype required of the output qtype.
        asym: when true, inputs are not forced symmetric, the derived output
            is asymmetric and flagged ``ne16``, and only ``dtype`` is marked
            forced; otherwise ``dtype`` and ``zero_point`` are marked forced.

    Returns:
        ``None`` when the inputs cannot be made symmetric or a forced output
        has the wrong dtype or symmetry, otherwise a scaled record.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]

    # NOTE: The autotiler kernel scales and clips after the operation and
    # before the activation so there is no change if this is in a fusion or not
    if isinstance(params, MatrixAddParameters):
        scaled_idx = params.force_quantized_index
    else:
        scaled_idx = None

    if not asym:
        in_qs = cls.force_symmetric_and_dtype(in_qs)
    if in_qs is None:
        return None

    if not force_out_q:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        out_range = stats['range_out'][0]
        o_q = QType.from_min_max_sq(out_range['min'],
                                    out_range['max'],
                                    dtype=inout_dtype,
                                    asymmetric=asym,
                                    dont_copy_attr=['ne16'],
                                    ne16=asym)
    else:
        o_q = deepcopy(force_out_q)
        if (o_q.asymmetric and not asym) or o_q.dtype != inout_dtype:
            return None
        # important to set ne16 here so the o_q matches the force_out_q since
        # this attribute is not copied by deepcopy
        if force_out_q.attr.ne16:
            o_q.attr.ne16 = True

    forced_flags = ['dtype'] if asym else ['dtype', 'zero_point']
    # every call receives its own list of flags
    o_q.set_forced(flags=list(forced_flags))
    in_qs = [in_q.set_forced(flags=list(forced_flags)) for in_q in in_qs]
    return QRec.scaled(in_qs=in_qs, out_qs=[o_q], scaled_idx=scaled_idx, ne16=asym)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize with int8 symmetric inputs and a symmetric output.

    Returns:
        ``None`` when the inputs cannot be made symmetric int8 or a forced
        output is asymmetric, otherwise a scaled record.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8)
    if in_qs is None:
        return None
    if not force_out_q:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        out_range = stats['range_out'][0]
        o_q = QType.from_min_max_sq(out_range['min'],
                                    out_range['max'],
                                    dtype=out_dtype)
    else:
        o_q = deepcopy(force_out_q)
        if o_q.is_asymmetric:
            return None
    return QRec.scaled(in_qs=in_qs, out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a global pooling node.

    The output qtype is chosen in this order: inside a fusion, an int32 copy
    of the input; a forced output with the same zero point as the input; for
    global average/sum pooling, a qtype derived from the output range;
    otherwise a copy of the input.

    Returns:
        ``None`` when a forced output has a different zero point from the
        input, otherwise a scaled record.
    """
    # work on a copy so the caller's list is left alone
    in_qs = in_qs.copy()
    opts = kwargs['opts']
    fusion = kwargs.get('fusion', None)
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    G = kwargs['G']
    in_q = in_qs[0]

    cls.check_valid_ranges(params, stats, idx=0, dirs='in')
    in_range = stats['range_in'][0]
    min_val = in_range['min']
    max_val = in_range['max']

    if fusion:
        # Global pooling fused with activations need to have only the
        # activation scale
        o_q = deepcopy(in_q)
        o_q.dtype = np.int32
    elif force_out_q:
        if force_out_q.zero_point != in_q.zero_point:
            return None
        o_q = force_out_q
        LOG.warning(
            'node %s output forced to range %s/%s %s - actual range %s/%s',
            params.name, o_q.min, o_q.max,
            "asymmetric" if o_q.asymmetric else "symmetric", min_val, max_val)
    elif isinstance(params, (GlobalAveragePoolParameters, GlobalSumPoolParameters)):
        # scaling needs to be based on stats and zero point
        use_asymmetric = (stats['range_out'][0]['min'] == 0
                          and in_q.zero_point == -128)
        o_q = QType.from_min_max_sq(stats['range_out'][0]['min'],
                                    stats['range_out'][0]['max'],
                                    dtype=out_dtype,
                                    asymmetric=use_asymmetric)
    else:
        o_q = deepcopy(in_q)

    if opts['hwc']:
        cls.check_order(params, [['h', 'w', 'c']], [['h', 'w', 'c']])
    elif params.in_dims_hint:
        cls.check_order(params, [['c', 'h', 'w']], [['c', 'h', 'w']])
    return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize an activation node and compute its input-to-output scaling.

    Returns:
        ``None`` when the node is not in a fusion and has an int32 input, or
        when a forced output disagrees with the input on signedness or
        (except for an int32 input) on zero point; otherwise the scaled record
        after ``compute_in_out_scale`` has been applied to it.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    fusion = kwargs.get('fusion', None)
    in_q = in_qs[0]
    # an int32 input is only accepted inside a fusion
    if not fusion and in_q.dtype == np.int32:
        return None
    if isinstance(params, (HSwishActivationParameters, HSigmoidActivationParameters)):
        # widen the input to [-6, 6] when its current range does not reach 6
        max_val = in_q.scale * pow(2, in_q.bits - 1)
        if max_val < 6:
            in_q = QType.from_min_max_sq(-6, 6, dtype=in_q.dtype, forced=True)
    elif isinstance(params, SigmoidActivationParameters):
        # sigmoid always takes a forced [-8, 8] input
        in_q = QType.from_min_max_sq(-8, 8, dtype=in_q.dtype, forced=True)
    if force_out_q:
        if force_out_q.signed != in_q.signed:
            return None
        if fusion and fusion.fusion_type in ['conv_active_pool', 'conv_active']:
            # in these fusions the forced output also becomes the input,
            # except for the activation types listed below
            if not isinstance(params, (SigmoidActivationParameters, HTanHActivationParameters, HSwishActivationParameters, HSigmoidActivationParameters)):
                in_q = deepcopy(force_out_q)
        o_q = deepcopy(force_out_q)
        # activation cannot move zeropoint unless it is a reduction step
        if o_q.zero_point != in_q.zero_point and in_q.dtype != np.int32:
            return None
    else:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        # a non-zero input zero point is passed on to the output qtype
        zero_point = in_q.zero_point if in_q.zero_point != 0 else None
        o_q = QType.from_min_max_sq(stats['range_out'][0]['min'], stats['range_out'][0]['max'], dtype=in_q.dtype, zero_point=zero_point)
    qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q])
    # the extra scale factor depends on the activation type
    if isinstance(params, (SigmoidScaledSymmetricMult, TanHActivationParameters)):
        compute_in_out_scale(qrec, extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale/qrec.in_qs[0].scale)
    elif isinstance(params, HSwishActivationParameters):
        compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1/6)
    else:
        compute_in_out_scale(qrec)
    return qrec
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, _ = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] if force_out_q: return None in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8) if in_qs is None: return None o_boxes_qtype = QType(min_val=-2, max_val=2, dtype=np.int16, scale=2**(-14)) o_scores_qtype = in_qs[1] o_class_qtype = QType(scale=1, dtype=np.int8) return QRec.scaled(in_qs=in_qs, out_qs=[ o_boxes_qtype, o_class_qtype, o_scores_qtype, o_class_qtype ])
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, _ = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] opts = kwargs['opts'] if force_out_q: if force_out_q.forced_scale or force_out_q.forced_zero_point: return None in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8) if in_qs is None: return None # force the input to be POW2 scaled pow2_scale = np.power(2, np.ceil(np.log2(in_qs[0].scale))) in_q = QType(min_val=in_qs[0].min_val, max_val=in_qs[0].max_val, dtype=np.int8, scale=pow2_scale, forced=True) o_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15)) return QRec.scaled(in_qs=[in_q], out_qs=[o_q])