def overwrite_range(in_ranges, out_ranges, in_dtypes=None, out_dtypes=None):
    """Create a quantization handler that fills in missing in/out QTypes.

    Prototype QTypes are built from the supplied (min, max) ranges (defaulting
    to int8 when no dtypes are given). The returned handler builds a fresh
    scaled QRec when none exists, otherwise it replaces any unset qtypes on
    the existing record with the prototypes.
    """
    if in_dtypes is None:
        in_dtypes = [np.int8] * len(in_ranges)
    if out_dtypes is None:
        out_dtypes = [np.int8] * len(out_ranges)
    proto_in_qs = [QType.from_min_max_sq(*rng, dtype=dt)
                   for rng, dt in zip(in_ranges, in_dtypes)]
    proto_out_qs = [QType.from_min_max_sq(*rng, dtype=dt)
                    for rng, dt in zip(out_ranges, out_dtypes)]

    def handler(G, qrec, node, in_qs=None, out_qs=None):
        # no existing record: use the prototypes directly
        if qrec is None:
            return QRec.scaled(in_qs=list(proto_in_qs),
                               out_qs=list(proto_out_qs))
        # fill holes in the existing record with the prototype qtypes
        if qrec.in_qs:
            new_in_qs = [cur if cur else proto
                         for cur, proto in zip_longest(qrec.in_qs, proto_in_qs)]
        else:
            new_in_qs = in_qs
        if qrec.out_qs:
            new_out_qs = [cur if cur else proto
                          for cur, proto in zip_longest(qrec.out_qs, proto_out_qs)]
        else:
            new_out_qs = out_qs
        return QRec.scaled(in_qs=new_in_qs, out_qs=new_out_qs)

    return handler
def _common(cls, node: TFLiteNode, **kwargs):
    """Convert a TFLite SSD post-process custom op into an SSDDetectorParameters node.

    Reads the op's custom options, creates graph outputs for any of the 4
    detector outputs the TFLite model did not provide, wires the input edges
    and (when requested) attaches a scaled quantization record.
    """
    custom_opts = node.get_custom_options()
    G = kwargs['G']
    opts = kwargs['opts']
    all_nodes = kwargs['all_nodes']
    importer = kwargs['importer']
    inputs = [all_nodes[t] for t in node.input]
    # the detector always has 4 outputs; pad with None for outputs the
    # TFLite node does not declare
    outputs = [
        all_nodes.get(node.output[idx]) if idx < len(node.output) else None
        for idx in range(4)
    ]
    # inp_shapes = [input[2].shape for input in inputs]
    if 'max_bb_before_nms' not in custom_opts:
        # default cap on candidate boxes entering NMS
        custom_opts['max_bb_before_nms'] = 300
    params = SSDDetectorParameters(node.name, parameters=custom_opts)
    overriden_outputs = []
    for idx, output in enumerate(outputs):
        if output:
            overriden_outputs.append(node.output[idx])
            continue
        # output missing from the model: create a provisional graph output
        oparams = G.add_output()
        otensor = TensorBase("Detect_%s" % idx)
        overriden_outputs.append(otensor)
        importer.provisional_outputs[otensor] = (oparams, 0, None)
    # covers the case where not all outputs are generated by the conversion tool
    node.override_outputs(overriden_outputs)
    for idx, inp in enumerate(inputs):
        G.add_edge(
            NNEdge(from_node=inp[0], to_node=params, from_idx=inp[1], to_idx=idx))
    if opts.get('load_quantization'):
        # asymmetric or unsigned input qtypes are replaced by symmetric signed ones
        in_qtypes = [
            QType.from_min_max_sq(tensor.qtype.min_val, tensor.qtype.max_val)
            if (tensor.qtype.is_asymmetric or not tensor.qtype.signed)
            else tensor.qtype for tensor in node.input
        ]
        # boxes emitted as Q14 int16 over [-2, 2]
        o_boxes_qtype = QType(min_val=-2, max_val=2,
                              dtype=np.int16, scale=2**(-14))
        # scores keep the quantization of the scores input
        o_scores_qtype = node.input[1].qtype
        o_class_qtype = QType(scale=1, dtype=np.int8)
        qrec = QRec.scaled(in_qs=in_qtypes,
                           out_qs=[o_boxes_qtype, o_class_qtype,
                                   o_scores_qtype, o_class_qtype])
        G.quantization[NodeId(params)] = qrec
    return params
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize an activation node for the scaled (mult) kernels.

    Widens the input range for hswish/hsigmoid when 6 is not representable,
    honours a forced output qtype (propagating it to the input for simple
    clipping activations inside conv fusions), otherwise derives the output
    qtype from the recorded output range statistics.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    if isinstance(
            params, (HSwishActivationParameters, HSigmoidActivationParameters)):
        in_q = in_qs[0]
        # largest magnitude representable at the current input scale
        max_val = in_q.scale * pow(2, in_q.bits - 1)
        if max_val < 6:
            # hswish/hsigmoid need 6 to be representable; widen to [-6, 6]
            in_qs = [QType.from_min_max_sq(-6, 6, dtype=in_q.dtype)]
    if force_out_q:
        fusion = kwargs.get('fusion', None)
        if fusion and fusion.fusion_type in [
                'conv_active_pool', 'conv_active'
        ]:
            if not isinstance(
                    params,
                    (SigmoidActivationParameters, TanHActivationParameters,
                     HSwishActivationParameters,
                     HSigmoidActivationParameters)):
                # a pure clipping activation can take the forced q on its input too
                in_qs = [deepcopy(force_out_q)]
        o_q = deepcopy(force_out_q)
    else:
        o_q = QType.from_min_max_sq(stats['range_out'][0]['min'],
                                    stats['range_out'][0]['max'],
                                    dtype=out_dtype)
    return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Float-quantize an LSTM node, marking paired gate weights for shared storage.

    For each gate (i/o/c/f) the input->gate and recurrent->gate weight qtypes
    are rebuilt at the float dtype; the first of the pair is flagged
    dont_generate_value and the second records the first's producer so the
    two are concatenated at generation time.
    """
    _, dtype = cls.get_float_opts(**kwargs)
    # map symbolic LSTM input names to their input indices
    names = {
        val: idx for idx, val in enumerate(LSTMParameters.INPUT_NAMES)
    }
    edges = kwargs['G'].indexed_in_edges(params.name)
    in_qs = deepcopy(in_qs)
    # (input weights, recurrent weights) name pair per gate
    scale_pairs = {
        chan: ('i_2_%s_w' % chan, 'r_2_%s_w' % chan)
        for chan in ['i', 'o', 'c', 'f']
    }
    for scale_pair in scale_pairs.values():
        w_q = in_qs[names[scale_pair[0]]]
        in_qs[names[scale_pair[0]]] = QType(min_val=w_q.min_val,
                                            max_val=w_q.max_val,
                                            dtype=dtype,
                                            dont_generate_value=True)
        w_q = in_qs[names[scale_pair[1]]]
        in_qs[names[scale_pair[1]]] = QType(
            min_val=w_q.min_val,
            max_val=w_q.max_val,
            dtype=dtype,
            concatenated_nodes=[
                edges[names[scale_pair[0]]].from_node.name
            ])
    # optional second output carries the cell state
    if params.lstm_output_c_state:
        out_qs = [QType(dtype=dtype), QType(dtype=dtype)]
    else:
        out_qs = [QType(dtype=dtype)]
    return QRec.float(in_qs=in_qs, out_qs=out_qs, float_dtype=dtype)
def matscale3(cls, in_tensors, qrec):
    """Elementwise product of three quantized tensors reduced to the output QType.

    8-bit inputs are multiplied in one pass with a 32-bit accumulator at
    q0+q1+q2; 16-bit inputs multiply pairwise with an intermediate reduction
    to avoid overflowing 32 bits.

    Raises:
        ValueError: if the inputs are not 8 or 16 bit.
    """
    bits = qrec.in_qs[0].bits
    assert bits == qrec.in_qs[1].bits
    assert qrec.in_qs[1].bits == qrec.in_qs[2].bits
    if bits == 8:
        acc_q = QType.Pow2(bits=32,
                           q=qrec.in_qs[0].q + qrec.in_qs[1].q +
                           qrec.in_qs[2].q,
                           signed=True)
        prod = np.multiply(np.multiply(in_tensors[0], in_tensors[1],
                                       dtype=np.int32),
                           in_tensors[2], dtype=np.int32)
        return qrec.out_qs[0].reduce_from(prod, acc_q)
    if bits == 16:
        acc_q = QType.Pow2(bits=32,
                           q=qrec.in_qs[0].q + qrec.in_qs[1].q,
                           signed=True)
        prod = np.multiply(in_tensors[0], in_tensors[1], dtype=np.int32)
        prod = qrec.out_qs[0].reduce_from(prod, acc_q)
        acc_q = QType.Pow2(bits=32,
                           q=qrec.in_qs[2].q + qrec.out_qs[0].q,
                           signed=True)
        prod = np.multiply(prod, in_tensors[2], dtype=np.int32)
        return qrec.out_qs[0].reduce_from(prod, acc_q)
    raise ValueError("only 8 and 16 bits supported")
def _quantize(cls, params, in_qs, stats, **kwargs):
    """POW2 quantizer for a two-input matrix op (optional bias), fusion-aware.

    When fused with a ReLU the output range is taken from the activation's
    statistics and left at 32 bits; otherwise the node's own output range and
    the requested pow2 dtype are used.
    """
    force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    fusion = kwargs.get('fusion', None)
    cls.check_valid_ranges(params, stats, idx=0, dirs='out')
    # BUG FIX: defaults are now set unconditionally so out_dtype/range_out can
    # never be unbound on the fusion + non-ReLU path
    out_dtype = params_dtype
    range_out = stats['range_out'][0]
    if fusion:
        activation = fusion.contained_nodes()[1]
        if isinstance(activation, ReluActivationParameters):
            # Take stats from activation after the convolution; keep the
            # output at 32 bits since the activation reduces it afterwards
            range_out = kwargs['all_stats'][NodeId(
                fusion, activation)]['range_out'][0]
            out_dtype = np.int32
    in_q1 = deepcopy(in_qs[0]).scale_to_pow2()
    # BUG FIX: second input q was cloned from in_qs[0], ignoring the second
    # input's own statistics
    in_q2 = deepcopy(in_qs[1]).scale_to_pow2()
    # accumulator/bias q is the sum of the two input qs
    biases_q = QType.Pow2(32, in_q1.q + in_q2.q, True)
    if force_out_q:
        o_q = force_out_q
    else:
        o_q = QType.from_min_max_pow2(range_out['min'],
                                      range_out['max'],
                                      dtype=out_dtype)
    if len(in_qs) == 3:
        return QRec.symmetric(in_qs=[in_q1, in_q2, biases_q], out_qs=[o_q])
    return QRec.symmetric(in_qs=[in_q1, in_q2], out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize softmax: pow2-scaled input, fixed Q7 int8 or Q15 int16 output."""
    force_out_qs, _ = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    opts = kwargs['opts']
    if force_out_q:
        # only a dtype may be forced, never a scale or zero point
        if force_out_q.forced_scale or force_out_q.forced_zero_point:
            return None
        allowed = [np.int8, np.int16] if in_qs[0].dtype == np.int8 else [np.int16]
        if force_out_q.forced_dtype and force_out_q.dtype not in allowed:
            return None
    in_qs = cls.force_symmetric_and_dtype(in_qs)
    if in_qs is None:
        return None
    # force the input to be POW2 scaled
    pow2_scale = np.power(2, np.ceil(np.log2(in_qs[0].scale)))
    in_q = QType(min_val=in_qs[0].min_val,
                 max_val=in_qs[0].max_val,
                 dtype=in_qs[0].dtype,
                 scale=pow2_scale,
                 forced=True)
    want_8bit_out = in_q.dtype == np.int8 and (
        opts.get('softmax_out_8bits', None) or
        (force_out_q and force_out_q.dtype == np.int8))
    if want_8bit_out:
        # params.at_options.softmax_out_8bits = 1
        o_q = QType(min_val=-1, max_val=1, dtype=np.int8, scale=2**(-7))
    else:
        o_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
    # pure 16-bit in/out uses the symmetric record
    if in_q.dtype == np.int16 and o_q.dtype == np.int16:
        return QRec.symmetric(in_qs=[in_q], out_qs=[o_q])
    return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Float quantizer for an 8-input kernel whose 4th and 6th inputs stay int16."""
    force_out_qs, dtype = cls.get_float_opts(**kwargs)
    if force_out_qs and all(qt.dtype != dtype
                            for qt in force_out_qs if qt is not None):
        return None
    # inputs 3 and 5 are integer index/state tensors; the rest follow dtype
    in_dtypes = [dtype, dtype, dtype, np.int16, dtype, np.int16, dtype, dtype]
    return QRec.float(in_qs=[QType(dtype=dt) for dt in in_dtypes],
                      out_qs=[QType(dtype=dtype)],
                      float_dtype=dtype)
def matscale3(in_tensors, qrec):
    """Elementwise product of three quantized tensors reduced to the output QType.

    8-bit inputs are multiplied in one pass with a 32-bit accumulator at
    q0+q1+q2; 16-bit inputs multiply pairwise with an intermediate reduction.

    Raises:
        ValueError: if the inputs are not 8 or 16 bit.
    """
    assert qrec.in_qs[0].bits == qrec.in_qs[1].bits
    assert qrec.in_qs[1].bits == qrec.in_qs[2].bits
    if qrec.in_qs[0].bits == 8:
        q_calc = QType(bits=32,
                       q=qrec.in_qs[0].q + qrec.in_qs[1].q + qrec.in_qs[2].q,
                       signed=True)
        res = np.multiply(np.multiply(in_tensors[0], in_tensors[1],
                                      dtype=np.int32),
                          in_tensors[2], dtype=np.int32)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    elif qrec.in_qs[0].bits == 16:
        q_calc = QType(bits=32,
                       q=qrec.in_qs[0].q + qrec.in_qs[1].q,
                       signed=True)
        res = np.multiply(in_tensors[0], in_tensors[1], dtype=np.int32)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
        q_calc = QType(bits=32,
                       q=qrec.in_qs[2].q + qrec.out_qs[0].q,
                       signed=True)
        res = np.multiply(res, in_tensors[2], dtype=np.int32)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    else:
        # BUG FIX: previously fell through with `res` unbound
        # (UnboundLocalError); fail loudly like the companion implementation
        raise ValueError("only 8 and 16 bits supported")
    return res
def get_outputs(self, params: Parameters,
                output_tensors: Sequence[np.ndarray],
                ktype: str = None) -> Sequence[np.ndarray]:
    """Post-process kernel outputs: reduce symmetric accumulators, then
    optionally dequantize every output tensor."""
    if ktype == "symmetric":
        if isinstance(params, (MatrixAddParameters, MatrixSubParameters)):
            # add/sub accumulate at the smaller of the two input qs
            acc_q = QType(bits=32,
                          q=min(self.in_qs[0].q, self.in_qs[1].q),
                          signed=True)
            output_tensors = [self.out_qs[0].reduce_from(output_tensors[0],
                                                         acc_q)]
        elif isinstance(params, (MatrixMulParameters, MatrixDivParameters)):
            # mul/div accumulate at the sum of the input qs
            acc_q = QType(bits=32,
                          q=self.in_qs[0].q + self.in_qs[1].q,
                          signed=True)
            output_tensors = [self.out_qs[0].reduce_from(output_tensors[0],
                                                         acc_q)]
        elif isinstance(params, GlobalPoolParameters) and \
                params.pool_type == "sum":
            output_tensors = [self.out_qs[0].reduce_from(output_tensors[0],
                                                         self.in_qs[0])]
    if not self._auto_dequantize_outputs:
        return output_tensors
    return [self.out_qs[idx].dequantize(tensor)
            for idx, tensor in enumerate(output_tensors)]
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Execute a piecewise binary op (add/sub/mul style) on symmetric pow2 tensors."""
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    func = PIECEWISE_OPS[params.__class__]
    op = func['op']
    if func['is_mult']:
        # multiplicative op: the product's q is the sum of the input qs
        i1 = in_tensors[0].astype(np.int32)
        i2 = in_tensors[1].astype(np.int32)
        res = op(i1, i2, np.int32)
        q_calc = QType.Pow2(
            bits=32, q=qrec.in_qs[0].q+qrec.in_qs[1].q, signed=True)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    else:
        # additive op: shift the higher-q input down so both share min(q1, q2)
        off_in = abs(qrec.in_qs[0].q - qrec.in_qs[1].q)
        if qrec.in_qs[0].q > qrec.in_qs[1].q:
            i1 = at_norm(in_tensors[0].astype(np.int32), off_in)
            i2 = in_tensors[1].astype(np.int32)
        else:
            i1 = in_tensors[0].astype(np.int32)
            i2 = at_norm(in_tensors[1].astype(np.int32), off_in)
        res = op(i1, i2, None)
        q_calc = QType.Pow2(bits=32,
                            q=min(qrec.in_qs[0].q, qrec.in_qs[1].q),
                            signed=True)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    return qrec.get_outputs(params, [res], ktype="symmetric")
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Pow2 quantizer with a fixed Q15 int16 output.

    Refuses any forced output qtype other than Q15 int16; inputs pass
    through unchanged.
    """
    force_out_qs, _ = cls.get_pow2_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    out_q = QType.Pow2(16, 15, True)
    if force_out_q and force_out_q != out_q:
        return None
    # reuse out_q instead of rebuilding the identical QType a second time
    return SymmetricQuantizationRecord(in_qs=in_qs, out_qs=[out_q])
def get_closest_qtype(constraint, qtype):
    """Return a QType satisfying the dtype constraint, or None when unconstrained.

    Args:
        constraint: constraint dict; an optional 'dtype' entry holds either a
            single dtype or a set of acceptable dtypes.
        qtype: candidate qtype (currently unused by this selection).
    """
    if 'dtype' in constraint:
        dtype_constraint = constraint['dtype']
        if isinstance(dtype_constraint, set):
            # BUG FIX: next() on a set raises TypeError (sets are not
            # iterators); pick an arbitrary element via iter()
            return QType(dtype=next(iter(dtype_constraint)))
        return QType(dtype=dtype_constraint)
    return None
def get_quantization(stats, min_qsnr, force_width):
    """Pick a pow2 QType from precomputed per-width statistics.

    A forced width wins outright; otherwise the first width in STATS_BITS
    whose QSNR exceeds the threshold is chosen.

    Raises:
        ValueError: when no candidate width reaches min_qsnr.
    """
    qstats = stats['qstats']
    if force_width is not None:
        return QType(bits=force_width,
                     q=qstats[force_width]['q'],
                     signed=True)
    width = next((w for w in STATS_BITS if qstats[w]['qsnr'] > min_qsnr),
                 None)
    if width is None:
        raise ValueError("no solution for this QSNR could be found")
    return QType(bits=width, q=qstats[width]['q'], signed=True)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Scaled (mult) quantizer for a filter node (conv/linear), fusion-aware.

    Weights are quantized per-channel symmetric int8; the output range comes
    from the fused activation's statistics where applicable; biases are int32
    at weight_scale * input_scale.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    opts = kwargs['opts']
    fusion = kwargs.get('fusion', None)
    G = kwargs['G']
    weights_node = cls.get_weights_node(G, fusion if fusion else params)
    min_val, max_val = None, None
    # per-channel symmetric int8 quantization of the weights
    weights_q = QType.from_array_sq(
        arr=weights_node.dqvalue,
        quantized_dimension=cls.get_quantized_dimension(params, opts),
        dtype=np.int8,
        narrow_range=opts['narrow_weights'])
    if fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
    ]:
        stats = kwargs['all_stats'][NodeId(fusion,
                                           fusion.contained_nodes()[0])]
        if isinstance(
                fusion.contained_nodes()[1],
                (SigmoidActivationParameters, TanHActivationParameters,
                 HSwishActivationParameters)):
            # NOTE(review): identical to the assignment just above (node [0],
            # the convolution) — looks like it may have been meant to use the
            # activation's stats; confirm before changing
            stats = kwargs['all_stats'][NodeId(
                fusion, fusion.contained_nodes()[0])]
        elif fusion and isinstance(fusion.contained_nodes()[1],
                                   HSigmoidActivationParameters):
            # Hard sigmoid implements a RELU, be sure 6 can be representable
            min_val, max_val = 0, 6
        else:
            # Take stats from activation after the convolution
            stats = kwargs['all_stats'][NodeId(
                fusion, fusion.contained_nodes()[1])]
    if min_val is None or max_val is None:
        min_val, max_val = stats['range_out'][0]['min'], stats[
            'range_out'][0]['max']
    if force_out_q:
        o_q = force_out_q
    else:
        o_q = QType.from_min_max_sq(min_val=min_val,
                                    max_val=max_val,
                                    dtype=out_dtype)
    # int32 biases scaled to weight_scale * input_scale
    biases_q = QType(dtype=np.int32, scale=weights_q.scale * in_qs[0].scale)
    mul_biases_q = MultMulBiasScaleQType.from_filter(
        in_qs[0], weights_q, o_q, params)
    # returning the new weights and biases qs will force backprop
    # TODO - ACC_Q LOOKS WRONG AFTER THIS
    return MultScalableFilterQuantizationRecord(
        in_qs=[in_qs[0], weights_q, biases_q],
        out_qs=[o_q],
        acc_q=biases_q,
        calc_q=biases_q,
        mul_biases_q=mul_biases_q)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Float quantizer: three inputs and one output, all at the requested dtype."""
    force_out_qs, dtype = cls.get_float_opts(**kwargs)
    if force_out_qs and all(qt.dtype != dtype
                            for qt in force_out_qs if qt is not None):
        return None
    # all inputs and outputs are set to the required float type
    float_in_qs = [QType(dtype=dtype) for _ in range(3)]
    return QRec.float(in_qs=float_in_qs,
                      out_qs=[QType(dtype=dtype)],
                      float_dtype=dtype)
def test_activation():
    # Q13 int16 quantization; the expected output (negatives zeroed) indicates
    # the default ActivationParameters activation behaves like a ReLU here
    in_q = QType(16, 13, True)
    input_ = in_q.quantize(np.array([-1.2, 0.5, 0.5, -0.6])).reshape([4, 1, 1])
    in_dims = Dim.named(c=4, h=1, w=1).impose_order(['c', 'h', 'w'])
    params = ActivationParameters("test")
    qrec = QuantizationRecord([in_q], [in_q])
    out_dims = params.get_output_size([in_dims])
    output_ = activation(params, in_dims, out_dims[0], input_, qrec=qrec)
    output_ = in_q.dequantize(output_)
    # negatives clamp to 0, positives pass through
    assert np.array_equal(output_, [[[0]], [[0.5]], [[0.5]], [[0]]])
def test_fc():
    """Fully-connected layer: three all-ones 3x3 filters over arange(9) give 36 each."""
    filt = FcFilterDim(3, 3, 3, 1)
    params = FcParameters("test", filt=filt)
    w_q = QType(16, 2, True)
    in_q = QType(16, 2, True)
    acc_q = QType(16, 4, True)
    calc_q = QType(16, 4, True)
    qrec = FilterQuantizationRecord(in_qs=[in_q], out_qs=[in_q],
                                    calc_q=calc_q, acc_q=acc_q,
                                    biases_q=None, weights_q=w_q)
    weights = w_q.quantize(np.full([3, 1, 3, 3], 1.0))
    input_ = in_q.quantize(np.arange(9)).reshape([1, 3, 3])
    in_dims = Dim.named(c=1, h=3, w=3).impose_order(['c', 'h', 'w'])
    out_dims = params.get_output_size([in_dims])
    result = linear(params, in_dims, out_dims[0], input_, weights, None,
                    qrec=qrec)
    # sum(0..8) == 36 for each of the three identical filters
    assert np.array_equal(in_q.dequantize(result), [[[36]], [[36]], [[36]]])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Float quantizer for a constant node: no inputs, one constant-flagged output."""
    force_out_qs, dtype = cls.get_float_opts(**kwargs)
    o_q = force_out_qs[0] if force_out_qs else QType(dtype=dtype)
    # mark the output as constant so generators can fold it
    o_q.is_constant = True
    return QRec.float(in_qs=None, out_qs=[o_q], float_dtype=dtype)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Fixed-range quantizer: input forced to [-8, 8] int8, output to [-1, 1]."""
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    # this node's quantization is fixed and cannot be overridden
    if force_out_qs and force_out_qs[0]:
        return None
    forced_in = QType.from_min_max_sq(-8, 8, dtype=np.int8, forced=True)
    forced_out = QType.from_min_max_sq(min_val=-1.0,
                                       max_val=1.0,
                                       dtype=out_dtype,
                                       forced=True)
    return QRec.scaled(in_qs=[forced_in], out_qs=[forced_out])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Scaled quantizer for an elementwise multiply, with channel scaling when
    the second input is a constant.
    """
    # copy in_qs because we may modify it
    in_qs = in_qs.copy()
    opts = kwargs['opts']
    fusion = kwargs.get('fusion', None)
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    G = kwargs['G']
    # only attempt channel scaling if the second input is constant
    # if len(in_qs) > 2:
    in2_node, in_qs = cls.move_constant(G, fusion if fusion else params,
                                        in_qs)
    if in2_node:
        # constant second input: per-channel narrow int8 and flag that the
        # graph will need adjustment afterwards
        kwargs['graph_update']['requires_adjust'] = True
        in_q2 = QType.from_array_sq(arr=in2_node.dqvalue,
                                    quantized_dimension=0,
                                    dtype=np.int8,
                                    narrow_range=True,
                                    bits=8)
    else:
        in_q2 = in_qs[1].make_symmetric_signed()
    in_q1 = in_qs[0].make_symmetric_signed()
    min_val, max_val = cls.get_min_max(fusion, stats, kwargs['all_stats'],
                                       params)
    if force_out_q:
        o_q = force_out_q
        # can't be forced to something not np.int8
        if o_q.dtype != np.int8 or o_q.asymmetric:
            return None
        LOG.warning(
            'node %s output forced to range %s/%s - actual range %s/%s %s',
            params.name, o_q.min, o_q.max, min_val, max_val,
            "asymmetric" if o_q.asymmetric else "symmetric")
    else:
        o_q = QType.from_min_max_sq(min_val=min_val,
                                    max_val=max_val,
                                    dtype=out_dtype)
    if len(in_qs) == 3:
        # int32 biases scaled to the product of the two input scales
        biases_q = QType(dtype=np.int32, scale=in_q1.scale * in_q2.scale)
        out_in_qs = [in_q1, in_q2, biases_q]
    else:
        out_in_qs = [in_q1, in_q2]
    # output multiplier bias rescales the raw product into the output scale
    mul_biases_q = MultMulBiasScaleQType()
    mul_biases_q.scale = in_q1.scale * in_q2.scale / o_q.scale
    return QRec.scaled(in_qs=out_in_qs,
                       out_qs=[o_q],
                       mul_biases_q=mul_biases_q)
def filter_adjust_inputq(node, in_q: QType):
    """Propagate a new input QType into a filter node, widening calc_q if needed.

    If the accumulator q implied by the new input would overflow the current
    calc_q width, the width is doubled (asserting it stays within STATS_BITS).
    """
    if node.in_qs[0] == in_q:
        return
    node.in_qs[0] = in_q.clone()
    if in_q.q + node.weights_q.q + node.calc_q.bits - node.calc_q.q > node.calc_q.bits:
        assert node.calc_q.bits < STATS_BITS[-1]
        factor = 2
    else:
        factor = 1
    # BUG FIX: original passed node.weights_q (a QType instance) where its
    # .q field was intended, matching the overflow test above
    node.calc_q = QType(bits=node.calc_q.bits * factor,
                        q=in_q.q + node.weights_q.q,
                        signed=True)
def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs):
    """Build per-input float QTypes, using recorded input ranges when available.

    Inputs whose dimension is None (unconnected) map to None.

    Raises:
        ValueError: when opts specify an unknown float_type.
    """
    float_type = kwargs['opts']['float_type']
    dtype = FLOAT_DTYPES.get(float_type)
    if dtype is None:
        raise ValueError(f'invalid float_type {float_type}')

    def make_qtype(idx, dim):
        # unconnected input slots carry no qtype
        if dim is None:
            return None
        if stats:
            rng = stats['range_in'][idx]
            return QType(min_val=rng['min'], max_val=rng['max'], dtype=dtype)
        return QType(dtype=dtype)

    return [make_qtype(idx, dim) for idx, dim in enumerate(params.in_dims)]
def gen_ssd_globals(gen, node, qrec):
    """Emit the global constant tensors (scales, norms, infos) for an SSD node."""
    qrec.set_scales(node)
    scores_q = qrec.in_qs[1]
    # score thresholding uses a fixed-point multiplier/shift pair
    scores_scale, scores_norm = compute_mul_bias(scores_q.scale)
    # 8 int8 multiplier entries: box coordinate/anchor scales plus the score scale
    cname_scales, file_name_scales = gen_constant(gen, node, node, SSD_SCALES)
    contents = np.array([qrec.scale_x_q.qbiases,
                         qrec.scale_x_anc_q.qbiases,
                         qrec.scale_y_q.qbiases,
                         qrec.scale_y_anc_q.qbiases,
                         qrec.scale_h_q.qbiases,
                         qrec.scale_w_q.qbiases,
                         qrec.scale_ao_q.qbiases,
                         scores_scale], dtype=np.int8)
    scale_info = ConstantInfo(file_name_scales,
                              QType(bits=8, q=0, signed=True),
                              contents=contents)
    # matching right-shift (norm) amounts for each multiplier above
    cname_norms, file_name_norms = gen_constant(gen, node, node, SSD_NORMS)
    contents = np.array([qrec.scale_x_q.qnorms,
                         qrec.scale_x_anc_q.qnorms,
                         qrec.scale_y_q.qnorms,
                         qrec.scale_y_anc_q.qnorms,
                         qrec.scale_h_q.qnorms,
                         qrec.scale_w_q.qnorms,
                         qrec.scale_ao_q.qnorms,
                         scores_norm], dtype=np.int8)
    norms_info = ConstantInfo(file_name_norms,
                              QType(bits=8, q=0, signed=True),
                              contents=contents)
    score_threshold = scores_q.quantize(node.nms_score_threshold)
    cname_infos, file_name_infos = gen_constant(gen, node, node, INFOS)
    # NOTE(review): max_bb_before_nms is split across two int8 slots and read
    # back as Infos[4]<<8 + Infos[5]; the low byte is stored unmasked, so
    # values with low byte > 127 would overflow int8 — confirm expected range
    contents = np.array([round(node.nms_iou_threshold * 2**7),  # Q7
                         score_threshold,  # Q0 [0:255]
                         node.max_detections,  # Q0 [0:255]
                         node.max_classes_per_detection,  # Q0 [0:255]
                         node.max_bb_before_nms >> 8,
                         node.max_bb_before_nms], dtype=np.int8)
    # max_bb = Infos[4]<<8 + Infos[5]
    ssd_infos = ConstantInfo(file_name_infos,
                             QType(bits=8, q=0, signed=True),
                             contents=contents)
    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.ctype, cname_scales,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=scale_info))
    gen.globals.append(GlobalArgInfo(qrec.scale_x_q.shift_ctype, cname_norms,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=norms_info))
    gen.globals.append(GlobalArgInfo('uint8', cname_infos,
                                     gen.opts['default_global_home_location'],
                                     gen.opts['default_global_exec_location'],
                                     const_info=ssd_infos))
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Float quantizer that also validates the node's dimension order (HWC/CHW)."""
    force_out_qs, dtype = cls.get_float_opts(**kwargs)
    if force_out_qs and any(qt.dtype != dtype
                            for qt in force_out_qs if qt is not None):
        return None
    opts = kwargs['opts']
    hwc_order = ['h', 'w', 'c']
    chw_order = ['c', 'h', 'w']
    if opts['hwc']:
        cls.check_order(params, [hwc_order], [hwc_order])
    elif params.in_dims_hint:
        cls.check_order(params, [chw_order], [chw_order])
    # all inputs and outputs are set to the required float type
    return QRec.float(in_qs=[QType(dtype=dtype)],
                      out_qs=[QType(dtype=dtype)],
                      float_dtype=dtype)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Float quantizer for a 3-input kernel; checks HWC/CHW kernel ordering."""
    force_out_qs, dtype = cls.get_float_opts(**kwargs)
    if force_out_qs and any(qt.dtype != dtype
                            for qt in force_out_qs if qt is not None):
        return None
    opts = kwargs['opts']
    if opts['hwc']:
        in_order, out_order = AT_HWC_KER_IN_ORDER, AT_HWC_KER_OUT_ORDER
    else:
        in_order, out_order = AT_CHW_KER_IN_ORDER, AT_CHW_KER_OUT_ORDER
    cls.check_order(params, in_order, out_order)
    # all inputs and outputs are set to the required float type
    return QRec.float(in_qs=[QType(dtype=dtype) for _ in range(3)],
                      out_qs=[QType(dtype=dtype)],
                      float_dtype=dtype)
def test_conf2d_q2(caplog):
    """A 2x2 all-ones conv over a 2x2 all-ones input yields a single 4.0."""
    caplog.set_level(logging.INFO)
    w_q = QType(16, 1, True)
    weights = w_q.quantize(np.full([1, 1, 2, 2], 1.0))
    params = Conv2DParameters("test",
                              filt=Conv2DFilterDim(2, 2, 1, 1),
                              stride=StrideDim(1),
                              padding=PadDim.valid(),
                              dilation=DilationDim(1),
                              in_dims_hint=[['c', 'h', 'w']],
                              out_dims_hint=[['c', 'h', 'w']])
    in_q = QType(16, 0, True)
    # accumulator wide enough for the full product q
    calc_q = QType(w_q.bits + in_q.bits, w_q.q + in_q.q, True)
    qrec = FilterQuantizationRecord(in_qs=[in_q], out_qs=[in_q],
                                    weights_q=w_q, acc_q=calc_q,
                                    calc_q=calc_q)
    input_ = in_q.quantize(np.full([1, 2, 2], 1.0))
    in_dims = Dim.named(c=1, h=2, w=2).impose_order(['c', 'h', 'w'])
    out_dims = params.get_output_size([in_dims])
    result = conv2d(params, in_dims, out_dims[0], input_, weights, None,
                    qrec=qrec)
    assert np.array_equal(in_q.dequantize(result), [[[4.]]])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Float quantizer sized from the node's actual in/out edge counts."""
    force_out_qs, dtype = cls.get_float_opts(**kwargs)
    if force_out_qs and all(qt.dtype != dtype
                            for qt in force_out_qs if qt is not None):
        return None
    # use cur_G not G here since this may be called inside a fusion
    # cur_G == G or fusion subgraph if inside fusion
    graph = kwargs['cur_G']
    n_in = len(graph.indexed_in_edges(params.name))
    n_out = len(graph.indexed_out_edges(params.name))
    # all inputs and outputs are set to the required float type
    return QRec.float(in_qs=[QType(dtype=dtype) for _ in range(n_in)],
                      out_qs=[QType(dtype=dtype) for _ in range(n_out)],
                      float_dtype=dtype)
def test_concat_q():
    # concat of quantized tensors should match np.concatenate on the raw values
    in_q = QType(16, 1, True)
    inputs = [
        in_q.quantize(np.full([1, 2, 2], 1.0)),
        in_q.quantize(np.full([2, 2, 2], 2.0))
    ]
    # NOTE(review): the second tensor has c=2 but its Dim below is declared
    # c=1 — looks inconsistent; confirm whether concat uses the Dim shapes
    in_dims = [
        Dim.named(c=1, h=2, w=2).impose_order(['c', 'h', 'w']),
        Dim.named(c=1, h=2, w=2).impose_order(['c', 'h', 'w'])
    ]
    params = ConcatParameters("test", axis=0)
    out_dims = params.get_output_size(in_dims)
    output_ = concat(params, in_dims, out_dims[0], inputs)
    assert np.array_equal(output_, np.concatenate(inputs, 0))
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantizer with dtypes fixed by the node itself; refuses forced outputs."""
    force_out_qs, _ = cls.get_mult_opts(**kwargs)
    # the output quantization is fixed and cannot be overridden
    if force_out_qs and force_out_qs[0]:
        return None
    # unit-scale input at the node's declared input dtype
    in_q = QType(scale=1, dtype=params.input_dtype)
    out_q = QType.from_min_max_sq(-1, 1,
                                  dtype=params.output_dtype,
                                  narrow_range=True)
    return MultQuantizationRecord(in_qs=[in_q], out_qs=[out_q])