def _quantize(cls, params, in_qs, stats, **kwargs):
    force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    fusion = kwargs.get('fusion', None)
    cls.check_valid_ranges(params, stats, idx=0, dirs='out')
    if fusion:
        activation = fusion.contained_nodes()[1]
        if isinstance(activation, ReluActivationParameters):
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion, activation)]['range_out'][0]
            out_dtype = np.int32
    else:
        out_dtype = params_dtype
        range_out = stats['range_out'][0]
    # snap each input scale to a power of two
    in_q1 = deepcopy(in_qs[0]).scale_to_pow2()
    in_q2 = deepcopy(in_qs[1]).scale_to_pow2()
    # the product of Q(in_q1.q) and Q(in_q2.q) values is in Q(in_q1.q + in_q2.q)
    biases_q = QType.Pow2(32, in_q1.q + in_q2.q, True)
    if force_out_q:
        o_q = force_out_q
    else:
        o_q = QType.from_min_max_pow2(
            range_out['min'], range_out['max'], dtype=out_dtype)
    if len(in_qs) == 3:
        return QRec.symmetric(in_qs=[in_q1, in_q2, biases_q], out_qs=[o_q])
    return QRec.symmetric(in_qs=[in_q1, in_q2], out_qs=[o_q])
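# A standalone sketch (plain NumPy, independent of the library above) of the
# fixed-point arithmetic behind biases_q: multiplying a Q(a) value by a Q(b)
# value yields a Q(a+b) product, so biases added to the accumulator must be
# expressed in Q(in_q1.q + in_q2.q). All names here are illustrative.
import numpy as np

def to_q(x, q):
    # quantize a float to fixed point with q fractional bits
    return np.int32(round(x * (1 << q)))

a = to_q(1.5, 7)            # Q7 representation of 1.5  -> 192
b = to_q(0.25, 7)           # Q7 representation of 0.25 ->  32
prod = np.int64(a) * b      # product lands in Q14
assert prod / (1 << 14) == 1.5 * 0.25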
def _handle(cls, params, in_qs, stats, ktype, **kwargs):
    forced_out_qs = kwargs.get('force_out_qs')
    if forced_out_qs:
        # some could not be forced
        forced_out_qs = [
            qtype for qtype in forced_out_qs if qtype is not None]
    forced_in_qs = [in_q for in_q in in_qs if in_q.forced]
    forced_in_q = forced_in_qs[0] if forced_in_qs else None
    # two outputs cannot be forced to different values
    if forced_out_qs and not QType.forced_equal(*forced_out_qs):
        LOG.info(
            'two output qtypes of split %s are forced to different qtypes',
            params.name)
        return None
    # input cannot be forced to a different value than the output
    if forced_in_q and forced_out_qs and not forced_in_q.can_force(*forced_out_qs):
        LOG.error(
            'output and input of split %s are forced to different qtypes',
            params.name)
        return None
    # now if forced we are compatible with the split
    forced_out_q = forced_out_qs[0] if forced_out_qs else None
    if forced_in_q:
        out_qs = [deepcopy(forced_in_q) for _ in range(params.num_splits)]
        return QRec(ktype=ktype, in_qs=[deepcopy(forced_in_q)], out_qs=out_qs)
    if forced_out_q:
        out_qs = [deepcopy(forced_out_q) for _ in range(params.num_splits)]
        return QRec(ktype=ktype, in_qs=[deepcopy(forced_out_q)], out_qs=out_qs)
    out_qs = [deepcopy(in_qs[0]) for _ in range(params.num_splits)]
    return QRec(ktype=ktype, in_qs=[deepcopy(in_qs[0])], out_qs=out_qs)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    return qrec.get_outputs(params, [1 / (1 + np.exp(-in_tensor))], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    in_dims, out_dims = cls.calc_transposed_dims(params)
    in_tensor = in_tensor.transpose(
        in_dims[0].transpose_to_order(("h", "w", "c")))
    w_out = out_dims[0].w
    h_out = out_dims[0].h
    c_out = out_dims[0].c
    w_in = in_dims[0].w
    h_in = in_dims[0].h
    wstep = (w_in - 1) / w_out
    hstep = (h_in - 1) / h_out
    out_tensor = np.empty((h_out, w_out, c_out))
    for i in range(h_out):
        y_l, y_h = math.floor(hstep * i), math.ceil(hstep * i)
        hc = (hstep * i) - y_l
        for j in range(w_out):
            x_l, x_h = math.floor(wstep * j), math.ceil(wstep * j)
            wc = (wstep * j) - x_l
            P1 = in_tensor[y_l, x_l, :]
            P2 = in_tensor[y_l, x_h, :]
            P3 = in_tensor[y_h, x_l, :]
            P4 = in_tensor[y_h, x_h, :]
            out_tensor[i, j, :] = (P1 * (1 - wc) * (1 - hc)
                                   + P2 * wc * (1 - hc)
                                   + P3 * (1 - wc) * hc
                                   + P4 * wc * hc)
    out_tensor = out_tensor.transpose(
        out_dims[0].transpose_from_order(("h", "w", "c")))
    return qrec.get_outputs(params, [out_tensor], ktype="float")
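# A self-contained restatement of the bilinear scheme above on a plain HWC
# float array, handy for checking the handler against a reference resizer.
# It assumes the same (size-1)/size step convention as the handler; the
# function name and shapes are illustrative only.
import math
import numpy as np

def bilinear_resize(img, h_out, w_out):
    h_in, w_in, c = img.shape
    hstep, wstep = (h_in - 1) / h_out, (w_in - 1) / w_out
    out = np.empty((h_out, w_out, c))
    for i in range(h_out):
        y_l, y_h = math.floor(hstep * i), math.ceil(hstep * i)
        hc = hstep * i - y_l
        for j in range(w_out):
            x_l, x_h = math.floor(wstep * j), math.ceil(wstep * j)
            wc = wstep * j - x_l
            # weighted mix of the four neighbouring pixels
            out[i, j] = (img[y_l, x_l] * (1 - wc) * (1 - hc)
                         + img[y_l, x_h] * wc * (1 - hc)
                         + img[y_h, x_l] * (1 - wc) * hc
                         + img[y_h, x_h] * wc * hc)
    return out

print(bilinear_resize(np.arange(16.).reshape(4, 4, 1), 8, 8)[..., 0])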
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    if isinstance(params, Broadcastable) and params.is_broadcasted:
        in_tensors = params.broadcast_inputs(in_tensors)
    func = PIECEWISE_OPS[params.__class__]
    op = func['op']
    if func['is_mult']:
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
        i1 = in_tensors[0].astype(np.int32)
        i2 = in_tensors[1].astype(np.int32)
        out_tensor = scale_mul_biases_q.apply_scales(op(i1, i2, np.int32))
    else:
        # larger scale should be scaled
        set_add_in_scale(qrec)
        scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
        if qrec.cache['scaled_idx']:
            i1 = in_tensors[0].astype(np.int32)
            i2 = qrec.cache['scale_in_mul_biases_q'].apply_scales(
                in_tensors[1])
        else:
            i1 = qrec.cache['scale_in_mul_biases_q'].apply_scales(
                in_tensors[0])
            i2 = in_tensors[1].astype(np.int32)
        out_tensor = scale_mul_biases_q.apply_scales(op(i1, i2, None))
    return qrec.get_outputs(
        params, [qrec.out_qs[0].clip(out_tensor)], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    qname = kwargs['qname']
    params = typing_cast(SplitParameters, params)
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype=qname)[0]
    out_tensors = params.numpy_split(in_tensor)
    return qrec.get_outputs(params, out_tensors, ktype=qname)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    func = PIECEWISE_OPS[params.__class__]
    op = func['op']
    if func['is_mult']:
        i1 = in_tensors[0].astype(np.int32)
        i2 = in_tensors[1].astype(np.int32)
        res = op(i1, i2, np.int32)
        q_calc = QType.Pow2(
            bits=32, q=qrec.in_qs[0].q + qrec.in_qs[1].q, signed=True)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    else:
        off_in = abs(qrec.in_qs[0].q - qrec.in_qs[1].q)
        if qrec.in_qs[0].q > qrec.in_qs[1].q:
            i1 = at_norm(in_tensors[0].astype(np.int32), off_in)
            i2 = in_tensors[1].astype(np.int32)
        else:
            i1 = in_tensors[0].astype(np.int32)
            i2 = at_norm(in_tensors[1].astype(np.int32), off_in)
        res = op(i1, i2, None)
        q_calc = QType.Pow2(
            bits=32, q=min(qrec.in_qs[0].q, qrec.in_qs[1].q), signed=True)
        res = qrec.out_qs[0].reduce_from(res, q_calc)
    return qrec.get_outputs(params, [res], ktype="symmetric")
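# A minimal sketch of the alignment done in the add path above: two
# fixed-point operands must share a Q format before being added, so the one
# with more fractional bits is shifted right by the difference and the result
# lives in the smaller Q. at_norm is modelled here as a plain arithmetic
# shift; the names are illustrative, not the library's API.
import numpy as np

def align_and_add(v1, q1, v2, q2):
    off = abs(q1 - q2)
    if q1 > q2:
        v1 = v1 >> off          # drop the excess fractional bits of v1
    else:
        v2 = v2 >> off
    return v1 + v2, min(q1, q2)

# 1.5 in Q9 plus 1.5 in Q6 -> 3.0 in Q6
res, q = align_and_add(np.int32(768), 9, np.int32(96), 6)
assert res / (1 << q) == 3.0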
def _quantize(cls, params, in_qs, stats, **kwargs):
    force_out_qs, _ = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    opts = kwargs['opts']
    if force_out_q:
        if force_out_q.forced_scale or force_out_q.forced_zero_point:
            return None
        if in_qs[0].dtype == np.int8:
            dtypes = [np.int8, np.int16]
        else:
            dtypes = [np.int16]
        if force_out_q.forced_dtype and force_out_q.dtype not in dtypes:
            return None
    in_qs = cls.force_symmetric_and_dtype(in_qs)
    if in_qs is None:
        return None
    # force the input to be POW2 scaled
    pow2_scale = np.power(2, np.ceil(np.log2(in_qs[0].scale)))
    in_q = QType(min_val=in_qs[0].min_val, max_val=in_qs[0].max_val,
                 dtype=in_qs[0].dtype, scale=pow2_scale, forced=True)
    if in_q.dtype == np.int8 and (opts.get('softmax_out_8bits', None) or
                                  (force_out_q and force_out_q.dtype == np.int8)):
        # params.at_options.softmax_out_8bits = 1
        o_q = QType(min_val=-1, max_val=1, dtype=np.int8, scale=2**(-7))
    else:
        o_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
    if in_q.dtype == np.int16 and o_q.dtype == np.int16:
        return QRec.symmetric(in_qs=[in_q], out_qs=[o_q])
    return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
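# Sketch of the power-of-two snap applied to the input scale above: rounding
# the scale up to the next power of two lets a kernel replace the scaling
# multiply with a shift, at the cost of slightly widening the representable
# range. The scale value is an assumed example.
import numpy as np

scale = 0.0117
pow2_scale = np.power(2, np.ceil(np.log2(scale)))
print(pow2_scale)  # 0.015625 == 2**-6, the smallest power of two >= 0.0117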
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    qname = kwargs['qname']
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype=qname)[0]
    if params.transpose:
        in_tensor = np.transpose(in_tensor, params.transpose)
    return qrec.get_outputs(params, [in_tensor], ktype=qname)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensors = [
        in_tensor.astype(np.int32) for in_tensor in qrec.prepare_inputs(
            params, in_tensors, ktype="symmetric")
    ]
    if isinstance(params, MatMulTransposedParameters):
        mat1, mat2 = in_tensors[0], np.transpose(in_tensors[1], (1, 0))
    else:
        mat1, mat2 = in_tensors[0], in_tensors[1]
    mat2 = mat2.astype(np.int32) - qrec.in_qs[1].zero_point.astype(np.int32)
    if len(in_tensors) > 2:
        biases = in_tensors[2]
        if len(biases.shape) == 1:
            if biases.shape[0] == mat1.shape[0]:
                biases = np.expand_dims(biases, -1)
    else:
        biases = 0
    out_tensor = np.matmul(mat1, mat2) + biases
    mul_biases_q = qrec.cache['mul_biases_q']
    scale_axis = None if len(mul_biases_q.scale) == 1 else 1
    out_tensor = mul_biases_q.apply_scales(out_tensor, scale_axis)
    return qrec.get_outputs(params, [out_tensor], ktype="symmetric")
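# A self-contained sketch of the requantization hidden in apply_scales above:
# the int32 accumulator of a symmetric-quantized matmul carries scale
# s_a * s_b and is rescaled to the output scale. A plain float multiply is
# used here for clarity; the real kernels use integer multiplier/shift pairs.
# All scales and shapes are assumed example values.
import numpy as np

rng = np.random.default_rng(0)
a = rng.uniform(-1, 1, (4, 8))
b = rng.uniform(-1, 1, (8, 3))
s_a = s_b = 1 / 127                       # symmetric int8 input scales
s_out = 8 / 127                           # output range assumed ~[-8, 8]
qa = np.round(a / s_a).astype(np.int32)
qb = np.round(b / s_b).astype(np.int32)
acc = qa @ qb                             # int32 accumulator, scale s_a * s_b
q_out = np.round(acc * (s_a * s_b / s_out)).astype(np.int8)
print(np.abs(q_out * s_out - a @ b).max())  # small quantization error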
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensors = [
        in_tensor.astype(np.int32) for in_tensor in qrec.prepare_inputs(
            params, in_tensors, ktype="symmetric")
    ]
    if isinstance(params, MatMulTransposedParameters):
        mat1, mat2 = in_tensors[0], np.transpose(in_tensors[1], (1, 0))
    else:
        mat1, mat2 = in_tensors[0], in_tensors[1]
    if len(in_tensors) > 2:
        biases = in_tensors[2]
        if len(biases.shape) == 1:
            if biases.shape[0] == mat1.shape[0]:
                biases = np.expand_dims(biases, -1)
    else:
        biases = 0
    # biases are expected in Q(in_q1.q + in_q2.q)
    q_calc = QType.Pow2(
        bits=32, q=qrec.in_qs[0].q + qrec.in_qs[1].q, signed=True)
    out_tensor = np.matmul(mat1, mat2) + biases
    out_tensor = qrec.out_qs[0].reduce_from(out_tensor, q_calc)
    return qrec.get_outputs(params, [out_tensor], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    details = kwargs.get('details')
    if details is not None:
        current_control = SymbolStats()
        Symbol.set_default_control(current_control)
        results = {}
    else:
        results = None
        current_control = None
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
    in_vars = {params.input_symbols[i]: in_tensor
               for i, in_tensor in enumerate(in_tensors)}
    func_col = qrec.cache.get('qfunc_col')
    if func_col is None:
        func_col = params.func_col
    out_vars = func_col(**in_vars,
                        calculate_ranges=current_control is not None,
                        track_results=results)
    out_tensors = [out_vars[out_sym_name]
                   for out_sym_name in params.output_symbols]
    if current_control:
        details.update(current_control.stats)
        details['results'] = results
    return qrec.get_outputs(params, out_tensors, ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    in_dim, out_dim = params.in_dims[0], params.out_dims[0]
    in_tensor = in_tensor.transpose(
        in_dim.transpose_to_order(("h", "w", "c")))
    w_out = out_dim.w
    h_out = out_dim.h
    c_out = out_dim.c
    w_in = in_dim.w
    h_in = in_dim.h
    wstep = (w_in - 1) / (w_out - 1)
    hstep = (h_in - 1) / (h_out - 1)
    out_tensor = np.empty((h_out, w_out, c_out))
    for i in range(h_out):
        h_rounded = int(round(hstep * i))
        for j in range(w_out):
            w_rounded = int(round(wstep * j))
            out_tensor[i, j, :] = in_tensor[h_rounded, w_rounded, :]
    out_tensor = out_tensor.transpose(
        out_dim.transpose_from_order(("h", "w", "c")))
    return qrec.get_outputs(params, [out_tensor], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    qname = kwargs['qname']
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype=qname)[0]
    return qrec.get_outputs(
        params,
        [in_tensor * np.ones(params.shape, dtype=in_tensor.dtype)],
        ktype=qname)
def _handle(cls, params, in_qs, _, ktype, **kwargs):
    force_out_qs = kwargs.get('force_out_qs')
    force_out_q = force_out_qs and force_out_qs[0]
    forced_in_qs = [in_q for in_q in in_qs if in_q.forced]
    # two inputs cannot be forced to different values
    if forced_in_qs and not QType.forced_equal(*forced_in_qs):
        LOG.debug(
            'two input qtypes of %s are forced to different qtypes - rejecting',
            params.name)
        return None
    # input cannot be forced to a different value than the output
    if force_out_q and not force_out_q.can_force(force_out_q, *in_qs):
        LOG.debug(
            'output and input of %s are forced to different qtypes - rejecting',
            params.name)
        return None
    backwards = kwargs.get('backwards')
    if backwards:
        if force_out_q:
            in_qs = [deepcopy(force_out_q) for _ in in_qs]
            return QRec(in_qs=in_qs, out_qs=[deepcopy(force_out_q)], ktype=ktype)
    elif force_out_q and not all(in_q == force_out_q for in_q in in_qs):
        # if going forwards and our output is forced and does not match the
        # input then we cannot satisfy
        LOG.debug(
            "output of %s is forced and inputs don't match - rejecting",
            params.name)
        return None
    return QRec(in_qs=in_qs, out_qs=[deepcopy(in_qs[0])], ktype=ktype)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
    if isinstance(params, MatMulTransposedParameters):
        mat1, mat2 = in_tensors[0], np.transpose(in_tensors[1], (1, 0))
    else:
        mat1, mat2 = in_tensors[0], in_tensors[1]
    if len(in_tensors) > 2:
        biases = in_tensors[2]
        if len(biases.shape) == 1:
            if biases.shape[0] == mat1.shape[0]:
                biases = np.expand_dims(biases, -1)
    else:
        biases = 0
    out_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
        'float') else np.float32
    output_tensor = np.matmul(mat1, mat2).astype(
        out_dtype) + np.atleast_1d(biases).astype(out_dtype)
    return qrec.get_outputs(params, [output_tensor], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    old_err = np.seterr(over='raise')
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    in_tensor = softmax_func(in_tensor, axis=params.axis)
    np.seterr(**old_err)
    return qrec.get_outputs(params, [in_tensor], ktype="float")
def get_qrec_pass(G, qrec, node, copy_qs):
    if qrec is None:
        if copy_qs:
            return QRec.scaled(in_qs=copy_qs, out_qs=copy_qs)
        return None
    # fall back to the copied qtypes when the record has none set
    in_qs = qrec.in_qs if qrec.in_qs else copy_qs
    out_qs = qrec.out_qs if qrec.out_qs else copy_qs
    return QRec.scaled(in_qs=in_qs, out_qs=out_qs)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    return qrec.get_outputs(
        params, [np.minimum(np.maximum(in_tensor, -1.0), 1.0)], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    output = (in_tensor * (in_tensor > 0)
              + in_tensor * params.leak_factor * (in_tensor < 0))
    return qrec.get_outputs(params, [output], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    upper_bound = qrec.in_qs[0].quantize(np.array([1.]))
    in_tensor = np.minimum(np.maximum(in_tensor, -upper_bound), upper_bound)
    return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
def sigmoid(params, in_tensors, qrec: QRec, details=None):
    del details
    if qrec.ktype == 'scaled':
        raise NotImplementedError()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    dqinput = qrec.in_qs[0].dequantize(in_tensor)
    return qrec.get_outputs(
        params,
        [qrec.out_qs[0].quantize(1 / (1 + np.exp(-dqinput)))],
        ktype="symmetric")
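# A minimal sketch of the dequantize -> float sigmoid -> requantize pattern
# used above, assuming a symmetric Q12 int16 input and a Q15 int16 output.
# The function name and Q formats are illustrative, not the library's API.
import numpy as np

def fake_quant_sigmoid(x_q, in_q=12, out_q=15):
    x = x_q / (1 << in_q)                                # dequantize
    y = 1 / (1 + np.exp(-x))                             # float sigmoid
    return np.round(y * (1 << out_q)).astype(np.int16)   # requantize

# sigmoid(-1.0), sigmoid(0.0), sigmoid(1.0) in Q15
print(fake_quant_sigmoid(np.array([-4096, 0, 4096])))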
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    LOG.debug("matscale input %s",
              ",".join([t.dtype.name for t in in_tensors]))
    if len(params.in_dims) == 3:
        output_tensor = cls.matscale3(in_tensors, qrec)
    else:
        output_tensor = cls.matscale2(in_tensors, qrec)
    return qrec.get_outputs(params, [output_tensor], ktype="symmetric")
def _handle(cls, params, in_qs, _, **kwargs):
    force_out_qs = kwargs['force_out_qs']
    force_out_q = force_out_qs[0] if force_out_qs else None
    forced_in_qs = [in_q for in_q in in_qs if in_q.forced]
    # two inputs cannot be forced to different values
    if forced_in_qs and not QType.forced_equal(*forced_in_qs):
        LOG.info(
            'two input qtypes of concat %s are forced to different qtypes',
            params.name)
        return None
    # input cannot be forced to a different value than the output
    if force_out_q and not force_out_q.can_force(*forced_in_qs):
        LOG.info(
            'output and input of concat %s are forced to different qtypes',
            params.name)
        return None
    backwards = kwargs.get('backwards')
    # if we are going backwards or are forced
    if backwards:
        if force_out_q:
            ok = True
            if force_out_q.forced_dtype and any(
                    in_q.dtype != force_out_q.dtype for in_q in in_qs):
                ok = False
            if (force_out_q.forced_zero_point or force_out_q.forced_scale
                    or force_out_q.forced_q):
                ok = False
            # if output must be forced
            if not ok:
                in_qs = [deepcopy(force_out_q) for _ in in_qs]
                return QRec(ktype=cls.KTYPE, in_qs=in_qs,
                            out_qs=[deepcopy(force_out_q)])
    # if all the inputs are the same qtype then we output that qtype
    if all(in_qs[0] == in_q for in_q in in_qs[1:]):
        return QRec(ktype=cls.KTYPE, in_qs=in_qs,
                    out_qs=[deepcopy(in_qs[0])])
    # our output cannot be forced at this point
    # if an input has scale or q forced then all forced inputs must be the same here
    # TODO - have a general function for this problem - should pick with
    # force constraints respecting dtype
    if forced_in_qs and any(fin_qs.forced_scale or fin_qs.forced_q
                            for fin_qs in forced_in_qs):
        in_qs = [deepcopy(forced_in_qs[0]) for _ in in_qs]
        return QRec(ktype=cls.KTYPE, in_qs=in_qs,
                    out_qs=[deepcopy(forced_in_qs[0])])
    # if the inputs are not all the same then force all of them to the maximum
    # input size with a Q that fits the most int bits
    common_q = cls._get_common_q(in_qs)
    in_qs = [deepcopy(common_q) for _ in in_qs]
    return QRec(ktype=cls.KTYPE, in_qs=in_qs, out_qs=[deepcopy(common_q)])
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    old_err = np.seterr(over='raise')
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    # TODO - Implement properly quantized version
    in_tensor = qrec.in_qs[0].dequantize(in_tensor)
    in_tensor = qrec.out_qs[0].quantize(softmax_func(in_tensor))
    np.seterr(**old_err)
    return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
    output = cls.FUNC(*in_tensors)
    return qrec.get_outputs(params, [output], ktype="float")
def sum_execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    return qrec.get_outputs(
        params,
        [np.sum(in_tensor, axis=tuple(params.axis), keepdims=params.keep_dims)],
        ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    qname = kwargs['qname']
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype=qname)
    if qrec:
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        float_conversion = in_q.is_floating or out_q.is_floating
        bit_conversion = in_q.bits != out_q.bits
        if not float_conversion:
            same_sign = in_q.signed == out_q.signed
            if in_q.bits > out_q.bits:
                bit_diff = in_q.bits - out_q.bits
                same_scale = np.allclose(
                    in_q.scale * np.power(2, bit_diff), out_q.scale, atol=0.0001)
                same_zeropoint = np.all(
                    in_q.zero_point >> bit_diff == out_q.zero_point)
            elif out_q.bits > in_q.bits:
                bit_diff = out_q.bits - in_q.bits
                same_scale = np.allclose(
                    out_q.scale * np.power(2, bit_diff), in_q.scale, atol=0.0001)
                same_zeropoint = np.all(
                    in_q.zero_point == out_q.zero_point >> bit_diff)
            else:
                same_scale = np.allclose(out_q.scale, in_q.scale, atol=0.0001)
                same_zeropoint = np.all(in_q.zero_point == out_q.zero_point)
            if same_scale and same_sign and bit_conversion and same_zeropoint:
                # the formats differ only in bit width so convert with a shift
                if in_q.bits > out_q.bits:
                    if in_q.signed:
                        out_tensor = out_q.clip(at_norm(
                            in_tensors[0].astype(np.int32),
                            in_q.bits - out_q.bits))
                    else:
                        out_tensor = out_q.clip(at_norm(
                            in_tensors[0].astype(np.uint32),
                            in_q.bits - out_q.bits))
                else:
                    out_tensor = in_tensors[0].astype(
                        out_q.dtype) << (out_q.bits - in_q.bits)
                return qrec.get_outputs(params, [out_tensor], ktype=qname)
        # all other conversions should be numerically equivalent to this (within 1 bit)
        out_tensor = qrec.out_qs[0].quantize_from(in_tensors[0], qrec.in_qs[0])
    else:
        out_tensor = in_tensors[0]
    return qrec.get_outputs(params, [out_tensor], ktype=qname)
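# Sketch of the shift shortcut taken above when the two fixed-point formats
# differ only in bit width: if out_scale == in_scale * 2**bit_diff with the
# same sign and zero point, dropping bit_diff low bits (with rounding)
# converts int16 data to int8 with no multiply. at_norm is modelled here as a
# rounding arithmetic right shift; names are illustrative only.
import numpy as np

def round_shift(x, n):
    # rounding arithmetic right shift, similar in spirit to at_norm
    return (x + (1 << (n - 1))) >> n

x16 = np.array([12345, -12345], dtype=np.int32)  # int16-range data, scale s
x8 = np.clip(round_shift(x16, 8), -128, 127)     # int8 data, scale s * 256
print(x8)  # same values re-expressed in the coarser scale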
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    out_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
        'float') else np.float32
    out_tensor = np.pad(in_tensor, params.padding, 'constant',
                        constant_values=params.pad_vals).astype(out_dtype)
    return qrec.get_outputs(params, [out_tensor], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    fac_1, upper_bound, lower_bound = hswish_mult_gen_factors(qrec)
    in_tensor = in_tensor.astype(np.int32)
    in_tensor_relued = np.minimum(
        np.maximum(in_tensor + fac_1, lower_bound), upper_bound)
    scale_mul_biases_q = qrec.cache['scale_mul_biases_q']
    in_tensor = scale_mul_biases_q.apply_scales(in_tensor * in_tensor_relued)
    return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
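# A standalone sketch of the integer hard-swish structure above:
# hswish(x) = x * relu6(x + 3) / 6, with the "+3" offset and the 0..6 clamp
# expressed as fixed-point constants (the roles played by fac_1 and the
# bounds from hswish_mult_gen_factors). Q12 values and the explicit /6 are
# assumptions for illustration; they mirror the structure, not the library's
# exact generated factors.
import numpy as np

def hswish_q12(x_q):
    one = 1 << 12
    # relu6(x + 3) in Q12
    relu = np.minimum(np.maximum(x_q + 3 * one, 0), 6 * one)
    # x * relu6(x + 3) / 6, shifted back down to Q12
    return ((x_q.astype(np.int64) * relu) // 6) >> 12

x = np.array([-4 * 4096, 0, 4096])   # -4.0, 0.0, 1.0 in Q12
print(hswish_q12(x) / 4096)          # ~[0.0, 0.0, 0.667]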