def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Apply a two-input piecewise operation to symmetric-quantized tensors.

    The operation is looked up in ``PIECEWISE_OPS`` using the class of
    ``params``. Inputs are prepared by ``qrec``, optionally transposed and
    broadcast, combined, rescaled, optionally transposed back and finally
    clipped by ``qrec.out_qs[0]``.

    Args:
        params: Node parameters. Reads ``transpose_in``, ``transpose_out``
            and, for ``Broadcastable`` instances, ``is_broadcasted`` /
            ``broadcast_inputs``.
        in_tensors: Input tensors; elements 0 and 1 are the operands.
        qrec: Quantization record used to prepare the inputs, apply the
            scaling and package the outputs.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        The value of ``qrec.get_outputs`` for the single clipped result.
    """
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")

    if params.transpose_in:
        # A falsy entry means this particular input is left untouched.
        in_tensors = [
            (np.transpose(in_tensor, params.transpose_in[idx])
             if params.transpose_in[idx] else in_tensor)
            for idx, in_tensor in enumerate(in_tensors)
        ]

    if isinstance(params, Broadcastable) and params.is_broadcasted:
        in_tensors = params.broadcast_inputs(in_tensors)

    func = PIECEWISE_OPS[params.__class__]
    op = func['op']

    if func['is_mult']:
        qrec.set_scale(in_idx=(0, 1), out_idx=0)
        i1 = in_tensors[0].astype(np.int32)
        i2 = in_tensors[1].astype(np.int32)
        out_tensor = qrec.scale_mul_biases_q.apply_scales(
            op(i1, i2, np.int32))
    else:
        # larger scale should be scaled
        qrec.set_add_scale()
        if qrec.scaled_idx:
            # Truthy scaled_idx: the second operand is the one rescaled.
            i1 = in_tensors[0].astype(np.int32)
            i2 = qrec.scale_in_mul_biases_q.apply_scales(in_tensors[1])
        else:
            i1 = qrec.scale_in_mul_biases_q.apply_scales(in_tensors[0])
            i2 = in_tensors[1].astype(np.int32)
        out_tensor = qrec.scale_mul_biases_q.apply_scales(op(i1, i2, None))

    # Guard the per-output entry the same way the inputs are guarded:
    # np.transpose(x, None) would otherwise silently reverse every axis
    # when transpose_out is a non-empty list holding None.
    if params.transpose_out and params.transpose_out[0]:
        out_tensor = np.transpose(out_tensor, params.transpose_out[0])

    return qrec.get_outputs(params,
                            [qrec.out_qs[0].clip(out_tensor)],
                            ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Evaluate a two-input piecewise operation on symmetric-quantized data.

    The entry for ``params.__class__`` in ``PIECEWISE_OPS`` supplies the
    callable (``'op'``) and a flag (``'is_mult'``) that selects which
    scaling scheme of ``qrec`` is used.

    Args:
        params: Node parameters; its class selects the operation.
        in_tensors: Input tensors; elements 0 and 1 are the operands.
        qrec: Quantization record that prepares the inputs, applies the
            scales and builds the returned outputs.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        The value of ``qrec.get_outputs`` for the single clipped result.
    """
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")

    op_entry = PIECEWISE_OPS[params.__class__]
    compute = op_entry['op']

    if not op_entry['is_mult']:
        # Only one operand goes through the input scaler; the other is
        # just widened to int32. scaled_idx picks which one.
        qrec.set_add_scale()
        if qrec.scaled_idx:
            lhs = in_tensors[0].astype(np.int32)
            rhs = qrec.scale_in_mul_biases_q.apply_scales(in_tensors[1])
        else:
            lhs = qrec.scale_in_mul_biases_q.apply_scales(in_tensors[0])
            rhs = in_tensors[1].astype(np.int32)
        # Third positional argument handed to the op; presumably a dtype
        # selector -- confirm against the PIECEWISE_OPS callables.
        dtype_hint = None
    else:
        qrec.set_scale(in_idx=(0, 1), out_idx=0)
        lhs = in_tensors[0].astype(np.int32)
        rhs = in_tensors[1].astype(np.int32)
        dtype_hint = np.int32

    combined = compute(lhs, rhs, dtype_hint)
    rescaled = qrec.scale_mul_biases_q.apply_scales(combined)
    clipped = qrec.out_qs[0].clip(rescaled)
    return qrec.get_outputs(params, [clipped], ktype="symmetric")