def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Quantize/requantize cast kernel.

    When input and output quantizations are fixed point and differ only by a
    power-of-two bit-width change (same effective scale, same sign, matching
    zero point), the conversion is done with cheap shifts; otherwise it falls
    back to a full quantize_from conversion.
    """
    qname = kwargs['qname']
    # NOTE(review): prepare_inputs is called before the `if qrec` guard below,
    # so qrec is effectively assumed non-None here — confirm the else branch
    # at the bottom is actually reachable.
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype=qname)
    if qrec:
        in_q = qrec.in_qs[0]
        out_q = qrec.out_qs[0]
        float_conversion = in_q.is_floating or out_q.is_floating
        bit_conversion = in_q.bits != out_q.bits
        if not float_conversion:
            same_sign = in_q.signed == out_q.signed
            if in_q.bits > out_q.bits:
                # narrowing: equivalent if in_scale * 2^bit_diff == out_scale
                # and the zero point shifts down consistently
                bit_diff = in_q.bits - out_q.bits
                same_scale = np.allclose(in_q.scale * np.power(2, bit_diff), out_q.scale, atol=0.0001)
                same_zeropoint = np.all(
                    in_q.zero_point >> bit_diff == out_q.zero_point)
            elif out_q.bits > in_q.bits:
                # widening: same condition with the roles swapped
                bit_diff = out_q.bits - in_q.bits
                same_scale = np.allclose(out_q.scale * np.power(2, bit_diff), in_q.scale, atol=0.0001)
                same_zeropoint = np.all(
                    in_q.zero_point == out_q.zero_point >> bit_diff)
            else:
                # same width: plain scale / zero-point comparison
                same_scale = np.allclose(out_q.scale, in_q.scale, atol=0.0001)
                same_zeropoint = np.all(
                    in_q.zero_point == out_q.zero_point)
            if same_scale and same_sign and bit_conversion and same_zeropoint:
                if in_q.bits > out_q.bits:
                    # narrow with a rounding shift, then clip to output range
                    if in_q.signed:
                        out_tensor = out_q.clip(
                            at_norm(in_tensors[0].astype(np.int32), in_q.bits - out_q.bits))
                    else:
                        out_tensor = out_q.clip(
                            at_norm(in_tensors[0].astype(np.uint32), in_q.bits - out_q.bits))
                else:
                    # widen with a plain left shift in the output dtype
                    out_tensor = in_tensors[0].astype(
                        out_q.dtype) << (out_q.bits - in_q.bits)
                return qrec.get_outputs(params, [out_tensor], ktype=qname)
        # in all other conversions should be numerically equivalent to this (within 1 bit)
        out_tensor = qrec.out_qs[0].quantize_from(in_tensors[0], qrec.in_qs[0])
    else:
        out_tensor = in_tensors[0]
    return qrec.get_outputs(params, [out_tensor], ktype=qname)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Element-wise float multiply of two or (when the node has three
    input dims) three input tensors."""
    if qrec is None:
        qrec = AllFloatQRec()
    tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
    product = tensors[0] * tensors[1]
    if len(params.in_dims) == 3:
        product = product * tensors[2]
    return qrec.get_outputs(params, [product], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Scaled-quantized leaky ReLU using a Q7 leak multiplier.

    Positive values pass through; negative values are scaled by the Q7
    leak factor with a rounding shift, then the whole tensor is rescaled
    to the output quantization.
    """
    x = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    compute_in_out_scale(qrec)
    leak = at_norm(x * leak_mult_gen_factor_q7(params), 7)
    x = x * (x > 0) + leak * (x < 0)
    x = qrec.cache['scale_mul_biases_q'].apply_scales(x)
    if qrec.out_qs[0] == qrec.in_qs[0]:
        return qrec.get_outputs(params, [x], ktype="symmetric")
    return qrec.get_outputs(
        params, [qrec.out_qs[0].reduce_from(x, qrec.in_qs[0])],
        ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float ReLU-N: clamp the input to [lower_bound, upper_bound].

    An upper_bound of None means no upper clamp (plain ReLU at
    lower_bound).
    """
    if qrec is None:
        qrec = AllFloatQRec()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    clipped = np.maximum(x, params.lower_bound)
    if params.upper_bound is not None:
        clipped = np.minimum(clipped, params.upper_bound)
    return qrec.get_outputs(params, [clipped], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Scaled-quantized matmul with zero-point correction on input 2.

    Optionally transposes the second operand, subtracts its zero point,
    adds an optional bias, then applies the cached output scales.
    """
    prepared = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    in_tensors = [t.astype(np.int32) for t in prepared]
    mat1 = in_tensors[0]
    if isinstance(params, MatMulTransposedParameters):
        mat2 = np.transpose(in_tensors[1], (1, 0))
    else:
        mat2 = in_tensors[1]
    # remove the second input's zero point before the product
    mat2 = mat2.astype(np.int32) - qrec.in_qs[1].zero_point.astype(np.int32)
    if len(in_tensors) > 2:
        biases = in_tensors[2]
        # a 1D bias that matches the row count broadcasts per row
        if len(biases.shape) == 1 and biases.shape[0] == mat1.shape[0]:
            biases = np.expand_dims(biases, -1)
    else:
        biases = 0
    acc = np.matmul(mat1, mat2) + biases
    mul_biases_q = qrec.cache['mul_biases_q']
    scale_axis = 1 if len(mul_biases_q.scale) > 1 else None
    out_tensor = mul_biases_q.apply_scales(acc, scale_axis)
    return qrec.get_outputs(params, [out_tensor], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Broadcast/tile the single input to params.shape by multiplying
    with a ones tensor of the target shape (dtype preserved)."""
    qname = kwargs['qname']
    x = qrec.prepare_inputs(params, in_tensors, ktype=qname)[0]
    expanded = np.ones(params.shape, dtype=x.dtype) * x
    return qrec.get_outputs(params, [expanded], ktype=qname)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Apply the node's transpose permutation to the input, if one is set."""
    qname = kwargs['qname']
    tensor = qrec.prepare_inputs(params, in_tensors, ktype=qname)[0]
    if not params.transpose:
        return qrec.get_outputs(params, [tensor], ktype=qname)
    permuted = np.transpose(tensor, params.transpose)
    return qrec.get_outputs(params, [permuted], ktype=qname)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float sigmoid: 1 / (1 + exp(-x))."""
    if qrec is None:
        qrec = AllFloatQRec()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    result = 1 / (1 + np.exp(-x))
    return qrec.get_outputs(params, [result], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """POW2 quantized matmul.

    Biases are expected in in_q1 + in_q2 fixed point; the 32-bit
    accumulator is reduced to the output quantization at the end.
    """
    prepared = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    in_tensors = [t.astype(np.int32) for t in prepared]
    mat1 = in_tensors[0]
    if isinstance(params, MatMulTransposedParameters):
        mat2 = np.transpose(in_tensors[1], (1, 0))
    else:
        mat2 = in_tensors[1]
    if len(in_tensors) > 2:
        biases = in_tensors[2]
        # a 1D bias matching the row count broadcasts per row
        if len(biases.shape) == 1 and biases.shape[0] == mat1.shape[0]:
            biases = np.expand_dims(biases, -1)
    else:
        biases = 0
    # accumulator Q is the sum of the two input Qs
    q_calc = QType.Pow2(bits=32, q=qrec.in_qs[0].q + qrec.in_qs[1].q, signed=True)
    acc = np.matmul(mat1, mat2) + biases
    out_tensor = qrec.out_qs[0].reduce_from(acc, q_calc)
    return qrec.get_outputs(params, [out_tensor], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Nearest-neighbour image resize in float.

    Works in (h, w, c) layout internally and transposes back to the
    node's output order afterwards.
    """
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    in_dim, out_dim = params.in_dims[0], params.out_dims[0]
    in_tensor = in_tensor.transpose(
        in_dim.transpose_to_order(("h", "w", "c")))
    w_out = out_dim.w
    h_out = out_dim.h
    c_out = out_dim.c
    w_in = in_dim.w
    h_in = in_dim.h
    # align-corners style step. NOTE(review): divides by (w_out - 1) /
    # (h_out - 1), so a 1-pixel output dimension would divide by zero —
    # confirm such shapes cannot reach this kernel.
    wstep = (w_in - 1) / (w_out - 1)
    hstep = (h_in - 1) / (h_out - 1)
    out_tensor = np.empty((h_out, w_out, c_out))
    for i in range(h_out):
        h_rounded = int(round(hstep * i))
        for j in range(w_out):
            w_rounded = int(round(wstep * j))
            # copy the nearest source pixel (all channels at once)
            out_tensor[i, j, :] = in_tensor[h_rounded, w_rounded, :]
    out_tensor = out_tensor.transpose(
        out_dim.transpose_from_order(("h", "w", "c")))
    return qrec.get_outputs(params, [out_tensor], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Quantized ReLU: clamp to the quantized bounds, then requantize to
    the output quantization when it differs from the input's."""
    x = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    lower = qrec.in_qs[0].quantize(params.lower_bound)
    x = np.maximum(x, lower)
    if params.upper_bound is not None:
        upper = qrec.in_qs[0].quantize(params.upper_bound)
        x = np.minimum(x, upper)
    if qrec.out_qs[0] == qrec.in_qs[0]:
        return qrec.get_outputs(params, [x], ktype="symmetric")
    reduced = qrec.out_qs[0].reduce_from(x, qrec.in_qs[0])
    return qrec.get_outputs(params, [reduced], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Evaluate the node's expression collection on the float inputs.

    When `details` is supplied, symbol range statistics and intermediate
    results are collected into it.
    """
    if qrec is None:
        qrec = AllFloatQRec()
    details = kwargs.get('details')
    if details is None:
        current_control = None
        results = None
    else:
        current_control = SymbolStats()
        Symbol.set_default_control(current_control)
        results = {}
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
    in_vars = {params.input_symbols[i]: in_tensors[i]
               for i in range(len(in_tensors))}
    # prefer the quantized function collection if one was cached
    func_col = qrec.cache.get('qfunc_col')
    if func_col is None:
        func_col = params.func_col
    out_vars = func_col(**in_vars,
                        calculate_ranges=current_control is not None,
                        track_results=results)
    out_tensors = [out_vars[name] for name in params.output_symbols]
    if current_control:
        details.update(current_control.stats)
        details['results'] = results
    return qrec.get_outputs(params, out_tensors, ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float matmul with optional transposed second operand and optional
    bias, computed in the record's output float dtype."""
    if qrec is None:
        qrec = AllFloatQRec()
    tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
    mat1 = tensors[0]
    if isinstance(params, MatMulTransposedParameters):
        mat2 = np.transpose(tensors[1], (1, 0))
    else:
        mat2 = tensors[1]
    if len(tensors) > 2:
        biases = tensors[2]
        # a 1D bias matching the row count broadcasts per row
        if len(biases.shape) == 1 and biases.shape[0] == mat1.shape[0]:
            biases = np.expand_dims(biases, -1)
    else:
        biases = 0
    out_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith('float') else np.float32
    result = np.matmul(mat1, mat2).astype(out_dtype) + np.atleast_1d(biases).astype(out_dtype)
    return qrec.get_outputs(params, [result], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float ReLU-N with the bounds cast to the kernel's float dtype so
    the clamp happens at the same precision as the data."""
    if qrec is None:
        qrec = AllFloatQRec()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    if qrec.ktype.startswith('float'):
        in_dtype = qrec.in_qs[0].dtype
    else:
        in_dtype = np.float32
    result = np.maximum(x, in_dtype(params.lower_bound))
    if params.upper_bound is not None:
        result = np.minimum(result, in_dtype(params.upper_bound))
    return qrec.get_outputs(params, [result], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Bilinear image resize in float.

    Works in (h, w, c) layout internally, interpolating each output
    pixel from its four source neighbours, then transposes back to the
    node's output order.
    """
    if qrec is None:
        qrec = AllFloatQRec()
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    in_dims, out_dims = cls.calc_transposed_dims(params)
    in_tensor = in_tensor.transpose(in_dims[0].transpose_to_order(
        ("h", "w", "c")))
    w_out = out_dims[0].w
    h_out = out_dims[0].h
    c_out = out_dims[0].c
    w_in = in_dims[0].w
    h_in = in_dims[0].h
    # NOTE(review): step uses (w_in - 1) / w_out rather than the
    # align-corners (w_out - 1) denominator used by the nearest-neighbour
    # kernel — presumably matching the target C implementation; confirm.
    wstep = (w_in - 1) / w_out
    hstep = (h_in - 1) / h_out
    out_tensor = np.empty((h_out, w_out, c_out))
    for i in range(h_out):
        # integer rows above/below the sample point and the fractional
        # vertical weight
        y_l, y_h = math.floor(hstep * i), math.ceil(hstep * i)
        hc = (hstep * i) - y_l
        for j in range(w_out):
            x_l, x_h = math.floor(wstep * j), math.ceil(wstep * j)
            wc = (wstep * j) - x_l
            # the four neighbouring pixels (all channels at once)
            P1 = in_tensor[y_l, x_l, :]
            P2 = in_tensor[y_l, x_h, :]
            P3 = in_tensor[y_h, x_l, :]
            P4 = in_tensor[y_h, x_h, :]
            # standard bilinear blend of the four neighbours
            out_tensor[i, j, :] = P1 * (1 - wc) * (1 - hc) \
                + P2 * wc * (1 - hc) \
                + P3 * (1 - wc) * hc \
                + P4 * wc * hc
    out_tensor = out_tensor.transpose(out_dims[0].transpose_from_order(
        ("h", "w", "c")))
    return qrec.get_outputs(params, [out_tensor], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """POW2 quantized piecewise binary op.

    Multiplies accumulate in Q(q0 + q1); additive ops first align the
    operands to the smaller Q with a rounding shift. The 32-bit result
    is then reduced to the output quantization.
    """
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    func = PIECEWISE_OPS[params.__class__]
    op = func['op']
    if func['is_mult']:
        res = op(in_tensors[0].astype(np.int32),
                 in_tensors[1].astype(np.int32), np.int32)
        calc_q = QType.Pow2(bits=32, q=qrec.in_qs[0].q + qrec.in_qs[1].q, signed=True)
    else:
        q0 = qrec.in_qs[0].q
        q1 = qrec.in_qs[1].q
        off_in = abs(q0 - q1)
        i1 = in_tensors[0].astype(np.int32)
        i2 = in_tensors[1].astype(np.int32)
        # shift the operand with the larger Q down to the smaller one
        if q0 > q1:
            i1 = at_norm(i1, off_in)
        else:
            i2 = at_norm(i2, off_in)
        res = op(i1, i2, None)
        calc_q = QType.Pow2(bits=32, q=min(q0, q1), signed=True)
    res = qrec.out_qs[0].reduce_from(res, calc_q)
    return qrec.get_outputs(params, [res], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Split the single input tensor into the node's output slices."""
    qname = kwargs['qname']
    split_params = typing_cast(SplitParameters, params)
    x = qrec.prepare_inputs(split_params, in_tensors, ktype=qname)[0]
    pieces = split_params.numpy_split(x)
    return qrec.get_outputs(split_params, pieces, ktype=qname)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Scaled-quantized piecewise binary op with broadcast support.

    Multiplies scale the raw product; additive ops first rescale the
    input flagged by 'scaled_idx' so both operands share a scale, then
    apply the output scale. The result is clipped to the output range.
    """
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    if isinstance(params, Broadcastable) and params.is_broadcasted:
        in_tensors = params.broadcast_inputs(in_tensors)
    func = PIECEWISE_OPS[params.__class__]
    op = func['op']
    if func['is_mult']:
        compute_in_out_scale(qrec, in_idx=(0, 1), out_idx=0)
        out_scale_q = qrec.cache['scale_mul_biases_q']
        raw = op(in_tensors[0].astype(np.int32),
                 in_tensors[1].astype(np.int32), np.int32)
        out_tensor = out_scale_q.apply_scales(raw)
    else:
        # the input with the larger scale is pre-scaled to match the other
        set_add_in_scale(qrec)
        out_scale_q = qrec.cache['scale_mul_biases_q']
        in_scale_q = qrec.cache['scale_in_mul_biases_q']
        if qrec.cache['scaled_idx']:
            i1 = in_tensors[0].astype(np.int32)
            i2 = in_scale_q.apply_scales(in_tensors[1])
        else:
            i1 = in_scale_q.apply_scales(in_tensors[0])
            i2 = in_tensors[1].astype(np.int32)
        out_tensor = out_scale_q.apply_scales(op(i1, i2, None))
    return qrec.get_outputs(params, [qrec.out_qs[0].clip(out_tensor)], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Scaled-quantized ReLU: clamp to quantized bounds, apply the
    output scale, then requantize if input and output Q differ.

    The upper clamp is skipped when NNForceRelu.FORCE_RELU is set.
    """
    x = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    compute_in_out_scale(qrec)
    x = np.maximum(x, qrec.in_qs[0].quantize(params.lower_bound))
    if params.upper_bound is not None and not NNForceRelu.FORCE_RELU:
        x = np.minimum(x, qrec.in_qs[0].quantize(params.upper_bound))
    x = qrec.cache['scale_mul_biases_q'].apply_scales(x)
    if qrec.out_qs[0] == qrec.in_qs[0]:
        return qrec.get_outputs(params, [x], ktype="symmetric")
    return qrec.get_outputs(
        params, [qrec.out_qs[0].reduce_from(x, qrec.in_qs[0])],
        ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Non-max suppression over per-class box scores (ONNX NMS style).

    For each class: keep boxes whose score exceeds the quantized score
    threshold, sort them by descending score, suppress boxes whose IoU
    with a higher-scored box reaches the quantized IoU threshold, then
    emit up to max_output_boxes_per_class (batch, class, box) index
    triples into a preallocated index array.
    """
    boxes = in_tensors[0][
        0] if not params.center_point_box else convert_cnts2cors(
            in_tensors[0][0])
    scores = in_tensors[1][0]
    n_boxes = len(scores[0])
    n_classes = len(scores)
    scores_q = qrec.in_qs[1]
    # rows left at zero when fewer boxes survive
    indexes = np.zeros((params.max_output_boxes_per_class * n_classes, 3))
    idxs_count = 0
    for class_id in range(n_classes):
        bbox_buff = []
        counter = 0
        for box_id in range(n_boxes):
            class_score = scores[class_id, box_id]
            # BUG FIX: was scores_q.quanize(...) — AttributeError at runtime
            if class_score > scores_q.quantize(params.nms_score_threshold):
                bbox_buff.append({
                    "index": box_id,
                    "score": class_score,
                    "box": boxes[box_id],
                    "alive": True
                })
                counter += 1
        # Bubble sort by descending score (mirrors the target C kernel)
        changed = True
        while changed:
            changed = False
            for i in range(counter - 1):
                if bbox_buff[i]["score"] < bbox_buff[i + 1]["score"]:
                    bbox_buff[i], bbox_buff[i + 1] = bbox_buff[i + 1], bbox_buff[i]
                    changed = True
        # NMS: kill any box whose IoU with a higher-scored box reaches the threshold
        for idx in range(counter):
            for idx_int in range(idx + 1, counter):
                if not bbox_buff[idx_int]["alive"]:
                    continue
                intersection = rect_intersect_area(
                    bbox_buff[idx]["box"], bbox_buff[idx_int]["box"])
                union = rect_union_area(bbox_buff[idx]["box"],
                                        bbox_buff[idx_int]["box"])
                if intersection >= (scores_q.quantize(
                        params.nms_iou_threshold) * union):
                    bbox_buff[idx_int]["alive"] = False
        class_idxs_count_start = idxs_count
        for bb in bbox_buff:
            # BUG FIX: was '>' — off-by-one that emitted up to
            # max_output_boxes_per_class + 1 boxes and could overflow `indexes`
            if (idxs_count - class_idxs_count_start
                    ) >= params.max_output_boxes_per_class:
                break
            if bb["alive"]:
                indexes[idxs_count] = np.array([0, class_id, bb['index']])
                idxs_count += 1
    return qrec.get_outputs(params, [np.array(indexes)], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float hard-tanh: clamp the input to [-1, 1]."""
    if qrec is None:
        qrec = AllFloatQRec()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    clamped = np.maximum(x, -1.0)
    clamped = np.minimum(clamped, 1.0)
    return qrec.get_outputs(params, [clamped], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float leaky ReLU: x for x > 0, leak_factor * x for x < 0
    (exactly 0 at x == 0)."""
    if qrec is None:
        qrec = AllFloatQRec()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    positive_part = x * (x > 0)
    negative_part = x * params.leak_factor * (x < 0)
    return qrec.get_outputs(params, [positive_part + negative_part], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Quantized hard-tanh: clamp to the quantized value of +/-1."""
    x = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    bound = qrec.in_qs[0].quantize(np.array([1.]))
    clamped = np.minimum(np.maximum(x, -bound), bound)
    return qrec.get_outputs(params, [clamped], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float softmax along params.axis.

    Overflow is promoted to an exception while the softmax runs; the
    previous numpy error state is always restored, even on failure.
    """
    if qrec is None:
        qrec = AllFloatQRec()
    old_err = np.seterr(over='raise')
    try:
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        in_tensor = softmax_func(in_tensor, axis=params.axis)
    finally:
        # BUG FIX: previously the 'over=raise' state leaked if
        # prepare_inputs or softmax_func raised
        np.seterr(**old_err)
    return qrec.get_outputs(params, [in_tensor], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Quantized matscale: dispatch to the 2- or 3-input implementation
    depending on the node's input count."""
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    LOG.debug("matscale input %s",
              ",".join(t.dtype.name for t in in_tensors))
    if len(params.in_dims) == 3:
        result = cls.matscale3(in_tensors, qrec)
    else:
        result = cls.matscale2(in_tensors, qrec)
    return qrec.get_outputs(params, [result], ktype="symmetric")
def sigmoid(params, in_tensors, qrec: QRec, details=None):
    """Quantized sigmoid computed as dequantize -> float sigmoid ->
    requantize. Scaled quantization is not supported."""
    del details
    if qrec.ktype == 'scaled':
        raise NotImplementedError()
    x = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    x_float = qrec.in_qs[0].dequantize(x)
    y = 1 / (1 + np.exp(-x_float))
    return qrec.get_outputs(params, [qrec.out_qs[0].quantize(y)], ktype="symmetric")
def sum_execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float reduce-sum over params.axis, honouring keep_dims."""
    if qrec is None:
        qrec = AllFloatQRec()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    summed = np.sum(x, axis=tuple(params.axis), keepdims=params.keep_dims)
    return qrec.get_outputs(params, [summed], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Quantized softmax via dequantize -> float softmax -> requantize.

    Overflow is promoted to an exception while the softmax runs; the
    previous numpy error state is always restored, even on failure.
    """
    old_err = np.seterr(over='raise')
    try:
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
        # TODO - Implement properly quantized version
        in_tensor = qrec.in_qs[0].dequantize(in_tensor)
        in_tensor = qrec.out_qs[0].quantize(softmax_func(in_tensor))
    finally:
        # BUG FIX: previously the 'over=raise' state leaked if
        # softmax_func (or any step above) raised
        np.seterr(**old_err)
    return qrec.get_outputs(params, [in_tensor], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Float tanh.

    fp32 uses np.tanh directly; other float dtypes use either the LUT
    implementation (kernel_type == "lut") or the fast approximation.
    """
    if qrec is None:
        qrec = AllFloatQRec()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
    in_dtype = qrec.in_qs[0].dtype if qrec.ktype.startswith('float') else np.float32
    if in_dtype == np.float32:
        result = np.tanh(x).astype(in_dtype)
    elif qrec.cache.get('kernel_type') == "lut":
        result = tanh_lut_float(x, in_dtype)
    else:
        result = np_fasttanh(x, dtype=in_dtype, doalt=True)
    return qrec.get_outputs(params, [result], ktype="float")
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Apply the class's elementwise FUNC to the float inputs."""
    if qrec is None:
        qrec = AllFloatQRec()
    tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
    result = cls.FUNC(*tensors)
    return qrec.get_outputs(params, [result], ktype="float")