def average_execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Average-pool the first input tensor in floating point.

    A ``params.filter.h`` x ``params.filter.w`` window is moved over the
    (optionally zero-padded) input using ``params.stride``. Each window is
    summed over its 'h' and 'w' axes and scaled by ``1 / (filter.h * filter.w)``.

    Args:
        cls: Owning class (not used by the body).
        params: Node parameters; ``in_dims``, ``out_dims``, ``filter``,
            ``stride`` and ``padding`` are read.
        in_tensors: Input tensors handed to ``qrec.prepare_inputs``; only the
            first prepared tensor is used.
        qrec: Quantization record. ``None`` is replaced by a
            ``Float32QuantizationRecord``.
        **kwargs: Accepted and ignored.

    Returns:
        The value of ``qrec.get_outputs`` for the pooled tensor.
    """
    if qrec is None:
        qrec = Float32QuantizationRecord()

    data = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]
    float_dtype = qrec.dtype(ktype="float32")

    window_elems = params.filter.h * params.filter.w
    scale = np.array(1.0 / window_elems, dtype=float_dtype)
    pooled = np.zeros(out_dims.shape, dtype=float_dtype)

    if params.padding.h + params.padding.w > 0:
        # Padded cells are zeros and still count in the divisor.
        data = np.pad(data,
                      params.padding.numpy_pad_shape(in_dims),
                      mode='constant',
                      constant_values=0.0)
        pad_h, pad_w = params.padding.h, params.padding.w
    else:
        pad_h = pad_w = 0

    # Positions of the spatial axes, looked up in the output dimension order.
    spatial_axes = (out_dims.keys.index('h'), out_dims.keys.index('w'))
    row_starts = range(0, in_dims.h - params.filter.h + pad_h + 1, params.stride.h)
    col_starts = range(0, in_dims.w - params.filter.w + pad_w + 1, params.stride.w)

    for dst_h, src_h in enumerate(row_starts):
        for dst_w, src_w in enumerate(col_starts):
            dst_index = out_dims.srange(h=dst_h, w=dst_w)
            src_index = in_dims.srange(
                c=[0, out_dims.c, 1],
                h=[src_h, src_h + params.filter.h, 1],
                w=[src_w, src_w + params.filter.w, 1])
            window_sum = np.sum(data[src_index],
                                dtype=float_dtype,
                                axis=spatial_axes)
            # Reshape to the destination slot before scaling and storing.
            window_sum = window_sum.reshape(pooled[dst_index].shape)
            pooled[dst_index] = np.multiply(window_sum, scale)

    return qrec.get_outputs(params, [pooled], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Compute, per output channel, the dot product of the flattened input
    with that channel's weights, plus an optional bias, in floating point.

    Args:
        cls: Owning class (not used by the body).
        params: Node parameters; ``in_dims``, ``out_dims``, ``filter`` and
            ``has_bias`` are read.
        in_tensors: Tensors handed to ``qrec.prepare_inputs``; the prepared
            list is read as ``[input, weights, biases]``.
        qrec: Quantization record. ``None`` or a ``Float32QuantizationRecord``
            is replaced by a ``Float32ScalableFilterQuantizationRecord``.
        **kwargs: ``details`` may hold a mapping that receives the running
            ``min_acc`` / ``max_acc`` statistics.

    Returns:
        The value of ``qrec.get_outputs`` for the accumulated tensor after
        ``qrec.apply_multiplicative_bias`` has been applied.
    """
    details = kwargs.get('details')

    # A missing or plain float32 record is swapped for the scalable-filter one.
    if qrec is None or isinstance(qrec, Float32QuantizationRecord):
        qrec = Float32ScalableFilterQuantizationRecord()

    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]

    prepared_in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    in_tensor = prepared_in_tensors[0]
    weights = prepared_in_tensors[1]
    biases = prepared_in_tensors[2]

    if details is not None:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")

    if params.has_bias:
        acc_tensor = np.ones(out_dims.shape, dtype=qrec.dtype(ktype="float32")) * biases
    else:
        acc_tensor = np.zeros(out_dims.shape, dtype=qrec.dtype(ktype="float32"))

    in_tensor = in_tensor.reshape((in_dims.size()))
    filt = params.filter.get_filter_dims()

    for out_c in range(out_dims.c):
        # Flatten this output channel's weights to match the flattened input.
        w_slice = weights[filt.srange(out_c=out_c)].reshape((in_dims.size()))
        res = np.dot(in_tensor, w_slice)

        if details is not None:
            details['min_acc'] = min(np.sum(res[res < 0]), details['min_acc'])
            # Must be max(): the running value starts at -Infinity, so taking
            # min() here could never raise it.
            details['max_acc'] = max(np.sum(res[res > 0]), details['max_acc'])

        acc_tensor[out_c] += res

        if details is not None:
            details['min_acc'] = min(np.min(acc_tensor[out_c]), details['min_acc'])
            details['max_acc'] = max(np.max(acc_tensor[out_c]), details['max_acc'])

    acc_tensor = qrec.apply_multiplicative_bias(
        params, acc_tensor, 0, ktype="float32")

    return qrec.get_outputs(params, [acc_tensor], ktype="float32")
def average_execute(cls, params, in_tensors, qrec: QuantizationRecordBase):
    """Average-pool the first input tensor with integer (symmetric) arithmetic.

    Each ``params.filter.h`` x ``params.filter.w`` window is summed and
    multiplied by ``(1 << 16) // (filter.h * filter.w)``. The scaled sums are
    passed through ``at_norm(..., 16)`` and clipped to the output
    quantization's dtype.

    Args:
        cls: Owning class (not used by the body).
        params: Node parameters; ``in_dims``, ``out_dims``, ``filter``,
            ``stride`` and ``padding`` are read.
        in_tensors: Input tensors handed to ``qrec.prepare_inputs``; only the
            first prepared tensor is used.
        qrec: Quantization record providing ``in_qs`` / ``out_qs``.

    Returns:
        The value of ``qrec.get_outputs`` for the normalised, clipped tensor.
    """
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]

    filter_sz = params.filter.h * params.filter.w
    # Reciprocal of the window size with 16 fractional bits.
    pool_factor = (1 << 16) // filter_sz

    out_tensor = np.zeros(out_dims.shape, dtype=np.int32)

    if params.padding.h + params.padding.w > 0:
        in_tensor = np.pad(in_tensor,
                           params.padding.numpy_pad_shape(in_dims),
                           mode='constant',
                           constant_values=qrec.in_qs[0].pad_zero_point)
        pad_w = params.padding.w
        pad_h = params.padding.h
    else:
        pad_w = pad_h = 0

    # Positions of the spatial axes, looked up in the output dimension order.
    spatial_axes = (out_dims.keys.index('h'), out_dims.keys.index('w'))
    row_starts = range(0, in_dims.h - params.filter.h + pad_h + 1, params.stride.h)
    col_starts = range(0, in_dims.w - params.filter.w + pad_w + 1, params.stride.w)

    for out_h, h_idx in enumerate(row_starts):
        for out_w, w_idx in enumerate(col_starts):
            out_slice_args = out_dims.srange(h=out_h, w=out_w)
            in_slice_args = in_dims.srange(
                c=[0, out_dims.c, 1],
                h=[h_idx, h_idx + params.filter.h, 1],
                w=[w_idx, w_idx + params.filter.w, 1])
            res_shape = out_tensor[out_slice_args].shape

            # Accumulate in int32, matching out_tensor. A float32 accumulator
            # cannot hold sum * pool_factor exactly once it exceeds 24 bits,
            # and the result would then be truncated on assignment.
            sum_filter = np.sum(in_tensor[in_slice_args],
                                dtype=np.int32,
                                axis=spatial_axes).reshape(res_shape)
            out_tensor[out_slice_args] = np.multiply(sum_filter, pool_factor)

    out_q = qrec.out_qs[0]
    # NOTE(review): at_norm(x, 16) presumably removes the 16 fractional bits
    # introduced by pool_factor - confirm against its definition.
    return qrec.get_outputs(params,
                            [out_q.clip(at_norm(out_tensor, 16), out_q.dtype)],
                            ktype="symmetric")
def average_execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Average the first input tensor over ``params.axis`` in floating point.

    Args:
        cls: Owning class (not used by the body).
        params: Node parameters; ``axis``, ``keep_dims`` and ``out_dims`` are
            read.
        in_tensors: Input tensors handed to ``qrec.prepare_inputs``; only the
            first prepared tensor is used.
        qrec: Quantization record. ``None`` is replaced by a
            ``Float32QuantizationRecord``.
        **kwargs: Accepted and ignored.

    Returns:
        The value of ``qrec.get_outputs`` for the mean, reshaped to
        ``params.out_dims[0].shape``.
    """
    if qrec is None:
        qrec = Float32QuantizationRecord()

    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]

    sum_by_chan = np.sum(in_tensor,
                         dtype=qrec.dtype(ktype="float32"),
                         axis=tuple(params.axis),
                         keepdims=params.keep_dims)

    # Number of elements folded into each output value. Indexing the shape by
    # axis also handles the negative axes np.sum accepts, and an empty axis
    # list gives a divisor of 1. Kept as a Python int so the division does
    # not change the floating point dtype of the sum.
    reduced_count = 1
    for axis in params.axis:
        reduced_count *= in_tensor.shape[axis]

    mean = (sum_by_chan / reduced_count).reshape(params.out_dims[0].shape)
    return qrec.get_outputs(params, [mean], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Apply the configured format change and normalisation to the first
    input tensor, then convert the result according to ``qrec``.

    Args:
        cls: Owning class (not used by the body).
        params: Node parameters; ``in_dims``, ``out_dims``, ``format_change``
            and ``norm_func`` are read.
        in_tensors: Input tensors; only the first one is used.
        qrec: Quantization record. ``None`` is treated as a
            ``Float32QuantizationRecord``.
        **kwargs: Accepted and ignored.

    Returns:
        A single-element list. For the float record types the int8/int16
        result is divided by ``-iinfo.min`` of its dtype. Otherwise it is
        dequantized with ``qrec.out_qs[0]`` when ``out_qs`` is non-empty and
        returned unchanged when it is empty.

    Raises:
        ValueError: A float record is in use and the formatted tensor is
            neither int8 nor int16.
    """
    in_dim = params.in_dims[0]
    out_dim = params.out_dims[0]

    res = in_tensors[0]
    res = FORMAT_CHANGES[params.format_change](res, in_dim, out_dim)
    res = NORMALIZATIONS[params.norm_func](res)

    # Without this default, the float branch would call qrec.dtype on None.
    if qrec is None:
        qrec = Float32QuantizationRecord()

    if isinstance(qrec, (Float32QuantizationRecord,
                         Float16QuantizationRecord,
                         Bfloat16QuantizationRecord)):
        if res.dtype == np.int8 or res.dtype == np.int16:
            # Looked up only for the supported dtypes, so an unsupported
            # dtype reaches the explicit error below, not np.iinfo's own.
            iinfo = np.iinfo(res.dtype)
            res = res.astype(qrec.dtype(ktype="float32")) / -iinfo.min
        else:
            raise ValueError("unsure how to dequantize this output from imageformatter")
        return [res]

    return [qrec.out_qs[0].dequantize(res) if qrec.out_qs else res]
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Float convolution computed one filter tap at a time.

    The input is moved to ('h', 'w', 'c') order and optionally zero-padded.
    For every output channel and every filter position, the strided (and
    dilated) input slab is multiplied by that tap's weights, summed over the
    channel axis and added to the channel's accumulator.

    Args:
        cls: Owning class (not used by the body).
        params: Node parameters; ``in_dims``, ``out_dims``, ``filter``,
            ``padding``, ``stride``, ``dilation``, ``groups`` and ``has_bias``
            are read.
        in_tensors: Tensors handed to ``qrec.prepare_inputs``; the prepared
            list is read as ``[input, weights, biases]``.
        qrec: Quantization record. ``None`` or a ``Float32QuantizationRecord``
            is replaced by a ``Float32ScalableFilterQuantizationRecord``.
        **kwargs: ``details`` may hold a mapping that receives accumulator
            min/max statistics.

    Returns:
        The value of ``qrec.get_outputs`` for the result after
        ``qrec.apply_multiplicative_bias`` and transposition back from
        ('c', 'h', 'w') order.
    """
    details = kwargs.get('details')

    # A missing or plain float32 record is swapped for the scalable-filter one.
    if qrec is None or isinstance(qrec, Float32QuantizationRecord):
        qrec = Float32ScalableFilterQuantizationRecord()

    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]

    prepared = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    in_tensor = prepared[0]
    weights = prepared[1]
    biases = prepared[2]

    if details is not None:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")
        details['min_pre_mul_bias'] = float("Infinity")
        details['max_pre_mul_bias'] = float("-Infinity")

    in_tensor = in_tensor.transpose(in_dims.transpose_to_order(['h', 'w', 'c']))

    padding = params.padding
    if padding.h + padding.w > 0:
        spatial_pad = ([padding.t, padding.b], [padding.l, padding.r])
        # Axes after the two spatial ones are left unpadded.
        other_pad = ([0, 0], ) * (np.ndim(in_tensor) - 2)
        in_tensor = np.pad(in_tensor,
                           spatial_pad + other_pad,
                           mode='constant',
                           constant_values=0.0)
        pad_w = padding.w
        pad_h = padding.h
    else:
        pad_w = pad_h = 0

    weights = weights.transpose(
        params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c']))

    filt_h = params.filter.h
    filt_w = params.filter.w
    in_h = in_dims.h
    in_w = in_dims.w
    out_c = params.filter.out_c

    in_c_per_group = in_dims.c // params.groups
    out_c_per_group = out_c // params.groups

    # Extent covered by the filter once dilation gaps are included.
    span_h = (params.dilation.h - 1) * (filt_h - 1) + filt_h
    span_w = (params.dilation.w - 1) * (filt_w - 1) + filt_w

    out_h = (in_h - span_h + pad_h) // params.stride.h + 1
    out_w = (in_w - span_w + pad_w) // params.stride.w + 1

    acc_dtype = qrec.dtype(ktype="float32")
    if params.has_bias:
        # Every accumulator plane starts at its channel's bias.
        result = np.broadcast_to(
            biases.reshape(out_c, 1, 1),
            (out_c, out_h, out_w)).copy().astype(acc_dtype)
    else:
        result = np.zeros((out_c, out_h, out_w), dtype=acc_dtype)

    # Exclusive upper bounds for the first filter tap's slab.
    limit_h = pad_h + in_h - span_h + 1
    limit_w = pad_w + in_w - span_w + 1

    group_in_start = 0
    done_in_group = 0
    for chan in range(out_dims.c):
        in_chans = slice(group_in_start, group_in_start + in_c_per_group, 1)
        for tap_h in range(filt_h):
            first_row = tap_h * params.dilation.h
            rows = slice(first_row, limit_h + first_row, params.stride.h)
            for tap_w in range(filt_w):
                first_col = tap_w * params.dilation.w
                cols = slice(first_col, limit_w + first_col, params.stride.w)

                # Every input element this tap touches, times the tap weights.
                slab = np.multiply(in_tensor[rows, cols, in_chans],
                                   weights[chan, tap_h, tap_w],
                                   dtype=acc_dtype)
                # Collapse the channel axis, then accumulate.
                result[chan] += slab.sum(axis=-1)

                if details is not None:
                    details['min_acc'] = min(np.min(result[chan]), details['min_acc'])
                    details['max_acc'] = max(np.max(result[chan]), details['max_acc'])

        # Move to the next group's input channels once this group is done.
        done_in_group += 1
        if done_in_group >= out_c_per_group:
            done_in_group = 0
            group_in_start += in_c_per_group

    if details is not None:
        details['min_pre_mul_bias'] = min(np.min(result), details['min_pre_mul_bias'])
        details['max_pre_mul_bias'] = max(np.max(result), details['max_pre_mul_bias'])

    result = qrec.apply_multiplicative_bias(params, result, axis=0, ktype="float32")
    result = result.transpose(out_dims.transpose_from_order(['c', 'h', 'w']))

    return qrec.get_outputs(params, [result], ktype="float32")