def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    '''Fully-connected (linear) layer evaluated in float32.

    Args:
        params: Layer parameters. This kernel reads ``in_dims[0]``,
            ``out_dims[0]``, ``filter``, ``has_bias`` and the uncompressed
            weights and biases.
        in_tensors: Input tensors; only the first prepared input is used.
        qrec: Quantization record. ``None`` or a plain
            ``Float32QuantizationRecord`` is replaced by a fresh
            ``Float32ScalableFilterQuantizationRecord``.
        **kwargs: ``details`` may be a dict; when present it receives the
            ``min_acc`` / ``max_acc`` statistics seen while accumulating.

    Returns:
        The value of ``qrec.get_outputs`` for the accumulated tensor.
    '''
    details = kwargs.get('details')
    # qrec is set by default to Float32QuantizationRecord if None
    if qrec is None or isinstance(qrec, Float32QuantizationRecord):
        qrec = Float32ScalableFilterQuantizationRecord()

    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]
    weights = qrec.prepare_weights(params, params.get_uncompressed_weights(), ktype="float32")
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]

    if details is not None:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")

    # The accumulator starts at the bias (or zero) for every output channel.
    if params.has_bias:
        biases = qrec.prepare_biases(params,
                                     params.get_uncompressed_biases(),
                                     params.get_uncompressed_weights(),
                                     ktype="float32")
        acc_tensor = np.ones(out_dims.shape, dtype=np.float32) * biases
    else:
        acc_tensor = np.zeros(out_dims.shape, dtype=np.float32)

    # Flatten the input once; every weight slice is flattened to match.
    flat_size = in_dims.size()
    in_tensor = in_tensor.reshape(flat_size)
    filt = params.filter.get_filter_dims()

    for out_c in range(out_dims.c):
        w_slice = weights[filt.srange(out_c=out_c)].reshape(flat_size)
        res = np.dot(in_tensor, w_slice)

        if details is not None:
            details['min_acc'] = min(np.sum(res[res < 0]), details['min_acc'])
            # Must be max(): the running maximum is seeded with -inf, so
            # min() could never raise it.
            details['max_acc'] = max(np.sum(res[res > 0]), details['max_acc'])

        acc_tensor[out_c] += res

        if details is not None:
            details['min_acc'] = min(np.min(acc_tensor[out_c]), details['min_acc'])
            details['max_acc'] = max(np.max(acc_tensor[out_c]), details['max_acc'])

    acc_tensor = qrec.apply_multiplicative_bias(params, acc_tensor, 0, ktype="float32")

    return qrec.get_outputs(params, [acc_tensor], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    '''Fully-connected (linear) layer evaluated with symmetric quantization.

    Args:
        params: Layer parameters. This kernel reads ``in_dims[0]``,
            ``out_dims[0]``, ``filter``, ``has_bias``, ``weights`` and
            ``biases``.
        in_tensors: Input tensors; only the first prepared input is used.
        qrec: Quantization record supplying ``acc_q``, ``calc_q``,
            ``biases_q`` and ``out_qs`` plus the prepare/output helpers.
        **kwargs: ``details`` may be a dict; when present it receives the
            ``min_acc`` / ``max_acc`` statistics seen while accumulating.

    Returns:
        The value of ``qrec.get_outputs`` for the accumulated tensor.
    '''
    details = kwargs.get('details')

    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]
    weights = qrec.prepare_weights(params, params.weights, ktype="symmetric")
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]

    if details is not None:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")

    # The accumulator starts at the bias (converted to the accumulator
    # format when the two formats differ) or at zero.
    if params.has_bias:
        biases = qrec.prepare_biases(params, params.biases, params.weights, ktype="symmetric")
        acc_tensor = np.ones(biases.shape, dtype=qrec.acc_q.dtype) * biases
        if qrec.acc_q != qrec.biases_q:
            acc_tensor = qrec.acc_q.expand_from(acc_tensor, qrec.biases_q)
    else:
        acc_tensor = np.zeros(out_dims.shape, dtype=qrec.acc_q.dtype)

    # force the bit dimension of the input tensor to the bit width of the calc
    # so that the dot product occurs in this precision
    in_tensor = in_tensor.astype(qrec.calc_q.dtype)

    flat_size = in_dims.size()
    in_tensor = in_tensor.reshape(flat_size)
    filt = params.filter.get_filter_dims()

    # Neither format is reassigned inside the loop, so compare them once.
    calc_differs_from_acc = qrec.calc_q != qrec.acc_q

    for out_c in range(out_dims.c):
        # Expand and normalize the accumulator
        if calc_differs_from_acc:
            acc_tensor = qrec.calc_q.expand_from(acc_tensor, qrec.acc_q)

        w_slice = weights[filt.srange(out_c=out_c)].reshape(flat_size)
        res = np.dot(in_tensor, w_slice)

        if details is not None:
            details['min_acc'] = min(np.sum(res[res < 0]), details['min_acc'])
            # Must be max(): the running maximum is seeded with -inf, so
            # min() could never raise it.
            details['max_acc'] = max(np.sum(res[res > 0]), details['max_acc'])

        acc_tensor[out_c] += res

        if calc_differs_from_acc:
            acc_tensor = qrec.acc_q.reduce_from(acc_tensor, qrec.calc_q)

        if details is not None:
            details['min_acc'] = min(np.min(acc_tensor[out_c]), details['min_acc'])
            details['max_acc'] = max(np.max(acc_tensor[out_c]), details['max_acc'])

    acc_tensor = qrec.apply_multiplicative_bias(params, acc_tensor, 0, ktype="symmetric")

    out_q = qrec.out_qs[0]
    # The truthiness check on qrec is retained from the original code.
    if qrec and out_q != qrec.acc_q:
        acc_tensor = out_q.reduce_from(acc_tensor, qrec.acc_q)

    return qrec.get_outputs(params, [acc_tensor], ktype="symmetric")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    '''3D convolution by sub-matrix summing, evaluated in float32.

    For each filter tap, the strided (and dilated) window of the padded
    input that the tap touches is multiplied by the tap's weights, summed
    over the input channels of the current group, and added to the output
    plane of the current output channel.

    Args:
        params: Layer parameters. This kernel reads ``in_dims[0]``,
            ``out_dims[0]``, ``filter``, ``padding``, ``stride``,
            ``dilation``, ``groups``, ``has_bias`` and the uncompressed
            weights and biases.
        in_tensors: Input tensors; only the first prepared input is used.
        qrec: Quantization record. ``None`` or a plain
            ``Float32QuantizationRecord`` is replaced by a fresh
            ``Float32ScalableFilterQuantizationRecord``.
        **kwargs: ``details`` may be a dict; when present it receives
            ``min_acc``, ``max_acc``, ``pre_mul_bias_min`` and
            ``pre_mul_bias_max``.

    Returns:
        The value of ``qrec.get_outputs`` for the result tensor.
    '''
    stats = kwargs.get('details')

    # qrec is set by default to Float32QuantizationRecord if None
    if qrec is None or isinstance(qrec, Float32QuantizationRecord):
        qrec = Float32ScalableFilterQuantizationRecord()

    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]

    kernel = qrec.prepare_weights(params, params.get_uncompressed_weights(), ktype="float32")
    data = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]

    if stats is not None:
        stats['min_acc'] = float("Infinity")
        stats['max_acc'] = float("-Infinity")
        stats['pre_mul_bias_min'] = float("Infinity")
        stats['pre_mul_bias_max'] = float("-Infinity")

    # Work on the input in h, w, c order.
    hwc_order = in_dims.transpose_to_order(['h', 'w', 'c'])
    data = data.transpose(hwc_order).astype(np.float32)

    if params.padding.h + params.padding.w > 0:
        spatial_pad = ([params.padding.t, params.padding.b],
                       [params.padding.l, params.padding.r])
        # Axes beyond the first two are not padded.
        unpadded_axes = ([0, 0], ) * (np.ndim(data) - 2)
        data = np.pad(data,
                      spatial_pad + unpadded_axes,
                      mode='constant',
                      constant_values=0.0)
        pad_w = params.padding.w
        pad_h = params.padding.h
    else:
        pad_w = pad_h = 0

    # Work on the weights in out_c, h, w, in_c order.
    ohwi_order = params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c'])
    kernel = kernel.transpose(ohwi_order).astype(np.float32)

    filt_w = params.filter.w
    filt_h = params.filter.h
    in_w = in_dims.w
    in_h = in_dims.h
    out_c = params.filter.out_c

    in_c_per_group = in_dims.c // params.groups
    out_c_per_group = out_c // params.groups
    group_start = 0
    done_in_group = 0

    # Extent of the filter once dilation is taken into account.
    span_w = (params.dilation.w - 1) * (filt_w - 1) + filt_w
    span_h = (params.dilation.h - 1) * (filt_h - 1) + filt_h

    out_w = (in_w - span_w + pad_w) // params.stride.w + 1
    out_h = (in_h - span_h + pad_h) // params.stride.h + 1

    out_shape = (out_c, out_h, out_w)
    if params.has_bias:
        biases = qrec.prepare_biases(params,
                                     params.get_uncompressed_biases(),
                                     params.get_uncompressed_weights(),
                                     ktype="float32")
        result = np.ones(out_shape, dtype=np.float32) * biases.reshape(out_c, 1, 1)
    else:
        result = np.zeros(out_shape, dtype=np.float32)

    # One past the last valid start row/column for the first filter tap.
    const_h = pad_h + in_h - span_h + 1
    const_w = pad_w + in_w - span_w + 1

    for chan in range(out_dims.c):
        for tap_h in range(filt_h):
            for tap_w in range(filt_w):
                rows = slice(tap_h * params.dilation.h,
                             const_h + tap_h * params.dilation.h,
                             params.stride.h)
                cols = slice(tap_w * params.dilation.w,
                             const_w + tap_w * params.dilation.w,
                             params.stride.w)
                group_chans = slice(group_start, group_start + in_c_per_group, 1)

                # every input element this filter tap multiplies
                products = np.multiply(data[rows, cols, group_chans],
                                       kernel[chan, tap_h, tap_w],
                                       dtype=np.float32)
                # collapse the input-channel axis and accumulate
                result[chan] += products.sum(axis=-1)

                if stats is not None:
                    stats['min_acc'] = min(np.min(result[chan]), stats['min_acc'])
                    stats['max_acc'] = max(np.max(result[chan]), stats['max_acc'])

        # Move to the next group's input channels once this group's
        # output channels have all been produced.
        done_in_group += 1
        if done_in_group >= out_c_per_group:
            done_in_group = 0
            group_start += in_c_per_group

    if stats is not None:
        stats['pre_mul_bias_min'] = min(np.min(result), stats['pre_mul_bias_min'])
        stats['pre_mul_bias_max'] = max(np.max(result), stats['pre_mul_bias_max'])

    result = qrec.apply_multiplicative_bias(params, result, axis=0, ktype="float32")

    # Put the c, h, w result back into the layer's output order.
    result = result.transpose(out_dims.transpose_from_order(['c', 'h', 'w']))

    return qrec.get_outputs(params, [result], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    '''3D convolution by sub-matrix summing, evaluated with symmetric quantization.

    For each filter tap, the strided (and dilated) window of the padded
    input that the tap touches is multiplied by the tap's weights, summed
    over the input channels of the current group, and added to the output
    plane of the current output channel.

    Args:
        params: Layer parameters. This kernel reads ``in_dims[0]``,
            ``out_dims[0]``, ``filter``, ``padding``, ``stride``,
            ``dilation``, ``groups``, ``has_bias``, ``weights`` and
            ``biases``.
        in_tensors: Input tensors; only the first prepared input is used.
        qrec: Quantization record supplying ``in_qs``, ``out_qs``,
            ``acc_q``, ``calc_q``, ``biases_q`` and the prepare/output
            helpers.
        **kwargs: ``details`` may be a dict; when present it receives
            ``min_acc`` and ``max_acc``.

    Returns:
        The value of ``qrec.get_outputs`` for the result tensor.
    '''
    stats = kwargs.get('details')

    in_dims = params.in_dims[0]
    out_dims = params.out_dims[0]

    kernel = qrec.prepare_weights(params, params.weights, ktype="symmetric")
    data = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]

    if stats is not None:
        stats['min_acc'] = float("Infinity")
        stats['max_acc'] = float("-Infinity")

    # Work on the input in h, w, c order.
    data = data.transpose(in_dims.transpose_to_order(['h', 'w', 'c']))

    if params.padding.h + params.padding.w > 0:
        # Pad with the input's zero point when it has one, otherwise with 0.
        if hasattr(qrec.in_qs[0], 'zero_point'):
            const_pad = qrec.in_qs[0].zero_point[0]
        else:
            const_pad = 0
        spatial_pad = ([params.padding.t, params.padding.b],
                       [params.padding.l, params.padding.r])
        # Axes beyond the first two are not padded.
        unpadded_axes = ([0, 0], ) * (np.ndim(data) - 2)
        data = np.pad(data,
                      spatial_pad + unpadded_axes,
                      mode='constant',
                      constant_values=const_pad)
        pad_w = params.padding.w
        pad_h = params.padding.h
    else:
        pad_w = pad_h = 0

    # Work on the weights in out_c, h, w, in_c order.
    kernel = kernel.transpose(
        params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c']))

    filt_w = params.filter.w
    filt_h = params.filter.h
    in_w = in_dims.w
    in_h = in_dims.h
    out_c = params.filter.out_c

    in_c_per_group = in_dims.c // params.groups
    out_c_per_group = out_c // params.groups
    group_start = 0
    done_in_group = 0

    # Extent of the filter once dilation is taken into account.
    span_w = (params.dilation.w - 1) * (filt_w - 1) + filt_w
    span_h = (params.dilation.h - 1) * (filt_h - 1) + filt_h

    out_w = (in_w - span_w + pad_w) // params.stride.w + 1
    out_h = (in_h - span_h + pad_h) // params.stride.h + 1

    out_shape = (out_c, out_h, out_w)
    if params.has_bias:
        biases = qrec.prepare_biases(params, params.biases, params.weights, ktype="symmetric")
        if qrec.acc_q != qrec.biases_q:
            biases = qrec.acc_q.expand_from(biases, qrec.biases_q)
        result = np.ones(out_shape, dtype=qrec.acc_q.dtype) * biases.reshape(out_c, 1, 1)
    else:
        result = np.zeros(out_shape, dtype=qrec.acc_q.dtype)

    # One past the last valid start row/column for the first filter tap.
    const_h = pad_h + in_h - span_h + 1
    const_w = pad_w + in_w - span_w + 1

    if FORCE_INT64:
        result = result.astype(np.int64)

    # The dtype used for the multiply and the channel sum, and the need to
    # reduce from calc_q to acc_q, do not change across iterations.
    work_dtype = np.int64 if FORCE_INT64 else qrec.calc_q.dtype
    reduce_to_acc = qrec.calc_q != qrec.acc_q

    for chan in range(out_dims.c):
        for tap_h in range(filt_h):
            for tap_w in range(filt_w):
                rows = slice(tap_h * params.dilation.h,
                             const_h + tap_h * params.dilation.h,
                             params.stride.h)
                cols = slice(tap_w * params.dilation.w,
                             const_w + tap_w * params.dilation.w,
                             params.stride.w)
                group_chans = slice(group_start, group_start + in_c_per_group, 1)

                # every input element this filter tap multiplies
                products = np.multiply(data[rows, cols, group_chans],
                                       kernel[chan, tap_h, tap_w],
                                       dtype=work_dtype)

                if reduce_to_acc:
                    products = qrec.acc_q.reduce_from(products, qrec.calc_q)

                # collapse the input-channel axis and accumulate
                result[chan] += products.sum(axis=-1, dtype=work_dtype)

                if stats is not None:
                    stats['min_acc'] = min(np.min(result[chan]), stats['min_acc'])
                    stats['max_acc'] = max(np.max(result[chan]), stats['max_acc'])

        # Move to the next group's input channels once this group's
        # output channels have all been produced.
        done_in_group += 1
        if done_in_group >= out_c_per_group:
            done_in_group = 0
            group_start += in_c_per_group

    result = qrec.apply_multiplicative_bias(params, result, 0, ktype="symmetric")

    # Put the c, h, w result back into the layer's output order.
    result = result.transpose(out_dims.transpose_from_order(['c', 'h', 'w']))

    if qrec.out_qs[0] != qrec.acc_q:
        result = qrec.out_qs[0].reduce_from(result, qrec.acc_q)

    return qrec.get_outputs(params, [result], ktype="symmetric")