def from_float(cls, mod): r"""Create a dynamic quantized module from a float module or qparams_dict Args: mod (Module): a float module, either produced by torch.quantization utilities or provided by the user """ assert type( mod ) == NNLinear, 'nn.quantized.dynamic.Linear.from_float only works for nn.Linear' assert hasattr( mod, 'qconfig'), 'Input float module must have qconfig defined' if mod.qconfig is not None and mod.qconfig.weight is not None: weight_observer = mod.qconfig.weight() else: # We have the circular import issues if we import the qconfig in the beginning of this file: # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the # import until we need it. from torch.quantization.qconfig import default_dynamic_qconfig weight_observer = default_dynamic_qconfig.weight() dtype = weight_observer.dtype assert dtype in [ torch.qint8, torch.float16 ], 'The only supported dtypes for dynamic quantized linear are qint8 and float16' weight_observer(mod.weight) if dtype == torch.qint8: qweight = _quantize_weight(mod.weight.float(), weight_observer) elif dtype == torch.float16: qweight = mod.weight.float() else: raise RuntimeError( 'Unsupported dtype specified for dynamic quantized Linear!') qlinear = Linear(mod.in_features, mod.out_features, dtype=dtype) qlinear.set_weight_bias(qweight, mod.bias) return qlinear
def from_float(cls, mod): r"""Create a dynamic quantized module from a float module or qparams_dict Args: mod (Module): a float module, either produced by torch.quantization utilities or provided by the user """ assert type( mod ) == NNLinear, 'nn.quantized.dynamic.Linear.from_float only works for nn.Linear' assert hasattr( mod, 'qconfig'), 'Input float module must have qconfig defined' if mod.qconfig is not None and mod.qconfig.weight is not None: weight_observer = mod.qconfig.weight() else: # We have the circular import issues if we import the qconfig in the beginning of this file: # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the # import until we need it. from torch.quantization.qconfig import default_dynamic_qconfig weight_observer = default_dynamic_qconfig.weight() assert weight_observer.dtype == torch.qint8, 'Weight observer must have dtype torch.qint8' weight_observer(mod.weight) wt_scale, wt_zp = weight_observer.calculate_qparams() qweight = torch.quantize_per_tensor(mod.weight.float(), float(wt_scale), int(wt_zp), torch.qint8) qlinear = Linear(mod.in_features, mod.out_features) qlinear.set_weight_bias(qweight, mod.bias) return qlinear
def from_float(cls, mod): r"""Create a quantized sparse dynamic module from a float module. We only care about the convert at this stage, no need for observers just yet. """ assert type(mod) == cls._FLOAT_MODULE, ' nnq.' + cls.__name__ + '.from_float only works for ' + \ cls._FLOAT_MODULE.__name__ # TODO: Need to add options to qconfig to avoid the calibration. # TODO: Add calibration for the sparsity assert hasattr( mod, 'qconfig'), 'Input float module must have qconfig defined' if type(mod) == nni.LinearReLU: mod = mod[0] if mod.qconfig is not None and mod.qconfig.weight is not None: weight_observer = mod.qconfig.weight() else: # We have the circular import issues if we import the qconfig in the beginning of this file: # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the # import until we need it. from torch.quantization.qconfig import default_dynamic_qconfig weight_observer = default_dynamic_qconfig.weight() # It is important to multiply by the mask BEFORE calling the `weight_observer` # TODO (zaf): Mask might not be part of the qconfig (T83295194) weight = mod.weight if getattr(mod.qconfig, 'mask', False): weight = mod.qconfig.mask * mod.weight weight_observer(weight) dtype = weight_observer.dtype assert dtype == torch.qint8, 'Weight observer must have dtype torch.qint8' w_sc, w_zp = weight_observer.calculate_qparams() if isinstance(w_zp, torch.Tensor): assert not torch.any( w_zp.bool()), "All weight zero points must map to 0" else: assert w_zp == 0, 'Weight zero point must map to 0' qweight = _quantize_weight(weight.float(), weight_observer) # Use these default values until we figure out how to augment # `mod` to contain sparse config row_block_size, col_block_size = QNNPACKLinearBlockSparsePattern.block_size( ) qlinear = cls(mod.in_features, mod.out_features, row_block_size, col_block_size, dtype=dtype) qlinear.set_weight_bias(qweight, mod.bias, row_block_size, col_block_size) return qlinear
def from_float(cls, mod): r"""Create a dynamic quantized module from a float module or qparams_dict Args: mod (Module): a float module, either produced by torch.quantization utilities or provided by the user """ float_modules = [ torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.intrinsic.modules.fused.LinearReLU ] assert type(mod) in float_modules, \ 'nn.quantized.dynamic.Linear.from_float only works for one of' + \ str([float_mod.__name__ for float_mod in float_modules]) assert hasattr( mod, 'qconfig'), 'Input float module must have qconfig defined' if type(mod) == nni.LinearReLU: mod = mod[0] if mod.qconfig is not None and mod.qconfig.weight is not None: weight_observer = mod.qconfig.weight() else: # We have the circular import issues if we import the qconfig in the beginning of this file: # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the # import until we need it. from torch.quantization.qconfig import default_dynamic_qconfig weight_observer = default_dynamic_qconfig.weight() dtype = weight_observer.dtype assert dtype in [torch.qint8, torch.float16], "The only supported dtypes for " \ "dynamic quantized linear are qint8 and float16 got: {}".format(dtype) weight_observer(mod.weight) if dtype == torch.qint8: qweight = _quantize_weight(mod.weight.float(), weight_observer) elif dtype == torch.float16: qweight = mod.weight.float() else: raise RuntimeError( 'Unsupported dtype specified for dynamic quantized Linear!') qlinear = cls(mod.in_features, mod.out_features, dtype=dtype) qlinear.set_weight_bias(qweight, mod.bias) return qlinear
def from_float(cls, mod):
    assert type(mod) == torch.nn.LSTM, \
        'nn.quantized.dynamic.RNNBase.from_float only works for nn.LSTM'
    assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
    if mod.qconfig is not None and mod.qconfig.weight is not None:
        weight_observer = mod.qconfig.weight()
    else:
        # We have circular import issues if we import the qconfig at the top of this file:
        # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone
        # the import until we need it.
        from torch.quantization.qconfig import default_dynamic_qconfig
        weight_observer = default_dynamic_qconfig.weight()
    dtype = weight_observer.dtype
    supported_scalar_types = [torch.qint8, torch.float16]
    if dtype not in supported_scalar_types:
        raise RuntimeError('Unsupported dtype for dynamic RNN quantization: {}'.format(dtype))

    if mod.mode == 'LSTM':
        qRNNBase = LSTM(mod.input_size, mod.hidden_size, mod.num_layers,
                        mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype)
    else:
        raise NotImplementedError('Only LSTM is supported for QuantizedRNN for now')

    num_directions = 2 if mod.bidirectional else 1

    assert mod.bias

    qRNNBase._all_weight_names = []
    _all_weight_values = []
    for layer in range(qRNNBase.num_layers):
        for direction in range(num_directions):
            layer_input_size = qRNNBase.input_size if layer == 0 else qRNNBase.hidden_size * num_directions

            def process_weights(ihhh, layer, suffix, dtype):
                weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
                bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

                weight = getattr(mod, weight_name)
                bias = getattr(mod, bias_name)

                if dtype == torch.qint8:
                    # for each layer, for each direction we need to quantize and pack
                    # weights and pack parameters in this order:
                    #
                    #   w_ih, w_hh
                    weight_observer(weight)
                    wt_scale, wt_zp = weight_observer.calculate_qparams()
                    qweight = torch.quantize_per_tensor(
                        weight.float(), float(wt_scale), int(wt_zp), torch.qint8)
                    packed_weight = torch.ops.quantized.linear_prepack(qweight, bias)

                    params = [packed_weight]
                    pos_names = ['w']
                    ret_name = ['{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
                                for name in pos_names]
                    return params, ret_name
                else:
                    # for each layer, for each direction we need to quantize and pack
                    # weights and pack parameters in this order:
                    #
                    #   packed_ih, packed_hh, b_ih, b_hh
                    packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(weight.float())

                    params = [packed_weight, bias]
                    pos_names = ['packed', 'b']
                    ret_name = ['{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
                                for name in pos_names]
                    return params, ret_name

            suffix = '_reverse' if direction == 1 else ''
            ih_params, ih_param_names = process_weights('ih', layer, suffix, dtype)
            hh_params, hh_param_names = process_weights('hh', layer, suffix, dtype)

            for (ih, ih_name), (hh, hh_name) in zip(zip(ih_params, ih_param_names),
                                                    zip(hh_params, hh_param_names)):
                qRNNBase._all_weight_names.extend([ih_name, hh_name])
                _all_weight_values.extend([PackedParameter(p) for p in [ih, hh]])
    qRNNBase._all_weight_values = torch.nn.ModuleList(_all_weight_values)

    return qRNNBase

def from_float(cls, mod):
    assert type(mod) == torch.nn.LSTM, \
        'nn.quantized.dynamic.RNNBase.from_float only works for nn.LSTM'
    assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
    if mod.qconfig is not None and mod.qconfig.weight is not None:
        weight_observer = mod.qconfig.weight()
    else:
        # We have circular import issues if we import the qconfig at the top of this file:
        # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone
        # the import until we need it.
        from torch.quantization.qconfig import default_dynamic_qconfig
        weight_observer = default_dynamic_qconfig.weight()
    dtype = weight_observer.dtype
    supported_scalar_types = [torch.qint8, torch.float16]
    if dtype not in supported_scalar_types:
        raise RuntimeError('Unsupported dtype for dynamic RNN quantization: {}'.format(dtype))

    if mod.mode == 'LSTM':
        qRNNBase = LSTM(mod.input_size, mod.hidden_size, mod.num_layers,
                        mod.bias, mod.batch_first, mod.dropout, mod.bidirectional, dtype)
    else:
        raise NotImplementedError('Only LSTM is supported for QuantizedRNN for now')

    num_directions = 2 if mod.bidirectional else 1

    assert mod.bias

    _all_weight_values = []
    for layer in range(qRNNBase.num_layers):
        for direction in range(num_directions):
            layer_input_size = qRNNBase.input_size if layer == 0 else qRNNBase.hidden_size * num_directions

            suffix = '_reverse' if direction == 1 else ''

            def retrieve_weight_bias(ihhh):
                weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
                bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)
                weight = getattr(mod, weight_name)
                bias = getattr(mod, bias_name)
                return weight, bias

            weight_ih, bias_ih = retrieve_weight_bias('ih')
            weight_hh, bias_hh = retrieve_weight_bias('hh')

            if dtype == torch.qint8:
                def quantize_and_pack(w, b):
                    weight_observer(w)
                    wt_scale, wt_zp = weight_observer.calculate_qparams()
                    qweight = torch.quantize_per_tensor(
                        w.float(), float(wt_scale), int(wt_zp), torch.qint8)
                    packed_weight = torch.ops.quantized.linear_prepack(qweight, b)
                    return packed_weight

                packed_ih = quantize_and_pack(weight_ih, bias_ih)
                packed_hh = quantize_and_pack(weight_hh, bias_hh)

                cell_params = torch.ops.quantized.make_quantized_cell_params_dynamic(
                    packed_ih, packed_hh, bias_ih, bias_hh)
            else:
                packed_ih = torch.ops.quantized.linear_prepack_fp16(weight_ih.float())
                packed_hh = torch.ops.quantized.linear_prepack_fp16(weight_hh.float())

                cell_params = torch.ops.quantized.make_quantized_cell_params_fp16(
                    packed_ih, packed_hh, bias_ih, bias_hh)

            _all_weight_values.append(PackedParameter(cell_params))
    qRNNBase._all_weight_values = torch.nn.ModuleList(_all_weight_values)
    qRNNBase._all_params = [m.param for m in qRNNBase._all_weight_values]

    return qRNNBase

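# Minimal usage sketch (illustrative, not part of the module above): converting a
# float nn.LSTM via from_float. Assumes the LSTM class built above is exposed as
# torch.nn.quantized.dynamic.LSTM; sizes are arbitrary. Note that from_float
# asserts mod.bias, and nn.LSTM uses bias=True by default.
import torch
import torch.nn as nn
import torch.nn.quantized.dynamic as nnqd
from torch.quantization.qconfig import default_dynamic_qconfig

float_lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2, batch_first=True)
float_lstm.qconfig = default_dynamic_qconfig  # required by the assert in from_float
qlstm = nnqd.LSTM.from_float(float_lstm)      # per-layer/direction weights packed into cell params

x = torch.randn(4, 5, 10)                     # (batch, seq, feature) since batch_first=True
output, (hn, cn) = qlstm(x)
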