def process_weights(ihhh, layer, suffix, qweight, bias, dtype):
    if dtype == torch.qint8:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   w_ih, w_hh
        packed_weight = \
            torch.ops.quantized.linear_prepack(qweight, bias)

        params = [packed_weight]
        pos_names = ['w']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name
    else:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   packed_ih, packed_hh, b_ih, b_hh
        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
            qweight)

        params = [packed_weight, bias]
        pos_names = ['packed', 'b']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name

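# Hedged illustration (not part of the original module): the ret_name list
# comprehension above follows torch.nn.RNN's parameter naming convention,
# e.g. 'weight_ih_l0_reverse'. The ihhh/layer/suffix values below are assumed
# example inputs, chosen only to show the names that would be produced.
for name in ['w', 'b']:
    print('{}_{}_l{}{}'.format(name, 'ih', 0, '_reverse'))
# prints:
#   w_ih_l0_reverse
#   b_ih_l0_reverse
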
def __init__(self, other):
    super(QuantizedLinearFP16, self).__init__()
    self.in_features = other.in_features
    self.out_features = other.out_features
    self.original_weight = other.weight
    self.weight = torch.fbgemm_pack_gemm_matrix_fp16(
        other.weight.clone(memory_format=torch.contiguous_format).float())
    assert other.bias is not None, 'QuantizedLinearFP16 requires a bias'
    self.bias = torch.nn.Parameter(
        other.bias.clone(memory_format=torch.contiguous_format).float(),
        requires_grad=False)
    self.register_buffer('packed_weight', self.weight)

def process_weights(ihhh, layer, suffix, dtype):
    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

    weight = getattr(other, weight_name)
    bias = getattr(other, bias_name)

    if dtype == torch.int8:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
        #   col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh
        qweight, col_offsets, scale, zero_point = \
            torch.fbgemm_linear_quantize_weight(
                weight.clone(memory_format=torch.contiguous_format).float())
        packed_weight = torch.fbgemm_pack_quantized_matrix(qweight)

        params = [
            qweight, bias, packed_weight, col_offsets, scale, zero_point
        ]
        pos_names = [
            'w', 'b', 'packed', 'col_offsets', 'scale', 'zero_point'
        ]
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        self._quantized_weights_names.append(ret_name[0])
        self._packed_weights_names.append(ret_name[2])
        return params, ret_name
    else:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   packed_ih, packed_hh, b_ih, b_hh
        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
            weight.clone(memory_format=torch.contiguous_format).float())
        self._orig_weights_names.append(weight_name)
        self.register_buffer(weight_name, weight)

        params = [packed_weight, bias]
        pos_names = ['packed', 'b']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        self._packed_weights_names.append(ret_name[0])
        self._quantized_weights_names.append(ret_name[0])
        return params, ret_name

def __init__(self, other):
    super(QuantizedLinearFP16, self).__init__()
    warnings.warn(
        "torch.jit.QuantizedLinearFP16 is deprecated and will be removed in an upcoming "
        "PyTorch release. Please use the torch.nn.quantized.dynamic.Linear instead."
    )
    self.in_features = other.in_features
    self.out_features = other.out_features
    self.original_weight = other.weight
    self.weight = torch.fbgemm_pack_gemm_matrix_fp16(
        other.weight.clone(memory_format=torch.contiguous_format).float())
    assert other.bias is not None, 'QuantizedLinearFP16 requires a bias'
    self.bias = torch.nn.Parameter(
        other.bias.clone(memory_format=torch.contiguous_format).float(),
        requires_grad=False)
    self.register_buffer('packed_weight', self.weight)

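# Hedged usage sketch (not from the original source): wrapping a float
# nn.Linear in the QuantizedLinearFP16 module defined above. Assumes an older
# PyTorch build where torch.fbgemm_pack_gemm_matrix_fp16 is still exposed;
# `fc` and the layer sizes are illustrative only.
import torch
import torch.nn as nn

fc = nn.Linear(128, 64, bias=True)   # a bias is required by the assert in __init__
qfc = QuantizedLinearFP16(fc)        # packs fc.weight into fbgemm's fp16 layout
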
def _unpack(self):
    if self.dtype == torch.int8:
        packed_weights = self._packed_weights
        quantized_weights = self._quantized_weights
        assert len(packed_weights) == len(quantized_weights)
        for i in range(len(packed_weights)):
            packed = packed_weights[i]
            quantized = quantized_weights[i]
            packed.set_(torch.fbgemm_pack_quantized_matrix(quantized))
    else:
        packed_weights = self._packed_weights
        orig_weights = self._orig_weights
        assert len(packed_weights) == len(orig_weights)
        for i in range(len(packed_weights)):
            packed = packed_weights[i]
            orig_weight = orig_weights[i]
            packed.set_(torch.fbgemm_pack_gemm_matrix_fp16(orig_weight))

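# Hedged standalone sketch of the Tensor.set_() pattern used by _unpack above
# (no fbgemm ops involved): the registered buffer keeps its Python identity
# while its storage is swapped for the freshly packed data, so every existing
# reference to the buffer observes the repacked matrix. The tensor names and
# shapes here are illustrative.
import torch

buf = torch.empty(0)                                          # stands in for a packed-weight buffer
repacked = torch.arange(6, dtype=torch.float32).reshape(2, 3)  # stands in for the repacked matrix
buf.set_(repacked)                                            # buf now aliases repacked's storage
assert buf.data_ptr() == repacked.data_ptr()
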
def process_weights(ihhh, layer, suffix, dtype):
    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

    weight = getattr(mod, weight_name)
    bias = getattr(mod, bias_name)

    if dtype == torch.qint8:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   w_ih, w_hh
        weight_observer(weight)
        wt_scale, wt_zp = weight_observer.calculate_qparams()
        qweight = torch.quantize_per_tensor(
            weight.float(), float(wt_scale), int(wt_zp), torch.qint8)
        packed_weight = \
            torch.ops.quantized.linear_prepack(qweight, bias)

        params = [packed_weight]
        pos_names = ['w']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name
    else:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   packed_ih, packed_hh, b_ih, b_hh
        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
            weight.float())

        params = [packed_weight, bias]
        pos_names = ['packed', 'b']
        ret_name = [
            '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
            for name in pos_names
        ]
        return params, ret_name

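# Hedged standalone sketch of the qint8 branch above. The original receives
# `weight_observer` from its caller; MinMaxObserver below is an assumed
# stand-in, and the weight/bias shapes are illustrative. Note the import path
# has moved to torch.ao.quantization in newer PyTorch releases, and
# torch.ops.quantized.linear_prepack needs a quantized engine such as fbgemm.
import torch
from torch.quantization import MinMaxObserver

weight = torch.randn(20, 10)
bias = torch.zeros(20)

observer = MinMaxObserver(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)
observer(weight)                                   # record min/max statistics
scale, zero_point = observer.calculate_qparams()   # derive per-tensor qparams

qweight = torch.quantize_per_tensor(
    weight.float(), float(scale), int(zero_point), torch.qint8)
packed = torch.ops.quantized.linear_prepack(qweight, bias)
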
def _unpack(self):
    if self.dtype == torch.uint8:
        packed_weights = self._get_packed_weights()
        quantized_weights = self._get_quantized_weights()
        assert len(packed_weights) == len(quantized_weights)
        for i in range(len(packed_weights)):
            packed = packed_weights[i]
            quantized = quantized_weights[i]
            packed.set_(
                torch.fbgemm_pack_quantized_matrix(
                    quantized, quantized.size(1), quantized.size(0)))
    else:
        packed_weights = self._get_packed_weights()
        orig_weights = self._get_orig_weights()
        assert len(packed_weights) == len(orig_weights)
        for i in range(len(packed_weights)):
            packed = packed_weights[i]
            orig_weight = orig_weights[i]
            packed.set_(torch.fbgemm_pack_gemm_matrix_fp16(orig_weight))

def _unpack(self):
    self.packed_weight.set_(
        torch.fbgemm_pack_gemm_matrix_fp16(self.original_weight))