def __init__(self, other):
    super(QuantizedRNNCellBase, self).__init__()
    warnings.warn(
        "torch.jit.QuantizedRNNCellBase is deprecated and will be removed in an upcoming "
        "PyTorch release. Please use the torch.nn.quantized.dynamic.RNNCell instead."
    )
    self.input_size = other.input_size
    self.hidden_size = other.hidden_size
    self.bias = other.bias
    if not self.bias:
        raise ValueError("Quantized RNN cells require bias terms")

    weight_ih, col_offsets_ih, self.scale_ih, self.zero_point_ih = \
        torch.fbgemm_linear_quantize_weight(
            other.weight_ih.clone(memory_format=torch.contiguous_format).float())
    self.register_buffer('weight_ih', weight_ih)
    self.register_buffer('col_offsets_ih', col_offsets_ih)
    weight_hh, col_offsets_hh, self.scale_hh, self.zero_point_hh = \
        torch.fbgemm_linear_quantize_weight(
            other.weight_hh.clone(memory_format=torch.contiguous_format).float())
    self.register_buffer('weight_hh', weight_hh)
    self.register_buffer('col_offsets_hh', col_offsets_hh)

    packed_ih = torch.fbgemm_pack_quantized_matrix(self.weight_ih)
    self.register_buffer('packed_ih', packed_ih)
    packed_hh = torch.fbgemm_pack_quantized_matrix(self.weight_hh)
    self.register_buffer('packed_hh', packed_hh)

    self.bias_ih = torch.nn.Parameter(
        other.bias_ih.clone(memory_format=torch.contiguous_format).float(),
        requires_grad=False)
    self.bias_hh = torch.nn.Parameter(
        other.bias_hh.clone(memory_format=torch.contiguous_format).float(),
        requires_grad=False)
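# Hedged usage sketch (not part of the original source): how the constructor
# above would be driven from a float nn.RNNCell. The fbgemm_* calls mirror the
# ones in __init__ exactly; note these internal ops are only available on
# PyTorch builds with FBGEMM support, and the example shapes are illustrative.
import torch
import torch.nn as nn

float_cell = nn.RNNCell(input_size=10, hidden_size=20, bias=True)
# Quantize the input-hidden weight matrix and pack it for int8 GEMM, as the
# constructor does for both weight_ih and weight_hh.
qweight_ih, col_offsets_ih, scale_ih, zero_point_ih = \
    torch.fbgemm_linear_quantize_weight(
        float_cell.weight_ih.clone(memory_format=torch.contiguous_format).float())
packed_ih = torch.fbgemm_pack_quantized_matrix(qweight_ih)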
def __init__(self, other):
    super(QuantizedRNNCellBase, self).__init__()
    self.input_size = other.input_size
    self.hidden_size = other.hidden_size
    self.bias = other.bias
    if not self.bias:
        raise ValueError("Quantized RNN cells require bias terms")

    weight_ih, col_offsets_ih, self.scale_ih, self.zero_point_ih = \
        torch.fbgemm_linear_quantize_weight(other.weight_ih.clone().float())
    self.register_buffer('weight_ih', weight_ih)
    self.register_buffer('col_offsets_ih', col_offsets_ih)
    weight_hh, col_offsets_hh, self.scale_hh, self.zero_point_hh = \
        torch.fbgemm_linear_quantize_weight(other.weight_hh.clone().float())
    self.register_buffer('weight_hh', weight_hh)
    self.register_buffer('col_offsets_hh', col_offsets_hh)

    packed_ih = torch.fbgemm_pack_quantized_matrix(self.weight_ih)
    self.register_buffer('packed_ih', packed_ih)
    packed_hh = torch.fbgemm_pack_quantized_matrix(self.weight_hh)
    self.register_buffer('packed_hh', packed_hh)

    self.bias_ih = torch.nn.Parameter(other.bias_ih.clone().float(), requires_grad=False)
    self.bias_hh = torch.nn.Parameter(other.bias_hh.clone().float(), requires_grad=False)
def process_weights(ihhh, layer, suffix):
    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

    weight = getattr(other, weight_name)
    bias = getattr(other, bias_name)

    qweight, col_offsets, scale, zero_point = \
        torch.fbgemm_linear_quantize_weight(weight.clone().float())
    packed_weight = torch.fbgemm_pack_quantized_matrix(
        qweight, weight.size(1), weight.size(0))

    params = [qweight, bias, packed_weight, col_offsets, scale, zero_point]
    pos_names = ['w', 'b', 'packed', 'col_offsets', 'scale', 'zero_point']
    ret_name = ['{}_{}_l{}{}'.format(name, ihhh, layer, suffix) for name in pos_names]
    quantized_weights.append(ret_name[0])
    packed_weights.append(ret_name[2])
    return params, ret_name
def process_weights(ihhh, layer, suffix, dtype):
    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

    weight = getattr(other, weight_name)
    bias = getattr(other, bias_name)

    if dtype == torch.int8:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   w_ih, w_hh, b_ih, b_hh, packed_ih, packed_hh, col_offsets_ih,
        #   col_offsets_hh, scale_ih, scale_hh, zero_point_ih, zero_point_hh
        qweight, col_offsets, scale, zero_point = \
            torch.fbgemm_linear_quantize_weight(
                weight.clone(memory_format=torch.contiguous_format).float())
        packed_weight = torch.fbgemm_pack_quantized_matrix(qweight)

        params = [qweight, bias, packed_weight, col_offsets, scale, zero_point]
        pos_names = ['w', 'b', 'packed', 'col_offsets', 'scale', 'zero_point']
        ret_name = ['{}_{}_l{}{}'.format(name, ihhh, layer, suffix) for name in pos_names]
        self._quantized_weights_names.append(ret_name[0])
        self._packed_weights_names.append(ret_name[2])
        return params, ret_name
    else:
        # for each layer, for each direction we need to quantize and pack
        # weights and pack parameters in this order:
        #
        #   packed_ih, packed_hh, b_ih, b_hh
        packed_weight = torch.fbgemm_pack_gemm_matrix_fp16(
            weight.clone(memory_format=torch.contiguous_format).float())
        self._orig_weights_names.append(weight_name)
        self.register_buffer(weight_name, weight)

        params = [packed_weight, bias]
        pos_names = ['packed', 'b']
        ret_name = ['{}_{}_l{}{}'.format(name, ihhh, layer, suffix) for name in pos_names]
        self._packed_weights_names.append(ret_name[0])
        self._quantized_weights_names.append(ret_name[0])
        return params, ret_name
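# Minimal standalone sketch (illustrative shapes, not from the original source)
# contrasting the two packing paths that process_weights selects on `dtype`,
# using the same internal fbgemm_* ops the method calls.
import torch

w = torch.randn(8, 4)
# int8 path: quantize the weights to int8, then pack the quantized matrix
# into FBGEMM's GEMM-friendly layout.
qw, col_offsets, scale, zero_point = \
    torch.fbgemm_linear_quantize_weight(w.clone().float())
packed_int8 = torch.fbgemm_pack_quantized_matrix(qw)
# fp16 path: no quantization step; the float weights are packed directly
# into a half-precision GEMM layout.
packed_fp16 = torch.fbgemm_pack_gemm_matrix_fp16(w.clone().float())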
def __init__(self, other):
    super(QuantizedLinear, self).__init__()
    self.in_features = other.in_features
    self.out_features = other.out_features
    # Quantize weight and discard the original
    self.weight, self.col_offsets, self.scale, self.zero_point = \
        torch.fbgemm_linear_quantize_weight(
            other.weight.clone(memory_format=torch.contiguous_format).float())
    self.weight = torch.nn.Parameter(self.weight, requires_grad=False)
    self.col_offsets = torch.nn.Parameter(self.col_offsets, requires_grad=False)
    assert other.bias is not None, 'QuantizedLinear requires a bias'
    self.bias = torch.nn.Parameter(
        other.bias.clone(memory_format=torch.contiguous_format).float(),
        requires_grad=False)
    self.register_buffer(
        'packed_tensor_ptr',
        torch.fbgemm_pack_quantized_matrix(
            self.weight.clone(memory_format=torch.contiguous_format)))
def test_quantized_linear(self, shape, out_features):
    input = torch.rand(shape)
    weight = torch.rand(out_features, shape[1])
    bias = torch.rand(out_features)

    q_weight, col_offsets, scale, zero_point = \
        torch.fbgemm_linear_quantize_weight(weight.clone().float())
    packed_weight = torch.fbgemm_pack_quantized_matrix(q_weight.clone())

    def fbgemm_quantized_linear(input, weight, bias):
        return torch.fbgemm_linear_int8_weight_fp32_activation(
            input.float(), q_weight, packed_weight, col_offsets, scale,
            zero_point, bias.float())

    ref_out, tvm_out = self.runBoth(fbgemm_quantized_linear, input, weight, bias)
    # Relax the tolerances to keep this test from being flaky
    assert torch.allclose(ref_out, tvm_out, rtol=0.5, atol=0.5)
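# Standalone sketch of the op the test above exercises, outside the test
# harness. Shapes are illustrative and this requires an FBGEMM-enabled
# PyTorch build; the call and argument order are exactly those in the test.
import torch

x = torch.rand(3, 4)
w = torch.rand(5, 4)
b = torch.rand(5)
qw, col_offsets, scale, zero_point = torch.fbgemm_linear_quantize_weight(w.float())
packed = torch.fbgemm_pack_quantized_matrix(qw)
out = torch.fbgemm_linear_int8_weight_fp32_activation(
    x.float(), qw, packed, col_offsets, scale, zero_point, b.float())  # shape (3, 5)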
def __init__(self, other):
    super(QuantizedLinear, self).__init__()
    warnings.warn(
        "torch.jit.QuantizedLinear is deprecated and will be removed in an upcoming "
        "PyTorch release. Please use the torch.nn.quantized.dynamic.Linear instead."
    )
    self.in_features = other.in_features
    self.out_features = other.out_features
    # Quantize weight and discard the original
    self.weight, self.col_offsets, self.scale, self.zero_point = \
        torch.fbgemm_linear_quantize_weight(
            other.weight.clone(memory_format=torch.contiguous_format).float())
    self.weight = torch.nn.Parameter(self.weight, requires_grad=False)
    self.col_offsets = torch.nn.Parameter(self.col_offsets, requires_grad=False)
    assert other.bias is not None, 'QuantizedLinear requires a bias'
    self.bias = torch.nn.Parameter(
        other.bias.clone(memory_format=torch.contiguous_format).float(),
        requires_grad=False)
    self.register_buffer(
        'packed_tensor_ptr',
        torch.fbgemm_pack_quantized_matrix(
            self.weight.clone(memory_format=torch.contiguous_format)))
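# Hedged sketch of a forward pass that would consume the buffers registered
# above. The op and argument order are taken from the test's reference
# implementation; treating this as the actual QuantizedLinear.forward, and
# the final cast back to the input dtype, are assumptions.
def forward(self, input):
    out = torch.fbgemm_linear_int8_weight_fp32_activation(
        input.float(), self.weight, self.packed_tensor_ptr, self.col_offsets,
        self.scale, self.zero_point, self.bias)
    return out.to(input.dtype)  # assumption: match the caller's dtype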