def forward(self, w: torch.Tensor) -> torch.Tensor:  # type: ignore
    """Forward pass of quantizing weight using least squares 1 bit."""
    if self.training:
        v1, w_q = quantization.quantizer_ls_1(w)
        self.v1.copy_(v1)  # type: ignore
    else:
        _, w_q = quantization.quantizer_ls_1(w, self.v1)  # type: ignore
    return w_q
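# For context: quantizer_ls_1 returns the least-squares-optimal 1-bit
# representation. For each slice along the first dimension, the scaling factor
# minimizing ||w - v * sign(w)||^2 is the mean absolute value of the slice.
# The sketch below only illustrates that math; it is not the library's
# implementation, and the name ls1_quantize_reference and its optional
# precomputed-v1 argument are assumptions chosen to mirror the call sites above.
from typing import Optional, Tuple

import torch


def ls1_quantize_reference(
        w: torch.Tensor,
        v1: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, torch.Tensor]:
    """Hypothetical sketch of least-squares 1-bit (binary) quantization."""
    if v1 is None:
        # Optimal per-slice scaling factor: mean absolute value over all
        # dimensions except the first.
        v1 = w.abs().mean(dim=tuple(range(1, w.dim())))
    b = torch.where(w >= 0, torch.ones_like(w), -torch.ones_like(w))  # binarize to +/-1
    shape = (-1,) + (1,) * (w.dim() - 1)  # broadcast v1 of shape [N] against w
    return v1, v1.reshape(shape) * b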
def test_quantizer_ls_1_optimal():
    """Test 1-bit optimal least-squares scaled binary quantization."""
    torch.manual_seed(1234)
    x = torch.randn(1000, 3, 64, 64)
    _, x_q = quantization.quantizer_ls_1(x)
    assert x_q.shape == x.shape
    # Check x_q has lower least-squares error compared with using random scaling factors
    subopt_scaling_factor = torch.randn(1000, 1, 1, 1).abs()
    subopt_quantization = subopt_scaling_factor * binarize(x)
    opt_costs = torch.norm((x_q - x).view(1000, -1), dim=1)
    subopt_costs = torch.norm((subopt_quantization - x).view(1000, -1), dim=1)
    assert torch.all(opt_costs <= subopt_costs)
def test_quantizer_ls2_better_than_gf2():
    """Test ls-2 is better than gf-2, which is better than ls-1."""
    torch.manual_seed(1234)
    x = torch.randn(1000, 3, 64, 64)
    _, _, x_q_ls2 = quantization.quantizer_ls_2(x, skip=1)
    _, x_q_gf2 = quantization.quantizer_gf(x, k=2)
    _, x_q_ls1 = quantization.quantizer_ls_1(x)
    ls2_costs = torch.norm((x_q_ls2 - x).view(1000, -1), dim=1)
    gf2_costs = torch.norm((x_q_gf2 - x).view(1000, -1), dim=1)
    ls1_costs = torch.norm((x_q_ls1 - x).view(1000, -1), dim=1)
    assert torch.all(ls2_costs <= gf2_costs)
    assert torch.all(gf2_costs <= ls1_costs)
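# Why the ordering asserted above holds: a greedy 2-bit quantizer fits one
# binary term at a time, each time applying the optimal 1-bit quantizer to the
# remaining residual, so adding the second term can only shrink the ls-1 error;
# ls-2 additionally optimizes both scaling factors jointly, so it does at least
# as well as the greedy solution. The sketch below shows the greedy scheme
# under that reading of "gf"; it is an illustration and an assumption, not
# quantizer_gf itself.
import torch


def greedy_2bit_reference(x: torch.Tensor):
    """Hypothetical sketch of greedy 2-bit (residual) binary quantization."""
    dims = tuple(range(1, x.dim()))
    shape = (-1,) + (1,) * (x.dim() - 1)
    residual = x
    x_q = torch.zeros_like(x)
    vs = []
    for _ in range(2):  # two binary terms -> 2 bits per element
        b = torch.where(residual >= 0, torch.ones_like(residual),
                        -torch.ones_like(residual))
        v = residual.abs().mean(dim=dims)  # least-squares scale for this term
        term = v.reshape(shape) * b
        x_q = x_q + term
        residual = residual - term
        vs.append(v)
    return vs, x_q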
def test_moving_average_eval_only_multi_gpu():
    """Test moving average option with eval_only mode set in activation quantizer, with 2 GPUs."""
    alpha = 0.9
    activation_quantizer = ActivationQuantizerLS1('eval_only', alpha)
    activation_quantizer = nn.DataParallel(
        activation_quantizer, device_ids=[0, 1])
    device = torch.device('cuda:0')
    activation_quantizer.to(device)
    activation_quantizer.train()
    for i in range(10):
        x_gpu0 = i * torch.ones(
            8, 1, 20, 20, requires_grad=True, device=device)
        x_gpu1 = 42 * torch.ones(
            8, 1, 20, 20, requires_grad=True, device=device)
        x = torch.cat([x_gpu0, x_gpu1], dim=0)
        x_q = activation_quantizer(x)
        x_q.sum().backward()
        # Moving average internal statistics should be updated
        actual_ma = activation_quantizer.module.moving_avg_module.moving_average
        ma_i = _compute_moving_average_closed_form(i, alpha)
        expected_ma = torch.tensor(ma_i, device=device).expand_as(actual_ma)
        assert torch.allclose(expected_ma, actual_ma)
        # Quantization should NOT be computed from moving average scalars
        assert torch.allclose(x, x_q)
    activation_quantizer.eval()
    for i in range(5):
        x = 42 * torch.ones(16, 1, 20, 20, requires_grad=True, device=device)
        x_q = activation_quantizer(x)
        x_q.sum().backward()
        actual_ma = activation_quantizer.module.moving_avg_module.moving_average
        # Scalars should be memorized from train and not updated
        ma_i = _compute_moving_average_closed_form(9, alpha)
        expected_ma = torch.tensor(ma_i, device=device).expand_as(actual_ma)
        assert torch.allclose(expected_ma, actual_ma)
        # Quantization should use the moving average scalar from the 1st GPU during training
        _, expected = quantizer_ls_1(
            x, torch.tensor([ma_i], device=device).expand(16))
        assert torch.allclose(x_q, expected)
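# The helper _compute_moving_average_closed_form used above (and in the
# train_and_eval test below) is not shown in this excerpt. One plausible
# definition, assuming the quantizer's EMA is updated as
# ma <- alpha * ma + (1 - alpha) * v1 and that the batch at step j has scaling
# factor j (quantizer_ls_1 of j * ones is j). The initialization and update
# rule here are assumptions for illustration, not taken from the library.
def _compute_moving_average_closed_form_sketch(n: int, alpha: float) -> float:
    """Hypothetical EMA value after observing scaling factors 0, 1, ..., n."""
    return sum((1 - alpha) * alpha ** (n - j) * j for j in range(n + 1))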
def test_activation_quantizer_ls1_no_ma():
    """Test no moving average mode of activation quantizer for least squares 1 bit."""
    torch.manual_seed(1234)
    x = torch.ones(32, 16, 3, 3) * 2
    x2 = torch.rand(32, 16, 3, 3)  # some random, but all positive tensor
    quantizer_ls1_no_ma = ActivationQuantizerLS1('off')
    quantizer_ls1_no_ma.train()
    quantizer_ls1_no_ma(x)  # v1 should be 2 for all examples
    x_q_train_no_ma = quantizer_ls1_no_ma(
        x)  # call twice so moving avg changes if used
    assert torch.all(x_q_train_no_ma == 2.0)
    quantizer_ls1_no_ma.eval()
    x_q_eval_no_ma = quantizer_ls1_no_ma(x2)
    # v1 should not be cached, so it should be recomputed
    _, expected = quantization.quantizer_ls_1(x2)
    assert torch.all(x_q_eval_no_ma.eq(expected))
    assert not torch.all(x_q_eval_no_ma.eq(x_q_train_no_ma))
def test_moving_average_train_and_eval():
    """Test moving average with train_and_eval mode set in activation quantizer."""
    alpha = 0.9
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda:0'))
    for device in devices:
        activation_quantizer = ActivationQuantizerLS1('train_and_eval', alpha)
        activation_quantizer.to(device)
        activation_quantizer.train()
        for i in range(10):
            x = i * torch.ones(8, 1, 20, 20, requires_grad=True, device=device)
            x_q = activation_quantizer(x)
            x_q.sum().backward()
            # Moving average internal statistics should be updated
            actual_ma = activation_quantizer.moving_avg_module.moving_average
            ma_i = _compute_moving_average_closed_form(i, alpha)
            expected_ma = torch.tensor(ma_i, device=device).expand_as(actual_ma)
            assert torch.allclose(expected_ma, actual_ma)
            # Quantization should be computed from moving average scalars
            _, expected_quantization = quantizer_ls_1(
                x, torch.tensor([ma_i], device=device).expand(8))
            assert torch.allclose(expected_quantization, x_q)
        activation_quantizer.eval()
        for i in range(5):
            x = i * torch.ones(8, 1, 20, 20, requires_grad=True, device=device)
            activation_quantizer(x).sum().backward()
            actual_ma = activation_quantizer.moving_avg_module.moving_average
            # Scalars should be memorized from train and not updated
            expected_ma = torch.tensor(_compute_moving_average_closed_form(
                9, alpha), device=device).expand_as(actual_ma)
            assert torch.allclose(expected_ma, actual_ma)
def _moving_average_quantization(self, x: torch.Tensor,
                                 vs: List[torch.Tensor]) -> torch.Tensor:
    """Return quantized x using vs."""
    v1 = vs[0]
    _, x_q = quantization.quantizer_ls_1(x, v1)
    return x_q
def _batch_quantization(
        self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """Return a 2-tuple of (scaling factors, quantized x)."""
    batch_v1, x_q = quantization.quantizer_ls_1(x)
    return batch_v1.view(1, -1), x_q