def forward(self, w: torch.Tensor) -> torch.Tensor:  # type: ignore
    """Forward pass of quantizing weight using least squares 1 bit."""
    if self.training:
        v1, w_q = quantization.quantizer_ls_1(w)
        self.v1.copy_(v1)  # type: ignore
    else:
        _, w_q = quantization.quantizer_ls_1(w, self.v1)  # type: ignore
    return w_q
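For context on what `quantizer_ls_1` computes: minimizing ‖w − v·sign(w)‖² over a non-negative scalar v reduces to minimizing Σ(|wᵢ| − v)², whose optimum is v = mean(|w|), taken separately for each example. The sketch below reproduces that closed form in plain PyTorch as a point of reference; the name `ls_1_sketch` and the convention of mapping zeros to +1 are illustrative assumptions, not the library's actual implementation.

from typing import Tuple

import torch


def ls_1_sketch(x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """Illustrative 1-bit least-squares quantization with per-example scales."""
    # Binary codes: +1 for non-negative entries, -1 otherwise (zero convention assumed).
    b = torch.where(x >= 0, torch.ones_like(x), -torch.ones_like(x))
    # Closed-form least-squares scale: mean absolute value of each example.
    v1 = x.abs().flatten(start_dim=1).mean(dim=1)
    x_q = v1.view(-1, *([1] * (x.dim() - 1))) * b
    return v1, x_q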
Example #2
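The test below also calls a `binarize` helper that is not part of this listing. Any element-wise mapping to ±1 works for the comparison; a minimal stand-in, assuming the common convention of sending zeros to +1 (the library's own convention may differ), is:

def binarize(x: torch.Tensor) -> torch.Tensor:
    """Map every element to +1 or -1 (zeros to +1 by assumption)."""
    return torch.where(x >= 0, torch.ones_like(x), -torch.ones_like(x))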
def test_quantizer_ls_1_optimal():
    """Test 1-bit optimal least-squares scaled binary quantization."""
    torch.manual_seed(1234)
    x = torch.randn(1000, 3, 64, 64)

    _, x_q = quantization.quantizer_ls_1(x)
    assert x_q.shape == x.shape

    # Check x_q has lower least-squares error compared with using random scaling factors
    subopt_scaling_factor = torch.randn(1000, 1, 1, 1).abs()
    subopt_quantization = subopt_scaling_factor * binarize(x)
    opt_costs = torch.norm((x_q - x).view(1000, -1), dim=1)
    subopt_costs = torch.norm((subopt_quantization - x).view(1000, -1), dim=1)
    assert torch.all(opt_costs <= subopt_costs)
Example #3
def test_quantizer_ls2_better_than_gf2():
    """Test ls-2 is better than gf-2, which is better than ls-1."""
    torch.manual_seed(1234)
    x = torch.randn(1000, 3, 64, 64)

    _, _, x_q_ls2 = quantization.quantizer_ls_2(x, skip=1)
    _, x_q_gf2 = quantization.quantizer_gf(x, k=2)
    _, x_q_ls1 = quantization.quantizer_ls_1(x)

    ls2_costs = torch.norm((x_q_ls2 - x).view(1000, -1), dim=1)
    gf2_costs = torch.norm((x_q_gf2 - x).view(1000, -1), dim=1)
    ls1_costs = torch.norm((x_q_ls1 - x).view(1000, -1), dim=1)

    assert torch.all(ls2_costs <= gf2_costs)
    assert torch.all(gf2_costs <= ls1_costs)
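The asserted ordering reflects how the quantizers are constructed: `quantizer_gf` with k=2 refines the 1-bit result by quantizing its residual, so its per-example error cannot exceed ls-1's, and ls-2 optimizes both scaling factors jointly over the same two-term representation, so it in turn cannot do worse than the greedy construction. A rough sketch of the greedy 2-bit step, reusing `ls_1_sketch` from above (the real `quantizer_gf` may differ in details such as its foldability constraint):

def gf_2_sketch(x: torch.Tensor) -> torch.Tensor:
    """Illustrative greedy 2-bit quantization: ls-1, then ls-1 on the residual."""
    _, x_q1 = ls_1_sketch(x)        # first scaled binary term
    _, r_q = ls_1_sketch(x - x_q1)  # second term fitted to the residual
    return x_q1 + r_q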
Example #4
def test_moving_average_eval_only_multi_gpu():
    """Test moving average option with eval_only mode set in activation quantizer, with 2 GPUs."""
    alpha = 0.9
    activation_quantizer = ActivationQuantizerLS1('eval_only', alpha)

    activation_quantizer = nn.DataParallel(activation_quantizer,
                                           device_ids=[0, 1])
    device = torch.device('cuda:0')
    activation_quantizer.to(device)

    activation_quantizer.train()
    for i in range(10):
        x_gpu0 = i * torch.ones(
            8, 1, 20, 20, requires_grad=True, device=device)
        x_gpu1 = 42 * torch.ones(
            8, 1, 20, 20, requires_grad=True, device=device)
        x = torch.cat([x_gpu0, x_gpu1], dim=0)
        x_q = activation_quantizer(x)
        x_q.sum().backward()

        # Moving average internal statistics should be updated
        actual_ma = activation_quantizer.module.moving_avg_module.moving_average
        ma_i = _compute_moving_average_closed_form(i, alpha)
        expected_ma = torch.tensor(ma_i, device=device).expand_as(actual_ma)
        assert torch.allclose(expected_ma, actual_ma)

        # Quantization should NOT be computed from moving average scalars
        assert torch.allclose(x, x_q)

    activation_quantizer.eval()
    for i in range(5):
        x = 42 * torch.ones(16, 1, 20, 20, requires_grad=True, device=device)
        x_q = activation_quantizer(x)
        x_q.sum().backward()
        actual_ma = activation_quantizer.module.moving_avg_module.moving_average

        # scalars should be memorized from train and not updated
        ma_i = _compute_moving_average_closed_form(9, alpha)
        expected_ma = torch.tensor(ma_i, device=device).expand_as(actual_ma)
        assert torch.allclose(expected_ma, actual_ma)

        # Quantization should use the moving average scalar accumulated on the 1st GPU during training
        _, expected = quantizer_ls_1(
            x,
            torch.tensor([ma_i], device=device).expand(16))
        assert torch.allclose(x_q, expected)
Example #5
def test_activation_quantizer_ls1_no_ma():
    """Test no moving average mode of activation quantizer for least squares 1 bit."""
    torch.manual_seed(1234)
    x = torch.ones(32, 16, 3, 3) * 2
    x2 = torch.rand(32, 16, 3, 3)  # a random, all-positive tensor

    quantizer_ls1_no_ma = ActivationQuantizerLS1('off')
    quantizer_ls1_no_ma.train()
    quantizer_ls1_no_ma(x)  # v1 should be 2 for all examples
    x_q_train_no_ma = quantizer_ls1_no_ma(
        x)  # call twice so moving avg changes if used
    assert torch.all(x_q_train_no_ma == 2.0)

    quantizer_ls1_no_ma.eval()
    x_q_eval_no_ma = quantizer_ls1_no_ma(x2)
    # v1 should not be cached, so it should be recomputed
    _, expected = quantization.quantizer_ls_1(x2)
    assert torch.all(x_q_eval_no_ma.eq(expected))
    assert not torch.all(x_q_eval_no_ma.eq(x_q_train_no_ma))
Example #6
def test_moving_average_train_and_eval():
    """Test moving average with train_and_eval mode set in activation quantizer."""
    alpha = 0.9

    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices.append(torch.device('cuda:0'))

    for device in devices:
        activation_quantizer = ActivationQuantizerLS1('train_and_eval', alpha)
        activation_quantizer.to(device)
        activation_quantizer.train()
        for i in range(10):
            x = i * torch.ones(8, 1, 20, 20, requires_grad=True, device=device)
            x_q = activation_quantizer(x)
            x_q.sum().backward()

            # Moving average internal statistics should be updated
            actual_ma = activation_quantizer.moving_avg_module.moving_average
            ma_i = _compute_moving_average_closed_form(i, alpha)
            expected_ma = torch.tensor(ma_i,
                                       device=device).expand_as(actual_ma)
            assert torch.allclose(expected_ma, actual_ma)

            # Quantization should be computed from moving average scalars
            _, expected_quantization = quantizer_ls_1(
                x,
                torch.tensor([ma_i], device=device).expand(8))
            assert torch.allclose(expected_quantization, x_q)

        activation_quantizer.eval()
        for i in range(5):
            x = i * torch.ones(8, 1, 20, 20, requires_grad=True, device=device)
            activation_quantizer(x).sum().backward()
            actual_ma = activation_quantizer.moving_avg_module.moving_average
            # scalars should be memorized from train and not updated
            expected_ma = torch.tensor(_compute_moving_average_closed_form(
                9, alpha),
                                       device=device).expand_as(actual_ma)
            assert torch.allclose(expected_ma, actual_ma)
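Both moving-average tests compare against `_compute_moving_average_closed_form`, which is not shown in this listing. In iteration i the input is `i * torch.ones(...)`, so the per-example batch scale is exactly i; assuming the standard update ma ← α·ma + (1 − α)·v with the average seeded at the first batch value (0), the closed form would look like the sketch below. The actual helper may use a different seeding or a bias correction.

def _compute_moving_average_closed_form(i: int, alpha: float) -> float:
    """Closed-form EMA of the batch scales 0, 1, ..., i (assumed seeded at 0)."""
    # Unrolls ma_k = alpha * ma_{k-1} + (1 - alpha) * k with ma_0 = 0.
    return (1 - alpha) * sum(alpha ** (i - k) * k for k in range(1, i + 1))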
Example #7
def _moving_average_quantization(self, x: torch.Tensor,
                                 vs: List[torch.Tensor]) -> torch.Tensor:
    """Return quantized x using vs."""
    v1 = vs[0]
    _, x_q = quantization.quantizer_ls_1(x, v1)
    return x_q
Example #8
def _batch_quantization(
        self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """Return a 2-tuple of (scaling factors, quantized x)."""
    batch_v1, x_q = quantization.quantizer_ls_1(x)
    return batch_v1.view(1, -1), x_q
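Read together, the last two helpers suggest the overall shape of the moving-average activation quantizer: `_batch_quantization` produces per-batch scaling factors that feed an exponential moving average during training, and `_moving_average_quantization` reuses the stored scalars at eval time. The module below is a highly simplified sketch of that pattern built on `ls_1_sketch` from above; its name, buffer layout, and mode handling are illustrative assumptions, not the `ActivationQuantizerLS1` implementation.

import torch
import torch.nn as nn


class MovingAverageLS1Sketch(nn.Module):
    """Illustrative 1-bit activation quantizer with an EMA over scaling factors."""

    def __init__(self, alpha: float = 0.9) -> None:
        super().__init__()
        self.alpha = alpha
        self.register_buffer('moving_average', torch.zeros(1))
        self.initialized = False

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.training:
            v1, x_q = ls_1_sketch(x)  # per-example batch scales, as in Example #8
            with torch.no_grad():
                batch_scale = v1.mean()
                if not self.initialized:
                    self.moving_average.fill_(float(batch_scale))
                    self.initialized = True
                else:
                    self.moving_average.mul_(self.alpha).add_(
                        (1 - self.alpha) * float(batch_scale))
            return x_q
        # Eval: quantize with the memorized scale, as in Example #7.
        b = torch.where(x >= 0, torch.ones_like(x), -torch.ones_like(x))
        return self.moving_average * b  # single EMA scale broadcast over the batch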