    def testCompressConvBnRelu(self):
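        # Compresses a ConvBNReLU block along its output channels with a
        # boolean mask and checks that the kept channels of the original block
        # match the compressed block, in both train and eval mode.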
        inp, oup, kernel_size, groups = 3, 5, 3, 1
        m0 = mb.ConvBNReLU(inp,
                           oup,
                           kernel_size,
                           groups=groups,
                           active_fn=nn.ReLU)
        m0.apply(random_bn)
        mask = torch.tensor([False, True, True, False, False])
        num_remain = mask.sum().item()
        m1 = mb.ConvBNReLU(inp,
                           num_remain,
                           kernel_size,
                           groups=groups,
                           active_fn=nn.ReLU)
        m1.apply(random_bn)
        infos = cu.compress_conv_bn_relu(m1,
                                         m0,
                                         mask,
                                         prefix_new='new',
                                         prefix_old='old')
        inputs = torch.randn(2, 3, 10, 10)
        self._apply_info(infos)

        for m in [m0, m1]:
            m.train()
        lhs = m0(inputs)
        rhs = m1(inputs)
        assertAllClose(lhs[:, mask], rhs)

        for m in [m0, m1]:
            m.eval()
        lhs = m0(inputs)
        rhs = m1(inputs)
        assertAllClose(lhs[:, mask], rhs)
    def testAdjustEmaRate(self):
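        # adjust_momentum is expected to rescale the EMA momentum so that
        # applying num_repeat updates per value matches a single update with
        # the original momentum (i.e. adjusted ** num_repeat == momentum).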
        for num_repeat in [1, 5, 9]:
            for momentum in [0.25, 0.9999]:
                name = 'v'
                values = torch.randn(5)
                values_long = values.repeat(num_repeat, 1).permute(
                    (1, 0)).contiguous().view(-1)

                ema = optim.ExponentialMovingAverage(momentum)
                ema.register(name, values[0])
                for v in values:
                    ema(name, v)
                lhs = ema.average(name)

                momentum = optim.ExponentialMovingAverage.adjust_momentum(
                    momentum, num_repeat)
                ema = optim.ExponentialMovingAverage(momentum)
                ema.register(name, values[0])
                for v in values_long:
                    ema(name, v)
                rhs = ema.average(name)

                assertAllClose(lhs, rhs)
    def testCalMaskNetworkSlimmingByThreshold(self):
        # Channels whose scaling factors fall below the threshold (1.5) are
        # masked out (False); larger ones are kept (True).
        x = [
            torch.tensor(val, dtype=torch.float32)
            for val in [[1, 2, 5], [3, 6, 0, 1.1]]
        ]
        mask = prune.cal_mask_network_slimming_by_threshold(x, 1.5)
        expected = [
            torch.tensor([False, True, True]),
            torch.tensor([True, True, False, False])
        ]
        assertAllClose(mask, expected)
    def testAverageVariablesUpdateNumUpdates_Vector(self):
        # With num_updates passed in, the effective decay appears to follow
        # min(momentum, (1 + num_updates) / (10 + num_updates)): 0.1 for the
        # first update and 2/11 for the second, which gives the value below.
        ema = optim.ExponentialMovingAverage(0.25)
        name = 'tens'
        tens = _Repeat(10.0, dim=5)
        var = torch.tensor(tens)
        ema.register(name, var, zero_init=False)
        for num_updates in range(2):
            var.add_(1)
            ema(name, var, num_updates=num_updates)
        expected = _Repeat(
            (10 * 0.1 + 11 * 0.9) * 2.0 / 11.0 + 12 * 9.0 / 11.0, dim=5)
        assertAllClose(expected, ema.average(name))
    def testRhoScheduler(self):
        # rho stays at 0 until step epoch_free * steps_per_epoch, then ramps
        # linearly to 1.0 at step epoch_warmup * steps_per_epoch.
        prune_params = {
            'rho': 1.0,
            'epoch_free': 1,
            'epoch_warmup': 3,
            'scheduler': 'linear',
            'stepwise': True,
        }
        rho_scheduler = prune.get_rho_scheduler(prune_params, 2)
        res = [rho_scheduler(i) for i in range(15)]
        expected = [0, 0, 0, 0.25, 0.50, 0.75] + [1.0] * 9
        assertAllClose(expected, res)
    def testCompressConv(self):
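        # Compresses a plain Conv2d along dim 0 (output channels) and checks
        # that the surviving output channels agree with the original layer.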
        inp, oup, kernel_size, groups = 3, 5, 3, 1
        conv0 = nn.Conv2d(inp, oup, kernel_size, groups=groups)

        mask = torch.tensor([False, True, True, False, False])
        conv1 = nn.Conv2d(inp, mask.sum().item(), kernel_size, groups=groups)
        infos = cu.compress_conv(conv1, conv0, mask, 0)
        self._apply_info(infos)

        inputs = torch.randn(1, 3, 10, 10)
        lhs = conv0(inputs)
        rhs = conv1(inputs)
        assertAllClose(lhs[:, mask], rhs)
    def testBnL1Loss(self):
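        # The manual in-place gradient update for the BN L1 sparsity penalty
        # should match the autograd gradient of cal_bn_l1_loss.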
        rho = 1.0
        penalties = [1.0]
        for i in range(10):
            var = torch.rand(10, requires_grad=True)
            var.grad = torch.zeros_like(var)
            update_bn_network_slimming([var], penalties, rho)
            lhs = var.grad.detach().cpu().numpy()

            var.grad.zero_()
            loss = prune.cal_bn_l1_loss([var], penalties, rho)
            loss.backward()
            rhs = var.grad.detach().cpu().numpy()
            assertAllClose(lhs, rhs)
    def testSaveLoad(self):
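        # state_dict/load_state_dict round-trip; loading a state dict whose
        # momentum differs from the current one should emit a RuntimeWarning.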
        ema = optim.ExponentialMovingAverage(0.25)
        name = 'tens'
        tens = _Repeat(10.0, dim=5)
        var = torch.tensor(tens)
        ema.register(name, var, zero_init=False)
        state_dict = ema.state_dict()
        for name in ['info', 'shadow', 'param']:
            assert name in state_dict
        assert 'tens' in state_dict['shadow']
        assertAllClose(state_dict['shadow']['tens'], var)

        ema.load_state_dict(state_dict)
        state_dict['param']['momentum'] = 0.5
        self.assertWarns(RuntimeWarning,
                         lambda: ema.load_state_dict(state_dict))
    def testCompressConvDepthwise(self):
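        # For a depthwise conv, pruning output channels also removes the
        # corresponding input channels, so the compressed layer is fed the
        # masked input.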
        inp, oup, kernel_size, groups = 5, 5, 3, 5
        conv0 = nn.Conv2d(inp, oup, kernel_size, groups=groups)
        mask = torch.tensor([False, True, True, False, False])
        num_remain = mask.sum().item()
        conv1 = nn.Conv2d(num_remain,
                          num_remain,
                          kernel_size,
                          groups=num_remain)
        infos = cu.compress_conv(conv1, conv0, mask, 0)
        self._apply_info(infos)

        inputs = torch.randn(1, 5, 10, 10)
        lhs = conv0(inputs)
        rhs = conv1(inputs[:, mask])
        assertAllClose(lhs[:, mask], rhs)
    def testSgdDecay(self):
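        # SGD's built-in weight_decay should be equivalent to adding an
        # explicit 0.5 * weight_decay * ||w||^2 penalty to the loss.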
        var = np.random.randn(10)
        weight_decay = 1e-1

        var0 = torch.tensor(var, requires_grad=True, dtype=torch.float32)
        var0.grad = torch.zeros_like(var0)
        optimizer = torch.optim.SGD([var0], weight_decay=weight_decay, lr=0.1)
        optimizer.zero_grad()
        optimizer.step()

        var1 = torch.tensor(var, requires_grad=True, dtype=torch.float32)
        optimizer = torch.optim.SGD([var1], weight_decay=0, lr=0.1)
        optimizer.zero_grad()
        loss = (weight_decay * 0.5) * (var1**2).sum()
        loss.backward()
        optimizer.step()

        assertAllClose(to_numpy(var0.grad), to_numpy(var1.grad))
        assertAllClose(to_numpy(var0), to_numpy(var1))
    def testCompress(self):
        # compress_mask should move the shadow value and update info from the
        # old name to the new name, keep only the masked positions, and drop
        # the old entries entirely.
        ema = optim.ExponentialMovingAverage(0.25)
        ema.register('var_prune', torch.arange(5).float())
        ema.register('var_keep', torch.arange(5, 10).float())
        ema('var_prune', torch.arange(5).float())
        info = {
            'var_old_name': 'var_prune',
            'var_new_name': 'var_new',
            'var_new': torch.randn(3),
            'mask': torch.tensor([False, True, False, True, True]),
            'mask_hook': lambda lhs, rhs, mask: lhs.data.copy_(rhs.data[mask])
        }
        ema.compress_mask(info, verbose=False)
        self.assertTrue(info['var_new_name'] in ema._shadow)
        self.assertTrue(info['var_new_name'] in ema._info)
        self.assertTrue(info['var_old_name'] not in ema._shadow)
        self.assertTrue(info['var_old_name'] not in ema._info)
        self.assertEqual(ema._info[info['var_new_name']]['num_updates'], 1)
        assertAllClose(ema.average(info['var_new_name']), [1, 3, 4])
    def testSoftmaxLabelSmoothing(self):
        # Softmax cross-entropy loss is:
        #   -\sum_i p_i \log q_i
        # where for a softmax activation
        #   \log q_i = x_i - \log \sum_j \exp x_j
        #            = x_i - x_max - \log \sum_j \exp (x_j - x_max)
        # For our activations, [100, -100, -100], the log partition function
        # becomes \log ( \exp(0) + \exp(-200) + \exp(-200) ) = 0,
        # so our log softmaxes become [0, -200, -200].
        # With label smoothing L over n classes, the cross-entropy loss is:
        #   -(1 - L + L/n) * 0 + 2 * 200 * L/n = 400 L/n
        logits = torch.tensor([[100.0, -100.0, -100.0]])
        labels = torch.tensor([0], dtype=torch.int64)
        label_smoothing = 0.1
        criterion = optim.CrossEntropyLabelSmooth(logits.size(1),
                                                  label_smoothing)
        expected_value = 400.0 * label_smoothing / 3.0
        res = criterion(logits, labels).item()
        assertAllClose(res, expected_value)
    def testCalMaskNetworkSlimmingByFlops(self):
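        # The threshold is chosen so that at least flops_to_prune FLOPs worth
        # of channels are pruned; the pruned FLOPs may exceed the target but
        # never fall short of it.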
        names = ['one', 'two']
        x = [
            torch.tensor(val, dtype=torch.float32)
            for val in [[1, 2, 5], [3, 6, 0, 1.1]]
        ]
        per_channel_flops = [3, 5]
        prune_info = prune.PruneInfo(names, [0, 1])
        prune_info.add_info_list('per_channel_flops', per_channel_flops)
        flops_total = sum(
            flops * len(val) for flops, val in zip(per_channel_flops, x))

        flops_to_prune = 12
        mask, threshold = prune.cal_mask_network_slimming_by_flops(
            x, prune_info, flops_to_prune)
        prune_info.add_info_list('mask', mask)
        assertAllClose(threshold, 1.1)
        expected = [
            torch.tensor([False, True, True]),
            torch.tensor([True, True, False, False])
        ]
        assertAllClose(mask, expected)
        pruned_flops, info = prune.cal_pruned_flops(prune_info)
        self.assertTrue(pruned_flops >= flops_to_prune)

        flops_to_prune = 13
        mask, threshold = prune.cal_mask_network_slimming_by_flops(
            x, prune_info, flops_to_prune)
        prune_info.add_info_list('mask', mask)
        assertAllClose(threshold, 2)
        expected = [
            torch.tensor([False, False, True]),
            torch.tensor([True, True, False, False])
        ]
        assertAllClose(mask, expected)
        pruned_flops, info = prune.cal_pruned_flops(prune_info)
        self.assertTrue(pruned_flops >= flops_to_prune)
    def _CheckDecay(self,
                    ema,
                    actual_decay,
                    dim,
                    num_updates=None,
                    vars_pre_hooks=None,
                    num_updates_post_hook=None):
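        # Shared helper: registers three values, repeatedly applies EMA
        # updates, and checks the running averages against closed-form
        # expectations, with optional zero-debias scaling for
        # zero-initialized averages.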
        def _Update():
            nonlocal num_updates
            if vars_pre_hooks is not None:
                assert len(vals) == len(vars_pre_hooks)
                for val, var_prehook in zip(vals, vars_pre_hooks):
                    var_prehook(val)
            for name, val in zip(names, vals):
                ema(name, val, num_updates)
            if num_updates_post_hook:
                num_updates = num_updates_post_hook

        def _Scale(dk, steps):
            if ema._zero_debias:
                return 1 - dk**steps
            else:
                return 1

        tens = _Repeat(10.0, dim)
        thirties = _Repeat(30.0, dim)
        var0 = torch.tensor(tens)
        var1 = torch.tensor(thirties)
        # Note that tensor2 is not a Variable but just a plain Tensor resulting
        # from the sum operation.
        tensor2 = var0 + var1
        names = ['tens', 'thirties', 'tensor2']
        vals = [var0, var1, tensor2]
        zero_inits = [False, False, True]
        for name, var, zero_init in zip(names, vals, zero_inits):
            ema.register(name, var, zero_init)

        # Check that averages are initialized correctly.
        assertAllClose(tens, ema.average('tens'))
        assertAllClose(thirties, ema.average('thirties'))
        # tensor2 was registered with zero_init=True, so its average starts
        # from zeros rather than from its current value.
        assertAllClose(_Repeat(0.0, dim), ema.average('tensor2'))

        # Update the averages and check.
        _Update()
        dk = actual_decay

        expected = _Repeat(10.0 * dk + 10.0 * (1 - dk), dim)
        assertAllClose(expected, ema.average('tens'))
        expected = _Repeat(30.0 * dk + 30.0 * (1 - dk), dim)
        assertAllClose(expected, ema.average('thirties'))
        expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk) / _Scale(dk, 1),
                           dim)
        assertAllClose(expected, ema.average('tensor2'))

        # Again, update the averages and check.
        _Update()
        expected = _Repeat(
            (10.0 * dk + 10.0 * (1 - dk)) * dk + 10.0 * (1 - dk), dim)
        assertAllClose(expected, ema.average('tens'))
        expected = _Repeat(
            (30.0 * dk + 30.0 * (1 - dk)) * dk + 30.0 * (1 - dk), dim)
        assertAllClose(expected, ema.average('thirties'))
        expected = _Repeat(((0.0 * dk + (10.0 + 30.0) * (1 - dk)) * dk +
                            (10.0 + 30.0) * (1 - dk)) / _Scale(dk, 2), dim)
        assertAllClose(expected, ema.average('tensor2'))
    def testCompressUpdate(self):
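        # After compress_mask / compress_drop, the optimizer state of the
        # pruned parameter keeps only the masked entries, dropped parameters
        # are no longer updated, and untouched parameters behave as before.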
        params, info = self._construct_info()

        params0 = copy.deepcopy(params)
        apply_gradients([p.grad for p in params], params0)
        optimizer = RMSprop(params0, lr=0.1, momentum=0.5)
        optimizer.step()

        params1 = copy.deepcopy(params)
        apply_gradients([p.grad for p in params], params1)
        optimizer1 = RMSprop(params1, lr=0.1, momentum=0.5)
        optimizer1.step()

        assertAllClose(params0[1], params1[1])
        assertAllClose(params0[2], params1[2])
        assertAllClose(params0[0], params1[0])

        info['var_old'] = params1[0]
        optimizer1.compress_mask(info, verbose=True)
        optimizer1.compress_drop({'var_old': params1[2], 'type': 'variable'})
        info['mask_hook'](info['var_new'], info['var_old'], info['mask'])
        params1[0] = info['var_new']
        params1[0].grad = params0[0].grad.data[info['mask']]

        optimizer1.step()  # params1[2] not updated
        assertAllClose(params0[2], params1[2])

        optimizer.step()
        assertAllClose(params0[1], params1[1])
        assertAllClose(params0[0][info['mask']], params1[0])