def collect_conv_details(model, dataset):
    """Walk `model` and collect per-Conv2d-layer details needed by the pruning agent.

    Uses a SummaryGraph (built from a dataset-shaped dummy input) to obtain
    weight volumes, MAC counts and feature-map geometry for every Conv2d.

    Returns:
        (conv_layers, total_macs, total_nnz) where conv_layers is an
        OrderedDict mapping a running index -> SimpleNamespace of layer facts,
        total_macs is the density-weighted MAC total, and total_nnz is the
        total weights volume.
    """
    dummy_input = get_dummy_input(dataset)
    g = SummaryGraph(model.cuda(), dummy_input.cuda())
    conv_layers = OrderedDict()
    total_macs = 0
    total_nnz = 0
    # `idx` (not `id`) to avoid shadowing the builtin; it still counts ALL
    # modules, matching the original enumeration order.
    for idx, (name, m) in enumerate(model.named_modules()):
        if not isinstance(m, torch.nn.Conv2d):
            continue
        conv = SimpleNamespace()
        conv.t = len(conv_layers)
        conv.k = m.kernel_size[0]
        conv.stride = m.stride
        # Use the SummaryGraph to obtain some other details of the model
        conv_op = g.find_op(normalize_module_name(name))
        assert conv_op is not None
        conv.weights_vol = conv_op['attrs']['weights_vol']
        total_nnz += conv.weights_vol
        conv.macs = conv_op['attrs']['MACs']
        conv_pname = name + ".weight"
        conv_p = distiller.model_find_param(model, conv_pname)
        # Scale the dense MAC count by the channel density (structural sparsity)
        conv.macs *= distiller.density_ch(conv_p)
        total_macs += conv.macs
        # Feature-map geometry: shapes are (N, C, H, W), so [2]/[3] are H/W
        ofm_shape = g.param_shape(conv_op['outputs'][0])
        ifm_shape = g.param_shape(conv_op['inputs'][0])
        conv.ofm_h = ofm_shape[2]
        conv.ofm_w = ofm_shape[3]
        conv.ifm_h = ifm_shape[2]
        conv.ifm_w = ifm_shape[3]
        conv.name = name
        conv.id = idx
        conv_layers[len(conv_layers)] = conv
    return conv_layers, total_macs, total_nnz
def get_macs(self, layer):
    """Return the number of MACs required to compute <layer>'s Convolution.

    If the network was physically thinned, the module's in/out channel counts
    already reflect the pruning, so the dense MAC formula is exact. Otherwise
    we scale the dense count by the channel density of the weights tensor.
    Returns 0 when `layer` is None.
    """
    if layer is None:
        return 0
    conv_module = distiller.model_find_module(self.model, layer.name)
    # MACs = volume(OFM) * (#IFM * K^2)
    dense_macs = ((conv_module.out_channels * layer.ofm_h * layer.ofm_w) *
                  (conv_module.in_channels * layer.k ** 2))
    if PERFORM_THINNING:
        return dense_macs
    # If we didn't physically remove structures, we need to use the
    # structural sparsity to compute MACs
    conv_pname = layer.name + ".weight"
    conv_p = distiller.model_find_param(self.model, conv_pname)
    return dense_macs * distiller.density_ch(conv_p)
def arbitrary_channel_pruning(config, channels_to_remove, is_parallel):
    """Test removal of arbitrary channels.

    The test receives a specification of channels to remove.
    Based on this specification, the channels are pruned and then
    physically removed from the model (via a "thinning" process).
    """
    model, zeros_mask_dict = common.setup_test(config.arch, config.dataset, is_parallel)

    pair = config.module_pairs[0]
    conv2 = common.find_module_by_name(model, pair[1])
    assert conv2 is not None

    # Test that we can access the weights tensor of the first convolution in layer 1
    conv2_p = distiller.model_find_param(model, pair[1] + ".weight")
    assert conv2_p is not None
    assert conv2_p.dim() == 4
    num_channels = conv2_p.size(1)
    cnt_nnz_channels = num_channels - len(channels_to_remove)

    mask = create_channels_mask(conv2_p, channels_to_remove)
    assert distiller.density_ch(mask) == (conv2.in_channels - len(channels_to_remove)) / conv2.in_channels

    # Cool, so now we have a mask for pruning our channels.
    # Use the mask to prune
    zeros_mask_dict[pair[1] + ".weight"].mask = mask
    zeros_mask_dict[pair[1] + ".weight"].apply_mask(conv2_p)
    all_channels = set([ch for ch in range(num_channels)])
    nnz_channels = set(distiller.find_nonzero_channels_list(conv2_p, pair[1] + ".weight"))
    channels_removed = all_channels - nnz_channels
    logger.info("Channels removed {}".format(channels_removed))

    # Now, let's do the actual network thinning
    distiller.remove_channels(model, zeros_mask_dict, config.arch, config.dataset, optimizer=None)
    conv1 = common.find_module_by_name(model, pair[0])
    assert conv1
    assert conv1.out_channels == cnt_nnz_channels
    assert conv2.in_channels == cnt_nnz_channels
    assert conv1.weight.size(0) == cnt_nnz_channels
    assert conv2.weight.size(1) == cnt_nnz_channels
    if config.bn_name is not None:
        # The BatchNorm between conv1 and conv2 must shrink with the channels
        bn1 = common.find_module_by_name(model, config.bn_name)
        assert bn1.running_var.size(0) == cnt_nnz_channels
        assert bn1.running_mean.size(0) == cnt_nnz_channels
        assert bn1.num_features == cnt_nnz_channels
        assert bn1.bias.size(0) == cnt_nnz_channels
        assert bn1.weight.size(0) == cnt_nnz_channels

    dummy_input = common.get_dummy_input(config.dataset)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.1)
    run_forward_backward(model, optimizer, dummy_input)

    # Let's test saving and loading a thinned model.
    # We save 3 times, and load twice, to make sure to cover some corner cases:
    #  - Make sure that after loading, the model still has hold of the thinning recipes
    #  - Make sure that after a 2nd load, there no problem loading (in this case, the
    #  - tensors are already thin, so this is a new flow)
    # (1)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None)
    model_2 = create_model(False, config.dataset, config.arch, parallel=is_parallel)
    model(dummy_input)
    model_2(dummy_input)
    conv2 = common.find_module_by_name(model_2, pair[1])
    assert conv2 is not None
    with pytest.raises(KeyError):
        # A lean checkpoint saved without a scheduler carries no thinning recipes
        model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    compression_scheduler = distiller.CompressionScheduler(model)
    # BUGFIX: was a bare no-op `hasattr(...)` statement; the intent is to assert
    assert hasattr(model, 'thinning_recipes')

    run_forward_backward(model, optimizer, dummy_input)

    # (2)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None,
                    scheduler=compression_scheduler)
    model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done")

    # (3)
    save_checkpoint(epoch=0, arch=config.arch, model=model_2, optimizer=None,
                    scheduler=compression_scheduler)
    model_2 = load_lean_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done 2")
def arbitrary_channel_pruning(config, channels_to_remove):
    """Test removal of arbitrary channels.

    The test receives a specification of channels to remove.
    Based on this specification, the channels are pruned and then
    physically removed from the model (via a "thinning" process).
    """
    model, zeros_mask_dict = common.setup_test(config.arch, config.dataset)

    conv2 = common.find_module_by_name(model, config.conv2_name)
    assert conv2 is not None

    # Test that we can access the weights tensor of the first convolution in layer 1
    conv2_p = distiller.model_find_param(model, config.conv2_name + ".weight")
    assert conv2_p is not None
    assert conv2_p.dim() == 4
    num_filters = conv2_p.size(0)
    num_channels = conv2_p.size(1)
    kernel_height = conv2_p.size(2)
    kernel_width = conv2_p.size(3)
    cnt_nnz_channels = num_channels - len(channels_to_remove)

    # Let's build our 4D mask.
    # We start with a 1D mask of channels, with all but our specified channels set to one
    channels = torch.ones(num_channels)
    for ch in channels_to_remove:
        channels[ch] = 0

    # Now let's expand back up to a 4D mask
    mask = channels.expand(num_filters, num_channels)
    mask.unsqueeze_(-1)
    mask.unsqueeze_(-1)
    mask = mask.expand(num_filters, num_channels, kernel_height, kernel_width).contiguous()

    assert mask.shape == conv2_p.shape
    assert distiller.density_ch(mask) == (conv2.in_channels - len(channels_to_remove)) / conv2.in_channels

    # Cool, so now we have a mask for pruning our channels.
    # Use the mask to prune
    zeros_mask_dict[config.conv2_name + ".weight"].mask = mask
    zeros_mask_dict[config.conv2_name + ".weight"].apply_mask(conv2_p)
    all_channels = set([ch for ch in range(num_channels)])
    nnz_channels = set(distiller.find_nonzero_channels_list(conv2_p, config.conv2_name + ".weight"))
    channels_removed = all_channels - nnz_channels
    logger.info("Channels removed {}".format(channels_removed))

    # Now, let's do the actual network thinning
    distiller.remove_channels(model, zeros_mask_dict, config.arch, config.dataset)
    conv1 = common.find_module_by_name(model, config.conv1_name)
    logger.info(conv1)
    logger.info(conv2)
    assert conv1.out_channels == cnt_nnz_channels
    assert conv2.in_channels == cnt_nnz_channels
    assert conv1.weight.size(0) == cnt_nnz_channels
    assert conv2.weight.size(1) == cnt_nnz_channels
    if config.bn_name is not None:
        # The BatchNorm between conv1 and conv2 must shrink with the channels
        bn1 = common.find_module_by_name(model, config.bn_name)
        assert bn1.running_var.size(0) == cnt_nnz_channels
        assert bn1.running_mean.size(0) == cnt_nnz_channels
        assert bn1.num_features == cnt_nnz_channels
        assert bn1.bias.size(0) == cnt_nnz_channels
        assert bn1.weight.size(0) == cnt_nnz_channels

    # Let's test saving and loading a thinned model.
    # We save 3 times, and load twice, to make sure to cover some corner cases:
    #  - Make sure that after loading, the model still has hold of the thinning recipes
    #  - Make sure that after a 2nd load, there no problem loading (in this case, the
    #  - tensors are already thin, so this is a new flow)
    # (1)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None)
    model_2 = create_model(False, config.dataset, config.arch, parallel=False)
    dummy_input = torch.randn(1, 3, 32, 32)
    model(dummy_input)
    model_2(dummy_input)
    conv2 = common.find_module_by_name(model_2, config.conv2_name)
    assert conv2 is not None
    with pytest.raises(KeyError):
        # A checkpoint saved without a scheduler carries no thinning recipes
        model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    compression_scheduler = distiller.CompressionScheduler(model)
    # BUGFIX: was a bare no-op `hasattr(...)` statement; the intent is to assert
    assert hasattr(model, 'thinning_recipes')

    # (2)
    save_checkpoint(epoch=0, arch=config.arch, model=model, optimizer=None,
                    scheduler=compression_scheduler)
    model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done")

    # (3)
    save_checkpoint(epoch=0, arch=config.arch, model=model_2, optimizer=None,
                    scheduler=compression_scheduler)
    model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done 2")
def test_arbitrary_channel_pruning():
    """Prune channels 0 and 2 of layer1.0.conv2 in resnet20_cifar, thin the
    network, and verify that thinning, checkpoint save, and checkpoint load
    all agree on the new (14-channel) geometry.
    """
    ARCH = "resnet20_cifar"
    DATASET = "cifar10"

    model, zeros_mask_dict = setup_test(ARCH, DATASET)

    conv2 = find_module_by_name(model, "layer1.0.conv2")
    assert conv2 is not None

    # Test that we can access the weights tensor of the first convolution in layer 1
    conv2_p = distiller.model_find_param(model, "layer1.0.conv2.weight")
    assert conv2_p is not None
    assert conv2_p.dim() == 4
    num_filters = conv2_p.size(0)
    num_channels = conv2_p.size(1)
    kernel_height = conv2_p.size(2)
    kernel_width = conv2_p.size(3)

    channels_to_remove = [0, 2]

    # Let's build our 4D mask.
    # We start with a 1D mask of channels, with all but our specified channels set to one
    channels = torch.ones(num_channels)
    for ch in channels_to_remove:
        channels[ch] = 0

    # Now let's expand back up to a 4D mask
    mask = channels.expand(num_filters, num_channels)
    mask.unsqueeze_(-1)
    mask.unsqueeze_(-1)
    mask = mask.expand(num_filters, num_channels, kernel_height, kernel_width).contiguous()

    assert mask.shape == conv2_p.shape
    assert distiller.density_ch(mask) == (conv2.in_channels - len(channels_to_remove)) / conv2.in_channels

    # Cool, so now we have a mask for pruning our channels.
    # Use the mask to prune
    zeros_mask_dict["layer1.0.conv2.weight"].mask = mask
    zeros_mask_dict["layer1.0.conv2.weight"].apply_mask(conv2_p)
    all_channels = set([ch for ch in range(num_channels)])
    channels_removed = all_channels - set(distiller.find_nonzero_channels(conv2_p, "layer1.0.conv2.weight"))
    logger.info(channels_removed)

    # Now, let's do the actual network thinning
    distiller.remove_channels(model, zeros_mask_dict, ARCH, DATASET)
    conv1 = find_module_by_name(model, "layer1.0.conv1")
    logger.info(conv1)
    logger.info(conv2)
    # 16 original channels minus the 2 we removed
    assert conv1.out_channels == 14
    assert conv2.in_channels == 14
    assert conv1.weight.size(0) == 14
    assert conv2.weight.size(1) == 14
    bn1 = find_module_by_name(model, "layer1.0.bn1")
    assert bn1.running_var.size(0) == 14
    assert bn1.running_mean.size(0) == 14
    assert bn1.num_features == 14
    assert bn1.bias.size(0) == 14
    assert bn1.weight.size(0) == 14

    # Let's test saving and loading a thinned model.
    # We save 3 times, and load twice, to make sure to cover some corner cases:
    #  - Make sure that after loading, the model still has hold of the thinning recipes
    #  - Make sure that after a 2nd load, there no problem loading (in this case, the
    #  - tensors are already thin, so this is a new flow)
    save_checkpoint(epoch=0, arch=ARCH, model=model, optimizer=None)
    model_2 = create_model(False, DATASET, ARCH, parallel=False)
    dummy_input = torch.randn(1, 3, 32, 32)
    model(dummy_input)
    model_2(dummy_input)
    conv2 = find_module_by_name(model_2, "layer1.0.conv2")
    assert conv2 is not None
    with pytest.raises(KeyError):
        # A checkpoint saved without a scheduler carries no thinning recipes
        model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    compression_scheduler = distiller.CompressionScheduler(model)
    # BUGFIX: was a bare no-op `hasattr(...)` statement; the intent is to assert
    assert hasattr(model, 'thinning_recipes')

    save_checkpoint(epoch=0, arch=ARCH, model=model, optimizer=None,
                    scheduler=compression_scheduler)
    model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done")

    save_checkpoint(epoch=0, arch=ARCH, model=model_2, optimizer=None,
                    scheduler=compression_scheduler)
    model_2, compression_scheduler, start_epoch = load_checkpoint(model_2, 'checkpoint.pth.tar')
    # Consistency with the later test variants: recipes must survive a 2nd load
    assert hasattr(model_2, 'thinning_recipes')
    logger.info("test_arbitrary_channel_pruning - Done 2")