def main():
    parser = argparse.ArgumentParser("multiple gpu with pruning")
    parser.add_argument("--epochs", type=int, default=160)
    parser.add_argument("--retrain", default=False, action="store_true")
    parser.add_argument("--parallel", default=False, action="store_true")
    args = parser.parse_args()
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=True, download=True,
                         transform=transforms.Compose([
                             transforms.Pad(4),
                             transforms.RandomCrop(32),
                             transforms.RandomHorizontalFlip(),
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=64, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=False,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=200, shuffle=False)

    model = VGG(depth=19)
    model.to(device)

    # Train the base VGG-19 model, or load an existing checkpoint
    if args.retrain:
        print('=' * 10 + 'Train the unpruned base model' + '=' * 10)
        epochs = args.epochs
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                    momentum=0.9, weight_decay=1e-4)
        for epoch in range(epochs):
            # Decay the learning rate by 10x at 50% and 75% of training
            # (int() keeps the comparison valid for any epoch count)
            if epoch in [int(epochs * 0.5), int(epochs * 0.75)]:
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= 0.1
            print("epoch {}".format(epoch))
            # The final argument enables the channel sparsity regularization
            # on the BatchNorm scaling factors (network slimming)
            train(model, device, train_loader, optimizer, True)
            test(model, device, test_loader)
        torch.save(model.state_dict(), 'vgg19_cifar10.pth')
    else:
        assert os.path.isfile('vgg19_cifar10.pth'), \
            "can not find checkpoint 'vgg19_cifar10.pth'"
        model.load_state_dict(torch.load('vgg19_cifar10.pth'))

    # Test base model accuracy
    print('=' * 10 + 'Test the original model' + '=' * 10)
    test(model, device, test_loader)
    # top1 = 93.60%

    # Pruning configuration, following the paper 'Learning Efficient
    # Convolutional Networks through Network Slimming': prune 70% of the
    # channels via the BatchNorm scaling factors
    configure_list = [{
        'sparsity': 0.7,
        'op_types': ['BatchNorm2d'],
    }]

    # Prune the model and test accuracy without fine-tuning
    print('=' * 10 + 'Test the pruned model before fine tune' + '=' * 10)
    pruner = SlimPruner(model, configure_list)
    model = pruner.compress()
    if args.parallel:
        if torch.cuda.device_count() > 1:
            print("use {} gpus for pruning".format(torch.cuda.device_count()))
            model = nn.DataParallel(model)
            # model = nn.DataParallel(model, device_ids=[0, 1])
        else:
            print("only detected 1 gpu, falling back to single-gpu pruning")
    model.to(device)
    test(model, device, test_loader)

    # Fine-tune the pruned model for 40 epochs and test accuracy
    print('=' * 10 + 'Fine tuning' + '=' * 10)
    optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.001,
                                         momentum=0.9, weight_decay=1e-4)
    best_top1 = 0
    for epoch in range(40):
        pruner.update_epoch(epoch)
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer_finetune)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model: 'model_path' stores the state_dict of the
            # pruned model, 'mask_path' stores its mask_dict
            pruner.export_model(model_path='pruned_vgg19_cifar10.pth',
                                mask_path='mask_vgg19_cifar10.pth')

    # Test the exported model
    print('=' * 10 + 'Test the exported pruned model after fine tune' + '=' * 10)
    new_model = VGG(depth=19)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg19_cifar10.pth'))
    test(new_model, device, test_loader)
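# ---------------------------------------------------------------------------
# A minimal sketch of the train/test helpers these examples call; the real
# definitions live alongside main() in the example scripts, so the exact
# signatures here are assumptions inferred from the call sites above. The
# optional sparse_bn flag follows the network-slimming recipe: during base
# training an L1 subgradient is added to the BatchNorm scaling factors so
# that unimportant channels shrink toward zero (the 1e-4 coefficient is an
# assumed value, not taken from this example).
import torch
import torch.nn as nn
import torch.nn.functional as F

def train(model, device, train_loader, optimizer, sparse_bn=False):
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        if sparse_bn:
            # Subgradient of the L1 penalty on the BN scaling factors (gamma)
            for m in model.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.weight.grad.data.add_(1e-4 * torch.sign(m.weight.data))
        optimizer.step()

def test(model, device, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            correct += model(data).argmax(dim=1).eq(target).sum().item()
    top1 = 100. * correct / len(test_loader.dataset)
    print('Top-1 accuracy: {:.2f}%'.format(top1))
    return top1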
def main():
    torch.manual_seed(0)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=True, download=True,
                         transform=transforms.Compose([
                             transforms.Pad(4),
                             transforms.RandomCrop(32),
                             transforms.RandomHorizontalFlip(),
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=64, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=False,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=200, shuffle=False)

    model = VGG(depth=16)
    model.to(device)

    # Train the base VGG-16 model
    print('=' * 10 + 'Train the unpruned base model' + '=' * 10)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                momentum=0.9, weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=160, eta_min=0)
    for epoch in range(160):
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer)
        test(model, device, test_loader)
        lr_scheduler.step()
    torch.save(model.state_dict(), 'vgg16_cifar10.pth')

    # Test base model accuracy
    print('=' * 10 + 'Test on the original model' + '=' * 10)
    model.load_state_dict(torch.load('vgg16_cifar10.pth'))
    test(model, device, test_loader)
    # top1 = 93.51%

    # Pruning configuration: prune 80% of the filters in every convolution
    # layer according to their L1 norm
    configure_list = [{
        'sparsity': 0.8,
        'op_types': ['Conv2d'],
    }]

    # Prune the model and test accuracy without fine-tuning
    print('=' * 10 + 'Test on the pruned model before fine tune' + '=' * 10)
    pruner = L1FilterPruner(model, configure_list)
    model = pruner.compress()
    test(model, device, test_loader)
    # top1 = 10.00%

    # Fine-tune the pruned model for 40 epochs with knowledge distillation
    # and test accuracy
    print('=' * 10 + 'Fine tuning' + '=' * 10)
    optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.001,
                                         momentum=0.9, weight_decay=1e-4)
    best_top1 = 0
    # Use the unpruned base model as the teacher for knowledge distillation
    kd_teacher_model = VGG(depth=16)
    kd_teacher_model.to(device)
    kd_teacher_model.load_state_dict(torch.load('vgg16_cifar10.pth'))
    kd = KnowledgeDistill(kd_teacher_model, kd_T=5)
    for epoch in range(40):
        pruner.update_epoch(epoch)
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer_finetune, kd)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model: 'model_path' stores the state_dict of the
            # pruned model, 'mask_path' stores its mask_dict
            pruner.export_model(model_path='pruned_vgg16_cifar10.pth',
                                mask_path='mask_vgg16_cifar10.pth')

    # Test the exported model
    print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10)
    new_model = VGG(depth=16)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth'))
    test(new_model, device, test_loader)
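# ---------------------------------------------------------------------------
# KnowledgeDistill is a helper defined by this example rather than part of the
# pruner API. A minimal sketch under the standard Hinton et al. soft-target
# formulation follows; the class name and kd_T temperature argument mirror how
# it is constructed above, while the loss() method and the weighting shown in
# the usage comment are assumptions about how the helper is consumed.
import torch
import torch.nn.functional as F

class KnowledgeDistill:
    def __init__(self, kd_teacher_model, kd_T=5):
        self.teacher = kd_teacher_model
        self.T = kd_T
        self.teacher.eval()  # the teacher is frozen during fine-tuning

    def loss(self, data, student_out):
        # Soften teacher and student logits with temperature T, match them
        # with KL divergence, and rescale by T^2 so gradient magnitudes stay
        # comparable to the hard-label loss
        with torch.no_grad():
            teacher_out = self.teacher(data)
        return F.kl_div(F.log_softmax(student_out / self.T, dim=1),
                        F.softmax(teacher_out / self.T, dim=1),
                        reduction='batchmean') * self.T * self.T

# Inside train(), the distillation term would be blended with the hard-label
# loss, e.g.:
#     loss = F.cross_entropy(output, target) + 0.5 * kd.loss(data, output)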
def main():
    parser = argparse.ArgumentParser("multiple gpu with pruning")
    parser.add_argument("--epochs", type=int, default=160)
    parser.add_argument("--retrain", default=False, action="store_true")
    parser.add_argument("--parallel", default=False, action="store_true")
    args = parser.parse_args()
    torch.manual_seed(0)
    device = torch.device('cuda')
    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=True, download=True,
                         transform=transforms.Compose([
                             transforms.Pad(4),
                             transforms.RandomCrop(32),
                             transforms.RandomHorizontalFlip(),
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=64, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=False,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=200, shuffle=False)

    model = VGG(depth=16)
    model.to(device)

    # Train the base VGG-16 model, or load an existing checkpoint
    if args.retrain:
        print('=' * 10 + 'Train the unpruned base model' + '=' * 10)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                    momentum=0.9, weight_decay=1e-4)
        # T_max tracks args.epochs so the cosine schedule spans the whole run
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=args.epochs, eta_min=0)
        for epoch in range(args.epochs):
            train(model, device, train_loader, optimizer)
            test(model, device, test_loader)
            lr_scheduler.step()
        torch.save(model.state_dict(), 'vgg16_cifar10.pth')

    # Test base model accuracy
    print('=' * 10 + 'Test on the original model' + '=' * 10)
    model.load_state_dict(torch.load('vgg16_cifar10.pth'))
    test(model, device, test_loader)
    # top1 = 93.51%

    # Pruning configuration, following the paper 'Pruning Filters for
    # Efficient ConvNets': Conv_1, Conv_8, Conv_9, Conv_10, Conv_11 and
    # Conv_12 are pruned with 50% sparsity, matching 'VGG-16-pruned-A'
    configure_list = [{
        'sparsity': 0.5,
        'op_types': ['default'],
        'op_names': ['feature.0', 'feature.24', 'feature.27',
                     'feature.30', 'feature.34', 'feature.37']
    }]

    # Prune the model and test accuracy without fine-tuning
    print('=' * 10 + 'Test on the pruned model before fine tune' + '=' * 10)
    pruner = ActivationMeanRankFilterPruner(model, configure_list)
    model = pruner.compress()
    if args.parallel:
        if torch.cuda.device_count() > 1:
            print("use {} gpus for pruning".format(torch.cuda.device_count()))
            model = nn.DataParallel(model)
        else:
            print("only detected 1 gpu, falling back to single-gpu pruning")
    model.to(device)
    test(model, device, test_loader)
    # top1 = 88.19%

    # Fine-tune the pruned model for 40 epochs and test accuracy
    print('=' * 10 + 'Fine tuning' + '=' * 10)
    optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.001,
                                         momentum=0.9, weight_decay=1e-4)
    best_top1 = 0
    for epoch in range(40):
        pruner.update_epoch(epoch)
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer_finetune)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model: 'model_path' stores the state_dict of the
            # pruned model, 'mask_path' stores its mask_dict
            pruner.export_model(model_path='pruned_vgg16_cifar10.pth',
                                mask_path='mask_vgg16_cifar10.pth')

    # Test the exported model
    print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10)
    new_model = VGG(depth=16)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth'))
    test(new_model, device, test_loader)
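# ---------------------------------------------------------------------------
# The op_names in configure_list above refer to module paths inside VGG's
# 'feature' Sequential. A quick way to confirm which indices hold Conv2d
# layers before writing such a config (plain PyTorch introspection, not a
# pruner API; the helper name is ours):
import torch.nn as nn

def list_prunable_convs(model):
    # Print the module path of every Conv2d so op_names like 'feature.0'
    # or 'feature.24' can be read off directly
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            print(name, '->', module)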
def main():
    torch.manual_seed(0)
    device = torch.device('cuda')
    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=True, download=True,
                         transform=transforms.Compose([
                             transforms.Pad(4),
                             transforms.RandomCrop(32),
                             transforms.RandomHorizontalFlip(),
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=64, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data.cifar10', train=False,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.2023, 0.1994, 0.2010))
                         ])),
        batch_size=200, shuffle=False)

    model = VGG(depth=19)
    model.to(device)

    # Train the base VGG-19 model
    print('=' * 10 + 'Train the unpruned base model' + '=' * 10)
    epochs = 160
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                momentum=0.9, weight_decay=1e-4)
    for epoch in range(epochs):
        # Decay the learning rate by 10x at 50% and 75% of training
        # (int() keeps the comparison valid for any epoch count)
        if epoch in [int(epochs * 0.5), int(epochs * 0.75)]:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
        # The final argument enables the channel sparsity regularization
        # on the BatchNorm scaling factors (network slimming)
        train(model, device, train_loader, optimizer, True)
        test(model, device, test_loader)
    torch.save(model.state_dict(), 'vgg19_cifar10.pth')

    # Test base model accuracy
    print('=' * 10 + 'Test the original model' + '=' * 10)
    model.load_state_dict(torch.load('vgg19_cifar10.pth'))
    test(model, device, test_loader)
    # top1 = 93.60%

    # Pruning configuration, following the paper 'Learning Efficient
    # Convolutional Networks through Network Slimming': prune 70% of the
    # channels via the BatchNorm scaling factors
    configure_list = [{
        'sparsity': 0.7,
        'op_types': ['BatchNorm2d'],
    }]

    # Prune the model and test accuracy without fine-tuning
    print('=' * 10 + 'Test the pruned model before fine tune' + '=' * 10)
    pruner = SlimPruner(model, configure_list)
    model = pruner.compress()
    test(model, device, test_loader)
    # top1 = 93.55%

    # Fine-tune the pruned model for 40 epochs and test accuracy
    print('=' * 10 + 'Fine tuning' + '=' * 10)
    optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.001,
                                         momentum=0.9, weight_decay=1e-4)
    best_top1 = 0
    for epoch in range(40):
        pruner.update_epoch(epoch)
        print('# Epoch {} #'.format(epoch))
        train(model, device, train_loader, optimizer_finetune)
        top1 = test(model, device, test_loader)
        if top1 > best_top1:
            best_top1 = top1
            # Export the best model: 'model_path' stores the state_dict of the
            # pruned model, 'mask_path' stores its mask_dict
            pruner.export_model(model_path='pruned_vgg19_cifar10.pth',
                                mask_path='mask_vgg19_cifar10.pth')

    # Test the exported model
    print('=' * 10 + 'Test the exported pruned model after fine tune' + '=' * 10)
    new_model = VGG(depth=19)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg19_cifar10.pth'))
    test(new_model, device, test_loader)
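# ---------------------------------------------------------------------------
# The pruners above work by masking, so the exported checkpoints keep the
# original layer shapes with zeroed weights; inference is not faster by
# itself. A minimal sketch of turning the exported mask into a genuinely
# smaller model with NNI's ModelSpeedup utility follows; the import path
# matches NNI v1.x and may differ in other releases, so treat it as an
# assumption (the helper name is ours, and VGG comes from the example).
import torch
from nni.compression.torch import ModelSpeedup

def shrink_pruned_model(device):
    model = VGG(depth=19)
    model.load_state_dict(torch.load('pruned_vgg19_cifar10.pth'))
    model.to(device)
    dummy_input = torch.randn(1, 3, 32, 32).to(device)  # CIFAR-10 input shape
    # Trace the model with the dummy input and physically resize the layers
    # according to the exported mask file
    ModelSpeedup(model, dummy_input, 'mask_vgg19_cifar10.pth').speedup_model()
    return model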