Esempio n. 1
0
def build_model(device, model_name, num_classes=10):
    """Build one of the supported architectures and move it to ``device``.

    Supported names (matched case-insensitively): vgg, vggnonorm, resnet,
    preactresnet, googlenet, densenet, resnext, mobilenet, mobilenetv2, dpn,
    shufflenetg2, senet, shufflenetv2.

    :param device: 'cuda' if you have a GPU, 'cpu' otherwise
    :param model_name: One of the models available in the folder 'models'
    :param num_classes: 10 or 100 depending on the chosen dataset; it is only
        forwarded to the 'vgg', 'vggnonorm' and 'resnet' constructors, every
        other architecture is built with its constructor defaults
    :return: The model architecture, wrapped in ``torch.nn.DataParallel``
        when ``device == 'cuda'``
    :raises ValueError: if ``model_name`` is not one of the supported names
    """
    print('==> Building model..')
    key = model_name.lower()

    # Each entry is a zero-argument factory, so only the constructor of the
    # requested architecture is ever looked up and called.
    factories = {
        'vgg': lambda: VGG('VGG19', num_classes=num_classes),
        'vggnonorm': lambda: VGG('VGG19', num_classes=num_classes,
                                 batch_norm=False),
        'resnet': lambda: ResNet18(num_classes=num_classes),
        'preactresnet': lambda: PreActResNet18(),
        'googlenet': lambda: GoogLeNet(),
        'densenet': lambda: DenseNet121(),
        'resnext': lambda: ResNeXt29_2x64d(),
        'mobilenet': lambda: MobileNet(),
        'mobilenetv2': lambda: MobileNetV2(),
        'dpn': lambda: DPN92(),
        'shufflenetg2': lambda: ShuffleNetG2(),
        'senet': lambda: SENet18(),
        'shufflenetv2': lambda: ShuffleNetV2(1),
    }

    factory = factories.get(key)
    if factory is None:
        raise ValueError(
            'Error: the specified model is incorrect ({})'.format(key))

    net = factory().to(device)
    if device == 'cuda':
        # Spread batches over the visible GPUs and let cuDNN auto-tune kernels.
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    return net
Esempio n. 2
0
def model_build(resume):
    """Build a VGG16 network and optionally restore weights from a checkpoint.

    The network is moved to the GPU when one is available (and then wrapped
    in ``torch.nn.DataParallel``); otherwise it stays on the CPU.

    :param resume: when truthy, load the ``state_dict`` stored in
        './checkpoint/ckpt.pkl' into the network
    :return: tuple ``(net, device)`` where ``device`` is 'cuda' or 'cpu'
    :raises AssertionError: if ``resume`` is set and there is no 'checkpoint'
        directory in the current working directory
    """
    print('==> Building model..')
    net = VGG('VGG16')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = net.to(device)
    if device == 'cuda':
        # Use the GPUs in parallel; cudnn.benchmark gives a slight speed-up.
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if resume:
        print('==> Resuming from checkpoint..')
        assert os.path.isdir(
            'checkpoint'), 'Error: no checkpoint directory found!'

        # The checkpoint is a bare state_dict; load it onto the CPU first so
        # it can be restored regardless of the device it was saved from.
        net_dict = torch.load('./checkpoint/ckpt.pkl',
                              map_location=torch.device('cpu'))

        # Checkpoints saved from a DataParallel model prefix every key with
        # `module.`. Strip the prefix only where it is present, so plain
        # checkpoints are not corrupted by blindly dropping 7 characters.
        prefix = 'module.'
        new_state_dict = OrderedDict(
            (k[len(prefix):] if k.startswith(prefix) else k, v)
            for k, v in net_dict.items())

        # The keys are now un-prefixed, so they must be loaded into the
        # underlying module rather than the DataParallel wrapper.
        target = net.module if isinstance(net, torch.nn.DataParallel) else net
        target.load_state_dict(new_state_dict)

    return net, device
Esempio n. 3
0
    # Evaluation fragment: pick an architecture, load a saved model from disk
    # and print its accuracy on the train and test splits.
    # NOTE(review): the enclosing function header is not visible here.
    model_id = 0    # Change this to correspond to the model in the list
    if model_id == 0:
        model = VGG3D()
    elif model_id == 1:
        model = resnet34()
    elif model_id == 2:
        model = ResNet2D(3)
    elif model_id == 3:
        model = VGG(3)
    # NOTE(review): any other model_id leaves `model` unassigned.

    # Batch sizes passed to the two data loaders below.
    train_size  = 15
    test_size   = 15

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    model = model.to(device)

    # NOTE(review): this replaces the model built above with whatever object
    # is stored in "model_m.pt"; the loaded object is not moved to `device`
    # in this fragment.
    model = torch.load("model_m.pt")
    model.eval()

    # NOTE(review): `models` is not defined in the visible code; presumably a
    # list of model identifiers indexed by model_id -- confirm.
    train_loader = get_data_loader(train = True, batch_size = train_size,
                                split = 'train', model = models[model_id])
    test_loader = get_data_loader(train = False, batch_size = test_size,
                                split = 'test', model = models[model_id])

    # `test` returns a pair; only the second element (accuracy) is used.
    _ , train_acc = test(model, train_loader, device)
    print("Final Train Accuracy: ", train_acc)

    _ , test_acc = test(model, test_loader, device)
    print("Final Accuracy: ", test_acc)
def main():
    """Retrain the ADMM-trained VGG model under hard pruning constraints.

    Sets up the CIFAR-10 data, loads the model obtained after ADMM based
    training and hands it to ``retrain_model`` (defined elsewhere), which,
    per the original author's description, enforces the hard sparsity
    constraints on the weights and then retrains to recover accuracy.
    Finally the test accuracy is printed.

    The CIFAR-10 data must already be present under 'data/' (download is
    disabled) and the checkpoint must exist at the hard-coded path below.
    """
    model = VGG(n_class=10)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Path to the saved model after ADMM based training
    model_path = 'saved_model/admm_model/cifar10_vgg_acc_0.688'

    # map_location lets a checkpoint that was saved on a GPU be restored on a
    # CPU-only host instead of failing during deserialization.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)

    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))

    # Random augmentation is applied to the training images only.
    train_transform = transforms.Compose([
        transforms.Pad(4),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # The test set is evaluated without random crops/flips so the reported
    # accuracy is deterministic and measured on the unmodified images.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_data = datasets.CIFAR10('data/', train=True, download=False,
                                  transform=train_transform)

    # Split the training dataset into training and validation subsets.
    # NOTE(review): both subsets share the augmenting train_transform.
    n_train = len(train_data)
    val_split = 0.1
    n_val = int(val_split * n_train)
    train_data, val_data = torch.utils.data.random_split(
        train_data, (n_train - n_val, n_val))

    test_data = datasets.CIFAR10('data/', train=False, download=False,
                                 transform=test_transform)

    batch_size = 128
    num_epochs = 20
    log_step = 100

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10],
                                                     gamma=0.1)

    # ---- Re-training ----
    prune_type = 'filter'

    # Number of non-zero filters at each convolutional layer
    num_filters = {
        'conv1': 32,
        'conv2': 64,
        'conv3': 128,
        'conv4': 128,
        'conv5': 256,
        'conv6': 256,
        'conv7': 256,
        'conv8': 256,
    }

    retrain_model(model, train_data, val_data, batch_size, loss_fn,
                  num_epochs, log_step, optimizer, scheduler, num_filters,
                  prune_type, device)

    # Check the test accuracy
    model.eval()
    test_accuracy = eval_accuracy_data(test_data, model, batch_size, device)
    print('Test accuracy is', test_accuracy)
Esempio n. 5
0
def main():
    """Run ADMM based training of a VGG CNN on CIFAR-10.

    Loads the baseline model, initialises the auxiliary variable ``Z`` (the
    projection of the weights onto the sparsity constraint) and the dual
    variable ``U`` for every ``nn.Conv2d`` layer (and every ``nn.Linear``
    layer when ``fc_prune`` is enabled), then alternates between training the
    network with ``train_model_admm`` and updating ``Z`` and ``U``.

    After every ADMM step the test accuracy is printed; whenever it improves,
    the model ``state_dict`` is saved under 'saved_model/admm_model/' and the
    previously saved best checkpoint is removed.

    The CIFAR-10 data must already be present under 'data/' (download is
    disabled) and the baseline checkpoint must exist at the hard-coded path.

    :raises ValueError: if ``fc_prune`` is enabled without ``prune_ratio``
    """
    model = VGG(n_class=10)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Path to the baseline model
    model_path = 'saved_model/pre_train_models/cifar10_vgg_acc_0.943'

    # map_location lets a checkpoint that was saved on a GPU be restored on a
    # CPU-only host instead of failing during deserialization.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)

    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))

    # Random augmentation is applied to the training images only.
    train_transform = transforms.Compose([
        transforms.Pad(4),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # The test set is evaluated without random crops/flips so the accuracy
    # used for best-checkpoint selection below is deterministic.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_data = datasets.CIFAR10('data/',
                                  train=True,
                                  download=False,
                                  transform=train_transform)
    test_data = datasets.CIFAR10('data/',
                                 train=False,
                                 download=False,
                                 transform=test_transform)

    batch_size = 128
    num_epochs = 50
    log_step = 100

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[15, 30],
                                                     gamma=0.1)

    # ---- ADMM training parameters ----
    fc_prune = False  # True if the fully connected layers are also pruned
    prune_type = 'filter'  # Type of structural pruning at the conv layers

    # Fraction of each Linear layer's weights used to size its unstructured
    # projection. It was never defined in the original code, so enabling
    # fc_prune crashed with a NameError; fail early with a clear message.
    prune_ratio = None
    if fc_prune and prune_ratio is None:
        raise ValueError('prune_ratio must be set when fc_prune is True')

    # Number of non zero filters at each convolutional layer
    num_filters = {
        'conv1': 32,
        'conv2': 64,
        'conv3': 128,
        'conv4': 128,
        'conv5': 256,
        'conv6': 256,
        'conv7': 256,
        'conv8': 256,
    }

    # ADMM parameters
    rho_val = 1.5e-3
    num_admm_steps = 10

    Z = {}
    U = {}
    rho = {}
    best_accuracy = 0
    best_path = None  # checkpoint file of the best model saved so far
    save_template = 'saved_model/admm_model/cifar10_vgg_acc_{:.3f}'

    # Initialization of the variable Z and dual variable U
    for name, net in model.named_modules():
        if isinstance(net, nn.Conv2d):
            weight = net.weight.clone().detach().requires_grad_(False)
            Z[name] = Projection_structured(weight, num_filters[name],
                                            prune_type)
            U[name] = torch.zeros_like(net.weight, requires_grad=False)
            rho[name] = rho_val

        elif fc_prune and isinstance(net, nn.Linear):
            weight = net.weight.clone().detach().requires_grad_(False)
            l_unst = int(net.weight.numel() * prune_ratio)
            Z[name], _ = Projection_unstructured(weight, l_unst)
            U[name] = torch.zeros_like(net.weight, requires_grad=False)
            # NOTE(review): no rho entry is created for Linear layers;
            # confirm train_model_admm does not need one when fc_prune is on.

    # ADMM loop
    for i in range(num_admm_steps):
        print('ADMM step number {}'.format(i))

        # The evaluation at the end of the previous step leaves the model in
        # eval mode; switch back before training again.
        model.train()

        # First train the VGG model
        train_model_admm(model, train_data, batch_size, loss_fn, optimizer,
                         scheduler, num_epochs, log_step, Z, U, rho, fc_prune,
                         device)

        # Update the variable Z
        for name, net in model.named_modules():
            if isinstance(net, nn.Conv2d):
                Z[name] = Projection_structured(net.weight.detach() + U[name],
                                                num_filters[name], prune_type)

            elif fc_prune and isinstance(net, nn.Linear):
                l_unst = int(net.weight.numel() * prune_ratio)
                Z[name], _ = Projection_unstructured(
                    net.weight.detach() + U[name], l_unst)

        # Updating the dual variable U (after every Z has been refreshed)
        for name, net in model.named_modules():
            if isinstance(net, nn.Conv2d) or (fc_prune
                                              and isinstance(net, nn.Linear)):
                U[name] = U[name] + net.weight.detach() - Z[name]

        # Check the test accuracy
        model.eval()
        test_accuracy = eval_accuracy_data(test_data, model, batch_size,
                                           device)
        print('Test accuracy is', test_accuracy)
        if test_accuracy > best_accuracy:
            new_path = save_template.format(test_accuracy)
            print(
                'Saving model with test accuracy {:.3f}'.format(test_accuracy))
            torch.save(model.state_dict(), new_path)

            # Two accuracies can round to the same 3-decimal file name; in
            # that case the save above already overwrote the old checkpoint
            # and removing it would delete the model that was just written.
            if best_path is not None and best_path != new_path:
                print('Removing model with test accuracy {:.3f}'.format(
                    best_accuracy))
                os.remove(best_path)

            best_accuracy = test_accuracy
            best_path = new_path
Esempio n. 6
0
def main():
    """Train the baseline VGG model on CIFAR-10.

    Implements the conventional training pipeline that produces the baseline
    model for the ADMM based training: builds the datasets, trains with SGD
    via ``train`` (defined elsewhere), prints the test accuracy and saves the
    final model ``state_dict``.

    The CIFAR-10 data must already be present under 'data/' (download is
    disabled).
    """
    # Imported locally because this script only needs it for the final save.
    import os

    model = VGG(n_class=10)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))

    # Random augmentation is applied to the training images only.
    train_transform = transforms.Compose([
        transforms.Pad(4),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # The test set is evaluated without random crops/flips so the reported
    # accuracy is deterministic and measured on the unmodified images.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_data = datasets.CIFAR10('data/',
                                  train=True,
                                  download=False,
                                  transform=train_transform)

    # Split the training dataset into training and validation subsets.
    # NOTE(review): both subsets share the augmenting train_transform.
    n_train = len(train_data)
    val_split = 0.1
    n_val = int(val_split * n_train)
    train_data, val_data = torch.utils.data.random_split(
        train_data, (n_train - n_val, n_val))

    test_data = datasets.CIFAR10('data/',
                                 train=False,
                                 download=False,
                                 transform=test_transform)

    batch_size = 128
    num_epochs = 50
    log_step = 100

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=5e-4,
                                momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[15, 30],
                                                     gamma=0.1)

    train(model,
          train_data,
          val_data,
          batch_size,
          loss_fn,
          optimizer,
          scheduler,
          num_epochs,
          log_step,
          device,
          save_model=True)

    # Evaluate in eval mode, as the other scripts in this project do.
    model.eval()
    test_accuracy = eval_accuracy_data(test_data, model, batch_size, device)
    print('Test accuracy is', test_accuracy)

    # torch.save needs a destination; the original call omitted it and raised
    # a TypeError. Save the state_dict using the directory and naming scheme
    # that the ADMM training script loads its baseline from.
    save_dir = 'saved_model/pre_train_models'
    os.makedirs(save_dir, exist_ok=True)
    torch.save(
        model.state_dict(),
        os.path.join(save_dir,
                     'cifar10_vgg_acc_{:.3f}'.format(test_accuracy)))