Ejemplo n.º 1
0
def main():
    """Report the accuracy of a pretrained VGG on clean and attacked CIFAR-10.

    The CIFAR-10 train and test splits are downloaded to ``../data`` if
    needed, the weights stored in ``resources/vgg16_cifar10.bin`` are loaded,
    and the test accuracy is printed twice: once on the unmodified test
    images and once with ``illcm_attack`` passed to ``model_eval``.
    """
    # Select the compute device once and reuse it below.
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Convert images to tensors, then normalize each channel with mean 0.5
    # and standard deviation 0.5.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    def make_loader(is_train):
        # Both splits share every setting except `train` and `shuffle`:
        # only the training split is shuffled.
        split = torchvision.datasets.CIFAR10(root='../data',
                                             train=is_train,
                                             download=True,
                                             transform=transform)
        return torch.utils.data.DataLoader(split,
                                           shuffle=is_train,
                                           batch_size=128,
                                           num_workers=4,
                                           pin_memory=True)

    trainloader = make_loader(True)
    testloader = make_loader(False)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # Initialize the model on the selected device.
    net = VGG().to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(),
                          lr=0.05,
                          momentum=0.9,
                          weight_decay=5e-4)

    # The weights come from a checkpoint; to train from scratch instead, use:
    # model_train(net, trainloader, criterion, optimizer, epochs=5)
    state = torch.load('resources/vgg16_cifar10.bin', map_location='cpu')
    net.load_state_dict(state)
    print('Neural network ready.')

    # Accuracy on the untouched test images.
    clean_accuracy, _ = model_eval(net, testloader, criterion)
    print('Accuracy of the network on the clean test images: %d %%' %
          (100 * clean_accuracy))

    # Accuracy when every batch is perturbed by the attack first.
    adv_accuracy, _ = model_eval(net,
                                 testloader,
                                 criterion,
                                 attack_method=illcm_attack)
    print('Accuracy of the network on the adversarial test images: %d %%' %
          (100 * adv_accuracy))
Ejemplo n.º 2
0
def build_model(config):
    """Build the network, loss function and optimizer described by ``config``.

    Args:
        config: Mapping with the keys ``'model'`` (one of ``'ResNet18'``,
            ``'VGG11'`` or ``'VGG13'``), ``'color_channel'``, ``'lr'``,
            ``'momentum'`` and ``'weight_decay'``.

    Returns:
        A tuple ``(model, loss_function, optimizer)`` where the loss is
        ``nn.CrossEntropyLoss`` and the optimizer is SGD over the model's
        parameters.

    Raises:
        ValueError: If ``config['model']`` names an unsupported architecture.
    """
    model_name = config['model']
    if model_name == 'ResNet18':
        model = ResNet18(color_channel=config['color_channel'])
    elif model_name in ('VGG11', 'VGG13'):
        # Both VGG variants share one constructor keyed by the variant name.
        model = VGG(model_name, color_channel=config['color_channel'])
    else:
        # Fail here with a clear message instead of continuing with
        # model=None and crashing on `model.parameters()` below.
        raise ValueError(
            'wrong model option: {!r} (expected ResNet18, VGG11 or VGG13)'.
            format(model_name))

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=config['lr'],
                          momentum=config['momentum'],
                          weight_decay=config['weight_decay'])

    return model, loss_function, optimizer
Ejemplo n.º 3
0
def main(num_epochs=200,
         learning_rate=0.005,
         momentum=0.5,
         log_interval=500,
         *args,
         **kwargs):
    """Train a VGG model with Adam and test it after every epoch.

    Whenever an epoch does not beat the best test accuracy seen so far, the
    optimizer's learning rate is re-drawn as ``learning_rate * u ** 3`` with
    ``u`` sampled by ``np.random.rand``.

    Args:
        num_epochs: Number of passes over the training loader.
        learning_rate: Initial Adam learning rate; also the scale of every
            re-drawn learning rate.
        momentum: Unused (the Adam optimizer is built without it); accepted
            for backward compatibility.
        log_interval: Print the training loss every ``log_interval`` steps.
            A falsy value disables the training log line.
        *args: Ignored.
        **kwargs: Ignored.
    """
    train_loader, test_loader = loaders.loader(batch_size_train=100,
                                               batch_size_test=1000)
    total_step = len(train_loader)

    model1 = VGG().to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer1 = torch.optim.Adam(model1.parameters(), lr=learning_rate)

    best_accuracy1 = 0

    for epoch in range(num_epochs):
        # ---- Train for one epoch ----
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward
            outputs = model1(images)
            loss1 = criterion(outputs, labels)

            # Backward and optimize
            optimizer1.zero_grad()
            loss1.backward()
            optimizer1.step()

            # Honour `log_interval` (the default of 500 logs at step 500,
            # exactly like the previously hard-coded `i == 499`).
            if log_interval and (i + 1) % log_interval == 0:
                print(
                    "Ordinary Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format(
                        epoch + 1, num_epochs, i + 1, total_step,
                        loss1.item()))

        # ---- Test the model ----
        model1.eval()
        correct1 = 0
        total1 = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)

                predicted = model1(images).argmax(dim=1)
                total1 += labels.size(0)
                correct1 += (predicted == labels).sum().item()

        accuracy1 = correct1 / total1
        if best_accuracy1 >= accuracy1:
            # No improvement: draw a fresh learning rate. `np.asscalar` was
            # removed from NumPy, so take the scalar sample directly.
            curr_lr1 = learning_rate * np.random.rand()**3
            update_lr(optimizer1, curr_lr1)
            print('Test Accuracy of NN: {} % Best: {} %'.format(
                100 * accuracy1, 100 * best_accuracy1))
        else:
            best_accuracy1 = accuracy1
            # NOTE(review): this aliases the live model rather than copying
            # it, so later epochs keep modifying the "best" network.
            net_opt1 = model1
            print('Test Accuracy of NN: {} % (improvement)'.format(
                100 * accuracy1))

        model1.train()
Ejemplo n.º 4
0
def main():
    """Run two-pass (low-res, then high-res) style transfer from the CLI.

    Command-line arguments:
        --image / -i: content image file name, appended to ``image_dir``.
        --style / -s: style image file name, appended to ``image_dir``.

    Both arguments are required. The stylised image is saved as
    ``outputs/<image>_<style>_out_hr.jpg`` where ``<image>`` and ``<style>``
    are the argument values up to their first ``.``.
    """
    import os  # local import: only needed to create the output directory

    parser = argparse.ArgumentParser(description='Style transfer')
    # Both paths are mandatory: without them the image loading below would
    # fail with an unhelpful TypeError on `image_dir + None`.
    parser.add_argument('--image',
                        '-i',
                        type=str,
                        required=True,
                        help='image path e.g. image.jpg')
    parser.add_argument('--style',
                        '-s',
                        type=str,
                        required=True,
                        help='style path e.g. picasso.jpg')

    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()

    def to_batch(tensor):
        # Add a leading batch dimension and move to the GPU when available.
        tensor = tensor.unsqueeze(0)
        return Variable(tensor.cuda() if use_cuda else tensor)

    #
    # Initialise
    #

    # ----------------- get data -----------------
    prep = prep_data(512)
    postpa, postpb = post()

    # ----------------- get model -----------------
    vgg = VGG()
    vgg.load_state_dict(torch.load(model_dir + 'vgg_conv.pth'))
    # The network is only a fixed feature extractor; the image is optimised.
    for param in vgg.parameters():
        param.requires_grad = False
    if use_cuda:
        vgg.cuda()

    # ----------------- load images -----------------
    img_names = [args.style, args.image]
    imgs = [Image.open(image_dir + name) for name in img_names]
    style_image, content_image = [to_batch(prep(img)) for img in imgs]

    # Start the optimisation from a copy of the content image.
    opt_img = Variable(content_image.data.clone(), requires_grad=True)

    # ----------------- define layers -----------------
    style_layers = ['r11', 'r21', 'r31', 'r41', 'r51']
    content_layers = ['r42']
    loss_layers = style_layers + content_layers
    loss_fns = [GramMSELoss()] * len(style_layers) + [nn.MSELoss()
                                                      ] * len(content_layers)
    if use_cuda:
        loss_fns = [loss_fn.cuda() for loss_fn in loss_fns]

    # these are good weights settings:
    style_weights = [1e3 / n**2 for n in [64, 128, 256, 512, 512]]
    content_weights = [1e0]
    weights = style_weights + content_weights

    def compute_targets(style_img, content_img):
        # Gram matrices of the style features followed by the raw content
        # features, all detached so they act as constants.
        style_targets = [
            GramMatrix()(A).detach() for A in vgg(style_img, style_layers)
        ]
        content_targets = [
            A.detach() for A in vgg(content_img, content_layers)
        ]
        return style_targets + content_targets

    #
    # Low res
    #

    print("processing low res...")

    out_img = train(opt_img,
                    vgg,
                    weights,
                    loss_fns,
                    compute_targets(style_image, content_image),
                    optim,
                    loss_layers,
                    postpa,
                    postpb,
                    low_res=True)

    #
    # high res
    #

    print("processing high res...")

    # prep hr images
    prep_hr = prep_data(800)
    style_image, content_image = [to_batch(prep_hr(img)) for img in imgs]

    # now initialise with upsampled lowres result
    opt_img = prep_hr(out_img).unsqueeze(0)
    opt_img = Variable(opt_img.type_as(content_image.data), requires_grad=True)

    out_img_hr = train(opt_img,
                       vgg,
                       weights,
                       loss_fns,
                       compute_targets(style_image, content_image),
                       optim,
                       loss_layers,
                       postpa,
                       postpb,
                       low_res=False)

    # Build the output path once and make sure its directory exists, so the
    # result of the optimisation is not lost to a missing folder.
    content_stem = str(args.image).split(".")[0]
    style_stem = str(args.style).split(".")[0]
    out_path = f'outputs/{content_stem}_{style_stem}_out_hr.jpg'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    out_img_hr.save(out_path)

    print(f'output saved to: {out_path}')
def main():
    """Retrain the ADMM-trained VGG under hard pruning constraints.

    Sets up the CIFAR-10 data and loads the model obtained after ADMM based
    training. ``retrain_model`` (from the utils file) enforces the hard
    sparsity constraints on the weights of that model and then retrains it
    to improve the accuracy. Finally the test accuracy is printed.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = VGG(n_class=10)

    # Path to the saved model after ADMM based training.
    checkpoint_path = 'saved_model/admm_model/cifar10_vgg_acc_0.688'
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host,
    # matching the CPU fallback chosen for `device` above.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)

    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))

    # Training images are randomly shifted (pad + crop) and flipped.
    train_transform = transforms.Compose([
        transforms.Pad(4),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Test images must not be randomly augmented, otherwise the reported
    # accuracy changes from run to run.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_data = datasets.CIFAR10('data/',
                                  train=True,
                                  download=False,
                                  transform=train_transform)

    # Splitting the training dataset into training and validation dataset.
    # NOTE(review): the validation subset shares the augmented training
    # transform because it is a view of the same dataset object.
    n_total = len(train_data)
    val_split = 0.1
    n_val = int(val_split * n_total)
    train_data, val_data = torch.utils.data.random_split(
        train_data, (n_total - n_val, n_val))

    test_data = datasets.CIFAR10('data/',
                                 train=False,
                                 download=False,
                                 transform=test_transform)

    batch_size = 128
    num_epochs = 20
    log_step = 100

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10],
                                                     gamma=0.1)

    #######  Re-Training ##############
    prune_type = 'filter'

    # Number of non-zero filters at each convolutional layer
    num_filters = {
        'conv1': 32,
        'conv2': 64,
        'conv3': 128,
        'conv4': 128,
        'conv5': 256,
        'conv6': 256,
        'conv7': 256,
        'conv8': 256,
    }

    retrain_model(model, train_data, val_data, batch_size, loss_fn,
                  num_epochs, log_step, optimizer, scheduler, num_filters,
                  prune_type, device)

    # Check the test accuracy
    model.eval()
    test_accuracy = eval_accuracy_data(test_data, model, batch_size, device)
    print('Test accuracy is', test_accuracy)
Ejemplo n.º 6
0
    # Convert images to tensors, then normalize each channel with mean 0.5
    # and standard deviation 0.5.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # The CIFAR-10 training split is divided into 40000 training and 10000
    # validation samples; the official test split is kept separate.
    dataset = CIFAR10(root="./data/",
                      train=True,
                      download=True,
                      transform=transform)
    train_set, val_set = torch.utils.data.random_split(dataset, [40000, 10000])
    test_set = CIFAR10(root="./data/",
                       train=False,
                       download=True,
                       transform=transform)

    #model = vgg16(pretrained = False)
    model = VGG('VGG16')
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # NOTE(review): `trainer = trainer(...)` rebinds the name of the callable
    # it invokes. If this code sits inside a function, Python treats
    # `trainer` as a local variable and the call raises UnboundLocalError;
    # consider giving the instance a different name.
    # NOTE(review): device is hard-coded to 'cuda' with no CPU fallback.
    trainer = trainer(train_set,
                      val_set,
                      test_set,
                      model,
                      optimizer,
                      criterion,
                      device='cuda')
    # `history` is not used again in the visible code.
    history = trainer.fit(epochs=10, liveplot=False, es_epochs=5)
    trainer.test()
    trainer.genPlots()
Ejemplo n.º 7
0
    # presumably a logging interval; it is not used in the visible code — TODO confirm
    log = 50

    # Loaders for the 'train' and 'val' splits; `train_size`, `val_size`,
    # `models` and `model_id` are defined outside the visible code.
    train_loader = get_data_loader(train=True,
                                   batch_size=train_size,
                                   split='train',
                                   model=models[model_id])
    val_loader = get_data_loader(train=False,
                                 batch_size=val_size,
                                 split='val',
                                 model=models[model_id])

    # Use the GPU when available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    model = model.to(device)

    # Adam with learning rate `lr`; the StepLR scheduler multiplies the
    # learning rate by `gamma` on every scheduler step (step_size=1).
    optimizer = torch.optim.Adam(model.parameters(), lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=1,
                                                gamma=gamma)
    # weight = torch.tensor([0.2, 0.3, 0.5]).to(device)
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(epochs):
        # Per-epoch accumulators; how they are updated lies beyond the
        # visible code.
        count = 0
        current_loss = 0

        for data, target in train_loader:

            data, target = data.to(device), target.to(device)
            # Clear gradients left over from the previous batch.
            optimizer.zero_grad()
            output = model(data)
Ejemplo n.º 8
0
def main():
    """Run ADMM based training of a VGG network on CIFAR-10.

    The baseline checkpoint is loaded, then each ADMM step (1) trains the
    network with ``train_model_admm``, (2) updates the auxiliary variable
    ``Z`` by projecting ``weight + U`` onto the pruning constraint and
    (3) updates the dual variable ``U`` with the residual ``weight - Z``.
    After every step the model is evaluated on the test set and only the
    best checkpoint is kept under ``saved_model/admm_model/``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = VGG(n_class=10)

    # Path to the baseline model.
    baseline_path = 'saved_model/pre_train_models/cifar10_vgg_acc_0.943'
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host,
    # matching the CPU fallback chosen for `device` above.
    model.load_state_dict(torch.load(baseline_path, map_location=device))
    model.to(device)

    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))

    # Training images are randomly shifted (pad + crop) and flipped.
    train_transform = transforms.Compose([
        transforms.Pad(4),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Test images must not be randomly augmented: the test accuracy decides
    # which checkpoint is kept, so it has to be reproducible.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_data = datasets.CIFAR10('data/',
                                  train=True,
                                  download=False,
                                  transform=train_transform)
    test_data = datasets.CIFAR10('data/',
                                 train=False,
                                 download=False,
                                 transform=test_transform)

    batch_size = 128
    num_epochs = 50
    log_step = 100

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[15, 30],
                                                     gamma=0.1)

    ####### ADMM Training ##############
    ## Parameters

    fc_prune = False  # True if the fully connected layers are also pruned
    prune_type = 'filter'  # Type of structural pruning at the convolutional layers

    # Number of non zero filters at each convolutional layer
    num_filters = {
        'conv1': 32,
        'conv2': 64,
        'conv3': 128,
        'conv4': 128,
        'conv5': 256,
        'conv6': 256,
        'conv7': 256,
        'conv8': 256,
    }

    # ADMM parameters
    rho_val = 1.5e-3
    num_admm_steps = 10

    Z = {}
    U = {}
    rho = {}

    ## Initialization of the variable Z and dual variable U
    # NOTE(review): the fully connected branches below read `prune_ratio`,
    # which is not defined in this function, and never set `rho[name]` for
    # Linear layers; both need attention before enabling `fc_prune`.
    for name, net in model.named_modules():
        if isinstance(net, nn.Conv2d):
            Z[name] = net.weight.clone().detach().requires_grad_(False)
            Z[name] = Projection_structured(Z[name], num_filters[name],
                                            prune_type)
            U[name] = torch.zeros_like(net.weight, requires_grad=False)
            rho[name] = rho_val

        elif fc_prune and isinstance(net, nn.Linear):
            Z[name] = net.weight.clone().detach().requires_grad_(False)
            l_unst = int(len(net.weight.data.reshape(-1, )) * prune_ratio)
            Z[name], _ = Projection_unstructured(Z[name], l_unst)
            U[name] = torch.zeros_like(net.weight, requires_grad=False)

    ## ADMM loop
    checkpoint_template = 'saved_model/admm_model/cifar10_vgg_acc_{:.3f}'
    best_accuracy = 0
    best_checkpoint = None  # path of the checkpoint currently kept on disk

    for step in range(num_admm_steps):
        print('ADMM step number {}'.format(step))
        # First train the VGG model
        train_model_admm(model, train_data, batch_size, loss_fn, optimizer,
                         scheduler, num_epochs, log_step, Z, U, rho, fc_prune,
                         device)

        # Update the variable Z
        for name, net in model.named_modules():
            if isinstance(net, nn.Conv2d):
                Z[name] = Projection_structured(net.weight.detach() + U[name],
                                                num_filters[name], prune_type)

            elif fc_prune and isinstance(net, nn.Linear):
                l_unst = int(len(net.weight.data.reshape(-1, )) * prune_ratio)
                Z[name], _ = Projection_unstructured(
                    net.weight.detach() + U[name], l_unst)

        # Updating the dual variable U
        for name, net in model.named_modules():
            if isinstance(net, nn.Conv2d) or (fc_prune
                                              and isinstance(net, nn.Linear)):
                U[name] = U[name] + net.weight.detach() - Z[name]

        ## Check the test accuracy
        model.eval()
        test_accuracy = eval_accuracy_data(test_data, model, batch_size,
                                           device)
        print('Test accuracy is', test_accuracy)
        if test_accuracy > best_accuracy:
            new_checkpoint = checkpoint_template.format(test_accuracy)
            print(
                'Saving model with test accuracy {:.3f}'.format(test_accuracy))
            torch.save(model.state_dict(), new_checkpoint)
            # Remove the previous best only when it is a different file: two
            # accuracies that agree to three decimals map to the same path,
            # and deleting it would destroy the checkpoint just written.
            if best_checkpoint is not None and best_checkpoint != new_checkpoint:
                print('Removing model with test accuracy {:.3f}'.format(
                    best_accuracy))
                os.remove(best_checkpoint)
            best_accuracy = test_accuracy
            best_checkpoint = new_checkpoint
Ejemplo n.º 9
0
def main():
    """Train the baseline VGG on CIFAR-10 for the ADMM based training.

    Implements the conventional training pipeline for a CNN: the training
    set is split 90/10 into training and validation data, the model is
    trained with SGD and a multi-step learning-rate schedule, and the test
    accuracy is printed. The final weights are saved under
    ``saved_model/pre_train_models/``.
    """
    import os  # local import: only needed to create the checkpoint directory

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = VGG(n_class=10)
    model.to(device)

    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))

    # Training images are randomly shifted (pad + crop) and flipped.
    train_transform = transforms.Compose([
        transforms.Pad(4),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Test images must not be randomly augmented, otherwise the reported
    # accuracy changes from run to run.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_data = datasets.CIFAR10('data/',
                                  train=True,
                                  download=False,
                                  transform=train_transform)

    # Splitting the training dataset into training and validation dataset.
    # NOTE(review): the validation subset shares the augmented training
    # transform because it is a view of the same dataset object.
    n_total = len(train_data)
    val_split = 0.1
    n_val = int(val_split * n_total)
    train_data, val_data = torch.utils.data.random_split(
        train_data, (n_total - n_val, n_val))

    test_data = datasets.CIFAR10('data/',
                                 train=False,
                                 download=False,
                                 transform=test_transform)

    batch_size = 128
    num_epochs = 50
    log_step = 100

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=5e-4,
                                momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[15, 30],
                                                     gamma=0.1)

    train(model,
          train_data,
          val_data,
          batch_size,
          loss_fn,
          optimizer,
          scheduler,
          num_epochs,
          log_step,
          device,
          save_model=True)

    # Evaluate in inference mode, as the sibling ADMM scripts do.
    model.eval()
    test_accuracy = eval_accuracy_data(test_data, model, batch_size, device)
    print('Test accuracy is', test_accuracy)

    # `torch.save` needs a destination; the previous `torch.save(model)`
    # raised TypeError after training had finished. Save the weights using
    # the location and naming the ADMM script loads its baseline from.
    # NOTE(review): confirm this is the intended destination.
    checkpoint_dir = 'saved_model/pre_train_models'
    os.makedirs(checkpoint_dir, exist_ok=True)
    torch.save(
        model.state_dict(),
        '{}/cifar10_vgg_acc_{:.3f}'.format(checkpoint_dir, test_accuracy))