Example #1
0
    net.train()
    loss = 0
    correct = 0
    total = 0

    print('\n[Epoch: %d] \nTraining' % (epoch))

    for batch_idx, (inputs, targets) in enumerate(target_train_loader):

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        outputs = net(inputs)
        losses = nn.CrossEntropyLoss()(outputs, targets)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        loss += losses.item()
        _, predicted = torch.max(outputs, dim=1)
        correct += predicted.eq(targets.data).cpu().sum().item()
        total += targets.size(0)

        progress_bar(
            batch_idx, len(target_train_loader), "Loss: %.3f | Acc: %.3f%%" %
            (loss / (batch_idx + 1), 100.0 * correct / total))
    print('Test')
    validate(net, target_test_loader)
Example #2
0
    net.load_state_dict(torch.load('ft'))

# Move the network to GPU and mirror it across all visible devices.
if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=5e-4)

break_count = 0
for epoch in range(start_epoch, start_epoch+200):
    print('\nEpoch: %d' %epoch)
    train(net, source_train_loader, optimizer=optimizer, n_epoch=1)
    acc_s = validate(net, source_test_loader)
    acc_t = validate(net, target_test_loader)

    # Checkpoint whenever source-domain accuracy beats the best seen so far.
    # NOTE(review): best_acc is never updated here after a save — unless
    # validate() updates it as a global, every epoch above the initial
    # best_acc overwrites the checkpoint; confirm and add `best_acc = acc_s`
    # after saving if that is the intent.
    if acc_s > best_acc:
        print('Saving..')
        # makedirs guarantees the directory exists; the former duplicate
        # os.path.isdir / os.mkdir check further down was redundant and has
        # been removed.
        if not os.path.exists('./checkpoint'):
            os.makedirs('./checkpoint')

        state = {
            'net': net.module if use_cuda else net,  # unwrap DataParallel before saving
            'acc': acc_s,
            'epoch': epoch,
        }
        torch.save(state, './checkpoint/%s_%s_ckpt.t7' % (model_name, source_dataset))
Example #3
0
def QuantizedNet(dataset,model_name,batch_size,lr,kbits,require_first_test,quantized_first_and_last):
    """Quantize a pretrained network layer-by-layer with L-DNQ.

    Pipeline per layer: estimate the layer's Hessian from a small-batch
    loader, quantize its weights (and bias for FC layers) via ADMM, write
    the quantized tensors back into the model, then fine-tune the remaining
    non-quantized parameters against a full-precision reference network
    (cascaded soft update) and log test accuracy. The final quantized
    state_dict is saved to `quantized_path`.

    NOTE(review): the `model_name`, `dataset` and `kbits` parameters are
    immediately overwritten from the module-level `args`, and `lr` is unused
    (`init_lr` is hard-coded below) — confirm which source is intended.
    """
    use_cuda = torch.cuda.is_available()
    # ---------------------------- Configuration --------------------------
    model_name = args.model_name
    dataset = args.dataset
    # NOTE(review): attribute is `exp_sec` while the local is `exp_spec` —
    # possible typo; verify against the argparse definition.
    exp_spec = args.exp_sec
    # Initialize some folder for data saving
    folder_init(model_name, ['train_record', 'val_record', 'save_models', \
                             'trainable_names/LDNQ%s' %(exp_spec)])
    pretrain_path = './%s/%s_%s_%s_pretrain.pth' %(model_name, dataset, model_name,1)
    quantized_path = './%s/%s_save_models/LDNQ%s.pth' %(model_name, dataset,exp_spec)
    hessian_root = './%s/hessian' %model_name
    kbits = args.kbits
    trainable_names_record_root = './%s/trainable_names/LDNQ%s' %(model_name, exp_spec)
    # Log files for per-layer training progress and validation accuracy.
    train_record = open('./%s/train_record/LDNQ%s.txt' %(model_name, exp_spec), 'w')
    val_record = open('./%s/val_record/LDNQ%s.txt' %(model_name, exp_spec), 'w')
    init_lr = 0.001
    # --------------------------------------------------------------------
    """print ('You are going to quantize model %s into %d bits, using dataset %s, with specification name as %s' \
           %(model_name, kbits, dataset, exp_spec))
    input('Press any to continue. Ctrl+C to break.')"""
    ################
    
    # Load Dataset #
    ################
    """train_loader = get_dataloader(dataset, 'limited', batch_size = batch_size, ratio=0.01)
    print ('Length of train loader: %d' %(len(train_loader)))
    hessian_loader = get_dataloader(dataset, 'limited', batch_size = 2)
    print ('Length of hessian loader: %d' %(len(hessian_loader)))
    test_loader = get_dataloader(dataset, 'test', batch_size = 100)"""

    train_loader = data_loading(DataPath,dataset,'limited',args.batch_size)
    print ('Length of train loader: %d' %(len(train_loader)))
    
    # Tiny-batch loader used only for Hessian estimation.
    hessian_loader = data_loading(DataPath,dataset, 'limited', batch_size = 2)
    print ('Length of hessian loader: %d' %(len(hessian_loader)))
    
    test_loader = data_loading(DataPath,dataset,'test', batch_size = 100)
    print ('Length of test loader: %d' %(len(test_loader)))

    ################
    # Load Models ##
    ################
    # `quantized_net` is progressively quantized in place; `original_net`
    # keeps the full-precision weights as the reference for fine-tuning.
    if dataset =='MNIST':
        quantized_net = MnistResNet()
        pretrain_param = torch.load(pretrain_path)
        #quantized_net.load_state_dict(pretrain_param)

        original_net = MnistResNet()
        #original_net.load_state_dict(pretrain_param)

    elif dataset =='CIFAR10':   
        quantized_net = Resnet20_CIFAR10(1)
        #quantized_net = resnet18() # For quantization of ResNet18 using ImageNet
        pretrain_param = torch.load(pretrain_path)
        quantized_net.load_state_dict(pretrain_param)

        original_net = Resnet20_CIFAR10(1)
        #original_net = resnet18() # For quantization of ResNet18 using ImageNet
        original_net.load_state_dict(pretrain_param)

    if use_cuda:
        print('Dispatch model in %d GPUs' % (len(range(torch.cuda.device_count()))))
        quantized_net.cuda()
        quantized_net = torch.nn.DataParallel(quantized_net, device_ids=range(torch.cuda.device_count()))

        original_net.cuda()
        original_net = torch.nn.DataParallel(original_net, device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = False

    ####################
    # First Validation #
    ####################
    if require_first_test:
        # NOTE(review): `dataset_name` is not defined in this function — this
        # only works if it exists as a module-level global; it may be intended
        # to be `dataset`. Confirm.
        acc = validate(quantized_net, test_loader, dataset_name=dataset_name)
        print('Full-precision accuracy: %.3f' %acc)
        val_record.write('Full-precision accuracy: %.3f\n' %acc)

    # Generate layer name list: layers to be quantized
    layer_collection_list = generate_layer_name_collections(quantized_net, model_name=model_name, quantized_first_last_layer=quantized_first_and_last)

    ###############
    # Begin L-DNQ #
    ###############
    for layer_idx, layer_name in enumerate(layer_collection_list):

        print ('[%s] Process layer %s' % (datetime.now(), layer_name))
        if train_record is not None:
            train_record.write('Process layer %s\n' % layer_name)
        if val_record is not None:
            val_record.write('Process layer %s\n' % layer_name)

        state_dict = quantized_net.state_dict()
        # Layers named 'linear'/'fc' are treated as fully-connected ('F',
        # weight + bias); everything else as conv-like ('R', weight only).
        if 'linear' in layer_name or 'fc' in layer_name:
            # Generate Hessian
            hessian = generate_hessian(quantized_net, hessian_loader, layer_name, layer_type='F')
            updated_weight = state_dict['%s.weight' % (layer_name)].cpu().numpy() if use_cuda else \
                state_dict['%s.weight' % (layer_name)].numpy()
            updated_bias = state_dict['%s.bias' % (layer_name)].cpu().numpy() if use_cuda else \
                state_dict['%s.bias' % (layer_name)].numpy()
            # Perform Quantization
            quantized_weight, quantized_bias = ADMM_quantization(layer_name=layer_name, layer_type='F',
                                                                 kernel=updated_weight, bias=updated_bias,
                                                                 hessian=hessian, kbits=kbits)
            state_dict['%s.weight' % (layer_name)] = torch.FloatTensor(quantized_weight)
            state_dict['%s.bias' % (layer_name)] = torch.FloatTensor(quantized_bias)
        else:
            # Generate Hessian
            hessian = generate_hessian(quantized_net, hessian_loader, layer_name, layer_type='R', stride_factor = 1)
            updated_kernel = state_dict['%s.weight' % (layer_name)].cpu().numpy() if use_cuda else \
                state_dict['%s.weight' % (layer_name)].numpy()
            # Perform Quantization
            quantized_kernel = ADMM_quantization(layer_name=layer_name, layer_type='R',
                                                 kernel=updated_kernel, bias=None, hessian=hessian, kbits=kbits)
            # Step 2: Assignment
            # Assign processed layer with quantized weights
            state_dict['%s.weight' % (layer_name)] = torch.FloatTensor(quantized_kernel)

        ###########################
        # Cascaded Weights Update #
        ###########################
        quantized_net.load_state_dict(state_dict)
        print ('[%s] Finish layer %s' % (datetime.now(), layer_name))
        # Generate the non-quantized / trainable parameters
        trainable_parameters, trainable_names = \
            generate_trainable_parameters(
                quantized_net.named_parameters(), layer_name + '.weight',
                model_name=model_name, quantized_first_last_layer=quantized_first_and_last
            )
        print ('Length of trainable parameters: %d' %(len(trainable_names)))
        # Record which parameters remained trainable after this layer.
        trainable_names_record = open('%s/%s.txt' % (trainable_names_record_root, layer_name), 'w')
        for name in trainable_names:
            trainable_names_record.write(name + '\n')
        trainable_names_record.close()

        # Fine-tune only the still-unquantized parameters toward the
        # full-precision reference network's behavior.
        optimizer = optim.SGD(trainable_parameters, lr=init_lr, momentum=0.9, weight_decay=5e-4)
        cascade_soft_update(quantized_net, original_net, train_loader, dataset_name=dataset_name,
                            optimizer=optimizer, train_record=train_record)
        # Record test acc
        acc = validate(quantized_net, test_loader, dataset_name=dataset_name, val_record=val_record)

    # Save the unwrapped (non-DataParallel) state_dict.
    torch.save(quantized_net.module.state_dict() if use_cuda else quantized_net.state_dict(), quantized_path)
    train_record.close()
    val_record.close()
def main_worker(gpu, args):
    """Load a saved model and evaluate it on the validation set.

    Creates the architecture named by ``args.arch``, restores weights from
    ``args.model_file`` (stripping any ``module.`` DataParallel prefixes),
    moves the model to the requested device(s), builds the validation
    DataLoader and runs ``validate`` once.

    NOTE(review): ``model``, ``val_dataset`` and ``comment`` are only
    assigned inside the ``args.dataset == 'imagenet'`` branches — any other
    dataset value raises NameError further down; confirm callers only pass
    'imagenet'.
    """
    args.gpu = gpu

    if not args.cpu:
        if args.gpu is not None:
            print("Use GPU: {} for training".format(args.gpu))
    else:
        print("Use CPU")

    # create model
    if args.dataset == 'imagenet':
        if args.arch in mymodel_names:
            model = mymodels.__dict__[args.arch]()
        else:
            print("=> creating model '{}'".format(args.arch))
            model = models.__dict__[args.arch]()

    # load model
    if os.path.isfile(args.model_file):
        print("=> loading model '{}'".format(args.model_file))
        checkpoint = torch.load(args.model_file)
        d = checkpoint['state_dict']
        # Checkpoints saved from a DataParallel model prefix keys with
        # 'module.'; strip it so the keys match the unwrapped model.
        for old_key in list(d.keys()):
            if 'module.' in old_key:
                d[old_key.replace('module.','')] = d.pop(old_key,None) 
        model.load_state_dict(d)
        print("=> loaded model '{}'".format(args.model_file))
    else:
        print("=> no model found at '{}'".format(args.model_file))
        return

    if not args.cpu:
        if args.gpu is not None:
            # Pin everything to the single requested GPU.
            torch.cuda.set_device(args.gpu)
            model = model.cuda(args.gpu)
        else:
            # DataParallel will divide and allocate batch_size to all available GPUs
            if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
                # AlexNet/VGG convention: parallelize only the conv features.
                model.features = torch.nn.DataParallel(model.features)
                model.cuda()
            else:
                model = torch.nn.DataParallel(model).cuda()

        cudnn.benchmark = True

    # Data loading code
    if args.dataset == 'imagenet': 
        # ImageNet
        valdir = os.path.join(args.data, 'val_dir')

        if args.nonblacklist:
            # Custom folder dataset that excludes blacklisted val images.
            val_dataset = mydatasets.ImageNetValFolder(
                valdir, 
                args.test_transform 
                )
            comment = 'non-blacklisted validation set'
        else:
            val_dataset = datasets.ImageFolder(
                valdir,
                args.test_transform 
                )
            comment = 'whole validation set'

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss()
    if not args.cpu:
        criterion = criterion.cuda(args.gpu)

    # evaluate on validation set
    validate(val_loader, model, criterion, args)

    # @ Primary worker, show the final results
    print('on {}'.format(comment))
"""
This code validate the *.pth file when generated
"""

import torch
from utils.dataset import get_dataloader
from utils.train import validate
from models_CIFAR9STL9.tucker_CIFARNet_dual import CIFARNet
# from models_CIFAR9STL9.tucker_CIFARNet_dual import CIFARNet2 as CIFARNet
# Initial model
net = CIFARNet()

pretrain_param = torch.load('./checkpoint/tucker_CIFARNet9_dual.pth')
# pretrain_param = torch.load('./checkpoint/tucker_CIFARNet9_dual_2.pth')
net.load_state_dict(pretrain_param)

# Load dataset
test_loader = get_dataloader('STL9', 'test', 100)

net.cuda()
validate(net, test_loader)
    '6': [48, 48]
}

'''
N = len(model.features._modules.keys())
for i, key in enumerate(model.features._modules.keys()):

    # if i >= N - 2:
    #     break
    if isinstance(model.features._modules[key], torch.nn.modules.conv.Conv2d):
        conv_layer = model.features._modules[key]
        if use_cp:
            rank = max(conv_layer.weight.data.numpy().shape)//2
            decomposed = cp_decomposition_conv_layer(conv_layer, rank)
        else:
            decomposed = tucker_decomposition_conv_layer(conv_layer, None)

        model.features._modules[key] = decomposed
'''

# Scan the classifier for fully-connected layers.
# NOTE(review): `fc_linear` is bound but never used afterwards — this loop
# currently has no effect. It looks like truncated decomposition code
# (compare the commented-out conv-decomposition block above); confirm whether
# an FC decomposition step is missing here.
for i, key in enumerate(model.classifier._modules.keys()):

    if isinstance(model.classifier._modules[key], nn.Linear):
        fc_linear = model.classifier._modules[key]


# torch.save(model, './checkpoint/%s_CIFARNet9.p' %('cp' if use_cp else 'tucker'))
# Evaluate the (decomposed) model on the CIFAR9 test split.
test_loader = get_dataloader('CIFAR9', 'test', 128)
model.cuda()
validate(model, test_loader)
Example #7
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training worker for single-GPU, DataParallel or DDP runs.

    Builds the model named by ``args.arch``, optionally resumes model
    weights, optimizer state and the global ``stats`` history from
    ``args.resume``, sets up data loaders (with a DistributedSampler when
    distributed), then runs the train/validate loop. Progress plotting and
    checkpointing happen only on the primary worker.

    NOTE(review): ``model`` is only created inside the
    ``args.dataset == 'imagenet'`` branch — other dataset values raise
    NameError below; confirm callers only pass 'imagenet'.
    """
    global stats
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    
    # create model
    if args.dataset == 'imagenet':
        if args.arch in mymodel_names:
            model = mymodels.__dict__[args.arch](num_classes=1000)
        elif args.pretrained:
            print("=> using pre-trained model '{}'".format(args.arch))
            model = models.__dict__[args.arch](pretrained=True)
        else:
            print("=> creating model '{}'".format(args.arch))
            model = models.__dict__[args.arch]()
    print(model)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            # only model
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
            # Strip 'module.' prefixes left by DataParallel/DDP-saved state.
            for old_key in list(checkpoint['state_dict'].keys()):
                if 'module' in old_key:
                    new_key = old_key.replace('module.','')
                    checkpoint['state_dict'][new_key] = checkpoint['state_dict'].pop(old_key, None)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {}) for model"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return


    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            # AlexNet/VGG convention: parallelize only the conv features.
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # Data loading code
    if args.dataset == 'imagenet': 
        # ImageNet
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val_dir')

        train_dataset = datasets.ImageFolder(
            traindir,
            args.train_transform
            )
        val_dataset = datasets.ImageFolder(
            valdir, 
            args.test_transform
            )

    # Data Sampling
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    # `args.loss` is a dict; 'name' is popped for dispatch, the remaining
    # entries are the loss's keyword arguments, and 'name' is put back below.
    lossname = args.loss.pop('name')
    if lossname == 'Softmax':
        criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    elif lossname == 'LargeMarginInSoftmax':
        criterion = LargeMarginInSoftmaxLoss(**args.loss).cuda(args.gpu)
    else:
        raise ValueError("loss function of {} is not supported".format(lossname))

    args.loss['name'] = lossname

    optimizer = torch.optim.SGD(model.parameters(), args.lrs[0],
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        # other state parameters
        # NOTE: `checkpoint` here is the dict loaded in the first resume
        # block above (same guard condition, same file).
        if os.path.isfile(args.resume):
            args.start_epoch = checkpoint['epoch']
            stats = checkpoint['stats']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {}) for the others"
                  .format(args.resume, checkpoint['epoch']))

    cudnn.benchmark = True

    # Do Train/Eval
    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    # Primary worker = non-distributed run, or the rank-0 process per node.
    primary_worker = not args.multiprocessing_distributed or \
                    (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0)

    if primary_worker:
        progress = ProgressPlotter( titles=('LR', 'Loss', 'Top-1 Error.', 'Top-5 Error.'), 
            legends=(('learning rate',),('train','val'),('train','val'),('train','val')), ylims=((1e-6,1),(0,10),(0,100),(0,100)),
            yscales=('log','linear','linear','linear'), 
            vals=((args.lrs[:args.start_epoch],), (stats['train_loss'],stats['test_loss']), (stats['train_err1'],stats['test_err1']), (stats['train_err5'],stats['test_err5']) ) ) 

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reshuffle the distributed sampler deterministically per epoch.
            train_sampler.set_epoch(epoch)
        lr = adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        trnerr1, trnerr5, trnloss = train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        valerr1, valerr5, valloss = validate(val_loader, model, criterion, args)

        # statistics
        stats['train_err1'].append(trnerr1)
        stats['train_err5'].append(trnerr5)
        stats['train_loss'].append(trnloss)
        stats['test_err1'].append(valerr1)
        stats['test_err5'].append(valerr5)
        stats['test_loss'].append(valloss)

        # remember best err@1
        # (current epoch's error is already in the list, so <= means "ties
        # with or beats the best so far")
        is_best = valerr1 <= min(stats['test_err1'])

        # @ Primary worker, show and save results
        if primary_worker:
            # progress.plot( ((trnloss,valloss), (trnerr1, valerr1), (trnerr5, valerr5)) )
            progress.plot( ((lr,), (trnloss,valloss), (trnerr1, valerr1), (trnerr5, valerr5)) )
            progress.save(filename=os.path.join(args.out_dir, args.pdf_filename))
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'stats': stats,
                'optimizer' : optimizer.state_dict(),
                'args' : args
            }, is_best, args.save_last_checkpoint, filename=os.path.join(args.out_dir, 'checkpoint-epoch{:d}.pth.tar'.format(epoch+1)))

    # @ Primary worker, show the final results
    if primary_worker:
        minind = stats['test_err1'].index(min(stats['test_err1']))
        print(' *BEST* Err@1 {:.3f} Err@5 {:.3f}'.format(stats['test_err1'][minind], stats['test_err5'][minind]))
Example #8
0
                                                                        negative_mining_ratio=10)
                # calc loss
                l = calc_loss(cls_loss, bbox_loss, cls_preds, cls_labels,
                              bbox_preds, bbox_labels, bbox_masks)
            l.backward()
            trainer.step(batch_size)
            acc_sum += cls_eval(cls_preds, cls_labels)
            n += cls_labels.size
            mae_sum += bbox_eval(bbox_preds, bbox_labels, bbox_masks)
            m += bbox_labels.size
        print('epoch %2d, class err %.2e, bbox mae %.2e, time %.1f sec' % (
            epoch + 1, 1 - acc_sum / n, mae_sum / m, time.time() - start))
        # Checkpoint
        if (epoch + 1) % 5 == 0:
            net.export('FPN')
            _1, _2, _3 = validate(val_iter, net, ctx)
            val_recorder[int(epoch / 5)] = (_1, _2, _3)
            print(val_recorder)
    # plt.figure()
    # plt.plot(val_recorder)
    # plt.title("validating curve");
    # plt.show()


def predict(X):
    """Run SSD-style detection on one input batch.

    Forwards X through the global `net` on context `ctx`, converts class
    scores to probabilities, and decodes anchors + box offsets into
    detections via MultiBoxDetection.

    Returns an NDArray of detection rows [class_id, score, coords...];
    rows with class_id == -1 (suppressed/background) are filtered out.
    If nothing valid remains, a single all-zero placeholder row is returned.
    """
    anchors, cls_preds, bbox_preds = net(X.as_in_context(ctx))
    # (batch, anchors, classes) -> (batch, classes, anchors), the layout
    # MultiBoxDetection expects.
    cls_probs = cls_preds.softmax().transpose((0, 2, 1))
    output = nd.contrib.MultiBoxDetection(cls_probs, bbox_preds, anchors)
    # Keep only rows with a valid class id (-1 marks suppressed entries).
    idx = [i for i, row in enumerate(output[0]) if row[0].asscalar() != -1]
    if not idx:  # idiomatic emptiness test (was `idx == []`)
        return nd.array([[0, 0, 0, 0, 0, 0, 0]])
    return output[0, idx]
Example #9
0
File: main.py Project: wps712/L-DNQ
    quantized_net.cuda()
    quantized_net = torch.nn.DataParallel(quantized_net,
                                          device_ids=range(
                                              torch.cuda.device_count()))

    original_net.cuda()
    original_net = torch.nn.DataParallel(original_net,
                                         device_ids=range(
                                             torch.cuda.device_count()))
    cudnn.benchmark = True

####################
# First Validation #
####################
# Optional sanity check: measure full-precision accuracy before quantization.
if args.require_first_test:
    acc = validate(quantized_net, test_loader, dataset_name=dataset_name)
    print('Full-precision accuracy: %.3f' % acc)
    val_record.write('Full-precision accuracy: %.3f\n' % acc)

# Generate layer name list: layers to be quantized
layer_collection_list = generate_layer_name_collections(
    quantized_net,
    model_name=model_name,
    quantized_first_last_layer=args.quantized_first_and_last)

###############
# Begin L-DNQ #
###############
for layer_idx, layer_name in enumerate(layer_collection_list):

    print('[%s] Process layer %s' % (datetime.now(), layer_name))