Example 1
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

if args.sp:
    from sp_mbnet import sp_mbnet as mbnet
else:
    from mobilenetv1 import MobileNetV1 as mbnet

assert args.load
assert os.path.isfile(args.load)
#    sys.exit(0)

checkpoint = torch.load(args.load)

model = mbnet(dataset=args.dataset, cfg=checkpoint['cfg'])

# load weights, otherwise, only the arch is used
model.load_state_dict(checkpoint['state_dict'])
model.to(device)

train_loader, test_loader = \
        get_data_loader(dataset = args.dataset, train_batch_size = args.batch_size, test_batch_size = args.test_batch_size, use_cuda=args.cuda)


def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
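The snippet is cut off inside test(). A minimal sketch of how such an evaluation loop is typically completed, assuming F.cross_entropy with reduction='sum' for the loss and argmax-based top-1 accuracy (none of which is visible in the original snippet); test_loader, args and the model come from the code above:

import torch
import torch.nn.functional as F

def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # accumulate the summed batch loss and count correct top-1 predictions
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    acc = 100. * correct / len(test_loader.dataset)
    print('Test set: average loss {:.4f}, accuracy {:.2f}%'.format(test_loss, acc))
    return acc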
Example 2
ch = logging.StreamHandler()
fh = logging.FileHandler(os.path.join(args.save, logging_file_path))

formatter = logging.Formatter('%(asctime)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)

log.addHandler(fh)
log.addHandler(ch)
#########################################################

assert args.load
assert os.path.isfile(args.load)
log.info("=> loading checkpoint '{}'".format(args.load))
checkpoint = torch.load(args.load)
model = mbnet(dataset=args.dataset, cfg=checkpoint['cfg']).to(device)

from collections import OrderedDict

new_state_dict = OrderedDict()

model.load_state_dict(checkpoint['state_dict'])
log.info("=> loaded checkpoint '{}' ".format(args.load))
del checkpoint


def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
Example 3

# define loss function (criterion) and optimizer
num_classes = 1000

# Data loading code
train_loader, val_loader = \
    get_data_loader(args.data, train_batch_size=args.batch_size, test_batch_size=args.test_batch_size, workers=args.workers)

## loading pretrained model ##
assert args.load
assert os.path.isfile(args.load)
print("=> loading checkpoint '{}'".format(args.load))
checkpoint = torch.load(args.load)

model = mbnet(cfg=checkpoint['cfg'])
total_flops = print_model_param_flops(model, 224, multiply_adds=False) 
print(total_flops)

if args.use_cuda: 
    model.cuda()

# select the model keys, skipping the auxiliary .y/.v and *_params entries
selected_model_keys = [
    k for k in model.state_dict().keys()
    if not (k.endswith('.y') or k.endswith('.v') or k.startswith('net_params')
            or k.startswith('y_params') or k.startswith('v_params'))
]
saved_model_keys = list(checkpoint['state_dict'].keys())
from collections import OrderedDict
new_state_dict = OrderedDict()
if len(selected_model_keys) == len(saved_model_keys):

    for k0, k1 in zip(selected_model_keys, saved_model_keys):
        new_state_dict[k0] = checkpoint['state_dict'][k1]   
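    # NOTE (assumption, not shown in this snippet): the remapped weights are
    # presumably loaded into the model afterwards, e.g.
    #   model.load_state_dict(new_state_dict, strict=False)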
    
def main():
    global best_prec1, log

    batch_size = args.batch_size * max(1, args.num_gpus)
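    # linear learning-rate scaling: grow the base LR with the effective batch size
    # relative to 256 (note the integer division zeroes the LR when batch_size < 256)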
    args.lr = args.lr * (batch_size // 256)
    print(batch_size, args.lr, args.num_gpus)

    num_classes = 1000
    num_training_samples = 1281167
    args.num_batches_per_epoch = num_training_samples // batch_size

    assert args.exp_name
    args.save = os.path.join(args.save, args.exp_name)
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    hyper_str = "run_{}_lr_{}_decay_{}_b_{}_gpu_{}".format(args.epochs, args.lr, \
                                args.lr_mode, batch_size, args.num_gpus)

    ## bn-based pruning base model ##
    if args.sr:
        hyper_str = "{}_sr_grow_{}_s_{}".format(hyper_str, args.m, args.s)
    ## using amc configuration ##
    elif args.amc:
        hyper_str = "{}_amc".format(hyper_str)
    elif args.sp:
        hyper_str = "{}_sp_base_{}".format(hyper_str, args.sp_cfg)
    else:
        hyper_str = "{}_grow_{}".format(hyper_str, args.m)

    args.model_save_path = \
            os.path.join(args.save, 'mbv1_{}.pth.tar'.format(hyper_str))

    #args.logging_file_path = \
    #        os.path.join(args.save, 'mbv1_{}.log'.format(hyper_str))
    #print(args.model_save_path, args.logging_file_path)

    ##########################################################
    ## create file handler which logs even debug messages
    #import logging
    #log = logging.getLogger()
    #log.setLevel(logging.INFO)

    #ch = logging.StreamHandler()
    #fh = logging.FileHandler(args.logging_file_path)

    #formatter = logging.Formatter('%(asctime)s - %(message)s')
    #ch.setFormatter(formatter)
    #fh.setFormatter(formatter)
    #log.addHandler(fh)
    #log.addHandler(ch)
    #########################################################
    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # Use CUDA
    use_cuda = torch.cuda.is_available()
    args.use_cuda = use_cuda

    # Random seed
    random.seed(0)
    torch.manual_seed(0)
    if use_cuda:
        torch.cuda.manual_seed_all(0)
        device = 'cuda'
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.evaluate == 1:
        device = 'cuda:0'

    if args.sp:
        model = mbnet(default=args.sp_cfg)
    else:
        #model = mobilenetv1(amc=args.amc, m=args.m)
        model = mbnet(amc=args.amc, m=args.m)
        print(model.cfg)

    cfg = model.cfg

    total_params = print_model_param_nums(model.cpu())
    total_flops = print_model_param_flops(model.cpu(),
                                          224,
                                          multiply_adds=False)
    print(total_params, total_flops)

    if not args.distributed:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    if args.label_smoothing:
        criterion = CrossEntropyLabelSmooth(num_classes).cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()

    ### split parameters into no-weight-decay (bn / bias) and weight-decay groups ###
    no_wd_params, wd_params = [], []
    for name, param in model.named_parameters():
        if param.requires_grad:
            if ".bn" in name or '.bias' in name:
                no_wd_params.append(param)
            else:
                wd_params.append(param)
    no_wd_params = nn.ParameterList(no_wd_params)
    wd_params = nn.ParameterList(wd_params)

    optimizer = torch.optim.SGD([
        {
            'params': no_wd_params,
            'weight_decay': 0.
        },
        {
            'params': wd_params,
            'weight_decay': args.weight_decay
        },
    ],
                                args.lr,
                                momentum=args.momentum,
                                nesterov=True)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.model_save_path):
            print("=> loading checkpoint '{}'".format(args.model_save_path))
            checkpoint = torch.load(args.model_save_path)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.model_save_path, checkpoint['epoch']))
        else:
            pass
            #print("=> no checkpoint found at '{}'".format(args.model_save_path))

    # Data loading code
    train_loader, val_loader = \
        get_data_loader(args.data, train_batch_size=batch_size, test_batch_size=32, workers=args.workers)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
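            # NOTE: train_sampler is not defined in this snippet; in distributed mode
            # it would presumably be the DistributedSampler used to build train_loader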
            train_sampler.set_epoch(epoch)

        #adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'cfg': cfg,
                'sr': args.sr,
                'amc': args.amc,
                's': args.s,
                'args': args,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, args.model_save_path)
Example 5
fh = logging.FileHandler(logging_file_path)

formatter = logging.Formatter('%(asctime)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)

log.addHandler(fh)
log.addHandler(ch)
#########################################################

from dataloader import get_data_loader
train_loader, test_loader = \
        get_data_loader(dataset = args.dataset, train_batch_size = args.batch_size, test_batch_size = args.test_batch_size, use_cuda=args.cuda)

model = mbnet(cfg=checkpoint['cfg'],
              dataset=args.dataset,
              dummy_layer=args.layer)
# load weights, otherwise, only the arch is used
print(model.cfg)
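# save the channel configuration to disk; args.rd presumably indexes the pruning
# round (assumption based on the file naming below)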
if not os.path.exists('config'):
    os.makedirs('config')
if args.layer != -1:
    pickle.dump(
        model.cfg,
        open('config/{}_{}.pkl'.format(args.dataset, str(args.rd)), 'wb'))
else:
    pickle.dump(
        model.cfg,
        open('config/{}_{}.pkl'.format(args.dataset, str(args.rd + 1)), 'wb'))

if not args.retrain:
Example 6
log.setLevel(logging.INFO)

ch = logging.StreamHandler()
fh = logging.FileHandler(os.path.join(args.save, logging_file))
formatter = logging.Formatter('%(asctime)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)

log.addHandler(fh)
log.addHandler(ch)
#########################################################

train_loader, test_loader = \
        get_data_loader(dataset = args.dataset, train_batch_size = args.batch_size, test_batch_size = args.test_batch_size, use_cuda=args.cuda)

model = mbnet(dataset = args.dataset)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

# additional subgradient descent on the sparsity-induced penalty term
def updateBN():
    for m in model.modules():
        if isinstance(m, MbBlock):
            m.bn2.weight.grad.data.add_(args.s*torch.sign(m.bn2.weight.data))  # L1
        elif isinstance(m, ConvBlock):
            m.bn.weight.grad.data.add_(args.s*torch.sign(m.bn.weight.data))  # L1

def train(epoch):
    model.train()
    #global history_score
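Example 6's train() is cut off above. In network-slimming style training the L1 subgradient added by updateBN() goes between the backward pass and the optimizer step; a minimal sketch of such a loop, assuming an args.sr flag gates the penalty and F.cross_entropy is the loss (neither is visible in this snippet), with train_loader, device, optimizer and log taken from the code above:

import torch.nn.functional as F

def train(epoch):
    model.train()
    avg_loss = 0.
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        avg_loss += loss.item()
        loss.backward()
        if args.sr:
            # add the L1 subgradient on the BN scale factors before stepping
            updateBN()
        optimizer.step()
        if batch_idx % 100 == 0:
            log.info('epoch {} batch {} loss {:.4f}'.format(epoch, batch_idx, loss.item()))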
Example 7
log.info('model save path: {}'.format(model_save_path))
log.info('log save path: {}'.format(logging_file_path))

##### check existing models ##
#if os.path.isfile(model_save_path) and args.resume:
#    pre_check = torch.load(model_save_path)
#    if pre_check['epoch'] == args.epochs and pre_check['cfg'] == checkpoint['cfg']:
#        print('no need to run, load from {}'.format(model_save_path))
#        sys.exit(0)

from dataloader import get_data_loader
train_loader, test_loader = \
        get_data_loader(dataset = args.dataset, train_batch_size = args.batch_size, test_batch_size = args.test_batch_size, use_cuda=args.cuda)

model = mbnet(cfg=checkpoint['cfg'], dataset=args.dataset)
# load weights, otherwise, only the arch is used
if not args.retrain:
    model.load_state_dict(checkpoint['state_dict'])

if args.cuda: model.cuda()

optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)


def train(epoch):
    model.train()
    avg_loss = 0.