def get_model_metrics(model, args, loggers):
    local_model = model
    if isinstance(model, torch.nn.DataParallel):
        local_model = model.module
    input_size = (args.input_channels, ) + args.input_size
    summary_info = summary(local_model, input_size, device=args.gpu_id,
                           batch_size=-1)  # args.train_batch
    loggers['model_parameters'] = summary_info[0]
    loggers['model_trainable_parameters'] = summary_info[1]
    # next metrics given in MB; transform to B
    loggers['model_input_size'] = summary_info[2] * (1024**2)
    loggers['model_passes_size'] = summary_info[3] * (1024**2)
    loggers['model_parameters_size'] = summary_info[4] * (1024**2)
    loggers['model_memory_computed'] = summary_info[5] * (1024**2)
    loggers['model_memory_cuda'], _ = get_memory_cuda()
    with torch.cuda.device(args.gpu_id):
        flops, params = get_model_complexity_info(local_model, input_size,
                                                  as_strings=False,
                                                  print_per_layer_stat=False)
    loggers['model_flops'] = flops
def get_complexity(self, model):
    """Receives a model (or an intermediate model) and returns the number of
    FLOPs and parameters required to execute it."""
    flops, params = get_model_complexity_info(model, self.input_size,
                                              print_per_layer_stat=False,
                                              as_strings=False)
    return flops, params
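# A minimal, self-contained sketch of how a helper like get_complexity above is
# typically driven. Hedged assumptions: get_model_complexity_info is taken from
# the ptflops package (the snippets in this collection import it from a local
# flops_counter module instead), and the torchvision model plus the
# (3, 224, 224) input shape are illustrative choices, not part of the snippet.
import torchvision.models as tvm
from ptflops import get_model_complexity_info

model = tvm.resnet18()
flops, params = get_model_complexity_info(model, (3, 224, 224),  # (C, H, W)
                                          as_strings=False,
                                          print_per_layer_stat=False)
print('GFLOPs: %.3f, MParams: %.2f' % (flops / 1e9, params / 1e6))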
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    data_aug_scale = (0.08, 1.0) if args.modelsize == 'large' else (0.2, 1.0)

    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomResizedCrop(224, scale=data_aug_scale),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.train_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),  # transforms.Scale is a deprecated alias
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.test_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif 'resnext' in args.arch:
        model = models.__dict__[args.arch](
            baseWidth=args.base_width,
            cardinality=args.cardinality,
        )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    flops, params = get_model_complexity_info(model, (224, 224),
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print('Flops: %.3fG' % (flops / 1e9))
    print('Params: %.2fM' % (params / 1e6))

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..', args.resume)
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        # model may have more keys
        t = model.state_dict()
        c = checkpoint['state_dict']
        flag = True
        for k in t:
            if k not in c:
                print('not in loading dict! fill it', k, t[k])
                c[k] = t[k]
                flag = False
        model.load_state_dict(c)
        if flag:
            print('optimizer load old state')
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('new optimizer !')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(0, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    logger.close()

    print('Best acc:')
    print(best_acc)
import torch
import argparse

from flops_counter import get_model_complexity_info
from utils import get_network

parser = argparse.ArgumentParser(description="Counting network's parameters")
parser.add_argument('--network', '-n', required=True)
parser.add_argument('--dataset', type=str, default='cifar100')
parser.add_argument('--input-size', '-i', type=int, default=32)
parser.add_argument('--cuda', action='store_true')
args = parser.parse_args()

device = torch.device('cuda:0' if (torch.cuda.is_available() and args.cuda) else 'cpu')
net = get_network(args.network, args.dataset, device)

flops, params = get_model_complexity_info(
    net, (3, args.input_size, args.input_size),
    as_strings=True, print_per_layer_stat=True)
print('Flops: {}\n{}'.format(flops, params))
from flops_counter import get_model_complexity_info
import models as models

customized_models_names = sorted(
    name for name in models.__dict__
    if name.islower() and not name.startswith("__")
    and callable(models.__dict__[name]))

print('-'.ljust(40, '-'))
for modelname in customized_models_names:
    if '50' in modelname:
        model = models.__dict__[modelname]()
        flops, params = get_model_complexity_info(model, (224, 224),
                                                  as_strings=True,
                                                  print_per_layer_stat=False)
        fixname = modelname.ljust(18, ' ')
        info = fixname + '\t' + params + '\t' + flops
        print(info)
print('-'.ljust(40, '-'))
BEST_ACC = 0
train_acc = [0.0]
test_acc = [0.0]
loss_list = []
MAC_list = []
params_list = []
iteration = 1
epoch = 0
start_time = time.time()

# grow network
while True:
    logger.info('Iteration **{}**'.format(iteration))
    mac, params = get_model_complexity_info(model, (3, RESOLUTION, RESOLUTION),
                                            as_strings=True,
                                            print_per_layer_stat=False)
    MAC_list.append(mac)
    params_list.append(params)
    GROWN = False

    tr_acc, loss = train(logger, epoch, model, trainloader, criterion,
                         optimizer, device)
    te_acc = test(logger, epoch, model, testloader, criterion, device)
    train_acc.append(tr_acc)
    test_acc.append(te_acc)
    loss_list.append(loss)

    if te_acc > BEST_ACC:
        # save the best checkpoint
        BEST_ACC = te_acc
        logger.info('Saving best %.3f @ %d ...' % (te_acc, epoch))
# model = models.resnet18()
# model = ERFNet(19)
# model = NDNet_ende(19)
# model = NDNet_fcn32(19)
# model = contextnet(19)
# model = MobileNetV2(19)
# model = ENet(19)
# model = icnet(19)  # icnet uses 513x1025 or 1025x2049 for testing
# model = segnet(19)
model = EESPNet_Seg(19).cuda()
# net = models.densenet161()
model.eval()

flops, params = get_model_complexity_info(model, (3, 1024, 2048),
                                          as_strings=True,
                                          print_per_layer_stat=True)
print('Flops: ' + flops)
print('Params: ' + params)

# ..................................... NDNet
#
# Flops: 6.92 GMac   ndnet ende - 1024*2048
# Params: 502.62 k
# Flops: 1.73 GMac   ndnet ende - 512*1024
def main():
    global best_prec1, args

    if args.local_rank == 0 and not os.path.isdir(args.save_dir):
        mkdir_p(args.save_dir)

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 requires cudnn backend to be enabled."
    if args.static_loss_scale != 1.0:
        if not args.fp16:
            print("Warning: if --fp16 is not used, static_loss_scale will be ignored.")

    # create model
    if args.pretrained:
        if args.local_rank == 0:
            print("=> using pre-trained model '{}'".format(args.arch))
        if args.arch.startswith('resnet'):
            model = resnets.__dict__[args.arch](pretrained=True)
        elif args.arch.startswith('mobilenet'):
            model = mobilenets.__dict__[args.arch](pretrained=True)
        else:
            raise NotImplementedError("Unknown network arch.")
    else:
        if args.local_rank == 0:
            print("=> creating model '{}'".format(args.arch))
        if 'resnext' in args.arch:
            model = models.__dict__[args.arch](
                baseWidth=args.base_width,
                cardinality=args.cardinality,
            )
        else:
            model = models.__dict__[args.arch]()

    if args.local_rank == 0:
        if args.dataset.startswith('cifar'):
            H, W = 32, 32
        elif args.dataset.startswith('imagenet'):
            H, W = 224, 224
        else:
            raise NotImplementedError("Unknown dataset")
        flops, params = get_model_complexity_info(model, (H, W),
                                                  as_strings=False,
                                                  print_per_layer_stat=False)
        print('=> FLOPs: {:.6f}G, Params: {:.6f}M'.format(flops / 1e9, params / 1e6))
        print('=> Params (double-check): %.6fM' %
              (sum(p.numel() for p in model.parameters()) / 1e6))

    if args.sync_bn:
        import apex
        if args.local_rank == 0:
            print("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda()
    if args.fp16:
        model = FP16Model(model)
    if args.distributed:
        # By default, apex.parallel.DistributedDataParallel overlaps communication
        # with computation in the backward pass.
        # model = DDP(model)
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # Pretrained weights are already loaded in the model-creation branches above;
    # the original extra load here referenced an undefined 'checkpoint':
    # if args.pretrained:
    #     model.load_state_dict(checkpoint['state_dict'])

    # Scale learning rate based on global batch size
    args.lr = args.lr * float(args.batch_size * args.world_size) / 256

    if args.remove_norm_weight_decay:
        if args.local_rank == 0:
            print("=> ! Weight decay NOT applied to FeatNorm parameters ")
        norm_params = set()  # TODO: need to check this via experiments
        rest_params = set()
        for m in model.modules():
            if isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                for param in m.parameters(False):
                    norm_params.add(param)
            else:
                for param in m.parameters(False):
                    rest_params.add(param)
        optimizer = torch.optim.SGD(
            [{'params': list(norm_params), 'weight_decay': 0.0},
             {'params': list(rest_params)}],
            args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
            nesterov=args.nesterov)
    else:
        if args.local_rank == 0:
            print("=> ! Weight decay applied to FeatNorm parameters ")
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)

    if args.fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale)

    # define loss function (criterion) and optimizer
    criterion_train = nn.CrossEntropyLoss().cuda() if args.labelsmoothing_rate == 0.0 \
        else LabelSmoothing(args.labelsmoothing_rate).cuda()
    criterion_val = nn.CrossEntropyLoss().cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        # Use a local scope to avoid dangling references
        def resume():
            global best_prec1  # assign to the module-level best_prec1
            if os.path.isfile(args.resume):
                if args.local_rank == 0:
                    print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch']
                best_prec1 = checkpoint['best_prec1']
                model.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                if args.local_rank == 0:
                    print("=> loaded checkpoint '{}' (epoch {})".format(
                        args.resume, checkpoint['epoch']))
            else:
                if args.local_rank == 0:
                    print("=> no checkpoint found at '{}'".format(args.resume))

        resume()

    # Data loading code
    if args.dataset == "cifar10":
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip()
        ])
        train_dataset = datasets.CIFAR10('./datasets', train=True,
                                         download=False,
                                         transform=train_transform)
        val_dataset = datasets.CIFAR10('./datasets', train=False, download=False)
    elif args.dataset == "cifar100":
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip()
        ])
        train_dataset = datasets.CIFAR100('./datasets', train=True,
                                          download=False,
                                          transform=train_transform)
        val_dataset = datasets.CIFAR100('./datasets', train=False, download=False)
    elif args.dataset == "imagenet":
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        crop_size = args.crop_size      # 224
        val_size = args.crop_size + 32  # 256
        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(crop_size,
                                             interpolation=args.crop_interpolation),
                transforms.RandomHorizontalFlip(),
                # transforms.ToTensor(),  too slow
                # normalize,
            ]))
        val_dataset = datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(val_size, interpolation=args.crop_interpolation),
                transforms.CenterCrop(crop_size),
            ]))

    train_sampler = None
    val_sampler = None
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               collate_fn=fast_collate)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             collate_fn=fast_collate)

    if args.evaluate:
        validate(val_loader, model, criterion_val)
        return

    scheduler = CosineAnnealingLR(optimizer.optimizer if args.fp16 else optimizer,
                                  args.epochs,
                                  len(train_loader),
                                  eta_min=0.,
                                  warmup=args.warmup_epochs)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion_train, optimizer, epoch,
              scheduler, args.warmup_epochs, args.mixup_rate,
              args.labelsmoothing_rate)
        # TODO: warmup_epochs, labelsmoothing_rate, mixup_rate, args.dataset,
        # args.cropsize, args.crop_interpolation
        if args.prof:
            break

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion_val)

        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best, args.save_dir)
nets = [
    ('CifarResNetBasic', [24, 24, 24], 94.26),
    ('CifarResNetBasic', [48, 48, 48], 94.54),
    # growing with gaussian
    # ('CifarResNetBasic', [5, 5, 4], 92.68),
    ('CifarResNetBasic', [4, 6, 3], 93.11),
    ('CifarResNetBasic', [10, 10, 10], 93.34),
    ('CifarResNetBasic', [11, 8, 11], 93.41),
    # ('CifarResNetBasic', [9, 16, 16], 93.34),
    ('CifarResNetBasic', [24, 23, 23], 93.48),
    ('CifarResNetBasic', [33, 32, 32], 94.15),
    ('CifarResNetBasic', [92, 91, 91], 94.41),
    # growing with zero
    # ('CifarResNetBasic', [6, 3, 5], 92.28),
    # ('CifarResNetBasic', [3, 3, 6], 92.56),
    ('CifarResNetBasic', [5, 5, 4], 92.99),
    ('CifarResNetBasic', [22, 6, 6], 93.18),
    ('CifarResNetBasic', [49, 18, 18], 93.33),
    ('CifarResNetBasic', [32, 31, 31], 93.86),
    ('CifarResNetBasic', [42, 42, 41], 94.31),
]

with torch.cuda.device(0):
    print('Net, flops, params, accuracy:')
    for net_type, num_blocks, accu in nets:
        net = getattr(mymodels, net_type)(num_blocks)
        flops, params = get_model_complexity_info(net, (32, 32),
                                                  as_strings=False,
                                                  print_per_layer_stat=False)
        print('{}-{}\t{}\t{}\t{}'.format(net_type, num_blocks, flops, params, accu))
def main_worker(local_rank, nprocs, args):
    best_acc = 0  # best test accuracy

    dist.init_process_group(backend='nccl')
    torch.cuda.set_device(local_rank)
    train_batch = int(args.train_batch / nprocs)
    test_batch = int(args.test_batch / nprocs)

    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    data_aug_scale = (0.08, 1.0) if args.modelsize == 'l' else (0.2, 1.0)

    train_dataset = datasets.ImageFolder(traindir, transforms.Compose([
        transforms.RandomResizedCrop(224, scale=data_aug_scale),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),  # transforms.Scale is a deprecated alias
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]))
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=test_batch,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             sampler=val_sampler)

    # create model
    print("=> creating model MixNet.")
    model = MixNet(args.modelsize)

    flops, params = get_model_complexity_info(model, (224, 224),
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print('Flops: %.3fG' % (flops / 1e9))
    print('Params: %.2fM' % (params / 1e6))

    model.cuda(local_rank)
    model = torch.nn.parallel.DistributedDataParallel(model,
                                                      device_ids=[local_rank],
                                                      find_unused_parameters=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(local_rank)
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay, nesterov=True)
    cudnn.benchmark = True

    lr_mode = args.lr_mode
    lr_decay_period = args.lr_decay_period
    lr_decay_epoch = args.lr_decay_epoch
    lr_decay = args.lr_decay
    if lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, args.epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in lr_decay_epoch.split(",")]

    if (lr_mode == "step") and (lr_decay_period != 0):
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=optimizer,
            step_size=lr_decay_period,
            gamma=lr_decay,
            last_epoch=-1)
    elif (lr_mode == "multistep") or ((lr_mode == "step") and (lr_decay_period == 0)):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer=optimizer,
            milestones=lr_decay_epoch,
            gamma=lr_decay,
            last_epoch=-1)
    elif lr_mode == "cosine":
        for group in optimizer.param_groups:
            group.setdefault("initial_lr", group["lr"])
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer=optimizer,
            T_max=args.epochs,
            last_epoch=(args.epochs - 1))

    # Resume
    title = 'ImageNet-MixNet'
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..', args.resume)
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        # model may have more keys
        t = model.state_dict()
        c = checkpoint['state_dict']
        flag = True
        for k in t:
            if k not in c:
                print('not in loading dict! fill it', k, t[k])
                c[k] = t[k]
                flag = False
        model.load_state_dict(c)
        if flag:
            print('optimizer load old state')
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('new optimizer !')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Epoch', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch,
                                   local_rank, nprocs, args)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))

    # TensorBoardX logs
    train_writer = tensorboardX.SummaryWriter(args.logdir)

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        train_sampler.set_epoch(epoch)
        val_sampler.set_epoch(epoch)
        lr_scheduler.step()
        if epoch < args.warmup_epochs:
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr * ((epoch + 1) / args.warmup_epochs)
        print('\nEpoch: [%d | %d] Learning Rate : %f' %
              (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      epoch, local_rank, nprocs, args)
        test_loss, test_acc = test(val_loader, model, criterion, epoch,
                                   local_rank, nprocs, args)

        # add scalars
        train_writer.add_scalar('train_epoch_loss', train_loss, epoch)
        train_writer.add_scalar('train_epoch_acc', train_acc, epoch)
        train_writer.add_scalar('test_epoch_acc', test_acc, epoch)

        # append logger file
        logger.append([epoch, train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    logger.close()
    train_writer.close()

    print('Best acc:')
    print(best_acc)
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    data_aug_scale = (0.08, 1.0) if args.modelsize == 'large' else (0.2, 1.0)

    train_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224, scale=data_aug_scale),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.train_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),  # transforms.Scale is a deprecated alias
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.test_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif 'resnext' in args.arch:
        model = models.__dict__[args.arch](
            baseWidth=args.base_width,
            cardinality=args.cardinality,
        )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    flops, params = get_model_complexity_info(model, (224, 224),
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print('Flops: %.3fG' % (flops / 1e9))
    print('Params: %.2fM' % (params / 1e6))

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Resume
    title = 'ImageNet-' + args.arch
    # cmp = model.module.fc.weight
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..', args.resume)
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        # model may have more keys
        t = model.state_dict()
        c = checkpoint['state_dict']
        # if "checkpoint" not in args.resume:
        #     tmp1 = c['module.fc.weight'][0:2]
        #     tmp1[1] = c['module.fc.weight'][627]
        #     c['module.fc.weight'] = tmp1
        #     tmp2 = c['module.fc.bias'][0:2]
        #     c['module.fc.bias'] = tmp2
        # c['module.fc.weight'] *= 0
        model.load_state_dict(c)
        model.module.fc = nn.Linear(2048, 3, True)
        model.cuda()
        flag = True
        for k in t:
            if k not in c:
                print('not in loading dict! fill it', k, t[k])
                c[k] = t[k]
                flag = False
        if flag:
            print('optimizer load old state')
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('new optimizer !')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return 24000

    if args.output:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        val_transforms = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize
        ])
        demo_path = os.path.join(args.data, "demo")
        img_dataset = datasets.ImageFolder(demo_path, transform=val_transforms)
        TP, TN, FP, FN, correct = 0, 0, 0, 0, 0
        for idx, (img_path, img_label) in enumerate(img_dataset.imgs):
            img_name = img_path.split('/')[-1]
            img = Image.open(img_path)
            img_out = val_transforms(img)
            img_out = torch.unsqueeze(img_out, 0)
            with torch.no_grad():
                img_out = img_out.to('cuda')
                feat = model(img_out)
                feat = torch.squeeze(feat, 0)
            if feat[0] >= feat[1]:
                save_path = "./data/demo_result/car/"
                if img_label == 0:
                    correct += 1
                    TN += 1
                    img.save(save_path + img_name)
                else:
                    FN += 1
                    print(FN, "th motorcycle is wrongly considered as car.")
                    img.save(save_path + str(FN) + ".jpg")
            else:
                save_path = "./data/demo_result/motorcycle/"
                if img_label == 0:
                    FP += 1
                    print(FP, "th car is wrongly considered as motorcycle.")
                    img.save(save_path + str(FP) + ".jpg")
                else:
                    correct += 1
                    TP += 1
                    img.save(save_path + img_name)
        print("The number of correctly classified pic is ", correct)
        print("The acc is {:.4f}".format(correct / len(img_dataset)))
        print("The precision is {:.4f}".format(TP / (TP + FP)))
        print("The recall is {:.4f}".format(TP / (TP + FN)))
        return

    temp = model.module.layer4._modules['2'].conv3.weight * model.module.fc.weight[:, 0]
    # ignored_params = list(map(id, model.module.fc.parameters()))
    # base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
    # params_list = [{'params': base_params, 'lr': args.lr}, ]
    # params_list.append({'params': model.module.fc.parameters(), 'lr': 0})
    #
    # optimizer = optim.SGD(model.parameters(), lr=args.lr,
    #                       momentum=args.momentum, weight_decay=args.weight_decay)

    # Train and val
    for epoch in range(0, args.epochs):  # default is range(start_epoch, args.epochs)
        print(model.module.fc.weight[0][0])
        print(model.module.fc.weight[0][1000])
        print(model.module.fc.weight[1][2000])
        adjust_learning_rate(optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)
        # temp = model.module.layer4

    logger.close()

    print('Best acc:')
    print(best_acc)
# input-image transform (variable name inferred to pair with y_transforms below)
x_transforms = transforms.Compose([
    transforms.ToTensor(),  # -> [0, 1]
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # -> [-1, 1]
])
# the mask only needs to be converted to a tensor
y_transforms = transforms.ToTensor()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
args = getArgs()
logging = getLog(args)
print('**************************')
print('models:%s,\nepoch:%s,\nbatch size:%s\ndataset:%s' %
      (args.arch, args.epoch, args.batch_size, args.dataset))
logging.info('\n=======\nmodels:%s,\nepoch:%s,\nbatch size:%s\ndataset:%s\n========' %
             (args.arch, args.epoch, args.batch_size, args.dataset))
print('**************************')

model = getModel(args)
macs, params = get_model_complexity_info(model, (3, 512, 512),
                                         as_strings=True,
                                         print_per_layer_stat=True,
                                         verbose=True)
print('{:<30} {:<8}'.format('Computational complexity: ', macs))
print('{:<30} {:<8}'.format('Number of parameters: ', params))

train_dataloaders, val_dataloaders, test_dataloaders = getDataset(args)
criterion = torch.nn.BCELoss()
# criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
# if 'train' in args.action:
#     train(model, criterion, optimizer, train_dataloaders, val_dataloaders, args)
# if 'test' in args.action:
#     test(test_dataloaders, save_predict=True)
def set_complexity(model):
    """Get model complexity in terms of FLOPs and the number of parameters."""
    flops, params = get_model_complexity_info(model, model.input_shape,
                                              print_per_layer_stat=False,
                                              as_strings=False)
    model.complexity = [(flops, params)]
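# Hedged usage sketch for set_complexity above. The toy nn.Sequential model and
# the input_shape attribute attached to it are assumptions for illustration;
# the helper only requires that the model carry an input_shape of the form (C, H, W).
import torch.nn as nn

toy = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Flatten())
toy.input_shape = (3, 32, 32)  # attribute the helper reads
set_complexity(toy)
print(toy.complexity)  # [(flops, params)]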
def main():
    global model
    parser = argparse.ArgumentParser(description='DeFiAN')
    parser.add_argument("--cuda", default=True, action="store_true",
                        help="Use cuda?")
    parser.add_argument('--n_GPUs', type=int, default=1,
                        help='parallel training with multiple GPUs')
    parser.add_argument('--GPU_ID', type=int, default=0,
                        help='parallel training with multiple GPUs')
    parser.add_argument('--threads', type=int, default=4,
                        help='number of threads for data loading')
    parser.add_argument('--seed', type=int, default=1, help='random seed')
    parser.add_argument('--scale', type=int, default=2, help='scale factor')
    parser.add_argument('--attention', default=True, help='True for DeFiAN')
    parser.add_argument('--n_modules', type=int, default=10,
                        help='num of DeFiAM: 10 for DeFiAN_L; 5 for DeFiAN_S')
    parser.add_argument('--n_blocks', type=int, default=20,
                        help='num of RCABs: 20 for DeFiAN_L; 10 for DeFiAN_S')
    parser.add_argument('--n_channels', type=int, default=64,
                        help='num of channels: 64 for DeFiAN_L; 32 for DeFiAN_S')
    parser.add_argument('--activation', default=nn.ReLU(True),
                        help='activation function')
    args = parser.parse_args()

    if args.cuda and not torch.cuda.is_available():
        raise Exception("No GPU found, please run without --cuda")

    print("Random Seed: ", args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    if args.n_GPUs == 1:
        torch.cuda.set_device(args.GPU_ID)
    cudnn.benchmark = True

    model_path = 'checkpoints/'
    if args.n_modules == 5:
        model_path = model_path + 'DeFiAN_S_x' + str(args.scale)
        result_pathes = 'DeFiAN_S/'
    elif args.n_modules == 10:
        model_path = model_path + 'DeFiAN_L_x' + str(args.scale)
        result_pathes = 'DeFiAN_L/'
    else:
        raise InterruptedError

    print("===> Building model")
    model = Generator(args.n_channels, args.n_blocks, args.n_modules,
                      args.activation, attention=args.attention,
                      scale=[args.scale])

    print("===> Calculating NumParams & FLOPs")
    input_size = (3, 480 // args.scale, 360 // args.scale)
    flops, params = get_model_complexity_info(model, input_size,
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print('\tParam = {:.3f}K\n\tFLOPs = {:.3f}G on {}'.format(
        params * (1e-3), flops * (1e-9), input_size))

    cpk = torch.load(model_path + '.pth',
                     map_location={'cuda:1': 'cuda:0'})["state_dict"]
    model.load_state_dict(cpk, strict=False)
    model = model.cuda()

    data_valid = ['Set5_LR_bicubic', 'Urban100_LR_bicubic', 'Manga109_LR_bicubic']

    print('====> Testing...')
    for i in range(len(data_valid)):
        result_path = result_pathes + data_valid[i] + '_x' + str(args.scale)
        valid_path = '/mnt/Datasets/Test/' + data_valid[i]
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        valid_psnr, valid_ssim = validation(valid_path, result_path, model,
                                            args.scale)
        print('\t {} --- PSNR = {:.4f} SSIM = {:.4f}'.format(
            data_valid[i], valid_psnr, valid_ssim))
def get_complexity(self, model):
    """Get model complexity in terms of FLOPs and the number of parameters."""
    flops, params = get_model_complexity_info(model, self.input_shape,
                                              print_per_layer_stat=False,
                                              as_strings=False)
    return flops, params
def main():
    global best_acc
    print(args)
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'test')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    data_aug_scale = (0.08, 1.0) if args.modelsize == 'large' else (0.2, 1.0)

    train_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224, scale=data_aug_scale),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.train_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.test_batch, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # create model
    print(args.pretrained)
    Pretrained_ImageNet_or_Places365 = 0  # 1: ImageNet  0: Places365
    if args.arch.startswith('densenet'):
        model = load_pretrained_Places365_model(
            args,
            '/media/clq/Work/datasets/pretrained_models/densenet161_places365.pth.tar')
    elif args.arch == 'sk_resnet101' or args.arch == 'sksa_resnet101':
        if Pretrained_ImageNet_or_Places365 == 1:
            print('Training {} with pretrained ImageNet models on MIT67'.format(args.arch))
            model = load_pretrained_ImageNet_model(
                args,
                '/media/clq/Work/datasets/pretrained_models/sk_resnet101.pth.tar')
        else:
            print('clq 0601 Training {} with pretrained Places365 models on MIT67'
                  .format(args.arch))
            model = load_pretrained_ImageNet_model(
                args,
                '/mnt/disk/home1/clq/PytorchInsight/classification/checkpoints/Places365_standard/sksa_resnet101_2/model_best.pth.tar',
                Pretrained_ImageNet_or_Places365=0)
    elif args.arch == 'sk_resnet50':
        model = load_pretrained_ImageNet_model(
            args,
            '/media/clq/Work/datasets/pretrained_models/sk_resnet150.pth.tar')

    # Freeze some layers.
    # To keep the lower layers of the network fixed during training, set
    # requires_grad to False for the parameters of that subgraph; their
    # gradients will then not be computed in the backward pass.
    for param in model.parameters():
        param.requires_grad = False

    flag_finetune_style = 0  # 1: finetune stage 4 and last fc  0: only finetune last fc
    if flag_finetune_style == 1:
        for param in model.layer4.parameters():
            param.requires_grad = True  # finetune the last conv stage
    else:
        # only finetune the last fc layer
        print('only finetune the last fc layer')

    # change the number of classes
    if args.arch.startswith('resnet') or args.arch.startswith('sk_resnet') \
            or args.arch.startswith('sksa_resnet'):
        in_ftrs = model.fc.in_features  # the final fully-connected layer
        print('pretrained model.fc.size={}*{}'.format(model.fc.in_features,
                                                      model.fc.out_features))
        model.fc = nn.Linear(in_ftrs, args.num_classes)
    if args.arch.startswith('densenet'):
        in_ftrs = model.classifier.in_features  # the final fully-connected layer
        print('pretrained model.fc.size={}*{}'.format(model.classifier.in_features,
                                                      model.classifier.out_features))
        # densenet exposes the head as .classifier, not .fc
        model.classifier = nn.Linear(in_ftrs, args.num_classes)

    # Optimize only the classifier
    if flag_finetune_style == 1:
        optimizer = optim.SGD(
            # remember to wrap with filter(), otherwise it raises an error
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
    else:
        optimizer = optim.SGD(model.fc.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)

    print(model)
    flops, params = get_model_complexity_info(model, (224, 224),
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print('Flops: %.3fG' % (flops / 1e9))
    print('Params: %.2fM' % (params / 1e6))

    # multi-GPU training
    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # optimizer = optim.SGD(model.parameters(), lr=args.lr,
    #                       momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    title = args.data + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..', args.resume)
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        # model may have more keys
        t = model.state_dict()
        c = checkpoint['state_dict']
        # flag = True
        # for k in t:
        #     if k not in c:
        #         print('not in loading dict! fill it', k, t[k])
        #         c[k] = t[k]
        #         flag = False
        # model.load_state_dict(c)
        flag = False  # modified by CLQ
        for k in c:
            if k.startswith('module'):
                t[k[7:]] = c[k]
        model.load_state_dict(t)
        if flag:
            print('optimizer load old state')
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('new optimizer !')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['epoch', 'LR', 'Train Loss', 'Valid Loss',
                          'Train Top1', 'Valid Top1.', 'Train Top5', 'Valid Top5'])

    # args.evaluate = True
    print('args.evaluate:{}'.format(args.evaluate))
    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc, test_top5 = test(val_loader, model, criterion,
                                              start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f, Test Top5: %.2f' %
              (test_loss, test_acc, test_top5))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc, train_top5 = train(train_loader, model, criterion,
                                                  optimizer, epoch, use_cuda)
        test_loss, test_acc, test_top5 = test(val_loader, model, criterion,
                                              epoch, use_cuda)

        # append logger file
        logger.append([int(epoch), state['lr'], train_loss, test_loss,
                       train_acc, test_acc, train_top5, test_top5])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    print('Best acc:{}'.format(best_acc))
    print(args)
    logger.set_names(['Best acc'])
    logger.append([best_acc])
    logger.close()
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)

    args.distributed = True
    args.gpu = args.local_rank
    torch.cuda.set_device(args.gpu)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    args.world_size = torch.distributed.get_world_size()
    print('world_size = ', args.world_size)

    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch]()

    flops, params = get_model_complexity_info(model, (224, 224),
                                              as_strings=False,
                                              print_per_layer_stat=False)
    print('Flops: %.3fG' % (flops / 1e9))
    print('Params: %.2fM' % (params / 1e6))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    # criterion = nn.CrossEntropyLoss().cuda()
    criterion = SoftCrossEntropyLoss(label_smoothing=args.label_smoothing).cuda()
    model = model.cuda()

    args.lr = float(0.1 * float(args.train_batch * args.world_size) / 256.)
    state['lr'] = args.lr
    optimizer = set_optimizer(model)
    # optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
    #           weight_decay=args.weight_decay)

    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=args.opt_level,
                                      keep_batchnorm_fp32=args.keep_batchnorm_fp32,
                                      loss_scale=args.loss_scale)

    # model = torch.nn.DataParallel(model).cuda()
    # model = torch.nn.parallel.DistributedDataParallel(
    #     model, device_ids=[args.local_rank], output_device=args.local_rank)
    model = DDP(model, delay_allreduce=True)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                                  std=[0.229, 0.224, 0.225])
    data_aug_scale = (0.08, 1.0)

    train_dataset = datasets.ImageFolder(traindir, transforms.Compose([
        transforms.RandomResizedCrop(224, scale=data_aug_scale),
        transforms.RandomHorizontalFlip(),
        # transforms.ToTensor(),
        # normalize,
    ]))
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        # transforms.ToTensor(),
        # normalize,
    ]))

    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.train_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True, sampler=train_sampler,
        collate_fn=fast_collate)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.test_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True, sampler=val_sampler,
        collate_fn=fast_collate)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..', args.resume)
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        # checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        checkpoint = torch.load(args.resume,
                                map_location=lambda storage, loc: storage.cuda(args.gpu))
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        # model may have more keys
        t = model.state_dict()
        c = checkpoint['state_dict']
        for k in t:
            if k not in c:
                print('not in loading dict! fill it', k, t[k])
                c[k] = t[k]
        model.load_state_dict(c)
        print('optimizer load old state')
        optimizer.load_state_dict(checkpoint['optimizer'])
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
            logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                              'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)
        if args.local_rank == 0:
            print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch, use_cuda)

        # save model
        if args.local_rank == 0:
            # append logger file
            logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

            is_best = test_acc > best_acc
            best_acc = max(test_acc, best_acc)
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint)

    if args.local_rank == 0:
        logger.close()

    print('Best acc:')
    print(best_acc)
import torch

from convs.condconv import *
from convs.cc_inf import *
from convs.dyconv import *
from convs.dyres_conv import *
from convs.dyres_inf import *
from convs.ddsnet import *
from convs.dds_exp import *
from convs.dychannel import *
from flops_counter import get_model_complexity_info

x = torch.randn(1, 16, 32, 32)

net = CondConv(x.size(1), x.size(1), 3, num_experts=4)
flops, params = get_model_complexity_info(net, (x.size(1), 32, 32),
                                          as_strings=True,
                                          print_per_layer_stat=False)
print('--CondConv\nFlops: {}\nParams: {}'.format(flops, params))

net = DyConv(x.size(1), x.size(1), 3, num_experts=4)
flops, params = get_model_complexity_info(net, (x.size(1), 32, 32),
                                          as_strings=True,
                                          print_per_layer_stat=False)
print('--DyConv\nFlops: {}\nParams: {}'.format(flops, params))

net = DyResConv_Inf(x.size(1), x.size(1), 3, num_experts=4, mode='A')
flops, params = get_model_complexity_info(net, (x.size(1), 32, 32),
                                          as_strings=True,
                                          print_per_layer_stat=False)
print('--DyResA\nFlops: {}\nParams: {}'.format(flops, params))
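# If the comparisons above were to be collected programmatically rather than
# printed ad hoc, one hedged extension is sketched below. It reuses the imports
# and the CondConv/DyConv constructor signatures exactly as used in the script
# above; the tabular print format is an illustrative choice.
variants = [('CondConv', CondConv(16, 16, 3, num_experts=4)),
            ('DyConv', DyConv(16, 16, 3, num_experts=4))]
rows = []
for name, net in variants:
    flops, params = get_model_complexity_info(net, (16, 32, 32),
                                              as_strings=True,
                                              print_per_layer_stat=False)
    rows.append((name, flops, params))
for name, flops, params in rows:
    print('{:<10}\t{}\t{}'.format(name, flops, params))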