def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    if 'cifar' in args.dataset:
        print('==> Preparing cifar dataset %s' % args.dataset)
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        if args.dataset == 'cifar10':
            dataloader = datasets.CIFAR10
            num_classes = 10
        else:
            dataloader = datasets.CIFAR100
            num_classes = 100

        trainset = dataloader(root=os.path.join(homeDir, args.dataset), train=True,
                              download=True, transform=transform_train)
        train_sampler = None
        toShuffle = True
        if args.subsample < 1:
            # Train only on the first fraction of the training set.
            toShuffle = False
            n = int(float(len(trainset)) * args.subsample)
            assert n > 0, 'must sample a positive number of training examples.'
            train_sampler = data.sampler.SubsetRandomSampler(range(n))
            print('==> SAMPLING FIRST', n, 'TRAINING IMAGES')
        if args.class_subset != '_':
            # Restrict training to a subset of the classes, given as e.g. '0_3_7'.
            print('*' + args.class_subset + '*')
            toShuffle = False
            args.class_subset = [int(i) for i in args.class_subset.split('_')]
            indices = [i for i, p in enumerate(trainset.train_labels) if p in args.class_subset]
            train_sampler = data.sampler.SubsetRandomSampler(indices)

        trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=toShuffle,
                                      num_workers=args.workers, sampler=train_sampler)

        testset = dataloader(root=os.path.join(homeDir, args.dataset), train=False,
                             download=False, transform=transform_test)
        test_sampler = None
        if args.test_subsample < 1:
            n = int(float(len(testset)) * args.test_subsample)
            assert n > 0, 'must sample a positive number of test examples.'
            test_sampler = data.sampler.SubsetRandomSampler(range(n))
            print('==> SAMPLING FIRST', n, 'TEST IMAGES')
        if type(args.class_subset) is list:
            # class_subset was parsed above; apply the same class filter to the test set.
            toShuffle = False
            indices = [i for i, p in enumerate(testset.test_labels) if p in args.class_subset]
            test_sampler = data.sampler.SubsetRandomSampler(indices)

        testloader = data.DataLoader(testset, batch_size=args.test_batch, shuffle=False,
                                     num_workers=args.workers, sampler=test_sampler)
    else:
        # ImageNet
        train_dir = '/home/amir/data/imagenet12/train'
        test_dir = '/home/amir/data/imagenet12/val'
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomCrop(64),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_test = transforms.Compose([
            transforms.CenterCrop((64, 64)),
            transforms.ToTensor(),
            normalize,
        ])
        trainloader = DataLoader(dataset=datasets.ImageFolder(root=train_dir, transform=transform_train),
                                 batch_size=args.train_batch, shuffle=True, num_workers=1)
        testloader = DataLoader(dataset=datasets.ImageFolder(root=test_dir, transform=transform_test),
                                batch_size=64, shuffle=True)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if 'cifar' in args.dataset:
        if args.arch.startswith('resnext'):
            model = models.__dict__[args.arch](
                cardinality=args.cardinality,
                num_classes=num_classes,
                depth=args.depth,
                widen_factor=args.widen_factor,
                dropRate=args.drop,
            )
        elif args.arch.startswith('densenet'):
            if 'partial' in args.arch:
                model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                    part=args.part,
                    zero_fixed_part=args.zero_fixed_part,
                    do_init=True,
                    split_dim=args.dim_slice,
                )
            else:
                model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    growthRate=args.growthRate,
                    compressionRate=args.compressionRate,
                    dropRate=args.drop,
                    lateral_inhibition=args.lateral_inhibition,
                )
        elif args.arch.startswith('wrn'):
            if 'partial' in args.arch:
                print('==> initializing partial learning with p=', args.part)
                print('classes', num_classes, 'depth', args.depth, 'widen', args.widen_factor,
                      'drop', args.drop, 'part:', args.part)
                model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                    part=args.part,
                    zero_fixed_part=args.zero_fixed_part,
                )
            else:
                model = models.__dict__[args.arch](
                    num_classes=num_classes,
                    depth=args.depth,
                    widen_factor=args.widen_factor,
                    dropRate=args.drop,
                    lateral_inhibition=args.lateral_inhibition,
                )
        elif args.arch.endswith('resnet'):
            model = models.__dict__[args.arch](
                num_classes=num_classes,
                depth=args.depth,
            )
        else:
            if 'partial' in args.arch:
                print('==> creating partial model', args.arch)
                model = models.__dict__[args.arch](num_classes=num_classes, part=args.part,
                                                   zero_fixed_part=args.zero_fixed_part)
            else:
                model = models.__dict__[args.arch](num_classes=num_classes)
    else:  # must be imagenet
        if 'partial' not in args.arch:
            model = imagenet_models.__dict__[args.arch](depth=28, widen_factor=4, num_classes=1000)
        else:
            model = imagenet_models.__dict__[args.arch](depth=28, widen_factor=4, num_classes=1000,
                                                        part=args.part, zero_fixed_part=args.zero_fixed_part)

    from copy import deepcopy
    model = torch.nn.DataParallel(model).cuda()
    if args.load_fixed_path != '':
        # Load the (presumably) full model dict.
        print('ensembling - loading old dict')
        fixed_model_dict = torch.load(args.load_fixed_path)['state_dict']
        if 'partial' in args.arch:
            print('transferring to new and splitting')
            split_model(fixed_model_dict, model)
        else:
            # Just load the dictionary as is and continue from this point.
            model.load_state_dict(fixed_model_dict)

    if False:
        # Disabled legacy ensembling path, kept for reference; never executes.
        if args.load_fixed_path != '':
            print('ENSEMBLING')
            # Load the fixed part of this classifier for ensembling.
            fixed_model_dict = torch.load(args.load_fixed_path)['state_dict']
            model_dict = model.state_dict()
            if args.part == -1:
                model.load_state_dict(fixed_model_dict)
            elif args.only_layer != 'none':
                # Just copy everything; the chosen layer will be reinitialized.
                for a, b in model_dict.items():
                    if args.only_layer not in a:
                        model_dict[a] = deepcopy(fixed_model_dict[a])
                model.load_state_dict(model_dict)
            else:
                # Transfer all fixed values from the loaded dictionary.
                for a, b in model_dict.items():
                    if args.part < .5:
                        # Re-train the learned part.
                        if 'fixed' in a:
                            model_dict[a] = deepcopy(fixed_model_dict[a])
                    else:
                        # Re-train what was at first the random part.
                        if 'learn' in a:
                            model_dict[a] = deepcopy(fixed_model_dict[a])
                model.load_state_dict(model_dict)
                if args.part >= .5:
                    # Switch training between fixed / learned parts.
                    print('switching fixed and learned parts')
                    for a, b in model.module.named_parameters():
                        if 'learn' in a:
                            b.requires_grad = False
                        else:
                            b.requires_grad = True
                # Otherwise, keep it as it is.

    assert not (args.retrain_layer != 'none' and args.only_layer != 'none'), \
        'retrain-layer and only-layer options are mutually exclusive'
    if args.retrain_layer != 'none':
        initial_dict = deepcopy(model.state_dict())

    cudnn.benchmark = True

    criterion = nn.CrossEntropyLoss()
    opt_ = args.optimizer.lower()
    if args.only_layer != 'none':
        model = only_layer(model, args.only_layer)

    # Apply the learn-bn option: freeze or unfreeze all batch-norm parameters.
    for m1, m2 in model.named_modules():
        if 'bn' in m1:
            for p in m2.parameters():
                p.requires_grad = args.learn_bn
    if args.learn_inhibition:
        for p in model.module.parameters():
            p.requires_grad = True

    params = trainableParams(model)
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters() if p.requires_grad) / 1000000.0))
    if opt_ == 'sgd':
        print('optimizer: sgd')
        optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif opt_ == 'adam':
        optimizer = optim.Adam(params)
    elif opt_ == 'yf':
        print('USING YF OPTIMIZER')
        optimizer = YFOptimizer(params, lr=args.lr, mu=0.0, weight_decay=args.weight_decay,
                                clip_thresh=2.0, curv_win_width=20)
        optimizer._sparsity_debias = False
    else:
        raise Exception('unsupported optimizer type', opt_)

    # Record the number of active (trainable) and total parameters.
    nParamsPath = os.path.join(args.checkpoint, 'n_params.txt')
    with open(nParamsPath, 'w') as f:
        f.write('active_params {} \n'.format(sum(p.numel() for p in model.parameters() if p.requires_grad)))
        f.write('total_params {} \n'.format(sum(p.numel() for p in model.parameters())))

    if args.print_params_and_exit:
        exit()

    # Resume
    title = 'cifar-10-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint file found!'
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        # start_epoch = checkpoint['epoch']
        start_epoch = args.start_epoch
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if args.retrain_layer != 'none':
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=False)
            logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
            params = trainableParams(model)
            print('number of trainable params:', len(list(params)))
            model = reinit_model_layer(model, args.retrain_layer, initial_dict)
            params = trainableParams(model)
            print('number of trainable params:', len(list(params)))
            optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                                  weight_decay=args.weight_decay)
        else:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=False)  # was True
            logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    if args.sgdr > 0:
        # SGDR requested: use a cosine annealing schedule; otherwise the stepwise
        # adjust_learning_rate below is used. Assumes CosineAnnealingLR is imported
        # from torch.optim.lr_scheduler.
        scheduler = CosineAnnealingLR(optimizer, T_max=args.epochs)  # eta_min=1e-9
    if args.half:
        model = model.half()

    for epoch in range(start_epoch, args.epochs):
        if args.sgdr > 0:
            scheduler.step()
        else:
            adjust_learning_rate(optimizer, epoch)
        if type(optimizer) is YFOptimizer:
            print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, optimizer.get_lr_factor()))
        else:
            print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

        # Append to the logger file (state['lr'] is the global LR state updated by adjust_learning_rate).
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # Save model.
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if epoch % 10 == 0:
            # Keep a numbered snapshot every 10 epochs.
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, checkpoint=args.checkpoint,
                filename='checkpoint.pth.tar_' + str(epoch).zfill(4))
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
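
# Entry point -- a minimal sketch, assuming this script is meant to be invoked directly.
# All helpers used in main() (train, test, adjust_learning_rate, trainableParams, Logger,
# save_checkpoint, only_layer, reinit_model_layer, split_model, state, args, ...) are
# expected to be defined elsewhere in this file or repository.
if __name__ == '__main__':
    main()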