def __init__(self, model, dataset, datadir, optim, scheduler, optim2=None,
             scheduler2=None, batch_size=128, seed=None, num_gpus=1,
             verbose=False):
    # The model, optimizers and schedulers are built by the caller and passed
    # in directly, rather than being read from a config search space.
    self.dataset = dataset
    self.datadir = datadir
    self.num_gpus = num_gpus
    self.verbose = verbose
    self.model = model
    self.optimizer = optim
    self.scheduler = scheduler
    self.optimizer2 = optim2
    self.scheduler2 = scheduler2

    num_workers = 4
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        num_workers = 0
        if self.use_cuda:
            torch.cuda.manual_seed(seed)
    else:
        seed = random.randint(0, 10000)

    # ######################################################################
    # Data
    kwargs = {}
    if self.use_cuda:
        kwargs = {'num_workers': num_workers, 'pin_memory': True}
    if dataset.startswith('cifar'):
        self.train_loader, self.test_loader = cifar.get_data(
            32, datadir, dataset=dataset, batch_size=batch_size, **kwargs)
    elif dataset == 'tiny_imagenet':
        self.train_loader, self.test_loader = tiny_imagenet.get_data(
            64, datadir, val_only=False, batch_size=batch_size, **kwargs)

    # ######################################################################
    # Split across GPUs
    if torch.cuda.device_count() > 1 and num_gpus > 1:
        self.model = nn.DataParallel(self.model)
        model = self.model.module
    else:
        model = self.model
    if self.use_cuda:
        self.model.cuda()
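
# A minimal usage sketch for the constructor above. The wrapping class name
# (`Trainer` below), the step schedule, and the exact `MixedNet` signature are
# illustrative assumptions, not definitions taken from this file. Note also
# that `self.use_cuda` is referenced in the constructor but never assigned
# there; it is presumably provided by the parent class.
#
#     model = MixedNet('cifar100', 'gainA', False, 64, 1e-4, 1e-4)
#     opt, sched = optim.get_optim(
#         'sgd', model.parameters(), init_lr=0.1, steps=[60, 80, 100],
#         wd=1e-4, gamma=0.2, momentum=0.9, max_epochs=120)
#     trainer = Trainer(model, 'cifar100', '/path/to/data', opt, sched,
#                       batch_size=128, seed=42, num_gpus=1, verbose=True)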
def _setup(self, config):
    args = config.pop("args")
    vars(args).update(config)
    type_ = config.get('type', 'gainA')
    use_dwt = config.get('dwt', False)
    C = config.get('num_channels', 64)
    dataset = config.get('dataset', args.dataset)

    if hasattr(args, 'verbose'):
        self._verbose = args.verbose

    num_workers = 4
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        num_workers = 0
        if self.use_cuda:
            torch.cuda.manual_seed(args.seed)

    # ######################################################################
    # Data
    kwargs = {
        'num_workers': num_workers, 'pin_memory': True
    } if self.use_cuda else {}
    if args.dataset.startswith('cifar'):
        self.train_loader, self.test_loader = cifar.get_data(
            32, args.datadir, dataset=dataset, batch_size=args.batch_size,
            trainsize=args.trainsize, **kwargs)
    elif args.dataset == 'tiny_imagenet':
        self.train_loader, self.test_loader = tiny_imagenet.get_data(
            64, args.datadir, val_only=False, batch_size=args.batch_size,
            trainsize=args.trainsize, distributed=False, **kwargs)

    # ######################################################################
    # Build the network based on the type parameter. θ are the optimal
    # hyperparameters found by cross validation.
    if type_.startswith('ref'):
        θ = (0.1, 0.9, 1e-4)
    else:
        θ = (0.45, 0.8, 1e-4)
        # raise ValueError('Unknown type')
    lr, mom, wd = θ
    # If the parameters were provided as an option, use them
    lr = config.get('lr', lr)
    mom = config.get('mom', mom)
    wd = config.get('wd', wd)
    wd1 = config.get('wd1', wd)
    std = config.get('std', 1.0)

    # Build the network
    self.model = MixedNet(args.dataset, type_, use_dwt, C, wd, wd1)
    init = lambda x: net_init(x, std)
    self.model.apply(init)

    # Split across GPUs
    if torch.cuda.device_count() > 1 and config.get('num_gpus', 0) > 1:
        self.model = nn.DataParallel(self.model)
        model = self.model.module
    else:
        model = self.model
    if self.use_cuda:
        self.model.cuda()

    # ######################################################################
    # Build the optimizer - use separate parameter groups for the gain
    # and convolutional layers
    default_params = list(model.fc1.parameters())
    gain_params = []
    for name, module in model.net.named_children():
        params = [p for p in module.parameters() if p.requires_grad]
        if name.startswith('gain'):
            gain_params += params
        else:
            default_params += params

    self.optimizer, self.scheduler = optim.get_optim(
        'sgd', default_params, init_lr=lr, steps=args.steps, wd=wd,
        gamma=0.2, momentum=mom, max_epochs=args.epochs)

    if len(gain_params) > 0:
        # Get special optimizer parameters
        lr1 = config.get('lr1', lr)
        gamma1 = config.get('gamma1', 0.2)
        mom1 = config.get('mom1', mom)
        opt1 = config.get('opt1', 'sgd')
        if lr1 is None:
            lr1 = lr
        if mom1 is None:
            mom1 = mom
        # Do not use the optimizer's weight decay, call a special method to
        # do it.
        self.optimizer1, self.scheduler1 = optim.get_optim(
            opt1, gain_params, init_lr=lr1, steps=args.steps, wd=0,
            gamma=gamma1, momentum=mom1, max_epochs=args.epochs)

    if self.verbose:
        print(self.model)
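
# A sketch of the config dict this `_setup` expects, in the style of the old
# ray.tune Trainable API. The key names come from the `config.get(...)` calls
# above; the concrete values and the contents of the `args` namespace
# (datadir, batch_size, trainsize, steps, epochs, seed, ...) are assumptions
# shown only for illustration:
#
#     config = {
#         'args': args,          # argparse.Namespace with the fallback options
#         'type': 'gainA',       # anything not starting with 'ref' gets the
#                                # (0.45, 0.8, 1e-4) defaults for (lr, mom, wd)
#         'dwt': False,
#         'num_channels': 64,
#         'lr1': 0.01,           # learning rate just for the gain layers
#         'opt1': 'adam',        # optimizer type just for the gain layers
#         'num_gpus': 1,
#     }
#     trainable._setup(config)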
def _setup(self, config):
    args = config.pop("args")
    vars(args).update(config)
    # Parameters like learning rate and momentum can be specified by the
    # config search space. If not specified, fall back to the args
    type_ = config.get('type', 'ref')
    lr = config.get('lr', args.lr)
    mom = config.get('mom', args.mom)
    wd = config.get('wd', args.wd)
    C = config.get('num_channels', args.C)
    dataset = config.get('dataset', args.dataset)
    num_gpus = config.get('num_gpus', args.num_gpus)

    # Get optimizer parameters for the gain layer
    mom1 = config.get('mom1', args.mom1)
    lr1 = config.get('lr1', args.lr1)
    wd1 = config.get('wd1', args.wd1)
    opt1 = config.get('opt1', args.opt1)
    gamma1 = config.get('gamma1', 0.2)
    if mom1 is None:
        mom1 = mom
    if lr1 is None:
        lr1 = lr

    # Get nonlinearity options
    pixel_nl = config.get('pixel_nl', args.pixel_nl)
    lp_nl = config.get('lp_nl', args.lp_nl)
    bp_nl = config.get('bp_nl', args.bp_nl)
    lp_q = config.get('lp_q', args.lp_q)
    bp_q = config.get('bp_q', args.bp_q)
    lp_thresh = config.get('lp_thresh', 1)
    bp_thresh = config.get('bp_thresh', 1)

    if hasattr(args, 'verbose'):
        self._verbose = args.verbose

    num_workers = 4
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        num_workers = 0
        if self.use_cuda:
            torch.cuda.manual_seed(args.seed)
    else:
        args.seed = random.randint(0, 10000)

    # ######################################################################
    # Data
    kwargs = {
        'num_workers': num_workers, 'pin_memory': True
    } if self.use_cuda else {}
    if dataset.startswith('cifar'):
        self.train_loader, self.test_loader = cifar.get_data(
            32, args.datadir, dataset=dataset, batch_size=args.batch_size,
            trainsize=args.trainsize, **kwargs)
    elif dataset == 'tiny_imagenet':
        self.train_loader, self.test_loader = tiny_imagenet.get_data(
            64, args.datadir, val_only=False, batch_size=args.batch_size,
            trainsize=args.trainsize, distributed=False, **kwargs)

    # ######################################################################
    # Build the network based on the type parameter.
    self.model = MixedNet(dataset, type_, C, wd, wd1, pixel_nl=pixel_nl,
                          lp_nl=lp_nl, bp_nl=bp_nl,
                          lp_nl_kwargs=dict(q=lp_q, thresh=lp_thresh),
                          bp_nl_kwargs=dict(q=bp_q, thresh=bp_thresh))
    init = lambda x: net_init(x, 1.0)
    self.model.apply(init)

    # Split across GPUs
    if torch.cuda.device_count() > 1 and num_gpus > 1:
        self.model = nn.DataParallel(self.model)
        model = self.model.module
    else:
        model = self.model
    if self.use_cuda:
        self.model.cuda()

    # ######################################################################
    # Build the optimizer - use separate parameter groups for the gain
    # and convolutional layers
    default_params = list(model.fc1.parameters())
    gain_params = []
    for name, module in model.net.named_children():
        params = [p for p in module.parameters() if p.requires_grad]
        if name.startswith('wave'):
            gain_params += params
        else:
            default_params += params

    self.optimizer, self.scheduler = optim.get_optim(
        'sgd', default_params, init_lr=lr, steps=args.steps, wd=0,
        gamma=0.2, momentum=mom, max_epochs=args.epochs)

    if len(gain_params) > 0:
        # Do not use the optimizer's weight decay, call a special method to
        # do it.
        self.optimizer1, self.scheduler1 = optim.get_optim(
            opt1, gain_params, init_lr=lr1, steps=args.steps, wd=0,
            gamma=gamma1, momentum=mom1, max_epochs=args.epochs)

    if self.verbose:
        print(self.model)
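
# The two optimizer/scheduler pairs built above are presumably stepped side by
# side in the training loop, which is not shown in this section. A hedged
# sketch of that pattern (assuming `torch.nn.functional as F`, and that the
# gain-layer weight decay is applied by the model's own method rather than by
# the optimizer, as the wd=0 comment above suggests):
#
#     for data, target in self.train_loader:
#         self.optimizer.zero_grad()
#         if hasattr(self, 'optimizer1'):
#             self.optimizer1.zero_grad()
#         loss = F.cross_entropy(self.model(data), target)
#         loss.backward()
#         self.optimizer.step()
#         if hasattr(self, 'optimizer1'):
#             self.optimizer1.step()
#     self.scheduler.step()
#     if hasattr(self, 'scheduler1'):
#         self.scheduler1.step()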