def test_bayesopt_searcher():
    """Check that BayesOptSearcher serves its ``lazy_configs`` first.

    Ten configurations are drawn from a RandomSearcher and handed to the
    BayesOptSearcher as ``lazy_configs``.  The first ten configurations the
    BayesOptSearcher returns must each be one of those; ten further
    get/update rounds are then run with random rewards.
    """

    @at.args(lr=at.Real(1e-3, 1e-2, log=True), wd=at.Real(1e-3, 1e-2))
    def train_fn(args, reporter):
        # Synthetic objective: only used here to obtain the search space
        # (``train_fn.cs``); it reports a random accuracy per epoch.
        for epoch in range(10):
            exponent = np.random.uniform(epoch, 2 * epoch)
            dummy_accuracy = 1 - np.power(1.8, -exponent)
            reporter(epoch=epoch,
                     accuracy=dummy_accuracy,
                     lr=args.lr,
                     wd=args.wd)

    # Warm-start configurations sampled at random from the same space.
    random_searcher = at.searcher.RandomSearcher(train_fn.cs)
    seed_configs = [random_searcher.get_config() for _ in range(10)]

    searcher = at.searcher.BayesOptSearcher(train_fn.cs,
                                            lazy_configs=seed_configs)

    config = searcher.get_config()
    for step in range(20):
        if step < 10:
            # The first ten proposals must come from the warm-start list.
            assert config in seed_configs
        reward = np.random.uniform(0.1, 0.9)
        searcher.update(config, reward, done=True)
        config = searcher.get_config()
x = self.pool(F.relu(self.conv2(x))) x = x.view(-1, 16 * 4 * 4) x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) x = self.fc3(x) return x ############################################################### # - Convert the Training Function to Be Searchable # # We can simply add a decorator :func:`autotorch.args` to convert the `train_mnist` function argument values to be tuned by AutoTorch's hyperparameter optimizer. In the example below, we specify that the lr argument is a real-value that should be searched on a log-scale in the range 0.01 - 0.2. Before passing lr to your train function, AutoTorch always selects an actual floating point value to assign to lr so you do not need to make any special modifications to your existing code to accommodate the hyperparameter search. @at.args( lr=at.Real(0.01, 0.2, log=True), wd=at.Real(1e-4, 5e-4, log=True), net=Net(), epochs=5, ) def at_train_mnist(args, reporter): return train_mnist(args, reporter) ############################################################### # - Create the Scheduler and Launch the Experiment scheduler = at.scheduler.FIFOScheduler(at_train_mnist, resource={ 'num_cpus': 4, 'num_gpus': 1
layer1_channels=at.Int(8,64), last_stride=at.Int(1,2), stage_blocks=at.List( at.Int(1,6), at.Int(1,6), at.Int(1,6), at.Int(1,6), ), stage_expands=at.List( at.Int(2,6), at.Int(2,6), at.Int(2,6), at.Int(2,6), ), stage_planes_ratio=at.List( at.Real(1.0,4.0), at.Real(1.0,4.0), at.Real(1.0,4.0), at.Real(1.0,4.0), at.Real(1.0,4.0), ), ) class GenConfig1: def __init__(self, **kwargs): d = {} d.update(**kwargs) for k, v in d.items(): setattr(self, k, v) self.m = 1.0 def stage_blocks_multi(self, m):
class GlobalAvgPool2d(nn.Module): def __init__(self): """Global average pooling over the input's spatial dimensions""" super(GlobalAvgPool2d, self).__init__() def forward(self, inputs): return nn.functional.adaptive_avg_pool2d(inputs, 1).view(inputs.size(0), -1) @at.obj( bottleneck_ratio=1, #at.Int(1, 2), initial_width=at.Int(16, 320), slope=at.Real(24, 128, log=True), quantized_param=at.Real(2.0, 3.2), network_depth=at.Int(12, 28), group_width=at.Int(8, 240), ) class GenConfg(BaseGen): def dump_config(self, config_file=None): config = configparser.ConfigParser() config['DEFAULT'] = {'bottleneck_ratio': '1'} config['net'] = {} self.group_width = self.group_width if self.group_width <= self.initial_width \ else self.initial_width self.group_width = int(self.group_width // 8 * 8) #self.initial_width = int(self.initial_width // self.group_width * self.group_width) for k, v in self.items(): config['net'][k] = str(v)
import numpy as np
import autotorch as at


@at.args(lr=at.Real(1e-3, 1e-2, log=True), wd=at.Real(1e-3, 1e-2))
def train_fn(args, reporter):
    """Report a synthetic random accuracy for ten epochs.

    ``lr`` and ``wd`` are searched over real-valued ranges (``lr`` on a log
    scale) and are echoed back through ``reporter`` with every epoch.
    """
    for epoch in range(10):
        exponent = np.random.uniform(epoch, 2 * epoch)
        dummy_accuracy = 1 - np.power(1.8, -exponent)
        reporter(epoch=epoch,
                 accuracy=dummy_accuracy,
                 lr=args.lr,
                 wd=args.wd)


@at.args(lr=at.Choice(1e-3, 1e-2), wd=at.Choice(1e-3, 1e-2))
def rl_train_fn(args, reporter):
    """Same synthetic objective as ``train_fn`` over a categorical space.

    ``lr`` and ``wd`` are each picked from two fixed choices.
    """
    for epoch in range(10):
        exponent = np.random.uniform(epoch, 2 * epoch)
        dummy_accuracy = 1 - np.power(1.8, -exponent)
        reporter(epoch=epoch,
                 accuracy=dummy_accuracy,
                 lr=args.lr,
                 wd=args.wd)


def test_fifo_scheduler():
    """Run ten trials of ``train_fn`` through a FIFOScheduler on CPU only."""
    resource = {'num_cpus': 2, 'num_gpus': 0}
    scheduler = at.scheduler.FIFOScheduler(train_fn,
                                           resource=resource,
                                           num_trials=10,
                                           reward_attr='accuracy',
                                           time_attr='epoch')
    scheduler.run()
    scheduler.join_jobs()
@at.obj( name=at.Choice('auto', 'torch'), ) class myobj: def __init__(self, name): self.name = name @at.func( framework=at.Choice('mxnet', 'pytorch'), ) def myfunc(framework): return framework @at.args( a=at.Real(1e-3, 1e-2, log=True), b=at.Real(1e-3, 1e-2), c=at.Int(1, 10), d=at.Choice('a', 'b', 'c', 'd'), e=at.Bool(), f=at.List( at.Int(1, 2), at.Choice(4, 5), ), g=at.Dict( a=at.Real(0, 10), obj=myobj(), ), h=at.Choice('test', myobj()), i=myfunc(), )
def train_network(args, gpu_manager, split_idx, return_dict):
    """Train one model, then random-search augmentation sub-policies on it.

    The function requests a GPU from ``gpu_manager``, trains ``args.model``
    for ``args.epochs`` epochs on the training subset that
    ``subsample_dataset`` returns for ``split_idx``, and then evaluates
    ``args.num_trials`` randomly sampled two-operation sub-policies by
    applying each one to the validation subset and measuring top-1 accuracy.
    The five best sub-policies (according to the searcher) are stored in
    ``return_dict[split_idx]``.

    Args:
        args: namespace of settings; the attributes read here are ``model``,
            ``lr``, ``momentum``, ``wd``, ``amp``, ``base_size``,
            ``crop_size``, ``dataset``, ``data_dir``, ``nfolds``,
            ``reduced_size``, ``batch_size``, ``workers``, ``epochs`` and
            ``num_trials``.
        gpu_manager: object exposing ``request()`` and ``release(gpu)``.
        split_idx: index of the data split handled by this call; also the
            key written into ``return_dict``.
        return_dict: mapping that receives the list of selected policies.
            NOTE(review): presumably a multiprocessing manager dict shared
            with the parent process; confirm against the caller.

    Returns:
        None. The result is delivered through ``return_dict``.
    """
    gpu = gpu_manager.request()
    try:
        print('gpu: {}, split_idx: {}'.format(gpu, split_idx))

        # single gpu training only for evaluating the configurations
        model = encoding.models.get_model(args.model)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.wd)
        model.cuda(gpu)
        criterion.cuda(gpu)

        if args.amp:
            model, optimizer = amp.initialize(model, optimizer,
                                              opt_level='O2')

        # init dataloader
        # Fall back to crop_size / 0.875 when no base size was given.  The
        # resolved value is what gets passed to get_transform below (the
        # fallback used to be computed and then ignored).
        if args.base_size is not None:
            base_size = args.base_size
        else:
            base_size = int(1.0 * args.crop_size / 0.875)
        transform_train, _ = get_transform(args.dataset, base_size,
                                           args.crop_size)
        # NOTE(review): the dataset name is hard-coded to 'imagenet' while
        # the transforms are selected by args.dataset; confirm intended.
        total_set = encoding.datasets.get_dataset('imagenet',
                                                  root=args.data_dir,
                                                  transform=transform_train,
                                                  train=True,
                                                  download=True)
        trainset, valset = subsample_dataset(total_set, args.nfolds,
                                             split_idx, args.reduced_size)
        train_loader = torch.utils.data.DataLoader(
            trainset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            drop_last=True,
            pin_memory=True)

        # lr scheduler
        lr_scheduler = LR_Scheduler('cos',
                                    base_lr=args.lr,
                                    num_epochs=args.epochs,
                                    iters_per_epoch=len(train_loader),
                                    quiet=True)

        def train(epoch):
            """Run one training epoch over ``train_loader``."""
            model.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                # The scheduler is stepped per iteration, not per epoch.
                lr_scheduler(optimizer, batch_idx, epoch, 0)
                data, target = data.cuda(gpu), target.cuda(gpu)
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                if args.amp:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                optimizer.step()

        def validate(auto_policy):
            """Return average top-1 accuracy on ``valset``.

            When ``auto_policy`` is not None it is applied as the first
            transform of the validation pipeline.
            """
            model.eval()
            top1 = AverageMeter()
            _, transform_val = get_transform(args.dataset, base_size,
                                             args.crop_size)
            if auto_policy is not None:
                transform_val.transforms.insert(0, Augmentation(auto_policy))
            valset.transform = transform_val
            val_loader = torch.utils.data.DataLoader(
                valset,
                batch_size=args.batch_size,
                shuffle=False,
                num_workers=args.workers,
                pin_memory=True)
            for data, target in val_loader:
                data, target = data.cuda(gpu), target.cuda(gpu)
                with torch.no_grad():
                    output = model(data)
                    acc1 = accuracy(output, target, topk=(1, ))
                    top1.update(acc1[0], data.size(0))
            return top1.avg

        for epoch in tqdm(range(0, args.epochs)):
            train(epoch)

        # Search space: a sub-policy is two (operation, real in [0, 1],
        # real in [0, 1]) triples, with operations drawn from augment_dict.
        ops = list(augment_dict.keys())
        sub_policy = at.List(
            at.List(at.Choice(*ops), at.Real(0, 1), at.Real(0, 1)),
            at.List(at.Choice(*ops), at.Real(0, 1), at.Real(0, 1)),
        )
        searcher = at.searcher.RandomSearcher(sub_policy.cs)
        # avoid same defaults: the first configuration is drawn and
        # deliberately discarded before the trials start.
        searcher.get_config()
        for _ in range(args.num_trials):
            config = searcher.get_config()
            auto_policy = sub_policy.sample(**config)
            acc = validate(auto_policy)
            searcher.update(config, acc.item(), done=True)
    finally:
        # Always hand the GPU back, even if training or validation raised.
        gpu_manager.release(gpu)

    topK_cfgs = searcher.get_topK_configs(5)
    policy = [sub_policy.sample(**cfg) for cfg in topK_cfgs]
    return_dict[split_idx] = policy
import numpy as np import autotorch as at from nose.plugins.attrib import attr @at.args( lr=at.Real(1e-3, 1e-2, log=True), wd=at.Real(1e-3, 1e-2)) def train_fn(args, reporter): for e in range(10): dummy_accuracy = 1 - np.power(1.8, -np.random.uniform(e, 2*e)) reporter(epoch=e, accuracy=dummy_accuracy, lr=args.lr, wd=args.wd) @at.args( lr=at.Choice(1e-3, 1e-2), wd=at.Choice(1e-3, 1e-2)) def rl_train_fn(args, reporter): for e in range(10): dummy_accuracy = 1 - np.power(1.8, -np.random.uniform(e, 2*e)) reporter(epoch=e, accuracy=dummy_accuracy, lr=args.lr, wd=args.wd) def test_fifo_scheduler(): scheduler = at.scheduler.FIFOScheduler(train_fn, resource={'num_cpus': 2, 'num_gpus': 0}, num_trials=10, reward_attr='accuracy', time_attr='epoch') scheduler.run() scheduler.join_jobs() def test_hyperband_scheduler():
help='checkpoint path (default: None)') parser.add_argument('--debug', action='store_true', default=False, help='debug if needed') args = parser.parse_args() return args @at.args( batch_size=64, num_workers=2, num_gpus=1, model='cifar_resnet20_v1', j=4, lr=at.Real(1e-2, 1e-1, log=True), momentum=0.9, wd=at.Real(1e-5, 1e-3, log=True), epochs=20, ) def train_cifar(args, reporter): print('args', args) batch_size = args.batch_size num_gpus = args.num_gpus batch_size *= max(1, num_gpus) context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()] num_workers = args.num_workers model_name = args.model
from mmcv.cnn import get_model_complexity_info except ImportError: raise ImportError('Please upgrade mmcv to >0.6.2') @at.obj( block=at.Choice('BasicBlock', 'Bottleneck'), base_channels=at.Int(8, 64), stage_blocks=at.List( at.Int(1, 10), at.Int(1, 10), at.Int(1, 10), at.Int(1, 10), ), stage_planes_ratio=at.List( at.Real(1.0, 4.0), at.Real(1.0, 4.0), at.Real(1.0, 4.0), ), ) class GenConfigBackbone: def __init__(self, **kwargs): d = {} d.update(**kwargs) for k, v in d.items(): setattr(self, k, v) self.m = 1.0 def stage_blocks_multi(self, m): self.m = m
# :class:`autotorch.searcher.RandomSearcher` will try:
a = at.Int(lower=0, upper=10, default=2)
print(a.default)

###############################################################
# Pick a random value.
print(a.rand)

###############################################################
# - Real Space :class:`autotorch.space.Real`
#
# A real number between the lower and upper values is chosen during
# searcher sampling.
b = at.Real(lower=1e-4, upper=1e-2)
print(b)

###############################################################
# Real space in log scale:
c = at.Real(lower=1e-4, upper=1e-2, log=True)
print(c)

###############################################################
# - Choice Space :class:`autotorch.space.Choice`
#
# A Choice space picks one value out of all the listed values during
# searcher sampling.
d = at.Choice('Monday', 'Tuesday', 'Wednesday')
print(d)