    # Layer substitutions for the input-whitening network: ReLU -> CELU(0.3),
    # BatchNorm -> GhostBatchNorm with 16 splits and no learnable weight.
    nn.ReLU: bot.partial(nn.CELU, 0.3),
    bot.BatchNorm: bot.partial(bot.GhostBatchNorm, num_splits=16, weight=False)})

print('=====> Building model (with input whitening network)')
net = bot.getResNet8BOT(input_whitening_net)

print('=====> Preparing for training')
epochs, batch_size, ema_epochs = args.epochs, args.batch_size, 2
#epochs = 10
transforms = (bot.Crop(32, 32), bot.FlipLR())

# Learning rate schedules
if args.lr_scheduler == 'PiecewiseLinear':
    # Knots are given in epochs and converted to optimiser steps; values are
    # divided by batch_size so the lr matches the per-batch scaling of the loss.
    lr_schedule = lambda knots, vals, batch_size: bot.PiecewiseLinear(
        np.array(knots) * len(train_batches(batch_size)),
        np.array(vals) / batch_size)
    opt_params = {'lr': lr_schedule([0, epochs / 5, epochs - ema_epochs],
                                    [0.0, 1.0, 0.1], batch_size),
                  'weight_decay': bot.Const(args.weight_decay * batch_size),
                  'momentum': bot.Const(0.9)}
    # Biases get a 64x larger learning rate and a 64x smaller weight decay.
    opt_params_bias = {'lr': lr_schedule([0, epochs / 5, epochs - ema_epochs],
                                         [0.0, 1.0 * 64, 0.1 * 64], batch_size),
                       'weight_decay': bot.Const(args.weight_decay * batch_size / 64),
                       'momentum': bot.Const(0.9)}
elif args.lr_scheduler == 'Piecewise':
    base = 0.9
    bot.opt_flag = False
    lr_schedule2 = lambda base, total_epochs: bot.Piecewise(base, total_epochs)
    opt_params = {'lr': lr_schedule2(base, epochs),
                  'weight_decay': bot.Const(args.weight_decay * batch_size),
                  'momentum': bot.Const(0.9)}
    opt_params_bias = {'lr': lr_schedule2(base, epochs),
                       'weight_decay': bot.Const(args.weight_decay * batch_size / 64),
                       'momentum': bot.Const(0.9)}
else:  # SAM: cosine-annealed learning rate
    bot.opt_flag = False
    lr_schedule3 = lambda base_lr, T_max, eta_min: bot.cosineAnnealingLR(base_lr, T_max, eta_min)
    base_lr, T_max, eta_min = 0.1, epochs, 0
    opt_params = {'lr': lr_schedule3(base_lr, T_max, eta_min),
                  'weight_decay': bot.Const(args.weight_decay * batch_size),
                  'momentum': bot.Const(0.9)}
    opt_params_bias = {'lr': lr_schedule3(base_lr, T_max, eta_min),
                       'weight_decay': bot.Const(args.weight_decay * batch_size / 64),
                       'momentum': bot.Const(0.9)}

# Split trainable parameters into bias and non-bias groups so the two
# hyperparameter sets above can be applied separately.
is_bias = bot.group_by_key(('bias' in k, v)
                           for k, v in bot.trainable_params(net).items())
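# For reference: a minimal sketch of the schedule the PiecewiseLinear branch
# relies on, assuming bot.PiecewiseLinear linearly interpolates between
# (knot, value) pairs indexed by optimiser step. This is an assumption about
# its semantics, not bot's actual implementation.
import numpy as np

class _PiecewiseLinearSketch:
    def __init__(self, knots, vals):
        self.knots, self.vals = knots, vals  # step indices and lr values

    def __call__(self, step):
        # Linear interpolation; values outside the knot range are clamped.
        return float(np.interp(step, self.knots, self.vals))

# With knots [0, epochs/5, epochs - ema_epochs] (converted to steps) and values
# [0.0, 1.0, 0.1] (divided by batch_size), the lr ramps up over the first fifth
# of training, then decays linearly until the EMA epochs begin.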
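# Similarly, a sketch of the cosine-annealing schedule assumed for the SAM
# branch. bot.cosineAnnealingLR is assumed to follow the standard formula
# (as in PyTorch's CosineAnnealingLR):
#   lr(t) = eta_min + (base_lr - eta_min) * (1 + cos(pi * t / T_max)) / 2
import math

def _cosine_annealing_sketch(base_lr, T_max, eta_min, t):
    # t is the current epoch (or step) in [0, T_max].
    return eta_min + (base_lr - eta_min) * (1 + math.cos(math.pi * t / T_max)) / 2

# e.g. with base_lr=0.1, T_max=epochs, eta_min=0: the lr starts at 0.1 and
# decays smoothly to 0 by the final epoch.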
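# And a sketch of the grouping assumed of bot.group_by_key: collect the second
# element of each pair under its key, so the trainable parameters split into
# {False: [non-bias params], True: [bias params]} for the two parameter groups.
from collections import defaultdict

def _group_by_key_sketch(pairs):
    groups = defaultdict(list)
    for key, value in pairs:
        groups[key].append(value)
    return dict(groups)

# e.g. _group_by_key_sketch([(True, 'fc.bias'), (False, 'fc.weight')])
#   -> {True: ['fc.bias'], False: ['fc.weight']}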