def main():
    global args, best_prec1
    args = parser.parse_args()

    # ensuring reproducibility
    SEED = 42
    torch.manual_seed(SEED)
    torch.backends.cudnn.benchmark = False

    kwargs = {'num_workers': 1, 'pin_memory': True}
    device = torch.device("cuda")
    num_epochs = 7

    # create model
    model = WideResNet(args.layers, 10, args.widen_factor,
                       dropRate=args.droprate).to(device)
    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate,
                                 weight_decay=args.weight_decay)

    # instantiate loaders
    train_loader = get_data_loader(args.data_dir, args.batch_size, **kwargs)
    test_loader = get_test_loader(args.data_dir, 128, **kwargs)

    tic = time.time()
    for epoch in range(1, num_epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)
    toc = time.time()
    print("Time Elapsed: {}s".format(toc - tic))
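# get_data_loader / get_test_loader are repo helpers that are not shown in
# this excerpt. Below is a minimal sketch of what they might look like for
# CIFAR-10 with torchvision, assuming standard augmentation and channel
# statistics; only the call signatures are taken from main() above, and the
# transform choices are assumptions.
import torch
from torchvision import datasets, transforms

CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

def get_data_loader(data_dir, batch_size, **kwargs):
    # augmented training split
    train_set = datasets.CIFAR10(
        data_dir, train=True, download=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
        ]))
    return torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                       shuffle=True, **kwargs)

def get_test_loader(data_dir, batch_size, **kwargs):
    # unaugmented test split
    test_set = datasets.CIFAR10(
        data_dir, train=False, download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
        ]))
    return torch.utils.data.DataLoader(test_set, batch_size=batch_size,
                                       shuffle=False, **kwargs)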
def main():
    global args, best_prec1
    args = parser.parse_args()

    # ensuring reproducibility
    SEED = 42
    torch.manual_seed(SEED)
    torch.backends.cudnn.benchmark = False

    kwargs = {'num_workers': 1, 'pin_memory': True}
    device = torch.device("cuda")
    num_epochs_transient = 2
    num_epochs_steady = 7
    perc_to_remove = 10

    # create model
    model = WideResNet(args.layers, 10, args.widen_factor,
                       dropRate=args.droprate).to(device)
    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate,
                                 weight_decay=args.weight_decay)

    # instantiate loaders
    train_loader = get_data_loader(args.data_dir, args.batch_size, **kwargs)
    test_loader = get_test_loader(args.data_dir, 128, **kwargs)

    tic = time.time()
    seen_losses = None

    # transient phase: train normally, tracking per-sample losses in epoch 1
    for epoch in range(1, 3):
        if epoch == 1:
            seen_losses = train_transient(model, device, train_loader,
                                          optimizer, epoch, track=True)
        else:
            train_transient(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)

    # steady-state phase (epoch 3): rebuild the loader so the lowest-loss
    # samples are dropped (except a random subset that is re-admitted) and the
    # remaining samples are drawn in proportion to their tracked losses
    for epoch in range(3, 4):
        # flatten the nested loss records; each record's loss is at index 1
        seen_losses = [v for sublist in seen_losses for v in sublist]
        # sort sample indices by tracked loss, highest first
        sorted_loss_idx = sorted(range(len(seen_losses)),
                                 key=lambda k: seen_losses[k][1], reverse=True)
        # drop the perc_to_remove% of samples with the lowest losses
        n_removed = int((perc_to_remove / 100) * len(sorted_loss_idx))
        removed = sorted_loss_idx[-n_removed:]
        sorted_loss_idx = sorted_loss_idx[:-n_removed]
        # re-admit a random third of the removed samples
        to_add = list(np.random.choice(removed, int(0.33 * len(removed)),
                                       replace=False))
        sorted_loss_idx = sorted_loss_idx + to_add
        sorted_loss_idx.sort()
        # sampling weights are the tracked losses of the kept samples
        weights = [seen_losses[idx][1] for idx in sorted_loss_idx]
        train_loader = get_weighted_loader(args.data_dir, 64 * 2, weights,
                                           **kwargs)
        seen_losses = train_steady_state(model, device, train_loader,
                                         optimizer, epoch)
        test(model, device, test_loader, epoch)

    # remaining epochs: plain training on the reweighted loader
    for epoch in range(4, 8):
        train_transient(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)

    toc = time.time()
    print("Time Elapsed: {}s".format(toc - tic))
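# get_weighted_loader is not defined in this excerpt. A plausible sketch,
# assuming it draws CIFAR-10 training examples in proportion to the supplied
# per-sample weights via WeightedRandomSampler, so high-loss samples are
# revisited more often. Everything here except the call signature is an
# assumption.
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms

def get_weighted_loader(data_dir, batch_size, weights, **kwargs):
    train_set = datasets.CIFAR10(data_dir, train=True, download=True,
                                 transform=transforms.ToTensor())
    # NOTE: WeightedRandomSampler samples indices from range(len(weights)),
    # so weights is assumed here to hold one entry per training example
    # (dropped samples can simply be given weight 0)
    sampler = WeightedRandomSampler(
        weights=torch.as_tensor(weights, dtype=torch.double),
        num_samples=len(weights),
        replacement=True)
    # a sampler replaces shuffle=True
    return DataLoader(train_set, batch_size=batch_size, sampler=sampler,
                      **kwargs)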
def main(args):
    # writer = SummaryWriter('./runs/CIFAR_100_exp')
    train_transform = transforms.Compose([
        transforms.Pad(4, padding_mode='reflect'),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32),
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408),
                             (0.2675, 0.2565, 0.2761)),
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408),
                             (0.2675, 0.2565, 0.2761)),
    ])
    train_dataset = datasets.CIFAR100('./dataset', train=True,
                                      transform=train_transform, download=True)
    test_dataset = datasets.CIFAR100('./dataset', train=False,
                                     transform=test_transform, download=True)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False, num_workers=args.num_workers)

    # frozen teacher, loaded from a pretrained checkpoint
    Teacher = WideResNet(depth=args.teacher_depth, num_classes=100,
                         widen_factor=args.teacher_width_factor, drop_rate=0.3)
    Teacher.cuda()
    Teacher.eval()
    teacher_weight_path = path.join(args.teacher_root_path,
                                    'model_best.pth.tar')
    t_load = torch.load(teacher_weight_path)['state_dict']
    Teacher.load_state_dict(t_load)

    Student = WideResNet(depth=args.student_depth, num_classes=100,
                         widen_factor=args.student_width_factor, drop_rate=0.0)
    Student.cuda()
    cudnn.benchmark = True

    optimizer = torch.optim.SGD(Student.parameters(), lr=args.lr, momentum=0.9,
                                weight_decay=5e-4, nesterov=True)
    opt_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[60, 120, 160], gamma=2e-1)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    best_acc5 = 0
    best_flag = False
    for epoch in range(args.total_epochs):
        for iter_, data in enumerate(train_loader):
            images, labels = data
            images, labels = images.cuda(), labels.cuda()
            # both networks return (logits, *intermediate activations)
            t_outs, *t_acts = Teacher(images)
            s_outs, *s_acts = Student(images)
            cls_loss = criterion(s_outs, labels)

            # statistical matching and AdaIN losses
            if args.aux_flag == 0:
                aux_loss_1 = SM_Loss(t_acts[2], s_acts[2])  # group conv2
            else:
                aux_loss_1 = 0
                for i in range(3):
                    aux_loss_1 += SM_Loss(t_acts[i], s_acts[i])

            # AdaIN-transfer between the teacher's and student's last feature
            # maps, then pass the result through the teacher's classifier head
            # and match it against the teacher's own logits
            F_hat = AdaIN(t_acts[2], s_acts[2])
            interim_out_q = Teacher.bn1(F_hat)
            interim_out_q = Teacher.relu(interim_out_q)
            interim_out_q = F.avg_pool2d(interim_out_q, 8)
            interim_out_q = interim_out_q.view(-1, Teacher.last_ch)
            q = Teacher.fc(interim_out_q)
            aux_loss_2 = torch.mean(torch.pow(t_outs - q, 2))

            total_loss = cls_loss + aux_loss_1 + aux_loss_2
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        top1, top5 = evaluator(test_loader, Student)
        if top1 > best_acc:
            best_acc = top1
            best_acc5 = top5
            best_flag = True
        if best_flag:
            state = {'epoch': epoch + 1,
                     'state_dict': Student.state_dict(),
                     'optimizer': optimizer.state_dict()}
            save_ckpt(state, is_best=best_flag,
                      root_path=args.student_weight_path)
            best_flag = False
        opt_scheduler.step()
        # writer.add_scalar('acc/top1', top1, epoch)
        # writer.add_scalar('acc/top5', top5, epoch)

    # writer.close()
    print("Best top 1 acc: {}".format(best_acc))
    print("Best top 5 acc: {}".format(best_acc5))
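# SM_Loss and AdaIN are used above but not defined in this excerpt. A minimal
# sketch, assuming SM_Loss matches per-channel feature statistics (mean and
# standard deviation) between teacher and student activations, and AdaIN
# re-normalizes the student feature map to the teacher's per-channel
# statistics so it can be pushed through the teacher's head. The argument
# order (teacher first) mirrors the calls above; the exact loss form is an
# assumption.
import torch

def SM_Loss(t_feat, s_feat, eps=1e-5):
    # per-channel mean/std over the spatial dimensions of (N, C, H, W) maps
    t_mu, s_mu = t_feat.mean(dim=(2, 3)), s_feat.mean(dim=(2, 3))
    t_std = t_feat.var(dim=(2, 3)).add(eps).sqrt()
    s_std = s_feat.var(dim=(2, 3)).add(eps).sqrt()
    return torch.mean((t_mu - s_mu) ** 2) + torch.mean((t_std - s_std) ** 2)

def AdaIN(t_feat, s_feat, eps=1e-5):
    # F_hat = sigma_t * (F_s - mu_s) / sigma_s + mu_t, computed per channel
    s_mu = s_feat.mean(dim=(2, 3), keepdim=True)
    s_std = s_feat.var(dim=(2, 3), keepdim=True).add(eps).sqrt()
    t_mu = t_feat.mean(dim=(2, 3), keepdim=True)
    t_std = t_feat.var(dim=(2, 3), keepdim=True).add(eps).sqrt()
    return t_std * (s_feat - s_mu) / s_std + t_mu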
if args.model == 'resnet110':  # reconstructed guard; the excerpt starts mid-chain
    net = resnet110(num_classes=n_classes)
elif args.model == 'wideresnet':
    net = WideResNet(depth=28, widen_factor=10, dropRate=0.3,
                     num_classes=n_classes)
elif args.model == 'resnext':
    net = CifarResNeXt(cardinality=8, depth=29, base_width=64,
                       widen_factor=4, nlabels=n_classes)
else:
    raise Exception('Invalid model name')

# create optimizer
optimizer = torch.optim.SGD(net.parameters(), args.lr,
                            momentum=args.momentum, nesterov=args.nesterov,
                            weight_decay=args.weight_decay)

net.to('cuda')
if torch.cuda.device_count() > 1:
    net = torch.nn.DataParallel(net)
cudnn.benchmark = True

criterion = nn.CrossEntropyLoss().cuda()

# trainer
if args.adversarial:
    if args.regu == 'no':
        trainer = AdversarialTrainer(net, criterion, optimizer, args)
    elif args.regu == 'random-svd':
        trainer = AdversarialOrthReguTrainer(net, criterion, optimizer, args)
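# AdversarialTrainer / AdversarialOrthReguTrainer are project classes not
# shown in this excerpt. A rough sketch of a plausible AdversarialTrainer,
# assuming single-step FGSM adversarial training on inputs in [0, 1]; the
# constructor signature mirrors the calls above, while epsilon and the
# train_epoch interface are hypothetical.
import torch

class AdversarialTrainer:
    def __init__(self, net, criterion, optimizer, args, epsilon=8 / 255):
        self.net = net
        self.criterion = criterion
        self.optimizer = optimizer
        self.args = args
        self.epsilon = epsilon  # hypothetical L-inf perturbation budget

    def train_epoch(self, loader):
        self.net.train()
        for images, labels in loader:
            images, labels = images.cuda(), labels.cuda()
            # FGSM: perturb inputs along the sign of the input gradient
            images.requires_grad_(True)
            loss = self.criterion(self.net(images), labels)
            grad, = torch.autograd.grad(loss, images)
            # clamp assumes un-normalized inputs in [0, 1]
            adv = (images + self.epsilon * grad.sign()).clamp(0, 1).detach()
            # update the model on the adversarial batch
            self.optimizer.zero_grad()
            self.criterion(self.net(adv), labels).backward()
            self.optimizer.step()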