Example #1
0
def main():
    """Train a WideResNet with Adam and evaluate it after every epoch.

    Parses the command-line options into the module-level ``args``, seeds
    PyTorch, builds the model, the optimizer and the train/test loaders, then
    calls ``train`` followed by ``test`` for each of ``num_epochs`` epochs
    (numbered from 1) and prints how long the epoch loop took.
    """
    global args, best_prec1
    args = parser.parse_args()

    # ensuring reproducibility
    SEED = 42
    torch.manual_seed(SEED)
    torch.backends.cudnn.benchmark = False
    # Turning the autotuner off only fixes which algorithm is picked; the
    # algorithm itself may still be nondeterministic unless this is set too.
    torch.backends.cudnn.deterministic = True

    # Use the GPU when there is one, otherwise run on the CPU instead of
    # crashing in `.to(device)`. Pinned memory only helps host->GPU copies.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': use_cuda}

    num_epochs = 7

    # create model
    model = WideResNet(args.layers,
                       10,
                       args.widen_factor,
                       dropRate=args.droprate).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.learning_rate,
                                 weight_decay=args.weight_decay)

    # instantiate loaders
    train_loader = get_data_loader(args.data_dir, args.batch_size, **kwargs)
    test_loader = get_test_loader(args.data_dir, 128, **kwargs)

    # perf_counter is monotonic, so the measurement is immune to wall-clock
    # adjustments that can happen during a long training run.
    tic = time.perf_counter()
    for epoch in range(1, num_epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)
    toc = time.perf_counter()
    print("Time Elapsed: {}s".format(toc - tic))
Example #2
0
def main():
    """Run the baseline schedule: train, then test, once per epoch.

    The parsed command-line options are stored in the module-level ``args``.
    A WideResNet is created from ``args.layers``, ``args.widen_factor`` and
    ``args.droprate``, optimised with Adam, and passed to ``train`` and
    ``test`` for epochs ``1..num_epochs``. The time spent in that loop is
    printed at the end.
    """
    global args, best_prec1
    args = parser.parse_args()

    # ensuring reproducibility
    SEED = 42
    torch.manual_seed(SEED)
    torch.backends.cudnn.benchmark = False
    # Also request deterministic cuDNN kernels: disabling benchmarking alone
    # pins the algorithm choice but not the algorithm's own determinism.
    torch.backends.cudnn.deterministic = True

    # Select CUDA only when it is actually available so the script still runs
    # on CPU-only machines; memory pinning is pointless without a GPU.
    cuda_available = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_available else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': cuda_available}

    num_epochs = 7

    # create model
    model = WideResNet(args.layers, 10, args.widen_factor, dropRate=args.droprate).to(device)

    optimizer = torch.optim.Adam(
        model.parameters(),
        args.learning_rate,
        weight_decay=args.weight_decay
    )

    # instantiate loaders
    train_loader = get_data_loader(args.data_dir, args.batch_size, **kwargs)
    test_loader = get_test_loader(args.data_dir, 128, **kwargs)

    # Monotonic clock: unaffected by system time changes during training.
    tic = time.perf_counter()
    for epoch in range(1, num_epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)
    toc = time.perf_counter()
    print("Time Elapsed: {}s".format(toc - tic))
Example #3
0
def main():
    """Train a WideResNet with a three-phase, loss-aware schedule.

    Phases (epochs are numbered from 1):

    1. Epochs ``1..num_epochs_transient``: ``train_transient`` on the regular
       training loader. The first epoch is run with ``track=True`` and its
       return value is kept as ``seen_losses``.
    2. One epoch of ``train_steady_state`` on a loader rebuilt by
       ``get_weighted_loader`` from a loss-ranked selection of the tracked
       records.
    3. The remaining epochs up to ``num_epochs_steady``: ``train_transient``
       again, now on the weighted loader.

    ``test`` is called after every epoch and the total elapsed time is
    printed at the end. Parsed options are stored in the module-level
    ``args``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # ensuring reproducibility
    SEED = 42
    torch.manual_seed(SEED)
    # The selection step below draws from NumPy's global RNG, so it has to be
    # seeded as well for runs to be repeatable.
    np.random.seed(SEED)
    torch.backends.cudnn.benchmark = False
    # Disabling the autotuner alone does not make the chosen kernels
    # deterministic; request that explicitly.
    torch.backends.cudnn.deterministic = True

    # Fall back to the CPU when CUDA is unavailable instead of failing in
    # `.to(device)`; pinned memory is only useful for GPU transfers.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': use_cuda}

    num_epochs_transient = 2  # epochs spent in the initial transient phase
    num_epochs_steady = 7     # used here as the number of the final epoch
    perc_to_remove = 10       # percent of lowest-loss records to set aside
    readd_fraction = 0.33     # share of the set-aside records to bring back

    # create model
    model = WideResNet(args.layers, 10, args.widen_factor, dropRate=args.droprate).to(device)

    optimizer = torch.optim.Adam(
        model.parameters(),
        args.learning_rate,
        weight_decay=args.weight_decay
    )

    # instantiate loaders
    train_loader = get_data_loader(args.data_dir, args.batch_size, **kwargs)
    test_loader = get_test_loader(args.data_dir, 128, **kwargs)

    tic = time.perf_counter()
    seen_losses = None
    for epoch in range(1, num_epochs_transient + 1):
        if epoch == 1:
            seen_losses = train_transient(model, device, train_loader, optimizer, epoch, track=True)
        else:
            train_transient(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)

    # ---- steady-state epoch -------------------------------------------
    epoch = num_epochs_transient + 1
    # `seen_losses` is a nested list; flatten it. Element [1] of every record
    # is what gets ranked and later passed on as a weight (presumably the
    # per-sample loss -- confirm against train_transient).
    seen_losses = [v for sublist in seen_losses for v in sublist]
    sorted_loss_idx = sorted(range(len(seen_losses)), key=lambda k: seen_losses[k][1], reverse=True)

    # Set aside the `perc_to_remove` percent of records with the lowest
    # value. An explicit split position is used because slicing with `-0`
    # (when the count rounds down to zero) would swap the two halves.
    num_removed = int((perc_to_remove / 100) * len(sorted_loss_idx))
    split_at = len(sorted_loss_idx) - num_removed
    removed = sorted_loss_idx[split_at:]
    sorted_loss_idx = sorted_loss_idx[:split_at]

    # Bring a random subset of the set-aside records back. The sample size is
    # derived from `removed` itself: sizing it from the (much larger) kept
    # list requested more draws than `removed` holds, which makes
    # np.random.choice(..., replace=False) raise ValueError.
    # NOTE(review): confirm that "a third of the removed records" is the
    # intended amount.
    num_readded = int(readd_fraction * len(removed))
    if num_readded:
        to_add = np.random.choice(removed, num_readded, replace=False).tolist()
    else:
        to_add = []
    sorted_loss_idx = sorted(sorted_loss_idx + to_add)

    weights = [seen_losses[idx][1] for idx in sorted_loss_idx]
    train_loader = get_weighted_loader(args.data_dir, 64 * 2, weights, **kwargs)
    train_steady_state(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader, epoch)

    # ---- remaining epochs, trained on the weighted loader --------------
    for epoch in range(num_epochs_transient + 2, num_epochs_steady + 1):
        train_transient(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)
    toc = time.perf_counter()
    print("Time Elapsed: {}s".format(toc - tic))
Example #4
0
def main(args):
    """Train a student WideResNet on CIFAR-100 under a frozen teacher.

    A teacher WideResNet is restored from ``model_best.pth.tar`` under
    ``args.teacher_root_path`` and kept in eval mode. The student is trained
    with SGD (Nesterov momentum, MultiStepLR schedule) on the sum of:

    * cross-entropy between the student logits and the labels,
    * ``SM_Loss`` between teacher and student activations (only the third
      activation when ``args.aux_flag == 0``, otherwise the first three),
    * the mean squared difference between the teacher logits and the logits
      obtained by pushing ``AdaIN(t_acts[2], s_acts[2])`` through the
      teacher's ``bn1`` -> ``relu`` -> 8x8 average pool -> ``fc`` head.

    After each epoch the student is scored with ``evaluator``; whenever the
    top-1 score improves, a checkpoint is written via ``save_ckpt``. The best
    top-1 score and the top-5 score from that same epoch are printed at the
    end.

    Args:
        args: Options object. Attributes read here: ``batch_size``,
            ``num_workers``, ``teacher_depth``, ``teacher_width_factor``,
            ``teacher_root_path``, ``student_depth``,
            ``student_width_factor``, ``lr``, ``total_epochs``, ``aux_flag``
            and ``student_weight_path``.
    """
    # Values handed to transforms.Normalize as (mean, std) for both splits.
    norm_mean = (0.5071, 0.4867, 0.4408)
    norm_std = (0.2675, 0.2565, 0.2761)

    train_transform = transforms.Compose([
        transforms.Pad(4, padding_mode='reflect'),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    train_dataset = datasets.CIFAR100('./dataset', train=True, transform=train_transform, download=True)
    test_dataset = datasets.CIFAR100('./dataset', train=False, transform=test_transform, download=True)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)

    Teacher = WideResNet(depth=args.teacher_depth, num_classes=100, widen_factor=args.teacher_width_factor, drop_rate=0.3)
    Teacher.cuda()
    Teacher.eval()

    teacher_weight_path = path.join(args.teacher_root_path, 'model_best.pth.tar')
    t_load = torch.load(teacher_weight_path)['state_dict']
    Teacher.load_state_dict(t_load)
    # The optimizer below only holds the student's parameters, so the teacher
    # is never updated. Freezing its parameters stops backward() from
    # computing gradients for them, while gradients still flow *through* the
    # teacher head to the student activations.
    for teacher_param in Teacher.parameters():
        teacher_param.requires_grad_(False)

    Student = WideResNet(depth=args.student_depth, num_classes=100, widen_factor=args.student_width_factor, drop_rate=0.0)
    Student.cuda()

    cudnn.benchmark = True

    optimizer = torch.optim.SGD(Student.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4, nesterov=True)
    opt_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60, 120, 160], gamma=2e-1)

    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    best_acc5 = 0

    for epoch in range(args.total_epochs):
        # NOTE(review): evaluator() presumably puts the student in eval mode;
        # re-enter training mode every epoch so training does not depend on
        # evaluator restoring it.
        Student.train()
        for images, labels in train_loader:
            images, labels = images.cuda(), labels.cuda()
            # Teacher outputs are fixed targets; no graph is needed for them.
            with torch.no_grad():
                t_outs, *t_acts = Teacher(images)
            s_outs, *s_acts = Student(images)

            cls_loss = criterion(s_outs, labels)

            # statistical matching and AdaIN losses
            if args.aux_flag == 0:
                aux_loss_1 = SM_Loss(t_acts[2], s_acts[2])  # group conv2
            else:
                aux_loss_1 = sum(SM_Loss(t_acts[i], s_acts[i]) for i in range(3))

            # Feed the AdaIN-combined features through the teacher's
            # classification head and compare with the teacher's own logits.
            F_hat = AdaIN(t_acts[2], s_acts[2])
            interim_out_q = Teacher.bn1(F_hat)
            interim_out_q = Teacher.relu(interim_out_q)
            interim_out_q = F.avg_pool2d(interim_out_q, 8)
            interim_out_q = interim_out_q.view(-1, Teacher.last_ch)
            q = Teacher.fc(interim_out_q)

            aux_loss_2 = torch.mean(torch.pow(t_outs - q, 2))

            total_loss = cls_loss + aux_loss_1 + aux_loss_2

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        top1, top5 = evaluator(test_loader, Student)

        # Checkpoint only when top-1 improves; top-5 is recorded from that
        # same epoch rather than tracked independently.
        if top1 > best_acc:
            best_acc = top1
            best_acc5 = top5
            state = {'epoch': epoch + 1, 'state_dict': Student.state_dict(), 'optimizer': optimizer.state_dict()}
            save_ckpt(state, is_best=True, root_path=args.student_weight_path)

        opt_scheduler.step()

    print("Best top 1 acc: {}".format(best_acc))
    print("Best top 5 acc: {}".format(best_acc5))
Example #5
0
     net = resnet110(num_classes=n_classes)
 elif args.model == 'wideresnet':
     net = WideResNet(depth=28,
                      widen_factor=10,
                      dropRate=0.3,
                      num_classes=n_classes)
 elif args.model == 'resnext':
     net = CifarResNeXt(cardinality=8,
                        depth=29,
                        base_width=64,
                        widen_factor=4,
                        nlabels=n_classes)
 else:
     raise Exception('Invalid model name')
 # create optimizer
 optimizer = torch.optim.SGD(net.parameters(),
                             args.lr,
                             momentum=args.momentum,
                             nesterov=args.nesterov,
                             weight_decay=args.weight_decay)
 net.to('cuda')
 if torch.cuda.device_count() > 1:
     net = torch.nn.DataParallel(net)
 cudnn.benchmark = True
 criterion = nn.CrossEntropyLoss().cuda()
 # trainer
 if args.adversarial:
     if args.regu == 'no':
         trainer = AdversarialTrainer(net, criterion, optimizer, args)
     elif args.regu == 'random-svd':
         trainer = AdversarialOrthReguTrainer(net, criterion, optimizer,
Example #6
0
def main():
    """Train a WideResNet: transient epochs, one steady-state epoch, then more.

    Parsed command-line options are stored in the module-level ``args``.
    The schedule, with epochs numbered from 1, is:

    * epochs ``1..num_epochs_transient`` -- ``train_transient`` on the
      regular loader; epoch 1 runs with ``track=True`` and its return value
      is kept as ``seen_losses``;
    * the next epoch -- the training loader is replaced by the result of
      ``get_weighted_loader``, fed with weights taken from a loss-ranked
      subset of ``seen_losses``, and ``train_steady_state`` is run on it;
    * the epochs after that, up to ``num_epochs_steady`` --
      ``train_transient`` on the weighted loader.

    ``test`` runs after every epoch; the elapsed time is printed at the end.
    """
    global args, best_prec1
    args = parser.parse_args()

    # ensuring reproducibility
    SEED = 42
    torch.manual_seed(SEED)
    # np.random.choice is used further down; seed NumPy too so the sample
    # selection is repeatable.
    np.random.seed(SEED)
    torch.backends.cudnn.benchmark = False
    # Benchmark=False fixes the algorithm choice only; ask for deterministic
    # kernels explicitly.
    torch.backends.cudnn.deterministic = True

    # Run on the GPU when present, on the CPU otherwise (the original
    # hard-coded "cuda" and crashed without one). Only pin memory for CUDA.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': use_cuda}

    num_epochs_transient = 2  # length of the initial transient phase
    num_epochs_steady = 7     # used here as the number of the last epoch
    perc_to_remove = 10       # percent of lowest-loss records to set aside
    readd_fraction = 0.33     # share of the set-aside records to re-admit

    # create model
    model = WideResNet(args.layers,
                       10,
                       args.widen_factor,
                       dropRate=args.droprate).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.learning_rate,
                                 weight_decay=args.weight_decay)

    # instantiate loaders
    train_loader = get_data_loader(args.data_dir, args.batch_size, **kwargs)
    test_loader = get_test_loader(args.data_dir, 128, **kwargs)

    tic = time.perf_counter()
    seen_losses = None
    for epoch in range(1, num_epochs_transient + 1):
        if epoch == 1:
            seen_losses = train_transient(model,
                                          device,
                                          train_loader,
                                          optimizer,
                                          epoch,
                                          track=True)
        else:
            train_transient(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)

    # Steady-state epoch.
    epoch = num_epochs_transient + 1
    # Flatten the nested list returned by the tracked epoch. Element [1] of
    # each record is the ranking key and later the weight (presumably the
    # per-sample loss -- confirm against train_transient).
    seen_losses = [v for sublist in seen_losses for v in sublist]
    sorted_loss_idx = sorted(range(len(seen_losses)),
                             key=lambda k: seen_losses[k][1],
                             reverse=True)

    # Split by position rather than with negative slices: when the count
    # rounds to 0, `[-0:]` is the whole list and `[:-0]` is empty, which
    # would turn "remove nothing" into "remove everything".
    num_removed = int((perc_to_remove / 100) * len(sorted_loss_idx))
    split_at = len(sorted_loss_idx) - num_removed
    removed = sorted_loss_idx[split_at:]
    sorted_loss_idx = sorted_loss_idx[:split_at]

    # Re-admit a random part of the removed records. The amount is computed
    # from `removed`; computing it from the kept list asked for more unique
    # draws than `removed` contains and np.random.choice(replace=False)
    # raised ValueError.
    # NOTE(review): verify that a third of the removed records is the
    # intended amount.
    num_readded = int(readd_fraction * len(removed))
    if num_readded:
        to_add = np.random.choice(removed, num_readded,
                                  replace=False).tolist()
    else:
        to_add = []
    sorted_loss_idx = sorted(sorted_loss_idx + to_add)

    weights = [seen_losses[idx][1] for idx in sorted_loss_idx]
    train_loader = get_weighted_loader(args.data_dir, 64 * 2, weights,
                                       **kwargs)
    train_steady_state(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader, epoch)

    # Remaining epochs reuse the weighted loader built above.
    for epoch in range(num_epochs_transient + 2, num_epochs_steady + 1):
        train_transient(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)
    toc = time.perf_counter()
    print("Time Elapsed: {}s".format(toc - tic))