def train_fusionWRN_last3(self, epochs1=40, epochs2=25, device="cuda:1"):
    # https://github.com/xternalz/WideResNet-pytorch.git  # 120 80
    with torch.cuda.device(1):
        sys.path.append('/media/rene/code/WideResNet-pytorch')
        from wideresnet import WideResNet
        from collections import OrderedDict  # needed below to rebuild the state dict

        epochs1, epochs2 = int(epochs1), int(epochs2)
        num_workers = 4
        PATH = Path('/media/rene/data/')
        save_path = Path('/media/rene/code/WideResNet-pytorch/runs')
        model_name_list = [
            'WideResNet-28-10_0/model_best.pth.tar',
            'WideResNet-28-10_1/model_best.pth.tar',
            'WideResNet-28-10_2/model_best.pth.tar',
            'WideResNet-28-10_3/model_best.pth.tar',
            'WideResNet-28-10_4/model_best.pth.tar',
            'WideResNet-28-10_5/model_best.pth.tar'
        ]

        batch_size = 300
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        ])
        dataloaders, dataset_sizes = make_batch_gen_cifar(
            str(PATH), batch_size, num_workers,
            valid_name='valid', transformation=transform_test)

        pretrained_model_list = []

        # First trained model was with DATA PARALLEL
        model = WideResNet(28, 10, 20)
        model = model.to(device)
        state_dict = torch.load(os.path.join(
            save_path, 'WideResNet-28-10_0/model_best.pth.tar'))['state_dict']
        # create new OrderedDict that does not contain `module.`
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)
        pretrained_model_list.append(model)

        # get all the models
        for i, model_name in enumerate(model_name_list[1:3]):
            print('------------loading model: ', model_name)
            model = WideResNet(28, 10, 20)
            model = model.to(device)
            # original saved file with DataParallel
            state_dict = torch.load(os.path.join(save_path, model_name))['state_dict']
            model.load_state_dict(state_dict)
            pretrained_model_list.append(model)

        model = Fusion3(pretrained_model_list, num_input=30, num_output=10)

        # ###################### TRAIN LAST FEW LAYERS
        print('training last few layers')
        model_name = 'fusionWRN_last3_1'
        for p in model.parameters():
            p.requires_grad = True
        for p in model.model1.parameters():
            p.requires_grad = False
        for p in model.model2.parameters():
            p.requires_grad = False
        for p in model.model3.parameters():
            p.requires_grad = False

        # criterion = nn.CrossEntropyLoss()
        # optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
        #                       lr=.005, momentum=0.9, weight_decay=5e-4)
        # scheduler = lr_scheduler.StepLR(optimizer, step_size=int(epochs1 / 3), gamma=0.3)
        # best_acc, model = train_model(model, criterion, optimizer, scheduler, epochs1,
        #                               dataloaders, dataset_sizes, device=device)
        # torch.save(model.state_dict(), str(save_path / model_name))

        # ######################## TRAIN ALL LAYERS
        model.load_state_dict(torch.load(save_path / 'fusionWRN_last3_1'))
        model = model.to(device)
        model_name = 'fusionWRN_last3_2'
        batch_size = 88
        dataloaders, dataset_sizes = make_batch_gen_cifar(
            str(PATH), batch_size, num_workers,
            valid_name='valid', transformation=transform_test)

        # ### ONLY THE LAST BLOCK:
        for i, child in enumerate(model.model1.children()):
            if i >= 3:
                for p in child.parameters():
                    p.requires_grad = True
        for i, child in enumerate(model.model2.children()):
            if i >= 3:
                for p in child.parameters():
                    p.requires_grad = True
        for i, child in enumerate(model.model3.children()):
            if i >= 3:
                for p in child.parameters():
                    p.requires_grad = True

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                              lr=.0001, momentum=0.9, weight_decay=5e-4)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=int(epochs2 / 2), gamma=0.1)
        best_acc, model = train_model(model, criterion, optimizer, scheduler, 2,
                                      dataloaders, dataset_sizes, device=device,
                                      multi_gpu=False)

        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                              lr=.001, momentum=0.9, weight_decay=5e-4)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=int(epochs2 / 2), gamma=0.2)
        best_acc, model = train_model(model, criterion, optimizer, scheduler, epochs2,
                                      dataloaders, dataset_sizes, device=device,
                                      multi_gpu=False)
        torch.save(model.state_dict(), str(save_path / model_name))
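# NOTE: Fusion3 is defined elsewhere in this repo. As a rough, hypothetical sketch
# (an assumption about its structure, not the actual implementation), it presumably
# concatenates the three frozen WideResNets' 10-way outputs into a 30-dim feature
# (num_input=30) and learns a small fusion head on top (num_output=10):
#
# class Fusion3(nn.Module):
#     def __init__(self, models, num_input=30, num_output=10):
#         super().__init__()
#         self.model1, self.model2, self.model3 = models
#         self.fc = nn.Linear(num_input, num_output)
#
#     def forward(self, x):
#         # concatenate the per-model predictions and map them to class scores
#         out = torch.cat([self.model1(x), self.model2(x), self.model3(x)], dim=1)
#         return self.fc(out)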
def train_fusionWRN6(self, epochs1=120, epochs2=3, device="cuda:0"):
    # https://github.com/xternalz/WideResNet-pytorch.git  # 120 80
    sys.path.append('/media/rene/code/WideResNet-pytorch')
    from wideresnet import WideResNet
    from collections import OrderedDict

    epochs1, epochs2 = int(epochs1), int(epochs2)
    num_workers = 4
    PATH = Path('/media/rene/data/')
    save_path = Path('/media/rene/code/WideResNet-pytorch/runs')
    model_name_list = [
        'WideResNet-28-10_0/model_best.pth.tar',
        'WideResNet-28-10_1/model_best.pth.tar',
        'WideResNet-28-10_2/model_best.pth.tar',
        'WideResNet-28-10_3/model_best.pth.tar',
        'WideResNet-28-10_4/model_best.pth.tar',
        'WideResNet-28-10_5/model_best.pth.tar'
    ]

    batch_size = 8
    dataloaders, dataset_sizes = make_batch_gen_cifar(str(PATH), batch_size,
                                                      num_workers, valid_name='valid')

    pretrained_model_list = []

    # First trained model was with DATA PARALLEL
    model = WideResNet(28, 10, 20)
    model = model.to(device)
    state_dict = torch.load(os.path.join(
        save_path, 'WideResNet-28-10_0/model_best.pth.tar'))['state_dict']
    # create new OrderedDict that does not contain `module.`
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    pretrained_model_list.append(model)

    # get all the models
    for i, model_name in enumerate(model_name_list[1:]):
        print('------------loading model: ', model_name)
        model = WideResNet(28, 10, 20)
        model = model.to(device)
        # original saved file with DataParallel
        state_dict = torch.load(os.path.join(save_path, model_name))['state_dict']
        model.load_state_dict(state_dict)
        pretrained_model_list.append(model)

    model = Fusion6(pretrained_model_list, num_input=60, num_output=10)

    # ###################### TRAIN LAST FEW LAYERS
    # print('training last few layers')
    model_name = 'Fusion6_WRN_1'
    for p in model.parameters():
        p.requires_grad = True
    for p in model.model1.parameters():
        p.requires_grad = False
    for p in model.model2.parameters():
        p.requires_grad = False
    for p in model.model3.parameters():
        p.requires_grad = False
    for p in model.model4.parameters():
        p.requires_grad = False
    for p in model.model5.parameters():
        p.requires_grad = False
    for p in model.model6.parameters():
        p.requires_grad = False

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=.005, momentum=0.9, weight_decay=5e-4)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=int(epochs1 / 3), gamma=0.1)
    best_acc, model = train_model(model, criterion, optimizer, scheduler, epochs1,
                                  dataloaders, dataset_sizes, device=device)
    torch.save(model.state_dict(), str(save_path / model_name))

    # ######################## TRAIN ALL LAYERS
    # net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    # model.load_state_dict(torch.load(save_path / 'Fusion2_WRN_1'))
    model_name = 'Fusion6_WRN_2'
    batch_size = 1
    print('---------', batch_size)
    dataloaders, dataset_sizes = make_batch_gen_cifar(str(PATH), batch_size,
                                                      num_workers, valid_name='valid')
    for p in model.parameters():
        p.requires_grad = True

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=.0001, momentum=0.9, weight_decay=5e-4)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=int(epochs2 / 3), gamma=0.1)
    best_acc, model = train_model(model, criterion, optimizer, scheduler, epochs2,
                                  dataloaders, dataset_sizes, device=device)
    torch.save(model.state_dict(), str(save_path / model_name))
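# Both training functions above repeat the same `module.`-stripping loop for
# checkpoints saved under nn.DataParallel. A small helper along these lines
# (a suggested refactoring sketch, not part of the original code) captures
# that pattern in one place:
def strip_data_parallel_prefix_sketch(state_dict):
    """Return a copy of `state_dict` with any leading 'module.' removed from keys."""
    from collections import OrderedDict
    cleaned = OrderedDict()
    for k, v in state_dict.items():
        cleaned[k[7:] if k.startswith('module.') else k] = v
    return cleaned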
                                         shuffle=False, num_workers=2, pin_memory=True)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

print('==> Building model pgd..')
if args.model == 'wideresnet':
    net = WideResNet(depth=28, num_classes=10, widen_factor=2)
elif args.model == 'wideresnet28':
    net = WideResNet(depth=28, num_classes=10, widen_factor=10)
else:
    net = models.__dict__[args.model]()
net = torch.nn.Sequential(NormalizeLayer(), net)
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('./PGDtrain/checkpoint'), 'Error: no checkpoint directory found!'
    ckpname = ('./PGDtrain/checkpoint/' + args.loss + '_' + args.model
               + '_' + args.name + '.pth')
    checkpoint = torch.load(ckpname)
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch'] + 1
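# NOTE: NormalizeLayer is defined elsewhere in this repo. Composing it in front of
# the classifier is the usual trick for PGD training: adversarial perturbations are
# generated in raw [0, 1] pixel space while the network still sees normalized inputs.
# A minimal sketch of such a layer (an assumption about the actual implementation,
# using the CIFAR-10 statistics already used above) might look like:
#
# class NormalizeLayer(torch.nn.Module):
#     def __init__(self, mean=(0.4914, 0.4824, 0.4467), std=(0.2471, 0.2435, 0.2616)):
#         super().__init__()
#         # buffers move with .to(device) but are not trained
#         self.register_buffer('mean', torch.tensor(mean).view(1, 3, 1, 1))
#         self.register_buffer('std', torch.tensor(std).view(1, 3, 1, 1))
#
#     def forward(self, x):
#         return (x - self.mean) / self.std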
def main():
    global args, best_prec1
    args = parser.parse_args()
    if args.tensorboard:
        configure(args.checkpoint_dir + "/%s" % (args.name))

    # Data loading code
    normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                     std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: F.pad(x.unsqueeze(0),
                                              (4, 4, 4, 4), mode='reflect').squeeze()),
            transforms.ToPILImage(),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    assert (args.dataset == 'cifar10' or args.dataset == 'cifar100')
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()](args.dataset_path, train=True,
                                                download=True, transform=transform_train),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()](args.dataset_path, train=False,
                                                transform=transform_test),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # create model
    model = WideResNet(args.layers, args.dataset == 'cifar10' and 10 or 100,
                       args.widen_factor, dropRate=args.droprate,
                       semantic_loss=args.sloss, device=device)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.to(device)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # cosine learning rate
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer, len(train_loader) * args.epochs, eta_min=1e-6)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=.2)

    (calc_logic, logic_net, examples, logic_optimizer, decoder_optimizer,
     logic_scheduler, decoder_scheduler) = None, None, None, None, None, None, None

    if args.dataset == "cifar100":
        examples, logic_fn, group_precision = get_cifar100_experiment_params(train_loader.dataset)
        assert logic_fn(torch.arange(100), examples).all()
    else:
        examples, logic_fn, group_precision = get_cifar10_experiment_params(train_loader.dataset)
        assert logic_fn(torch.arange(10), examples).all()

    if args.sloss:
        examples = examples.to(device)
        logic_net = LogicNet(num_classes=len(train_loader.dataset.classes))
        logic_net.to(device)
        # logic_optimizer = torch.optim.Adam(logic_net.parameters(), 1e-1 * args.lr)
        logic_optimizer = torch.optim.SGD(logic_net.parameters(), args.lr,
                                          momentum=args.momentum, nesterov=args.nesterov,
                                          weight_decay=args.weight_decay)
        # logic_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        #     logic_optimizer, len(train_loader) * args.epochs, eta_min=1e-8)
        logic_scheduler = torch.optim.lr_scheduler.StepLR(logic_optimizer, step_size=25, gamma=.2)

        # decoder_optimizer = torch.optim.Adam(model.global_paramters, args.lr)
        decoder_optimizer = torch.optim.SGD(model.global_paramters, args.lr,
                                            momentum=args.momentum, nesterov=args.nesterov,
                                            weight_decay=args.weight_decay)
        # decoder_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        #     decoder_optimizer, len(train_loader) * args.epochs, eta_min=1e-8)
        decoder_scheduler = torch.optim.lr_scheduler.StepLR(decoder_optimizer, step_size=25, gamma=.2)

        calc_logic = lambda predictions, targets: calc_logic_loss(
            predictions, targets, logic_net, logic_fn,
            num_classes=model.num_classes, device=device)

        # override the optimizer from above
        optimizer = torch.optim.SGD(model.local_parameters,  # TODO: still might be better for parameters()
                                    args.lr, momentum=args.momentum, nesterov=args.nesterov,
                                    weight_decay=args.weight_decay)
        # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader) * args.epochs)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=.2)

    name = "_".join([str(getattr(args, source))
                     for source in ['lr', 'sloss', 'sloss_weight', 'dataset']])

    if args.resume:
        targets, preds, outs = validate(val_loader, model, criterion, 1, args,
                                        group_precision, device=device)
        from sklearn.metrics import confusion_matrix
        import pickle
        confusion_matrix(targets, preds)
        group_precision(torch.tensor(targets), torch.tensor(np.concatenate(outs, axis=0)))
        dict_ = {"targets": targets, "pred": np.concatenate(outs, axis=0)}
        f = open('../semantic_loss/notebooks/results.pickle', 'wb')
        pickle.dump(dict_, f)
        f.close()
        import pdb
        pdb.set_trace()

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, logic_net, criterion, examples, optimizer,
              logic_optimizer, decoder_optimizer, scheduler, logic_scheduler,
              decoder_scheduler, epoch, args, calc_logic, device=device)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, args,
                         group_precision, device=device)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, filename=f"{name}.checkpoint.pt")

        if args.sloss:
            logic_scheduler.step()
            decoder_scheduler.step()
        scheduler.step()

    print('Best accuracy: ', best_prec1)
def main():
    args = get_args()

    device = args.gpu
    load_model = args.load_model
    model_dir = args.model_dir
    architecture = args.architecture
    similarity = args.similarity
    loss_type = args.loss_type
    data_dir = args.data_dir
    data_name = args.out_dataset
    batch_size = args.batch_size
    train = args.train
    weight_decay = args.weight_decay
    epochs = args.epochs
    test = args.test
    noise_magnitudes = args.magnitudes

    # Create necessary directories
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    if architecture == 'densenet':
        underlying_net = DenseNet3(depth=100, num_classes=10)
    elif architecture == 'resnet':
        underlying_net = ResNet34()
    elif architecture == 'wideresnet':
        underlying_net = WideResNet(depth=28, num_classes=10, widen_factor=10)
    underlying_net.to(device)

    # Construct g, h, and the composed deconf net
    baseline = (similarity == 'baseline')
    if baseline:
        h = InnerDeconf(underlying_net.output_size, 10)
    else:
        h = h_dict[similarity](underlying_net.output_size, 10)
    h.to(device)

    deconf_net = DeconfNet(underlying_net, underlying_net.output_size, 10, h, baseline)
    deconf_net.to(device)

    parameters = []
    h_parameters = []
    for name, parameter in deconf_net.named_parameters():
        if name == 'h.h.weight' or name == 'h.h.bias':
            h_parameters.append(parameter)
        else:
            parameters.append(parameter)

    optimizer = optim.SGD(parameters, lr=0.1, momentum=0.9, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[int(epochs * 0.5), int(epochs * 0.75)],
                                               gamma=0.1)

    h_optimizer = optim.SGD(h_parameters, lr=0.1, momentum=0.9)  # No weight decay
    h_scheduler = optim.lr_scheduler.MultiStepLR(h_optimizer,
                                                 milestones=[int(epochs * 0.5), int(epochs * 0.75)],
                                                 gamma=0.1)

    # Load the model (capable of resuming training or inference)
    # from the checkpoint file
    if load_model:
        checkpoint = torch.load(f'{model_dir}/checkpoint.pth')
        epoch_start = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'])
        h_optimizer.load_state_dict(checkpoint['h_optimizer'])
        deconf_net.load_state_dict(checkpoint['deconf_net'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        h_scheduler.load_state_dict(checkpoint['h_scheduler'])
        epoch_loss = checkpoint['epoch_loss']
    else:
        epoch_start = 0
        epoch_loss = None

    # get outlier data
    train_data, val_data, test_data, open_data = get_datasets(data_dir, data_name, batch_size)

    criterion = losses_dict[loss_type]

    # Train the model
    if train:
        deconf_net.train()
        num_batches = len(train_data)
        epoch_bar = tqdm(total=num_batches * epochs, initial=num_batches * epoch_start)

        for epoch in range(epoch_start, epochs):
            total_loss = 0.0
            for batch_idx, (inputs, targets) in enumerate(train_data):
                if epoch_loss is None:
                    epoch_bar.set_description(
                        f'Training | Epoch {epoch + 1}/{epochs} | '
                        f'Batch {batch_idx + 1}/{num_batches}')
                else:
                    epoch_bar.set_description(
                        f'Training | Epoch {epoch + 1}/{epochs} | '
                        f'Epoch loss = {epoch_loss:0.2f} | '
                        f'Batch {batch_idx + 1}/{num_batches}')
                inputs = inputs.to(device)
                targets = targets.to(device)
                h_optimizer.zero_grad()
                optimizer.zero_grad()

                logits, _, _ = deconf_net(inputs)
                loss = criterion(logits, targets)
                loss.backward()

                optimizer.step()
                h_optimizer.step()
                total_loss += loss.item()
                epoch_bar.update()
            epoch_loss = total_loss
            h_scheduler.step()
            scheduler.step()

            checkpoint = {
                'epoch': epoch + 1,
                'optimizer': optimizer.state_dict(),
                'h_optimizer': h_optimizer.state_dict(),
                'deconf_net': deconf_net.state_dict(),
                'scheduler': scheduler.state_dict(),
                'h_scheduler': h_scheduler.state_dict(),
                'epoch_loss': epoch_loss,
            }
            # For continuing training or inference
            torch.save(checkpoint, f'{model_dir}/checkpoint.pth')
            # For exporting / sharing / inference only
            torch.save(deconf_net.state_dict(), f'{model_dir}/model.pth')

        if epoch_loss is None:
            epoch_bar.set_description(
                f'Training | Epoch {epochs}/{epochs} | Batch {num_batches}/{num_batches}')
        else:
            epoch_bar.set_description(
                f'Training | Epoch {epochs}/{epochs} | '
                f'Epoch loss = {epoch_loss:0.2f} | Batch {num_batches}/{num_batches}')
        epoch_bar.close()

    if test:
        deconf_net.eval()
        best_val_score = None
        best_auc = None

        for score_func in ['h', 'g', 'logit']:
            print(f'Score function: {score_func}')
            for noise_magnitude in noise_magnitudes:
                print(f'Noise magnitude {noise_magnitude:.5f} ')
                validation_results = np.average(testData(deconf_net, device, val_data,
                                                         noise_magnitude, criterion,
                                                         score_func, title='Validating'))
                print('ID Validation Score:', validation_results)
                id_test_results = testData(deconf_net, device, test_data, noise_magnitude,
                                           criterion, score_func, title='Testing ID')
                ood_test_results = testData(deconf_net, device, open_data, noise_magnitude,
                                            criterion, score_func, title='Testing OOD')
                auroc = calc_auroc(id_test_results, ood_test_results) * 100
                tnrATtpr95 = calc_tnr(id_test_results, ood_test_results)
                print('AUROC:', auroc, 'TNR@TPR95:', tnrATtpr95)
                if best_auc is None:
                    best_auc = auroc
                else:
                    best_auc = max(best_auc, auroc)
                if best_val_score is None or validation_results > best_val_score:
                    best_val_score = validation_results
                    best_val_auc = auroc
                    best_tnr = tnrATtpr95

        print('supposedly best auc: ', best_val_auc, ' and tnr@tpr95 ', best_tnr)
        print('true best auc:', best_auc)
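# calc_auroc and calc_tnr are defined elsewhere in this repo. As a reference for what
# such metrics typically compute (a sketch under that assumption, not the repo's exact
# code): AUROC treats ID scores as positives and OOD scores as negatives, and TNR@TPR95
# is the fraction of OOD samples rejected at the threshold that keeps 95% of ID samples.
import numpy as np
from sklearn.metrics import roc_auc_score


def calc_auroc_sketch(id_scores, ood_scores):
    """AUROC for separating ID (label 1) from OOD (label 0) confidence scores."""
    id_scores, ood_scores = np.asarray(id_scores), np.asarray(ood_scores)
    scores = np.concatenate([id_scores, ood_scores])
    labels = np.concatenate([np.ones_like(id_scores), np.zeros_like(ood_scores)])
    return roc_auc_score(labels, scores)


def calc_tnr_sketch(id_scores, ood_scores):
    """TNR at 95% TPR: share of OOD scores below the 5th percentile of ID scores."""
    id_scores, ood_scores = np.asarray(id_scores), np.asarray(ood_scores)
    threshold = np.percentile(id_scores, 5)  # keep ~95% of ID samples above the cut
    return float(np.mean(ood_scores < threshold))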