def get_architecture(arch: str, dataset: str) -> torch.nn.Module:
    """ Return a neural network (with random weights)

    :param arch: the architecture - should be in the ARCHITECTURES list above
    :param dataset: the dataset - should be in the datasets.DATASETS list
    :return: a Pytorch module: the dataset's normalization layer followed by the model
    :raises ValueError: if the (arch, dataset) combination is not handled here
    """
    ORDERED_CLASS_LABELS = [
        'airplane', 'automobile', 'bird', 'cat', 'deer',
        'dog', 'frog', 'horse', 'ship', 'truck',
    ]

    if arch == "resnet50" and dataset == "imagenet":
        model = torch.nn.DataParallel(resnet50(pretrained=False)).cuda()
        cudnn.benchmark = True
    elif arch in ("cifar_resnet20", "cifar_resnet110"):
        # Both CIFAR keys build the same DeepCNN, sized by the number of class labels.
        # NOTE(review): unlike the other branches this model is not moved to the
        # GPU here - confirm that callers handle device placement.
        model = DeepCNN(len(ORDERED_CLASS_LABELS))
    elif arch == "imagenet32_resnet110":
        model = resnet_cifar(depth=110, num_classes=1000).cuda()
    else:
        # Previously an unhandled combination left `model` unbound and surfaced
        # as an UnboundLocalError at the return statement.
        raise ValueError(
            f"Unknown architecture {arch!r} for dataset {dataset!r}.")

    # Both layers work fine, We tried both, and they both
    # give very similar results
    # IF YOU USE ONE OF THESE FOR TRAINING, MAKE SURE
    # TO USE THE SAME WHEN CERTIFYING.
    normalize_layer = get_normalize_layer(dataset)
    # normalize_layer = get_input_center_layer(dataset)
    return torch.nn.Sequential(normalize_layer, model)
def get_architecture(arch: str, dataset: str) -> torch.nn.Module:
    """ Return a neural network (with random weights)

    :param arch: the architecture - should be in the ARCHITECTURES list above
    :param dataset: the dataset - should be in the datasets.DATASETS list
    :return: a Pytorch module: vignette module, normalization layer, then the model
    :raises ValueError: if the (arch, dataset) combination is not handled here
    """
    if arch == "resnet50" and dataset in ("imagenet", "restricted_imagenet"):
        model = resnet50(pretrained=False)
        if dataset == "restricted_imagenet":
            # Swap the final layer for a 10-way head before wrapping in DataParallel.
            model.fc = torch.nn.Linear(in_features=2048, out_features=10, bias=True)
        model = torch.nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    elif arch == "cifar_resnet20":
        model = resnet_cifar(depth=20, num_classes=10).cuda()
    elif arch == "cifar_resnet110":
        model = resnet_cifar(depth=110, num_classes=10).cuda()
    elif arch == "imagenet32_resnet110":
        model = resnet_cifar(depth=110, num_classes=1000).cuda()
    else:
        # Previously an unhandled combination left `model` unbound and surfaced
        # as an UnboundLocalError at the return statement.
        raise ValueError(
            f"Unknown architecture {arch!r} for dataset {dataset!r}.")

    # Both layers work fine, We tried both, and they both
    # give very similar results
    # IF YOU USE ONE OF THESE FOR TRAINING, MAKE SURE
    # TO USE THE SAME WHEN CERTIFYING.
    normalize_layer = get_normalize_layer(dataset)
    # normalize_layer = get_input_center_layer(dataset)

    # NOTE(review): every dataset other than 'cifar10' gets a 224x224 mask,
    # including whatever dataset is paired with imagenet32_resnet110 - confirm.
    side = 32 if dataset == 'cifar10' else 224
    V = VingetteModule((3, side, side), 'circ', 2).to('cuda')
    return torch.nn.Sequential(V, normalize_layer, model)
def get_architecture(arch: str, dataset: str) -> torch.nn.Module:
    """ Return a neural network (with random weights)

    :param arch: the architecture - should be in the ARCHITECTURES list above
    :param dataset: the dataset - should be in the datasets.DATASETS list
    :return: a Pytorch module: the dataset's normalization layer followed by the model
    :raises ValueError: if the (arch, dataset) combination is not handled here
    """
    # Only the ImageNet ResNet-50 is handled by this variant.
    if not (arch == "resnet50" and dataset == "imagenet"):
        # Previously an unhandled combination left `model` unbound and surfaced
        # as an UnboundLocalError at the return statement.
        raise ValueError(
            f"Unknown architecture {arch!r} for dataset {dataset!r}.")

    model = torch.nn.DataParallel(resnet50(pretrained=False)).cuda()
    cudnn.benchmark = True

    normalize_layer = get_normalize_layer(dataset)
    return torch.nn.Sequential(normalize_layer, model)
def get_architecture(arch: str, dataset: str) -> torch.nn.Module:
    """ Return a neural network (with random weights)

    :param arch: the architecture - should be in the ARCHITECTURES list above
    :param dataset: the dataset - should be in the datasets.DATASETS list
    :return: a Pytorch module: the dataset's normalization layer followed by the model
    :raises ValueError: if the (arch, dataset) combination is not handled here
    """
    if arch == "resnet50" and dataset == "imagenet":
        model = torch.nn.DataParallel(resnet50(pretrained=False)).cuda()
        cudnn.benchmark = True
    elif arch == "cifar_resnet20":
        model = resnet_cifar(depth=20, num_classes=10).cuda()
    elif arch == "cifar_resnet110":
        model = resnet_cifar(depth=110, num_classes=10).cuda()
    elif arch == "cifar_wideresnet34by10":
        # NOTE(review): unlike the other branches this model is not moved to the
        # GPU here - confirm that callers handle device placement.
        model = WideResNet()
    else:
        # Previously an unhandled combination left `model` unbound and surfaced
        # as an UnboundLocalError at the return statement.
        raise ValueError(
            f"Unknown architecture {arch!r} for dataset {dataset!r}.")

    normalize_layer = get_normalize_layer(dataset)
    return torch.nn.Sequential(normalize_layer, model)
def get_architecture(arch: str, dataset: str, pytorch_pretrained: bool = False) -> torch.nn.Module:
    """ Return a neural network for the given architecture name

    Classifiers are returned wrapped as ``Sequential(normalize_layer, model)``;
    the image-denoising architectures are returned bare, without a
    normalization layer.

    :param arch: the architecture - should be in the ARCHITECTURES list above
    :param dataset: the dataset - should be in the datasets.DATASETS list
    :param pytorch_pretrained: forwarded as ``pretrained=`` to the ImageNet
        resnet18/34/50 constructors; ignored by every other architecture
    :return: a Pytorch module
    :raises ValueError: if the (arch, dataset) combination is not handled here
    """
    # Every table maps a name to a zero-argument factory so that only the
    # requested model is ever constructed.

    ## ImageNet classifiers (only valid together with dataset == "imagenet")
    imagenet_classifiers = {
        "resnet18": lambda: resnet18(pretrained=pytorch_pretrained),
        "resnet34": lambda: resnet34(pretrained=pytorch_pretrained),
        "resnet50": lambda: resnet50(pretrained=pytorch_pretrained),
    }

    classifiers = {
        ## Cifar classifiers
        "cifar_resnet20": lambda: resnet_cifar(depth=20, num_classes=10),
        "cifar_resnet110": lambda: resnet_cifar(depth=110, num_classes=10),
        "imagenet32_resnet110": lambda: resnet_cifar(depth=110, num_classes=1000),
        "imagenet32_wrn": lambda: WideResNet(depth=28, num_classes=1000, widen_factor=10),
        # Cifar10 Models from https://github.com/kuangliu/pytorch-cifar
        # The 14 models we use in the paper as surrogate models
        "cifar_wrn": lambda: WideResNet(depth=28, num_classes=10, widen_factor=10),
        "cifar_wrn40": lambda: WideResNet(depth=40, num_classes=10, widen_factor=10),
        "VGG16": lambda: VGG('VGG16'),
        "VGG19": lambda: VGG('VGG19'),
        "ResNet18": lambda: ResNet18(),
        "PreActResNet18": lambda: PreActResNet18(),
        "GoogLeNet": lambda: GoogLeNet(),
        "DenseNet121": lambda: DenseNet121(),
        "ResNeXt29_2x64d": lambda: ResNeXt29_2x64d(),
        "MobileNet": lambda: MobileNet(),
        "MobileNetV2": lambda: MobileNetV2(),
        "SENet18": lambda: SENet18(),
        "ShuffleNetV2": lambda: ShuffleNetV2(1),
        "EfficientNetB0": lambda: EfficientNetB0(),
    }

    ## Image Denoising Architectures (returned without a normalization layer)
    denoisers = {
        "cifar_dncnn": lambda: DnCNN(image_channels=3, depth=17, n_channels=64),
        "cifar_dncnn_wide": lambda: DnCNN(image_channels=3, depth=17, n_channels=128),
        "memnet": lambda: MemNet(in_channels=3, channels=64, num_memblock=3, num_resblock=6),
    }
    parallel_denoisers = {
        "imagenet_dncnn": lambda: DnCNN(image_channels=3, depth=17, n_channels=64),
        "imagenet_memnet": lambda: MemNet(in_channels=3, channels=64, num_memblock=3, num_resblock=6),
    }

    if arch in imagenet_classifiers and dataset == "imagenet":
        model = torch.nn.DataParallel(imagenet_classifiers[arch]()).cuda()
        cudnn.benchmark = True
    elif arch in classifiers:
        model = classifiers[arch]().cuda()
    elif arch in denoisers:
        return denoisers[arch]().cuda()
    elif arch in parallel_denoisers:
        model = torch.nn.DataParallel(parallel_denoisers[arch]()).cuda()
        cudnn.benchmark = True
        return model
    else:
        # ValueError is a subclass of Exception, so callers catching the
        # previous bare Exception keep working.
        raise ValueError(
            f"Unknown architecture {arch!r} for dataset {dataset!r}.")

    normalize_layer = get_normalize_layer(dataset)
    return torch.nn.Sequential(normalize_layer, model)
if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu # load the base classifier checkpoint = torch.load(args.base_classifier) base_classifier = get_architecture(checkpoint["arch"], args.dataset) if checkpoint["arch"] == 'resnet50' and args.dataset == "imagenet": try: base_classifier.load_state_dict(checkpoint['state_dict']) except: base_classifier = torchvision.models.resnet50( pretrained=False).cuda() # fix normalize_layer = get_normalize_layer('imagenet').cuda() base_classifier = torch.nn.Sequential(normalize_layer, base_classifier) base_classifier.load_state_dict(checkpoint['state_dict']) # iterate through the dataset dataset = get_dataset(args.dataset, args.split) # init transformers rotationT = RotationTransformer(dataset[0][0]) transformer = None # if abs(args.noise_b) < EPS and abs(args.noise_k) < EPS: # transformer = GaussianTransformer(args.noise_sd) if abs(args.noise_k) < EPS: transformer = RotationBrightnessNoiseTransformer(
def main(args):
    """Attack a smoothed classifier with randomly sampled transformations.

    For every ``args.skip``-th sample that the smoothed classifier predicts
    correctly, up to ``args.tries`` transformation parameters are drawn
    uniformly from the ranges returned by ``gen_transform_and_params``; the
    sample counts as robust only if every transformed copy is still predicted
    as its label. Clean and robust accuracy are written to a result file.

    :param args: parsed command-line namespace; this function reads ``gpu``,
        ``base_classifier``, ``dataset``, ``split``, ``skip``, ``tries``,
        ``transtype``, ``N0``, ``p``, ``batch``, ``l2_r``, ``outfile``,
        ``outfolder`` and ``displacement``, and sets ``outfile`` when it is None
    """
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # load the base classifier
    checkpoint = torch.load(args.base_classifier)
    base_classifier = get_architecture(checkpoint["arch"], args.dataset)
    print('arch:', checkpoint['arch'])
    if checkpoint["arch"] == 'resnet50' and args.dataset == "imagenet":
        # Best-effort loading: try three module layouts in turn until the
        # checkpoint's state dict fits one of them.
        try:
            base_classifier.load_state_dict(checkpoint['state_dict'])
        except Exception:
            print('direct load failed, try alternative')
            try:
                # alternative 1: a bare torchvision resnet50
                base_classifier = torchvision.models.resnet50(
                    pretrained=False).cuda()
                base_classifier.load_state_dict(checkpoint['state_dict'])
            except Exception:
                print('alternative failed again, try alternative 2')
                # alternative 2: normalization layer + bare resnet50
                base_classifier = torchvision.models.resnet50(
                    pretrained=False).cuda()
                normalize_layer = get_normalize_layer('imagenet').cuda()
                base_classifier = torch.nn.Sequential(normalize_layer,
                                                      base_classifier)
                base_classifier.load_state_dict(checkpoint['state_dict'])
    else:
        base_classifier.load_state_dict(checkpoint['state_dict'])

    # iterate through the dataset
    dataset = get_dataset(args.dataset, args.split)

    # generate transformer
    transformer = gen_inference_transformer(args, dataset[0][0])
    smoothed_classifier = SemanticSmooth(base_classifier,
                                         get_num_classes(args.dataset),
                                         transformer)

    # generate image-level transform and params
    (tinst1, tfunc1, tinst2, tfunc2,
     param1l, param1r, param2l, param2r,
     candidates) = gen_transform_and_params(args, dataset[0][0])

    # init random number generator
    m1 = Uniform(param1l, param1r)
    if param2l is not None:
        m2 = Uniform(param2l, param2r)

    # init metrics
    tot = tot_benign = tot_robust = 0

    # [main] attack section
    for i in range(len(dataset)):

        # only certify every args.skip examples
        if i % args.skip != 0:
            continue

        print('working on #', i)
        (x, y) = dataset[i]

        pred = smoothed_classifier.predict(x, args.N0, args.p, args.batch)
        if pred == y:
            tot_benign += 1
            robust = True

            for j in range(0, args.tries):
                if args.transtype == 'translation':
                    # the sampled value is used as an index into `candidates`
                    param_sample1 = candidates[int(m1.sample().item())]
                    xp = tfunc1(tinst1, x, param_sample1[0].item(),
                                param_sample1[1].item())
                else:
                    param_sample1 = m1.sample().item()
                    if param2l is not None:
                        param_sample2 = m2.sample().item()
                    xp = tfunc1(tinst1, x, param_sample1)
                    if param2l is not None:
                        xp = tfunc2(tinst2, xp, param_sample2)

                xp = xp.type_as(x)

                if args.transtype in ['rotation-brightness-l2',
                                      'scaling-brightness-l2']:
                    # compute the gradient by soft label and empirical mean
                    smoothed_classifier.base_classifier.eval()
                    grad = torch.zeros(
                        (args.N0, xp.shape[0], xp.shape[1], xp.shape[2]))

                    n = 0
                    while n < args.N0:
                        now_batch = min(args.batch, args.N0 - n)
                        batch = xp.repeat((now_batch, 1, 1, 1))
                        batch_noised = smoothed_classifier.transformer.process(
                            batch).cuda()
                        batch_noised = Variable(
                            batch_noised.data, requires_grad=True).contiguous()
                        opt = torch.optim.Adam([batch_noised], lr=1e-3)
                        opt.zero_grad()
                        loss = torch.nn.CrossEntropyLoss()(
                            smoothed_classifier.base_classifier(batch_noised),
                            torch.tensor(
                                [y],
                                dtype=torch.long).expand(now_batch).cuda())
                        loss.backward()
                        grad[n:n + now_batch, :, :, :] = batch_noised.grad.data
                        n += now_batch

                    # step of L2 norm args.l2_r along the mean gradient
                    grad = torch.mean(grad, dim=0)
                    unit_grad = F.normalize(grad, p=2,
                                            dim=list(range(grad.dim())))
                    delta = unit_grad * args.l2_r
                    xp = xp + delta

                pred = smoothed_classifier.predict(xp, args.N0, args.p,
                                                   args.batch)
                if pred != y:
                    robust = False
                    break

                print(f"> {j}/{args.tries}", end='\r', flush=True)

            tot_robust += int(robust)

        tot += 1
        print(
            f'#{i} clean acc={tot_benign / tot} robust acc={tot_robust / tot}')

    if args.outfile is None:
        if args.transtype != 'translation':
            param_str = f'{param1r}'
            if param2r is not None:
                param_str += f'_{param2r}'
        else:
            param_str = f'{args.displacement}'
        args.outfile = args.transtype + '/' + args.dataset + '/' + param_str + '/' + 'result.txt'

    out_full_path = os.path.join(args.outfolder, args.outfile)
    print('output result to ' + out_full_path)
    out_dir = os.path.dirname(out_full_path)
    if out_dir:
        # exist_ok avoids the race between an existence check and the mkdir
        os.makedirs(out_dir, exist_ok=True)

    # With no evaluated sample the ratios are undefined; report nan instead
    # of crashing with ZeroDivisionError after the whole run.
    clean_acc = tot_benign / tot if tot else float('nan')
    robust_acc = tot_robust / tot if tot else float('nan')
    with open(out_full_path, 'w') as f:
        f.write(
            f'clean {clean_acc},{tot_benign} robust={robust_acc},{tot_robust} tot={tot}\n'
        )

    print('done')
def main(args):
    """Attack a smoothed classifier with a per-parameter loss-ascent search.

    For every ``args.skip``-th sample that is predicted correctly, up to
    ``args.tries`` starting parameters are drawn (Beta(0.5, 0.5), rescaled
    into the parameter range). From each start the first parameter, then the
    second one if present, is moved towards higher loss by fixed-size steps
    followed by a tri-section refinement. The sample counts as robust only if
    every resulting transformed copy is still predicted as its label. Clean
    and robust accuracy are appended to a result file.

    :param args: parsed command-line namespace; this function reads ``gpu``,
        ``base_classifier``, ``dataset``, ``split``, ``skip``, ``tries``,
        ``stepdiv``, ``transtype``, ``nosmooth``, ``N0``, ``N``, ``p``,
        ``batch``, ``l2_r``, ``outfile``, ``outfolder`` and ``displacement``,
        and sets ``outfile`` when it is None
    """

    def clamp(value, low, high):
        # restrict value to the closed interval [low, high]
        return min(max(value, low), high)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # load the base classifier
    checkpoint = torch.load(args.base_classifier)
    base_classifier = get_architecture(checkpoint["arch"], args.dataset)
    print('arch:', checkpoint['arch'])
    if checkpoint["arch"] == 'resnet50' and args.dataset == "imagenet":
        # Best-effort loading: try three module layouts in turn until the
        # checkpoint's state dict fits one of them.
        try:
            base_classifier.load_state_dict(checkpoint['state_dict'])
        except Exception:
            print('direct load failed, try alternative')
            try:
                # alternative 1: a bare torchvision resnet50
                base_classifier = torchvision.models.resnet50(
                    pretrained=False).cuda()
                base_classifier.load_state_dict(checkpoint['state_dict'])
            except Exception:
                print('alternative failed again, try alternative 2')
                # alternative 2: normalization layer + bare resnet50
                base_classifier = torchvision.models.resnet50(
                    pretrained=False).cuda()
                normalize_layer = get_normalize_layer('imagenet').cuda()
                base_classifier = torch.nn.Sequential(normalize_layer,
                                                      base_classifier)
                base_classifier.load_state_dict(checkpoint['state_dict'])
    else:
        base_classifier.load_state_dict(checkpoint['state_dict'])

    # iterate through the dataset
    dataset = get_dataset(args.dataset, args.split)

    # generate transformer
    transformer = gen_inference_transformer(args, dataset[0][0])
    smoothed_classifier = SemanticSmooth(base_classifier,
                                         get_num_classes(args.dataset),
                                         transformer)

    # generate image-level transform and params
    (tinst1, tfunc1, tinst2, tfunc2,
     param1l, param1r, param2l, param2r,
     candidates) = gen_transform_and_params(args, dataset[0][0])

    # init random number generator: samples lie in [0, 1] and are rescaled
    # into [param_l, param_r] at sampling time
    m1 = Beta(0.5, 0.5)
    if param2l is not None:
        m2 = Beta(0.5, 0.5)

    # init metrics
    tot = tot_benign = tot_robust = 0

    # [main] attack section
    for i in range(len(dataset)):

        # only certify every args.skip examples
        if i % args.skip != 0:
            continue

        print('working on #', i)
        (x, y) = dataset[i]

        pred = predict(smoothed_classifier, base_classifier, args, x)
        if pred == y:
            tot_benign += 1
            robust = True

            for j in range(0, args.tries):
                param_sample1 = (m1.sample() * (param1r - param1l) +
                                 param1l).item()
                if param2l is not None:
                    param_sample2 = (m2.sample() * (param2r - param2l) +
                                     param2l).item()
                else:
                    param_sample2 = None

                xp = process(x, tfunc1, tinst1, tfunc2, tinst2,
                             param_sample1, param_sample2)
                pre_loss = getloss(smoothed_classifier, base_classifier, args,
                                   xp, y)
                if param_sample2 is None:
                    print(
                        f"{i} > {j}/{args.tries} begin para1={param_sample1:4.2f} loss={pre_loss}",
                        flush=True)
                else:
                    print(
                        f"{i} > {j}/{args.tries} begin para1={param_sample1:4.2f} para2={param_sample2:4.2f} loss={pre_loss}",
                        flush=True)

                # first work on param1
                eps = (param1r - param1l) / args.stepdiv
                xp_l = process(x, tfunc1, tinst1, tfunc2, tinst2,
                               clamp(param_sample1 - eps, param1l, param1r),
                               param_sample2)
                xp_r = process(x, tfunc1, tinst1, tfunc2, tinst2,
                               clamp(param_sample1 + eps, param1l, param1r),
                               param_sample2)
                loss_l = getloss(smoothed_classifier, base_classifier, args,
                                 xp_l, y)
                loss_r = getloss(smoothed_classifier, base_classifier, args,
                                 xp_r, y)
                # direction (+1 / -1) of the neighbour with the larger loss
                coef_1 = 1 if loss_r > loss_l else -1
                now_loss = max(loss_l, loss_r)
                now_param1 = param_sample1
                if now_loss > pre_loss:
                    # keep stepping while the loss does not decrease and the
                    # parameter stays strictly inside its range
                    while True:
                        incre = clamp(now_param1 + coef_1 * eps,
                                      param1l, param1r)
                        new_xp = process(x, tfunc1, tinst1, tfunc2, tinst2,
                                         incre, param_sample2)
                        new_loss = getloss(smoothed_classifier,
                                           base_classifier, args, new_xp, y)
                        if new_loss < now_loss or (
                                not param1l < incre < param1r):
                            break
                        now_param1 = incre
                        now_loss = new_loss

                # Bracket one step either side of the current point. It used
                # to be built as now -/+ coef * eps, which for coef == -1 gave
                # tmp_l > tmp_r and silently skipped the refinement below.
                tmp_l = clamp(now_param1 - eps, param1l, param1r)
                tmp_r = clamp(now_param1 + eps, param1l, param1r)
                # tri-section search
                while tmp_r - tmp_l > eps / args.stepdiv:
                    tmp_m1 = (2.0 * tmp_l + tmp_r) / 3.0
                    tmp_m2 = (tmp_l + 2.0 * tmp_r) / 3.0
                    xp_m1 = process(x, tfunc1, tinst1, tfunc2, tinst2,
                                    tmp_m1, param_sample2)
                    xp_m2 = process(x, tfunc1, tinst1, tfunc2, tinst2,
                                    tmp_m2, param_sample2)
                    loss_m1 = getloss(smoothed_classifier, base_classifier,
                                      args, xp_m1, y)
                    loss_m2 = getloss(smoothed_classifier, base_classifier,
                                      args, xp_m2, y)
                    if loss_m1 > loss_m2:
                        tmp_r = tmp_m2
                    else:
                        tmp_l = tmp_m1
                targ_param1 = (tmp_l + tmp_r) / 2.0

                # now work on param2
                if tfunc2 is not None:
                    eps = (param2r - param2l) / args.stepdiv
                    xp = process(x, tfunc1, tinst1, tfunc2, tinst2,
                                 targ_param1, param_sample2)
                    pre_loss2 = getloss(smoothed_classifier, base_classifier,
                                        args, xp, y)
                    xp_l = process(
                        x, tfunc1, tinst1, tfunc2, tinst2, targ_param1,
                        clamp(param_sample2 - eps, param2l, param2r))
                    xp_r = process(
                        x, tfunc1, tinst1, tfunc2, tinst2, targ_param1,
                        clamp(param_sample2 + eps, param2l, param2r))
                    loss_l = getloss(smoothed_classifier, base_classifier,
                                     args, xp_l, y)
                    loss_r = getloss(smoothed_classifier, base_classifier,
                                     args, xp_r, y)
                    coef_2 = 1 if loss_r > loss_l else -1
                    now_loss = max(loss_l, loss_r)
                    now_param2 = param_sample2
                    if now_loss > pre_loss2:
                        while True:
                            incre = clamp(now_param2 + coef_2 * eps,
                                          param2l, param2r)
                            new_xp = process(x, tfunc1, tinst1, tfunc2,
                                             tinst2, targ_param1, incre)
                            new_loss = getloss(smoothed_classifier,
                                               base_classifier, args, new_xp,
                                               y)
                            if new_loss < now_loss or (
                                    not param2l < incre < param2r):
                                break
                            now_param2 = incre
                            now_loss = new_loss

                    # ordered bracket, same reasoning as for param1
                    tmp_l = clamp(now_param2 - eps, param2l, param2r)
                    tmp_r = clamp(now_param2 + eps, param2l, param2r)
                    # tri-section search
                    while tmp_r - tmp_l > eps / args.stepdiv:
                        tmp_m1 = (2.0 * tmp_l + tmp_r) / 3.0
                        tmp_m2 = (tmp_l + 2.0 * tmp_r) / 3.0
                        xp_m1 = process(x, tfunc1, tinst1, tfunc2, tinst2,
                                        targ_param1, tmp_m1)
                        xp_m2 = process(x, tfunc1, tinst1, tfunc2, tinst2,
                                        targ_param1, tmp_m2)
                        loss_m1 = getloss(smoothed_classifier,
                                          base_classifier, args, xp_m1, y)
                        loss_m2 = getloss(smoothed_classifier,
                                          base_classifier, args, xp_m2, y)
                        if loss_m1 > loss_m2:
                            tmp_r = tmp_m2
                        else:
                            tmp_l = tmp_m1
                    targ_param2 = (tmp_l + tmp_r) / 2.0

                # build the final adversarial candidate
                xp = tfunc1(tinst1, x, targ_param1)
                if param2l is not None:
                    xp = tfunc2(tinst2, xp, targ_param2)
                xp = xp.type_as(x)

                fin_loss = getloss(smoothed_classifier, base_classifier, args,
                                   xp, y)
                if param_sample2 is None:
                    print(
                        f"{i} > {j}/{args.tries} end para1={targ_param1:4.2f} loss={fin_loss}",
                        flush=True)
                else:
                    print(
                        f"{i} > {j}/{args.tries} end para1={targ_param1:4.2f} para2={targ_param2:4.2f} loss={fin_loss}",
                        flush=True)

                if args.transtype in ['rotation-brightness-l2',
                                      'scaling-brightness-l2']:
                    # compute the gradient by soft label and empirical mean
                    smoothed_classifier.base_classifier.eval()
                    grad = torch.zeros(
                        (args.N0, xp.shape[0], xp.shape[1], xp.shape[2]))

                    n = 0
                    while n < args.N0:
                        now_batch = min(args.batch, args.N0 - n)
                        if args.nosmooth is True:
                            # NOTE(review): this branch builds a batch of one
                            # and never moves it to the GPU, while the target
                            # below has now_batch entries on the GPU - confirm
                            # this combination of options is ever exercised.
                            batch_noised = xp.repeat((1, 1, 1, 1))
                        else:
                            batch = xp.repeat((now_batch, 1, 1, 1))
                            batch_noised = smoothed_classifier.transformer.process(
                                batch).cuda()
                        batch_noised = Variable(
                            batch_noised.data, requires_grad=True).contiguous()
                        opt = torch.optim.Adam([batch_noised], lr=1e-3)
                        opt.zero_grad()
                        loss = torch.nn.CrossEntropyLoss()(
                            smoothed_classifier.base_classifier(batch_noised),
                            torch.tensor(
                                [y],
                                dtype=torch.long).expand(now_batch).cuda())
                        loss.backward()
                        grad[n:n + now_batch, :, :, :] = batch_noised.grad.data
                        n += now_batch

                    # step of L2 norm args.l2_r along the mean gradient
                    grad = torch.mean(grad, dim=0)
                    unit_grad = F.normalize(grad, p=2,
                                            dim=list(range(grad.dim())))
                    delta = unit_grad * args.l2_r
                    xp = xp + delta

                if args.nosmooth is True:
                    base_classifier.eval()
                    pred = base_classifier(xp.cuda().unsqueeze(0)).argmax(1)[0]
                else:
                    pred = smoothed_classifier.predict(xp, args.N, args.p,
                                                       args.batch)
                if pred != y:
                    robust = False
                    break

                print(f"{i} > {j}/{args.tries}", flush=True)

            tot_robust += int(robust)

        tot += 1
        print(
            f'#{i} clean acc={tot_benign / tot} robust acc={tot_robust / tot}')

    if args.outfile is None:
        if args.transtype != 'translation':
            param_str = f'{param1r}'
            if param2r is not None:
                param_str += f'_{param2r}'
        else:
            param_str = f'{args.displacement}'
        args.outfile = args.transtype + '/' + args.dataset + '/' + param_str + '/' + 'result.txt'

    out_full_path = os.path.join(args.outfolder, args.outfile)
    print('output result to ' + out_full_path)
    out_dir = os.path.dirname(out_full_path)
    if out_dir:
        # exist_ok avoids the race between an existence check and the mkdir
        os.makedirs(out_dir, exist_ok=True)

    # With no evaluated sample the ratios are undefined; report nan instead
    # of crashing with ZeroDivisionError after the whole run.
    clean_acc = tot_benign / tot if tot else float('nan')
    robust_acc = tot_robust / tot if tot else float('nan')
    # append mode: results of earlier runs in the same file are kept
    with open(out_full_path, 'a') as f:
        f.write(
            f'clean {clean_acc},{tot_benign} robust={robust_acc},{tot_robust} tot={tot}\n'
        )

    print('done')
def main():
    """Train a classifier on transformed inputs and checkpoint it every epoch.

    Reads its configuration from the module-level ``args`` namespace
    (``gpu``, ``outdir``, ``dataset``, ``batch``, ``workers``, ``arch``,
    ``pretrain``, ``lr``, ``momentum``, ``weight_decay``, ``lr_step_size``,
    ``gamma``, ``epochs``, ``print_freq``). Each epoch appends one
    tab-separated line to ``<outdir>/log.txt`` and overwrites
    ``<outdir>/checkpoint.pth.tar``.

    :raises Exception: if ``args.pretrain`` is 'torchvision' for anything
        other than resnet50 on imagenet
    """
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # exist_ok avoids the race between an existence check and the mkdir
    os.makedirs(args.outdir, exist_ok=True)

    train_dataset = get_dataset(args.dataset, 'train')
    test_dataset = get_dataset(args.dataset, 'test')
    pin_memory = (args.dataset == "imagenet")
    train_loader = DataLoader(train_dataset, shuffle=True,
                              batch_size=args.batch,
                              num_workers=args.workers,
                              pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset, shuffle=False,
                             batch_size=args.batch,
                             num_workers=args.workers,
                             pin_memory=pin_memory)

    model = get_architecture(args.arch, args.dataset)

    if args.pretrain is not None:
        if args.pretrain == 'torchvision':
            # load pretrain model from torchvision
            if args.dataset == 'imagenet' and args.arch == 'resnet50':
                model = torchvision.models.resnet50(True).cuda()

                # fix: wrap with the normalization layer
                normalize_layer = get_normalize_layer('imagenet').cuda()
                model = torch.nn.Sequential(normalize_layer, model)

                print('loaded from torchvision for imagenet resnet50')
            else:
                raise Exception(f'Unsupported pretrain arg {args.pretrain}')
        else:
            # load the base classifier
            checkpoint = torch.load(args.pretrain)
            model.load_state_dict(checkpoint['state_dict'])
            print(f'loaded from {args.pretrain}')

    logfilename = os.path.join(args.outdir, 'log.txt')
    init_logfile(logfilename,
                 "epoch\ttime\tlr\ttrain loss\ttrain acc\ttestloss\ttest acc")
    writer = SummaryWriter(args.outdir)

    # The first image of the first training batch is handed to gen_transformer;
    # canopy stays None if the loader yields nothing.
    canopy = None
    for inputs, _ in train_loader:
        canopy = inputs[0]
        break
    transformer = gen_transformer(args, canopy)

    criterion = CrossEntropyLoss().cuda()
    optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                    weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_step_size,
                       gamma=args.gamma)

    for epoch in range(args.epochs):
        before = time.time()
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, transformer, writer)
        test_loss, test_acc = test(test_loader, model, criterion, epoch,
                                   transformer, writer, args.print_freq)
        after = time.time()

        # NOTE(review): newer PyTorch releases deprecate passing the epoch to
        # step() and prefer get_last_lr() over get_lr() - confirm against the
        # pinned torch version before changing either call.
        scheduler.step(epoch)

        # The time column uses a plain '{}': '{:.3}' on a string truncates it
        # to three characters, which cut the elapsed time down to e.g. '0:0'.
        log(
            logfilename,
            "{}\t{}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}".format(
                epoch, str(datetime.timedelta(seconds=(after - before))),
                scheduler.get_lr()[0], train_loss, train_acc, test_loss,
                test_acc))

        # overwrite the single rolling checkpoint after every epoch
        torch.save(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, os.path.join(args.outdir, 'checkpoint.pth.tar'))