def evaluate(description):
    """Evaluate a model on CIFAR-10-C corruptions with optional test-time adaptation.

    Loads the base classifier from the RobustBench zoo, wraps it with the
    adaptation method selected by ``cfg.MODEL.ADAPTATION`` (source / norm /
    tent), then logs the error rate for every (severity, corruption type)
    combination listed in the config.

    Args:
        description: free-form text passed to ``load_cfg_fom_args`` for the
            argparse/config setup.
    """
    load_cfg_fom_args(description)
    # configure model
    base_model = load_model(cfg.MODEL.ARCH, cfg.CKPT_DIR,
                            cfg.CORRUPTION.DATASET,
                            ThreatModel.corruptions).cuda()
    # The adaptation modes are mutually exclusive: chain with elif and fail
    # loudly on an unknown value instead of hitting a NameError on the
    # undefined `model` further down.
    if cfg.MODEL.ADAPTATION == "source":
        logger.info("test-time adaptation: NONE")
        model = setup_source(base_model)
    elif cfg.MODEL.ADAPTATION == "norm":
        logger.info("test-time adaptation: NORM")
        model = setup_norm(base_model)
    elif cfg.MODEL.ADAPTATION == "tent":
        logger.info("test-time adaptation: TENT")
        model = setup_tent(base_model)
    else:
        raise ValueError(f"unknown adaptation method: {cfg.MODEL.ADAPTATION}")
    # evaluate on each severity and type of corruption in turn
    for severity in cfg.CORRUPTION.SEVERITY:
        for corruption_type in cfg.CORRUPTION.TYPE:
            # reset adaptation for each combination of corruption x severity
            # note: for evaluation protocol, but not necessarily needed
            try:
                model.reset()
                logger.info("resetting model")
            except AttributeError:
                # Only the "no reset() method" case is expected here; a bare
                # except would also swallow KeyboardInterrupt and real errors
                # raised inside reset().
                logger.warning("not resetting model")
            x_test, y_test = load_cifar10c(cfg.CORRUPTION.NUM_EX,
                                           severity, cfg.DATA_DIR, False,
                                           [corruption_type])
            x_test, y_test = x_test.cuda(), y_test.cuda()
            acc = accuracy(model, x_test, y_test, cfg.TEST.BATCH_SIZE)
            err = 1. - acc
            logger.info(f"error % [{corruption_type}{severity}]: {err:.2%}")
def test_clean_acc_jsons_fast(self):
    """Smoke test: every zoo model reaches > 70% clean accuracy on 200 images."""
    test_cfg = get_test_config()
    n_ex = 200
    images, labels = load_cifar10(n_ex, test_cfg['data_dir'])
    for norm in model_dicts.keys():
        print('Test models robust wrt {}'.format(norm))
        names = list(model_dicts[norm].keys())
        # removed temporarily to avoid an error for pytorch 1.4.0
        names.remove('Standard')
        passed = 0
        for name in names:
            net = load_model(name, test_cfg['model_dir'], norm).cuda().eval()
            acc = clean_accuracy(net, images, labels,
                                 batch_size=test_cfg['batch_size'])
            rounded = round(acc * 100., 2)
            # Hard assertion first (stops the test on failure), then the
            # soft tally used for the summary line.
            self.assertGreater(rounded, 70.0)
            ok = rounded > 70.0
            passed += ok
            print(
                '{}: clean accuracy {:.2%} (on {} examples), test passed: {}'
                .format(name, acc, n_ex, ok))
        print('Test is passed for {}/{} models.'.format(passed, len(names)))
def _accuracy_computation(success_criterion: Callable[[str, float, str, str],
                                                      bool],
                          n_ex: int) -> None:
    """Measure clean accuracy of every zoo model and grade it with a criterion.

    Args:
        success_criterion: callable ``(model_name, acc, dataset, threat_model)
            -> bool`` deciding whether a model passes.
        n_ex: number of clean examples to evaluate on.
    """
    cfg = get_test_config()
    dev = torch.device(cfg["device"])
    tot_models = 0
    n_tests_passed = 0
    for dataset, threat_dicts in model_dicts.items():
        print(f"Test models trained on {dataset.value}")
        x_clean, y_clean = load_clean_dataset(dataset, n_ex, cfg["data_dir"])
        for threat_model, zoo in threat_dicts.items():
            print(f"Test models robust wrt {threat_model.value}")
            names = list(zoo.keys())
            tot_models += len(names)
            for model_name in names:
                net = load_model(model_name, cfg["model_dir"], dataset,
                                 threat_model).to(dev)
                acc = clean_accuracy(net, x_clean, y_clean,
                                     batch_size=cfg["batch_size"],
                                     device=dev)
                success = success_criterion(model_name, acc, dataset.value,
                                            threat_model.value)
                n_tests_passed += int(success)
                print(
                    f"{model_name}: clean accuracy {acc:.2%} (on {n_ex} examples),"
                    f" test passed: {success}")
    print(f"Test is passed for {n_tests_passed}/{tot_models} models.")
def evaluate(description):
    """Online evaluation on CIFAR-10-C: adapts and scores batch by batch.

    Same setup as the standard evaluation (source / norm / tent adaptation),
    but iterates the corrupted test set one batch at a time so adaptation
    happens online, logging the running per-batch error and the final error
    over all examples.

    Args:
        description: free-form text passed to ``load_cfg_fom_args``.
    """
    load_cfg_fom_args(description)
    # configure model
    base_model = load_model(cfg.MODEL.ARCH, cfg.CKPT_DIR,
                            cfg.CORRUPTION.DATASET,
                            ThreatModel.corruptions).cuda()
    # Mutually exclusive adaptation modes: use elif and fail loudly on an
    # unknown value instead of a NameError on the undefined `model`.
    if cfg.MODEL.ADAPTATION == "source":
        logger.info("test-time adaptation: NONE")
        model = setup_source(base_model)
    elif cfg.MODEL.ADAPTATION == "norm":
        logger.info("test-time adaptation: NORM")
        model = setup_norm(base_model)
    elif cfg.MODEL.ADAPTATION == "tent":
        logger.info("test-time adaptation: TENT")
        model = setup_tent(base_model)
    else:
        raise ValueError(f"unknown adaptation method: {cfg.MODEL.ADAPTATION}")
    # evaluate on each severity and type of corruption in turn
    for severity in cfg.CORRUPTION.SEVERITY:
        for corruption_type in cfg.CORRUPTION.TYPE:
            # reset adaptation for each combination of corruption x severity
            # note: for evaluation protocol, but not necessarily needed
            try:
                model.reset()
                logger.info("resetting model")
            except AttributeError:
                # Narrowed from a bare except: only the "no reset() method"
                # case should be tolerated here.
                logger.warning("not resetting model")
            x_test, y_test = load_cifar10c(cfg.CORRUPTION.NUM_EX,
                                           severity, cfg.DATA_DIR, False,
                                           [corruption_type])
            x_test, y_test = x_test.cuda(), y_test.cuda()
            batch_size = cfg.TEST.BATCH_SIZE
            n_batches = math.ceil(x_test.shape[0] / batch_size)
            correct = 0.
            # updating for the online time_i data and output
            for counter in range(n_batches):
                x_curr = x_test[counter * batch_size:(counter + 1) * batch_size]
                y_curr = y_test[counter * batch_size:(counter + 1) * batch_size]
                output = model(x_curr)
                acc = (output.max(1)[1] == y_curr).float().sum().item()
                correct += acc
                # BUG FIX: divide by the actual batch length — the last batch
                # can be smaller than batch_size.
                acc = acc / x_curr.shape[0]
                err = 1. - acc
                logger.info(f"error % [{corruption_type}{severity}]: {err:.2%}")
            # BUG FIX: was `correct / x_test[0]`, which divides by the first
            # image *tensor*; the denominator must be the number of examples.
            error = 1. - correct / x_test.shape[0]
            logger.info(f"error % [{corruption_type}{severity}]: {error:.2%}")
def evaluate(description):
    """Run AutoAttack (Linf, eps=8/255) against a Dent-wrapped CIFAR-10 model.

    Args:
        description: free-form text passed to ``load_cfg_fom_args``.

    Raises:
        ValueError: if ``cfg.MODEL.ADAPTATION`` is not ``"dent"``.
    """
    load_cfg_fom_args(description)
    assert cfg.CORRUPTION.DATASET == 'cifar10'
    base_model = load_model(cfg.MODEL.ARCH, cfg.CKPT_DIR, 'cifar10',
                            ThreatModel.Linf).cuda()
    if cfg.MODEL.ADAPTATION == "dent":
        # Dent requires episodic (per-batch) adaptation.
        assert cfg.MODEL.EPISODIC
        dent_model = Dent(base_model, cfg.OPTIM)
        logger.info(dent_model.model)
    else:
        # BUG FIX: without this branch `dent_model` was undefined below and
        # any other adaptation setting raised a confusing NameError.
        raise ValueError(
            f"unsupported adaptation method: {cfg.MODEL.ADAPTATION}")
    x_test, y_test = load_cifar10(cfg.CORRUPTION.NUM_EX, cfg.DATA_DIR)
    x_test, y_test = x_test.cuda(), y_test.cuda()
    adversary = AutoAttack(
        dent_model, norm='Linf', eps=8./255., version='standard',
        log_path=osp.join(cfg.SAVE_DIR, cfg.LOG_DEST))
    adversary.run_standard_evaluation(
        x_test, y_test, bs=cfg.TEST.BATCH_SIZE)
def main(args: Namespace) -> None:
    """Entry point: load the requested zoo model and run the benchmark on it."""
    run_device = torch.device(args.device)
    net = load_model(args.model_name,
                     model_dir=args.model_dir,
                     dataset=args.dataset,
                     threat_model=args.threat_model)
    benchmark(net,
              n_examples=args.n_ex,
              dataset=args.dataset,
              threat_model=args.threat_model,
              to_disk=args.to_disk,
              model_name=args.model_name,
              data_dir=args.data_dir,
              device=run_device,
              batch_size=args.batch_size,
              eps=args.eps)
def run_all(args):
    """Evaluate a fixed list of (model, norm) pairs and log the accuracies."""
    to_evaluate = [
        ('Wu2020Adversarial_extra', 'Linf'),
        ('Carmon2019Unlabeled', 'Linf'),
        ('Wu2020Adversarial', 'L2'),
        ('Augustin2020Adversarial', 'L2'),
        ('Standard', 'Linf'),
    ]
    accuracy = {}
    for name, norm in to_evaluate:
        net = load_model(model_name=name, norm=norm).to('cuda:0')
        accuracy[(name, norm)] = eval_dataset(net, args.eps, args.batch_size)
        # Log progress after every model so partial results survive a crash.
        logging.info('%s %s', name, norm)
        logging.info(str(accuracy))
    logging.info(str(accuracy))
def init_model(opt):
    """Build and return the classifier network on the GPU.

    Depending on ``opt.use_robust_bench`` the classifier is either a
    pretrained RobustBench zoo model or a (possibly CIFAR-adapted) torchvision
    ResNet-18, each wrapped with the matching preprocessing adapter.
    Optionally loads a checkpoint and freezes batch-norm statistics.
    """
    if opt.use_robust_bench:
        # Pretrained robust model from the RobustBench zoo.
        net_d = load_model(model_name=opt.use_robust_bench_model,
                           dataset=opt.use_robust_bench_dataset,
                           threat_model=opt.use_robust_bench_threat)
        net_d = ClassifierWithPreprocessing(net_d,
                                            RobustBenchClassifierInputs())
    else:
        resnet_model = torchvision.models.resnet18
        # ImageNet-pretrained weights are used only when NOT training on CIFAR.
        net_d = resnet_model(
            pretrained=False if opt.dataset_to_use == 'cifar' else True)
        if opt.dataset_to_use != 'imagenet':
            # Replace the head to match the task's class count.
            net_d.fc = torch.nn.Linear(in_features=net_d.fc.in_features,
                                       out_features=opt.n_classes)
        if opt.dataset_to_use == 'cifar':
            # CIFAR images are 32x32: use a 3x3 stride-1 stem and drop the
            # initial maxpool so early layers don't downsample too aggressively.
            net_d.conv1 = torch.nn.Conv2d(3, 64, kernel_size=(3, 3),
                                          stride=(1, 1), padding=(1, 1),
                                          bias=False)
            net_d.maxpool = torch.nn.Sequential()
            # Re-initialize conv weights (PyTorch's default kaiming scheme).
            # NOTE(review): nesting of this loop inside the cifar branch is
            # inferred from the collapsed source — confirm against upstream.
            for m in net_d.modules():
                if isinstance(m, torch.nn.Conv2d):
                    torch.nn.init.kaiming_uniform_(m.weight, a=math.sqrt(5))
        net_d = ClassifierWithPreprocessing(net_d, ClassifierInputs())
    if opt.load_checkpoint_d is not None:
        net_d.load_state_dict(torch.load(opt.load_checkpoint_d))
    if opt.deactivate_bn:
        # turning off batch normalization on the classifier
        def train(self, mode=True):
            # Keep every BatchNorm layer in eval mode even when the model as a
            # whole is switched to training mode.
            super(type(net_d), self).train(mode)
            for module in self.modules():
                if isinstance(module, torch.nn.modules.BatchNorm1d) or \
                        isinstance(module, torch.nn.modules.BatchNorm2d) or \
                        isinstance(module, torch.nn.modules.BatchNorm3d):
                    module.eval()
        # Monkey-patch the bound train() method on this single instance.
        net_d.train = types.MethodType(train, net_d)
    return net_d.cuda()
def main():
    """CLI entry point for CIFAR-10 robustness evaluation.

    Dispatches on ``--model_name``: ``all`` evaluates the fixed model list,
    ``grid`` produces the epsilon grid, anything else evaluates that single
    zoo model and dumps the result to ``<path>/log.jb``.
    """
    parser = argparse.ArgumentParser(description='Eval CIFAR10')
    parser.add_argument('--path', type=str, default='runs/X')
    parser.add_argument('--model_name', type=str, default='all')
    parser.add_argument('--model_norm', type=str, default='')
    parser.add_argument('--eps', type=float, default=8)
    # BUG FIX: batch size is a count of examples; `type=float` produced a
    # float that breaks tensor slicing / DataLoader batch sizes downstream.
    parser.add_argument('--batch_size', type=int, default=250)
    parser.add_argument('--eps_max', type=float, default=10)
    args = parser.parse_args()
    if args.model_name == 'all':
        run_all(args)
    elif args.model_name == 'grid':
        make_grid(args)
    else:
        model = load_model(model_name=args.model_name, norm=args.model_norm)
        model = model.to('cuda:0')
        accuracy = eval_dataset(model, args.eps, args.batch_size)
        with open(os.path.join(args.path, 'log.jb'), 'wb') as f:
            joblib.dump({'args': dict(vars(args).items()),
                         'accuracy': accuracy}, f)
def test_clean_acc_jsons_exact(self):
    """Check measured clean accuracy against the value in each model's JSON."""
    test_cfg = get_test_config()
    dev = torch.device(test_cfg['device'])
    n_ex = 10000
    images, labels = load_cifar10(n_ex, test_cfg['data_dir'])
    for norm in model_dicts.keys():
        print('Test models robust wrt {}'.format(norm))
        names = list(model_dicts[norm].keys())
        # removed temporarily to avoid an error for pytorch 1.4.0
        names.remove('Standard')
        passed = 0
        for model_name in names:
            net = load_model(model_name, test_cfg['model_dir'], norm).to(dev)
            acc = clean_accuracy(net, images, labels,
                                 batch_size=test_cfg['batch_size'],
                                 device=dev)
            with open('./model_info/{}/{}.json'.format(norm, model_name),
                      'r') as model_info:
                json_dict = json.load(model_info)
            # Pass when the measured accuracy (in %) is within 0.05 points of
            # the reported value.
            gap = abs(round(acc * 100., 2) - float(json_dict['clean_acc']))
            success = gap <= 0.05
            print('{}: clean accuracy {:.2%}, test passed: {}'.format(
                model_name, acc, success))
            self.assertLessEqual(gap, 0.05)
            passed += success
        print('Test is passed for {}/{} models.'.format(passed, len(names)))
import torch from robustbench.data import load_cifar10 from robustbench.utils import load_model from constopt.adversary import Adversary from constopt.optim import PGD, PGDMadry, FrankWolfe, MomentumFrankWolfe from constopt.constraints import LinfBall device = torch.device("cuda" if torch.cuda.is_available() else 'cpu') data, target = load_cifar10(n_examples=100) model = load_model(model_name='Carmon2019Unlabeled', norm='Linf') criterion = torch.nn.CrossEntropyLoss() eps = 8. / 255 constraint = LinfBall(eps) n_iter = 20 step_size_test = { PGD.name: 5e4 * 2.5 * constraint.alpha / n_iter, PGDMadry.name: 2.5 / n_iter, FrankWolfe.name: None, MomentumFrankWolfe.name: None } for alg_class in PGD, PGDMadry, FrankWolfe, MomentumFrankWolfe: adv = Adversary(data.shape, constraint, alg_class, device) adv_loss, delta = adv.perturb(data, target, model, criterion,
def build(self):
    """Construct a DefenseInstance around the pretrained Linf zoo model."""
    backbone = load_model(model_name=self.name, norm='Linf')
    on_device = backbone.to(self.device)
    return DefenseInstance(model=on_device, detector=None)
"Wu2020Adversarial_extra", "Carmon2019Unlabeled", "HYDRA", "Wang2020Improving", "Wu2020Adversarial", "Hendrycks2019Using", "Pang2020Boosting", "Zhang2020Attacks", "Rice2020Overfitting", "Huang2020Self", "Zhang2019Theoretically", "Chen2020Adversarial", "Engstrom2019Robustness", "Zhang2019You", "Wong2020Fast", "Ding2020MMA", ] l2_models = [ "Wu2020Adversarial", "Augustin2020Adversarial", "Engstrom2019Robustness", "Rice2020Overfitting", "Rony2019Decoupling ", "Ding2020MMA", ] for model in linf_models: model = load_model(model_name=model, model_dir=model_dir, norm='Linf') for model in l2_models: model = load_model(model_name=model, model_dir=model_dir, norm='L2')
from tqdm import tqdm
import constopt as cpt
from constopt.data import load_cifar10
from robustbench.utils import load_model

# Run on GPU when available.
# NOTE(review): `torch` is not imported in this chunk — presumably imported
# earlier in the file; confirm.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Evaluation data: full CIFAR-10 test set, streamed in batches of 200.
batch_size = 200
n_examples = 10000
loader = load_cifar10(batch_size=batch_size, data_dir='~/datasets')
model_name = 'Standard'
model = load_model(model_name, norm='Linf').to(device)
criterion = torch.nn.CrossEntropyLoss()

# Define the perturbation constraint set
alpha = 8 / 255.
constraint = cpt.constraints.LinfBall(alpha)


def image_constraint_prox(delta, step_size=None, data=None):
    """Projects perturbation delta so that 0. <= data + delta <= 1.

    `step_size` is unused; it is accepted only to match the prox-operator
    signature expected by the optimizer.
    """
    adv_img = torch.clamp(data + delta, 0, 1)
    delta = adv_img - data
    return delta
help='where to store downloaded models') parser.add_argument('--device', type=str, default='cuda:0', help='device to use for computations') args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() device = torch.device(args.device) x_test, y_test = load_cifar10(args.n_ex, args.data_dir) x_test, y_test = x_test.to(device), y_test.to(device) model = load_model(args.model_name, args.model_dir, args.norm).to(device).eval() acc = clean_accuracy(model, x_test, y_test, batch_size=args.batch_size, device=device) print('Clean accuracy: {:.2%}'.format(acc)) adversary = AutoAttack(model, norm=args.norm, eps=args.eps, version='standard', device=device) x_adv = adversary.run_standard_evaluation(x_test, y_test)
import numpy as np import torch import copt from copt.utils_pytorch import make_func_and_grad from robustbench.data import load_cifar10 from robustbench.utils import load_model import matplotlib.pyplot as plt n_examples = 20 data_batch, target_batch = load_cifar10(n_examples=n_examples, data_dir='~/datasets') model = load_model("Standard") criterion = torch.nn.CrossEntropyLoss() # Define the constraint set + initial point alpha = 10. constraint = copt.constraint.L1Ball(alpha) for data, target in zip(data_batch, target_batch): data, target = data.unsqueeze(0), target.unsqueeze(0) # Define the loss function to be minimized, using Pytorch def loss_fun(delta): adv_input = data + delta return -criterion(model(adv_input), target) # Change the function to f_grad: returns loss_val, grad in flattened, numpy array f_grad = make_func_and_grad(loss_fun,
type=int, default=500, help='batch size for evaluation') parser.add_argument('--data_dir', type=str, default='./data', help='where to store downloaded datasets') parser.add_argument('--model_dir', type=str, default='./models', help='where to store downloaded models') args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() x_test, y_test = load_cifar10(args.n_ex, args.data_dir) model = load_model(args.model_name, args.model_dir, args.norm).cuda().eval() acc = clean_accuracy(model, x_test, y_test, batch_size=args.batch_size) print('Clean accuracy: {:.2%}'.format(acc)) adversary = AutoAttack(model, norm=args.norm, eps=args.eps, version='standard') x_adv = adversary.run_standard_evaluation(x_test, y_test)
import torch
from robustbench.utils import load_model
import chop
from chop.utils.image import matplotlib_imshow
from chop.utils.data import CIFAR10, NormalizingModel

# Run on GPU when available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

data_dir = "~/datasets/"
# Load CIFAR-10 WITHOUT normalization so pixel values stay in [0, 1]; the
# normalization is folded into the model below instead.
dataset = CIFAR10(data_dir, normalize=False)
classes = dataset.classes

# CIFAR10 model
model = load_model(
    'Standard')  # Can be changed to any model from the robustbench model zoo

# Add an initial layer to normalize data.
# This allows us to use the [0, 1] image constraint set
model = NormalizingModel(model, dataset)
model = model.to(device)

# Attack criterion
criterion = torch.nn.CrossEntropyLoss()

# Attack budget: one pass over the data with 5 random restarts per batch.
n_epochs = 1
restarts = 5
batch_size = 250

# Train/test loaders sharing the same batch size.
loaders = dataset.loaders(batch_size, batch_size)
import torch
from tqdm import tqdm
import copt
from copt.utils_pytorch import make_func_and_grad
from robustbench.data import load_cifar10
from robustbench.utils import load_model

# Full CIFAR-10 test set loaded as a single tensor pair.
n_examples = 10000
data_batch, target_batch = load_cifar10(n_examples=n_examples,
                                        data_dir='~/datasets')

model_name = "Engstrom2019Robustness"
model = load_model(model_name)
criterion = torch.nn.CrossEntropyLoss()

# Define the constraint set
alpha = 0.5
constraint = copt.constraint.L2Ball(alpha)

# Running counters for clean / adversarial accuracy, updated later in the file.
n_correct = 0
n_correct_adv = 0


# Define the loss function to be minimized, using Pytorch
def loss_fun(delta, data):
    """Negative cross-entropy of the perturbed input (maximized ⇒ attack).

    NOTE(review): `target` is a free variable here — presumably bound by a
    per-example loop later in the file; confirm before reusing this helper.
    """
    adv_input = data + delta
    return -criterion(model(adv_input), target)