def main():
    """Inversion image generator.

    Works only for the Inversion model; its argparse structure differs from
    the other run profiles.
    """
    parser = argparse.ArgumentParser(description='Generate inversion images')
    parser.add_argument('modelpath', metavar='P', type=str,
                        help="Path of Inversion model")
    parser.add_argument('--expansion', '-e', metavar='E', type=int,
                        help="Image expansion factor", default=200)
    parser.add_argument('--save-path', '-s', type=str,
                        help="Path of generated images, optional")
    parser.add_argument('--channel', '-c', type=int,
                        help="Inversion model output image channel", default=1)
    parser.add_argument('--num-classes', '-n', type=int,
                        help="Inversion classifier input classes", default=10)
    parser.add_argument('--complexity', '-x', type=int,
                        help="Inversion model conv channel size.", default=64)
    parser.add_argument('--blackbox', '-b', type=str,
                        help="Blackbox model directory; if set, it is queried for full posterior vectors",
                        default=None)
    parser.add_argument('--testset', metavar='DSET', type=str,
                        help="Test dataset to query when using full posterior vectors",
                        default=None)
    parser.add_argument('-d', '--device-id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    args = parser.parse_args()
    model = get_net('Inversion', 'custom_cnn', pretrained=args.modelpath,
                    num_classes=args.num_classes, channel=args.channel,
                    complexity=args.complexity)
    if args.device_id >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.device_id)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)
    if args.save_path:
        save_path = args.save_path
    else:
        save_path = os.path.join(os.path.dirname(args.modelpath), 'generated')
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    if args.testset is None:
        get_imgs(model, save_path, args.expansion, args.num_classes)
    else:
        blackbox = Blackbox.from_modeldir(args.blackbox, device=device)
        assert args.testset in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[args.testset]
        transform = datasets.modelfamily_to_transforms[modelfamily]['test']
        testset = datasets.__dict__[args.testset](train=False, transform=transform)
        dataloader = DataLoader(testset, batch_size=128, shuffle=False)
        total = 0
        img_vectors = []
        for inputs, targets in tqdm(dataloader):
            vector = blackbox(inputs)
            imgs = model(vector.to(device)).cpu()
            # Collect individual reconstructed images; only per-image tensors
            # belong in this list (the batch tensor itself must not be appended).
            for i in range(imgs.shape[0]):
                img_vectors.append(imgs[i])
                # save_image(imgs[i], os.path.join(save_path, "{}.{}.bmp".format(targets[i], total + i)))
            total += imgs.shape[0]
        np.random.shuffle(img_vectors)
        for i in range(args.expansion):
            save_image(img_vectors[i], os.path.join(save_path, "{}.bmp".format(total + i)))
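# Example invocation (a sketch; the script name and paths are illustrative,
# not part of the repo):
#
#   python generate_inversion.py results/models/inversion/checkpoint.pth.tar \
#       --expansion 200 --channel 1 --num-classes 10 -d 0
#
# When --blackbox/--testset are given, images are reconstructed from the
# blackbox's posterior vectors on the test set instead of via get_imgs().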
@classmethod
def from_modeldir(cls, model_dir, device=None, output_type='probs'):
    device = torch.device('cuda') if device is None else device

    # What was the model architecture used by this model?
    params_path = osp.join(model_dir, 'params.json')
    with open(params_path) as jf:
        params = json.load(jf)
    model_arch = params['model_arch']
    num_classes = params['num_classes']
    channel = params['channel']
    complexity = params['complexity']
    victim_dataset = params.get('dataset', 'imagenet')
    modelfamily = datasets.dataset_to_modelfamily[victim_dataset]

    # Instantiate the model
    # model = model_utils.get_net(model_arch, n_output_classes=num_classes)
    model = zoo.get_net(model_arch, modelfamily, pretrained=None,
                        num_classes=num_classes, channel=channel,
                        complexity=complexity)
    model = model.to(device)

    # Load weights
    checkpoint_path = osp.join(model_dir, 'model_best.pth.tar')
    if not osp.exists(checkpoint_path):
        checkpoint_path = osp.join(model_dir, 'checkpoint.pth.tar')
    print("=> loading checkpoint '{}'".format(checkpoint_path))
    # map_location lets CPU-only hosts load checkpoints saved on GPU
    checkpoint = torch.load(checkpoint_path, map_location=device)
    epoch = checkpoint['epoch']
    best_test_acc = checkpoint['best_acc']
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint (epoch {}, acc={:.2f})".format(epoch, best_test_acc))

    blackbox = cls(model, device, output_type)
    return blackbox
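# Usage sketch (the path is illustrative; the directory must contain
# params.json and model_best.pth.tar or checkpoint.pth.tar, as read above):
#
#   device = torch.device('cuda')
#   blackbox = Blackbox.from_modeldir('results/models/victim/gtsrb', device=device)
#   posteriors = blackbox(inputs)  # inputs: (N, C, H, W) float tensor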
def main():
    # torch.backends.cudnn.enabled = False
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('dataset', metavar='DS_NAME', type=str,
                        help='Dataset name')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    # Optional arguments
    parser.add_argument('-x', '--complexity', metavar='X', type=int,
                        help='Complexity of conv layer channel.', default=64)
    parser.add_argument('-o', '--out_path', metavar='PATH', type=str,
                        help='Output path for model', default=cfg.MODEL_DIR)
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=30, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w', '--num_workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    parser.add_argument('--train_subset', type=int,
                        help='Use a subset of train set', default=None)
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network', default=None)
    parser.add_argument('--weighted-loss', action='store_true',
                        help='Use a weighted loss', default=None)
    parser.add_argument('--optimizer-choice', type=str, help='Optimizer',
                        default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    # torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
        # torch.cuda.set_device(str(params['device_id']))
    else:
        device = torch.device('cpu')

    # ----------- Set up dataset
    dataset_name = params['dataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    dataset = datasets.__dict__[dataset_name]
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    train_transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    test_transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    trainset = dataset(train=True, transform=train_transform)
    testset = dataset(train=False, transform=test_transform)
    num_classes = len(trainset.classes)
    sample = testset[0][0]
    if len(sample.shape) <= 2:
        # 2-dimensional (single-channel) images
        channel = 1
    else:
        channel = sample.shape[0]
    params['channel'] = channel
    params['num_classes'] = num_classes
    if params['train_subset'] is not None:
        idxs = np.arange(len(trainset))
        ntrainsubset = params['train_subset']
        idxs = np.random.choice(idxs, size=ntrainsubset, replace=False)
        trainset = Subset(trainset, idxs)

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    complexity = params['complexity']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name, modelfamily, pretrained,
                        num_classes=num_classes, channel=channel,
                        complexity=complexity)
    model = model.to(device)
    optimizer = get_optimizer(model.parameters(), params['optimizer_choice'], **params)

    # ----------- Train
    out_path = params['out_path']
    model_utils.train_model(model, trainset, testset=testset, device=device,
                            optimizer=optimizer, **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
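# Example invocation (a sketch; the script name is illustrative, and the
# CNN32 architecture name is borrowed from the test block elsewhere in
# this repo):
#
#   python train_victim.py GTSRB CNN32 -o results/models/victim/gtsrb \
#       -d 0 -e 100 --lr 0.1 --optimizer-choice sgdm
#
# channel and num_classes are inferred from the dataset and written into
# params.json, which Blackbox.from_modeldir later reads back.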
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('model_dir', metavar='DIR', type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument('--budgets', metavar='B', type=str,
                        help='Comma separated values of budgets. Knockoffs will be trained for each budget.')
    # Optional arguments
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w', '--num_workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network', default=None)
    parser.add_argument('--weighted-loss', action='store_true',
                        help='Use a weighted loss', default=False)
    # Attacker's defense
    parser.add_argument('--argmaxed', action='store_true',
                        help='Only consider argmax labels', default=False)
    parser.add_argument('--optimizer_choice', type=str, help='Optimizer',
                        default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model_dir = params['model_dir']

    # ----------- Set up transferset
    transferset_path = osp.join(model_dir, 'transferset.pickle')
    with open(transferset_path, 'rb') as rf:
        transferset_samples = pickle.load(rf)
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(
        len(transferset_samples), num_classes))

    # ----------- Clean up transfer (if necessary)
    if params['argmaxed']:
        new_transferset_samples = []
        print('=> Using argmax labels (instead of posterior probabilities)')
        for i in range(len(transferset_samples)):
            x_i, y_i = transferset_samples[i]
            argmax_k = y_i.argmax()
            y_i_1hot = torch.zeros_like(y_i)
            y_i_1hot[argmax_k] = 1.
            new_transferset_samples.append((x_i, y_i_1hot))
        transferset_samples = new_transferset_samples

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        # Validate the name before indexing the dataset/transform tables below.
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError('# Transfer classes ({}) != # Testset classes ({})'.format(
            num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name, modelfamily, pretrained, num_classes=num_classes)
    model = model.to(device)

    # ----------- Train
    budgets = [int(b) for b in params['budgets'].split(',')]
    for b in budgets:
        np.random.seed(cfg.DEFAULT_SEED)
        torch.manual_seed(cfg.DEFAULT_SEED)
        torch.cuda.manual_seed(cfg.DEFAULT_SEED)
        transferset = samples_to_transferset(transferset_samples, budget=b, transform=transform)
        print()
        print('=> Training at budget = {}'.format(len(transferset)))
        optimizer = get_optimizer(model.parameters(), params['optimizer_choice'], **params)
        print(params)
        checkpoint_suffix = '.{}'.format(b)
        criterion_train = model_utils.soft_cross_entropy
        model_utils.train_model(model, transferset, model_dir, testset=testset,
                                criterion_train=criterion_train,
                                checkpoint_suffix=checkpoint_suffix,
                                device=device, optimizer=optimizer, **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
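# Example invocation (a sketch; the script name and paths are illustrative):
#
#   python train_knockoff.py results/models/adversary/gtsrb-random CNN32 GTSRB \
#       --budgets 100,1000,10000 -d 0 --argmaxed
#
# One surrogate is trained per budget; checkpoints are distinguished by
# checkpoint_suffix='.{budget}'.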
        agreement += torch.sum(labels_bb.cpu() == labels_sur.cpu()).int()
        if targeted:
            transfer += torch.sum(adv_labels_bb.cpu() == targets.cpu()).int()
        else:
            transfer += torch.sum(adv_labels_bb.cpu() != targets.cpu()).int()
    print("Agreement: {}".format(agreement / total))
    print("Transferability: {}".format(transfer / total))
    return transfer / total


if __name__ == '__main__':
    # This block is for temporary testing only.
    from datasets import GTSRB
    from datasets import modelfamily_to_transforms
    transform = modelfamily_to_transforms['custom_cnn']['train']
    dataset = GTSRB(False, transform)
    from knockoff.victim.blackbox import Blackbox
    import torch
    device = torch.device('cuda')
    blackbox = Blackbox.from_modeldir('results/models/victim/gtsrb', device)
    from models import zoo
    surrogate = zoo.get_net(
        'CNN32', 'custom_cnn',
        'results/models/adversary/manhattan/checkpoint.28.iter.pth.tar',
        num_classes=43)
    transfer = transferability(blackbox, surrogate, dataset, targeted=False)
def __init__(self,
             model_arch: str,
             state_dir: str,
             testset: str,
             pretrained: str = None,
             sampleset: str = None,
             blackbox_path: str = None,
             cuda: bool = True,
             complexity: int = 64,
             optimizer_choice: str = 'sgdm',
             batch_size: int = 64,
             topk: int = 0,
             argmax: bool = False,
             num_workers: int = 16,
             **kwargs) -> None:
    self.device = torch.device('cuda') if cuda else torch.device('cpu')
    # Stores model checkpoints, selected state in ActiveThief, etc.
    self.state_dir = state_dir
    self.selection, self.transfer, self.indices_list = load_state(state_dir)
    if not os.path.exists(state_dir):
        os.makedirs(state_dir)
    self.cuda = cuda
    if blackbox_path is None:
        # If blackbox_path is None, no blackbox model is involved in training.
        self.blackbox = None
    else:
        self.blackbox = Blackbox.from_modeldir(blackbox_path, self.device)
    modelfamily = datasets.dataset_to_modelfamily[testset]
    # Workaround for MNIST-like datasets: single-channel images, normalized
    # with dataset-specific parameters.
    if testset in ('MNIST', 'KMNIST', 'EMNIST', 'EMNISTLetters', 'FashionMNIST'):
        self.channel = 1
        self.transforms = datasets.MNIST_transform
    else:
        self.channel = 3
        self.transforms = datasets.modelfamily_to_transforms[modelfamily]
    # For absolute accuracy test.
    self.testset = datasets.__dict__[testset](
        train=False, transform=self.transforms['test'])
    if sampleset is not None:
        self.sampleset = datasets.__dict__[sampleset](
            train=True, transform=self.transforms['train'])
    else:
        self.sampleset = None
    self.argmax = argmax
    self.batch_size = batch_size
    self.topk = topk
    # For relative accuracy test.
    self.query = lambda data: query(self.blackbox, data, len(data), self.argmax,
                                    self.batch_size, self.device, self.topk)
    self.evaluation_set = query(self.blackbox, unpack(self.testset),
                                len(self.testset), True, self.batch_size, self.device)
    self.num_classes = len(self.testset.classes)
    self.target_model = get_net(model_arch, modelfamily, pretrained=pretrained,
                                channel=self.channel, complexity=complexity,
                                num_classes=self.num_classes).to(self.device)
    self.optim = get_optimizer(self.target_model.parameters(), optimizer_choice, **kwargs)
    self.criterion = soft_cross_entropy
    self.num_workers = num_workers
    self.kwargs = kwargs
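# Instantiation sketch (hedged: this excerpt does not show the enclosing
# class's name, so `Trainer` below is a stand-in; the paths are illustrative):
#
#   trainer = Trainer('CNN32', 'results/state/gtsrb', 'GTSRB',
#                     sampleset='GTSRB',
#                     blackbox_path='results/models/victim/gtsrb',
#                     batch_size=64, topk=0)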
def parser_dealer(option: Dict[str, bool]) -> Dict[str, Any]:
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    if option['transfer']:
        parser.add_argument('policy', metavar='PI', type=str,
                            help='Policy to use while training',
                            choices=['random', 'adaptive'])
        parser.add_argument('--budget', metavar='N', type=int,
                            help='Size of transfer set to construct', required=True)
        parser.add_argument('--out_dir', metavar='PATH', type=str,
                            help='Destination directory to store transfer set', required=True)
        parser.add_argument('--queryset', metavar='TYPE', type=str,
                            help='Adversary\'s dataset (P_A(X))', required=True)
    if option['active']:
        parser.add_argument('strategy', metavar='S', type=str,
                            help='Active Sample Strategy',
                            choices=['kcenter', 'random', 'dfal'])
        parser.add_argument('--metric', metavar='M', type=str,
                            help='K-Center method distance metric',
                            choices=['euclidean', 'manhattan', 'l1', 'l2'],
                            default='euclidean')
        parser.add_argument('--initial-size', metavar='N', type=int,
                            help='Active Learning Initial Sample Size', default=100)
        parser.add_argument('--budget-per-iter', metavar='N', type=int,
                            help='Budget for every iteration', default=100)
        parser.add_argument('--iterations', metavar='N', type=int,
                            help='Number of iterations', default=10)
    if option['sampling']:
        parser.add_argument('sampleset', metavar='DS_NAME', type=str,
                            help='Name of sample dataset in active learning selecting algorithms')
        parser.add_argument('--load-state', action='store_true', default=False,
                            help='Turn on to load state.')
        parser.add_argument('--state-suffix', metavar='SE', type=str,
                            help='Load selected samples from sample set',
                            required=False, default='')
    if option['synthetic']:
        parser.add_argument('synthetic_method', metavar='SM', type=str,
                            help='Synthetic Method',
                            choices=['fgsm', 'ifgsm', 'mifgsm'])
        parser.add_argument('eps', metavar='E', type=float,
                            help='Synthetic maximum epsilon')
        parser.add_argument('targeted_method', metavar='T', type=str,
                            help='Target methods',
                            choices=['non-targeted', 'targeted-random', 'targeted-topk'])
    if option['black_box']:
        parser.add_argument('victim_model_dir', metavar='VIC_DIR', type=str,
                            help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
        parser.add_argument('--argmaxed', action='store_true',
                            help='Only consider argmax labels', default=False)
        parser.add_argument('--pseudoblackbox', action='store_true',
                            help='Load prequeried labels as blackbox', default=False)
        parser.add_argument('--topk', metavar='TK', type=int,
                            help='Truncate blackbox posteriors to the top K classes', default=0)
    if option['train']:
        parser.add_argument('model_dir', metavar='SUR_DIR', type=str,
                            help='Surrogate Model Destination directory')
        parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                            help='Model name')
        parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                            help='Name of test')
        # Optional arguments
        parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                            help='number of epochs to train (default: 100)')
        parser.add_argument('-x', '--complexity', type=int, default=64, metavar='N',
                            help='Model conv channel size.')
        parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                            help='learning rate (default: 0.01)')
        parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                            help='SGD momentum (default: 0.5)')
        parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                            help='how many batches to wait before logging training status')
        parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                            help='Step sizes for LR')
        parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                            help='LR Decay Rate')
        parser.add_argument('--pretrained', type=str,
                            help='Use pretrained network', default=None)
        parser.add_argument('--weighted-loss', action='store_true',
                            help='Use a weighted loss', default=False)
        parser.add_argument('--optimizer-choice', type=str, help='Optimizer',
                            default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    # Applies to all circumstances
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-d', '--device-id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-w', '--num-workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    args = parser.parse_args()
    params = vars(args)
    device = device_dealer(**params)
    params['device'] = device
    if option['black_box']:
        blackbox_dir = params['victim_model_dir']
        if params['pseudoblackbox']:
            params['blackbox'] = PseudoBlackbox(blackbox_dir)
        else:
            params['blackbox'] = Blackbox.from_modeldir(blackbox_dir, device)
    if option['active']:
        # No extra post-processing needed for active-learning arguments.
        pass
    if option['sampling']:
        sample_set_name = params['sampleset']
        assert sample_set_name in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[sample_set_name]
        transform = datasets.modelfamily_to_transforms[modelfamily]['train']
        dataset = datasets.__dict__[sample_set_name](train=True, transform=transform)
        params['queryset'] = dataset
        params['selected'] = set()
        if params['load_state']:
            path = params['model_dir']
            params['selection'], params['transferset'], params['selected_indices'] = \
                load_state(path, params['state_suffix'])
    if option['train']:
        testset_name = params['testdataset']
        assert testset_name in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[testset_name]
        transform = datasets.modelfamily_to_transforms[modelfamily]['test']
        testset = datasets.__dict__[testset_name](train=False, transform=transform)
        params['testset'] = testset
        pretrained_path = params['pretrained']
        model_arch = params['model_arch']
        if params['pseudoblackbox']:
            num_classes = params['blackbox'].train_results[0].shape[0]
        else:
            num_classes = len(testset.classes)
        sample = testset[0][0]
        model = zoo.get_net(model_arch, modelfamily, pretrained_path,
                            num_classes=num_classes, channel=sample.shape[0],
                            complexity=params['complexity'])
        params['surrogate'] = model.to(device)
    return params
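# Usage sketch: the option dict toggles argument groups; the keys below are
# exactly the ones branched on above (the values are illustrative):
#
#   params = parser_dealer({
#       'transfer': False,
#       'active': True,
#       'sampling': True,
#       'synthetic': False,
#       'black_box': True,
#       'train': True,
#   })
#   surrogate, testset = params['surrogate'], params['testset']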
def main():
    parser = argparse.ArgumentParser(
        description='Select deepfool images, retrain the target model.')
    parser.add_argument('model_dir', metavar='SUR_DIR', type=str,
                        help='Surrogate Model Destination directory, which may contain selection '
                             'state, i.e. selection.pickle, transferset.pickle, select_indices.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument('blackbox_dir', metavar='VIC_DIR', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('sampleset', metavar='DS_NAME', type=str,
                        help='Name of sample dataset in deepfool selecting algorithms')
    parser.add_argument('deepfool_budget', metavar='N', type=int,
                        help='deepfool selection size.')
    parser.add_argument('--state-budget', type=int,
                        help='If > 0, load the corresponding budget of selection state.',
                        default=0)
    parser.add_argument('--argmaxed', action='store_true',
                        help='Only consider argmax labels', default=False)
    parser.add_argument('--topk', metavar='TK', type=int,
                        help='Truncate blackbox posteriors to the top K classes', default=0)
    parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('-x', '--complexity', type=int, default=64, metavar='N',
                        help='Model conv channel size.')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network, or a checkpoint', default=None)
    parser.add_argument('--weighted-loss', action='store_true',
                        help='Use a weighted loss', default=False)
    parser.add_argument('--optimizer-choice', type=str, help='Optimizer',
                        default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-d', '--device-id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-w', '--num-workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    # NOTE: argparse's type=bool treats any non-empty string as True, so this
    # flag can only be disabled by passing an empty string.
    parser.add_argument('-pen', '--ispenalty', metavar='N', type=bool,
                        help='Use penalty matrix', default=True)
    args = parser.parse_args()
    params = vars(args)
    device = device_dealer(device_id=args.device_id)
    blackbox = Blackbox.from_modeldir(args.blackbox_dir, device)
    assert args.sampleset in datasets.__dict__.keys()
    modelfamily = datasets.dataset_to_modelfamily[args.sampleset]
    transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    queryset = datasets.__dict__[args.sampleset](train=True, transform=transform)
    if args.state_budget > 0:
        selection, transfer, indices_list = load_state(state_dir=args.model_dir)
        selection = set(indices_list[:args.state_budget])
        transfer = transfer[:args.state_budget]
        indices_list = indices_list[:args.state_budget]
    else:
        selection, transfer, indices_list = set(), [], []
    testset_name = args.testdataset
    assert testset_name in datasets.__dict__.keys()
    modelfamily = datasets.dataset_to_modelfamily[testset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    testset = datasets.__dict__[testset_name](train=False, transform=transform)
    num_classes = len(testset.classes)
    pretrained_path = params['pretrained']
    model_arch = params['model_arch']
    sample = testset[0][0]
    model = zoo.get_net(model_arch, modelfamily, pretrained_path,
                        num_classes=num_classes, channel=sample.shape[0],
                        complexity=params['complexity'])
    model = model.to(device)
    # Uniform penalty matrix sized to the actual number of classes.
    penalty = np.ones((num_classes, num_classes))
    deepfool_choose(model, blackbox, queryset, testset, selection, transfer,
                    indices_list, device, penalty, **params)
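# Example invocation (a sketch; the script name is illustrative):
#
#   python deepfool_select.py results/models/adversary/gtsrb-dfal CNN32 GTSRB \
#       results/models/victim/gtsrb GTSRB 1000 --state-budget 0 -d 0
#
# Positional order: model_dir, model_arch, testdataset, blackbox_dir,
# sampleset, deepfool_budget.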
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Directory of Victim Blackbox')
    parser.add_argument('model_dir', metavar='DIR', type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str,
                        help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str,
                        help='Name of test')
    parser.add_argument('--budgets', metavar='B', type=str,
                        help='Comma separated values of budgets. Knockoffs will be trained for each budget.')
    parser.add_argument('--rounds', metavar='R', type=str,
                        help='Comma separated values of duplication rounds for each budget.')
    # Optional arguments
    parser.add_argument('-d', '--device_id', metavar='D', type=int,
                        help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--init_alpha', type=float, default=1.0, metavar='I',
                        help='initial iteration step (default: 1.0)')
    parser.add_argument('--num_steps', type=int, default=80, metavar='I',
                        help='iteration steps of each crafted sample (default: 80)')
    parser.add_argument('--eps', type=float, default=255.0, metavar='E',
                        help='maximum change that can be done on an image (default: 255.0)')
    parser.add_argument('--method', type=str, default='topk-IFGSMMod', metavar='METHOD',
                        help='direction_method-gradient_method')
    parser.add_argument('--directions', type=int, default=2, metavar='D',
                        help='directions')
    parser.add_argument('--max_pixel', type=float, default=1.0, metavar='P',
                        help='upper bound')
    parser.add_argument('--min_pixel', type=float, default=0.0, metavar='P',
                        help='lower bound')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w', '--num_workers', metavar='N', type=int,
                        help='# Worker threads to load data', default=10)
    parser.add_argument('--pretrained', type=str,
                        help='Use pretrained network', default=None)
    parser.add_argument('--weighted-loss', action='store_true',
                        help='Use a weighted loss', default=False)
    # Attacker's defense
    parser.add_argument('--argmaxed', action='store_true',
                        help='Only consider argmax labels', default=False)
    parser.add_argument('--optimizer_choice', type=str, help='Optimizer',
                        default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    train.model_dir = params['model_dir']

    # ----------- Set up transferset
    transferset_path = osp.join(train.model_dir, 'transferset.pickle')
    with open(transferset_path, 'rb') as rf:
        transferset_samples = pickle.load(rf)
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(
        len(transferset_samples), num_classes))

    # ----------- Clean up transfer (if necessary)
    if params['argmaxed']:
        new_transferset_samples = []
        print('=> Using argmax labels (instead of posterior probabilities)')
        for i in range(len(transferset_samples)):
            x_i, y_i = transferset_samples[i]
            argmax_k = y_i.argmax()
            y_i_1hot = torch.zeros_like(y_i)
            y_i_1hot[argmax_k] = 1.
            new_transferset_samples.append((x_i, y_i_1hot))
        transferset_samples = new_transferset_samples

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        # Validate the name before indexing the dataset/transform tables below.
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError('# Transfer classes ({}) != # Testset classes ({})'.format(
            num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name, modelfamily, pretrained, num_classes=num_classes)
    model = model.to(device)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Set up train params
    budgets = [int(b) for b in params['budgets'].split(',')]
    rounds = [int(r) for r in params['rounds'].split(',')]
    np.random.seed(cfg.DEFAULT_SEED)
    torch.manual_seed(cfg.DEFAULT_SEED)
    torch.cuda.manual_seed(cfg.DEFAULT_SEED)
    train.optimizer = get_optimizer(model.parameters(), params['optimizer_choice'], **params)
    train.criterion_train = model_utils.soft_cross_entropy
    train.params = params
    train.device = device
    train.testset = testset
    print(params)

    # Set up crafter params
    original_samples = transferset_samples[:]
    adversary = SyntheticAdversary(blackbox=blackbox, classifier=model,
                                   device=device, **params)
    for b, r in zip(budgets, rounds):
        if params['pretrained'] is None:
            train(model, original_samples, b, 1)
        total_samples = transferset_samples
        latest_samples = random.sample(total_samples, b)
        # cur_round: per-budget synthesis round (round 1 is the plain transfer set)
        for cur_round in range(2, r + 1):
            latest_samples = adversary.synthesize(latest_samples)
            transferset_samples = original_samples[:]
            transferset_samples.extend(latest_samples)
            total_samples.extend(latest_samples)
            train(model, transferset_samples, b, cur_round)
            latest_samples = random.sample(total_samples, b)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(train.model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
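# Example invocation (a sketch; the script name and paths are illustrative):
#
#   python train_synthetic.py results/models/victim/gtsrb \
#       results/models/adversary/gtsrb-jbda CNN32 GTSRB \
#       --budgets 100,500 --rounds 3,5 --method topk-IFGSMMod --eps 64
#
# Budgets pair with rounds one-to-one: round 1 trains on the original
# transfer set, and each later round synthesizes new samples from the
# latest batch before retraining.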