def main():
    parser = argparse.ArgumentParser(description='Evaluate a blackbox victim model')
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('defense', metavar='TYPE', type=str, help='Type of defense to use',
                        choices=BBOX_CHOICES)
    parser.add_argument('defense_args', metavar='STR', type=str,
                        help='Blackbox arguments in format "k1:v1,k2:v2,..."')
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set', required=True)
    parser.add_argument('--batch_size', metavar='N', type=int, help='Batch size of queries', default=1)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int, help='# Worker threads to load data',
                        default=10)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    defense_type = params['defense']
    if defense_type == 'rand_noise':
        BB = RandomNoise
    elif defense_type == 'rand_noise_wb':
        BB = RandomNoise_WB
    elif defense_type == 'mad':
        BB = MAD
    elif defense_type == 'mad_wb':
        BB = MAD_WB
    elif defense_type == 'reverse_sigmoid':
        BB = ReverseSigmoid
    elif defense_type == 'reverse_sigmoid_wb':
        BB = ReverseSigmoid_WB
    elif defense_type in ['none', 'topk', 'rounding']:
        BB = Blackbox
    else:
        raise ValueError('Unrecognized blackbox type')
    defense_kwargs = parse_defense_kwargs(params['defense_args'])
    defense_kwargs['log_prefix'] = 'test'
    print('=> Initializing BBox with defense {} and arguments: {}'.format(defense_type, defense_kwargs))
    blackbox = BB.from_modeldir(blackbox_dir, device, **defense_kwargs)

    for k, v in defense_kwargs.items():
        params[k] = v

    # ----------- Set up testset (read from the victim's params.json)
    with open(osp.join(blackbox_dir, 'params.json'), 'r') as rf:
        bbox_params = json.load(rf)
    testset_name = bbox_params['dataset']
    valid_datasets = datasets.__dict__.keys()
    if testset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[testset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    testset = datasets.__dict__[testset_name](train=False, transform=transform)
    print('=> Evaluating on {} ({} samples)'.format(testset_name, len(testset)))

    # ----------- Evaluate
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    epoch = bbox_params['epochs']
    testloader = DataLoader(testset, num_workers=nworkers, shuffle=False, batch_size=batch_size)
    test_loss, test_acc = model_utils.test_step(blackbox, testloader, nn.CrossEntropyLoss(), device,
                                                epoch=epoch)

    # ----------- Log results
    log_out_path = osp.join(out_path, 'bboxeval.{}.log.tsv'.format(len(testset)))
    with open(log_out_path, 'w') as wf:
        columns = ['run_id', 'epoch', 'split', 'loss', 'accuracy', 'best_accuracy']
        wf.write('\t'.join(columns) + '\n')
        run_id = str(datetime.now())
        test_cols = [run_id, epoch, 'test', test_loss, test_acc, test_acc]
        wf.write('\t'.join([str(c) for c in test_cols]) + '\n')

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_evalbbox.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
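# The script above relies on a parse_defense_kwargs helper imported from elsewhere
# in the repo. The sketch below is a hypothetical reimplementation, assuming only
# the "k1:v1,k2:v2,..." format documented in the argument help string; the actual
# helper may coerce value types differently.
def parse_defense_kwargs_sketch(kwargs_str):
    kwargs = dict()
    if kwargs_str.strip() in ('', 'none'):
        return kwargs
    for entry in kwargs_str.split(','):
        key, value = entry.split(':')
        # Try to coerce each value to int, then float; fall back to the raw string
        for cast in (int, float):
            try:
                value = cast(value)
                break
            except ValueError:
                continue
        kwargs[key] = value
    return kwargs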
def main():
    parser = argparse.ArgumentParser(description='Construct transfer set')
    parser.add_argument('policy', metavar='PI', type=str, help='Policy to use while training',
                        choices=['random', 'adaptive'])
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set', required=True)
    parser.add_argument('--budget', metavar='N', type=int,
                        help='Size of transfer set to construct', required=True)
    parser.add_argument('--queryset', metavar='TYPE', type=str,
                        help='Adversary\'s dataset (P_A(X))', required=True)
    parser.add_argument('--batch_size', metavar='N', type=int, help='Batch size of queries', default=8)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int, help='# Worker threads to load data',
                        default=10)
    parser.add_argument('-n', '--ngrams', metavar='NG', type=int, help='# n-grams', default=2)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    ngrams = params['ngrams']
    valid_datasets = list(text_classification.DATASETS.keys())
    if queryset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    # transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    # queryset = datasets.__dict__[queryset_name](train=True, transform=transform)

    dataset_dir = os.path.join('.data', queryset_name.lower() + '_csv')
    train_data_path = os.path.join(dataset_dir, queryset_name + '_ngrams_{}_train.data'.format(ngrams))
    test_data_path = os.path.join(dataset_dir, queryset_name + '_ngrams_{}_test.data'.format(ngrams))
    if not (os.path.exists(train_data_path) and os.path.exists(test_data_path)):
        if not os.path.exists('.data'):
            print('Creating directory {}'.format('.data'))
            os.mkdir('.data')
        trainset, testset = text_classification.DATASETS[queryset_name](root='.data', ngrams=ngrams)
        print('Saving train data to {}'.format(train_data_path))
        torch.save(trainset, train_data_path)
        print('Saving test data to {}'.format(test_data_path))
        torch.save(testset, test_data_path)
    else:
        print('Loading train data from {}'.format(train_data_path))
        trainset = torch.load(train_data_path)
        print('Loading test data from {}'.format(test_data_path))
        testset = torch.load(test_data_path)
    queryset, _ = trainset, testset
    vocab_size = len(trainset.get_vocab())
    num_classes = len(trainset.get_labels())

    # ----------- Initialize blackbox, i.e. the victim model
    blackbox_dir = params['victim_model_dir']
    embed_dim = 32
    blackbox = Blackbox.from_modeldir(blackbox_dir, vocab_size, num_classes, embed_dim, device)

    # ----------- Initialize adversary
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    transfer_out_path = osp.join(out_path, 'transferset.pickle')
    if params['policy'] == 'random':
        adversary = RandomAdversary(blackbox, queryset, batch_size=batch_size)
    elif params['policy'] == 'adaptive':
        raise NotImplementedError()
    else:
        raise ValueError('Unrecognized policy')

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
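# Example invocation of the script above (hypothetical file and directory names):
#   python knockoff/adversary/transfer_text.py random models/victim/agnews \
#       --out_dir models/adversary/agnews --budget 10000 \
#       --queryset AG_NEWS --batch_size 8 --ngrams 2
# AG_NEWS is one of the torchtext text_classification.DATASETS keys; the victim
# directory is assumed to contain "model_best.pth.tar" and "params.json".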
def train_model(model, trainset, out_path, batch_size=64, criterion_train=None, criterion_test=None,
                testset=None, device=None, num_workers=10, resume=None, lr=0.1, momentum=0.5,
                lr_step=30, lr_gamma=0.1, epochs=100, log_interval=100, weighted_loss=False,
                checkpoint_suffix='', optimizer=None, scheduler=None, **kwargs):
    if device is None:
        device = torch.device('cuda')
    if not osp.exists(out_path):
        knockoff_utils.create_dir(out_path)
    run_id = str(datetime.now())

    # Data loaders
    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    if testset is not None:
        test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    else:
        test_loader = None

    if weighted_loss:
        if not isinstance(trainset.samples[0][1], int):
            print('Labels in trainset are of type: {}. Expected: {}.'.format(
                type(trainset.samples[0][1]), int))
        class_to_count = dd(int)
        for _, y in trainset.samples:
            class_to_count[y] += 1
        class_sample_count = [class_to_count[c] for c, cname in enumerate(trainset.classes)]
        print('=> counts per class: ', class_sample_count)
        weight = np.min(class_sample_count) / torch.Tensor(class_sample_count)
        weight = weight.to(device)
        print('=> using weights: ', weight)
    else:
        weight = None

    # Optimizer
    if criterion_train is None:
        criterion_train = nn.CrossEntropyLoss(reduction='mean', weight=weight)
    if criterion_test is None:
        criterion_test = nn.CrossEntropyLoss(reduction='mean', weight=weight)
    if optimizer is None:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=5e-4)
    if scheduler is None:
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)
    start_epoch = 1
    best_train_acc, train_acc = -1., -1.
    best_test_acc, test_acc, test_loss = -1., -1., -1.

    # Resume if required
    if resume is not None:
        model_path = resume
        if osp.isfile(model_path):
            print("=> loading checkpoint '{}'".format(model_path))
            checkpoint = torch.load(model_path)
            # start_epoch = checkpoint['epoch']
            best_test_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(model_path))

    # raw = model.layer4[0].conv2.weight.data.clone()
    # model = protect_model(model, layer_name="conv2d_12")
    # model = protect_model(model, layer_name="conv2d_11")
    # model = protect_model(model, layer_name="conv2d_10")
    # new = model.layer4[0].conv2.weight.data
    # diff = raw != new

    # Initialize logging
    log_path = osp.join(out_path, 'train{}.log.tsv'.format(checkpoint_suffix))
    if not osp.exists(log_path):
        with open(log_path, 'w') as wf:
            columns = ['run_id', 'epoch', 'split', 'loss', 'accuracy', 'best_accuracy']
            wf.write('\t'.join(columns) + '\n')

    tb_dir = osp.join(out_path, 'tb_log')
    if os.path.exists(tb_dir):
        shutil.rmtree(tb_dir)
    os.makedirs(tb_dir)
    tb_writer = SummaryWriter(tb_dir)

    model_out_path = osp.join(out_path, 'checkpoint{}.pth.tar'.format(checkpoint_suffix))
    for epoch in range(start_epoch, epochs + 1):
        scheduler.step(epoch)
        train_loss, train_acc = train_step(model, train_loader, criterion_train, optimizer, epoch,
                                           device, log_interval=log_interval, tb_writer=tb_writer)
        best_train_acc = max(best_train_acc, train_acc)

        if test_loader is not None:
            test_loss, test_acc = test_step(model, test_loader, criterion_test, device, epoch=epoch)
            best_test_acc = max(best_test_acc, test_acc)
            tb_writer.add_scalar('test/loss', test_loss, epoch)
            tb_writer.add_scalar('test/acc', test_acc, epoch)

        # Checkpoint whenever the current epoch achieves the best test accuracy so far
        if test_acc >= best_test_acc:
            state = {
                'epoch': epoch,
                'arch': model.__class__,
                'state_dict': model.state_dict(),
                'best_acc': test_acc,
                'optimizer': optimizer.state_dict(),
                'created_on': str(datetime.now()),
            }
            torch.save(state, model_out_path)

        # Log
        with open(log_path, 'a') as af:
            train_cols = [run_id, epoch, 'train', train_loss, train_acc, best_train_acc]
            af.write('\t'.join([str(c) for c in train_cols]) + '\n')
            test_cols = [run_id, epoch, 'test', test_loss, test_acc, best_test_acc]
            af.write('\t'.join([str(c) for c in test_cols]) + '\n')

    return model
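# Minimal usage sketch for train_model (hypothetical model and dataset names;
# the surrounding repo normally wires these up from parsed CLI arguments):
#   modelfamily = datasets.dataset_to_modelfamily['CIFAR10']
#   transform = datasets.modelfamily_to_transforms[modelfamily]['train']
#   trainset = datasets.__dict__['CIFAR10'](train=True, transform=transform)
#   testset = datasets.__dict__['CIFAR10'](train=False, transform=transform)
#   model = zoo.get_net('resnet34', modelfamily, pretrained=None, num_classes=10)
#   train_model(model, trainset, out_path='models/victim/cifar10', testset=testset,
#               device=torch.device('cuda'), epochs=100, lr=0.1)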
def main():
    parser = argparse.ArgumentParser(description='Construct transfer set')
    parser.add_argument('policy', metavar='PI', type=str, help='Policy to use while training',
                        choices=['random', 'adaptive'])
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set', required=True)
    parser.add_argument('--budget', metavar='N', type=int,
                        help='Size of transfer set to construct', required=True)
    parser.add_argument('--queryset', metavar='TYPE', type=str,
                        help='Adversary\'s dataset (P_A(X))', required=True)
    parser.add_argument('--batch_size', metavar='N', type=int, help='Batch size of queries', default=8)
    parser.add_argument('--root', metavar='DIR', type=str, help='Root directory for ImageFolder',
                        default=None)
    parser.add_argument('--modelfamily', metavar='TYPE', type=str, help='Model family', default=None)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int, help='# Worker threads to load data',
                        default=10)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name] if params['modelfamily'] is None \
        else params['modelfamily']
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    if queryset_name == 'ImageFolder':
        assert params['root'] is not None, 'argument "--root ROOT" required for ImageFolder'
        queryset = datasets.__dict__[queryset_name](root=params['root'], transform=transform)
    else:
        queryset = datasets.__dict__[queryset_name](train=True, transform=transform)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Initialize adversary
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    transfer_out_path = osp.join(out_path, 'transferset.pickle')
    if params['policy'] == 'random':
        adversary = RandomAdversary(blackbox, queryset, batch_size=batch_size)
    elif params['policy'] == 'adaptive':
        raise NotImplementedError()
    else:
        raise ValueError('Unrecognized policy')

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
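# Example invocation of the script above (hypothetical paths):
#   python knockoff/adversary/transfer.py random models/victim/cifar10 \
#       --out_dir models/adversary/cifar10 --budget 50000 \
#       --queryset CIFAR100 --batch_size 8
# When querying with --queryset ImageFolder, additionally pass --root /path/to/images.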
def train_and_valid(trainset, testset, model, model_name, modelfamily, out_path, batch_size, lr,
                    lr_gamma, num_workers, num_epochs=5, device='cpu'):
    if not osp.exists(out_path):
        knockoff_utils.create_dir(out_path)

    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=lr_gamma)
    criterion = nn.CrossEntropyLoss(reduction='mean')
    train_data = DataLoader(trainset, batch_size=batch_size, shuffle=True,
                            collate_fn=generate_batch, num_workers=num_workers)
    num_lines = num_epochs * len(train_data)
    best_test_acc, test_acc = -1., -1.

    # Initialize logging
    log_path = osp.join(out_path, 'train-{}-{}.log.tsv'.format(model_name, modelfamily))
    if not osp.exists(log_path):
        with open(log_path, 'w') as wf:
            columns = ['run_id', 'epoch', 'training loss', 'test accuracy', 'best_accuracy']
            wf.write('\t'.join(columns) + '\n')

    model_out_path = osp.join(out_path, 'checkpoint-{}-{}.pth.tar'.format(model_name, modelfamily))
    for epoch in range(num_epochs):
        for i, (text, offsets, cls) in enumerate(train_data):
            optimizer.zero_grad()
            text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)
            output = model(text, offsets)
            train_loss = criterion(output, cls)
            train_loss.backward()
            optimizer.step()
            processed_lines = i + len(train_data) * epoch
            progress = processed_lines / float(num_lines)
            if processed_lines % 128 == 0:
                sys.stderr.write('\rTraining progress: {:3.0f}% lr: {:3.3f} loss: {:3.3f}'.format(
                    progress * 100, scheduler.get_lr()[0], train_loss))
        scheduler.step()
        print('')
        test_acc = test(model, testset)
        best_test_acc = max(best_test_acc, test_acc)
        print('Test - Accuracy: {}'.format(test_acc))

        # Checkpoint whenever the current epoch achieves the best test accuracy so far
        if test_acc >= best_test_acc:
            state = {
                'epoch': epoch,
                'arch': model.__class__,
                'state_dict': model.state_dict(),
                'best_acc': test_acc,
                'optimizer': optimizer.state_dict(),
                'created_on': str(datetime.now()),
            }
            torch.save(state, model_out_path)

        # Log
        run_id = str(datetime.now())
        with open(log_path, 'a') as af:
            data_column = [run_id, epoch, train_loss.item(), test_acc, best_test_acc]
            af.write('\t'.join([str(c) for c in data_column]) + '\n')
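# The DataLoader above depends on a generate_batch collate_fn defined elsewhere in
# the repo. The sketch below is a hypothetical version following the standard
# torchtext text-classification pattern: token ids are concatenated into one flat
# tensor and per-example offsets are passed to an nn.EmbeddingBag-style model.
def generate_batch_sketch(batch):
    # Each entry is assumed to be a (label, token_ids) pair, as produced by
    # torchtext.datasets.text_classification
    cls = torch.tensor([entry[0] for entry in batch])
    text = [entry[1] for entry in batch]
    # offsets[i] marks where example i begins in the concatenated token tensor
    offsets = torch.tensor([0] + [len(t) for t in text][:-1]).cumsum(dim=0)
    text = torch.cat(text)
    return text, offsets, cls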
def main():
    parser = argparse.ArgumentParser(description='Construct adaptive transfer set')
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str, help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str, help='Name of test dataset')
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set', required=True)
    parser.add_argument('--budget', metavar='N', type=int,
                        help='Size of transfer set to construct', required=True)
    parser.add_argument('--queryset', metavar='TYPE', type=str,
                        help='Adversary\'s dataset (P_A(X))', required=True)
    parser.add_argument('--batch_size', metavar='N', type=int, help='Batch size of queries', default=8)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id', default=0)
    parser.add_argument('--pretrained', type=str, help='Use pretrained network', default=None)
    parser.add_argument('--defense', type=str, help='Defense strategy used by victim side', default=None)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path + '-adaptive')
    defense = params['defense']
    if defense:
        transfer_out_path = osp.join(out_path + '-adaptive', 'transferset-' + defense + '.pickle')
    else:
        transfer_out_path = osp.join(out_path + '-adaptive', 'transferset.pickle')

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    queryset = datasets.__dict__[queryset_name](train=True, transform=transform)
    queryset.targets = [queryset.samples[idx][1] for idx in range(len(queryset))]
    num_classes = len(queryset.classes)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, defense, device)

    # ----------- Initialize knockoff model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name, modelfamily, pretrained, num_classes=num_classes)
    model = model.to(device)

    adversary = AdaptiveAdversary(queryset, blackbox, model, device, reward='all')

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(len(transferset), transfer_out_path))

    # Store arguments (in the same "-adaptive" directory created above)
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path + '-adaptive', 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
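# Example invocation of the adaptive script above (hypothetical paths and dataset
# names; the positional order is victim_model_dir, model_arch, testdataset):
#   python knockoff/adversary/transfer_adaptive.py models/victim/cubs200 resnet34 CUBS200 \
#       --out_dir models/adversary/cubs200 --budget 10000 \
#       --queryset ImageNet1k --batch_size 8 --defense mad
# Note the queryset must be ImageFolder-style (expose .samples and .classes),
# since the script reads queryset.samples to populate targets.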
def main():
    parser = argparse.ArgumentParser(description='Jacobian Model Stealing Attack')
    parser.add_argument('policy', metavar='PI', type=str, help='Policy to use while training')
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('defense', metavar='TYPE', type=str, help='Type of defense to use',
                        choices=BBOX_CHOICES, default='none')
    parser.add_argument('defense_args', metavar='STR', type=str,
                        help='Blackbox arguments in format "k1:v1,k2:v2,..."')
    parser.add_argument('--model_adv', metavar='STR', type=str, help='Model arch of F_A', default=None)
    parser.add_argument('--pretrained', metavar='STR', type=str, help='Assumption of F_A', default=None)
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set', required=True)
    parser.add_argument('--testset', metavar='TYPE', type=str, help='Blackbox testset (P_V(X))',
                        required=True)
    parser.add_argument('--batch_size', metavar='N', type=int, help='Batch size of queries',
                        default=cfg.DEFAULT_BATCH_SIZE)
    # ----------- Params for Jacobian-based augmentation
    parser.add_argument('--budget', metavar='N', type=int, help='Query limit to blackbox', default=10000)
    parser.add_argument('--queryset', metavar='TYPE', type=str, help='Data for seed images', required=True)
    parser.add_argument('--seedsize', metavar='N', type=int, help='Size of seed set', default=100)
    parser.add_argument('--rho', metavar='N', type=int, help='# Data augmentation steps', default=None)
    parser.add_argument('--sigma', metavar='N', type=int,
                        help='Reservoir sampling beyond these many epochs', default=3)
    parser.add_argument('--kappa', metavar='N', type=int, help='Size of reservoir', default=None)
    parser.add_argument('--tau', metavar='N', type=int,
                        help='Iteration period after which step size is multiplied by -1', default=5)
    parser.add_argument('--train_epochs', metavar='N', type=int, help='# Epochs to train model',
                        default=20)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int, help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--train_transform', action='store_true', help='Perform data augmentation',
                        default=False)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    np.random.seed(cfg.DEFAULT_SEED)
    torch.manual_seed(cfg.DEFAULT_SEED)
    torch.cuda.manual_seed(cfg.DEFAULT_SEED)

    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform_type = 'train' if params['train_transform'] else 'test'
    if params['train_transform']:
        print('=> Using data augmentation while querying')
    transform = datasets.modelfamily_to_transforms[modelfamily][transform_type]
    queryset = datasets.__dict__[queryset_name](train=True, transform=transform)

    # Use a subset of queryset as the seed set
    subset_idxs = np.random.choice(range(len(queryset)), size=params['seedsize'], replace=False)
    seedset = Subset(queryset, subset_idxs)

    # ----------- Set up testset
    testset_name = params['testset']
    if testset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[testset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    testset = datasets.__dict__[testset_name](train=False, transform=transform)
    num_classes = len(testset.classes)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    defense_type = params['defense']
    if defense_type == 'rand_noise':
        BB = RandomNoise
    elif defense_type == 'rand_noise_wb':
        BB = RandomNoise_WB
    elif defense_type == 'mad':
        BB = MAD
    elif defense_type == 'mad_wb':
        BB = MAD_WB
    elif defense_type == 'reverse_sigmoid':
        BB = ReverseSigmoid
    elif defense_type == 'reverse_sigmoid_wb':
        BB = ReverseSigmoid_WB
    elif defense_type in ['none', 'topk', 'rounding']:
        BB = Blackbox
    else:
        raise ValueError('Unrecognized blackbox type')
    defense_kwargs = parse_defense_kwargs(params['defense_args'])
    defense_kwargs['log_prefix'] = 'transfer'
    print('=> Initializing BBox with defense {} and arguments: {}'.format(defense_type, defense_kwargs))
    blackbox = BB.from_modeldir(blackbox_dir, device, **defense_kwargs)

    for k, v in defense_kwargs.items():
        params[k] = v

    # ----------- Initialize adversary
    budget = params['budget']
    model_adv_name = params['model_adv']
    model_adv_pretrained = params['pretrained']
    train_epochs = params['train_epochs']
    batch_size = params['batch_size']
    kappa = params['kappa']
    tau = params['tau']
    rho = params['rho']
    sigma = params['sigma']
    policy = params['policy']
    adversary = JacobianAdversary(blackbox, budget, model_adv_name, model_adv_pretrained, modelfamily,
                                  seedset, testset, device, out_path, batch_size=batch_size,
                                  train_epochs=train_epochs, kappa=kappa, tau=tau, rho=rho,
                                  sigma=sigma, aug_strategy=policy)

    print('=> constructing transfer set...')
    transferset, model_adv = adversary.get_transferset()
    # Transfer sets can be massive (>30 GB) -- skip serializing them for now
    # transfer_out_path = osp.join(out_path, 'transferset.pickle')
    # with open(transfer_out_path, 'wb') as wf:
    #     pickle.dump(transferset, wf, protocol=pickle.HIGHEST_PROTOCOL)
    # print('=> transfer set ({} samples) written to: {}'.format(len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
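# Example invocation of the Jacobian attack script above (hypothetical paths; the
# accepted policy names depend on what JacobianAdversary supports as aug_strategy,
# e.g. 'jbda' in the original Jacobian-based data augmentation formulation):
#   python knockoff/adversary/jacobian.py jbda models/victim/mnist none "" \
#       --out_dir models/adversary/mnist-jbda --testset MNIST \
#       --queryset MNIST --budget 10000 --seedsize 100 --train_epochs 20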