Code Example #1
def main():
    parser = argparse.ArgumentParser(description='Construct transfer set')
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('defense', metavar='TYPE', type=str, help='Type of defense to use',
                        choices=BBOX_CHOICES)
    parser.add_argument('defense_args', metavar='STR', type=str, help='Blackbox arguments in format "k1:v1,k2:v2,..."')
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set', required=True)
    parser.add_argument('--batch_size', metavar='TYPE', type=int, help='Batch size of queries', default=1)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int, help='# Worker threads to load data', default=10)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    defense_type = params['defense']
    if defense_type == 'rand_noise':
        BB = RandomNoise
    elif defense_type == 'rand_noise_wb':
        BB = RandomNoise_WB
    elif defense_type == 'mad':
        BB = MAD
    elif defense_type == 'mad_wb':
        BB = MAD_WB
    elif defense_type == 'reverse_sigmoid':
        BB = ReverseSigmoid
    elif defense_type == 'reverse_sigmoid_wb':
        BB = ReverseSigmoid_WB
    elif defense_type in ['none', 'topk', 'rounding']:
        BB = Blackbox
    else:
        raise ValueError('Unrecognized blackbox type')
    defense_kwargs = parse_defense_kwargs(params['defense_args'])
    defense_kwargs['log_prefix'] = 'test'
    print('=> Initializing BBox with defense {} and arguments: {}'.format(defense_type, defense_kwargs))
    blackbox = BB.from_modeldir(blackbox_dir, device, **defense_kwargs)

    for k, v in defense_kwargs.items():
        params[k] = v

    # ----------- Set up queryset
    with open(osp.join(blackbox_dir, 'params.json'), 'r') as rf:
        bbox_params = json.load(rf)
    testset_name = bbox_params['dataset']
    valid_datasets = datasets.__dict__.keys()
    if testset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[testset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    testset = datasets.__dict__[testset_name](train=False, transform=transform)
    print('=> Evaluating on {} ({} samples)'.format(testset_name, len(testset)))

    # ----------- Evaluate
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    epoch = bbox_params['epochs']
    testloader = DataLoader(testset, num_workers=nworkers, shuffle=False, batch_size=batch_size)
    test_loss, test_acc = model_utils.test_step(blackbox, testloader, nn.CrossEntropyLoss(), device,
                                                epoch=epoch)

    log_out_path = osp.join(out_path, 'bboxeval.{}.log.tsv'.format(len(testset)))
    with open(log_out_path, 'w') as wf:
        columns = ['run_id', 'epoch', 'split', 'loss', 'accuracy', 'best_accuracy']
        wf.write('\t'.join(columns) + '\n')

        run_id = str(datetime.now())
        test_cols = [run_id, epoch, 'test', test_loss, test_acc, test_acc]
        wf.write('\t'.join([str(c) for c in test_cols]) + '\n')

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_evalbbox.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
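The parse_defense_kwargs helper is imported elsewhere and not shown in this excerpt. A minimal sketch of a parser for the "k1:v1,k2:v2,..." format described in the argument help, assuming only numeric or string values, might look like the following (the actual knockoffnets helper may coerce types differently):

# Hypothetical sketch of a "k1:v1,k2:v2,..." parser in the spirit of
# parse_defense_kwargs; the real knockoffnets helper may differ.
def parse_defense_kwargs(kwargs_str):
    kwargs = dict()
    if not kwargs_str or kwargs_str == 'none':
        return kwargs
    for entry in kwargs_str.split(','):
        key, value = entry.split(':')
        # Coerce numeric values where possible, otherwise keep the raw string
        for cast in (int, float):
            try:
                value = cast(value)
                break
            except ValueError:
                continue
        kwargs[key] = value
    return kwargs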
Code Example #2
def main():
    parser = argparse.ArgumentParser(description='Construct transfer set')
    parser.add_argument('policy',
                        metavar='PI',
                        type=str,
                        help='Policy to use while training',
                        choices=['random', 'adaptive'])
    parser.add_argument(
        'victim_model_dir',
        metavar='PATH',
        type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )  #??? Why do we need this?
    parser.add_argument('--out_dir',
                        metavar='PATH',
                        type=str,
                        help='Destination directory to store transfer set',
                        required=True)
    parser.add_argument('--budget',
                        metavar='N',
                        type=int,
                        help='Size of transfer set to construct',
                        required=True)
    parser.add_argument('--queryset',
                        metavar='TYPE',
                        type=str,
                        help='Adversary\'s dataset (P_A(X))',
                        required=True)
    parser.add_argument('--batch_size',
                        metavar='TYPE',
                        type=int,
                        help='Batch size of queries',
                        default=8)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id',
                        default=0)
    parser.add_argument('-w',
                        '--nworkers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('-n',
                        '--ngrams',
                        metavar='NG',
                        type=int,
                        help='#n-grams',
                        default=2)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    ngrams = params['ngrams']
    valid_datasets = list(text_classification.DATASETS.keys())
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    # transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    # queryset = datasets.__dict__[queryset_name](train=True, transform=transform)
    dataset_dir = '.data'
    dataset_dir = dataset_dir + '/' + queryset_name.lower() + '_csv'
    train_data_path = os.path.join(
        dataset_dir, queryset_name + "_ngrams_{}_train.data".format(ngrams))
    test_data_path = os.path.join(
        dataset_dir, queryset_name + "_ngrams_{}_test.data".format(ngrams))
    if not (os.path.exists(train_data_path)
            and os.path.exists(test_data_path)):
        if not os.path.exists('.data'):
            print("Creating directory {}".format(dataset_dir))
            os.mkdir('.data')
        trainset, testset = text_classification.DATASETS[queryset_name](
            root='.data', ngrams=ngrams)
        print("Saving train data to {}".format(train_data_path))
        torch.save(trainset, train_data_path)
        print("Saving test data to {}".format(test_data_path))
        torch.save(testset, test_data_path)
    else:
        print("Loading train data from {}".format(train_data_path))
        trainset = torch.load(train_data_path)
        print("Loading test data from {}".format(test_data_path))
        testset = torch.load(test_data_path)

    queryset, _ = trainset, testset
    vocab_size = len(trainset.get_vocab())
    num_classes = len(trainset.get_labels())
    # ----------- Initialize blackbox i.e. victim model
    blackbox_dir = params['victim_model_dir']
    embed_dim = 32
    blackbox = Blackbox.from_modeldir(blackbox_dir, vocab_size, num_classes,
                                      embed_dim, device)

    # ----------- Initialize adversary
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    transfer_out_path = osp.join(out_path, 'transferset.pickle')
    if params['policy'] == 'random':
        adversary = RandomAdversary(blackbox, queryset, batch_size=batch_size)
    elif params['policy'] == 'adaptive':
        raise NotImplementedError()
    else:
        raise ValueError("Unrecognized policy")

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(
        len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
Code Example #3
def train_model(model,
                trainset,
                out_path,
                batch_size=64,
                criterion_train=None,
                criterion_test=None,
                testset=None,
                device=None,
                num_workers=10,
                resume=None,
                lr=0.1,
                momentum=0.5,
                lr_step=30,
                lr_gamma=0.1,
                epochs=100,
                log_interval=100,
                weighted_loss=False,
                checkpoint_suffix='',
                optimizer=None,
                scheduler=None,
                **kwargs):

    if device is None:
        device = torch.device('cuda')
    if not osp.exists(out_path):
        knockoff_utils.create_dir(out_path)
    run_id = str(datetime.now())

    # Data loaders
    train_loader = DataLoader(trainset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    if testset is not None:
        test_loader = DataLoader(testset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)
    else:
        test_loader = None

    if weighted_loss:
        if not isinstance(trainset.samples[0][1], int):
            print('Labels in trainset are of type: {}. Expected: {}.'.format(
                type(trainset.samples[0][1]), int))

        class_to_count = dd(int)
        for _, y in trainset.samples:
            class_to_count[y] += 1
        class_sample_count = [
            class_to_count[c] for c, cname in enumerate(trainset.classes)
        ]
        print('=> counts per class: ', class_sample_count)
        weight = np.min(class_sample_count) / torch.Tensor(class_sample_count)
        weight = weight.to(device)
        print('=> using weights: ', weight)
    else:
        weight = None

    # Optimizer
    if criterion_train is None:
        criterion_train = nn.CrossEntropyLoss(reduction='mean', weight=weight)
    if criterion_test is None:
        criterion_test = nn.CrossEntropyLoss(reduction='mean', weight=weight)
    if optimizer is None:
        optimizer = optim.SGD(model.parameters(),
                              lr=lr,
                              momentum=momentum,
                              weight_decay=5e-4)
    if scheduler is None:
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=lr_step,
                                              gamma=lr_gamma)
    start_epoch = 1
    best_train_acc, train_acc = -1., -1.
    best_test_acc, test_acc, test_loss = -1., -1., -1.

    # Resume if required
    if resume is not None:
        model_path = resume
        if osp.isfile(model_path):
            print("=> loading checkpoint '{}'".format(model_path))
            checkpoint = torch.load(model_path)
            # start_epoch = checkpoint['epoch']
            best_test_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(model_path))

    # raw = model.layer4[0].conv2.weight.data.clone()
    # model = protect_model(model, layer_name="conv2d_12")
    # model = protect_model(model, layer_name="conv2d_11")
    # model = protect_model(model, layer_name="conv2d_10")
    # new = model.layer4[0].conv2.weight.data
    # diff = raw != new
    # st()

    # Initialize logging
    log_path = osp.join(out_path, 'train{}.log.tsv'.format(checkpoint_suffix))
    if not osp.exists(log_path):
        with open(log_path, 'w') as wf:
            columns = [
                'run_id', 'epoch', 'split', 'loss', 'accuracy', 'best_accuracy'
            ]
            wf.write('\t'.join(columns) + '\n')
    tb_dir = osp.join(out_path, "tb_log")
    if os.path.exists(tb_dir):
        shutil.rmtree(tb_dir)
    os.makedirs(tb_dir)
    tb_writer = SummaryWriter(tb_dir)

    model_out_path = osp.join(out_path,
                              'checkpoint{}.pth.tar'.format(checkpoint_suffix))
    for epoch in range(start_epoch, epochs + 1):
        scheduler.step(epoch)
        train_loss, train_acc = train_step(model,
                                           train_loader,
                                           criterion_train,
                                           optimizer,
                                           epoch,
                                           device,
                                           log_interval=log_interval,
                                           tb_writer=tb_writer)
        best_train_acc = max(best_train_acc, train_acc)
        # train_loss, train_acc = 0, 0

        if test_loader is not None:
            test_loss, test_acc = test_step(model,
                                            test_loader,
                                            criterion_test,
                                            device,
                                            epoch=epoch)
            best_test_acc = max(best_test_acc, test_acc)
            tb_writer.add_scalar('test/loss', test_loss, epoch)
            tb_writer.add_scalar('test/acc', test_acc, epoch)

        # Checkpoint
        if test_acc >= best_test_acc:
            state = {
                'epoch': epoch,
                'arch': model.__class__,
                'state_dict': model.state_dict(),
                'best_acc': test_acc,
                'optimizer': optimizer.state_dict(),
                'created_on': str(datetime.now()),
            }
            torch.save(state, model_out_path)

        # Log
        with open(log_path, 'a') as af:
            train_cols = [
                run_id, epoch, 'train', train_loss, train_acc, best_train_acc
            ]
            af.write('\t'.join([str(c) for c in train_cols]) + '\n')
            test_cols = [
                run_id, epoch, 'test', test_loss, test_acc, best_test_acc
            ]
            af.write('\t'.join([str(c) for c in test_cols]) + '\n')

    return model
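A hedged usage sketch for train_model(); the CIFAR-10 datasets and ResNet-18 model below are stand-ins, not the objects the knockoffnets pipeline actually builds through its datasets/zoo helpers.

# Hedged usage sketch for train_model(); CIFAR-10 / ResNet-18 are placeholders.
import torch
import torchvision
import torchvision.transforms as T

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = T.Compose([T.ToTensor()])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
model = torchvision.models.resnet18(num_classes=10).to(device)

model = train_model(model, trainset, out_path='./out', batch_size=64, testset=testset,
                    device=device, epochs=10, lr=0.1, lr_step=30, lr_gamma=0.1)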
Code Example #4
File: transfer.py  Project: xieldy/knockoffnets
def main():
    parser = argparse.ArgumentParser(description='Construct transfer set')
    parser.add_argument('policy',
                        metavar='PI',
                        type=str,
                        help='Policy to use while training',
                        choices=['random', 'adaptive'])
    parser.add_argument(
        'victim_model_dir',
        metavar='PATH',
        type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )
    parser.add_argument('--out_dir',
                        metavar='PATH',
                        type=str,
                        help='Destination directory to store transfer set',
                        required=True)
    parser.add_argument('--budget',
                        metavar='N',
                        type=int,
                        help='Size of transfer set to construct',
                        required=True)
    parser.add_argument('--queryset',
                        metavar='TYPE',
                        type=str,
                        help='Adversary\'s dataset (P_A(X))',
                        required=True)
    parser.add_argument('--batch_size',
                        metavar='TYPE',
                        type=int,
                        help='Batch size of queries',
                        default=8)
    parser.add_argument('--root',
                        metavar='DIR',
                        type=str,
                        help='Root directory for ImageFolder',
                        default=None)
    parser.add_argument('--modelfamily',
                        metavar='TYPE',
                        type=str,
                        help='Model family',
                        default=None)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id',
                        default=0)
    parser.add_argument('-w',
                        '--nworkers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name] if params[
        'modelfamily'] is None else params['modelfamily']
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    if queryset_name == 'ImageFolder':
        assert params[
            'root'] is not None, 'argument "--root ROOT" required for ImageFolder'
        queryset = datasets.__dict__[queryset_name](root=params['root'],
                                                    transform=transform)
    else:
        queryset = datasets.__dict__[queryset_name](train=True,
                                                    transform=transform)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Initialize adversary
    batch_size = params['batch_size']
    nworkers = params['nworkers']
    transfer_out_path = osp.join(out_path, 'transferset.pickle')
    if params['policy'] == 'random':
        adversary = RandomAdversary(blackbox, queryset, batch_size=batch_size)
    elif params['policy'] == 'adaptive':
        raise NotImplementedError()
    else:
        raise ValueError("Unrecognized policy")

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])
    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(
        len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
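RandomAdversary is defined elsewhere in the project and not shown here. A minimal sketch of what a random-policy transfer-set builder does (sample from the query set, query the blackbox in batches, keep image/posterior pairs) is given below; the real class differs in details such as device handling, duplicate avoidance, and progress reporting.

# Minimal sketch of a random-policy transfer-set builder; not the actual
# knockoffnets RandomAdversary implementation.
import numpy as np
import torch
from torch.utils.data import DataLoader, Subset

def build_random_transferset(blackbox, queryset, budget, batch_size=8):
    idxs = np.random.choice(len(queryset), size=budget, replace=False)
    loader = DataLoader(Subset(queryset, idxs), batch_size=batch_size, shuffle=False)
    transferset = []
    with torch.no_grad():
        for x, _ in loader:
            y = blackbox(x)  # victim posteriors for the batch (blackbox assumed callable)
            transferset += list(zip(x.cpu(), y.cpu()))
    return transferset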
Code Example #5
def train_and_valid(trainset, testset, model, model_name, modelfamily, out_path, batch_size, lr, lr_gamma, num_workers,
					num_epochs=5, device='cpu'):
	if not osp.exists(out_path):
		knockoff_utils.create_dir(out_path)

	optimizer = torch.optim.SGD(model.parameters(), lr=lr)
	scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=lr_gamma)
	criterion = nn.CrossEntropyLoss(reduction='mean')
	train_data = DataLoader(trainset, batch_size=batch_size, shuffle=True,
							collate_fn=generate_batch, num_workers=num_workers)
	num_lines = num_epochs * len(train_data)

	best_test_acc, test_acc = -1., -1.

	# Initialize logging
	log_path = osp.join(out_path, 'train-{}-{}.log.tsv'.format(model_name, modelfamily))
	if not osp.exists(log_path):
		with open(log_path, 'w') as wf:
			columns = ['run_id', 'epoch', 'training loss', 'test accuracy', 'best_accuracy']
			wf.write('\t'.join(columns) + '\n')

	model_out_path = osp.join(out_path, 'checkpoint-{}-{}.pth.tar'.format(model_name, modelfamily))

	for epoch in range(num_epochs):

		for i, (text, offsets, cls) in enumerate(train_data):
			optimizer.zero_grad()
			text, offsets, cls = text.to(device), offsets.to(device), cls.to(device)
			output = model(text, offsets)
			train_loss = criterion(output, cls)
			train_loss.backward()
			optimizer.step()
			processed_lines = i + len(train_data) * epoch
			progress = processed_lines / float(num_lines)
			if processed_lines % 128 == 0:
				sys.stderr.write(
					"\rTraining progress: {:3.0f}% lr: {:3.3f} loss: {:3.3f}".format(
						progress * 100, scheduler.get_lr()[0], train_loss))

		scheduler.step()

		print("")
		test_acc = test(model, testset)
		best_test_acc = max(best_test_acc, test_acc)
		print("Test - Accuracy: {}".format(test_acc))

		if test_acc >= best_test_acc:
			state = {
				'epoch': epoch,
				'arch': model.__class__,
				'state_dict': model.state_dict(),
				'best_acc': test_acc,
				'optimizer': optimizer.state_dict(),
				'created_on': str(datetime.now()),
			}
			torch.save(state, model_out_path)

		# Log
		run_id = str(datetime.now())
		with open(log_path, 'a') as af:
			data_column = [run_id, epoch, train_loss.item(), test_acc, best_test_acc]
			af.write('\t'.join([str(c) for c in data_column]) + '\n')
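The generate_batch collate function used above is not shown in this excerpt. For torchtext text-classification samples of the form (label, token-id tensor) fed to an EmbeddingBag-style model, it is presumably along these lines (a sketch, not the project's actual implementation):

# Hedged sketch of a generate_batch collate function for (label, token-ids)
# samples consumed by an EmbeddingBag-style text classifier.
import torch

def generate_batch(batch):
    labels = torch.tensor([entry[0] for entry in batch])
    texts = [entry[1] for entry in batch]
    # Offsets mark where each sample starts in the concatenated token tensor
    offsets = torch.tensor([0] + [len(t) for t in texts[:-1]]).cumsum(dim=0)
    text = torch.cat(texts)
    return text, offsets, labels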
Code Example #6
def main():
    parser = argparse.ArgumentParser(
        description='Construct adaptive transfer set')
    parser.add_argument(
        'victim_model_dir',
        metavar='PATH',
        type=str,
        help=
        'Path to victim model. Should contain files "model_best.pth.tar" and "params.json"'
    )
    parser.add_argument('--out_dir',
                        metavar='PATH',
                        type=str,
                        help='Destination directory to store transfer set',
                        required=True)
    parser.add_argument('--budget',
                        metavar='N',
                        type=int,
                        help='Size of transfer set to construct',
                        required=True)
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test dataset')

    parser.add_argument('--queryset',
                        metavar='TYPE',
                        type=str,
                        help='Adversary\'s dataset (P_A(X))',
                        required=True)
    parser.add_argument('--batch_size',
                        metavar='TYPE',
                        type=int,
                        help='Batch size of queries',
                        default=8)
    # parser.add_argument('--topk', metavar='N', type=int, help='Use posteriors only from topk classes',
    #                     default=None)
    # parser.add_argument('--rounding', metavar='N', type=int, help='Round posteriors to these many decimals',
    #                     default=None)
    # parser.add_argument('--tau_data', metavar='N', type=float, help='Frac. of data to sample from Adv data',
    #                     default=1.0)
    # parser.add_argument('--tau_classes', metavar='N', type=float, help='Frac. of classes to sample from Adv data',
    #                     default=1.0)
    # ----------- Other params
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id',
                        default=0)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--defense',
                        type=str,
                        help='Defense strategy used by victim side',
                        default=None)

    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path + "-adaptive")

    defense = params['defense']
    if defense:
        transfer_out_path = osp.join(out_path + "-adaptive",
                                     'transferset-' + defense + '.pickle')
    else:
        transfer_out_path = osp.join(out_path + "-adaptive",
                                     'transferset.pickle')

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    queryset = datasets.__dict__[queryset_name](train=True,
                                                transform=transform)
    queryset.targets = [
        queryset.samples[idx][1] for idx in range(len(queryset))
    ]
    # code.interact(local=dict(globals(), **locals()))
    num_classes = len(queryset.classes)
    # code.interact(local=dict(globals(), **locals()))
    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, defense, device)

    # ----------- Initialize Knockoff Nets
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    adversary = AdaptiveAdversary(queryset,
                                  blackbox,
                                  model,
                                  device,
                                  reward='all')

    print('=> constructing transfer set...')
    transferset = adversary.get_transferset(params['budget'])

    with open(transfer_out_path, 'wb') as wf:
        pickle.dump(transferset, wf)
    print('=> transfer set ({} samples) written to: {}'.format(
        len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
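AdaptiveAdversary is not shown here. The adaptive policy in Knockoff Nets is described in the paper as a bandit-style selection over query-set classes driven by rewards; a heavily simplified gradient-bandit sketch of that idea (not the actual class) is:

# Heavily simplified gradient-bandit sketch of an adaptive class-selection
# policy; the actual AdaptiveAdversary (reward='all') is considerably richer.
import numpy as np

class ClassBandit:
    def __init__(self, num_classes, lr=0.1):
        self.H = np.zeros(num_classes)  # per-class action preferences
        self.lr = lr
        self.avg_reward = 0.0
        self.t = 0

    def sample(self):
        probs = np.exp(self.H - self.H.max())
        probs /= probs.sum()
        action = np.random.choice(len(self.H), p=probs)
        return action, probs

    def update(self, action, reward, probs):
        # Push preferences toward actions that beat the running mean reward
        self.t += 1
        self.avg_reward += (reward - self.avg_reward) / self.t
        advantage = reward - self.avg_reward
        one_hot = np.eye(len(self.H))[action]
        self.H += self.lr * advantage * (one_hot - probs)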
Code Example #7
def main():
    parser = argparse.ArgumentParser(description='Jacobian Model Stealing Attack')
    parser.add_argument('policy', metavar='PI', type=str, help='Policy to use while training')
    parser.add_argument('victim_model_dir', metavar='PATH', type=str,
                        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument('defense', metavar='TYPE', type=str, help='Type of defense to use',
                        choices=BBOX_CHOICES, default='none')
    parser.add_argument('defense_args', metavar='STR', type=str, help='Blackbox arguments in format "k1:v1,k2:v2,..."')
    parser.add_argument('--model_adv', metavar='STR', type=str, help='Model arch of F_A', default=None)
    parser.add_argument('--pretrained', metavar='STR', type=str, help='Assumption of F_A', default=None)
    parser.add_argument('--out_dir', metavar='PATH', type=str,
                        help='Destination directory to store transfer set', required=True)
    parser.add_argument('--testset', metavar='TYPE', type=str, help='Blackbox testset (P_V(X))', required=True)
    parser.add_argument('--batch_size', metavar='TYPE', type=int, help='Batch size of queries',
                        default=cfg.DEFAULT_BATCH_SIZE)
    # ----------- Params for Jacobian-based augmentation
    parser.add_argument('--budget', metavar='N', type=int, help='Query limit to blackbox', default=10000)
    parser.add_argument('--queryset', metavar='TYPE', type=str, help='Data for seed images', required=True)
    parser.add_argument('--seedsize', metavar='N', type=int, help='Size of seed set', default=100)
    parser.add_argument('--rho', metavar='N', type=int, help='# Data Augmentation Steps', default=None)
    parser.add_argument('--sigma', metavar='N', type=int, help='Reservoir sampling beyond these many epochs', default=3)
    parser.add_argument('--kappa', metavar='N', type=int, help='Size of reservoir', default=None)
    parser.add_argument('--tau', metavar='N', type=int,
                        help='Iteration period after which step size is multiplied by -1', default=5)
    parser.add_argument('--train_epochs', metavar='N', type=int, help='# Epochs to train model', default=20)
    # ----------- Other params
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id', default=0)
    parser.add_argument('-w', '--nworkers', metavar='N', type=int, help='# Worker threads to load data', default=10)
    parser.add_argument('--train_transform', action='store_true', help='Perform data augmentation', default=False)
    args = parser.parse_args()
    params = vars(args)

    out_path = params['out_dir']
    knockoff_utils.create_dir(out_path)

    np.random.seed(cfg.DEFAULT_SEED)
    torch.manual_seed(cfg.DEFAULT_SEED)
    torch.cuda.manual_seed(cfg.DEFAULT_SEED)

    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # ----------- Set up queryset
    queryset_name = params['queryset']
    valid_datasets = datasets.__dict__.keys()
    if queryset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[queryset_name]
    transform_type = 'train' if params['train_transform'] else 'test'
    if params['train_transform']:
        print('=> Using data augmentation while querying')
    transform = datasets.modelfamily_to_transforms[modelfamily][transform_type]
    queryset = datasets.__dict__[queryset_name](train=True, transform=transform)

    # Use a subset of queryset
    subset_idxs = np.random.choice(range(len(queryset)), size=params['seedsize'], replace=False)
    seedset = Subset(queryset, subset_idxs)

    # ----------- Set up testset
    testset_name = params['testset']
    if testset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[testset_name]
    transform_type = 'test'
    transform = datasets.modelfamily_to_transforms[modelfamily][transform_type]
    testset = datasets.__dict__[testset_name](train=False, transform=transform)
    num_classes = len(testset.classes)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    defense_type = params['defense']
    if defense_type == 'rand_noise':
        BB = RandomNoise
    elif defense_type == 'rand_noise_wb':
        BB = RandomNoise_WB
    elif defense_type == 'mad':
        BB = MAD
    elif defense_type == 'mad_wb':
        BB = MAD_WB
    elif defense_type == 'reverse_sigmoid':
        BB = ReverseSigmoid
    elif defense_type == 'reverse_sigmoid_wb':
        BB = ReverseSigmoid_WB
    elif defense_type in ['none', 'topk', 'rounding']:
        BB = Blackbox
    else:
        raise ValueError('Unrecognized blackbox type')
    defense_kwargs = parse_defense_kwargs(params['defense_args'])
    defense_kwargs['log_prefix'] = 'transfer'
    print('=> Initializing BBox with defense {} and arguments: {}'.format(defense_type, defense_kwargs))
    blackbox = BB.from_modeldir(blackbox_dir, device, **defense_kwargs)

    for k, v in defense_kwargs.items():
        params[k] = v

    # ----------- Initialize adversary
    budget = params['budget']
    model_adv_name = params['model_adv']
    model_adv_pretrained = params['pretrained']
    train_epochs = params['train_epochs']
    batch_size = params['batch_size']
    kappa = params['kappa']
    tau = params['tau']
    rho = params['rho']
    sigma = params['sigma']
    policy = params['policy']
    adversary = JacobianAdversary(blackbox, budget, model_adv_name, model_adv_pretrained, modelfamily, seedset,
                                  testset, device, out_path, batch_size=batch_size,
                                  train_epochs=train_epochs, kappa=kappa, tau=tau, rho=rho,
                                  sigma=sigma, aug_strategy=policy)

    print('=> constructing transfer set...')
    transferset, model_adv = adversary.get_transferset()
    # import ipdb; ipdb.set_trace()
    # These can be massive (>30G) -- skip it for now
    # transfer_out_path = osp.join(out_path, 'transferset.pickle')
    # with open(transfer_out_path, 'wb') as wf:
    #     pickle.dump(transferset, wf, protocol=pickle.HIGHEST_PROTOCOL)
    # print('=> transfer set ({} samples) written to: {}'.format(len(transferset), transfer_out_path))

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params_transfer.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
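JacobianAdversary is not shown in this excerpt. The core of Jacobian-based data augmentation (Papernot et al.) is to perturb each seed image along the sign of the substitute model's gradient for its predicted class; a minimal sketch of one augmentation step, ignoring the tau sign flips and kappa/sigma reservoir handled by the class above, is:

# Minimal sketch of one Jacobian-based augmentation step; the real
# JacobianAdversary also handles tau (periodic sign flips), kappa/sigma
# (reservoir sampling), and retraining of the substitute model.
import torch

def jacobian_augment(model_adv, x, step_size=0.1):
    model_adv.eval()
    x = x.clone().detach().requires_grad_(True)
    logits = model_adv(x)
    preds = logits.argmax(dim=1)
    # Sum of each sample's predicted-class logit; its gradient w.r.t. x gives
    # the Jacobian rows needed for the perturbation direction.
    logits.gather(1, preds.unsqueeze(1)).sum().backward()
    return (x + step_size * x.grad.sign()).detach()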