Example 1
def main():
    """Inversion Generator just works for Inversion model

    Argparse structure is different from other running profile.
    """
    parser = argparse.ArgumentParser(description='Generate inversion images')
    parser.add_argument('modelpath', metavar='P', type=str, help="Path of Inversion model")
    parser.add_argument('--expansion', '-e', metavar='E', type=int, help="Image expansion factor", default=200)
    parser.add_argument('--save-path', '-s', type=str, help="Path of generated image, optional")
    parser.add_argument('--channel', '-c', type=int, help="Inversion model output image channel", default=1)
    parser.add_argument('--num-classes', '-n', type=int, help="Inversion classifier input classes", default=10)
    parser.add_argument('--complexity', '-x', type=int, help="Inversion model conv channel size.", default=64)
    parser.add_argument('--blackbox', '-b', type=str, help="Path to a blackbox model directory; if set, it is queried for full posterior vectors", default=None)
    parser.add_argument('--testset', metavar='DSET', type=str, help="Dataset used to query the blackbox (required when --blackbox is set)", default=None)
    parser.add_argument('-d', '--device-id', metavar='D', type=int, help='Device id. -1 for CPU.', default=0)

    args = parser.parse_args()
    model = get_net('Inversion', 'custom_cnn', pretrained=args.modelpath, num_classes=args.num_classes,
                    channel=args.channel, complexity=args.complexity)
    if args.device_id >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.device_id)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)
    if args.save_path:
        save_path = args.save_path
    else:
        save_path = os.path.join(os.path.dirname(args.modelpath), 'generated')
    os.makedirs(save_path, exist_ok=True)

    if args.testset is None:
        get_imgs(model, save_path, args.expansion, args.num_classes)
    else:
        assert args.blackbox is not None, '--blackbox is required when --testset is set'
        blackbox = Blackbox.from_modeldir(args.blackbox, device=device)
        assert args.testset in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[args.testset]
        transform = datasets.modelfamily_to_transforms[modelfamily]['test']
        testset = datasets.__dict__[args.testset](train=False, transform=transform)
        results = []
        dataloader = DataLoader(testset, batch_size=128, shuffle=False)
        total = 0
        img_vectors = []
        for inputs, targets in tqdm(dataloader):
            vector = blackbox(inputs)
            imgs = model(vector.to(device)).cpu()
            for i in range(imgs.shape[0]):
                img_vectors.append(imgs[i])
                # save_image(imgs[i], os.path.join(save_path, "{}.{}.bmp".format(targets[i], total + i)))
            total += imgs.shape[0]
        np.random.shuffle(img_vectors)
        for i in range(args.expansion):
            save_image(img_vectors[i], os.path.join(save_path, "{}.bmp".format(total + i)))
Example 2
    @classmethod
    def from_modeldir(cls, model_dir, device=None, output_type='probs'):
        device = torch.device('cuda') if device is None else device

        # What was the model architecture used by this model?
        params_path = osp.join(model_dir, 'params.json')
        with open(params_path) as jf:
            params = json.load(jf)
        model_arch = params['model_arch']
        num_classes = params['num_classes']
        channel = params['channel']
        complexity = params['complexity']
        victim_dataset = params.get('dataset', 'imagenet')
        modelfamily = datasets.dataset_to_modelfamily[victim_dataset]

        # Instantiate the model
        # model = model_utils.get_net(model_arch, n_output_classes=num_classes)
        model = zoo.get_net(model_arch,
                            modelfamily,
                            pretrained=None,
                            num_classes=num_classes,
                            channel=channel,
                            complexity=complexity)
        model = model.to(device)

        # Load weights
        checkpoint_path = osp.join(model_dir, 'model_best.pth.tar')
        if not osp.exists(checkpoint_path):
            checkpoint_path = osp.join(model_dir, 'checkpoint.pth.tar')
        print("=> loading checkpoint '{}'".format(checkpoint_path))
        # Map the checkpoint onto the chosen device to avoid GPU/CPU mismatches
        checkpoint = torch.load(checkpoint_path, map_location=device)
        epoch = checkpoint['epoch']
        best_test_acc = checkpoint['best_acc']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {}, acc={:.2f})".format(
            epoch, best_test_acc))

        blackbox = cls(model, device, output_type)
        return blackbox
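
A minimal usage sketch for the factory above (the model directory is illustrative; calling the blackbox on a batch of inputs follows its use in Example 1):

device = torch.device('cpu')
blackbox = Blackbox.from_modeldir('results/models/victim/mnist', device)
probs = blackbox(torch.randn(1, 1, 28, 28))  # posterior probabilities for one image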
Example 3
def main():
    #torch.backends.cudnn.enabled = False
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('dataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Dataset name')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    # Optional arguments
    parser.add_argument('-x',
                        '--complexity',
                        metavar='X',
                        type=int,
                        help='Complexity of conv layer channel.',
                        default=64)
    parser.add_argument('-o',
                        '--out_path',
                        metavar='PATH',
                        type=str,
                        help='Output path for model',
                        default=cfg.MODEL_DIR)
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=100,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=30,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--train_subset',
                        type=int,
                        help='Use a subset of train set',
                        default=None)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=False)
    parser.add_argument('--optimizer-choice',
                        type=str,
                        help='Optimizer',
                        default='sgdm',
                        choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    # torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
        #torch.cuda.set_device(str(params['device_id']))
    else:
        device = torch.device('cpu')

    # ----------- Set up dataset
    dataset_name = params['dataset']
    valid_datasets = datasets.__dict__.keys()
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    dataset = datasets.__dict__[dataset_name]

    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    train_transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    test_transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    trainset = dataset(train=True, transform=train_transform)
    testset = dataset(train=False, transform=test_transform)
    num_classes = len(trainset.classes)
    sample = testset[0][0]
    if len(sample.shape) <= 2:
        # 2 dimensional images
        channel = 1
    else:
        channel = sample.shape[0]
    params['channel'] = channel
    params['num_classes'] = num_classes

    if params['train_subset'] is not None:
        idxs = np.arange(len(trainset))
        ntrainsubset = params['train_subset']
        idxs = np.random.choice(idxs, size=ntrainsubset, replace=False)
        trainset = Subset(trainset, idxs)

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    complexity = params['complexity']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes,
                        channel=channel,
                        complexity=complexity)
    model = model.to(device)
    optimizer = get_optimizer(model.parameters(), params['optimizer_choice'],
                              **params)

    # ----------- Train
    out_path = params['out_path']  # train_model receives out_path via **params
    model_utils.train_model(model,
                            trainset,
                            testset=testset,
                            device=device,
                            optimizer=optimizer,
                            **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(out_path, 'params.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
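
A hedged invocation sketch (the script name is an assumption; MNIST and CNN32 follow the dataset and architecture names used elsewhere in these examples):

# Train a victim CNN on MNIST with the default SGD-with-momentum optimizer:
# python train.py MNIST CNN32 -d 0 -b 64 -e 100 --lr 0.1 -o results/models/victim/mnist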
Example 4
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('model_dir', metavar='DIR', type=str, help='Directory containing transferset.pickle')
    parser.add_argument('model_arch', metavar='MODEL_ARCH', type=str, help='Model name')
    parser.add_argument('testdataset', metavar='DS_NAME', type=str, help='Name of test dataset')
    parser.add_argument('--budgets', metavar='B', type=str,
                        help='Comma separated values of budgets. Knockoffs will be trained for each budget.')
    # Optional arguments
    parser.add_argument('-d', '--device_id', metavar='D', type=int, help='Device id. -1 for CPU.', default=0)
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e', '--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--resume', default=None, type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step', type=int, default=60, metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma', type=float, default=0.1, metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w', '--num_workers', metavar='N', type=int, help='# Worker threads to load data', default=10)
    parser.add_argument('--pretrained', type=str, help='Use pretrained network', default=None)
    parser.add_argument('--weighted-loss', action='store_true', help='Use a weighted loss', default=False)
    # Attacker's defense
    parser.add_argument('--argmaxed', action='store_true', help='Only consider argmax labels', default=False)
    parser.add_argument('--optimizer_choice', type=str, help='Optimizer', default='sgdm', choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model_dir = params['model_dir']

    # ----------- Set up transferset
    transferset_path = osp.join(model_dir, 'transferset.pickle')
    with open(transferset_path, 'rb') as rf:
        transferset_samples = pickle.load(rf)
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(len(transferset_samples), num_classes))

    # ----------- Clean up transfer (if necessary)
    if params['argmaxed']:
        new_transferset_samples = []
        print('=> Using argmax labels (instead of posterior probabilities)')
        for i in range(len(transferset_samples)):
            x_i, y_i = transferset_samples[i]
            argmax_k = y_i.argmax()
            y_i_1hot = torch.zeros_like(y_i)
            y_i_1hot[argmax_k] = 1.
            new_transferset_samples.append((x_i, y_i_1hot))
        transferset_samples = new_transferset_samples

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    # Validate the name before using it as a dictionary key
    if dataset_name not in valid_datasets:
        raise ValueError('Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError('# Transfer classes ({}) != # Testset classes ({})'.format(num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name, modelfamily, pretrained, num_classes=num_classes)
    model = model.to(device)

    # ----------- Train
    budgets = [int(b) for b in params['budgets'].split(',')]

    for b in budgets:
        np.random.seed(cfg.DEFAULT_SEED)
        torch.manual_seed(cfg.DEFAULT_SEED)
        torch.cuda.manual_seed(cfg.DEFAULT_SEED)

        transferset = samples_to_transferset(transferset_samples, budget=b, transform=transform)
        print()
        print('=> Training at budget = {}'.format(len(transferset)))

        optimizer = get_optimizer(model.parameters(), params['optimizer_choice'], **params)
        print(params)

        checkpoint_suffix = '.{}'.format(b)
        criterion_train = model_utils.soft_cross_entropy
        model_utils.train_model(model, transferset, model_dir, testset=testset, criterion_train=criterion_train,
                                checkpoint_suffix=checkpoint_suffix, device=device, optimizer=optimizer, **params)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)
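
model_utils.soft_cross_entropy (the training criterion above) is referenced but not shown; a minimal sketch of the usual formulation, offered as an assumption rather than the repository's exact code:

import torch
import torch.nn.functional as F

def soft_cross_entropy(pred, soft_targets):
    # Cross-entropy against full posterior vectors instead of hard labels,
    # matching the (input, probability-vector) pairs in the transfer set.
    logprobs = F.log_softmax(pred, dim=1)
    return torch.mean(torch.sum(-soft_targets * logprobs, dim=1))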
Example 5
        agreement += torch.sum(labels_bb.cpu() == labels_sur.cpu()).int()
        if targeted:
            transfer += torch.sum(adv_labels_bb.cpu() == targets.cpu()).int()
        else:
            transfer += torch.sum(adv_labels_bb.cpu() != targets.cpu()).int()

    print("Agreement: {}".format(agreement / total))
    print("Transferability: {}".format(transfer / total))
    return transfer / total


if __name__ == '__main__':
    # This block is only a temporary smoke test.
    from datasets import GTSRB
    from datasets import modelfamily_to_transforms

    transform = modelfamily_to_transforms['custom_cnn']['train']
    dataset = GTSRB(train=False, transform=transform)
    from knockoff.victim.blackbox import Blackbox
    import torch

    device = torch.device('cuda')
    blackbox = Blackbox.from_modeldir('results/models/victim/gtsrb', device)
    from models import zoo

    surrogate = zoo.get_net(
        'CNN32',
        'custom_cnn',
        'results/models/adversary/manhattan/checkpoint.28.iter.pth.tar',
        num_classes=43)
    transfer = transferability(blackbox, surrogate, dataset, targeted=False)
Example 6
    def __init__(self,
                 model_arch: str,
                 state_dir: str,
                 testset: str,
                 pretrained: str = None,
                 sampleset: str = None,
                 blackbox_path: str = None,
                 cuda: bool = True,
                 complexity: int = 64,
                 optimizer_choice: str = 'sgdm',
                 batch_size: int = 64,
                 topk: int = 0,
                 argmax: bool = False,
                 num_workers: int = 16,
                 **kwargs) -> None:
        self.device = torch.device('cuda') if cuda else torch.device('cpu')
        # state_dir stores model checkpoints, the selected state in the
        # active thief, etc. Create it before loading any previous state.
        self.state_dir = state_dir
        if not os.path.exists(state_dir):
            os.makedirs(state_dir)
        self.selection, self.transfer, self.indices_list = load_state(state_dir)
        self.cuda = cuda
        if blackbox_path is None:
            # No blackbox model is involved in model training.
            self.blackbox = None
        else:
            self.blackbox = Blackbox.from_modeldir(blackbox_path, self.device)
        modelfamily = datasets.dataset_to_modelfamily[testset]
        # Workaround for MNIST-like datasets: single-channel images that are
        # normalized with dataset-specific parameters.
        if testset in ('MNIST', 'KMNIST', 'EMNIST', 'EMNISTLetters',
                       'FashionMNIST'):
            self.channel = 1
            self.transforms = datasets.MNIST_transform
        else:
            self.channel = 3
            self.transforms = datasets.modelfamily_to_transforms[modelfamily]
        # For absolute accuracy tests.
        self.testset = datasets.__dict__[testset](
            train=False, transform=self.transforms['test'])
        if sampleset is not None:
            self.sampleset = datasets.__dict__[sampleset](
                train=True, transform=self.transforms['train'])
        else:
            self.sampleset = None
        self.argmax = argmax
        self.batch_size = batch_size
        self.topk = topk
        # For relative accuracy tests.
        self.query = lambda data: query(self.blackbox, data, len(data),
                                        self.argmax, self.batch_size,
                                        self.device, self.topk)
        # Querying the test set only makes sense when a blackbox is present.
        if self.blackbox is not None:
            self.evaluation_set = query(self.blackbox, unpack(self.testset),
                                        len(self.testset), True,
                                        self.batch_size, self.device)
        else:
            self.evaluation_set = None
        self.num_classes = len(self.testset.classes)
        self.target_model = get_net(model_arch,
                                    modelfamily,
                                    pretrained=pretrained,
                                    channel=self.channel,
                                    complexity=complexity,
                                    num_classes=self.num_classes).to(self.device)
        self.optim = get_optimizer(self.target_model.parameters(),
                                   optimizer_choice, **kwargs)
        self.criterion = soft_cross_entropy
        self.num_workers = num_workers
        self.kwargs = kwargs
Example 7
def parser_dealer(option: Dict[str, bool]) -> Dict[str, Any]:
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    if option['transfer']:
        parser.add_argument('policy',
                            metavar='PI',
                            type=str,
                            help='Policy to use while training',
                            choices=['random', 'adaptive'])
        parser.add_argument('--budget',
                            metavar='N',
                            type=int,
                            help='Size of transfer set to construct',
                            required=True)
        parser.add_argument('--out_dir',
                            metavar='PATH',
                            type=str,
                            help='Destination directory to store transfer set',
                            required=True)
        parser.add_argument('--queryset',
                            metavar='TYPE',
                            type=str,
                            help='Adversary\'s dataset (P_A(X))',
                            required=True)

    if option['active']:
        parser.add_argument('strategy',
                            metavar='S',
                            type=str,
                            help='Active Sample Strategy',
                            choices=['kcenter', 'random', 'dfal'])
        parser.add_argument('--metric',
                            metavar="M",
                            type=str,
                            help='K-Center method distance metric',
                            choices=['euclidean', 'manhattan', 'l1', 'l2'],
                            default='euclidean')
        parser.add_argument('--initial-size',
                            metavar='N',
                            type=int,
                            help='Active Learning Initial Sample Size',
                            default=100)
        parser.add_argument('--budget-per-iter',
                            metavar='N',
                            type=int,
                            help='budget for every iteration',
                            default=100)
        parser.add_argument('--iterations',
                            metavar='N',
                            type=int,
                            help='Number of iterations',
                            default=10)
    if option['sampling']:
        parser.add_argument(
            'sampleset',
            metavar='DS_NAME',
            type=str,
            help='Name of sample dataset in active learning selection algorithms')
        parser.add_argument('--load-state',
                            action='store_true',
                            default=False,
                            help='Turn on if load state.')
        parser.add_argument('--state-suffix',
                            metavar='SE',
                            type=str,
                            help='load selected samples from sample set',
                            required=False,
                            default='')
    if option['synthetic']:
        parser.add_argument('synthetic_method',
                            metavar='SM',
                            type=str,
                            help='Synthetic Method',
                            choices=['fgsm', 'ifgsm', 'mifgsm'])
        parser.add_argument('eps',
                            metavar='E',
                            type=float,
                            help='Synthetic maximum epsilon')
        parser.add_argument(
            'targeted_method',
            metavar='T',
            type=str,
            help='Target methods',
            choices=['non-targeted', 'targeted-random', 'targeted-topk'])
    if option['black_box']:
        parser.add_argument(
            'victim_model_dir',
            metavar='VIC_DIR',
            type=str,
            help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
        parser.add_argument('--argmaxed',
                            action='store_true',
                            help='Only consider argmax labels',
                            default=False)
        parser.add_argument('--pseudoblackbox',
                            action='store_true',
                            help='Load prequeried labels as blackbox',
                            default=False)
        parser.add_argument('--topk',
                            metavar='TK',
                            type=int,
                            help='Keep only the top-k blackbox probabilities (0 keeps all)',
                            default=0)
    if option['train']:
        parser.add_argument('model_dir',
                            metavar='SUR_DIR',
                            type=str,
                            help='Surrogate Model Destination directory')
        parser.add_argument('model_arch',
                            metavar='MODEL_ARCH',
                            type=str,
                            help='Model name')
        parser.add_argument('testdataset',
                            metavar='DS_NAME',
                            type=str,
                            help='Name of test dataset')
        # Optional arguments
        parser.add_argument('-e',
                            '--epochs',
                            type=int,
                            default=100,
                            metavar='N',
                            help='number of epochs to train (default: 100)')
        parser.add_argument('-x',
                            '--complexity',
                            type=int,
                            default=64,
                            metavar='N',
                            help="Model conv channel size.")
        parser.add_argument('--lr',
                            type=float,
                            default=0.01,
                            metavar='LR',
                            help='learning rate (default: 0.01)')
        parser.add_argument('--momentum',
                            type=float,
                            default=0.5,
                            metavar='M',
                            help='SGD momentum (default: 0.5)')
        parser.add_argument(
            '--log-interval',
            type=int,
            default=50,
            metavar='N',
            help='how many batches to wait before logging training status')
        parser.add_argument('--resume',
                            default=None,
                            type=str,
                            metavar='PATH',
                            help='path to latest checkpoint (default: none)')
        parser.add_argument('--lr-step',
                            type=int,
                            default=60,
                            metavar='N',
                            help='Step sizes for LR')
        parser.add_argument('--lr-gamma',
                            type=float,
                            default=0.1,
                            metavar='N',
                            help='LR Decay Rate')
        parser.add_argument('--pretrained',
                            type=str,
                            help='Use pretrained network',
                            default=None)
        parser.add_argument('--weighted-loss',
                            action='store_true',
                            help='Use a weighted loss',
                            default=False)
        parser.add_argument('--optimizer-choice',
                            type=str,
                            help='Optimizer',
                            default='sgdm',
                            choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    # apply to all circumstances
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-d',
                        '--device-id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-w',
                        '--num-workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    args = parser.parse_args()
    params = vars(args)
    device = device_dealer(**params)
    params['device'] = device
    if option['black_box']:
        blackbox_dir = params['victim_model_dir']
        if params['pseudoblackbox']:
            params['blackbox'] = PseudoBlackbox(blackbox_dir)
        else:
            params['blackbox'] = Blackbox.from_modeldir(blackbox_dir, device)
    if option['active']:
        # Active-learning options need no post-processing here.
        pass
    if option['sampling']:
        sample_set_name = params['sampleset']
        assert sample_set_name in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[sample_set_name]
        transform = datasets.modelfamily_to_transforms[modelfamily]['train']
        dataset = datasets.__dict__[sample_set_name](train=True,
                                                     transform=transform)
        params['queryset'] = dataset
        params['selected'] = set()
        if params['load_state']:
            path = params['model_dir']
            params['selection'], params['transferset'], params['selected_indices'] = \
                load_state(path, params['state_suffix'])
    if option['train']:
        testset_name = params['testdataset']
        assert testset_name in datasets.__dict__.keys()
        modelfamily = datasets.dataset_to_modelfamily[testset_name]
        transform = datasets.modelfamily_to_transforms[modelfamily]['test']
        testset = datasets.__dict__[testset_name](train=False,
                                                  transform=transform)
        params['testset'] = testset

        pretrained_path = params['pretrained']
        model_arch = params['model_arch']
        if params['pseudoblackbox']:
            num_classes = params['blackbox'].train_results[0].shape[0]
        else:
            num_classes = len(testset.classes)
        sample = testset[0][0]

        model = zoo.get_net(model_arch,
                            modelfamily,
                            pretrained_path,
                            num_classes=num_classes,
                            channel=sample.shape[0],
                            complexity=params['complexity'])
        params['surrogate'] = model.to(device)
    return params
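
A hedged usage sketch (the option keys mirror the checks inside parser_dealer; this flag combination is illustrative):

# Build params for an active-learning run that queries a blackbox and
# trains a surrogate; unused option groups are switched off.
params = parser_dealer({
    'transfer': False,
    'active': True,
    'sampling': True,
    'synthetic': False,
    'black_box': True,
    'train': True,
})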
Example 8
def main():
    parser = argparse.ArgumentParser(
        description='Select deepfool images, retrain the target model.')
    parser.add_argument(
        'model_dir',
        metavar='SUR_DIR',
        type=str,
        help='Surrogate model destination directory, which may contain selection state '
             '(selection.pickle, transferset.pickle, select_indices.pickle)')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test dataset')
    parser.add_argument(
        'blackbox_dir',
        metavar='VIC_DIR',
        type=str,
        help='Path to victim model. Should contain files "model_best.pth.tar" and "params.json"')
    parser.add_argument(
        'sampleset',
        metavar='DS_NAME',
        type=str,
        help='Name of sample dataset in the deepfool selection algorithm')
    parser.add_argument('deepfool_budget',
                        metavar='N',
                        type=int,
                        help='deepfool selection size.')
    parser.add_argument(
        '--state-budget',
        type=int,
        help="if > 0, load corresponding budget of selection state.",
        default=0)
    parser.add_argument('--argmaxed',
                        action='store_true',
                        help='Only consider argmax labels',
                        default=False)
    parser.add_argument('--topk',
                        metavar='TK',
                        type=int,
                        help='Keep only the top-k blackbox probabilities (0 keeps all)',
                        default=0)
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('-x',
                        '--complexity',
                        type=int,
                        default=64,
                        metavar='N',
                        help="Model conv channel size.")
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=50,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=60,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network, or a checkpoint',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=False)
    parser.add_argument('--optimizer-choice',
                        type=str,
                        help='Optimizer',
                        default='sgdm',
                        choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-d',
                        '--device-id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-w',
                        '--num-workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('-pen',
                        '--ispenalty',
                        metavar='B',
                        # argparse's type=bool treats every non-empty string as
                        # True, so parse the value explicitly.
                        type=lambda s: s.lower() in ('true', '1', 'yes'),
                        help='Use penalty matrix',
                        default=True)
    args = parser.parse_args()
    params = vars(args)
    device = device_dealer(device_id=args.device_id)
    blackbox = Blackbox.from_modeldir(args.blackbox_dir, device)
    assert args.sampleset in datasets.__dict__.keys()
    modelfamily = datasets.dataset_to_modelfamily[args.sampleset]
    transform = datasets.modelfamily_to_transforms[modelfamily]['train']
    queryset = datasets.__dict__[args.sampleset](train=True,
                                                 transform=transform)

    if args.state_budget > 0:
        selection, transfer, indices_list = load_state(
            state_dir=args.model_dir)
        selection = set(indices_list[:args.state_budget])
        transfer = transfer[:args.state_budget]
        indices_list = indices_list[:args.state_budget]
    else:
        selection, transfer, indices_list = set(), [], []
    testset_name = args.testdataset
    assert testset_name in datasets.__dict__.keys()
    modelfamily = datasets.dataset_to_modelfamily[testset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    testset = datasets.__dict__[testset_name](train=False, transform=transform)
    num_classes = len(testset.classes)
    pretrained_path = params['pretrained']
    model_arch = params['model_arch']
    sample = testset[0][0]

    model = zoo.get_net(model_arch,
                        modelfamily,
                        pretrained_path,
                        num_classes=num_classes,
                        channel=sample.shape[0],
                        complexity=params['complexity'])
    model = model.to(device)
    # Uniform penalty matrix over all classes (was hardcoded to 10x10).
    penalty = np.ones((num_classes, num_classes))
    deepfool_choose(model, blackbox, queryset, testset, selection, transfer,
                    indices_list, device, penalty, **params)
Example 9
def main():
    parser = argparse.ArgumentParser(description='Train a model')
    # Required arguments
    parser.add_argument('victim_model_dir',
                        metavar='PATH',
                        type=str,
                        help='Directory of Victim Blackbox')
    parser.add_argument('model_dir',
                        metavar='DIR',
                        type=str,
                        help='Directory containing transferset.pickle')
    parser.add_argument('model_arch',
                        metavar='MODEL_ARCH',
                        type=str,
                        help='Model name')
    parser.add_argument('testdataset',
                        metavar='DS_NAME',
                        type=str,
                        help='Name of test dataset')
    parser.add_argument(
        '--budgets',
        metavar='B',
        type=str,
        help='Comma separated values of budgets. Knockoffs will be trained for each budget.')
    parser.add_argument(
        '--rounds',
        metavar='R',
        type=str,
        help='Comma separated values of duplication rounds for each budget.')
    # Optional arguments
    parser.add_argument('-d',
                        '--device_id',
                        metavar='D',
                        type=int,
                        help='Device id. -1 for CPU.',
                        default=0)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--init_alpha',
                        type=float,
                        default=1.0,
                        metavar='I',
                        help='initial iteration step (default: 1.0)')
    parser.add_argument(
        '--num_steps',
        type=int,
        default=80,
        metavar='I',
        help='iteration steps of each crafted sample (default: 80)')
    parser.add_argument(
        '--eps',
        type=float,
        default=255.0,
        metavar='E',
        help='maximum change that can be made to an image (default: 255.0)')
    parser.add_argument('--method',
                        type=str,
                        default='topk-IFGSMMod',
                        metavar='METHOD',
                        help='direction_method-gradient_method')
    parser.add_argument('--directions',
                        type=int,
                        default=2,
                        metavar='D',
                        help='directions')
    parser.add_argument('--max_pixel',
                        type=float,
                        default=1.0,
                        metavar='P',
                        help='upper bound')
    parser.add_argument('--min_pixel',
                        type=float,
                        default=0.0,
                        metavar='P',
                        help='lower bound')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=50,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--lr-step',
                        type=int,
                        default=60,
                        metavar='N',
                        help='Step sizes for LR')
    parser.add_argument('--lr-gamma',
                        type=float,
                        default=0.1,
                        metavar='N',
                        help='LR Decay Rate')
    parser.add_argument('-w',
                        '--num_workers',
                        metavar='N',
                        type=int,
                        help='# Worker threads to load data',
                        default=10)
    parser.add_argument('--pretrained',
                        type=str,
                        help='Use pretrained network',
                        default=None)
    parser.add_argument('--weighted-loss',
                        action='store_true',
                        help='Use a weighted loss',
                        default=False)
    # Attacker's defense
    parser.add_argument('--argmaxed',
                        action='store_true',
                        help='Only consider argmax labels',
                        default=False)
    parser.add_argument('--optimizer_choice',
                        type=str,
                        help='Optimizer',
                        default='sgdm',
                        choices=('sgd', 'sgdm', 'adam', 'adagrad'))
    args = parser.parse_args()
    params = vars(args)

    torch.manual_seed(cfg.DEFAULT_SEED)
    if params['device_id'] >= 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(params['device_id'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    train.model_dir = params['model_dir']

    # ----------- Set up transferset
    transferset_path = osp.join(train.model_dir, 'transferset.pickle')
    with open(transferset_path, 'rb') as rf:
        transferset_samples = pickle.load(rf)
    num_classes = transferset_samples[0][1].size(0)
    print('=> found transfer set with {} samples, {} classes'.format(
        len(transferset_samples), num_classes))

    # ----------- Clean up transfer (if necessary)
    if params['argmaxed']:
        new_transferset_samples = []
        print('=> Using argmax labels (instead of posterior probabilities)')
        for i in range(len(transferset_samples)):
            x_i, y_i = transferset_samples[i]
            argmax_k = y_i.argmax()
            y_i_1hot = torch.zeros_like(y_i)
            y_i_1hot[argmax_k] = 1.
            new_transferset_samples.append((x_i, y_i_1hot))
        transferset_samples = new_transferset_samples

    # ----------- Set up testset
    dataset_name = params['testdataset']
    valid_datasets = datasets.__dict__.keys()
    # Validate the name before using it as a dictionary key
    if dataset_name not in valid_datasets:
        raise ValueError(
            'Dataset not found. Valid arguments = {}'.format(valid_datasets))
    modelfamily = datasets.dataset_to_modelfamily[dataset_name]
    transform = datasets.modelfamily_to_transforms[modelfamily]['test']
    dataset = datasets.__dict__[dataset_name]
    testset = dataset(train=False, transform=transform)
    if len(testset.classes) != num_classes:
        raise ValueError(
            '# Transfer classes ({}) != # Testset classes ({})'.format(
                num_classes, len(testset.classes)))

    # ----------- Set up model
    model_name = params['model_arch']
    pretrained = params['pretrained']
    # model = model_utils.get_net(model_name, n_output_classes=num_classes, pretrained=pretrained)
    model = zoo.get_net(model_name,
                        modelfamily,
                        pretrained,
                        num_classes=num_classes)
    model = model.to(device)

    # ----------- Initialize blackbox
    blackbox_dir = params['victim_model_dir']
    blackbox = Blackbox.from_modeldir(blackbox_dir, device)

    # ----------- Set up train params
    budgets = [int(b) for b in params['budgets'].split(',')]
    rounds = [int(r) for r in params['rounds'].split(',')]
    np.random.seed(cfg.DEFAULT_SEED)
    torch.manual_seed(cfg.DEFAULT_SEED)
    torch.cuda.manual_seed(cfg.DEFAULT_SEED)
    train.optimizer = get_optimizer(model.parameters(),
                                    params['optimizer_choice'], **params)
    train.criterion_train = model_utils.soft_cross_entropy
    train.params = params
    train.device = device
    train.testset = testset

    print(params)

    # Set up crafter params
    original_samples = transferset_samples[:]
    adversary = SyntheticAdversary(blackbox=blackbox,
                                   classifier=model,
                                   device=device,
                                   **params)

    for b, r in zip(budgets, rounds):
        if params['pretrained'] is None:
            train(model, original_samples, b, 1)
        total_samples = transferset_samples[:]
        latest_samples = random.sample(total_samples, b)
        # Use a distinct inner loop variable so it does not shadow the round count r.
        for rnd in range(2, r + 1):
            latest_samples = adversary.synthesize(latest_samples)
            transferset_samples = original_samples[:]
            transferset_samples.extend(latest_samples)
            total_samples.extend(latest_samples)
            train(model, transferset_samples, b, rnd)
            latest_samples = random.sample(total_samples, b)

    # Store arguments
    params['created_on'] = str(datetime.now())
    params_out_path = osp.join(train.model_dir, 'params_train.json')
    with open(params_out_path, 'w') as jf:
        json.dump(params, jf, indent=True)