Example #1
def train_model(data, model_name, dataset_train, dataset_test, epochs, device,
                file_model):
    dataloader_train = DataLoader(dataset_train, batch_size=128, shuffle=True)
    dataloader_test = DataLoader(dataset_test, batch_size=128, shuffle=False)
    print('Train set: {}, Test set: {}'.format(len(dataset_train),
                                               len(dataset_test)))

    # Use the device provided by the caller (e.g. torch.device('cuda')).
    print('Device: {}'.format(device))

    if data == 'mnist':
        model = BaseModel(use_prob=True).to(device)
    elif data == 'cifar10' and model_name == 'resnet':
        model = Resnet(use_prob=True).to(device)
    elif data == 'cifar10' and model_name == 'vgg':
        model = Vgg(use_prob=True).to(device)
    else:
        raise NotImplementedError

    optimizer = optim.SGD(model.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    if not os.path.exists(file_model):
        since = time.time()
        for e in range(epochs):
            start = time.time()
            tr_loss, tr_acc = train(model, dataloader_train, loss, optimizer,
                                    device)
            va_loss, va_acc = validate(model, dataloader_test, loss, device)
            scheduler.step()
            time_elapsed = time.time() - start
            print((
                '{:2d}/{:d}[{:s}] Train Loss: {:.4f} Acc: {:.4f}%, Test Loss: {:.4f} Acc: {:.4f}%'
            ).format(e + 1, epochs,
                     str(datetime.timedelta(seconds=time_elapsed)), tr_loss,
                     tr_acc * 100., va_loss, va_acc * 100.))

        time_elapsed = time.time() - since
        print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

        torch.save(model.state_dict(), file_model)
        print('Save base model to:', file_model)
    else:
        print('Found existing file:', file_model)
        model.load_state_dict(torch.load(file_model, map_location=device))
    return model
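
A minimal call sketch for train_model, assuming the project's usual imports and helpers (torchvision, DataLoader, BaseModel, train, validate) are in scope; the epoch count and output path below are illustrative.

import torch
import torchvision as tv
from torchvision import datasets

# Hypothetical setup; dataset and checkpoint paths are placeholders.
transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
dataset_train = datasets.MNIST('data', train=True, download=True, transform=transforms)
dataset_test = datasets.MNIST('data', train=False, download=True, transform=transforms)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = train_model(data='mnist',
                    model_name='basic',  # ignored for MNIST in this function
                    dataset_train=dataset_train,
                    dataset_test=dataset_test,
                    epochs=5,
                    device=device,
                    file_model='results/mnist_5.pt')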
Example #2
def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--attack', type=str, required=True, choices=data_params['attacks'])
    parser.add_argument('--eps', type=float, default=0.3)
    # NOTE: In CW_L2 attack, eps is the upper bound of c.
    parser.add_argument('--n_samples', type=int, default=2000)
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)
    
    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)
        
    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Running attack: {}'.format(args.attack))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])

    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(torch.from_numpy(X_test).type(torch.float32), torch.from_numpy(y_test).type(torch.long))

    dataloader_train = DataLoader(dataset_train, 256, shuffle=False)
    dataloader_test = DataLoader(dataset_test, 256, shuffle=False)

    shape_train = get_shape(dataloader_train.dataset)
    shape_test = get_shape(dataloader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    # Load model
    use_prob = args.attack not in ['apgd', 'apgd1', 'apgd2', 'cw2', 'cwinf']
    print('Attack:', args.attack)
    print('Using softmax layer:', use_prob)
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features,
            n_hidden=n_features * 4,
            n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    optimizer = optim.SGD(model.parameters(), lr=0.01,
                          momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, dataloader_train, loss, device)
    _, acc_test = validate(model, dataloader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset that only contains correctly classified test samples.
    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_perfect = TensorDataset(tensor_test_X, tensor_test_y)
    loader_perfect = DataLoader(dataset_perfect, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_perfect, loss, device)
    print('Accuracy on {} filtered test examples: {:.4f}%'.format(
        len(dataset_perfect), acc_perfect * 100))

    # Generate adversarial examples
    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    if isinstance(n_features, int):
        n_features = (n_features,)

    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=n_classes,
        clip_values=(0.0, 1.0),
        device_type='gpu')

    if args.attack == 'apgd':
        eps_step = args.eps / 10.0 if args.eps <= 0.1 else 0.1
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd1':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=1,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd2':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=2,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'bim':
        eps_step = args.eps / 10.0
        attack = BasicIterativeMethod(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'boundary':
        attack = BoundaryAttack(
            estimator=classifier,
            max_iter=1000,
            sample_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cw2':
        # NOTE: Do NOT increase the batch size!
        attack = CarliniWagnerAttackL2(
            model=model,
            n_classes=n_classes,
            confidence=args.eps,
            verbose=True,
            check_prob=False,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cwinf':
        attack = CarliniLInfMethod(
            classifier=classifier,
            confidence=args.eps,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'deepfool':
        attack = DeepFool(
            classifier=classifier,
            epsilon=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'fgsm':
        attack = FastGradientMethod(
            estimator=classifier,
            eps=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'jsma':
        attack = SaliencyMapMethod(
            classifier=classifier,
            gamma=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'line':
        if args.data == 'mnist':
            color = args.eps
        elif args.data == 'cifar10':
            color = (args.eps, args.eps, args.eps)
        else:
            raise NotImplementedError
        attack = LineAttack(color=color, thickness=1)
    elif args.attack == 'shadow':
        attack = ShadowAttack(
            estimator=classifier,
            batch_size=args.batch_size,
            targeted=False,
            verbose=False)
    elif args.attack == 'watermark':
        attack = WaterMarkAttack(
            eps=args.eps,
            n_classes=data_params['data'][args.data]['n_classes'],
            x_min=0.0,
            x_max=1.0,
            targeted=False)

        X_train, y_train = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
        X_train = X_train.cpu().detach().numpy()
        y_train = y_train.cpu().detach().numpy()
        attack.fit(X_train, y_train)
    else:
        raise NotImplementedError

    if len(dataset_perfect) > args.n_samples:
        n = args.n_samples
    else:
        n = len(dataset_perfect)

    X_benign = tensor_test_X[:n].cpu().detach().numpy()
    y = tensor_test_y[:n].cpu().detach().numpy()

    print('Creating {} adversarial examples with eps={} (Not all attacks use eps)'.format(n, args.eps))
    time_start = time.time()
    # The shadow attack only accepts a single sample at a time.
    if args.attack == 'shadow':
        adv = np.zeros_like(X_benign)
        for i in trange(len(X_benign)):
            adv[i] = attack.generate(x=np.expand_dims(X_benign[i], axis=0))
    elif args.attack == 'watermark':
        # This is untargeted.
        adv = attack.generate(X_benign, y)
    else:
        adv = attack.generate(x=X_benign)
    time_elapsed = time.time() - time_start
    print('Total time spent: {}'.format(str(datetime.timedelta(seconds=time_elapsed))))

    pred_benign = np.argmax(classifier.predict(X_benign), axis=1)
    acc_benign = np.sum(pred_benign == y) / n
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.sum(pred_adv == y) / n
    print("Accuracy on benign samples: {:.4f}%".format(acc_benign * 100))
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    # Save results
    if args.n_samples < 2000:
        output_file = '{}_{}_{}_{}_size{}'.format(args.data, model_name, args.attack, str(args.eps), args.n_samples)
    else:
        output_file = '{}_{}_{}_{}'.format(args.data, model_name, args.attack, str(args.eps))

    path_x = os.path.join(args.output_path, '{}_x.npy'.format(output_file))
    path_y = os.path.join(args.output_path, '{}_y.npy'.format(output_file))
    path_adv = os.path.join(args.output_path, '{}_adv.npy'.format(output_file))
    np.save(path_x, X_benign)
    np.save(path_y, y)
    np.save(path_adv, adv)

    print('Saved to:', path_adv)
    print()
Example #3
def train_adv(data='mnist',
              model_name='basic',
              n_samples=2000,
              eps=2.,
              path_output='results',
              path_data='data',
              is_test=False,
              batch_size=128,
              device='cpu'):
    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_test = datasets.MNIST(path_data,
                                      train=False,
                                      download=True,
                                      transform=transforms)
    elif data == 'cifar10':
        dataset_test = datasets.CIFAR10(path_data,
                                        train=False,
                                        download=True,
                                        transform=transforms)
    else:
        raise NotImplementedError
    loader_test = DataLoader(dataset_test,
                             batch_size=batch_size,
                             shuffle=False)

    # Load model
    if data == 'mnist':
        model = BaseModel(use_prob=False).to(device)
        n_features = (1, 28, 28)
        pretrained = 'mnist_200.pt'
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
            pretrained = 'cifar10_resnet_200.pt'
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
            pretrained = 'cifar10_vgg_200.pt'
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    pretrained_path = os.path.join(path_output, pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))
    optimizer = optim.SGD(model.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(model,
                                                        dataset_test,
                                                        device=device,
                                                        return_tensor=True)
    # Get samples from the tail
    if not is_test:
        # This is for training the surrogate model
        tensor_test_X = tensor_test_X[-n_samples:]
        tensor_test_y = tensor_test_y[-n_samples:]
    else:
        # This is for testing the surrogate model
        tensor_test_X = tensor_test_X[-n_samples - 2000:-2000]
        tensor_test_y = tensor_test_y[-n_samples - 2000:-2000]
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test,
                             batch_size=batch_size,
                             shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(
        len(dataset_test), acc_perfect * 100))

    classifier = PyTorchClassifier(model=model,
                                   loss=loss,
                                   input_shape=n_features,
                                   optimizer=optimizer,
                                   nb_classes=10,
                                   clip_values=(0.0, 1.0),
                                   device_type='gpu')
    attack = AutoProjectedGradientDescent(estimator=classifier,
                                          eps=eps,
                                          eps_step=0.1,
                                          max_iter=1000,
                                          batch_size=batch_size,
                                          targeted=False)

    X_benign = tensor_test_X.cpu().detach().numpy()
    y_true = tensor_test_y.cpu().detach().numpy()
    adv = attack.generate(x=X_benign)
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.mean(pred_adv == y_true)
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    if not is_test:
        output_file = '{}_{}_baard_surro_train_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    else:
        output_file = '{}_{}_baard_surro_test_eps{}_size{}.pt'.format(
            data, model_name, eps, n_samples)
    file_path = os.path.join(path_output, output_file)
    output = {'X': X_benign, 'adv': adv, 'y': y_true}
    torch.save(output, file_path)
    print('Save to:', file_path)
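
A hypothetical call to train_adv, assuming torch is imported and the pretrained checkpoint it loads (e.g. results/mnist_200.pt) already exists.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Generate the APGD adversarial examples used to train the surrogate model.
train_adv(data='mnist',
          model_name='basic',
          n_samples=2000,
          eps=2.,
          path_output='results',
          path_data='data',
          is_test=False,
          batch_size=128,
          device=device)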
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--epochs', type=int, default=5)
    parser.add_argument('--pretrained', type=str, nargs='?')
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    if not os.path.exists(args.data_path):
        os.makedirs(args.data_path)

    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Fetch dataset
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    dataset_train = datasets.MNIST(
        args.data_path, train=True, download=True, transform=transforms)
    dataset_test = datasets.MNIST(
        args.data_path, train=False, download=True, transform=transforms)

    dataloader_train = DataLoader(
        dataset_train, batch_size=args.batch_size, shuffle=True)
    dataloader_test = DataLoader(
        dataset_test, batch_size=args.batch_size, shuffle=False)

    print('Train set: {}, Test set: {}'.format(
        len(dataset_train), len(dataset_test)))

    # Prepare model
    model = BaseModel().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01,
                          momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs)

    # Load pre-trained model
    if args.pretrained is not None:
        pretrained_path = os.path.join(args.output_path, args.pretrained)
        model.load_state_dict(torch.load(pretrained_path, map_location=device))

    # Train model
    since = time.time()
    for epoch in range(args.epochs):
        start = time.time()
        tr_loss, tr_acc = train(model, dataloader_train,
                                loss, optimizer, device)
        va_loss, va_acc = validate(model, dataloader_test, loss, device)
        scheduler.step()

        time_elapsed = time.time() - start
        print(('{:2d}/{:d}[{:s}] Train Loss: {:.4f} Acc: {:.4f}%, ' +
               'Test Loss: {:.4f} Acc: {:.4f}%').format(
            epoch + 1, args.epochs,
            str(datetime.timedelta(seconds=time_elapsed)),
            tr_loss, tr_acc * 100.,
            va_loss, va_acc * 100.))

    time_elapsed = time.time() - since
    print('Total run time: {:.0f}m {:.1f}s'.format(
        time_elapsed // 60,
        time_elapsed % 60))

    # Save model
    file_name = os.path.join(
        args.output_path, 'mnist_{}.pt'.format(args.epochs))
    print('Output file name: {}'.format(file_name))
    torch.save(model.state_dict(), file_name)

    # Test accuracy per class:
    print('Training set:')
    X, y = dataset2tensor(dataset_train)
    X = X.cpu().detach().numpy()
    y = y.cpu().detach().numpy()
    print_acc_per_label(model, X, y, device)

    print('Test set:')
    X, y = dataset2tensor(dataset_test)
    X = X.cpu().detach().numpy()
    y = y.cpu().detach().numpy()
    print_acc_per_label(model, X, y, device)
Example #5
def run_attack_untargeted(file_model, X, y, att_name, eps, device):
    path = file_model.split('/')[0]
    file_str = file_model.split('/')[-1]
    name_arr = file_str.split('_')
    data = name_arr[0]
    model_name = name_arr[1]
    file_data = os.path.join(
        path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name,
                                      round(eps * 1000)))

    if os.path.exists(file_data):
        print('Found existing file:', file_data)
        obj = torch.load(file_data)
        return obj['adv'], obj['X'], obj['y']

    if data == 'mnist':
        n_features = (1, 28, 28)
        n_classes = 10
        model = BaseModel(use_prob=False).to(device)
    elif data == 'cifar10':
        n_features = (3, 32, 32)
        n_classes = 10
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    model.load_state_dict(torch.load(file_model, map_location=device))
    loss = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=5e-4)
    classifier = PyTorchClassifier(model=model,
                                   loss=loss,
                                   input_shape=n_features,
                                   optimizer=optimizer,
                                   nb_classes=n_classes,
                                   clip_values=(0.0, 1.0),
                                   device_type='gpu')

    if att_name == 'apgd':
        eps_step = eps / 4. if eps <= 0.2 else 0.1
        attack = AutoProjectedGradientDescent(estimator=classifier,
                                              eps=eps,
                                              eps_step=eps_step,
                                              max_iter=1000,
                                              batch_size=BATCH_SIZE,
                                              targeted=False)
    elif att_name == 'apgd2':
        attack = AutoProjectedGradientDescent(estimator=classifier,
                                              norm=2,
                                              eps=eps,
                                              eps_step=0.1,
                                              max_iter=1000,
                                              batch_size=BATCH_SIZE,
                                              targeted=False)
    elif att_name == 'cw2':
        # Do not increase the batch_size
        attack = CarliniWagnerAttackL2(model=model,
                                       n_classes=n_classes,
                                       confidence=eps,
                                       verbose=True,
                                       check_prob=False,
                                       batch_size=32,
                                       targeted=False)
    elif att_name == 'deepfool':
        # Do not adjust Epsilon
        attack = DeepFool(classifier=classifier, batch_size=BATCH_SIZE)
    elif att_name == 'fgsm':
        attack = FastGradientMethod(estimator=classifier,
                                    eps=eps,
                                    batch_size=BATCH_SIZE)
    elif att_name == 'line':
        attack = LineAttack(color=1, thickness=2)
    else:
        raise NotImplementedError

    time_start = time.time()
    adv = attack.generate(x=X)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    obj = {'X': X, 'y': y, 'adv': adv}
    torch.save(obj, file_data)
    print('Save data to:', file_data)

    return adv, X, y
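
An illustrative call to run_attack_untargeted; the checkpoint name results/mnist_basic_200.pt, the placeholder inputs, and the module-level BATCH_SIZE constant are assumptions. Note that the function parses the dataset and model name from the checkpoint file name.

import numpy as np
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X = np.random.rand(16, 1, 28, 28).astype(np.float32)  # placeholder inputs in [0, 1]
y = np.random.randint(0, 10, size=16)                 # placeholder labels
adv, X_out, y_out = run_attack_untargeted('results/mnist_basic_200.pt',
                                          X, y,
                                          att_name='fgsm',
                                          eps=0.3,
                                          device=device)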
Example #6
def main():
    set_seeds(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load baard
    file_baard_train = os.path.join('result_0',
                                    'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    stages = []
    stages.append(ApplicabilityStage(n_classes=10, quantile=1., verbose=False))
    stages.append(
        ReliabilityStage(n_classes=10, k=10, quantile=1., verbose=False))
    stages.append(
        DecidabilityStage(n_classes=10, k=100, quantile=1., verbose=False))
    detector = BAARDOperator(stages=stages)

    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    file_baard_threshold = os.path.join('result_0',
                                        'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']
    detector.load(file_baard_threshold)

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    label_test = detector.detect(X_test, pred_test)
    acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    fpr = np.mean(label_test[:1000])
    print('BAARD Acc_on_adv:', acc)
    print('BAARD FPR:', fpr)

    label_surro = predict_numpy(surrogate, X_test, device)
    acc = np.mean(label_surro == label_test)
    print('Acc on surrogate:', acc)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(model.parameters(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    art_classifier = PyTorchClassifier(model=model,
                                       loss=loss,
                                       input_shape=(1, 28, 28),
                                       nb_classes=10,
                                       optimizer=optimizer_clf)

    optimizer_sur = torch.optim.SGD(surrogate.parameters(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    art_detector = PyTorchClassifier(model=surrogate,
                                     loss=loss,
                                     input_shape=(1, 28, 28),
                                     nb_classes=2,
                                     optimizer=optimizer_sur)

    loss_multiplier = 1. / 36.
    clip_fun = BAARD_Clipper(detector)

    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=0,  # alternatively, use the measured fpr
        clf_loss_multiplier=loss_multiplier,
        detector_clip_fun=clip_fun,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=8.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # X_toy = np.random.rand(128, 1, 28, 28).astype(np.float32)
    # pred_toy = art_classifier.predict(X_toy)
    # rejected_s1 = detector.stages[0].predict(X_toy, pred_toy)
    # print('Without:', np.mean(rejected_s1))

    # X_clipped = clip_fun(X_toy, art_classifier)
    # rejected_s1 = detector.stages[0].predict(X_clipped, pred_toy)
    # print('With:', np.mean(rejected_s1))
    # adv_x = attack.generate(x=X_toy)
    # pred_adv = predict_numpy(model, adv_x, device)
    # pred_sur = art_detector.predict(adv_x)
    # print('From surrogate model:', np.mean(pred_sur == 1))
    # labelled_as_adv = detector.detect(adv_x, pred_adv)
    # print('From BAARD', np.mean(labelled_as_adv == 1))

    # # Test it stage by stage
    # reject_s1 = detector.stages[0].predict(adv_x, pred_adv)
    # print('reject_s1', np.mean(reject_s1))
    # reject_s2 = detector.stages[1].predict(adv_x, pred_adv)
    # print('reject_s2', np.mean(reject_s2))
    # reject_s3 = detector.stages[2].predict(adv_x, pred_adv)
    # print('reject_s3', np.mean(reject_s3))

    x = X_att_test[:10]
    y = y_att_test[:10]
    adv_x = attack.generate(x=x, y=None)
    pred_adv = predict_numpy(model, adv_x, device)
    pred_sur = art_detector.predict(adv_x)

    pred = predict_numpy(model, adv_x, device)
    print('Acc classifier:', np.mean(pred == y))

    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = detector.detect(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))

    # Test it stage by stage
    reject_s1 = detector.stages[0].predict(adv_x, pred_adv)
    print('reject_s1', np.mean(reject_s1))
    reject_s2 = detector.stages[1].predict(adv_x, pred_adv)
    print('reject_s2', np.mean(reject_s2))
    reject_s3 = detector.stages[2].predict(adv_x, pred_adv)
    print('reject_s3', np.mean(reject_s3))
    print()
Example #7
def main(seed, dataset_name, clf_name, detector_name, epsilon_lst, input_shape):
    set_seeds(SEEDS[seed])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    print("load the classifier")
    file_model = os.path.join('result_{:}'.format(seed),
                              '{:}_{:}_model.pt'.format(dataset_name,
                                                        clf_name))
    if clf_name == 'dnn':
        model = BaseModel(use_prob=False).to(device)
    elif clf_name == 'resnet':
        model = Resnet(use_prob=False).to(device)
    else:
        raise ValueError("model idx unknown")
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_{:}'.format(seed),
                             '{:}_{:}_apgd2_2000.pt'.format(dataset_name, clf_name))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv (epsilon 2):', np.mean(pred == y))

    # Split data
    X_att_test = X[2000:3000]
    y_att_test = y[2000:3000]

    print("x attr shape ", X_att_test.shape)

    #########################################################################
    # Load baard
    print("Load baard")
    file_baard_train = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_baard_s1_train_data.pt'.format(dataset_name, clf_name))
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    stages = []
    stages.append(ApplicabilityStage(n_classes=10, quantile=1., verbose=False))
    stages.append(ReliabilityStage(n_classes=10, k=10, quantile=1., verbose=False))
    stages.append(DecidabilityStage(n_classes=10, k=100, quantile=1., verbose=False))
    detector = BAARDOperator(stages=stages)

    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    print("load baard's thresholds")
    file_baard_threshold = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_baard_threshold.pt'.format(dataset_name, clf_name))

    thresholds = torch.load(file_baard_threshold)['thresholds']
    detector.load(file_baard_threshold)

    print("load the surrogate")
    file_surro = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_baard_surrogate.pt'.format(dataset_name, clf_name))
    surrogate = get_pretrained_surrogate(file_surro, device)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=input_shape,
        nb_classes=10,
        optimizer=optimizer_clf
    )

    optimizer_sur = torch.optim.SGD(
        surrogate.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_detector = PyTorchClassifier(
        model=surrogate,
        loss=loss,
        input_shape=input_shape,
        nb_classes=2,
        optimizer=optimizer_sur
    )

    clip_fun = BAARD_Clipper(detector)

    #########################################################################

    pred_folder = 'result_{:}/predictions_wb_eval/{:}_{:}_{:}'.format(
        seed, dataset_name, clf_name, detector_name)

    print("compute prediction for samples at epsilon 0")
    x = X_att_test[:10]
    y = y_att_test[:10]

    # compute and save predictions
    cmpt_and_save_predictions(model, art_detector, detector, device, x, y,
                              pred_folder, 0)

    for eps in epsilon_lst:

        print("epsilon ", eps)

        if dataset_name == 'mnist':
            loss_multiplier = 1. / 36.
        else:
            loss_multiplier = 0.1

        attack = AutoProjectedGradientDescentDetectors(
            estimator=art_classifier,
            detector=art_detector,
            detector_th=0,
            detector_clip_fun=clip_fun,
            loss_type='logits_difference',
            batch_size=128,
            norm=2,
            eps=eps,
            eps_step=0.9,
            beta=0.5,
            max_iter=100)

        adv_x = attack.generate(x=x, y=None)

        # compute and save predictions
        cmpt_and_save_predictions(model, art_detector, detector, device, adv_x,
                                  y, pred_folder, eps)
Example #8
def main():
    set_seeds(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load baard
    file_baard_train = os.path.join(
        'result_0', 'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    file_baard_threshold = os.path.join(
        'result_0', 'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']

    stage1 = ApplicabilityStage(n_classes=10, quantile=1.)
    stage1.thresholds_ = thresholds[0]

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    # label_test = detector.detect(X_test, pred_test)
    # acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    # fpr = np.mean(label_test[:1000])
    # print('BAARD Acc_on_adv:', acc)
    # print('BAARD FPR:', fpr)

    label_surro = predict_numpy(surrogate, X_test, device)
    # acc = np.mean(label_surro == label_test)
    # print('Acc on surrogate:', acc)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=10,
        optimizer=optimizer_clf
    )

    optimizer_sur = torch.optim.SGD(
        surrogate.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_detector = PyTorchClassifier(
        model=surrogate,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=2,
        optimizer=optimizer_sur
    )

    fpr = 0.05
    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=fpr,
        clf_loss_multiplier=1. / 36.,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=5.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # adv_x = attack.generate(x=X_att_test[:100], y=y_att_test[:100])
    file_whitebox_adv = 'mnist_apgd2_3000_whitebox_size100.npy'
    # np.save(file_whitebox_adv, adv_x)
    adv_x = np.load(file_whitebox_adv)
    print('adv_x', adv_x.shape)

    pred_adv = predict_numpy(model, adv_x, device)
    adv_x = clip_by_threshold(adv_x, pred_adv, thresholds[0])
    pred_sur = art_detector.predict(adv_x)
    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = stage1.predict(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))
    
    # Testing 
    # X_toy = np.random.rand(128, 1, 28, 28).astype(np.float32)  # Same size as MNIST in a single batch
    # y_toy = np.concatenate((np.zeros(50), np.ones(50)))
    # rejected = stage1.predict(X_toy, y_toy)
    # print('rejected', np.mean(rejected))
    # X_bypass = clip_by_threshold(X_toy, y_toy, thresholds[0])
    # rejected_after = stage1.predict(X_bypass, y_toy)
    # print('rejected_after', np.mean(rejected_after))

    print('Pause')