def attack(predictWrapper, x_train, x_test, y_train, y_test, input_shape, datapoint):
    """Run an untargeted HopSkipJump attack against a black-box model.

    Wraps ``predictWrapper.predict_one_hot`` in an ART ``BlackBoxClassifier``
    (binary classifier, flat feature vectors of length ``input_shape``),
    perturbs ``datapoint`` into an adversarial example, reports the
    distortion and wall-clock cost, and returns the adversarial data.
    """
    lo = x_train.min()
    hi = x_train.max()
    print('min_pixel_value ', lo)
    print('max_pixel_value ', hi)

    print('xtrain shape: ', x_train.shape)
    print('xtest shape: ', x_test.shape)
    print('y_train shape: ', y_train.shape)
    print('ytest shape: ', y_test.shape)

    # Black-box wrapper: only the one-hot prediction function is exposed.
    classifier = BlackBoxClassifier(
        predict=predictWrapper.predict_one_hot,
        input_shape=(input_shape, ),
        nb_classes=2,
        clip_values=(lo, hi),
    )

    print('----- generate adv data by HopSkipJump attack -----')
    # Generate adversarial test examples and time the attack.
    start = time.time()

    attacker = HopSkipJump(
        classifier=classifier,
        targeted=False,
        norm=2,
        max_iter=100,
        max_eval=10000,
        init_eval=100,
        init_size=100,
    )

    # ART expects a 2-D batch of flat feature vectors.
    datapoint = datapoint.reshape((-1, input_shape))
    adv_data = attacker.generate(x=datapoint)

    distortion(datapoint, adv_data)
    print('Generate test adv cost time: ', time.time() - start)

    return adv_data
# Beispiel #2
# 0
def init_hopskipjump(config, data, limit=50):
    """Runs the HopSkipJump evasion attack on *data* against *config*.

    ``config`` is an ART-compatible classifier/estimator, ``data`` is the
    batch of inputs to perturb, and ``limit`` bounds the attack iterations.

    Arxiv Paper: https://arxiv.org/abs/1904.02144"""
    # BUG FIX: `targeted` must be passed by keyword. In ART >= 1.4 the
    # second positional parameter of HopSkipJump is `batch_size`, so the
    # previous positional `False` silently set batch_size=False instead
    # of making the attack untargeted.
    attack = HopSkipJump(config,
                         targeted=False,
                         max_iter=limit,
                         max_eval=100,
                         init_eval=10)
    return attack.generate(data)
# Beispiel #3
# 0
def hopskipjump(
    data,
    query,
    query_limit,
    art_model,
    victim_input_shape,
    substitute_input_shape,
    victim_output_targets,
):
    """Runs the HopSkipJump evasion attack

    Builds substitute training data via ``copycat`` with half of the query
    budget, spends the other half on the evasion attack, then relabels the
    adversarial batch with the victim ``query`` function.

    Returns a tuple ``(result, y)`` where ``result`` is the adversarial
    batch reshaped to ``substitute_input_shape`` and ``y`` are the victim's
    labels for it.

    Arxiv Paper: https://arxiv.org/abs/1904.02144"""

    # Split the query budget 50/50 between data collection and the attack.
    internal_limit = int(query_limit * 0.5)
    X, y = copycat(
        data,
        query,
        internal_limit,
        art_model,
        victim_input_shape,
        substitute_input_shape,
        victim_output_targets,
        reshape=False,
    )

    X_np = X.detach().clone().numpy()
    evasion_limit = int(query_limit * 0.5)

    # The initial evaluation number must be lower than the maximum.
    lower_bound = 0.01 * evasion_limit
    init_eval = int(lower_bound if lower_bound > 1 else 1)

    # BUG FIX: pass `targeted` by keyword — in ART >= 1.4 the second
    # positional HopSkipJump parameter is `batch_size`, so a positional
    # False set batch_size=False rather than targeted=False.
    attack = HopSkipJump(
        art_model,
        targeted=False,
        norm="inf",
        max_iter=evasion_limit,
        max_eval=evasion_limit,
        init_eval=init_eval,
    )
    # BUG FIX: generate() was previously called twice (the first result was
    # discarded), doubling the attack's query cost for no benefit.
    result = (torch.from_numpy(attack.generate(X_np)).clone().detach().float()
              )
    y = query(result)
    result = reshape_input(result, substitute_input_shape)
    return result, y
# Beispiel #4
# 0
    def get(self):
        """Handle a GET request carrying a 28x28 image: pad it to 32x32,
        run an iterative HopSkipJump attack against the global classifier
        inside the stored TF session/graph, and return the post-attack
        prediction plus the adversarial image as a base64 JPEG.
        """
        # Incoming image arrives as a comma-separated string of pixel values.
        data = self.parser.parse_args()
        img = data.get('img')
        img_data = img.split(',')
        img = np.array(img_data, np.float32).reshape(28, 28)
        # Scale to pixel range — assumes the client sends values in [0, 1];
        # TODO confirm against the frontend.
        img = img * 255.0
        # Zero-pad 28x28 -> 32x32 with a single channel (NHWC batch of 1).
        img_new = np.zeros((1, 32, 32, 1))
        img_new[0] = np.pad(img.reshape(28, 28), [(2, ), (2, )],
                            mode='constant').reshape(32, 32, 1)

        # Reuse the module-level TF session/graph the model was loaded in.
        global sess
        global graph
        with graph.as_default():
            set_session(sess)
            # Start with max_iter=0 so the first generate() call only finds
            # an initial adversarial point; max_iter is raised below.
            attack = HopSkipJump(classifier=classifier,
                                 targeted=False,
                                 max_iter=0,
                                 max_eval=1000,
                                 init_eval=10)
            iter_step = 3
            x_adv = None
            for i in range(iter_step):
                # resume=True continues refining the previous x_adv.
                x_adv = attack.generate(x=img_new,
                                        x_adv_init=x_adv,
                                        resume=True)

                #clear_output()
                # print("Adversarial image at step %d." % (i * iter_step),
                #     "and class label %d." % np.argmax(classifier.predict(x_adv)[0]))

                # After the initial pass, run iter_step iterations per call.
                attack.max_iter = iter_step

        # Save/encode the adversarial image and report the new prediction.
        sav_img = Image.fromarray(x_adv.reshape(32, 32))
        sav_img = sav_img.convert("L")
        sav_img.save("test.jpg")
        buffer = BytesIO()
        sav_img.save(buffer, format="JPEG")
        myimage = buffer.getvalue()
        res = str(predict(x_adv))
        print("After Attack: ", res)

        return jsonify({
            'res': res,
            'dat': bytes.decode(base64.b64encode(myimage))
        })
# Beispiel #5
# 0
def robust_score(y_true,
                 y_pred,
                 eps=0.1,
                 X=None,
                 y=None,
                 model=None,
                 feature_selector=None,
                 scorer=None):
    """Return the score drop of *model* under a cheap HopSkipJump attack.

    The test fold is taken from ``y_true.index``; every remaining row of
    ``X``/``y`` is used to re-fit a deep copy of ``model``.  The held-out
    rows are then attacked and the function returns
    ``scorer(clean test) - scorer(adversarial test)`` — a larger value
    means the model is less robust.

    Note: ``y_pred`` and ``eps`` are accepted for scorer-API compatibility
    but are not used in the computation.
    """
    all_ids = range(X.shape[0])
    test_ids = y_true.index.values
    # Train on everything outside the test fold.
    train_ids = list(set(all_ids) - set(test_ids))

    y_train = y[train_ids]
    y_test = y[test_ids]

    X_train = X[train_ids, :]
    X_test = X[test_ids, :]

    # Idiomatic None check (was: type(feature_selector) != type(None)).
    if feature_selector is not None:
        X_train = feature_selector.fit_transform(X_train)
        X_test = feature_selector.transform(X_test)

    # Fit a fresh copy so the caller's model instance is never mutated.
    best_model = copy.deepcopy(model)
    best_model.fit(X_train, y_train)

    # Very small attack budget: 1 iteration, few evaluations.
    classifier = SklearnClassifier(model=best_model)
    attack = HopSkipJump(classifier=classifier,
                         max_iter=1,
                         max_eval=10,
                         init_eval=5,
                         init_size=1)

    X_test_adv = attack.generate(X_test)

    diff = scorer(best_model, X_test, y_test) - scorer(best_model, X_test_adv,
                                                       y_test)
    return diff
# Beispiel #6
# 0
def robust_score_test(eps=0.1,
                      X_test=None,
                      y_test=None,
                      model=None,
                      feature_selector=None,
                      scorer=None):
    """Return the score drop of a fitted *model* on *X_test* under a cheap
    HopSkipJump attack: scorer(clean) - scorer(adversarial).

    ``feature_selector`` must already be fitted; ``eps`` is accepted for
    API compatibility.
    """
    # Project the test set through the (already-fitted) selector.
    filtered = feature_selector.transform(X_test)

    # Work on a copy so the caller's model object is untouched.
    attacked_model = copy.deepcopy(model)

    art_clf = SklearnClassifier(model=attacked_model)
    hsj = HopSkipJump(classifier=art_clf,
                      max_iter=1,
                      max_eval=10,
                      init_eval=5,
                      init_size=1)

    adversarial = hsj.generate(filtered)

    clean_score = scorer(attacked_model, filtered, y_test)
    attacked_score = scorer(attacked_model, adversarial, y_test)

    return clean_score - attacked_score
# Beispiel #7
# 0
# Create a query function for a PyTorch Lightning model
model = train_mnist_victim()


def query_mnist(input_data):
    """Black-box oracle: wrap the numpy batch in a tensor and return the
    victim model's output via get_target."""
    input_data = torch.from_numpy(input_data)
    return get_target(model, input_data)


emnist_train, emnist_test = get_emnist_data()

# ART black-box wrapper around the query function; pixel values are
# clipped to [0, 255] and inputs are NHWC with a trailing channel axis.
test = BlackBoxClassifier(
    predict=query_mnist,
    input_shape=(1, 28, 28, 1),
    nb_classes=10,
    clip_values=(0, 255),
    preprocessing_defences=None,
    postprocessing_defences=None,
    preprocessing=None,
)

# NOTE(review): `False` is passed positionally; in ART >= 1.4 the second
# positional HopSkipJump parameter is `batch_size`, not `targeted` —
# confirm the pinned ART version before relying on this being untargeted.
attack = HopSkipJump(test, False, max_iter=50, max_eval=100, init_eval=10)

X, y = emnist_train.data, emnist_train.targets

X = X.to(torch.float32)

# Add the trailing channel dimension expected by input_shape above.
X = X.unsqueeze(3)

attack.generate(X)
# Beispiel #8
# 0
x_train = target.astype(float)
classifier = BlackBoxClassifier(predict,
                                x_train[0].shape,
                                3,
                                clip_values=(0, 255))
res = predict(x_train[:1])
print(res)
assert (res[0, 0] == 1)

# Select target image and show prediction
target_image = x_train[0]

# Generate HopSkipJump attack against black box classifier
attack = HopSkipJump(classifier=classifier,
                     targeted=True,
                     max_iter=0,
                     max_eval=1000,
                     init_eval=10)
iter_step = 10
stop = Image.open(curr_path + "../danny-machine/machine.jpg")
stop = np.array([np.array(stop)]).astype(float)
x_adv = stop
errors = []
for i in range(100):
    x_adv = attack.generate(x=np.array([target_image]),
                            y=[1],
                            x_adv_init=x_adv)

    l2_err = np.linalg.norm(np.reshape(x_adv[0] - target_image, [-1]))
    print("Adversarial image at step %d." % (i * iter_step), "L2 error",
          np.linalg.norm(np.reshape(x_adv[0] - target_image, [-1])),
# Beispiel #9
# 0
    print('max_pixel_value ', max_pixel_value)

    # Create classifier
    classifier = BlackBoxClassifier(predict=predictWrapper.predict_one_hot,
                                    input_shape=input_shape,
                                    nb_classes=args.n_classes,
                                    clip_values=(min_pixel_value,
                                                 max_pixel_value))

    print('----- generate adv data by HopSkipJump attack -----')
    # Generate adversarial test examples

    attacker = HopSkipJump(classifier=classifier,
                           targeted=False,
                           norm=2,
                           max_iter=40,
                           max_eval=10000,
                           init_eval=100,
                           init_size=100)
    # attacker = HopSkipJump(classifier=classifier, targeted=False, norm=2, max_iter=2, max_eval=10000, init_eval=100, init_size=100)

    # Input data shape should be 2D
    datapoint = test[correct_index[:1]]

    s = time.time()
    adv_data = attacker.generate(x=datapoint)

    # distortion(datapoint, adv_data)
    print('Generate test adv cost time: ', time.time() - s)

    # return adv_data
def main(args):
    """Generate adversarial versions of a dataset's test set with a battery
    of ART evasion attacks (FGSM, BIM, PGD with L1/L2/Linf norms, CW-Linf,
    CW-L2, DeepFool, spatial transformation, Square, HopSkipJump, ZOO) and
    save each result as a .npy file under ``adv_path``.

    ``args.dataset`` selects which victim CNN to load; each dataset branch
    configures the epsilon grids and spatial-attack limits used below.
    """
    assert args.dataset in ['mnist', 'cifar', 'svhn', 'tiny', 'tiny_gray'], \
        "dataset parameter must be either 'mnist', 'cifar', 'svhn', or 'tiny'"
    print('Dataset: %s' % args.dataset)
    adv_path = '/home/aaldahdo/detectors/adv_data/'

    if args.dataset == 'mnist':
        from baselineCNN.cnn.cnn_mnist import MNISTCNN as model
        model_mnist = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_mnist.model
        sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.3
        pa_th=78
        # random_restart = 20
        # x_train = model_mnist.x_train
        x_test = model_mnist.x_test
        # y_train = model_mnist.y_train
        y_test = model_mnist.y_test
        y_test_labels = model_mnist.y_test_labels
        translation = 10
        rotation = 60

    # NOTE(review): the assert above only admits 'mnist', 'cifar', 'svhn',
    # 'tiny' and 'tiny_gray', so this and the other '*_gray' branches below
    # (except 'tiny_gray') are unreachable — confirm the intended whitelist.
    elif args.dataset == 'mnist_gray':
        from baselineCNN.cnn.cnn_mnist_gray import MNISTCNN as model
        model_mnist = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_mnist.model
        sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.3
        pa_th=78
        # random_restart = 20
        # x_train = model_mnist.x_train
        x_test = model_mnist.x_test
        # y_train = model_mnist.y_train
        y_test = model_mnist.y_test
        y_test_labels = model_mnist.y_test_labels
        translation = 10
        rotation = 60

    elif args.dataset == 'cifar':
        from baselineCNN.cnn.cnn_cifar10 import CIFAR10CNN as model
        model_cifar = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_cifar.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_cifar.x_train
        x_test = model_cifar.x_test
        # y_train = model_cifar.y_train
        y_test = model_cifar.y_test
        y_test_labels = model_cifar.y_test_labels
        translation = 8
        rotation = 30

    elif args.dataset == 'cifar_gray':
        from baselineCNN.cnn.cnn_cifar10_gray import CIFAR10CNN as model
        model_cifar = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_cifar.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_cifar.x_train
        x_test = model_cifar.x_test
        # y_train = model_cifar.y_train
        y_test = model_cifar.y_test
        y_test_labels = model_cifar.y_test_labels
        translation = 8
        rotation = 30

    elif args.dataset == 'svhn':
        from baselineCNN.cnn.cnn_svhn import SVHNCNN as model
        model_svhn = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_svhn.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_svhn.x_train
        x_test = model_svhn.x_test
        # y_train = model_svhn.y_train
        y_test = model_svhn.y_test
        y_test_labels = model_svhn.y_test_labels
        translation = 10
        rotation = 60

    elif args.dataset == 'svhn_gray':
        from baselineCNN.cnn.cnn_svhn_gray import SVHNCNN as model
        model_svhn = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_svhn.model
        sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_svhn.x_train
        x_test = model_svhn.x_test
        # y_train = model_svhn.y_train
        y_test = model_svhn.y_test
        y_test_labels = model_svhn.y_test_labels
        translation = 10
        rotation = 60

    elif args.dataset == 'tiny':
        from baselineCNN.cnn.cnn_tiny import TINYCNN as model
        model_tiny = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_tiny.model
        sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        pa_th=100
        # x_train = model_tiny.x_train
        x_test = model_tiny.x_test
        # y_train = model_tiny.y_train
        y_test = model_tiny.y_test
        y_test_labels = model_tiny.y_test_labels
        translation = 8
        rotation = 30
        del model_tiny

    elif args.dataset == 'tiny_gray':
        from baselineCNN.cnn.cnn_tiny_gray import TINYCNN as model
        model_tiny = model(mode='load', filename='cnn_{}.h5'.format(args.dataset))
        classifier=model_tiny.model
        sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
        classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
        kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))
        epsilons=[8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
        epsilons1=[5, 10, 15, 20, 25, 30, 40]
        epsilons2=[0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]
        eps_sa=0.125
        # NOTE(review): unlike every other branch, pa_th is never assigned
        # here; pa_th is unused in the code below, but confirm no other
        # caller relies on it.
        # x_train = model_tiny.x_train
        x_test = model_tiny.x_test
        # y_train = model_tiny.y_train
        y_test = model_tiny.y_test
        y_test_labels = model_tiny.y_test_labels
        translation = 8
        rotation = 30
        del model_tiny


    # batch_count_start = args.batch_indx
    # bsize = args.batch_size
    # batch_count_end = batch_count_start + 1

    #FGSM
    for e in epsilons:
        attack = FastGradientMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=256)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_fgsm_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #BIM
    for e in epsilons:
        attack = BasicIterativeMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=32, max_iter=int(e*256*1.25))
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_bim_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #PGD1
    for e in epsilons1:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=1, eps=e, eps_step=4, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd1_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #PGD2
    for e in epsilons2:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=2, eps=e, eps_step=0.1, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd2_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #PGDInf
    for e in epsilons:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=np.inf, eps=e, eps_step=0.01, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgdi_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #CWi
    attack = CarliniLInfMethod(classifier=kclassifier, max_iter=200)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cwi.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # #CWi
    # if args.dataset=='tiny':
    #     for n, x, y in batch(x_test, y_test, batch_size=bsize):
    #         if n>=batch_count_start*bsize and n<batch_count_end*bsize:
    #             adv_file_path = adv_path + args.dataset + '_cwi_' + str(batch_count_start) + '.npy'
    #             if not os.path.isfile(adv_file_path):
    #                 attack = CarliniLInfMethod(classifier=kclassifier, max_iter=100, batch_size=bsize)
    #                 adv_data = attack.generate(x=x)
    #                 np.save(adv_file_path, adv_data)
    #                 print('Done - {}'.format(adv_file_path))

    #CW2 - SLOW
    attack = CarliniL2Method(classifier=kclassifier, max_iter=100, batch_size=1, confidence=10)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cw2.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    #DF
    attack = DeepFool(classifier=kclassifier)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_df.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # #DF
    # if args.dataset=='tiny':
    #     for n, x, y in batch(x_test, y_test, batch_size=bsize):
    #         if n>=batch_count_start*bsize and n<batch_count_end*bsize:
    #             attack = DeepFool(classifier=kclassifier, epsilon=9, max_iter=100)
    #             adv_data = attack.generate(x=x)
    #             adv_file_path = adv_path + args.dataset + '_df_'+ str(batch_count_start) + '.npy'
    #             np.save(adv_file_path, adv_data)
    #             print('Done - {}'.format(adv_file_path))

    #Spatial transofrmation attack
    attack = SpatialTransformation(classifier=kclassifier, max_translation=translation, max_rotation=rotation)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_sta.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    #Square Attack
    attack = SquareAttack(estimator=kclassifier, max_iter=200, eps=eps_sa)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_sa.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    #HopSkipJump Attack
    # NOTE(review): y_test_next is computed but never used below — confirm
    # whether a targeted attack was intended here.
    y_test_next= get_next_class(y_test)
    attack = HopSkipJump(classifier=kclassifier, targeted=False, max_iter=0, max_eval=100, init_eval=10)

    # max_iter starts at 0 (initialization only) and is raised to iter_step
    # after the first pass; resume=True refines the previous adversarial
    # batch on each of the 4 rounds.
    iter_step = 10
    adv_data = np.zeros(x_test.shape)
    # adv_data = adv_data[0:25]
    for i in range(4):
        adv_data = attack.generate(x=x_test, x_adv_init=adv_data, resume=True)
        attack.max_iter = iter_step

    # _, acc_normal = classifier.evaluate(x_test[0:25], y_test[0:25])
    # _, acc_adv = classifier.evaluate(adv_data, y_test[0:25])
    # print('Normal accuracy - {}\nAttack accuracy - {}'.format(acc_normal, acc_adv))

    # subcount=1
    # for i in range(0, 25):
    #     plt.subplot(5,5,subcount)
    #     if args.dataset=='mnist':
    #         plt.imshow(adv_data[i][:,:,0])
    #     else:
    #         plt.imshow(adv_data[i][:,:,:])
    #     plt.suptitle(args.dataset+ " sb")
    #     subcount = subcount + 1
    # plt.show()

        # NOTE(review): these three lines are indented inside the for-loop
        # above, so the .npy is (re)saved after every round — confirm
        # whether saving once after the loop was intended.
        adv_file_path = adv_path + args.dataset + '_hop.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    #ZOO attack
    attack = ZooAttack(classifier=kclassifier, batch_size=32)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_zoo.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))
# Beispiel #11
# 0
def load_hopskip(classifier):
    """Build an untargeted HopSkipJump attack for *classifier* with a
    modest budget (10 iterations, 1000 evaluations per iteration)."""
    return HopSkipJump(
        classifier=classifier,
        targeted=False,
        max_iter=10,
        max_eval=1000,
        init_eval=10,
    )
def adversarial_generation(
    model: Architecture,
    x,
    y,
    epsilon=0.25,
    attack_type=AttackType.FGSM,
    num_iter=10,
    attack_backend: str = AttackBackend.FOOLBOX,
):
    """
    Create an adversarial example (FGMS only for now)

    Dispatches on ``attack_backend`` (ART, Foolbox, or the project's custom
    implementations) and on ``attack_type`` to build the attacker, runs it
    on the batch ``x`` / labels ``y``, and returns the attacked batch as a
    detached double tensor.

    Raises NotImplementedError for attack types the chosen backend does
    not support, and for unknown backends.
    """
    x.requires_grad = True

    logger.info(f"Generating for x (shape={x.shape}) and y (shape={y.shape})")

    if attack_backend == AttackBackend.ART:

        from art.attacks.evasion import (
            FastGradientMethod,
            ProjectedGradientDescent,
            DeepFool as DeepFoolArt,
            CarliniL2Method,
            HopSkipJump,
        )

        if attack_type == AttackType.FGSM:
            attacker = FastGradientMethod(estimator=model.art_classifier,
                                          eps=epsilon)
        elif attack_type == AttackType.PGD:
            attacker = ProjectedGradientDescent(
                estimator=model.art_classifier,
                max_iter=num_iter,
                eps=epsilon,
                eps_step=2 * epsilon / num_iter,
            )
        elif attack_type == AttackType.DeepFool:
            attacker = DeepFoolArt(classifier=model.art_classifier,
                                   max_iter=num_iter)
        # BUG FIX: this branch compared against the raw string "CW" while
        # every other branch (and the Foolbox/custom backends below) uses
        # AttackType members, which left the ART CW branch unreachable for
        # enum-valued attack_type.
        elif attack_type == AttackType.CW:
            attacker = CarliniL2Method(
                classifier=model.art_classifier,
                max_iter=num_iter,
                binary_search_steps=15,
            )
        elif attack_type == AttackType.SQUARE:
            # attacker = SquareAttack(estimator=model.get_art_classifier())
            raise NotImplementedError("Work in progress")
        elif attack_type == AttackType.HOPSKIPJUMP:
            attacker = HopSkipJump(
                classifier=model.art_classifier,
                targeted=False,
                max_eval=100,
                max_iter=10,
                init_eval=10,
            )
        else:
            raise NotImplementedError(f"{attack_type} is not available in ART")

        attacked = attacker.generate(x=x.detach().cpu())
        attacked = torch.from_numpy(attacked).to(device)

    elif attack_backend == AttackBackend.FOOLBOX:

        import foolbox as fb

        # These models are attacked through their default forward pass;
        # everything else is attacked on pre-softmax logits.
        if model.name in ["efficientnet", "resnet32", "resnet44", "resnet56"]:
            model.set_default_forward_mode(None)
        else:
            model.set_default_forward_mode("presoft")

        if attack_type == AttackType.FGSM:
            attacker = fb.attacks.LinfFastGradientAttack()
        elif attack_type == AttackType.PGD:
            attacker = fb.attacks.LinfProjectedGradientDescentAttack(
                steps=num_iter, random_start=False, rel_stepsize=2 / num_iter)
        elif attack_type == AttackType.DeepFool:
            attacker = fb.attacks.LinfDeepFoolAttack(loss="crossentropy")
        elif attack_type == AttackType.CW:
            attacker = fb.attacks.L2CarliniWagnerAttack(steps=num_iter)
        elif attack_type == AttackType.BOUNDARY:
            attacker = fb.attacks.BoundaryAttack(steps=7000,
                                                 spherical_step=0.01,
                                                 source_step=0.01)
            x = x.float()
        else:
            raise NotImplementedError(
                f"{attack_type} is not available in Foolbox")

        attacked, _, _ = attacker(
            model.foolbox_classifier,
            x.detach(),
            torch.from_numpy(y).to(device),
            epsilons=epsilon,
        )

        # Restore the model's default forward mode after the attack.
        model.set_default_forward_mode(None)

    elif attack_backend == AttackBackend.CUSTOM:

        from tda.dataset.custom_attacks import FGSM, BIM, DeepFool, CW

        if attack_type == AttackType.FGSM:
            attacker = FGSM(model, ce_loss)
            attacked = attacker.run(data=x.detach(),
                                    target=torch.from_numpy(y).to(device),
                                    epsilon=epsilon)
        elif attack_type == AttackType.PGD:
            attacker = BIM(model, ce_loss, lims=(0, 1), num_iter=num_iter)
            attacked = attacker.run(data=x.detach(),
                                    target=torch.from_numpy(y).to(device),
                                    epsilon=epsilon)
        elif attack_type == AttackType.DeepFool:
            # DeepFool here is applied per-sample, then re-stacked.
            attacker = DeepFool(model, num_classes=10, num_iter=num_iter)
            attacked = [
                attacker(x[i].detach(),
                         torch.tensor(y[i]).to(device)) for i in range(len(x))
            ]
            attacked = torch.cat([torch.unsqueeze(a, 0) for a in attacked], 0)
        elif attack_type == AttackType.CW:
            attacker = CW(model, lims=(0, 1), num_iter=num_iter)
            attacked = attacker.run(data=x.detach(),
                                    target=torch.from_numpy(y).to(device))
            attacked = torch.cat([torch.unsqueeze(a, 0) for a in attacked], 0)
        else:
            raise NotImplementedError(
                f"{attack_type} is not available as custom implementation")
    else:
        raise NotImplementedError(f"Unknown backend {attack_backend}")

    return attacked.detach().double()
# Beispiel #13
# 0

# A toy example of how to call the class
if __name__ == '__main__':
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import f1_score

    # Fit the private forest on the breast-cancer dataset and report the
    # (in-sample) F1 score.
    cancer = load_breast_cancer()

    features = cancer.data
    labels = cancer.target

    model = PrivateRandomForest(n_estimators=100, epsilon=0.1)
    model.fit(features, labels)

    print(f1_score(labels, model.predict(features)))

    import numpy as np
    from art.classifiers import SklearnClassifier

    import copy
    from art.attacks.evasion import HopSkipJump

    # Attack the fitted model with a single cheap HopSkipJump pass and
    # show how its predictions change on the adversarial inputs.
    classifier = SklearnClassifier(model=model)
    attack = HopSkipJump(classifier=classifier, max_iter=1, max_eval=100)

    adversarial_features = attack.generate(features)

    print(model.predict(adversarial_features))
# Beispiel #14
# 0
def experiment(dataset_id, folder, n_estimators=500, reps=5, n_attack=50):
    """Attack KDF and its underlying RF on one OpenML dataset with HopSkipJump.

    Fits a `kdf` model (which exposes the fitted random forest as
    `rf_model`), runs an untargeted black-box HopSkipJump attack against
    both models on a common subset of correctly classified training
    points, and writes per-repetition L2/Linf distortions and error
    rates to ``folder/openML_cc18_<dataset_id>.csv``.

    Parameters
    ----------
    dataset_id : OpenML dataset identifier passed to
        ``openml.datasets.get_dataset``.
    folder : output directory for the result CSV.
    n_estimators : number of trees for the KDF/RF model.
    reps : Monte-Carlo repetitions per training-set size.
    n_attack : number of samples to attack per repetition (clamped to the
        number of available candidates; see NOTE below).

    Returns nothing; returns early (without writing a CSV) when the
    dataset has categorical features, contains NaNs, is too large, or no
    attackable samples remain.
    """
    dataset = openml.datasets.get_dataset(dataset_id)
    X, y, is_categorical, _ = dataset.get_data(
        dataset_format="array", target=dataset.default_target_attribute)

    # Skip datasets with any categorical feature.
    if np.mean(is_categorical) > 0:
        return

    # Skip datasets with missing labels or features.
    if np.isnan(np.sum(y)):
        return

    if np.isnan(np.sum(X)):
        return

    total_sample = X.shape[0]
    unique_classes, counts = np.unique(y, return_counts=True)

    # Per-class test size: a third of the rarest class.
    test_sample = min(counts) // 3

    # indx[i] holds the sample indices belonging to unique_classes[i].
    indx = []
    for label in unique_classes:
        indx.append(np.where(y == label)[0])

    max_sample = min(counts) - test_sample
    train_samples = np.logspace(np.log10(2),
                                np.log10(max_sample),
                                num=10,
                                endpoint=True,
                                dtype=int)

    # Only the largest training-set size is actually used.
    train_samples = [train_samples[-1]]

    # Only use small data for now
    if train_samples[-1] > 1000:
        return

    # Per-repetition result accumulators (become CSV columns below).
    l2_kdf_list = []
    l2_rf_list = []
    linf_kdf_list = []
    linf_rf_list = []
    err_adv_rf_list = []
    err_adv_kdf_list = []
    err_rf = []
    err_kdf = []
    mc_rep = []
    samples_attack = []
    samples = []

    for train_sample in train_samples:
        for rep in range(reps):
            indx_to_take_train = []
            indx_to_take_test = []

            # Class-balanced split: shuffle each class's indices, take the
            # first `train_sample` for training and the last `test_sample`
            # for testing.
            for ii, _ in enumerate(unique_classes):
                np.random.shuffle(indx[ii])
                indx_to_take_train.extend(list(indx[ii][:train_sample]))
                indx_to_take_test.extend(
                    list(indx[ii][-test_sample:counts[ii]]))

            # Fit the estimators
            # min_samples_leaf heuristic scales with feature count and
            # shrinks (logarithmically) with training-set size.
            model_kdf = kdf(
                kwargs={
                    "n_estimators":
                    n_estimators,
                    "min_samples_leaf":
                    int(np.ceil(X.shape[1] * 10 / np.log(train_sample))),
                })
            model_kdf.fit(X[indx_to_take_train], y[indx_to_take_train])
            proba_kdf = model_kdf.predict_proba(X[indx_to_take_test])
            proba_rf = model_kdf.rf_model.predict_proba(X[indx_to_take_test])
            predicted_label_kdf = np.argmax(proba_kdf, axis=1)
            predicted_label_rf = np.argmax(proba_rf, axis=1)

            # Initial classification error
            err_rf.append(1 -
                          np.mean(predicted_label_rf == y[indx_to_take_test]))
            err_kdf.append(1 - np.mean(
                predicted_label_kdf == y[indx_to_take_test]))

            ## Adversarial attack ###
            def _predict_kdf(x):
                """Wrapper to query black box"""
                # Returns hard-label one-hot predictions, as required by
                # ART's decision-based (label-only) HopSkipJump attack.
                proba_kdf = model_kdf.predict_proba(x)
                predicted_label_kdf = np.argmax(proba_kdf, axis=1)
                return to_categorical(
                    predicted_label_kdf,
                    nb_classes=len(np.unique(y[indx_to_take_train])),
                )

            def _predict_rf(x):
                """Wrapper to query blackbox for rf"""
                proba_rf = model_kdf.rf_model.predict_proba(x)
                predicted_label_rf = np.argmax(proba_rf, axis=1)
                return to_categorical(predicted_label_rf,
                                      nb_classes=len(
                                          np.unique(y[indx_to_take_train])))

            # BlackBoxClassifier(predict_fn, input_shape, nb_classes).
            art_classifier_kdf = BlackBoxClassifier(
                _predict_kdf,
                X[indx_to_take_train][0].shape,
                len(np.unique(y[indx_to_take_train])),
            )
            art_classifier_rf = BlackBoxClassifier(
                _predict_rf,
                X[indx_to_take_train][0].shape,
                len(np.unique(y[indx_to_take_train])),
            )
            # Untargeted HopSkipJump with identical budgets for a fair
            # KDF-vs-RF comparison.
            attack_rf = HopSkipJump(
                classifier=art_classifier_rf,
                targeted=False,
                max_iter=50,
                max_eval=1000,
                init_eval=10,
            )
            attack_kdf = HopSkipJump(
                classifier=art_classifier_kdf,
                targeted=False,
                max_iter=50,
                max_eval=1000,
                init_eval=10,
            )

            ### For computational reasons, attack a random subset that is identified correctly
            # Get indices of correctly classified samples common to both
            # NOTE(review): attacks are run on TRAINING points, not the
            # held-out test set — confirm this is intentional.
            selection_idx = indx_to_take_train
            proba_kdf = model_kdf.predict_proba(X[selection_idx])
            proba_rf = model_kdf.rf_model.predict_proba(X[selection_idx])
            predicted_label_kdf = np.argmax(proba_kdf, axis=1)
            predicted_label_rf = np.argmax(proba_rf, axis=1)

            idx_kdf = np.where(predicted_label_kdf == y[selection_idx])[0]
            idx_rf = np.where(predicted_label_rf == y[selection_idx])[0]
            idx_common = list(np.intersect1d(idx_kdf, idx_rf))

            # Randomly sample from the common indices
            # NOTE: this clamp mutates the `n_attack` parameter, so a
            # small candidate pool in one rep shrinks the attack budget
            # for all subsequent reps as well.
            if n_attack > len(idx_common):
                n_attack = len(idx_common)
            idx = random.sample(idx_common, n_attack)
            if n_attack == 0:
                return

            ### Generate samples
            x_adv_kdf = attack_kdf.generate(X[selection_idx][idx])
            x_adv_rf = attack_rf.generate(X[selection_idx][idx])

            # Compute norms
            # Mean per-sample L2 / Linf distortion of the adversarial
            # perturbations (smaller = easier to attack).
            l2_kdf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_kdf,
                               ord=2,
                               axis=1))
            l2_rf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_rf, ord=2,
                               axis=1))
            linf_rf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_rf,
                               ord=np.inf,
                               axis=1))
            linf_kdf = np.mean(
                np.linalg.norm(X[selection_idx][idx] - x_adv_kdf,
                               ord=np.inf,
                               axis=1))

            ### Classification
            # Make adversarial prediction
            # Error rate on adversarial inputs (1.0 = attack always
            # succeeded, since all attacked points were correct before).
            proba_rf = model_kdf.rf_model.predict_proba(x_adv_rf)
            predicted_label_rf_adv = np.argmax(proba_rf, axis=1)
            err_adv_rf = 1 - np.mean(
                predicted_label_rf_adv == y[selection_idx][idx])

            proba_kdf = model_kdf.predict_proba(x_adv_kdf)
            predicted_label_kdf_adv = np.argmax(proba_kdf, axis=1)
            err_adv_kdf = 1 - np.mean(
                predicted_label_kdf_adv == y[selection_idx][idx])

            print("l2_rf = {:.4f}, linf_rf = {:.4f}, err_rf = {:.4f}".format(
                l2_rf, linf_rf, err_adv_rf))
            print(
                "l2_kdf = {:.4f}, linf_kdf = {:.4f}, err_kdf = {:.4f}".format(
                    l2_kdf, linf_kdf, err_adv_kdf))

            l2_kdf_list.append(l2_kdf)
            l2_rf_list.append(l2_rf)
            linf_kdf_list.append(linf_kdf)
            linf_rf_list.append(linf_rf)
            err_adv_kdf_list.append(err_adv_kdf)
            err_adv_rf_list.append(err_adv_rf)

            mc_rep.append(rep)
            samples_attack.append(n_attack)
            samples.append(train_sample)

    # One row per (train_sample, rep) combination.
    df = pd.DataFrame()
    df["l2_kdf"] = l2_kdf_list
    df["l2_rf"] = l2_rf_list
    df["linf_kdf"] = linf_kdf_list
    df["linf_rf"] = linf_rf_list
    df["err_kdf"] = err_kdf
    df["err_rf"] = err_rf
    df["err_adv_kdf"] = err_adv_kdf_list
    df["err_adv_rf"] = err_adv_rf_list
    df["rep"] = mc_rep
    df["samples_attack"] = samples_attack
    df["samples"] = samples

    df.to_csv(folder + "/" + "openML_cc18_" + str(dataset_id) + ".csv")
# Script: train a random forest and measure its empirical robustness
# under a (very cheap) HopSkipJump attack. `X` and `y` are defined
# earlier in the file.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

print(X_test.shape)

print("trained")

# Wrap the fitted sklearn model for ART and run HopSkipJump with a tiny
# budget (max_iter=1, max_eval=10) — a smoke test, not a strong attack.
classifier = SklearnClassifier(model=model)
attack = HopSkipJump(classifier=classifier,
                     max_iter=1,
                     max_eval=10,
                     init_eval=10,
                     init_size=1)
# y_test here are the true labels; presumably used as the source labels
# for the untargeted attack — TODO confirm against the ART version in use.
X_test_attacked = attack.generate(X_test, y_test)

# ART's empirical_robustness: average perturbation needed to flip the
# classifier's predictions, using the same 'hsj' attack parameters.
robustness = empirical_robustness(classifier,
                                  X_test,
                                  'hsj',
                                  attack_params={
                                      'max_iter': 1,
                                      'max_eval': 10,
                                      'init_eval': 10,
                                      'init_size': 1
                                  })
print('Robustness: ' + str(robustness))