Example #1
def sample_attack(keras_model,
                  image,
                  attack_method,
                  input_name,
                  labeled,
                  target=0):
    """
     对于给定的模型和输入,本方法将使用指定的攻击方法生成攻击图片
    :return:
    """
    input_shape = nm.extract_input_shape(keras_model)[1:]
    image = nm.prepare_image(image, input_shape)
    layer_input = [image]
    label = keras_model.predict(np.asarray(layer_input))
    label = np.argmax(label)
    if labeled is not None and label != labeled:
        return None, "This image cannot be correctly classified, no adversarial sample will be generated. expected: " + str(
            labeled) + " actual: " + str(label)
    network_model = KerasModel(keras_model, bounds=(0, 1))

    # run the attack
    if str(attack_method).lower() == 'lbfgs':
        attack = LBFGSAttack(model=network_model,
                             criterion=TargetClassProbability(target, p=.5))
    elif str(attack_method).lower() == 'singlepixelattack':
        attack = SinglePixelAttack(model=network_model,
                                   criterion=TargetClassProbability(target,
                                                                    p=.5))
    else:
        return "Attack method not supported at the moment"
    print(label)
    if label == target:
        target = (target + 1) % 10
    adversarial = attack(image[:, :, ::-1], label)
    output = network_model.predictions(adversarial)
    print(np.argmax(output))
    adversarial = adversarial.reshape(input_shape)
    adversarial = adversarial * 255
    adv_image_name = 'adv_{}_origin_{}_{}_{}'.format(target, label,
                                                     attack_method, input_name)
    print(adversarial.shape)
    im = None
    if len(adversarial.shape) == 2:
        im = Image.fromarray(np.uint8(adversarial), mode="1")
    elif len(adversarial.shape) == 3 and adversarial.shape[2] == 1:
        im = Image.fromarray(np.uint8(
            adversarial.reshape(adversarial.shape[0], adversarial.shape[1])),
                             mode="L")
    elif len(adversarial.shape) == 3 and adversarial.shape[2] == 3:
        im = Image.fromarray(np.uint8(adversarial), mode="RGB")
    im.save(os.path.join(basedir, Config.UPLOAD_IMAGE_FOLDER, adv_image_name))
    # cv2.imwrite(os.path.join(basedir, Config.UPLOAD_IMAGE_FOLDER, adv_image_name), adversarial)
    print('adv', adv_image_name)
    return adversarial, adv_image_name
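Example #1 depends on a project-local helper module `nm` that is not shown. The stand-ins below are a minimal sketch of what those helpers might look like, assuming a Keras model and pixels scaled to [0, 1]; they are illustrative assumptions, not the project's actual implementation.

# Hypothetical stand-ins for the nm helpers used above (assumptions only).
import numpy as np
from PIL import Image

def extract_input_shape(keras_model):
    # Keras exposes the expected input shape, e.g. (None, 28, 28, 1);
    # the caller above strips the batch dimension with [1:].
    return keras_model.input_shape

def prepare_image(image, input_shape):
    # Resize to the model's spatial dimensions and scale pixels to [0, 1],
    # matching the bounds=(0, 1) passed to KerasModel above.
    img = Image.open(image) if isinstance(image, str) else Image.fromarray(np.uint8(image))
    img = img.resize((input_shape[1], input_shape[0]))
    arr = np.asarray(img, dtype=np.float32) / 255.0
    return arr.reshape(input_shape)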
Example #2
def untargeted_attack(model, imgs):
    """
    :param model: attacked model
    :param images: numpy array of orignial images with shape (10,28,28,1)

    return a numpy array with adversarial images
    """
    model = foolbox.models.KerasModel(model, bounds=(0, 1))
    target_class = 1

    adversarials = []
    for image in imgs:
        label_orig = np.argmax(model.predictions(image))

        # Toggle between the targeted LBFGS attack and an untargeted FGSM attack.
        use_lbfgs = False
        if use_lbfgs:
            criterion = TargetClassProbability(target_class, p=0.99)
            attack = foolbox.attacks.LBFGSAttack(model, criterion)
            adversarial = attack(image, label=label_orig)
        else:
            attack = foolbox.attacks.FGSM(model)
            adversarial = attack(image, label=label_orig)

        adversarials.append(adversarial)

    return adversarials
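A minimal usage sketch for untargeted_attack(), assuming a trained Keras MNIST classifier kmodel (not part of the snippet) and inputs scaled to [0, 1]:

import numpy as np
from keras.datasets import mnist

(_, _), (x_test, _) = mnist.load_data()
imgs = x_test[:10].astype(np.float32).reshape(10, 28, 28, 1) / 255.0
advs = untargeted_attack(kmodel, imgs)  # one adversarial image per input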
Example #3
def attack(x,
           model,
           method,
           label_adv,
           label_true,
           entropy_masking=False,
           confidence=0.99):
    _model = FoolboxKerasModelEntropy(model,
                                      bounds=(0, 1),
                                      entropy_mask=entropy_masking,
                                      cache_grad_mask=True)
    if entropy_masking:
        # Precompute and cache the gradient mask of the image.
        _model.compute_gradient_mask(x)

    label_adv = TARGET_CLASS  # note: overrides the label_adv argument
    criterion = TargetClassProbability(label_adv,
                                       p=confidence)  # Targeted attack

    attacker = None
    img_adv = None
    if method == "BIM":
        attacker = LinfinityBasicIterativeAttack(
            _model, criterion, distance=foolbox.distances.Linfinity)

        img_adv = attacker(x,
                           label_true,
                           binary_search=False,
                           epsilon=1.0,
                           stepsize=0.004,
                           iterations=1000)
    else:
        raise ValueError("Unknown attack!")

    return img_adv
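A usage sketch for attack(), assuming TARGET_CLASS is defined at module level, kmodel is a trained Keras classifier, and x is a single image scaled to [0, 1] (all three names are assumptions, not part of the snippet):

TARGET_CLASS = 3  # assumed module-level constant used inside attack()
img_adv = attack(x, kmodel, "BIM",
                 label_adv=TARGET_CLASS,  # overridden by TARGET_CLASS inside attack()
                 label_true=7,
                 entropy_masking=False)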
Example #4
def generate_adversarial(file_name, original_path, original_class, categories):
  original_image = mpimg.imread(original_path)
  target_class = randint(0, 12)
  while target_class == original_class:
    target_class = randint(0, 12)
  criteria = TargetClassProbability(target_class, 0.95)
  adversarial = Adversarial(adversarial_model,
                            criterion = criteria,
                            original_image = original_image,
                            original_class = original_class,
                            distance = Linfinity)
  adversarial_image = attack(adversarial,
                             epsilon = 0.0003,
                             random_start = True,
                             iterations = 200,
                             stepsize = 0.0001,
                             return_early = False)
  if adversarial_image is None:
    print("adversarial not found")
    return
  adverse = np.round(adversarial_image)
  original_category = categories[original_class]
  target_category = categories[target_class]
  original_image_name = file_name[:-4]
  save_path = "adversarial_examples/" + original_category + "/"
  saved_image_name = original_image_name + "_" + target_category + ".png"
  matplotlib.image.imsave(save_path+saved_image_name, adverse/255, format = 'png')
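generate_adversarial() relies on module-level adversarial_model and attack objects that are not shown. A plausible setup under the foolbox 1.x API, assuming a trained Keras classifier kmodel with inputs in [0, 255] and an L-infinity iterative attack (both assumptions), might look like this:

from foolbox import Adversarial
from foolbox.models import KerasModel
from foolbox.attacks import LinfinityBasicIterativeAttack
from foolbox.criteria import TargetClassProbability
from foolbox.distances import Linfinity

adversarial_model = KerasModel(kmodel, bounds=(0, 255))
# Instantiated without a model: foolbox takes the model and criterion from the
# Adversarial instance passed in at call time, as generate_adversarial() does.
attack = LinfinityBasicIterativeAttack()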
Example #5
def validate(val_loader, model, epsilon, args):
    # switch to evaluate mode
    model.eval()

    mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
    preprocessing = (mean, std)
    fmodel = PyTorchModel(model,
                          bounds=(0, 1),
                          num_classes=2048,
                          preprocessing=preprocessing)

    np.random.seed(1)  # fix seed for reproducibility across models, images

    clean_label = 0  # dummy label
    target_labels = np.random.choice(
        np.setdiff1d(np.arange(2048), clean_label), 6)
    print(target_labels)

    imgs = []
    advs = []

    # Batch processing is experimental in foolbox, so we feed images one by one.
    for i, (images, target) in enumerate(val_loader):

        if i == 2:
            image = np.float32(np.random.rand(3, 224, 224))
            imgs.append(image)
            print(image)
        else:
            image = images.cpu().numpy()[0]
            imgs.append(image)
            print(image)

        for j in range(len(target_labels)):
            target_label = target_labels[j]
            attack = ProjectedGradientDescentAttack(
                model=fmodel,
                criterion=TargetClassProbability(target_label, 1. - 1e-6),
                distance=Linfinity)
            adversarial = attack(image,
                                 clean_label,
                                 binary_search=False,
                                 epsilon=epsilon,
                                 stepsize=1. / 255,
                                 iterations=500,
                                 random_start=False,
                                 return_early=False)

            adv_pred_label = np.argmax(fmodel.predictions(adversarial))
            clean_pred_label = np.argmax(fmodel.predictions(image))
            print('Iter, Clean_pred, Adv, Adv_pred: ', i, clean_pred_label,
                  target_label, adv_pred_label)

            advs.append(adversarial)

        if i == 2:
            return imgs, advs
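The num_classes=2048 passed to PyTorchModel suggests the attacked network outputs 2048-dimensional features rather than class logits. A plausible (purely illustrative) setup uses a torchvision ResNet-50 with its classification head removed and an ImageNet-style validation folder; all names below are assumptions, not the original script:

import torch
import torchvision
import torchvision.transforms as T

backbone = torchvision.models.resnet50(pretrained=True)
model = torch.nn.Sequential(*list(backbone.children())[:-1],  # drop the fc head
                            torch.nn.Flatten())               # -> (N, 2048)

val_set = torchvision.datasets.ImageFolder(
    'val/', transform=T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()]))
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=False)

imgs, advs = validate(val_loader, model, epsilon=8. / 255, args=None)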
Example #6
def attack(pb_path, image_path):
    with tf.Session() as session:
        with gfile.FastGFile(pb_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            session.graph.as_default()
            tf.import_graph_def(graph_def, name='')
        session.run(tf.global_variables_initializer())
        images = session.graph.get_tensor_by_name('input_1:0')
        logits = session.graph.get_tensor_by_name('dense_3/BiasAdd:0')
        output = session.graph.get_tensor_by_name('dense_3/Softmax:0')
        # for n in tf.get_default_graph().get_operations():
        #     print(n.values())
        model = TensorFlowModel(images, logits, (-1, 1))

        image = inception_preprocessing(open_image(image_path, 299, 299))
        p, ext = os.path.splitext(image_path)

        values = model.predictions(image)
        label = np.argmax(values)
        print('label:', categories[label])

        print('attacking...')
        target_class = 2
        for prob in range(95, 100, 1):
            prob = prob / 100
            print('probability is:', prob)
            adv_path = '{}-adv-{}{}'.format(p, prob, ext)
            pert_path = '{}-pert-{}{}'.format(p, prob, ext)

            criterion = TargetClassProbability(target_class, p=prob)

            # attack = LBFGSAttack(model, criterion)
            # attack = FGSM(model, criterion)
            # attack = ProjectedGradientDescentAttack(model, criterion)
            attack = LinfinityBasicIterativeAttack(model, criterion)
            # attack = L1BasicIterativeAttack(model, criterion)
            # attack = L2BasicIterativeAttack(model, criterion)
            # attack = MomentumIterativeAttack(model, criterion)

            # attack = FGSM(model)
            # attack = MomentumIterativeAttack(model)
            # attack = SinglePixelAttack(model)
            # attack = LocalSearchAttack(model)

            adversarial = attack(image, label=label)
            new_label = np.argmax(model.predictions(adversarial))
            print('new label:', categories[new_label])

            raw_image = inception_postprocessing(image)
            raw_adversarial = inception_postprocessing(adversarial)
            raw_pert = raw_adversarial - raw_image

            save_image(raw_adversarial, adv_path)
            save_image(raw_pert, pert_path)
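Example #6 uses project-local helpers (open_image, inception_preprocessing, inception_postprocessing, save_image) that are not shown. The versions below are rough assumptions consistent with how the snippet uses them, in particular the Inception-style mapping between [0, 255] pixels and the model's (-1, 1) bounds:

import numpy as np
from PIL import Image

def open_image(path, width, height):
    # Load an RGB image and resize to the model's expected spatial size.
    return np.asarray(Image.open(path).convert('RGB').resize((width, height)),
                      dtype=np.float32)

def inception_preprocessing(img):
    return img / 127.5 - 1.0          # [0, 255] -> [-1, 1]

def inception_postprocessing(img):
    return (img + 1.0) * 127.5        # [-1, 1] -> [0, 255]

def save_image(img, path):
    Image.fromarray(np.uint8(np.clip(img, 0, 255))).save(path)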
Example #7
def perturb_lbfgs(sample, model, data):
    # Perturb images using LBFGS attack by Szegedy et al. using the foolbox library
    # Based on the tutorial: https://foolbox.readthedocs.io/en/latest/user/tutorial.html

    # create model for foolbox
    foolbox_model = KerasModel(model, (0.0, 1.0), channel_axis=1)
    #foolbox_model = TheanoModel(model.input, model.layers[-2].output, (0.0, 1.0), 10, channel_axis=1)

    # get correct class
    correct_class = model.predict_classes(sample)

    # set target to be next higher class (and 0 for 9)
    target_class = (correct_class+1)%10

    # set attack criterion to be 90% target class probability
    criterion = TargetClassProbability(target_class, p=0.90)

    # create the attack; the model and criterion are supplied via the
    # Adversarial instance constructed below
    attack = LBFGSAttack()

    #print(sample[0,:,:,:].shape)

    # generate adversarial example
    # sample needs to be transformed from (batchsize, channels, rows, cols) format to (height, width, channels) for
    # foolbox, but that leads to problems with the model
    transformed_sample = sample.reshape(28,28,1)
    ad_ins = Adversarial(foolbox_model, criterion, transformed_sample, correct_class)

    adversarial = attack(ad_ins)

    # get class of adversarial example
    pred_class = model.predict_classes(adversarial)
    if pred_class != correct_class:
        return (1, adversarial)

    return (0, sample)
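A usage sketch for perturb_lbfgs(), assuming a channels-first Keras MNIST classifier kmodel with pixels scaled to [0, 1] (an assumption; the data argument is unused in the snippet above):

import numpy as np
from keras.datasets import mnist

(_, _), (x_test, _) = mnist.load_data()
sample = x_test[:1].reshape(1, 1, 28, 28).astype(np.float32) / 255.0
success, result = perturb_lbfgs(sample, kmodel, data=None)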
Example #8
#
# fgm_params = {'eps': 0.3,
#                'clip_min': 0.,
#                'clip_max': 1.}
#
# adv_x = fgm.generate((train_images[:2]), **fgm_params)
# preds_adv = model.get_probs(adv_x)



model = TensorFlowModel(cnn.inputs, cnn.network, bounds=(0, 255))

from foolbox.criteria import TargetClassProbability

target_class = 9
criterion = TargetClassProbability(target_class, p=0.99)


from foolbox.attacks import FGSM


attack = FGSM(model)
image = train_images[0].reshape((28, 28, 1))
label = np.argmax(model.predictions(image))

adversarial = attack(image, label=label, epsilons=1, max_epsilon=0.03 * 255)


import matplotlib.pyplot as plt

plt.subplot(1, 3, 1)
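The example breaks off after the first subplot; a plausible continuation (illustrative only) shows the original image, the adversarial, and their difference:

plt.imshow(image.reshape(28, 28), cmap='gray')
plt.title('original')

plt.subplot(1, 3, 2)
plt.imshow(adversarial.reshape(28, 28), cmap='gray')
plt.title('adversarial')

plt.subplot(1, 3, 3)
plt.imshow((adversarial - image).reshape(28, 28), cmap='gray')
plt.title('difference')
plt.show()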
Example #9
cat_x = np.expand_dims(cat_img, axis=0)

# Build a foolbox model
fmodel = KerasModel(kmodel, bounds=(-1, 1))

# label of the target class
preds = kmodel.predict(dog_x)
dog_label = np.argmax(preds)

# label of the original class
preds = kmodel.predict(cat_x)
cat_label = np.argmax(preds)

criterion_1 = TopKMisclassification(k=5)
criterion_2 = TargetClass(dog_label)
criterion_3 = TargetClassProbability(dog_label, p=0.5)
criterion = criterion_1 & criterion_2 & criterion_3

attack = BoundaryAttack(model=fmodel, criterion=criterion)

iteration_size = 1000
global_iterations = 0
# Run boundary attack to generate an adversarial example
adversarial = attack(cat_img,
                     label=cat_label,
                     unpack=False,
                     iterations=iteration_size,
                     starting_point=dog_img,
                     log_every_n_steps=10,
                     verbose=True)
global_iterations += iteration_size
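With unpack=False the attack returns a foolbox Adversarial object rather than a raw array; under the foolbox 1.x API (assumed here) the result can be inspected roughly like this:

print('adversarial class:', adversarial.adversarial_class)
print('distance to original:', adversarial.distance)
best_image = adversarial.image  # best adversarial found so far (None if none found)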
Example #10
    print(train_images.shape, train_labels.shape)
    print(test_images.shape, test_labels.shape)

    #    print(np.argmax(fmodel.predictions(test_images)), int(test_labels))
    """
    # run the attack
    attack = LBFGSAttack(model=fmodel, criterion=TargetClassProbability(781, p=.5))
    adversarial = attack(image, label)
    # show results
    print(np.argmax(fmodel.predictions(adversarial)))
    print(foolbox.utils.softmax(fmodel.predictions(adversarial))[781])
    adversarial_rgb = adversarial[np.newaxis, :, :, :]
    preds = kmodel.predict(preprocess_input(adversarial_rgb.copy()))
    print("Top 5 predictions (adversarial: ", decode_predictions(preds, top=5))
    """

    #run the attack
    attack = foolbox.attacks.FGSM(model=fmodel,
                                  criterion=TargetClassProbability(10, p=.99))
    adversarial = attack(test_images, test_labels)
    print(np.argmax(fmodel.predictions(adversarial)))
    print(foolbox.utils.softmax(fmodel.predictions(adversarial))[10])

    plt.subplot(1, 3, 1)
    plt.imshow(test_images)

    plt.subplot(1, 3, 2)
    plt.imshow(adversarial)

    plt.subplot(1, 3, 3)
    plt.imshow(adversarial - test_images)
Example #11
        label = int(target_origin_image.split('.')[0])

        # Load target image.
        utility.print_message(
            OK, '{}/{} Load original image: {} = {}'.format(
                label + 1, len(target_list), target_origin_image,
                adv.classes[label]))
        origin_image = image.img_to_array(
            image.load_img(os.path.join(adv.origin_image_path,
                                        target_origin_image),
                           target_size=(adv.pixel_size, adv.pixel_size)))

        # Specify the target label.
        for idx2, target_class in enumerate(reversed(range(adv.nb_classes))):
            # Indicate the target label.
            criterion = TargetClassProbability(target_class, p=0.9)
            attack = foolbox.attacks.LBFGSAttack(model=fool_model,
                                                 criterion=criterion)
            utility.print_message(
                OK,
                'Run the attack: target={}.{}'.format(target_class,
                                                      adv.classes[target_class]))

            # Run the attack.
            adversarial = attack(origin_image, label=label, unpack=False)

            # Save and show adversarial examples.
            utility.print_message(OK, 'Show the images.')
            plt.figure()
            plt.subplot(1, 3, 1)
            plt.title('Original')
Example #12
import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability
from saiterative import SAIterativeAttack
import numpy as np
import keras
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import decode_predictions

keras.backend.set_learning_phase(0)
kmodel = ResNet50(weights='imagenet')
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)

image, label = foolbox.utils.imagenet_example()

# run the attack
attack = SAIterativeAttack(model=fmodel, criterion=TargetClassProbability(781, p=.5))
adversarial = attack(image[:, :, ::-1], label)

# show results
print(np.argmax(fmodel.predictions(adversarial)))
print(foolbox.utils.softmax(fmodel.predictions(adversarial))[781])
adversarial_rgb = adversarial[np.newaxis, :, :, ::-1]
preds = kmodel.predict(preprocess_input(adversarial_rgb.copy()))
print("Top 5 predictions (adversarial: ", decode_predictions(preds, top=5))