def adversarialAccuracy(model):
    """Attack every test sample and print how often the attack succeeds.

    ``model`` is wrapped in a foolbox ``KerasModel`` with bounds ``(0, 1)``
    and a one-iteration ``L2BasicIterativeAttack`` is run against each
    ``(x_test[i], y_test_original[i])`` pair. A running success rate is
    printed every 50 samples, and the overall rate is printed at the end.

    Relies on the module-level names ``x_test``, ``y_test_original``,
    ``channel_axis``, ``displayImage`` and ``LA``.

    :param model: Keras model to attack.
    :return: None; all results are printed.
    """
    keras_model = KerasModel(model, bounds=(0, 1), channel_axis=channel_axis)
    criterion = Misclassification()
    # The attack only depends on the model and the criterion, so it is built
    # once instead of once per sample.
    attack = foolbox.attacks.L2BasicIterativeAttack(keras_model, criterion)

    length = x_test.shape[0]
    wrong = 0
    period = 50
    for i in range(length):
        image, label = x_test[i], y_test_original[i]

        image_adv = attack(image,
                           label,
                           epsilon=1.0,
                           stepsize=1.0,
                           iterations=1,
                           binary_search=False)

        # The attack yields None when it finds no adversarial example.
        if image_adv is not None:
            prediction = np.argmax(
                keras_model.predictions_and_gradient(image_adv, label)[0])
            # Sanity check: a returned adversarial must be misclassified.
            assert prediction != label
            wrong += 1
        if i % period == period - 1:
            print("Adversarial attack success rate: {} / {} = {}".format(
                wrong, i + 1, wrong / (i + 1)))
            if image_adv is not None:
                displayImage(image_adv, label)
                print("Size of perturbation: {}".format(
                    LA.norm(image_adv - image, None)))

    # Guard the final ratio so an empty test set does not raise
    # ZeroDivisionError.
    error_rate = wrong / length if length else 0.0
    print("Adversarial error rate: {} / {} = {}".format(
        wrong, length, error_rate))
Ejemplo n.º 2
0
def test_keras_model_preprocess():
    """Check ``KerasModel`` preprocessing on a global-average-pooling net.

    Four wrappers share one Keras graph: two without preprocessing, one with
    an array/array preprocessing tuple and one with a scalar/array tuple.
    The test verifies that predicting does not modify the caller's input,
    that all outputs have shape ``(2, num_classes)``, and that the two plain
    wrappers agree to five decimals.
    """
    num_classes = 1000
    channels = num_classes
    bounds = (0, 255)

    def wrap(**extra):
        # Every wrapper gets its own Keras Model over the shared graph.
        return KerasModel(Model(inputs=inputs, outputs=logits),
                          bounds=bounds,
                          predicts='logits',
                          **extra)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        inputs = Input(shape=(5, 5, channels))
        logits = GlobalAveragePooling2D(
            data_format='channels_last')(inputs)

        array_preprocessing = (
            np.arange(num_classes)[None, None],
            np.random.uniform(size=(5, 5, channels)) + 1,
        )

        model1 = wrap()
        model2 = wrap(preprocessing=array_preprocessing)
        model3 = wrap()

        scalar_preprocessing = (
            0,
            np.random.uniform(size=(5, 5, channels)) + 1,
        )

        model4 = wrap(preprocessing=scalar_preprocessing)

    np.random.seed(22)
    test_images = np.random.rand(2, 5, 5, channels).astype(np.float32)
    untouched = test_images.copy()

    p1 = model1.batch_predictions(test_images)
    p2 = model2.batch_predictions(test_images)

    # Preprocessing may work in place internally, but the caller's array
    # must come back unchanged.
    assert (test_images == untouched).all()

    p3 = model3.batch_predictions(test_images)

    for output in (p1, p2, p3):
        assert output.shape == (2, num_classes)

    np.testing.assert_array_almost_equal(p1 - p1.max(),
                                         p3 - p3.max(),
                                         decimal=5)

    # Only checks that prediction with the scalar/array tuple runs.
    model4.batch_predictions(test_images)
Ejemplo n.º 3
0
def sample_attack(keras_model,
                  image,
                  attack_method,
                  input_name,
                  labeled,
                  target=0):
    """Generate and save an adversarial image for the given model and input.

    The image is prepared to the model's input shape, classified, and then
    attacked with the requested method using a ``TargetClassProbability``
    criterion (``p=.5``). The result is scaled by 255 and saved under
    ``Config.UPLOAD_IMAGE_FOLDER``.

    :param keras_model: Keras model to attack.
    :param image: input image, passed through ``nm.prepare_image``.
    :param attack_method: ``'lbfgs'`` or ``'singlepixelattack'``
        (case-insensitive).
    :param input_name: suffix used in the saved file name.
    :param labeled: expected class of ``image``; ``None`` skips the check.
    :param target: class handed to the attack criterion. If it equals the
        predicted class, the next class (modulo the model's class count) is
        used instead.
    :return: ``(adversarial, file_name)`` on success, ``(None, message)``
        when no adversarial image is produced.
    """
    input_shape = nm.extract_input_shape(keras_model)[1:]
    image = nm.prepare_image(image, input_shape)
    label = np.argmax(keras_model.predict(np.asarray([image])))
    if labeled is not None and label != labeled:
        return None, "This image cannot be correctly classified, no adversarial sample will be generated. expected: " + str(
            labeled) + " actual: " + str(label)

    attack_classes = {
        'lbfgs': LBFGSAttack,
        'singlepixelattack': SinglePixelAttack,
    }
    method = str(attack_method).lower()
    if method not in attack_classes:
        # Same (None, message) shape as the other failure paths so callers
        # can always unpack two values.
        return None, "Attack method not supported at the moment"

    network_model = KerasModel(keras_model, bounds=(0, 1))

    print(label)
    # The target has to be settled before the criterion is built; otherwise
    # the attack would still aim at the class the image already has.
    if label == target:
        target = (target + 1) % network_model.num_classes()
    attack = attack_classes[method](
        model=network_model,
        criterion=TargetClassProbability(target, p=.5))

    # NOTE(review): the attack runs on the image with its last axis reversed,
    # while `label` was predicted on the unreversed image — confirm intended.
    adversarial = attack(image[:, :, ::-1], label)
    if adversarial is None:
        # foolbox attacks return None when no adversarial is found.
        return None, "No adversarial sample could be found"

    output = network_model.predictions(adversarial)
    print(np.argmax(output))
    adversarial = adversarial.reshape(input_shape)
    adversarial = adversarial * 255
    adv_image_name = 'adv_{}_origin_{}_{}_{}'.format(target, label,
                                                     attack_method, input_name)
    print(adversarial.shape)

    pixels = np.uint8(adversarial)
    if pixels.ndim == 2:
        # NOTE(review): mode "1" with uint8 data — confirm "L" is not meant.
        im = Image.fromarray(pixels, mode="1")
    elif pixels.ndim == 3 and pixels.shape[2] == 1:
        im = Image.fromarray(pixels.reshape(pixels.shape[0], pixels.shape[1]),
                             mode="L")
    elif pixels.ndim == 3 and pixels.shape[2] == 3:
        im = Image.fromarray(pixels, mode="RGB")
    else:
        # Previously this fell through to `None.save(...)`.
        return None, "Unsupported image shape: " + str(adversarial.shape)

    im.save(os.path.join(basedir, Config.UPLOAD_IMAGE_FOLDER, adv_image_name))
    print('adv', adv_image_name)
    return adversarial, adv_image_name
Ejemplo n.º 4
0
def test_keras_backward(num_classes):
    """Check ``KerasModel.backward`` against a hand-computed gradient.

    The network is a single global average pooling over a 5x5 input, so the
    expected input gradient is the logit gradient divided by 25, repeated
    over all 25 spatial positions.
    """
    channels = num_classes
    bounds = (0, 255)

    pooling_net = Sequential()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        pooling_net.add(
            GlobalAveragePooling2D(data_format='channels_last',
                                   input_shape=(5, 5, channels)))

        model = KerasModel(pooling_net, bounds=bounds, predicts='logits')

    test_image = np.random.rand(5, 5, channels).astype(np.float32)
    logit_grad = np.random.rand(num_classes).astype(np.float32)

    input_grad = model.backward(logit_grad, test_image)
    assert input_grad.shape == test_image.shape

    # Same per-channel value at each of the 5x5 positions.
    per_position = (logit_grad / 25.).reshape((1, 1, -1))
    expected_grad = np.tile(per_position, (5, 5, 1))

    np.testing.assert_almost_equal(input_grad, expected_grad)
Ejemplo n.º 5
0
def test_keras_model(num_classes):
    """Exercise the forward/gradient API of ``KerasModel``.

    Uses a global-average-pooling network and checks output shapes, that
    ``forward_and_gradient_one`` agrees with ``forward_one`` and
    ``gradient_one``, and that ``num_classes()`` reports the expected count.
    """
    channels = num_classes
    bounds = (0, 255)

    pooling_net = Sequential()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        pooling_layer = GlobalAveragePooling2D(data_format="channels_last",
                                               input_shape=(5, 5, channels))
        pooling_net.add(pooling_layer)

        model = KerasModel(pooling_net, bounds=bounds, predicts="logits")

    test_images = np.random.rand(2, 5, 5, channels).astype(np.float32)
    first_image = test_images[0]
    test_label = 7

    batch_logits = model.forward(test_images)
    assert batch_logits.shape == (2, num_classes)

    test_logits = model.forward_one(first_image)
    assert test_logits.shape == (num_classes, )

    test_gradient = model.gradient_one(first_image, test_label)
    assert test_gradient.shape == first_image.shape

    # The combined call has to match the two separate calls.
    combined = model.forward_and_gradient_one(first_image, test_label)
    np.testing.assert_almost_equal(combined[0], test_logits)
    combined = model.forward_and_gradient_one(first_image, test_label)
    np.testing.assert_almost_equal(combined[1], test_gradient)

    assert model.num_classes() == num_classes
Ejemplo n.º 6
0
def test_keras_model(num_classes):
    """Exercise the predictions/gradient API of ``KerasModel``.

    Uses a global-average-pooling network and checks output shapes, that
    ``predictions_and_gradient`` agrees with ``predictions`` and
    ``gradient``, and that ``num_classes()`` reports the expected count.
    """
    channels = num_classes
    bounds = (0, 255)

    pooling_net = Sequential()
    pooling_layer = GlobalAveragePooling2D(data_format='channels_last',
                                           input_shape=(5, 5, channels))
    pooling_net.add(pooling_layer)

    model = KerasModel(pooling_net, bounds=bounds, predicts='logits')

    test_images = np.random.rand(2, 5, 5, channels).astype(np.float32)
    first_image = test_images[0]
    test_label = 7

    batch_logits = model.batch_predictions(test_images)
    assert batch_logits.shape == (2, num_classes)

    test_logits = model.predictions(first_image)
    assert test_logits.shape == (num_classes, )

    test_gradient = model.gradient(first_image, test_label)
    assert test_gradient.shape == first_image.shape

    # The combined call has to match the two separate calls.
    combined = model.predictions_and_gradient(first_image, test_label)
    np.testing.assert_almost_equal(combined[0], test_logits)
    combined = model.predictions_and_gradient(first_image, test_label)
    np.testing.assert_almost_equal(combined[1], test_gradient)

    assert model.num_classes() == num_classes
    def __init__(self,
                 model,
                 bounds,
                 channel_axis=3,
                 preprocessing=(0, 1),
                 predicts='probabilities',
                 entropy_mask=True,
                 cache_grad_mask=False):
        """Wrap a Keras model and remember the gradient-mask settings.

        ``bounds``, ``channel_axis`` and ``preprocessing`` are passed both to
        the base class and to the inner ``KerasModel``; ``predicts`` only to
        the inner model. ``entropy_mask`` and ``cache_grad_mask`` are stored
        as given, and ``grad_mask`` starts out as ``None``.
        """
        base = super(FoolboxKerasModelEntropy, self)
        base.__init__(bounds=bounds,
                      channel_axis=channel_axis,
                      preprocessing=preprocessing)

        # Mask configuration; no mask has been computed yet.
        self.entropy_mask = entropy_mask
        self.cache_grad_mask = cache_grad_mask
        self.grad_mask = None

        # Inner foolbox model that does the actual Keras work.
        self.keras_model = KerasModel(model,
                                      bounds=bounds,
                                      channel_axis=channel_axis,
                                      preprocessing=preprocessing,
                                      predicts=predicts)
Ejemplo n.º 8
0
def setup_local_model():
    """Return a foolbox ``KerasModel`` around an ImageNet ResNet50.

    Intended for local testing. The wrapper uses bounds ``(0, 255)``,
    predicts logits, and gets the preprocessing tuple
    ``(array([104, 116, 123]), 1)``.
    """
    # Set the Keras learning phase to 0 before the network is built.
    keras.backend.set_learning_phase(0)
    resnet = keras.applications.resnet50.ResNet50(weights='imagenet')
    return KerasModel(resnet,
                      bounds=(0, 255),
                      preprocessing=(np.array([104, 116, 123]), 1),
                      predicts='logits')
Ejemplo n.º 9
0
def test_keras_model_forward_gradients():
    """Check the batched gradient with a finite-difference loss comparison.

    The loss is evaluated half a step either side of the inputs along the
    returned gradient. The difference must be positive and match
    ``eps * ||g||^2`` to one decimal after scaling by 1e5.
    """
    num_classes = 1000
    channels = num_classes
    bounds = (0, 255)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        inputs = Input(shape=(5, 5, channels))
        logits = GlobalAveragePooling2D(data_format="channels_last")(inputs)

        preprocessing = (
            np.arange(num_classes)[None, None],
            np.random.uniform(size=(5, 5, channels)) + 1,
        )

        keras_net = Model(inputs=inputs, outputs=logits)
        model = KerasModel(keras_net,
                           bounds=bounds,
                           predicts="logits",
                           preprocessing=preprocessing)

    eps = 1e-3

    np.random.seed(22)
    test_images = np.random.rand(5, 5, 5, channels).astype(np.float32)
    test_labels = [7] * 5

    _, g1 = model.forward_and_gradient(test_images, test_labels)

    label_batch = np.array([test_labels])
    half_step = eps / 2 * g1
    loss_below = model._loss_fn([test_images - half_step, label_batch])[0]
    loss_above = model._loss_fn([test_images + half_step, label_batch])[0]

    scaled_increase = 1e5 * (loss_above - loss_below)
    assert scaled_increase > 1

    # make sure that gradient is numerically correct
    expected_increase = 1e5 * eps * np.linalg.norm(g1)**2
    np.testing.assert_array_almost_equal(scaled_increase,
                                         expected_increase,
                                         decimal=1)
Ejemplo n.º 10
0
def test_keras_model_gradients():
    """Check the single-image gradient with a finite-difference comparison.

    The loss is evaluated half a step either side of the image along the
    returned gradient. The difference must be positive and match
    ``eps * ||g||^2`` to one decimal after scaling by 1e5.
    """
    num_classes = 1000
    channels = num_classes
    bounds = (0, 255)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        inputs = Input(shape=(5, 5, channels))
        logits = GlobalAveragePooling2D(data_format='channels_last')(inputs)

        preprocessing = (
            np.arange(num_classes)[None, None],
            np.random.uniform(size=(5, 5, channels)) + 1,
        )

        keras_net = Model(inputs=inputs, outputs=logits)
        model = KerasModel(keras_net,
                           bounds=bounds,
                           predicts='logits',
                           preprocessing=preprocessing)

    eps = 1e-3

    np.random.seed(22)
    test_image = np.random.rand(5, 5, channels).astype(np.float32)
    test_label = 7

    _, g1 = model.predictions_and_gradient(test_image, test_label)

    label_batch = np.array([test_label])
    # The loss function works on batches, so add a leading axis of size one.
    image_batch = test_image[None]
    half_step = eps / 2 * g1
    loss_below = model._loss_fn([image_batch - half_step, label_batch])[0]
    loss_above = model._loss_fn([image_batch + half_step, label_batch])[0]

    scaled_increase = 1e5 * (loss_above - loss_below)
    assert scaled_increase > 1

    # make sure that gradient is numerically correct
    expected_increase = 1e5 * eps * np.linalg.norm(g1)**2
    np.testing.assert_array_almost_equal(scaled_increase,
                                         expected_increase,
                                         decimal=1)
Ejemplo n.º 11
0
def test_keras_model_probs(num_classes):
    """Compare ``KerasModel`` wrappers built with different ``predicts``.

    One wrapper is built on the logits output with ``predicts='logits'``;
    two are built on the softmax output, with ``'probabilities'`` and
    ``'probs'``. After subtracting each output's maximum, the logits wrapper
    must agree with the first softmax wrapper to one decimal, and the two
    softmax wrappers must agree to five decimals.
    """
    channels = num_classes
    bounds = (0, 255)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        inputs = Input(shape=(5, 5, channels))
        logits = GlobalAveragePooling2D(data_format='channels_last')(inputs)
        probs = Activation(softmax)(logits)

        logits_model = KerasModel(Model(inputs=inputs, outputs=logits),
                                  bounds=bounds,
                                  predicts='logits')

        probabilities_model = KerasModel(Model(inputs=inputs, outputs=probs),
                                         bounds=bounds,
                                         predicts='probabilities')

        probs_model = KerasModel(Model(inputs=inputs, outputs=probs),
                                 bounds=bounds,
                                 predicts='probs')

    np.random.seed(22)
    test_images = np.random.rand(2, 5, 5, channels).astype(np.float32)

    p1 = logits_model.batch_predictions(test_images)
    p2 = probabilities_model.batch_predictions(test_images)
    p3 = probs_model.batch_predictions(test_images)

    for output in (p1, p2, p3):
        assert output.shape == (2, num_classes)

    shifted_p2 = p2 - p2.max()

    np.testing.assert_array_almost_equal(p1 - p1.max(),
                                         shifted_p2,
                                         decimal=1)

    np.testing.assert_array_almost_equal(shifted_p2,
                                         p3 - p3.max(),
                                         decimal=5)
Ejemplo n.º 12
0
def test_keras_model_preprocess():
    """Check that tuple preprocessing does not alter the caller's images.

    Three wrappers share one pooling graph; only the second receives a
    preprocessing tuple. The input must be unchanged after predicting, all
    outputs must have shape ``(2, num_classes)``, and the two plain wrappers
    must agree to five decimals.
    """
    num_classes = 1000
    channels = num_classes
    bounds = (0, 255)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        inputs = Input(shape=(5, 5, channels))
        logits = GlobalAveragePooling2D(data_format='channels_last')(inputs)

        preprocessing = (
            np.arange(num_classes)[None, None],
            np.random.uniform(size=(5, 5, channels)) + 1,
        )

        # Options shared by all three wrappers.
        shared = dict(bounds=bounds, predicts='logits')

        model1 = KerasModel(Model(inputs=inputs, outputs=logits), **shared)
        model2 = KerasModel(Model(inputs=inputs, outputs=logits),
                            preprocessing=preprocessing,
                            **shared)
        model3 = KerasModel(Model(inputs=inputs, outputs=logits), **shared)

    np.random.seed(22)
    test_images = np.random.rand(2, 5, 5, channels).astype(np.float32)
    untouched = test_images.copy()

    p1 = model1.batch_predictions(test_images)
    p2 = model2.batch_predictions(test_images)

    # make sure the images have not been changed by
    # the in-place preprocessing
    assert (test_images == untouched).all()

    p3 = model3.batch_predictions(test_images)

    expected_shape = (2, num_classes)
    assert p1.shape == expected_shape
    assert p2.shape == expected_shape
    assert p3.shape == expected_shape

    np.testing.assert_array_almost_equal(p1 - p1.max(),
                                         p3 - p3.max(),
                                         decimal=5)
Ejemplo n.º 13
0
def test_keras_model_preprocess():
    """Check that an in-place ``preprocess_fn`` does not alter the input.

    Three wrappers share one pooling graph; only the second receives a
    ``preprocess_fn`` that halves its argument in place. The input must be
    unchanged after predicting, all outputs must have shape
    ``(2, num_classes)``, and the two plain wrappers must agree to five
    decimals.
    """
    num_classes = 1000
    channels = num_classes
    bounds = (0, 255)

    inputs = Input(shape=(5, 5, channels))
    logits = GlobalAveragePooling2D(data_format='channels_last')(inputs)

    def preprocess_fn(x):
        # Deliberately mutates its argument to provoke aliasing bugs.
        x /= 2
        return x

    # Options shared by all three wrappers.
    shared = dict(bounds=bounds, predicts='logits')

    model1 = KerasModel(Model(inputs=inputs, outputs=logits), **shared)
    model2 = KerasModel(Model(inputs=inputs, outputs=logits),
                        preprocess_fn=preprocess_fn,
                        **shared)
    model3 = KerasModel(Model(inputs=inputs, outputs=logits), **shared)

    np.random.seed(22)
    test_images = np.random.rand(2, 5, 5, channels).astype(np.float32)
    untouched = test_images.copy()

    p1 = model1.batch_predictions(test_images)
    p2 = model2.batch_predictions(test_images)

    # make sure the images have not been changed by
    # the in-place preprocessing
    assert (test_images == untouched).all()

    p3 = model3.batch_predictions(test_images)

    expected_shape = (2, num_classes)
    assert p1.shape == expected_shape
    assert p2.shape == expected_shape
    assert p3.shape == expected_shape

    np.testing.assert_array_almost_equal(p1 - p1.max(),
                                         p3 - p3.max(),
                                         decimal=5)
Ejemplo n.º 14
0
def main():
    """Load the 10-bar OHLC CNN and generate adversarial examples for it.

    Reads the Keras model and the pickled dataset from fixed relative paths,
    wraps the model for foolbox with bounds ``(-1, 1)``, and hands the
    training inputs and one-hot labels to ``generate_adversarial_examples``
    together with a ``LocalSearchAttack``.
    """
    # Keras model and its foolbox wrapper
    kmodel = load_model('./models/cnn_model_10bar_ohlc.h5')
    # Pickled dataset
    dataset = pro.load_pkl(
        './data/label8_eurusd_10bar_1500_500_val200_gaf.pkl')
    inputs = dataset['train_ohlc_gaf']
    onehot_labels = dataset['train_label_onehot']
    fmodel = KerasModel(kmodel, bounds=(-1, 1))
    # Attack object used to create the fake data
    local_search = foolbox.attacks.LocalSearchAttack(model=fmodel)
    generate_adversarial_examples(kmodel=kmodel,
                                  fmodel=fmodel,
                                  attacker=local_search,
                                  x_data=inputs,
                                  y_label=onehot_labels)
Ejemplo n.º 15
0
def main(params):
    """Attack every sample of a pickled dataset with ``LocalSearchAttack``.

    :param params: mapping with ``'pkl_name'`` (dataset path) and
        ``'model_name'`` (Keras model path).
    :return: None; the value from ``attack_all_samples`` is not passed on.
    """
    # Dataset first, then the Keras model it belongs to.
    samples = pro.load_pkl(params['pkl_name'])
    keras_net = load_model(params['model_name'])

    # foolbox wrapper with inputs bounded to [-1, 1]
    wrapped_net = KerasModel(keras_net, bounds=(-1, 1))
    local_search = foolbox.attacks.LocalSearchAttack(model=wrapped_net)

    attack_all_samples(samples, keras_net, wrapped_net, local_search)
Ejemplo n.º 16
0
def main():
    """Run FGSM against the MNIST model and show the adversarial image.

    Prints the model's predicted class for the sample before and after the
    attack, and displays the adversarial image with matplotlib.
    """
    # model wont work with eager execution enable
    tf.compat.v1.disable_eager_execution()

    batch_size, num_classes, epochs = 128, 10, 12

    model = mnistmodel.MnistModel(batch_size, num_classes, epochs)
    white_box = model.build_model()

    x_sample, y_sample = model.get_samples()

    def predicted_class(sample):
        # The network expects a batch of 28x28 single-channel images.
        batch = np.reshape(sample, (1, 28, 28, 1))
        return np.argmax(white_box.predict(batch), axis=-1)[0]

    # Prediction on the clean sample
    print(np.asarray(predicted_class(x_sample)))

    # NOTE(review): bounds are (0, 255) and the attack label is y_sample,
    # not the prediction above — confirm both against MnistModel.
    fmodel = KerasModel(white_box,
                        bounds=(0, 255),
                        preprocessing=dict(flip_axis=-1))

    attack = attacks.FGSM(fmodel)
    adversarial = attack(x_sample, y_sample)

    plt.imshow(np.reshape(adversarial, (28, 28)))
    plt.show()

    # Prediction on the adversarial sample
    print(predicted_class(adversarial))
Ejemplo n.º 17
0
def perturb_lbfgs(sample, model, data):
    """Perturb a sample with the L-BFGS attack (Szegedy et al.) via foolbox.

    Based on the tutorial:
    https://foolbox.readthedocs.io/en/latest/user/tutorial.html

    The attack targets the class after the model's current prediction
    (modulo 10) with a ``TargetClassProbability`` criterion of ``p=0.90``.

    :param sample: input to attack; it is reshaped to ``(28, 28, 1)`` for
        foolbox.
    :param model: Keras model providing ``predict_classes``.
    :param data: unused; kept so existing callers keep working.
    :return: ``(1, adversarial)`` if the model's prediction for the
        adversarial differs from the original prediction, otherwise
        ``(0, sample)``. The latter includes the case where the attack
        finds nothing.
    """
    # create model for foolbox
    foolbox_model = KerasModel(model, (0.0, 1.0), channel_axis=1)

    # get correct class
    correct_class = model.predict_classes(sample)

    # set target to be next higher class (and 0 for 9)
    target_class = (correct_class + 1) % 10

    # set attack criterion to be 90% target class probability
    criterion = TargetClassProbability(target_class, p=0.90)

    # the model and criterion are supplied through the Adversarial instance
    attack = LBFGSAttack()

    # generate adversarial example
    # sample needs to be transformed from (batchsize, channels, rows, cols) format to (height, width, channels) for
    # foolbox, but that leads to problems with the model
    transformed_sample = sample.reshape(28, 28, 1)
    ad_ins = Adversarial(foolbox_model, criterion, transformed_sample,
                         correct_class)

    adversarial = attack(ad_ins)

    # A failed attack yields None; report it as "not perturbed" instead of
    # crashing inside predict_classes.
    if adversarial is None:
        return (0, sample)

    # get class of adversarial example
    pred_class = model.predict_classes(adversarial)
    if pred_class != correct_class:
        return (1, adversarial)

    return (0, sample)
Ejemplo n.º 18
0
from foolbox.criteria import OriginalClassProbability, Misclassification, ConfidentMisclassification

from skimage.measure import compare_ssim as ssim

# Fashion-MNIST images get an extra axis at index 3 and are divided by 255.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = \
    fashion_mnist.load_data()
train_images = np.expand_dims(train_images, axis=3) / 255.0
test_images = np.expand_dims(test_images, axis=3) / 255.0

# Models loaded from disk; only `trained_model` is wrapped for foolbox here.
# NOTE(review): the images carry the added axis at index 3 while the wrapper
# is told channel_axis=1 — confirm which layout the model expects.
trained_model = keras.models.load_model('fashionMNIST.h5')
cnn5 = keras.models.load_model('cnn5.h5')
sub_model = KerasModel(trained_model, bounds=(0, 1), channel_axis=1)

# Draw `testNum` distinct indices out of 10000.
testNum = 100
randArray = np.random.choice(10000, testNum, replace=False)


def diff_ssim(img1, img2):
    """Return the SSIM of two images after scaling by 255 to 28x28 arrays."""
    first, second = ((img * 255).reshape([28, 28]) for img in (img1, img2))
    return ssim(first, second)


# Module-level accumulators, all starting at zero.
attackRate_u2 = attackSSIM_u2 = 0.0
count = 0
class FoolboxKerasModelEntropy(DifferentiableModel):
    """Foolbox model that masks the gradients of a wrapped ``KerasModel``.

    Predictions and gradients are delegated to an inner ``KerasModel``.
    Unless ``entropy_mask`` is falsy, each gradient is multiplied by a mask
    before it is returned. With ``cache_grad_mask`` the mask stored in
    ``grad_mask`` is reused; otherwise one is derived from every image via
    ``utils.image2mask``.
    """

    # Entropy values at or above this threshold map to 1 in the mask.
    _ENTROPY_THRESHOLD = 4.2

    def __init__(self,
                 model,
                 bounds,
                 channel_axis=3,
                 preprocessing=(0, 1),
                 predicts='probabilities',
                 entropy_mask=True,
                 cache_grad_mask=False):
        """Wrap ``model`` and store the masking configuration.

        ``bounds``, ``channel_axis``, ``preprocessing`` and ``predicts`` are
        forwarded to the inner ``KerasModel``.

        Parameters
        ----------
        entropy_mask : bool
            Whether gradients are masked at all.
        cache_grad_mask : bool
            Whether to reuse ``grad_mask`` instead of computing a mask for
            every image.
        """
        super(FoolboxKerasModelEntropy,
              self).__init__(bounds=bounds,
                             channel_axis=channel_axis,
                             preprocessing=preprocessing)

        self.entropy_mask = entropy_mask
        self.grad_mask = None
        self.cache_grad_mask = cache_grad_mask
        self.keras_model = KerasModel(model, bounds, channel_axis,
                                      preprocessing, predicts)

    @staticmethod
    def _expand_mask(mask, image):
        """Repeat a 2-D mask ``image.shape[2]`` times to match ``image``.

        Assumes ``image`` is laid out as (rows, cols, channels) with the
        same rows and cols as ``mask``.
        """
        flat = mask.reshape(mask.shape[0] * mask.shape[1], 1)
        return np.broadcast_to(
            flat, (flat.shape[0], image.shape[2])).reshape(image.shape)

    def compute_gradient_mask(self, image):
        """Compute a binary entropy mask for ``image`` and store it.

        The image is converted to gray scale and filtered with a rank
        entropy filter over a disk of radius 3. The result is thresholded
        and stored in ``self.grad_mask`` with the shape of ``image``.
        """
        gray = skimage.color.rgb2gray(image)
        mask = skimage.filters.rank.entropy(gray, skimage.morphology.disk(3))

        # Both selections are taken before the mask is overwritten.
        low = mask < self._ENTROPY_THRESHOLD
        high = mask >= self._ENTROPY_THRESHOLD

        mask[low] = 0.0
        mask[high] = 1.0

        self.grad_mask = self._expand_mask(mask, image)

    def __mask_gradient(self, grad, image):
        """Return ``grad`` multiplied by the mask that applies to ``image``.

        Both flags are checked by truthiness, so numpy booleans are honoured
        as well as ``True``/``False``.
        """
        if not self.entropy_mask:
            return grad

        if self.cache_grad_mask:
            # Compute the cached mask on first use so that a missing
            # compute_gradient_mask() call no longer ends in `grad * None`.
            if self.grad_mask is None:
                self.compute_gradient_mask(image)
            return grad * self.grad_mask

        mask = self._expand_mask(utils.image2mask(image), image)
        return grad * mask

    def predictions_and_gradient(self, image, label):
        """Calculates predictions for an image and the gradient of
        the cross-entropy loss w.r.t. the image.
        Parameters
        ----------
        image : `numpy.ndarray`
            Single input with shape as expected by the model
            (without the batch dimension).
        label : int
            Reference label used to calculate the gradient.
        Returns
        -------
        predictions : `numpy.ndarray`
            Vector of predictions (logits, i.e. before the softmax) with
            shape (number of classes,).
        gradient : `numpy.ndarray`
            The masked gradient of the cross-entropy loss w.r.t. the image.
            Will have the same shape as the image.
        See Also
        --------
        :meth:`gradient`
        """

        pred, grad = self.keras_model.predictions_and_gradient(image, label)

        return pred, self.__mask_gradient(grad, image)

    def num_classes(self):
        """Return the number of classes reported by the wrapped model."""
        return self.keras_model.num_classes()

    def batch_predictions(self, images):
        """Return the wrapped model's predictions for a batch of images."""
        return self.keras_model.batch_predictions(images)

    def backward(self, gradient, image):
        """Backpropagates the gradient of some loss w.r.t. the logits
        through the network and returns the gradient of that loss w.r.t
        to the input image.
        Parameters
        ----------
        gradient : `numpy.ndarray`
            Gradient of some loss w.r.t. the logits.
        image : `numpy.ndarray`
            Single input with shape as expected by the model
            (without the batch dimension).
        Returns
        -------
        gradient : `numpy.ndarray`
            The masked gradient w.r.t the image.
        See Also
        --------
        :meth:`gradient`
        """

        grad = self.keras_model.backward(gradient, image)

        return self.__mask_gradient(grad, image)
            model = deep.five_layer_dnn_model_wide(input_shape, output_shape,
                                                   0, l1, 0)
        elif args.experiment_type == "six_layer":
            model = deep.six_layer_dnn_model_wide(input_shape, output_shape, 0,
                                                  l1, 0)
        elif args.experiment_type == "VGG":
            model = convolutional.vgg_model_wide(args.dataset, 0, l1, 0)
        elif args.experiment_type == "leNet":
            model = convolutional.leNet_model_wide(0, l1, 0)
        else:
            raise Exception("Invalid model!")

        model.fit(x_train, y_train, epochs=50, batch_size=128)
        preds = np.argmax(model.predict(x_test), axis=1)

        kmodel = KerasModel(model=model, bounds=(min_, max_))

        attack = None
        if args.attack_type == 'l2':
            attack = CarliniWagnerL2Attack(kmodel, TargetClass(7))
        elif args.attack_type == 'linf':
            attack = RandomPGD(kmodel, TargetClass(7))

        x_sample = np.take(x_test, ones, axis=0)

        # We exclude by default those examples which are not predicted by the classifier as 1s.
        true_ones = np.where(preds == 1)[0]

        x_sample = np.take(x_sample, true_ones, axis=0)
        y_sample = np.array([to_one_hot(1) for _ in x_sample])
Ejemplo n.º 21
0
# Two 224x224 images are loaded and rescaled from [0, 255] to [-1, 1].
# The cat image is the original input; the dog image supplies the target
# class for a targeted attack.
dog_img = image.img_to_array(
    image.load_img('dog.jpg', target_size=(224, 224)))
cat_img = image.img_to_array(
    image.load_img('cat.jpg', target_size=(224, 224)))
dog_img = dog_img * 2.0 / 255.0 - 1
cat_img = cat_img * 2.0 / 255.0 - 1

# Single-image batches for the Keras model
dog_x = dog_img[np.newaxis, ...]
cat_x = cat_img[np.newaxis, ...]

# foolbox wrapper with the same [-1, 1] input range
fmodel = KerasModel(kmodel, bounds=(-1, 1))

# Target class: what the model predicts for the dog image
dog_label = np.argmax(kmodel.predict(dog_x))

# Original class: what the model predicts for the cat image
preds = kmodel.predict(cat_x)
cat_label = np.argmax(preds)

# All three criteria have to hold at once.
criterion_1 = TopKMisclassification(k=5)
criterion_2 = TargetClass(dog_label)
criterion_3 = TargetClassProbability(dog_label, p=0.5)
criterion = criterion_1 & criterion_2 & criterion_3

attack = BoundaryAttack(model=fmodel, criterion=criterion)
Ejemplo n.º 22
0
    print("adversarial not found")
    return
  adverse = np.round(adversarial_image)
  original_category = categories[original_class]
  target_category = categories[target_class]
  original_image_name = file_name[:-4]
  save_path = "adversarial_examples/" + original_category + "/"
  saved_image_name = original_image_name + "_" + target_category + ".png"
  matplotlib.image.imsave(save_path+saved_image_name, adverse/255, format = 'png')


#----------------creating the adversarial Model------------------------#
# Set the Keras learning phase to 0 before the classifier is loaded.
keras.backend.set_learning_phase(0)
network = load_model("classifier(final).h5")
adversarial_model = KerasModel(network,
                               bounds=(0, 255),
                               preprocessing=(0, 1))
attack = ProjectedGradientDescentAttack()


#----------------------------Creating the directories------------------#
base_directory = "all_years_140x140"
categories = os.listdir(base_directory)
# Keep at most the last 13 entries. Dropping from the front "until exactly
# 13 remain" raised IndexError whenever fewer than 13 entries existed.
# NOTE(review): os.listdir order is arbitrary, so which entries are dropped
# is not well defined — confirm the intended filter.
del categories[:max(len(categories) - 13, 0)]
for category in categories:
  # One output folder per category
  adversary_path = "adversarial_examples/" + category
  if not os.path.exists(adversary_path):
    os.makedirs(adversary_path)
  if os.path.isdir(base_directory + "/" + category):
    images = os.listdir(base_directory + "/" + category)
Ejemplo n.º 23
0
import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability, Misclassification
import numpy as np
import keras
from keras.models import load_model
import matplotlib.pyplot as plt


kmodel = load_model('./LeNet.h5')
# NOTE(review): `preprocessing` is defined but never passed to KerasModel
# below - confirm whether it is needed at all for this model.
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = KerasModel(kmodel, bounds=(0, 255))
attack = LBFGSAttack(model=fmodel, criterion=Misclassification())


# Results of the attack, one entry per input sample.
adversarial_imgs = []
adversarial_labels = []

# Load the inputs and labels and convert both to float32.
img_temp = np.asarray(np.load('./mnist_pure/x_train.npy'), dtype=np.float32)
label_temp = np.asarray(np.load('./mnist_pure/y_train.npy'), dtype=np.float32)

# Attack every loaded sample. Iterating over the data itself (instead of a
# hard-coded 60000) avoids an IndexError when the arrays are shorter and
# does not silently ignore samples when they are longer.
for i, (sample, sample_label) in enumerate(zip(img_temp, label_temp)):
    adversarial = attack(sample, sample_label)
    adversarial_imgs.append(adversarial)
Ejemplo n.º 24
0
    def main():
        """Attack the correctly classified test images and print statistics.

        Loads a Keras classifier and the first ``numImg`` PNG images, keeps
        the images the classifier labels correctly, runs a foolbox attack on
        each of them, and prints per-image and average distortion figures
        (max distortion, L1 distortion, modified pixels, PSNR) together with
        the model accuracy on the adversarial images.
        """

        def _mean(total, count):
            # Average that yields NaN instead of raising ZeroDivisionError
            # when nothing was counted (e.g. no attack succeeded).
            return total / count if count else float('nan')

        # Load Keras model
        model = load_model(
            r'.................................................h5')

        # The model output is declared to foolbox as softmax probabilities.
        Ptype = 'probabilities'

        compressJPEG = 0  # truthy: JPEG-compress every image before use
        jpeg = 0  # truthy: also print the JPEG summary line at the end
        jpeg_quality = 85

        # size (no color images)
        img_rows, img_cols, img_chans = 64, 64, 1
        num_classes = 2
        n_pixels = img_rows * img_cols

        #---------------------------------------------------------
        #  Load test data, define labels, test the model
        #-----------------------------------------------------------

        images = glob(r'F:..................................\*.png')

        # label = 0 for Manipulated, 1 for Original
        # (for StammNets it is the reverse: 0 for Original)
        label = 1

        # number of images used for testing the model
        numImg = 100
        index = np.arange(numImg)

        x_test = np.zeros((numImg, img_rows, img_cols))
        for i in np.arange(numImg):
            img = imread(images[index[i]])
            if compressJPEG:
                img1 = Image.fromarray(img)
                img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
                img = Image.open('temp.jpeg')
            x_test[i] = img

        # Integer labels and their one-hot encoding
        y_test_c = np.tile(label, numImg)
        y_test = keras.utils.to_categorical(y_test_c, num_classes)

        # Reshape test data, divide by 255 because net was trained this way
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, img_chans)
        x_test = x_test.astype('float32')
        x_test /= 255

        # Test legitimate examples
        score = model.evaluate(x_test, y_test, verbose=0)
        predicted_legitimate_labels = np.argmax(model.predict(x_test), axis=1)
        print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))

        # ------------------------------------------------------------------
        # Attack the [correctly classified] images in the test set
        # ------------------------------------------------------------------

        # Wrap the Keras model for foolbox and prepare the attack
        fmodel = KerasModel(model, bounds=(0, 1), predicts=Ptype)
        attack = foolbox.attacks.SaliencyMapAttack(fmodel)

        # Keep data and labels only for the correctly classified examples.
        # A boolean mask selects the same rows, in the same order, as the
        # former argwhere + reshape construction.
        correct = predicted_legitimate_labels == y_test_c
        n_correct = int(np.count_nonzero(correct))
        x_test_ok = x_test[correct]
        y_test_ok = y_test[correct]
        y_test_c_ok = np.argmax(y_test_ok, axis=1)

        # Number of correctly classified images to attack (at most n_correct)
        n_test = n_correct

        S = 0  # attacks that left the predicted class unchanged
        S_jpg = 0  # never incremented here; only used by the `jpeg` summary
        avg_Max_dist = 0
        avg_L1_dist = 0
        avg_No_Mod_Pixels = 0
        t = 0  # attacks whose result contained NaN
        avg_psnr = 0

        adv_images = np.zeros((n_test, img_rows, img_cols, img_chans))
        true_labels_cat = []
        for idx in np.arange(n_test):
            image = x_test_ok[idx].astype('float32')
            true_labels_cat.append(y_test_ok[idx, :])

            if compressJPEG:
                img1 = Image.fromarray(np.uint8(255 * image[:, :, 0]))
                img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
                img_reread = Image.open('temp.jpeg')
                # Scale back to [0, 1]: the wrapped model is bounded to
                # (0, 1) and the code below multiplies by 255 again.
                image = np.array(img_reread, dtype='float32') / 255
                image = np.reshape(image, (img_rows, img_cols, img_chans))

            # Generate the adversarial image and its 0..255 version
            adv_images[idx] = attack(image, y_test_c_ok[idx])
            adversarial_image = 255 * adv_images[idx].reshape(
                (img_rows, img_cols))

            # Scores of legitimate and adversarial images for this idx
            true_score = foolbox.utils.softmax(fmodel.predictions(image))
            true_class = np.argmax(true_score)
            adv_score = foolbox.utils.softmax(
                fmodel.predictions(adv_images[idx]))
            adv_class = np.argmax(adv_score)

            print(
                'Image {}. Class changed from {} to {}. The score passes from {} to {}'
                .format(idx, true_class, adv_class, true_score, adv_score))

            # A result containing NaN is counted as a failed attack.
            # NOTE(review): presumably the attack returned None, which numpy
            # stores as NaN in the float array - confirm.
            if np.any(np.isnan(adv_images[idx])):
                adv_class = true_class  # attack not successful
                t = t + 1
                print('An adversarial image cannot be found!!')

            if true_class == adv_class:
                S = S + 1

            # Distortion between the original and the attacked image
            image_before = 255 * image.reshape((img_rows, img_cols))
            diff = np.double(image_before) - np.double(adversarial_image)
            max_dist = abs(diff).max()
            l1_dist = abs(diff).sum() / n_pixels
            modified = np.count_nonzero(diff) / n_pixels
            print(
                'Max distortion adversarial [After Rounding] = {:3.4f}; L1 distortion = {:3.4f}'
                .format(max_dist, l1_dist))
            print('Percentage of modified pixels [After Rounding]  = {:3.4f}'.
                  format(modified))

            psnr_org = psnr(image_before, adversarial_image)
            print('PSNR = {:3.4f}'.format(abs(psnr_org)))

            # Only successful attacks contribute to the averages
            if true_class != adv_class:
                avg_Max_dist = avg_Max_dist + max_dist
                avg_L1_dist = avg_L1_dist + l1_dist
                avg_No_Mod_Pixels = avg_No_Mod_Pixels + modified
                avg_psnr = avg_psnr + psnr_org

        # n = number of successful attacks; it is 0 when every attack failed
        # or no image was attacked, hence the NaN-safe _mean below.
        n = n_test - S
        print('Adversarial failures: {} over {}'.format(S, n_test))
        print('Average distortion: max dist {}, L1 dist {}'.format(
            _mean(avg_Max_dist, n), _mean(avg_L1_dist, n)))
        print('Average no of modified pixels: {}'.format(
            _mean(avg_No_Mod_Pixels, n)))
        print('The adversarial image cannot be found  {} times over {}'.format(
            t, n_test))

        if jpeg:
            print(
                'Percentage of adversarial JPEG unchanged with QF {} (the attack is not successful): {}'
                .format(jpeg_quality, _mean(S_jpg, n_test)))

        # Evaluate accuracy
        true_labels_cat = np.array(true_labels_cat)
        adv_score = model.evaluate(adv_images, true_labels_cat, verbose=0)
        score_perfect = model.evaluate(x_test_ok, y_test_ok, verbose=0)

        print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))
        print(
            'Accuracy on legitimate images (only correctly classified, obviously 1): {:3.4f}'
            .format(score_perfect[1]))
        print('Accuracy on adversarial images: {:3.4f}'.format(adv_score[1]))
        print('Attack success rate on adversarial images N1: {:3.4f}'.format(
            1 - adv_score[1]))
        print('Average PSNR =: {:3.4f}'.format(_mean(avg_psnr, n)))

        # SECOND PART
        # Load the second model and test the adversarial images

        # Label; it may differ from `label` because of the model differences.
        label3 = 1

        # Integer labels and their one-hot encoding
        y_test_c = np.tile(label3, n_test)
        y_test2 = keras.utils.to_categorical(y_test_c, num_classes)
Ejemplo n.º 25
0
#!/usr/bin/env python3

import keras
from keras.applications.resnet50 import ResNet50

from foolbox.models import KerasModel
from robust_vision_benchmark import imagenet_model_server

# Learning phase 0 is Keras' test-time setting; set it before building the model.
keras.backend.set_learning_phase(0)
kmodel = ResNet50(weights='imagenet')
# NOTE(review): presumably foolbox's (subtract, divide) preprocessing pair,
# with per-channel values in the BGR order named below - confirm.
preprocessing = ([104, 116, 123], 1)
# Wrap the Keras network for foolbox; pixel values are declared in [0, 255].
fmodel = KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)

# Hand the wrapped model to the robust_vision_benchmark server helper.
imagenet_model_server(fmodel, channel_order='BGR', image_size=224)
Ejemplo n.º 26
0
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import decode_predictions
from scipy.misc import imsave, imshow, imread
import matplotlib.pyplot as plt
import glob

# Directory scanned for the *.png test images.
# NOTE(review): this block previously contained unresolved merge-conflict
# markers, which made the file unparseable. It is resolved in favour of the
# HEAD side; the other side used image_dir = './imgs' and looped over every
# path instead of the first 100 - confirm which variant is wanted.
image_dir = '/data0/images/imagenet12/imagenet224'

# Learning phase 0 is Keras' test-time setting.
keras.backend.set_learning_phase(0)
kmodel = ResNet50(weights='imagenet')
preprocessing = (np.array([104, 116, 123]), 1)
# Wrap the Keras network for foolbox; pixel values are declared in [0, 255].
fmodel = KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)

success = 0.
paths = glob.glob(image_dir+'/*.png')
# print() call form works on both Python 2 and Python 3.
print("{} images found".format(len(paths)))
for path in paths[:100]:
    image = imread(path).astype(np.float32)

    # Predict on a copy so `image` itself is left untouched.
    test = image.copy()
    preds = kmodel.predict(preprocess_input(np.expand_dims(test, 0)))
    label = np.argmax(preds)
Ejemplo n.º 27
0
# Read both pictures at 224x224 and turn them into arrays. The cat picture is
# the image under attack; the dog picture is used to initialize the targeted
# attack.
dog_img = image.img_to_array(image.load_img('dog.jpg', target_size=(224, 224)))
cat_img = image.img_to_array(image.load_img('cat.jpg', target_size=(224, 224)))

# Map pixel values from [0, 255] to [-1, 1].
cat_img = 2.0 * cat_img / 255.0 - 1
dog_img = 2.0 * dog_img / 255.0 - 1

# Add a leading batch axis for kmodel.predict.
dog_x = np.expand_dims(dog_img, axis=0)
cat_x = np.expand_dims(cat_img, axis=0)

# Wrap the Keras network for foolbox; inputs are declared to lie in [-1, 1].
fmodel = KerasModel(kmodel, bounds=(-1, 1))

# Class predicted for the dog image: the class the attack is aimed at.
dog_label = np.argmax(kmodel.predict(dog_x))

# Class predicted for the cat image: the class of the image being attacked.
preds = kmodel.predict(cat_x)
cat_label = np.argmax(preds)

# Three criteria, joined with `&` into a single combined criterion.
criterion_1, criterion_2, criterion_3 = (
    TopKMisclassification(k=5),
    TargetClass(dog_label),
    TargetClassProbability(dog_label, p=0.5),
)
criterion = criterion_1 & criterion_2 & criterion_3

attack = BoundaryAttack(model=fmodel, criterion=criterion)
    def main():
        """Attack images with the first model and test them on a second one.

        Loads two Keras classifiers and a set of PNG images, keeps the images
        that both models classify as expected, runs a foolbox L-BFGS attack
        against the first model, writes the rounded adversarial images and
        noise maps to disk, and prints distortion / PSNR statistics and the
        accuracy of both models on the adversarial images (float and
        rounded-to-integer versions).
        """

        def _mean(total, count):
            # Average that yields NaN instead of raising ZeroDivisionError
            # when nothing was counted (e.g. no attack succeeded).
            return total / count if count else float('nan')

        def _stretch_to_uint8(arr):
            # Linearly stretch `arr` to the full 0..255 range as uint8.
            # Scaling happens BEFORE the cast; casting the [0, 1] values first
            # truncates everything except the maximum to 0. A constant input
            # maps to all zeros instead of dividing by zero.
            lo, hi = np.min(arr), np.max(arr)
            if hi == lo:
                return np.zeros(np.shape(arr), dtype=np.uint8)
            return np.uint8(np.round(255 * (arr - lo) / (hi - lo)))

        # Load Keras model
        model = load_model(r'...........................h5')  #First model

        # The model output is declared to foolbox as softmax probabilities.
        Ptype = 'probabilities'

        # Image size (single channel) and number of classes
        img_rows, img_cols, img_chans = 128, 128, 1
        num_classes = 2
        n_pixels = img_rows * img_cols

        jpeg_quality = 85
        jpeg = 0  # truthy: re-test each adversarial image after JPEG
        compressJPEG = 0  # truthy: JPEG-compress every image before use

        #---------------------------------------------------------
        #  Load test data and define labels
        #-----------------------------------------------------------

        images = glob(r'...................\*.png')   #images from first model (Manipulated class)
        label = 0   # label = 1 for Original  and Label = 0 for Manipulated class

        # mismatch model: Load Keras model
        model2 = load_model(r'...................h5')  #load second model
        label2 = 1

        # Accuracy is computed over as many images as were found
        numImg = len(images)

        # Random image indices, drawn with replacement from a fixed seed
        np.random.seed(1234)
        index = np.random.randint(len(images), size=numImg)

        x_test = np.zeros((numImg, img_rows, img_cols))
        for i in np.arange(numImg):
            img = imread(images[index[i]], flatten=False)
            if compressJPEG:
                img1 = Image.fromarray(img)
                img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
                img = Image.open('temp.jpeg')
            x_test[i] = img

        # Integer labels for the first model and their one-hot encoding
        y_test_c = np.tile(label, numImg)
        y_test = keras.utils.to_categorical(y_test_c, num_classes)

        # Reshape test data, divide by 255 because net was trained this way
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, img_chans)
        x_test = x_test.astype('float32')
        x_test /= 255

        # Test legitimate examples; score = [loss, metrics...] in test mode
        score = model.evaluate(x_test, y_test, verbose=0)
        predicted_legitimate_labels = np.argmax(model.predict(x_test), axis=1)
        print('Accuracy on legitimate images (all): {:3.4f}'.format(score[1]))

        # Same images evaluated by the second (mismatched) model
        y_test_c2 = np.tile(label2, numImg)
        y_test2 = keras.utils.to_categorical(y_test_c2, num_classes)
        score2 = model2.evaluate(x_test, y_test2, verbose=0)
        print('Accuracy on legitimate images (all) by mismatched model: {:3.4f}'.format(score2[1]))

        # ------------------------------------------------------------------
        # Attack the test images
        # ------------------------------------------------------------------

        # Wrap model
        fmodel = KerasModel(model, bounds=(0, 1), predicts=Ptype)

        # L-BFGS attack with a threshold derived from a PSNR of 55
        attack = foolbox.attacks.LBFGSAttack(fmodel, threshold=PSNR2MSE(55))

        # Keep only the images the first model classifies correctly; already
        # misclassified images are not attacked. Boolean masks select the
        # same rows as the former argwhere + reshape code, and keep
        # test_ok_index one-dimensional even when a single image survives
        # (np.squeeze used to collapse it to a 0-d array, which cannot be
        # indexed inside the loop below).
        ok_model1 = predicted_legitimate_labels == y_test_c
        x_test_ok = x_test[ok_model1]
        y_test_ok = y_test[ok_model1]
        test_ok_index = index[ok_model1]

        # Of those, keep only the images the second model labels as label2
        y_test_c_ok_2 = np.tile(label2, int(np.count_nonzero(ok_model1)))
        predicted_legitimate_labels2 = np.argmax(model2.predict(x_test_ok), axis=1)
        ok_model2 = predicted_legitimate_labels2 == y_test_c_ok_2
        x_test_ok = x_test_ok[ok_model2]
        y_test_ok = y_test_ok[ok_model2]
        y_test_c_ok = np.argmax(y_test_ok, axis=1)
        test_ok_index = test_ok_index[ok_model2]

        # One adversarial example is built per surviving image
        n_test = int(np.count_nonzero(ok_model2))

        S = 0  # attacks that left the class unchanged (float image)
        S_int = 0  # same, for the rounded-to-integer image
        S_jpg = 0  # same, after JPEG compression
        avg_Max_dist = 0
        avg_L1_dist = 0
        avg_Max_dist_made_integer = 0
        avg_L1_dist_made_integer = 0
        avg_No_Mod_Pixels = 0
        avg_No_Mod_Pixels_integer_rounding_adv_img = 0
        avg_No_Mod_Pixels_integer_NO_rounding = 0
        PSNR = 0  # running PSNR sum for the JPEG re-test
        t = 0  # attacks whose result contained NaN
        avg_psnr = 0
        avg_psnr_int = 0

        # Output locations (loop-invariant)
        path1='E:/......................./' #output folder
        path2 = '''E:\..................\\'''
        path3 = '''E:\Benedetta_for_ICASSP\IMAGE_Diff_Int\\'''

        adv_images = np.zeros((n_test, img_rows, img_cols, img_chans))
        adv_images_integer = np.zeros((n_test, img_rows, img_cols, img_chans))
        true_labels_cat = []
        for idx in np.arange(n_test):
            image = x_test_ok[idx].astype('float32')
            true_labels_cat.append(y_test_ok[idx, :])

            # 0..255 version of the image as loaded (before optional JPEG)
            image_original = 255 * image.reshape((img_rows, img_cols))

            if compressJPEG:
                img1 = Image.fromarray(np.uint8(255*image[:,:,0]))
                img1.save('temp.jpeg', "JPEG", quality=jpeg_quality)
                img_reread = Image.open('temp.jpeg')
                # Scale back to [0, 1]: the wrapped model is bounded to
                # (0, 1) and the code below multiplies by 255 again.
                image = np.array(img_reread, dtype='float32') / 255
                image = np.reshape(image, (img_rows, img_cols, img_chans))

            # Generate the adversarial image, its 0..255 version and the
            # rounded 8-bit version Z
            adv_images[idx] = attack(image, y_test_c_ok[idx])
            adversarial_image = 255 * adv_images[idx].reshape((img_rows, img_cols))
            Z = np.uint8(np.round(adversarial_image))

            # Store the integer adversarial image under the source file name
            cv2.imwrite(os.path.join(path1, os.path.basename(images[test_ok_index[idx]])), Z)

            # Store the perturbation, stretched to 0..255 for visibility
            diff_noise = adversarial_image - image_original
            cv2.imwrite(path2 + 'adv_Nosie_%d.png' % idx, _stretch_to_uint8(diff_noise))
            adv_images_integer[idx] = np.reshape(Z / 255., (img_rows, img_cols, img_chans))

            # Scores of legitimate and adversarial images for this idx;
            # true_class is the class according to the first network
            true_score = foolbox.utils.softmax(fmodel.predictions(image))
            true_class = np.argmax(true_score)
            adv_score = foolbox.utils.softmax(fmodel.predictions(adv_images[idx]))
            adv_class = np.argmax(adv_score)
            adv_integer_score = foolbox.utils.softmax(fmodel.predictions(adv_images_integer[idx]))
            adv_integer_class = np.argmax(adv_integer_score)

            print('Image {}. Class changed from {} to {}. The score passes from {} to {}'.format(
                idx, true_class, adv_class, true_score, adv_score))
            print('Image Made Integer {}. Class changed from {} to {}. The score passes from {} to {}'.format(
                idx, true_class, adv_integer_class, true_score, adv_integer_score))

            # A result containing NaN is counted as a failed attack.
            # NOTE(review): presumably the attack returned None, which numpy
            # stores as NaN in the float array - confirm.
            if np.any(np.isnan(adv_images[idx])):
                adv_class = true_class
                adv_integer_class = true_class
                t = t + 1
                print('An adversarial image cannot be found!!')

            if true_class == adv_class:
                S = S+1
            if true_class == adv_integer_class:
                S_int = S_int + 1

            # 0..255 version of the attacked input and its 8-bit form.
            # Round before casting: the float32 /255 then *255 round trip can
            # land just below an integer, and a bare uint8 cast would then
            # truncate it to the next lower value.
            image_before = 255 * image.reshape((img_rows, img_cols))
            X = np.uint8(np.round(image_before))

            diff = np.double(image_before) - np.double(adversarial_image)
            max_dist = abs(diff).max()
            l1_dist = abs(diff).sum() / n_pixels
            modified = np.count_nonzero(diff) / n_pixels

            print('Max distortion adversarial = {:3.4f}; L1 distortion = {:3.4f}'.format(max_dist, l1_dist))
            print('Percentage of modified pixels on integers = {:3.4f}. Percentage of negative modifications  = {:3.4f}'.format(
                modified,
                np.count_nonzero(np.double(abs(diff)) - np.double(diff)) / n_pixels))

            # Same measurements on the 8-bit images
            diff_integer = np.double(X) - np.double(Z)
            max_dist_int = abs(diff_integer).max()
            l1_dist_int = abs(diff_integer).sum() / n_pixels

            cv2.imwrite(path3 + 'adv_Nosie_%d.png' % idx, _stretch_to_uint8(diff_integer))

            print('Max distortion adversarial integer = {:3.4f}; L1 distortion = {:3.4f}'.format(max_dist_int, l1_dist_int))

            # PSNR between original and adversarial, float and 8-bit
            psnr_org = psnr(image_before, adversarial_image)
            print('PSNR = {:3.4f}'.format(abs(psnr_org)))

            psnr_Int = psnr(X, Z)
            print('PSNR (Integer) = {:3.4f}'.format(abs(psnr_Int)))

            # Only successful attacks contribute to the averages
            if true_class != adv_class:
                avg_Max_dist = avg_Max_dist + max_dist
                avg_L1_dist = avg_L1_dist + l1_dist
                avg_No_Mod_Pixels = avg_No_Mod_Pixels + modified
                avg_psnr = avg_psnr + psnr_org

            if true_class != adv_integer_class:
                avg_Max_dist_made_integer = avg_Max_dist_made_integer + max_dist_int
                avg_L1_dist_made_integer = avg_L1_dist_made_integer + l1_dist_int
                # modified pixels after rounding the adversarial image
                avg_No_Mod_Pixels_integer_rounding_adv_img = (
                    avg_No_Mod_Pixels_integer_rounding_adv_img
                    + np.count_nonzero(diff_integer) / n_pixels)
                # modified pixels without rounding, counted when the integer
                # attack changed the class
                avg_No_Mod_Pixels_integer_NO_rounding = avg_No_Mod_Pixels_integer_NO_rounding + modified
                avg_psnr_int = avg_psnr_int + psnr_Int

            # -------------------------------
            # JPEG-compress the adversarial image and test again
            # -------------------------------
            if jpeg:
                img1 = Image.fromarray(Z)
                img1.save('temp.jpeg', "JPEG", quality= jpeg_quality)
                adv_reread = Image.open('temp.jpeg')
                x_test_comp = np.array(adv_reread)
                x_test_comp = x_test_comp.reshape(img_rows, img_cols, img_chans)
                x_test_comp = x_test_comp.astype('float32')
                x_test_comp /= 255
                adv_reread_score = foolbox.utils.softmax(fmodel.predictions(x_test_comp))
                adv_reread_class = np.argmax(adv_reread_score)
                if true_class == adv_reread_class:
                    S_jpg = S_jpg + 1
                print('Class after JPEG compression {}, with score {}.'.format(adv_reread_class,adv_reread_score))

                x_test_comp = 255* x_test_comp.reshape((img_rows, img_cols))
                psnr_jpeg = psnr(image_before, x_test_comp)
                print('PSNR = {}'.format(psnr_jpeg))
                PSNR = psnr_jpeg + PSNR

        # Successful attacks (float / integer). Either can be 0, and n_test
        # can be 0 too, hence the NaN-safe _mean below.
        n = n_test - S
        n_int = n_test - S_int
        # on how many adversarial test images the attack did not work
        print('Class for the adversarial unchanged: {} over {}'.format(S,n_test))
        # on how many integer adversarial test images the attack did not work
        print('Class for the adversarial integer unchanged: {} over {}'.format(S_int,n_test))
        print('Average distortion: max dist {}, L1 dist {}'.format(_mean(avg_Max_dist, n), _mean(avg_L1_dist, n)))
        print('Average distortion (made integer): max dist {}, L1 dist {}'.format(
            _mean(avg_Max_dist_made_integer, n_int), _mean(avg_L1_dist_made_integer, n_int)))
        print('Average no of modified pixels: {}'.format(_mean(avg_No_Mod_Pixels, n)))
        print('Average no of modified pixels on integers NO ROUNDING: {}'.format(
            _mean(avg_No_Mod_Pixels_integer_NO_rounding, n_int)))
        print('Average no of modified pixels on integers rounding adv_img to int: {}'.format(
            _mean(avg_No_Mod_Pixels_integer_rounding_adv_img, n_int)))

        print('The adversarial image cannot be found  {} times over {}'.format(t,n_test))

        if jpeg:
            print('Percentage of adversarial JPEG unchanged with QF {} (the attack is not successful): {}'.format(
                jpeg_quality, _mean(S_jpg, n_test)))

        print('Average PSNR distortion for JPEG adversarial images : {}'.format(_mean(PSNR, n_test)))

        # Evaluate accuracy
        true_labels_cat = np.array(true_labels_cat)
        adv_score = model.evaluate(adv_images, true_labels_cat, verbose=0)
        adv_score_integer= model.evaluate(adv_images_integer, true_labels_cat, verbose=0)

        score_perfect = model.evaluate(x_test_ok, y_test_ok, verbose=0)

        print('Accuracy on legitimate images (all) by N1: {:3.4f}'.format(score[1]))
        print('Accuracy on legitimate images (all) by mismatched model N2: {:3.4f}'.format(score2[1]))
        print('Accuracy on legitimate images (only correctly classified, obviously 1) N1: {:3.4f}'.format(score_perfect[1]))
        print('Accuracy on adversarial images N1: {:3.4f}'.format(adv_score[1]))
        print('Attack success rate on adversarial images N1: {:3.4f}'.format(1-adv_score[1]))
        print('Accuracy on adversarial images (made integer) N1: {:3.4f}'.format(adv_score_integer[1]))
        print('Attack success on adversarial images (made integer) N1: {:3.4f}'.format(1-adv_score_integer[1]))
        print('Average PSNR =: {:3.4f}'.format(_mean(avg_psnr, n)))
        print('Average PSNR (Integer) =: {:3.4f}'.format(_mean(avg_psnr_int, n_int)))

        # SECOND PART
        # Test the adversarial images on the second model

        # Integer labels for the second model and their one-hot encoding
        y_test_c = np.tile(label2, n_test)
        y_test2 = keras.utils.to_categorical(y_test_c, num_classes)

        # Second model on the float and on the integer adversarial images
        adv_score_mismatch = model2.evaluate(adv_images, y_test2, verbose=0)
        adv_score_mismatch_on_integer = model2.evaluate(adv_images_integer, y_test2, verbose=0)

        print('Accuracy on adversarial images with the mismatched model N2: {:3.4f}'.format(adv_score_mismatch[1]))
        print('Attack success rate on adversarial images with the mismatched model N2: {:3.4f}'.format(1-adv_score_mismatch[1]))

        print('Accuracy on adversarial images with the mismatched model (Integer) N2: {:3.4f}'.format(adv_score_mismatch_on_integer[1]))
        print('Attack success rate on adversarial images with the mismatched model (Integer) N2: {:3.4f}'.format(1-adv_score_mismatch_on_integer[1]))
Ejemplo n.º 29
0
plot_sample = True
plot_samples = 100

# When plotting is enabled, make sure the per-dataset output folder exists.
if plot_sample:
    path = './adv_samples/' + dataset
    if not os.path.exists(path):
        os.makedirs(path)

# -------------------------------------------------
# ADVERSARIAL SAMPLE GENERATION
# -------------------------------------------------

# Wrap the Keras classifier for foolbox (bounds (0, 1)) and set up FGSM.
foolbox_model = KerasModel(model, (0, 1))
attack = FGSM(foolbox_model)

# Walk over the whole test set and replace samples by adversarial versions.
for i in tqdm(range(len(X_te))):

    # Attack the sample in its original shape.
    adv_sample = attack(np.reshape(X_te[i], orig_dims),
                        label=y_te[i],
                        max_epsilon=max_epsilon)

    # The attack may yield nothing; such samples are left as they are.
    if adv_sample is None:
        continue

    # Flatten the adversarial sample and store it in place of the original.
    X_te[i] = np.reshape(adv_sample, np.prod(orig_dims))
Ejemplo n.º 30
0
from foolbox.criteria import Misclassification
import numpy as np
import keras
from keras.datasets import mnist
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import decode_predictions
from scipy.misc import imsave, imshow, imread
import matplotlib.pyplot as plt
import glob
from train_mnist import madry_mnist_model

# Learning phase 0 is Keras' test-time setting.
keras.backend.set_learning_phase(0)

# Build the network, load its trained weights and wrap it for foolbox with
# inputs declared to lie in [0, 1].
model = madry_mnist_model()
model.load_weights('mnist_madry.h5')
fmodel = KerasModel(model, bounds=(0, 1))

success = 0.
(images, labels), _ = mnist.load_data()

# Only the first 100 images of the loaded set are processed.
for image in images[:100]:

    # float32 copy with a batch axis in front and a channel axis at the end,
    # scaled down by 255.
    image = image.astype(np.float32)[np.newaxis, :, :, np.newaxis] / 255.

    # Predict on a copy so `image` itself is left untouched.
    test = image.copy()
    preds = model.predict(test)
    label = np.argmax(preds)
    print("Label: ", label)
    #imshow(image[0, :, :, 0])
Ejemplo n.º 31
0
from foolbox.criteria import ConfidentMisclassification
import numpy as np
import keras
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import decode_predictions
import cv2
import scipy.io as sio
import matplotlib.pyplot as plt

# instantiate model
# Learning phase 0 is Keras' test-time setting; set it before building the model.
keras.backend.set_learning_phase(0)
kmodel = ResNet50(weights='imagenet')

# NOTE(review): presumably foolbox's (subtract, divide) preprocessing pair,
# i.e. per-channel means to subtract and a divisor of 1 - confirm.
preprocessing = (np.array([103.0626,115.9029,123.1516]), 1)
# Wrap the Keras network for foolbox; pixel values are declared in [0, 255].
fmodel = KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)


attacks=['GradientAttack',
'GradientSignAttack',
'IterativeGradientAttack',
'IterativeGradientSignAttack',
'LBFGSAttack',
'ApproximateLBFGSAttack',
'DeepFoolAttack',
'DeepFoolL2Attack',
'DeepFoolLinfinityAttack',
'SaliencyMapAttack',
'GaussianBlurAttack',
'ContrastReductionAttack',
'SinglePixelAttack',