Example #1
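These pytest excerpts (Examples #1 through #5) omit their module-level imports. A minimal sketch of what they assume is shown below; the import paths for SummaryWriterDefault and ARTTestException are assumptions based on the ART repository layout.

# Assumed module-level imports for the test excerpts below (not shown in the originals).
import numpy as np
import sklearn.datasets
import sklearn.model_selection
import tensorflow as tf

from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.classification import KerasClassifier
from art.summary_writer import SummaryWriterDefault
from tests.utils import ARTTestException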
def test_update_image_classification_sw(art_warning, fix_get_mnist_subset, image_dl_estimator):
    try:

        from art.attacks.evasion import ProjectedGradientDescent

        classifier, _ = image_dl_estimator(from_logits=False)

        swd = SummaryWriterDefault(summary_writer=True, ind_1=True, ind_2=True, ind_3=True, ind_4=True)

        attack = ProjectedGradientDescent(
            estimator=classifier, max_iter=10, eps=0.3, eps_step=0.03, batch_size=5, verbose=False, summary_writer=swd
        )

        (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

        attack.generate(x=x_train_mnist, y=y_train_mnist)

        assert all(attack.summary_writer.i_1 == [False, False, False, False, False])
        if np.ndim(attack.summary_writer.i_2) != 0:
            assert len(attack.summary_writer.i_2) == 5
        np.testing.assert_almost_equal(attack.summary_writer.i_3["0"], np.array([0.0, 0.0, 0.0, 0.0, 0.0]))
        np.testing.assert_almost_equal(attack.summary_writer.i_4["0"], np.array([0.0, 0.0, 0.0, 0.0, 0.0]))

    except ARTTestException as e:
        art_warning(e)
Example #2
def test_binary_keras_instantiation_and_attack_pgd(art_warning):
    tf.compat.v1.disable_eager_execution()
    try:
        x, y = sklearn.datasets.make_classification(n_samples=10000,
                                                    n_features=20,
                                                    n_informative=5,
                                                    n_redundant=2,
                                                    n_repeated=0,
                                                    n_classes=2)
        train_x, test_x, train_y, test_y = sklearn.model_selection.train_test_split(
            x, y, test_size=0.2)
        train_x = train_x.astype(np.float32)
        test_x = test_x.astype(np.float32)
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(128,
                                  activation=tf.nn.relu,
                                  input_shape=(20, )),
            tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
        ])
        model.summary()
        model.compile(optimizer=tf.optimizers.Adam(),
                      loss="binary_crossentropy",
                      metrics=["accuracy"])
        classifier = KerasClassifier(model=model)
        classifier.fit(train_x, train_y, nb_epochs=5)
        pred = classifier.predict(test_x)
        attack = ProjectedGradientDescent(estimator=classifier, eps=0.5)
        x_test_adv = attack.generate(x=test_x)
        adv_predictions = classifier.predict(x_test_adv)
        assert (adv_predictions != pred).any()
    except ARTTestException as e:
        art_warning(e)
Example #3
    def test_pgd(self):
        from art.attacks.evasion import ProjectedGradientDescent

        attack = ProjectedGradientDescent(estimator=self.obj_detect,
                                          max_iter=2)
        x_test_adv = attack.generate(x=self.x_test, y=self.y_test)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 x_test_adv, self.x_test)
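Note the idiom in the last two lines: np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, x_test_adv, self.x_test) passes exactly when the two arrays differ somewhere, i.e. it asserts that the attack actually modified the input.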
Example #4
def test_update_image_classification_bool_str(art_warning, fix_get_mnist_subset, image_dl_estimator, summary_writer):
    try:

        from art.attacks.evasion import ProjectedGradientDescent

        classifier, _ = image_dl_estimator(from_logits=False)

        attack = ProjectedGradientDescent(
            estimator=classifier,
            max_iter=10,
            eps=0.3,
            eps_step=0.03,
            batch_size=5,
            verbose=False,
            summary_writer=summary_writer,
        )

        (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

        attack.generate(x=x_train_mnist, y=y_train_mnist)

    except ARTTestException as e:
        art_warning(e)
Example #5
def test_update_image_object_detection_sw(art_warning, fix_get_mnist_subset, fix_get_rcnn):
    try:

        from art.attacks.evasion import ProjectedGradientDescent

        frcnn = fix_get_rcnn

        swd = SummaryWriterDefault(summary_writer=True, ind_1=False, ind_2=True, ind_3=True, ind_4=True)

        attack = ProjectedGradientDescent(
            estimator=frcnn, max_iter=10, eps=0.3, eps_step=0.03, batch_size=5, verbose=False, summary_writer=swd
        )

        (x_train_mnist, y_train_mnist, x_test_mnist, y_test_mnist) = fix_get_mnist_subset

        attack.generate(x=x_train_mnist, y=y_train_mnist)

        if np.ndim(attack.summary_writer.i_2) != 0:
            assert len(attack.summary_writer.i_2) == 5
        np.testing.assert_almost_equal(attack.summary_writer.i_3["0"], np.array([0.2265982]))
        np.testing.assert_almost_equal(attack.summary_writer.i_4["0"], np.array([0.0, 0.0, 0.0, 0.0, 0.0]))

    except ARTTestException as e:
        art_warning(e)
Example #6
            x_i_new = x_i[0:num_frames_min, :, :, :]
            x_list_new.append(x_i_new)

        x = np.asarray(x_list_new, dtype=float)

    y_pred = pgt.predict(x=x, y_init=y_init)

    ##################
    # evasion attack #
    ##################

    from art.attacks.evasion import ProjectedGradientDescent

    attack = ProjectedGradientDescent(estimator=pgt, eps=eps, eps_step=eps_step, batch_size=1, max_iter=20)

    x_adv = attack.generate(x=x, y=y_pred)

    y_pred_adv = pgt.predict(x=x_adv, y_init=y_init)

    if x.dtype == object:
        for i in range(x.shape[0]):
            print("L_inf:", np.max(np.abs(x_adv[i] - x[i])))
    else:
        print("L_inf:", np.max(np.abs(x_adv - x)))

    ################################
    # visualise adversarial images #
    ################################

    x_vis = x_adv
    y_vis = y_pred_adv
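In this excerpt, pgt appears to be an object-tracking estimator (for instance ART's PyTorchGoturn), whose predict takes the initial bounding boxes via y_init; the loop at the top trims every video to the length of the shortest one (num_frames_min) so the clips stack into a single array.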
Example #7
def natural(eps):
    # Step 1: Load the MNIST dataset

    (x_train,
     y_train), (x_test,
                y_test), min_pixel_value, max_pixel_value = load_mnist()

    # Step 2: Create the model

    import tensorflow as tf
    from tensorflow.keras import Model
    from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D

    class TensorFlowModel(Model):
        """
        Standard TensorFlow model for unit testing.
        """
        def __init__(self):
            super(TensorFlowModel, self).__init__()
            self.conv1 = Conv2D(filters=4, kernel_size=5, activation="relu")
            self.conv2 = Conv2D(filters=10, kernel_size=5, activation="relu")
            self.maxpool = MaxPool2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding="valid",
                                     data_format=None)
            self.flatten = Flatten()
            self.dense1 = Dense(100, activation="relu")
            self.logits = Dense(10, activation="linear")

        def call(self, x):
            """
            Call function to evaluate the model.
            :param x: Input to the model
            :return: Prediction of the model
            """
            x = self.conv1(x)
            x = self.maxpool(x)
            x = self.conv2(x)
            x = self.maxpool(x)
            x = self.flatten(x)
            x = self.dense1(x)
            x = self.logits(x)
            return x

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

    def train_step(model, images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    model = TensorFlowModel()
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    # Step 3: Create the ART classifier

    classifier = TensorFlowV2Classifier(
        model=model,
        loss_object=loss_object,
        train_step=train_step,
        nb_classes=10,
        input_shape=(28, 28, 1),
        clip_values=(0, 1),
    )

    # Step 4: Train the ART classifier

    classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)

    # Step 5: Evaluate the ART classifier on benign test examples

    predictions = classifier.predict(x_test)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    # Step 6: Generate adversarial test examples
    attack = ProjectedGradientDescent(estimator=classifier,
                                      eps=eps,
                                      eps_step=eps / 3,
                                      max_iter=20)
    x_test_adv = attack.generate(x=x_test)

    # Step 7: Evaluate the ART classifier on adversarial test examples

    predictions = classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(
            y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
Example #8
def main():
    args = parse_option()
    print(args)

    # check args
    if args.loss not in LOSS_NAMES:
        raise ValueError('Unsupported loss function type {}'.format(args.loss))

    if args.optimizer == 'adam':
        optimizer1 = tf.keras.optimizers.Adam(learning_rate=args.lr_1)
    elif args.optimizer == 'lars':
        from lars_optimizer import LARSOptimizer
        # not compatible with tf2
        optimizer1 = LARSOptimizer(
            args.lr_1,
            exclude_from_weight_decay=['batch_normalization', 'bias'])
    elif args.optimizer == 'sgd':
        optimizer1 = tfa.optimizers.SGDW(learning_rate=args.lr_1,
                                         momentum=0.9,
                                         weight_decay=1e-4)
    optimizer2 = tf.keras.optimizers.Adam(learning_rate=args.lr_2)

    model_name = '{}_model-bs_{}-lr_{}'.format(args.loss, args.batch_size_1,
                                               args.lr_1)

    # 0. Load data
    if args.data == 'mnist':
        mnist = tf.keras.datasets.mnist
    elif args.data == 'fashion_mnist':
        mnist = tf.keras.datasets.fashion_mnist
    print('Loading {} data...'.format(args.data))
    (_, y_train), (_, y_test) = mnist.load_data()
    # x_train, x_test = x_train / 255.0, x_test / 255.0
    # x_train = x_train.reshape(-1, 28*28).astype(np.float32)
    # x_test = x_test.reshape(-1, 28*28).astype(np.float32)
    (x_train, _), (x_test, _), _, _ = load_mnist()
    # print(x_train[0][0])
    print(x_train.shape, x_test.shape)

    # simulate low data regime for training
    # n_train = x_train.shape[0]
    # shuffle_idx = np.arange(n_train)
    # np.random.shuffle(shuffle_idx)

    # x_train = x_train[shuffle_idx][:args.n_data_train]
    # y_train = y_train[shuffle_idx][:args.n_data_train]
    # print('Training dataset shapes after slicing:')
    print(x_train.shape, y_train.shape)

    train_ds = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(5000).batch(args.batch_size_1)

    train_ds2 = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(5000).batch(args.batch_size_2)

    test_ds = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(args.batch_size_1)

    # 1. Stage 1: train encoder with multiclass N-pair loss
    encoder = Encoder(normalize=True, activation=args.activation)
    projector = Projector(args.projection_dim,
                          normalize=True,
                          activation=args.activation)

    if args.loss == 'max_margin':

        def loss_func(z, y):
            return losses.max_margin_contrastive_loss(z,
                                                      y,
                                                      margin=args.margin,
                                                      metric=args.metric)
    elif args.loss == 'npairs':
        loss_func = losses.multiclass_npairs_loss
    elif args.loss == 'sup_nt_xent':

        def loss_func(z, y):
            return losses.supervised_nt_xent_loss(
                z,
                y,
                temperature=args.temperature,
                base_temperature=args.base_temperature)
    elif args.loss.startswith('triplet'):
        triplet_kind = args.loss.split('-')[1]

        def loss_func(z, y):
            return losses.triplet_loss(z,
                                       y,
                                       kind=triplet_kind,
                                       margin=args.margin)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    test_loss = tf.keras.metrics.Mean(name='test_loss')

    # tf.config.experimental_run_functions_eagerly(True)
    # Train step for the stage-1 contrastive loss.
    @tf.function
    def train_step_stage1(x, y):
        '''
        x: data tensor, shape: (batch_size, data_dim)
        y: data labels, shape: (batch_size, )
        '''
        with tf.GradientTape() as tape:
            r = encoder(x, training=True)
            z = projector(r, training=True)
            # print("z", z, "y", y)
            loss = loss_func(z, y)

        gradients = tape.gradient(
            loss, encoder.trainable_variables + projector.trainable_variables)
        optimizer1.apply_gradients(
            zip(gradients,
                encoder.trainable_variables + projector.trainable_variables))
        train_loss(loss)

    @tf.function
    def test_step_stage1(x, y):
        r = encoder(x, training=False)
        z = projector(r, training=False)
        t_loss = loss_func(z, y)
        test_loss(t_loss)

    print('Stage 1 training ...')
    for epoch in range(args.epoch):
        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        test_loss.reset_states()

        for x, y in train_ds:
            train_step_stage1(x, y)

        for x_te, y_te in test_ds:
            test_step_stage1(x_te, y_te)

        template = 'Epoch {}, Loss: {}, Test Loss: {}'
        # print(template.format(epoch + 1,
        #                       train_loss.result(),
        #                       test_loss.result()))

    if args.draw_figures:
        # projecting data with the trained encoder, projector
        x_tr_proj = projector(encoder(x_train))
        x_te_proj = projector(encoder(x_test))
        # convert tensor to np.array
        x_tr_proj = x_tr_proj.numpy()
        x_te_proj = x_te_proj.numpy()
        print(x_tr_proj.shape, x_te_proj.shape)

        # check learned embedding using PCA
        pca = PCA(n_components=2)
        pca.fit(x_tr_proj)
        x_te_proj_pca = pca.transform(x_te_proj)

        x_te_proj_pca_df = pd.DataFrame(x_te_proj_pca, columns=['PC1', 'PC2'])
        x_te_proj_pca_df['label'] = y_test
        # PCA scatter plot
        fig, ax = plt.subplots()
        ax = sns.scatterplot(x='PC1',
                             y='PC2',
                             data=x_te_proj_pca_df,
                             palette='tab10',
                             hue='label',
                             linewidth=0,
                             alpha=0.6,
                             ax=ax)

        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        title = 'Data: {}\nEmbedding: {}\nbatch size: {}; LR: {}'.format(
            args.data, LOSS_NAMES[args.loss], args.batch_size_1, args.lr_1)
        ax.set_title(title)
        fig.savefig('figs/PCA_plot_{}_{}_embed.png'.format(
            args.data, model_name))

        # density plot for PCA
        g = sns.jointplot(x='PC1', y='PC2', data=x_te_proj_pca_df, kind="hex")
        plt.subplots_adjust(top=0.95)
        g.fig.suptitle(title)

        g.savefig('figs/Joint_PCA_plot_{}_{}_embed.png'.format(
            args.data, model_name))

    # Stage 2: freeze the learned representations and then learn a classifier
    # on a linear layer using a softmax loss
    softmax = SoftmaxPred()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='train_ACC')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='test_ACC')

    cce_loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)

    # Train step for stage 2: train only the softmax head on frozen representations.
    @tf.function
    def train_step(model, x, y):
        '''
        x: data tensor, shape: (batch_size, data_dim)
        y: data labels, shape: (batch_size, )
        '''
        with tf.GradientTape() as tape:
            r = model.layers[0](x, training=False)
            y_preds = model.layers[1](r, training=True)
            loss = cce_loss_obj(y, y_preds)

        # freeze the encoder, only train the softmax layer
        gradients = tape.gradient(loss, model.layers[1].trainable_variables)
        optimizer2.apply_gradients(
            zip(gradients, model.layers[1].trainable_variables))
        train_loss(loss)
        train_acc(y, y_preds)

    @tf.function
    def test_step(x, y):
        r = encoder(x, training=False)
        y_preds = softmax(r, training=False)
        t_loss = cce_loss_obj(y, y_preds)
        test_loss(t_loss)
        test_acc(y, y_preds)

    if args.write_summary:
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = 'logs/{}/{}/{}/train'.format(model_name, args.data,
                                                     current_time)
        test_log_dir = 'logs/{}/{}/{}/test'.format(model_name, args.data,
                                                   current_time)
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
        test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    print('Stage 2 training ...')
    model = tf.keras.Sequential([encoder, softmax])
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)

    classifier = TensorFlowV2Classifier(
        model=model,
        loss_object=loss_object,
        train_step=train_step,
        nb_classes=10,
        input_shape=(28, 28, 1),
        clip_values=(0, 1),
    )

    # classifier.fit(x_train, y_train, batch_size=256, nb_epochs=20)

    for epoch in range(args.epoch):
        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        train_acc.reset_states()
        test_loss.reset_states()
        test_acc.reset_states()

        for x, y in train_ds2:
            train_step(model, x, y)

        if args.write_summary:
            with train_summary_writer.as_default():
                tf.summary.scalar('loss', train_loss.result(), step=epoch)
                tf.summary.scalar('accuracy', train_acc.result(), step=epoch)

        for x_te, y_te in test_ds:
            test_step(x_te, y_te)

        if args.write_summary:
            with test_summary_writer.as_default():
                tf.summary.scalar('loss', test_loss.result(), step=epoch)
                tf.summary.scalar('accuracy', test_acc.result(), step=epoch)

        template = 'Epoch {}, Loss: {}, Acc: {}, Test Loss: {}, Test Acc: {}'
        print(
            template.format(epoch + 1, train_loss.result(),
                            train_acc.result() * 100, test_loss.result(),
                            test_acc.result() * 100))

    predictions = classifier.predict(x_test)
    print(predictions.shape, y_test.shape)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    print('Stage 3 attacking ...')

    attack = ProjectedGradientDescent(estimator=classifier,
                                      eps=args.eps,
                                      eps_step=args.eps / 3,
                                      max_iter=20)
    x_test_adv = attack.generate(x=x_test)

    print('Stage 4 evaluating ...')

    predictions = classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))

    natural(args.eps)
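Note that TensorFlowV2Classifier invokes a supplied train_step as train_step(model, images, labels), which is why the stage-2 train_step takes the model as its first argument even though classifier.fit is left commented out here.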
Example #9
def main():
    # Create ART object detector
    frcnn = PyTorchFasterRCNN(clip_values=(0, 255),
                              attack_losses=[
                                  "loss_classifier", "loss_box_reg",
                                  "loss_objectness", "loss_rpn_box_reg"
                              ])

    # Load image 1
    image_0 = cv2.imread("./10best-cars-group-cropped-1542126037.jpg")
    image_0 = cv2.cvtColor(image_0, cv2.COLOR_BGR2RGB)  # Convert to RGB
    print("image_0.shape:", image_0.shape)

    # Load image 2
    image_1 = cv2.imread("./banner-diverse-group-of-people-2.jpg")
    image_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2RGB)  # Convert to RGB
    image_1 = cv2.resize(image_1,
                         dsize=(image_0.shape[1], image_0.shape[0]),
                         interpolation=cv2.INTER_CUBIC)
    print("image_1.shape:", image_1.shape)

    # Stack images
    image = np.stack([image_0, image_1], axis=0).astype(np.float32)
    print("image.shape:", image.shape)

    for i in range(image.shape[0]):
        plt.axis("off")
        plt.title("image {}".format(i))
        plt.imshow(image[i].astype(np.uint8), interpolation="nearest")
        plt.show()

    # Make prediction on benign samples
    predictions = frcnn.predict(x=image)

    for i in range(image.shape[0]):
        print("\nPredictions image {}:".format(i))

        # Process predictions
        predictions_class, predictions_boxes, predictions_score = extract_predictions(
            predictions[i])

        # Plot predictions
        plot_image_with_boxes(img=image[i].copy(),
                              boxes=predictions_boxes,
                              pred_cls=predictions_class)

    # Create and run attack
    eps = 32
    attack = ProjectedGradientDescent(estimator=frcnn,
                                      eps=eps,
                                      eps_step=2,
                                      max_iter=10)
    image_adv = attack.generate(x=image, y=None)

    print("\nThe attack budget eps is {}".format(eps))
    print("The resulting maximal difference in pixel values is {}.".format(
        np.amax(np.abs(image - image_adv))))

    for i in range(image_adv.shape[0]):
        plt.axis("off")
        plt.title("image_adv {}".format(i))
        plt.imshow(image_adv[i].astype(np.uint8), interpolation="nearest")
        plt.show()

    predictions_adv = frcnn.predict(x=image_adv)

    for i in range(image.shape[0]):
        print("\nPredictions adversarial image {}:".format(i))

        # Process predictions
        predictions_adv_class, predictions_adv_boxes, predictions_adv_score = extract_predictions(
            predictions_adv[i])

        # Plot predictions
        plot_image_with_boxes(img=image_adv[i].copy(),
                              boxes=predictions_adv_boxes,
                              pred_cls=predictions_adv_class)
Example #10
attack = ProjectedGradientDescent(  # the excerpt starts mid-call; this head and the
    estimator=classifier,           # eps values below are assumed reconstructions
    eps=8.0 / 255.0,
    eps_step=2.0 / 255.0,
    max_iter=40,
    targeted=False,
    num_random_init=5,
    batch_size=32,
)

# Step 4: Create the trainer object - AdversarialTrainerFBFPyTorch
# if you have apex installed, change use_amp to True
epsilon = 8.0 / 255.0
trainer = AdversarialTrainerFBFPyTorch(classifier, eps=epsilon, use_amp=False)

# Build a Keras image augmentation object and wrap it in ART
art_datagen = PyTorchDataGenerator(iterator=dataloader,
                                   size=x_train.shape[0],
                                   batch_size=128)

# Step 5: fit the trainer
trainer.fit_generator(art_datagen, nb_epochs=30)

x_test_pred = np.argmax(classifier.predict(x_test), axis=1)
print(
    "Accuracy on benign test samples after adversarial training: %.2f%%" %
    (np.sum(x_test_pred == np.argmax(y_test, axis=1)) / x_test.shape[0] * 100))

x_test_attack = attack.generate(x_test)
x_test_attack_pred = np.argmax(classifier.predict(x_test_attack), axis=1)
print(
    "Accuracy on original PGD adversarial samples after adversarial training: %.2f%%"
    % (np.sum(x_test_attack_pred == np.argmax(y_test, axis=1)) /
       x_test.shape[0] * 100))
Example #11

# Create a toy Keras CNN architecture & wrap it under ART interface
classifier = KerasClassifier(build_model(),
                             clip_values=(0, 1),
                             use_logits=False)

# Create the attack for the adversarial trainer; it crafts adversarial examples on the target model
pgd = ProjectedGradientDescent(classifier,
                               eps=8,
                               eps_step=2,
                               max_iter=10,
                               num_random_init=20)

# Create some adversarial samples for evaluation
x_test_pgd = pgd.generate(x_test)

# Create adversarial trainer and perform adversarial training
adv_trainer = AdversarialTrainer(classifier, attacks=pgd, ratio=1.0)
adv_trainer.fit_generator(art_datagen, nb_epochs=83)

# Evaluate the adversarially trained model on clean test set
labels_true = np.argmax(y_test, axis=1)
labels_test = np.argmax(classifier.predict(x_test), axis=1)
print("Accuracy test set: %.2f%%" %
      (np.sum(labels_test == labels_true) / x_test.shape[0] * 100))

# Evaluate the adversarially trained model on original adversarial samples
labels_pgd = np.argmax(classifier.predict(x_test_pgd), axis=1)
print("Accuracy on original PGD adversarial samples: %.2f%%" %
      (np.sum(labels_pgd == labels_true) / x_test.shape[0] * 100))
Example #12
def plot_attacks_acc(classifier, x, y, path_fig, dataset, title):
    '''
    Description:
        Takes a classifier and a set of labeled images and plots how the model's
        accuracy on the dataset decreases as the attack strength (perturbation size)
        increases, for three different attacks (FGSM, PGD, BIM).
    :param classifier: model to be evaluated
    :param x: images to be predicted on
    :param y: one-hot labels of the images
    :param path_fig: directory in which to save the plot figure
    :param dataset: name of dataset (e.g. mnist, cifar, ddsm, brain_mri, lidc)
    :param title: title identifying the plot figure
    :return: 0; the figure (and a CSV of the raw accuracies) is saved under path_fig
    '''
    if dataset == 'ddsm':
        eps_range = [0.00001, 0.00005, 0.0001, 0.00025, 0.0005, 0.00075, 0.001, 0.00125, 0.0015, 0.00175, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005, 0.0055, 0.006, 0.007, 0.008]
        step_size = 0.001
    elif dataset == 'brain_mri':
        eps_range = [0.0001, 0.0005, 0.001, 0.0013, 0.0016, 0.002, 0.00225, 0.0025, 0.00275, 0.003, 0.00325, 0.0035, 0.00375, 0.004, 0.0045, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.012]
        step_size = 0.001
    elif dataset == 'mnist':
        eps_range = [0.0001, 0.01, 0.02, 0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5]
        step_size = 0.1
    elif dataset == 'cifar':
        eps_range = [0.0001, 0.001, 0.002, 0.003, 0.004, 0.005, 0.007, 0.009, 0.01, 0.015, 0.02, 0.03, 0.04, 0.05]
        step_size = 0.01
    elif dataset == 'lidc':
        eps_range = [0.0001, 0.0003, 0.0006, 0.0008, 0.001, 0.00125, 0.0015, 0.00175, 0.002, 0.0023, 0.0026, 0.0028, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.02]
        step_size = 0.001
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset))
    nb_correct_fgsm = []
    nb_correct_pgd = []
    nb_correct_bim = []
    for eps in eps_range:
        attacker_fgsm = FastGradientMethod(classifier, eps=eps)
        attacker_pgd = ProjectedGradientDescent(classifier, eps=eps, eps_step=eps/4, max_iter=10,
                                            num_random_init=5)
        attacker_bim = BasicIterativeMethod(classifier, eps=eps, eps_step=eps/10, max_iter=10)
        x_fgsm = attacker_fgsm.generate(x)
        x_pgd = attacker_pgd.generate(x)
        x_bim = attacker_bim.generate(x)
        x_pred_fgsm = np.argmax(classifier.predict(x_fgsm), axis=1)
        nb_correct_fgsm += [np.sum(x_pred_fgsm == np.argmax(y, axis=1))]
        x_pred_pgd = np.argmax(classifier.predict(x_pgd), axis=1)
        nb_correct_pgd += [np.sum(x_pred_pgd == np.argmax(y, axis=1))]
        x_pred_bim = np.argmax(classifier.predict(x_bim), axis=1)
        nb_correct_bim += [np.sum(x_pred_bim == np.argmax(y, axis=1))]

    fig, ax = plt.subplots()
    ax.plot(np.array(eps_range) / step_size, 100 * np.array(nb_correct_fgsm) / y.shape[0], 'b--', label='FGSM')
    ax.plot(np.array(eps_range) / step_size, 100 * np.array(nb_correct_pgd) / y.shape[0], 'r--', label='PGD')
    ax.plot(np.array(eps_range) / step_size, 100 * np.array(nb_correct_bim) / y.shape[0], 'g--', label='BIM')
    legend = ax.legend(loc='upper right', shadow=True, fontsize='large')
    legend.get_frame().set_facecolor('#FFFFFF')
    if dataset == 'mnist':
        plt.xlabel('Perturbation (x ' + '$10^{-1}$' + ')')
    elif dataset == 'cifar':
        plt.xlabel('Perturbation (x ' + '$10^{-2}$' + ')')
    else:
        plt.xlabel('Perturbation (x ' + '$10^{-3}$' + ')')
    plt.ylabel('Accuracy (%)')
    plt.savefig(path_fig + dataset + '/' + title + '.png')
    plt.clf()

    data = [np.array(eps_range), np.array(nb_correct_fgsm) / y.shape[0], np.array(nb_correct_pgd) / y.shape[0], np.array(nb_correct_bim) / y.shape[0]]
    # Save the raw accuracies alongside the figure.
    out = csv.writer(open(path_fig + dataset + '/' + title + '.csv', "w"), delimiter=',', quoting=csv.QUOTE_ALL)
    out.writerows(zip(*data))
    return 0
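A hypothetical invocation, assuming a fitted ART classifier and one-hot test labels in scope; every argument value here is illustrative:

# Illustrative only: classifier, x_test and y_test are assumed to exist.
plot_attacks_acc(classifier, x_test, y_test,
                 path_fig='figs/', dataset='mnist', title='attacks_acc_mnist')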
Example #13
accuracy = np.sum(predictions == y_test) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 5: Generate adversarial test examples

# attack = FastGradientMethod(estimator=classifier, eps=0.1)
# x_test_adv = attack.generate(x=x_test)

# adv_crafter = DeepFool(classifier, nb_grads=args.nb_grads)
# pgd
adv_crafter_untargeted = ProjectedGradientDescent(classifier,
                                                  eps=args.eps,
                                                  eps_step=args.eps_step,
                                                  max_iter=args.max_iter)
print("PGD:Craft attack on untargeted training examples")
x_test_adv = adv_crafter_untargeted.generate(x_test)

adv_crafter_targeted = ProjectedGradientDescent(classifier,
                                                targeted=True,
                                                eps=args.eps,
                                                eps_step=args.eps_step,
                                                max_iter=args.max_iter)
print("PGD:Craft attack on targeted training examples")
targets = random_targets(y_test, nb_classes=10)
x_test_adv_targeted = adv_crafter_targeted.generate(x_test, y=targets)

#auto pgd
auto_adv_crafter_untargeted = AutoProjectedGradientDescent(
    classifier, eps=args.eps, eps_step=args.eps_step, max_iter=args.max_iter)
print("AutoPGD:Craft attack on untargeted training examples")
x_test_auto_adv = auto_adv_crafter_untargeted.generate(x_test)
Example #14
tiny_vgg.compile(optimizer=optimizer, loss=loss_object)

classifier = KerasClassifier(model=tiny_vgg,
                             clip_values=(0, 1),
                             use_logits=False)

attack = ProjectedGradientDescent(estimator=classifier,
                                  eps=16 / 255,
                                  eps_step=1 / 255,
                                  norm="inf",
                                  max_iter=200)

#attack = CarliniLInfMethod(classifier,
#    confidence=0.8, targeted=False, learning_rate=0.001)

x_test_adv = attack.generate(x=x_test)
outputs = classifier.predict(x_test_adv)

preds = np.argmax(outputs, axis=1)
trues = np.argmax(y_test, axis=1)

accuracy = np.sum(preds == trues) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
print("Ixs that worked: ")
print(np.where(preds != trues))

## Save the adversarial test images to disk.
for i in range(len(preds)):
    x = (x_test_adv[i] * 255).astype(np.uint8)
    im = Image.fromarray(x)
    im.save(ADV_IMAGE_SAVE_LOCATION + '/x_adv_' + str(i) + '.jpeg')
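One caveat: JPEG compression is lossy and can wash out small adversarial perturbations on reload; saving as PNG preserves the generated uint8 pixels exactly. A minimal variant of the save line:

# Lossless alternative to JPEG; keeps the adversarial pixels bit-exact.
im.save(ADV_IMAGE_SAVE_LOCATION + '/x_adv_' + str(i) + '.png')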
Example #15
        clip_values=(0, 1),
    )

    print("Creating adversarial attack object...\n")

    pgd = ProjectedGradientDescent(adv_classifier,
                                   norm=np.inf,
                                   eps=eps,
                                   eps_step=0.001,
                                   targeted=False,
                                   batch_size=2048,
                                   num_random_init=27)

    print("Generating adversarial samples...\n")
    logger.info("Craft attack on training examples")
    x_train_adv = pgd.generate(train_data)
    save_samples(x_train_adv, 'pgd_train', exp)
    logger.info("=" * 50)

    logger.info("Craft attack test examples")
    x_test_adv = pgd.generate(test_data)
    save_samples(x_test_adv, 'pgd_test', exp)
    logger.info("=" * 50)

    print("Evaluating adversarial samples on clean model...\n")
    preds = np.argmax(adv_classifier.predict(x_test_adv), axis=1)
    acc = np.sum(
        preds == non_encoded_test_labels) / non_encoded_test_labels.shape[0]
    logger.info("Classifier before adversarial training")
    logger.info("Accuracy on adversarial samples: %.2f%%", (acc * 100))
    logger.info("=" * 50)
eps_range = [  # variable name assumed; the excerpt is truncated before this list
    0.0001, 0.0003, 0.0006, 0.0007, 0.0008, 0.0009, 0.001, 0.00125, 0.0015,
    0.00175, 0.002, 0.0023, 0.0026, 0.0028, 0.003, 0.004, 0.005, 0.006,
    0.007, 0.008, 0.009, 0.01, 0.011, 0.012, 0.013, 0.014, 0.015, 0.016,
    0.017, 0.018, 0.019, 0.02
]

# evaluate sensitivity scores of each image
test_eps_scores = [1] * x_test.shape[0]

for eps in eps_range:
    attacker = ProjectedGradientDescent(classifier,
                                        eps=eps,
                                        eps_step=eps / 4,
                                        max_iter=max_iter,
                                        num_random_init=num_random_init)
    x_test_adv = attacker.generate(x_test)
    for i in range(x_test.shape[0]):
        img = np.expand_dims(x_test[i], axis=0)
        adv_img = np.expand_dims(x_test_adv[i], axis=0)
        pred = np.argmax(classifier.predict(img))
        pred_adv = np.argmax(classifier.predict(adv_img))
        if test_eps_scores[i] == 1:
            if pred != pred_adv:
                test_eps_scores[i] = eps
np.save(path + dataset + '/test_eps_scores.npy', test_eps_scores)

test_eps_scores = np.load(path + dataset + '/test_eps_scores.npy')

test_eps_freq = [0] * x_test.shape[0]
for eps_score in test_eps_scores:
    for i in range(len(eps_range)):