def main(_):
    """Train a CNN on CIFAR-10 and report clean, FGM and PGD test accuracy.

    When ``FLAGS.adv_train`` is set, each training batch is replaced by its
    PGD adversarial counterpart before the optimisation step.  After
    ``FLAGS.nb_epochs`` epochs the model is scored on the test split three
    times per batch: unperturbed, FGM-attacked and PGD-attacked (both L-inf
    with budget ``FLAGS.eps``).

    The single positional argument is ignored (presumably the argv list
    handed over by the app runner -- confirm against the caller).
    """
    dataset = ld_cifar10()
    net = CNN()
    loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    adam = tf.optimizers.Adam(learning_rate=0.001)

    # Running mean of the training loss plus one accuracy tracker per
    # evaluation setting.
    mean_train_loss = tf.metrics.Mean(name='train_loss')
    clean_accuracy = tf.metrics.SparseCategoricalAccuracy()
    fgm_accuracy = tf.metrics.SparseCategoricalAccuracy()
    pgd_accuracy = tf.metrics.SparseCategoricalAccuracy()

    @tf.function
    def train_step(images, labels):
        # One optimiser update on a single batch; the batch loss is folded
        # into the running mean.
        with tf.GradientTape() as grad_tape:
            logits = net(images)
            batch_loss = loss_fn(labels, logits)
        grads = grad_tape.gradient(batch_loss, net.trainable_variables)
        adam.apply_gradients(zip(grads, net.trainable_variables))
        mean_train_loss(batch_loss)

    # Training phase (adversarial when FLAGS.adv_train is set).
    for _epoch in range(FLAGS.nb_epochs):
        # A fresh Keras-style progress bar per epoch.
        train_bar = tf.keras.utils.Progbar(50000)
        for images, labels in dataset.train:
            if FLAGS.adv_train:
                # Train on the PGD adversary instead of the clean batch.
                images = projected_gradient_descent(
                    net, images, FLAGS.eps, 0.01, 40, np.inf)
            train_step(images, labels)
            train_bar.add(
                images.shape[0],
                values=[('loss', mean_train_loss.result())])

    # Evaluation phase: clean, FGM and PGD inputs for every test batch.
    test_bar = tf.keras.utils.Progbar(10000)
    for images, labels in dataset.test:
        clean_accuracy(labels, net(images))

        fgm_images = fast_gradient_method(net, images, FLAGS.eps, np.inf)
        fgm_accuracy(labels, net(fgm_images))

        pgd_images = projected_gradient_descent(
            net, images, FLAGS.eps, 0.01, 40, np.inf)
        pgd_accuracy(labels, net(pgd_images))

        test_bar.add(images.shape[0])

    report = (
        ('test acc on clean examples (%): {:.3f}', clean_accuracy),
        ('test acc on FGM adversarial examples (%): {:.3f}', fgm_accuracy),
        ('test acc on PGD adversarial examples (%): {:.3f}', pgd_accuracy),
    )
    for template, metric in report:
        print(template.format(metric.result() * 100))
Example #2
0
 def advStep(self,
             model_fn,
             image,
             epsilon,
             clip_min=-1,
             clip_max=1,
             norm=2):
     """Return a fast-gradient-method adversarial version of ``image``.

     Thin wrapper around ``fast_gradient_method``: ``model_fn``, ``image``,
     ``epsilon`` and ``norm`` are forwarded as-is, and ``clip_min`` /
     ``clip_max`` (defaults -1 and 1) are passed as the clipping bounds.
     No instance state is read.
     """
     adversarial = fast_gradient_method(model_fn,
                                        image,
                                        epsilon,
                                        norm,
                                        clip_min=clip_min,
                                        clip_max=clip_max)
     return adversarial
Example #3
0
def generate_adversarial_examples(model, data, data_labels,
                                  non_encoded_data_labels, eps_range):
    """Score ``model`` on FGM adversaries of ``data`` for several epsilons.

    For every value in ``eps_range`` an L-inf fast-gradient-method attack is
    run on ``data`` and the perturbed samples are passed to
    ``model.evaluate`` together with ``data_labels`` (batch size 10).

    ``non_encoded_data_labels`` is accepted but not used by this function.

    Returns:
        A tuple ``(epsilons, losses, accs)`` of three lists with one entry
        per epsilon, in iteration order.
    """
    rows = []
    for eps in eps_range:
        perturbed = fast_gradient_method(model_fn=model,
                                         x=data,
                                         eps=eps,
                                         norm=np.inf)
        # evaluate() is expected to yield exactly (loss, accuracy).
        loss, acc = model.evaluate(x=perturbed,
                                   y=data_labels,
                                   batch_size=10,
                                   verbose=1)
        rows.append((eps, loss, acc))

    epsilons = [row[0] for row in rows]
    losses = [row[1] for row in rows]
    accs = [row[2] for row in rows]
    return epsilons, losses, accs
# Convert the test split to arrays and scale the inputs by 1/255.
y_test = np.array(y_test)
X_test = np.array(X_test)
X_test = X_test.astype('float32')/255

# Accuracy on the clean test set.
# `Sequential.predict_classes` was removed in TensorFlow 2.6; the argmax over
# `predict` is the documented replacement for multi-class models.
# NOTE(review): assumes the model emits one score per class (the sparse
# categorical metric below suggests so) -- confirm.
pred = np.argmax(model.predict(X_test), axis=-1)
acc = accuracy_score(y_test, pred)
print("Pontosság a tesztkészleten: {:.4f} %".format(acc*100))

# Accuracy on adversarial examples.
# Source: https://github.com/tensorflow/cleverhans/blob/master/tutorials/future/tf2/mnist_tutorial.py (2020.11.26)

# Generate adversarial examples with the FGSM method (L-inf, eps = 0.2) and
# time the generation.
start = time.time()

x_fgm = fast_gradient_method(model, X_test, 0.2, np.inf)

end = time.time()

print("Ellenséges példák generálási ideje az FGSM módszerrel: {:.4f} s".format(end-start))

test_acc_fgsm = tf.metrics.SparseCategoricalAccuracy()

# The whole adversarial test set is pushed through the model in one call.
y_pred_fgm = model(x_fgm)
test_acc_fgsm(y_test, y_pred_fgm)
print('Pontosság az FGSM által generált képeken: {:.4f} %'.format(test_acc_fgsm.result() * 100))

# Generate adversarial examples with the PGD method (L-inf, eps = 0.2,
# step 0.01, 40 iterations); the timer started here has no matching stop in
# the visible part of the script.
start = time.time()

x_pgd = projected_gradient_descent(model, X_test, 0.2, 0.01, 40, np.inf)
Example #5
0
def test_robustness(model, model_name, checkpoint_dict, dataset_path, eps=0.3):
    '''Measure a model's accuracy on clean test data and on FGM / PGD adversaries.

    The latest checkpoint known to the saving manager is restored, then each
    sample in the 'test' group of the HDF5 file at dataset_path is classified
    three times: unperturbed, after a fast-gradient-method attack and after a
    projected-gradient-descent attack (both L-inf with budget eps). The first
    five samples and their adversaries are drawn into a 5x3 figure that is
    saved next to the checkpoint.

    model: tf.keras.Model, the model to test
    model_name: str, name of the model (used in the printed report)
    checkpoint_dict: dictionary containing {'checkpoint_path': str, 'checkpoint': model checkpoint, 'saving_manager': manager} (returned by the make_base_model() and make_finetuning_model() functions)
    dataset_path: str, path to the dataset (e.g. RFD)
    eps: float, total epsilon for the FGM and PGD attacks

    Returns the tuple (clean_acc, fgm_acc, pgd_acc), each scaled by 100.
    '''

    from cleverhans.future.tf2.attacks import projected_gradient_descent, fast_gradient_method

    print('\n\nComputing performance on adversarial examples for model {}'.
          format(model_name))

    ckpt_path = checkpoint_dict['checkpoint_path']
    ckpt = checkpoint_dict['checkpoint']
    manager = checkpoint_dict['saving_manager']

    ckpt.restore(manager.latest_checkpoint)
    print('Checkpoint found in {}'.format(ckpt_path))

    column_titles = ('Orginials', 'FGM adversaries', 'PGD adversaries')

    with h5py.File(dataset_path, 'r') as h5_file:
        n_imgs = h5_file['test']['n_imgs'][0]
        clean_metric = tf.metrics.CategoricalAccuracy()
        fgm_metric = tf.metrics.CategoricalAccuracy()
        pgd_metric = tf.metrics.CategoricalAccuracy()

        test_bar = tf.keras.utils.Progbar(n_imgs)
        _fig, axes = plt.subplots(5, 3)
        samples = zip(h5_file['test']['data'], h5_file['test']['ID_labels'])
        for idx, (x, y) in enumerate(samples):
            # The model expects a leading batch dimension.
            x = tf.expand_dims(x, 0)

            clean_metric(y, model(x))

            x_fgm = fast_gradient_method(model, x, eps, np.inf)
            fgm_metric(y, model(x_fgm))

            x_pgd = projected_gradient_descent(model, x, eps, 0.01, 40, np.inf)
            pgd_metric(y, model(x_pgd))

            # Only the first five samples are plotted, one row each:
            # original, FGM adversary, PGD adversary.
            if idx < 5:
                for col, image in enumerate((x, x_fgm, x_pgd)):
                    axes[idx, col].imshow(np.squeeze(image), cmap='gray')
                if idx == 0:
                    for col, title in enumerate(column_titles):
                        axes[idx, col].title.set_text(title)

            sys.stdout.write('\r')
            test_bar.add(x.shape[0])

    clean_acc = clean_metric.result() * 100
    fgm_acc = fgm_metric.result() * 100
    pgd_acc = pgd_metric.result() * 100

    report = (
        ('Model {}: test acc on clean examples (%): {:.3f}', clean_acc),
        ('Model {}: test acc on FGM adversarial examples (%): {:.3f}', fgm_acc),
        ('Model {}: test acc on PGD adversarial examples (%): {:.3f}', pgd_acc),
    )
    for template, value in report:
        print(template.format(model_name, value))

    plt.suptitle(
        'test acc: originals: {:.3f}%, FGM advs: {:.3f}%, , PGD advs: {:.3f}%'.
        format(clean_acc, fgm_acc, pgd_acc))
    # The last four characters of the checkpoint path are dropped before the
    # image file name is appended.
    plt.savefig(ckpt_path[:-4] + 'adversarial_images.png')
    plt.close()

    return clean_acc, fgm_acc, pgd_acc
Example #6
0
def generate_attack(model, data, eps, norm):
    """Craft fast-gradient-method adversarial examples for ``data``.

    Thin wrapper that forwards ``model``, ``data``, ``eps`` and ``norm`` to
    ``fast_gradient_method`` and returns its result unchanged.
    """
    adversarial = fast_gradient_method(model, data, eps, norm)
    return adversarial
Example #7
0
def main(_):
    """Train ``Net`` on a synthetic 2-D two-class problem and report robustness.

    A 10000-sample dataset is generated with ``make_classification``, shown
    as a scatter plot, min-max scaled and split 75/25 into train and test
    parts.  The model is trained for ``FLAGS.nb_epochs`` epochs (on PGD
    adversaries when ``FLAGS.adv_train`` is set) and then scored on clean,
    FGM-attacked and PGD-attacked test inputs (L-inf, budget ``FLAGS.eps``).

    The single positional argument is ignored (presumably the argv list
    handed over by the app runner -- confirm against the caller).
    """
    # Synthetic training and test data.
    X, Y = make_classification(n_samples=10000,
                               n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=1,
                               class_sep=2)

    plt.figure()
    plt.scatter(X[:, 0], X[:, 1], marker='o', c=Y, s=25, edgecolor='k')
    plt.show()

    scalar = MinMaxScaler()
    scalar.fit(X)
    X = scalar.transform(X).reshape(len(X), 2, 1)
    Y = np_utils.to_categorical(Y, 2)

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        stratify=Y,
                                                        test_size=0.25)
    # NOTE(review): neither dataset is batched, so every iteration below
    # yields a single (2, 1) sample and `x.shape[0]` is 2 -- confirm this is
    # what Net and the progress bars expect.
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))

    model = Net()
    # A BinaryCrossentropy loss used to be assigned here and was immediately
    # overwritten; only the loss that was actually in effect is kept.
    # NOTE(review): Y is one-hot encoded above while this loss and the
    # metrics below are the sparse variants, which take integer class ids --
    # confirm which label encoding is intended.
    loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.optimizers.Adam(learning_rate=0.03)

    # Metrics to track the different accuracies.
    train_loss = tf.metrics.Mean(name='train_loss')
    test_acc_clean = tf.metrics.SparseCategoricalAccuracy()
    test_acc_fgsm = tf.metrics.SparseCategoricalAccuracy()
    test_acc_pgd = tf.metrics.SparseCategoricalAccuracy()

    @tf.function
    def train_step(x, y):
        # One optimiser update; the loss is folded into the running mean.
        with tf.GradientTape() as tape:
            predictions = model(x)
            loss = loss_object(y, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        train_loss(loss)

    # Train model (with adversarial training when FLAGS.adv_train is set)
    for epoch in range(FLAGS.nb_epochs):
        # keras like display of progress
        progress_bar_train = tf.keras.utils.Progbar(10000)
        for (x, y) in train_dataset:

            if FLAGS.adv_train:
                # Replace clean example with adversarial example for adversarial training
                x = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40,
                                               np.inf)
            train_step(x, y)
            progress_bar_train.add(x.shape[0],
                                   values=[('loss', train_loss.result())])

    # Evaluate on clean and adversarial data
    progress_bar_test = tf.keras.utils.Progbar(10000)
    for x, y in test_dataset:
        y_pred = model(x)
        test_acc_clean(y, y_pred)

        x_fgm = fast_gradient_method(model, x, FLAGS.eps, np.inf)
        y_pred_fgm = model(x_fgm)
        test_acc_fgsm(y, y_pred_fgm)

        x_pgd = projected_gradient_descent(model, x, FLAGS.eps, 0.01, 40,
                                           np.inf)
        y_pred_pgd = model(x_pgd)
        test_acc_pgd(y, y_pred_pgd)

        progress_bar_test.add(x.shape[0])

    print('test acc on clean examples (%): {:.3f}'.format(
        test_acc_clean.result() * 100))
    print('test acc on FGM adversarial examples (%): {:.3f}'.format(
        test_acc_fgsm.result() * 100))
    print('test acc on PGD adversarial examples (%): {:.3f}'.format(
        test_acc_pgd.result() * 100))
def test_for_sub_error(name,
                       datatype,
                       feats=10,
                       n_train=10**4,
                       n_val=10**4,
                       epochs=10,
                       epsilon=0.3,
                       verbose=0,
                       save_model=True):
    """Train three models on synthetic data and compare them on FGM adversaries.

    Steps, in order:
      1. create train/validation data for ``datatype`` via ``create_data``;
      2. build and train a "soft" model and a "sig" model;
      3. train an L2X model with ``L2X_flex``;
      4. craft untargeted L-inf FGM adversaries of the validation inputs
         against the soft model's logits network;
      5. relabel the adversaries with the generator matching ``datatype``;
      6. print an error breakdown of every model on validation and
         adversarial data.

    Args:
        name: tag used when building models, in the L2X output directory and
            in the printed breakdown titles.
        datatype: one of "XOR", "orange_skin", "nonlinear_additive", "switch".
        feats: feature count handed to ``create_data`` and ``build_model``.
        n_train: number of training samples requested.
        n_val: number of validation samples requested.
        epochs: training epochs for all three models.
        epsilon: FGM perturbation budget.
        verbose: verbosity forwarded to the training routines.
        save_model: forwarded to ``train_model``.

    Returns:
        ``(x_val, y_val, x_adv, y_adv, l2x_mod, l2x_logit_mod, l2x_pred_mod)``.
    """
    hidden_units = 200
    # Number of features L2X is asked to select for each dataset type.
    selected_feature_count = {
        "XOR": 2,
        "orange_skin": 4,
        "nonlinear_additive": 4,
        "switch": 5
    }

    # Prepare the training data.
    x_train, y_train, x_val, y_val, _ = create_data(datatype,
                                                    n=n_train,
                                                    nval=n_val,
                                                    feats=feats)

    # Initialise and train the various models.
    soft_mod, soft_logits_mod, _soft_path, soft_dir, soft_cp = build_model(
        feats, hidden_units, name, "soft")
    train_model(soft_mod, x_train, y_train, x_val, y_val, save_model,
                soft_dir, [soft_cp], SOFT_MESSAGE,
                epochs=epochs, verbose=verbose)

    sig_mod, _sig_logits_mod, _sig_path, sig_dir, sig_cp = build_model(
        feats, hidden_units, name, "sig")
    train_model(sig_mod, x_train, y_train, x_val, y_val, save_model,
                sig_dir, [sig_cp], SIG_MESSAGE,
                epochs=epochs, verbose=verbose)

    l2x_mod, l2x_logit_mod, l2x_pred_mod, _, _, _ = L2X_flex(
        x_train,
        y_train,
        x_val,
        y_val,
        activation='relu',
        filedir=str(date.today()) + "l2x" + name,
        num_selected_features=selected_feature_count[datatype],
        out_activation='sigmoid',
        loss='binary_crossentropy',
        optimizer='adam',
        num_hidden=hidden_units,
        num_layers=2,
        train=True,
        epochs=epochs,
        verbose=verbose)

    # Create the adversarial examples and convert the result to np.array.
    x_adv = fast_gradient_method(soft_logits_mod,
                                 x_val,
                                 epsilon,
                                 np.inf,
                                 targeted=False).numpy()

    # Create the correct labels for the adversarial inputs.
    if datatype == "switch":
        y_adv = generate_switch_labels(x_adv)
    else:
        if datatype == "XOR":
            y_adv = generate_XOR_labels(x_adv)
        elif datatype == "orange_skin":
            y_adv = generate_orange_labels(x_adv)
        elif datatype == "nonlinear_additive":
            y_adv = generate_additive_labels(x_adv)
        # Threshold the first label column at 0.5 to get 0/1 labels.
        y_adv = (y_adv[:, 0] > 0.5) * 1

    # (model, model kind, validation title prefix, adversarial title prefix)
    breakdowns = (
        (soft_mod, "soft", "soft val ", "soft adv "),
        (sig_mod, "sig", "sig val ", "sig adv "),
        (l2x_mod, "sig", "l2x val ", "l2x adv "),
    )
    for net, kind, val_prefix, adv_prefix in breakdowns:
        print_error_breakdown(datatype, x_val, y_val, net, kind,
                              val_prefix + name + datatype)
        print_error_breakdown(datatype, x_adv, y_adv, net, kind,
                              adv_prefix + name + datatype)

    # Everything needed to inspect the effect of the adversarial examples on
    # the predictions.
    return x_val, y_val, x_adv, y_adv, l2x_mod, l2x_logit_mod, l2x_pred_mod
#   :param y: (optional) Tensor with true labels. If targeted is true, then provide the
#             target label. Otherwise, only provide this parameter if you'd like to use true
#             labels when crafting adversarial samples. Otherwise, model predictions are used
#             as labels to avoid the "label leaking" effect (explained in this paper:
#             https://arxiv.org/abs/1611.01236). Default is None.
#   :param targeted: (optional) bool. Is the attack targeted or untargeted?
#             Untargeted, the default, will try to make the label incorrect.
#             Targeted will instead try to move in the direction of being more like y.
#   :param sanity_checks: bool, if True, include asserts (Turn them off to use less runtime /
#             memory or for unit tests that intentionally pass strange input)
# Craft FGM adversaries (L-inf, eps = 0.1, clipped to [0, 1]) for the first
# `limit` training samples.
limit = 20000
adv_input = x_train[:limit]
eps = 0.1
Z = fast_gradient_method(obj.network,
                         adv_input,
                         eps,
                         np.inf,
                         clip_min=0.,
                         clip_max=1.)
print("####################################\nAdversarial example : \n", Z)

# Saving Adversarial Examples
# The context manager closes the file even if pickling raises.
with open("adv_data.pkl", 'wb') as file:
    pickle.dump(Z, file)

# Printing Accuracy
# `accu` is the accuracy on the adversarial inputs, `accu2` on the untouched
# test set; only the latter is printed in the visible part of the script.
pred = obj.network(Z)
accu = obj.accuracy(pred, y_train[:limit])
pred2 = obj.network(x_test)
accu2 = obj.accuracy(pred2, y_test)
print(f"Original data  accuracy : {accu2:<.3f}")