Example no. 1
from secml.ml.peval.metrics import CMetricAccuracy

def test_clf(clf, ts):
    # Compute predictions on a test set
    y_pred = clf.predict(ts.X)
    # Metric to use for performance evaluation
    metric = CMetricAccuracy()
    # Evaluate the accuracy of the classifier
    acc = metric.performance_score(y_true=ts.Y, y_pred=y_pred)
    return acc
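For reference, a minimal sketch of how this helper might be called; the classifier choice and the tr/ts CDataset objects below are illustrative assumptions, not part of the original example:

from secml.ml.classifiers import CClassifierSVM

# Assumed: tr and ts are SecML CDataset objects (training and test splits)
clf = CClassifierSVM()  # illustrative classifier
clf.fit(tr.X, tr.Y)
print("Accuracy on test set: {:.2%}".format(test_clf(clf, ts)))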
Example no. 2
    def test_multiclass(self):
        """Test multiclass SVM on MNIST digits."""

        self.logger.info("Testing multiclass SVM.")

        digits = tuple(range(0, 10))
        n_tr = 100  # Number of training set samples
        n_ts = 200  # Number of test set samples

        loader = CDataLoaderMNIST()
        tr = loader.load('training', digits=digits, num_samples=n_tr)
        ts = loader.load('testing', digits=digits, num_samples=n_ts)

        # Normalize the features in `[0, 1]`
        tr.X /= 255
        ts.X /= 255

        svm_params = {
            'kernel': CKernelRBF(gamma=0.1),
            'C': 10,
            'class_weight': {
                0: 1,
                1: 1
            },
            'n_jobs': 2
        }
        classifiers = [
            CClassifierMulticlassOVA(CClassifierSVM, **svm_params),
            CClassifierSVM(**svm_params),
        ]

        grads = []
        acc = []
        for clf in classifiers:
            clf.verbose = 1
            # We can now fit the classifier
            clf.fit(tr.X, tr.Y)
            # Compute predictions on a test set
            y_pred, scores = clf.predict(ts.X, return_decision_function=True)
            # Evaluate the accuracy of the classifier
            metric = CMetricAccuracy()
            acc.append(metric.performance_score(y_true=ts.Y, y_pred=y_pred))
            grads.append(clf.grad_f_x(ts.X[1, :], 1))

        self.assertAlmostEqual(acc[0], acc[1])
        self.assert_array_almost_equal(grads[0], grads[1])
Example no. 3
# print("Accuracy on test set: {:.2%}".format(acc))
# probs = secml_sklearn_clf.predict_proba(ds_te_secml.X)       #Doesn't work
#
# #sklearn here isn't supported for performing adversarial attacks, only the native SVM of secml supports adversarial attacks
# ###############################################################
#
# =============================================================================
# This won't work if we want to specify the target class for each example
x, y = ds_te_secml[:, :].X, ds_te_secml[:, :].Y

#secml_clf = CClassifierMulticlassOVA(CClassifierSVM, kernel = CKernelRBF(gamma = 10), C = 1)
secml_clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
secml_clf.fit(ds_tr_secml)
preds = secml_clf.predict(ds_te_secml.X)
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
print("Accuracy on test set: {:.2%}".format(acc))

#Performing the attack
noise_type = 'l2'
dmax = 0.4
lb, ub = None, None  # with 0, 1 it goes out of bounds
y_target = None  # Set to a class label to target a specific class for the adversarial example

#solver_params = {
#    'eta': 0.3,
#    'max_iter': 100,
#    'eps': 1e-4
#}
solver_params = {
    'eta': 0.05,
    'max_iter': 100,
    'eps': 1e-4
}

# Select and set the best training parameters for the classifier
best_params = clf.estimate_parameters(
    dataset=tr,
    parameters=xval_params,
    splitter=xval_splitter,
    metric='accuracy',
    perf_evaluator='xval'
)

print("The best training parameters are: ", best_params)

# We can now fit the classifier
clf.fit(tr)

# Compute predictions on a test set
y_pred = clf.predict(ts.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=ts.Y, y_pred=y_pred)

print("Accuracy on test set: {:.2%}".format(acc))

x0, y0 = ts[5, :].X, ts[5, :].Y  # Initial sample
print(x0.dtype)
print(y0.dtype)

noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
dmax = 0.4  # Maximum perturbation
lb, ub = 0, 1  # Bounds of the attack space. Can be set to `None` for unbounded
y_target = None  # None if `error-generic` or a class label for `error-specific`

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.3,
    'max_iter': 100,
    'eps': 1e-4
}

def attack_keras_model(X, Y, S, nb_attack=25, dmax=0.1):
    """
    Generates an adversarial attack on a general model.

    :param X: Original inputs on which the model is trained
    :param Y: Original outputs on which the model is trained
    :param S: Original protected attributes on which the model is trained
    :param nb_attack: Number of adversarial points to generate
    :param dmax: Maximum perturbation for the evasion attack
    :return: Adversarial points, their predicted labels, and the true labels of the attacked samples
    """

    import numpy as np

    from secml.data import CDataset
    from secml.array import CArray

    # secML wants all dimensions to be homogeneous (we had previously float and int in X)
    data_set_encoded_secML = CArray(X, dtype=float, copy=True)
    data_set_encoded_secML = CDataset(data_set_encoded_secML, Y)

    n_tr = round(0.66 * X.shape[0])
    n_ts = X.shape[0] - n_tr

    logger.debug(X.shape)
    logger.debug(n_tr)
    logger.debug(n_ts)

    from secml.data.splitter import CTrainTestSplit
    splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts)

    # Use training set for the classifier and then pick points from an internal test set.
    tr_set_secML, ts_set_secML = splitter.split(data_set_encoded_secML)

    # tr_set_secML = CDataset(X_train,Y_train)
    # ts_set_secML = CDataset(X_test,Y_test)

    # Create a surrogate classifier

    # Creation of the multiclass classifier
    from secml.ml.classifiers import CClassifierSVM
    from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    from secml.ml.kernel import CKernelRBF
    clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

    # Parameters for the Cross-Validation procedure
    xval_params = {'C': [1e-4, 1e-3, 1e-2, 0.1, 1], 'kernel.gamma': [0.01, 0.1, 1, 10, 100, 1e3]}

    # Let's create a 3-Fold data splitter
    random_state = 999

    from secml.data.splitter import CDataSplitterKFold
    xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

    # Select and set the best training parameters for the classifier
    logger.debug("Estimating the best training parameters...")
    best_params = clf.estimate_parameters(
        dataset=tr_set_secML,
        parameters=xval_params,
        splitter=xval_splitter,
        metric='accuracy',
        perf_evaluator='xval'
    )
    logger.debug("The best training parameters are: ", best_params)

    logger.debug(clf.get_params())
    logger.debug(clf.num_classifiers)

    # Metric to use for training and performance evaluation
    from secml.ml.peval.metrics import CMetricAccuracy
    metric = CMetricAccuracy()

    # Train the classifier
    clf.fit(tr_set_secML)
    logger.debug(clf.num_classifiers)

    # Compute predictions on a test set
    y_pred = clf.predict(ts_set_secML.X)

    # Evaluate the accuracy of the classifier
    acc = metric.performance_score(y_true=ts_set_secML.Y, y_pred=y_pred)

    logger.debug("Accuracy on test set: {:.2%}".format(acc))

    # Prepare attack configuration

    noise_type = 'l2'   # Type of perturbation 'l1' or 'l2'
    lb, ub = 0, 1       # Bounds of the attack space. Can be set to `None` for unbounded
    y_target = None     # None if `error-generic` or a class label for `error-specific`

    # Should be chosen depending on the optimization problem
    solver_params = {
        'eta': 0.1,         # grid search resolution
        'eta_min': 0.1,
        'eta_max': None,    # None should be ok
        'max_iter': 1000,
        'eps': 1e-2         # Tolerance on the stopping crit.
    }

    # Run attack

    from secml.adv.attacks.evasion import CAttackEvasionPGDLS
    pgd_ls_attack = CAttackEvasionPGDLS(
        classifier=clf,
        surrogate_classifier=clf,
        surrogate_data=tr_set_secML,
        distance=noise_type,
        dmax=dmax,
        lb=lb, ub=ub,
        solver_params=solver_params,
        y_target=y_target)

    nb_feat = X.shape[1]

    result_pts = np.empty([nb_attack, nb_feat])
    result_class = np.empty([nb_attack, 1])

    # take a point at random being the starting point of the attack and run the attack
    import random
    for nb_iter in range(0, nb_attack):
        rn = random.randint(0, ts_set_secML.num_samples - 1)
        x0, y0 = ts_set_secML[rn, :].X, ts_set_secML[rn, :].Y

        try:
            y_pred_pgdls, _, adv_ds_pgdls, _ = pgd_ls_attack.run(x0, y0)
            adv_pt = adv_ds_pgdls.X.get_data()
            # np.asarray([np.asarray(row, dtype=float) for row in y_tr], dtype=float)
            result_pts[nb_iter] = adv_pt
            result_class[nb_iter] = y_pred_pgdls.get_data()[0]
        except ValueError:
            logger.warning("value error on {}".format(nb_iter))

    return result_pts, result_class, ts_set_secML[:nb_attack, :].Y
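A hedged usage sketch for attack_keras_model; the synthetic X, Y, S arrays, the logger setup, and all parameter values below are illustrative assumptions:

import logging
import numpy as np

logger = logging.getLogger(__name__)  # the function body expects a module-level logger

# Purely illustrative synthetic data: 300 samples, 5 numeric features, binary labels
X = np.random.rand(300, 5)
Y = np.random.randint(0, 2, size=300)
S = np.random.randint(0, 2, size=300)  # protected attributes

adv_x, adv_classes, true_y = attack_keras_model(X, Y, S, nb_attack=10, dmax=0.1)
print(adv_x.shape, adv_classes.shape)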
# choose and set the initial poisoning sample features and label
choiceX = training[0, :].X
choiceY = training[0, :].Y
poisonAttack.x0 = choiceX
poisonAttack.xc = choiceX
poisonAttack.yc = choiceY

print("Initial poisoning sample features: {:}".format(choiceX.ravel()))
print("Initial poisoning sample label: {:}".format(choiceY.item()))

# Number of poisoning points to generate
poisonAttack.n_points = 100

# Run the poisoning attack
print("Attack started...")
poisonYPrediction, poisonScores, poisoningPoints, f_opt = poisonAttack.run(
    test.X, test.Y)
print("Attack complete!")

# Evaluate the accuracy of the original classifier
originalClassifierAccuracy = metric.performance_score(y_true=test.Y,
                                                      y_pred=predictionY)

# Evaluate the accuracy after the poisoning attack
poisonedAccuracy = metric.performance_score(y_true=test.Y,
                                            y_pred=poisonYPrediction)

print(
    "Original accuracy on test set: {:.2%}".format(originalClassifierAccuracy))
print("Accuracy after attack on test set: {:.2%}".format(poisonedAccuracy))
#print("start linear classif")
#clf_l.fit(data_smp_encoded_secML)

print("Classifiers trained")

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Compute predictions on a test set
y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X)
#y_rbf_pred = clf_rbf.predict(ts.X)
#y_l_pred = clf_l.predict(raw_data_encoded_secML.X)

# Evaluate the accuracy of the classifier
acc_lin = metric.performance_score(y_true=raw_data_encoded_secML.Y,
                                   y_pred=y_lin_pred)
#acc_rbf = metric.performance_score(y_true=ts.Y, y_pred=y_rbf_pred)
#acc_rbf = 0.0
#acc_l = metric.performance_score(y_true=raw_data_encoded_secML.Y, y_pred=y_l_pred)

print("Performance evaluations ended:")
print(acc_lin)
#print(acc_rbf)
#print(acc_l)

print("Begin setup for attack")

import random
from secml.adv.attacks.evasion import CAttackEvasionPGD
#perform adversarial attacks
noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
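As a rough sketch only, an evasion attack with CAttackEvasionPGD could be configured and run along the lines of the PGDLS example earlier in this section; the use of clf_lin and raw_data_encoded_secML, all parameter values, and the exact constructor arguments (which vary across secml versions) are assumptions:

dmax = 0.4          # Maximum perturbation
lb, ub = 0, 1       # Bounds of the attack space
y_target = None     # error-generic attack
solver_params = {'eta': 0.3, 'max_iter': 100, 'eps': 1e-4}

pgd_attack = CAttackEvasionPGD(
    classifier=clf_lin,
    surrogate_classifier=clf_lin,
    surrogate_data=raw_data_encoded_secML,
    distance=noise_type,
    dmax=dmax,
    lb=lb, ub=ub,
    solver_params=solver_params,
    y_target=y_target)

# Attack a random point from the dataset
rn = random.randint(0, raw_data_encoded_secML.num_samples - 1)
x0, y0 = raw_data_encoded_secML[rn, :].X, raw_data_encoded_secML[rn, :].Y
y_pred_adv, _, adv_ds, _ = pgd_attack.run(x0, y0)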
Example no. 8
        x_use = np.concatenate((x_train, x_poison), 0)
        y_use = np.concatenate((y_train, y_poison), 0)
    else:
        x_use, y_use = x_train, y_train

    # Convert to CArray
    x_use, y_use = CArray(x_use), CArray(y_use)
    x_test, y_test = CArray(x_test), CArray(y_test)

    print("Poison rato: %.2f" % (len(x_poison) / len(x_train)))

    # Fit classifier
    random_state = 2021

    metric = CMetricAccuracy()
    clf = CClassifierLogistic(C=1)
    clf.fit(x_use, y_use)
    print("Training of classifier complete!")

    # Compute predictions on a test set
    y_pred_tr = clf.predict(x_use)
    y_pred = clf.predict(x_test)

    # Evaluate the accuracy of the original classifier
    tr_acc = metric.performance_score(y_true=y_use, y_pred=y_pred_tr)
    te_acc = metric.performance_score(y_true=y_test, y_pred=y_pred)

    # Report metrics using poisoned model
    print("Poisoned | Train accuracy {:.1%}".format(tr_acc))
    print("Poisoned | Test accuracy {:.1%}".format(te_acc))
Example no. 9
    # Run the poisoning attack
    print("Attack started...")
    _, _, pois_ds, _ = pois_attack.run(x_val, y_val)
    print("Attack complete!")

    # Extract, repeat, and save poisoned data
    pr_x, pr_y = pois_ds.X.tondarray(), pois_ds.Y.tondarray()
    pr_x = np.repeat(pr_x, num_reps, axis=0)
    pr_y = np.repeat(pr_y, num_reps, axis=0)
    np.savez("LR_data/%.2f_%d_data" % (poison_ratio, num_reps), x=pr_x, y=pr_y)

    print("Adding %d additional points" % len(pr_x))

    # Training of the poisoned classifier
    pois_ds_repeat = CDataset(CArray(pr_x), CArray(pr_y))
    pois_clf = clf.deepcopy()
    # Join the training set with the poisoning points
    pois_tr = tr.append(pois_ds_repeat)
    pois_clf.fit(pois_tr.X, pois_tr.Y)

    # Evaluate the accuracy of the original classifier
    acc = metric.performance_score(y_true=y_test, y_pred=y_pred)

    # Evaluate the accuracy after the poisoning attack
    pois_y_pred = pois_clf.predict(x_test)
    pois_acc = metric.performance_score(y_true=y_test, y_pred=pois_y_pred)

    # Report metrics using poisoned model
    print("Test accuracy on clean model: {:.2%}".format(acc))
    print("Test accuracy on posioned model: {:.2%}".format(pois_acc))
Example no. 10
random_state = 0

training_set, validation_set, test_set, \
privileged_condition_validation = load_data()

clf = CClassifierLogistic()
clf.fit(training_set)
print("Training of classifier complete!")

# Compute predictions on a test set
y_pred, scores = clf.predict(test_set.X, return_decision_function=True)

# Evaluate the accuracy of the classifier
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=test_set.Y, y_pred=y_pred)
print("Accuracy on test set: {:.2%}".format(acc))

# Should be chosen depending on the optimization problem
solver_params = {'eta': 0.05, 'max_iter': 1000, 'eps': 1e-6}

pois_attack = CAttackPoisoningLogisticRegression(classifier=clf,
                                                 training_data=training_set,
                                                 surrogate_classifier=clf,
                                                 surrogate_data=validation_set,
                                                 val=validation_set,
                                                 lb=-20,
                                                 ub=20,
                                                 solver_type='pgd',
                                                 solver_params=solver_params,
                                                 random_seed=random_state,
Example no. 11
                                  training_data=training_data,
                                  val=validation_data,
                                  lb=lb,
                                  ub=ub,
                                  solver_params=solver_params,
                                  random_seed=random_state)

pois_attack.n_points = n_poisoning_points
# Run the poisoning attack
print("Attack started...")
pois_y_pred, pois_scores, pois_ds, f_opt = pois_attack.run(
    test_data.X, test_data.Y)
print("Attack complete!")

# Evaluate the accuracy of the original classifier
acc = metric.performance_score(y_true=test_data.Y, y_pred=y_pred)
# Evaluate the accuracy after the poisoning attack
pois_acc = metric.performance_score(y_true=test_data.Y, y_pred=pois_y_pred)

print("Original accuracy on test set: {:.2%}".format(acc))
print(
    "Accuracy after non-adaptive attack on test set: {:.2%}".format(pois_acc))

# Adaptive attacker #################################################################################
clf = CClassifierSVM(kernel=CKernelRBF(gamma=GAMMA), C=C)
# We can now fit the classifier
clf.fit(training_data.X, training_data.Y)

pois_attack = CAttackPoisoningSVMwithLID(classifier=clf,
                                         training_data=training_data,
                                         val=validation_data,