def test_clf(clf, ts):
    """Return the accuracy of `clf` on the dataset `ts`.

    Labels are predicted from `ts.X` and scored against `ts.Y` using
    `CMetricAccuracy`.
    """
    # Predict first, then build the metric and score in a single expression.
    predicted_labels = clf.predict(ts.X)
    return CMetricAccuracy().performance_score(
        y_true=ts.Y, y_pred=predicted_labels)
def test_multiclass(self):
    """Test multiclass SVM on MNIST digits.

    Fits a `CClassifierMulticlassOVA` wrapper around `CClassifierSVM` and a
    plain `CClassifierSVM` with the same parameters, then asserts that both
    reach (almost) the same accuracy and the same gradient of class 1 at
    the second test sample.
    """
    self.logger.info("Testing multiclass SVM.")

    all_digits = tuple(range(10))
    num_train = 100  # Number of training set samples
    num_test = 200  # Number of test set samples

    mnist = CDataLoaderMNIST()
    tr = mnist.load('training', digits=all_digits, num_samples=num_train)
    ts = mnist.load('testing', digits=all_digits, num_samples=num_test)

    # Normalize the features in `[0, 1]`
    tr.X /= 255
    ts.X /= 255

    # Both classifiers receive the very same parameter dict (and therefore
    # the same kernel instance).
    svm_params = {
        'kernel': CKernelRBF(gamma=0.1),
        'C': 10,
        'class_weight': {0: 1, 1: 1},
        'n_jobs': 2,
    }
    ova_svm = CClassifierMulticlassOVA(CClassifierSVM, **svm_params)
    native_svm = CClassifierSVM(**svm_params)

    gradients = []
    accuracies = []
    for model in (ova_svm, native_svm):
        model.verbose = 1

        model.fit(tr.X, tr.Y)

        # Only the predicted labels are needed; the scores are discarded.
        labels, _ = model.predict(ts.X, return_decision_function=True)

        accuracy = CMetricAccuracy().performance_score(
            y_true=ts.Y, y_pred=labels)
        accuracies.append(accuracy)

        gradients.append(model.grad_f_x(ts.X[1, :], 1))

    first_acc, second_acc = accuracies[0], accuracies[1]
    self.assertAlmostEqual(first_acc, second_acc)
    self.assert_array_almost_equal(gradients[0], gradients[1])
def test_classification(self):
    """Predict on `self.test` with `self.knn` and require accuracy > 0.9.

    The true labels, the predicted labels and the resulting accuracy are
    written to the test logger.
    """
    self.logger.info("Check the classification method... ")

    # The decision-function scores are returned too but are not used here.
    predicted, _ = self.knn.predict(
        self.test.X, return_decision_function=True)
    accuracy = CMetricAccuracy().performance_score(self.test.Y, predicted)

    true_labels_msg = "Real label:\n{:}".format(self.test.Y.tolist())
    self.logger.info(true_labels_msg)
    predicted_labels_msg = "Predicted label:\n{:}".format(predicted.tolist())
    self.logger.info(predicted_labels_msg)
    self.logger.info("Accuracy: {:}".format(accuracy))

    self.assertGreater(accuracy, 0.9)
def _test_accuracy(self, clf, ts):
    """Assert that a fitted classifier scores above 0.80 on a test set.

    Parameters
    ----------
    clf : CClassifierPyTorch
        Classifier under test; it must already be fitted.
    ts : CDataset
        Dataset whose `X` is predicted and whose `Y` is the ground truth.

    """
    self.assertTrue(clf.is_fitted())

    # Scores are returned alongside the labels but only labels are checked.
    predicted, _ = clf.predict(ts.X, return_decision_function=True)
    accuracy = CMetricAccuracy().performance_score(ts.Y, predicted)

    self.logger.info("Accuracy of PyTorch Model: {:}".format(accuracy))
    self.assertGreater(accuracy, 0.80)
# acc = metric.performance_score(y_true = ds_te_secml.Y, y_pred = preds) # print("Accuracy on test set: {:.2%}".format(acc)) # probs = secml_sklearn_clf.predict_proba(ds_te_secml.X) #Doesn't work # # #sklearn here isn't supported for performing adversarial attacks, only the native SVM of secml supports adversarial attacks # ############################################################### # # ============================================================================= x, y = ds_te_secml[:, :].X, ds_te_secml[:, :].Y # This won't work if we want to specify the target #class for each example #secml_clf = CClassifierMulticlassOVA(CClassifierSVM, kernel = CKernelRBF(gamma = 10), C = 1) secml_clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1) secml_clf.fit(ds_tr_secml) preds = secml_clf.predict(ds_te_secml.X) metric = CMetricAccuracy() acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds) print("Accuracy on test set: {:.2%}".format(acc)) #Performing the attack noise_type = 'l2' dmax = 0.4 lb, ub = None, None # with 0, 1 it goes out of bounds y_target = None #### Here y_target can be some class, indicating which class is expected for the adversarial example #solver_params = { # 'eta': 0.3, # 'max_iter': 100, # 'eps': 1e-4 #} solver_params = {
# Split in training and test from secml.data.splitter import CTrainTestSplit splitter = CTrainTestSplit( train_size=n_tr, test_size=n_ts, random_state=random_state) tr, ts = splitter.split(dataset) # Normalize the data from secml.ml.features import CNormalizerMinMax nmz = CNormalizerMinMax() tr.X = nmz.fit_transform(tr.X) ts.X = nmz.transform(ts.X) # Metric to use for training and performance evaluation from secml.ml.peval.metrics import CMetricAccuracy metric = CMetricAccuracy() # Creation of the multiclass classifier from secml.ml.classifiers import CClassifierSVM from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA from secml.ml.kernel import CKernelRBF clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF()) # Parameters for the Cross-Validation procedure xval_params = {'C': [1e-2, 0.1, 1], 'kernel.gamma': [10, 100, 1e3]} # Let's create a 3-Fold data splitter from secml.data.splitter import CDataSplitterKFold xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state) # Select and set the best training parameters for the classifier
def setUpClass(cls):
    """Run the base `CUnitTest` class setup and cache shared fixtures.

    Stores on the class the three datasets returned by
    `_create_mnist_dataset` (`_tr`, `_val`, `_ts`) and a `CMetricAccuracy`
    instance (`_metric`).
    """
    CUnitTest.setUpClass()

    train_set, val_set, test_set = cls._create_mnist_dataset()
    cls._tr = train_set
    cls._val = val_set
    cls._ts = test_set

    cls._metric = CMetricAccuracy()
def attack_keras_model(X, Y, S, nb_attack=25, dmax=0.1):
    """
    Generates an adversarial attack on a general model.

    The data is split 66% / 34% into an internal training and test set. A
    one-vs-all RBF SVM is tuned with 3-fold cross-validation and fitted on
    the training part; it acts both as the attacked classifier and as its
    own surrogate. A PGD-LS evasion attack is then run `nb_attack` times,
    each time starting from a test point drawn at random (with replacement).

    :param X: Original inputs on which the model is trained
    :param Y: Original outputs on which the model is trained
    :param S: Original protected attributes on which the model is trained
        (not used by this function)
    :param nb_attack: Number of attack runs, i.e. number of rows returned
    :param dmax: Maximum perturbation size handed to the attack
    :return: Tuple `(result_pts, result_class, labels)`: an
        `(nb_attack, n_features)` array of adversarial points, an
        `(nb_attack, 1)` array with the class predicted for each of them,
        and the labels of the first `nb_attack` internal test samples
    """
    import random

    from secml.adv.attacks.evasion import CAttackEvasionPGDLS
    from secml.array import CArray
    from secml.data import CDataset
    from secml.data.splitter import CDataSplitterKFold
    from secml.data.splitter import CTrainTestSplit
    from secml.ml.classifiers import CClassifierSVM
    from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    from secml.ml.kernel import CKernelRBF
    from secml.ml.peval.metrics import CMetricAccuracy

    # secML wants all dimensions to be homogeneous (we had previously float and int in X)
    data_set_encoded_secML = CArray(X, dtype=float, copy=True)
    data_set_encoded_secML = CDataset(data_set_encoded_secML, Y)

    n_tr = round(0.66 * X.shape[0])
    n_ts = X.shape[0] - n_tr
    logger.debug(X.shape)
    logger.debug(n_tr)
    logger.debug(n_ts)

    # Use training set for the classifier and then pick points from an internal test set.
    # No random_state is given here, so the split is not seeded by this function.
    splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts)
    tr_set_secML, ts_set_secML = splitter.split(data_set_encoded_secML)

    # Create a surrogate classifier: a one-vs-all multiclass SVM with RBF kernel
    clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

    # Parameters for the Cross-Validation procedure
    xval_params = {'C': [1e-4, 1e-3, 1e-2, 0.1, 1],
                   'kernel.gamma': [0.01, 0.1, 1, 10, 100, 1e3]}

    # Let's create a 3-Fold data splitter
    random_state = 999
    xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

    # Select and set the best training parameters for the classifier
    logger.debug("Estimating the best training parameters...")
    best_params = clf.estimate_parameters(
        dataset=tr_set_secML,
        parameters=xval_params,
        splitter=xval_splitter,
        metric='accuracy',
        perf_evaluator='xval'
    )

    # The placeholder is required: logging %-formats `msg` with its extra
    # positional arguments, and an argument without a matching placeholder
    # makes the record fail to format instead of being logged.
    logger.debug("The best training parameters are: %s", best_params)
    logger.debug(clf.get_params())
    logger.debug(clf.num_classifiers)

    # Metric to use for training and performance evaluation
    metric = CMetricAccuracy()

    # Train the classifier
    clf.fit(tr_set_secML)
    logger.debug(clf.num_classifiers)

    # Compute predictions on a test set
    y_pred = clf.predict(ts_set_secML.X)

    # Evaluate the accuracy of the classifier
    acc = metric.performance_score(y_true=ts_set_secML.Y, y_pred=y_pred)
    logger.debug("Accuracy on test set: {:.2%}".format(acc))

    # Prepare attack configuration
    noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
    lb, ub = 0, 1  # Bounds of the attack space. Can be set to `None` for unbounded
    y_target = None  # None if `error-generic` or a class label for `error-specific`

    # Should be chosen depending on the optimization problem
    solver_params = {
        'eta': 0.1,  # grid search resolution
        'eta_min': 0.1,
        'eta_max': None,  # None should be ok
        'max_iter': 1000,
        'eps': 1e-2  # Tolerance on the stopping crit.
    }

    # Run attack; the fitted classifier is used as its own surrogate.
    pgd_ls_attack = CAttackEvasionPGDLS(
        classifier=clf,
        surrogate_classifier=clf,
        surrogate_data=tr_set_secML,
        distance=noise_type,
        dmax=dmax,
        lb=lb, ub=ub,
        solver_params=solver_params,
        y_target=y_target)

    nb_feat = X.shape[1]
    # NOTE(review): np.empty leaves the buffers uninitialized, so a row whose
    # attack raises ValueError below keeps arbitrary values -- confirm that
    # callers can cope with that.
    result_pts = np.empty([nb_attack, nb_feat])
    result_class = np.empty([nb_attack, 1])

    # take a point at random being the starting point of the attack and run the attack
    for nb_iter in range(nb_attack):
        rn = random.randint(0, ts_set_secML.num_samples - 1)
        x0, y0 = ts_set_secML[rn, :].X, ts_set_secML[rn, :].Y

        try:
            y_pred_pgdls, _, adv_ds_pgdls, _ = pgd_ls_attack.run(x0, y0)
            adv_pt = adv_ds_pgdls.X.get_data()
            result_pts[nb_iter] = adv_pt
            result_class[nb_iter] = y_pred_pgdls.get_data()[0]
        except ValueError:
            logger.warning("value error on {}".format(nb_iter))

    # NOTE(review): these are the labels of the FIRST nb_attack test samples,
    # not of the randomly drawn starting points `rn` -- confirm this is the
    # intended third return value.
    return result_pts, result_class, ts_set_secML[:nb_attack, :].Y
# First split: hold out the test set from the full dataset.
trainingValidation, test = splitter.split(dataset)
# Second split: training and validation sets.
# NOTE(review): this splits `dataset` again rather than `trainingValidation`,
# so training/validation samples may overlap with `test` -- confirm intent.
splitter = CTrainTestSplit(train_size=setSamplesTrainingNumber, test_size=setSamplesValidationNumber, random_state=random_state)
training, validation = splitter.split(dataset)

# Normalize the data
# The normalizer is fitted on the training features only and then applied
# unchanged to the validation and test features.
normalizer = CNormalizerMinMax()
training.X = normalizer.fit_transform(training.X)
validation.X = normalizer.transform(validation.X)
test.X = normalizer.transform(test.X)

# Metric to use for training and performance evaluation
metric = CMetricAccuracy()

# Creation of the classifier: a single SVM with RBF kernel (gamma=10), C=1
classifier = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

# We can now fit the classifier
classifier.fit(training.X, training.Y)
print("Training of classifier complete!")

# Compute predictions on a test set
predictionY = classifier.predict(test.X)

# Bounds of the attack space. Can be set to `None` for unbounded
# Here they are the global min and max of the normalized validation features.
lowerBound, upperBound = validation.X.min(), validation.X.max()

# Should be chosen depending on the optimization problem
#) print(best_lin_params) #train classifier print("start training") clf_lin.fit(data_smp_encoded_secML) #print("linear training ended, begining rbf") #clf_rbf.fit(tr) #print("start linear classif") #clf_l.fit(data_smp_encoded_secML) print("Classifiers trained") # Metric to use for training and performance evaluation from secml.ml.peval.metrics import CMetricAccuracy metric = CMetricAccuracy() # Compute predictions on a test set y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X) #y_rbf_pred = clf_rbf.predict(ts.X) #y_l_pred = clf_l.predict(raw_data_encoded_secML.X) # Evaluate the accuracy of the classifier acc_lin = metric.performance_score(y_true=raw_data_encoded_secML.Y, y_pred=y_lin_pred) #acc_rbf = metric.performance_score(y_true=ts.Y, y_pred=y_rbf_pred) #acc_rbf = 0.0 #acc_l = metric.performance_score(y_true=raw_data_encoded_secML.Y, y_pred=y_l_pred) print("Performance evaluations ended:") print(acc_lin)
# Combine data x_use = np.concatenate((x_train, x_poison), 0) y_use = np.concatenate((y_train, y_poison), 0) else: x_use, y_use = x_train, y_train # Convert to CArray x_use, y_use = CArray(x_use), CArray(y_use) x_test, y_test = CArray(x_test), CArray(y_test) print("Poison rato: %.2f" % (len(x_poison) / len(x_train))) # Fit classifier random_state = 2021 metric = CMetricAccuracy() clf = CClassifierLogistic(C=1) clf.fit(x_use, y_use) print("Training of classifier complete!") # Compute predictions on a test set y_pred_tr = clf.predict(x_use) y_pred = clf.predict(x_test) # Evaluate the accuracy of the original classifier tr_acc = metric.performance_score(y_true=y_use, y_pred=y_pred_tr) te_acc = metric.performance_score(y_true=y_test, y_pred=y_pred) # Report metrics using poisoned model print("Poisoned | Train accuracy {:.1%}".format(tr_acc)) print("Poisoned | Test accuracy {:.1%}".format(te_acc))
# (Tail of a function whose header is above this span; presumably `load_data`,
# which is called below -- confirm.)
    # Returns the train pair, the test pair and the constant pair (0, 1),
    # which the caller below unpacks as (min_val, max_val).
    return (x_train, y_train), (x_test, y_test), (0, 1)


if __name__ == "__main__":
    import sys
    # Command line: <poison_ratio: float> <num_reps: int>
    poison_ratio = float(sys.argv[1])
    num_reps = int(sys.argv[2])
    random_state = 2021

    # Step 1: Load dataset
    (x_train, y_train), (x_test, y_test), (min_val, max_val) = load_data()

    # Metric to use for training and performance evaluation
    metric = CMetricAccuracy()

    # Creation of the classifier (logistic regression, C=1)
    clf = CClassifierLogistic(C=1)

    # Make train-val split from train data for attack purposes
    # 30% of the training data goes to validation, stratified on the labels.
    # NOTE(review): `random_state` is not passed to this split -- confirm
    # whether the split is meant to be reproducible.
    x_train_red, x_val, y_train_red, y_val = train_test_split(
        x_train, y_train, stratify=y_train, test_size=0.3)

    # Convert to CArray
    x_train, y_train = CArray(x_train), CArray(y_train)
    x_test, y_test = CArray(x_test), CArray(y_test)
    x_train_red, y_train_red = CArray(x_train_red), CArray(y_train_red)
    x_val, y_val = CArray(x_val), CArray(y_val)

    # We can now fit the classifier