def setUp(self):
    """Fit an RBF one-vs-all SVM on min-max scaled two-feature blobs."""
    ova_svm = CClassifierMulticlassOVA(
        classifier=CClassifierSVM, kernel='rbf')
    self.clf = ova_svm

    # Four blob centers, two features, fixed seed
    blobs = CDLRandomBlobs(
        random_state=3, n_features=2, centers=4).load()
    self.dataset = blobs

    # The stored dataset keeps the rescaled features
    scaler = CNormalizerMinMax()
    blobs.X = scaler.fit_transform(blobs.X)

    ova_svm.fit(blobs.X, blobs.Y)
def test_multiclass_gradient(self):
    """Check the gradient requested for all classes at once through `w`.

    The decision scores are divided element-wise by random factors and
    averaged; the gradient of that scalar, obtained from `gradient` with
    the matching per-class weights, is compared with a numerical estimate.
    """
    from secml.optim.function import CFunction

    ova = CClassifierMulticlassOVA(
        classifier=CClassifierSVM, class_weight='balanced')
    ova.fit(self.dataset.X, self.dataset.Y)

    # One random divisor per class
    scale = CArray.rand(shape=ova.n_classes, random_state=0)

    def f_x(x):
        _, scores = ova.predict(x, return_decision_function=True)
        return CArray((scores / scale).mean())

    def grad_f_x(x):
        # d/dx mean(scores / scale) = sum_c grad_c / (scale_c * n_classes)
        weights = CArray.ones(shape=ova.n_classes) / (
            scale * ova.n_classes)
        return ova.gradient(x, w=weights)

    sample_idx = 5  # Sample to test
    sample = self.dataset.X[sample_idx, :]

    grad_check = CFunction(f_x, grad_f_x)
    grad_err = grad_check.check_grad(sample, epsilon=1e-1)
    self.logger.info("norm(grad - num_grad): %s", str(grad_err))
    self.assertLess(grad_err, 1e-3)
class TestCPlotClassifier(CUnitTest):
    """Unit test for CPlotClassifier."""

    def setUp(self):
        """Fit an RBF one-vs-all SVM on min-max scaled blob data."""
        ova_svm = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, kernel='rbf')
        self.clf = ova_svm

        blobs = CDLRandomBlobs(
            random_state=3, n_features=2, centers=4).load()
        self.dataset = blobs
        blobs.X = CNormalizerMinMax().fit_transform(blobs.X)

        ova_svm.fit(blobs.X, blobs.Y)

    def test_plot_decision_regions(self):
        """Test for `.plot_decision_regions` method."""
        fig = CFigure(width=10, height=5)

        # First panel disables the background, the second one leaves
        # `plot_background` at its default value
        panel_options = ({'plot_background': False}, {})
        for column, options in enumerate(panel_options, start=1):
            fig.subplot(1, 2, column)
            fig.sp.plot_ds(self.dataset)
            fig.sp.plot_decision_regions(
                self.clf, n_grid_points=200, **options)

        fig.show()
def setUp(self):
    """Create the synthetic dataset and fit a balanced RBF OVA SVM."""
    import numpy as np

    np.random.seed(12345678)

    # Synthetic 3-class problem with two features
    loader = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                       n_clusters_per_class=1, class_sep=1,
                       random_state=0)
    self.ds = loader.load()

    # Samples with first feature > 0 and second feature > 1 are relabelled
    # with a new label equal to the current number of classes
    relabel_mask = (self.ds.X[:, 0] > 0).logical_and(self.ds.X[:, 1] > 1)
    self.ds.Y[relabel_mask.ravel()] = self.ds.num_classes

    self.kernel = CKernelRBF(gamma=10)

    # Features are min-max scaled before training
    self.normalizer = CNormalizerMinMax()
    self.ds.X = self.normalizer.fit_transform(self.ds.X)

    ova = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                   class_weight='balanced',
                                   preprocess=None,
                                   kernel=self.kernel)
    self.multiclass = ova
    ova.verbose = 0

    # Train, then store labels and scores predicted on the training data
    ova.fit(self.ds.X, self.ds.Y)
    prediction = ova.predict(self.ds.X, return_decision_function=True)
    self.y_pred, self.score_pred = prediction
def test_plot_decision_function(self):
    """Plot decision regions and per-class boundaries of an OVA SVM."""

    def plot_hyperplane(img, clf, min_v, max_v, linestyle, label):
        """Draw the separating line of one binary classifier."""
        # Widen the range on both sides so the line is long enough
        xx = CArray.linspace(min_v - 5, max_v + 5)
        # w[0] * x + w[1] * y + b = 0, solved for y
        yy = -(clf.w[0] * xx + clf.b) / clf.w[1]
        img.sp.plot(xx, yy, linestyle, label=label)

    # Synthetic 3-class problem with two features
    ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                   n_clusters_per_class=1, class_sep=1,
                   random_state=0).load()

    ova = CClassifierMulticlassOVA(
        classifier=CClassifierSVM,
        class_weight='balanced',
        preprocess='min-max')
    ova.fit(ds.X, ds.Y)
    # Only the predicted labels are needed for the plot
    y_pred, _ = ova.predict(ds.X, return_decision_function=True)

    fig = CFigure(height=7, width=8)
    title = '{:} ({:})'.format(ova.__class__.__name__,
                               ova.classifier.__name__)
    fig.sp.title(title)

    x_bounds, y_bounds = ds.get_bounds()

    styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.']

    for c_idx, c in enumerate(ds.classes):
        style = styles[c_idx]
        color = style[0]  # first character of the style is the color
        is_true_c = ds.Y == c
        is_pred_c = y_pred == c

        # Boundary of the binary classifier trained for this class
        plot_hyperplane(fig, ova._binary_classifiers[c_idx],
                        x_bounds[0], x_bounds[1], style,
                        'Boundary\nfor class {:}'.format(c))

        # Small filled markers: true labels
        fig.sp.scatter(ds.X[is_true_c, 0], ds.X[is_true_c, 1],
                       s=40, c=color)
        # Large empty markers: predicted labels
        fig.sp.scatter(ds.X[is_pred_c, 0], ds.X[is_pred_c, 1],
                       s=160, edgecolors=color,
                       facecolors='none', linewidths=2)

    # Regions of the whole multiclass classifier
    fig.sp.plot_decision_regions(ova, n_grid_points=100,
                                 grid_limits=ds.get_bounds(offset=5))

    x_margin = .5 * x_bounds[1]
    y_margin = .5 * y_bounds[1]
    fig.sp.xlim(x_bounds[0] - x_margin, x_bounds[1] + x_margin)
    fig.sp.ylim(y_bounds[0] - y_margin, y_bounds[1] + y_margin)

    fig.sp.legend(loc=4)  # lower, right

    fig.show()
def setUp(self):
    """Fit a one-vs-all SVM on random data and store its predictions."""
    loader = CDLRandom(n_classes=3, n_samples=50, random_state=0,
                       n_informative=3)
    self.ds = loader.load()

    self.logger.info("Fit an SVM and classify dataset...")
    ova = CClassifierMulticlassOVA(CClassifierSVM)
    self.ova = ova
    ova.fit(self.ds.X, self.ds.Y)

    # Labels and decision scores on the training samples
    prediction = ova.predict(self.ds.X, return_decision_function=True)
    self.labels, self.scores = prediction
def setUp(self):
    """Train an RBF one-vs-all SVM and wrap it in the explainer."""
    # NOTE(review): the previous comment here read "100 samples, 2 classes,
    # 20 features", which does not look like a digits dataset - confirm
    self.ds = CDLDigits().load()

    rbf_kernel = CKernelRBF(gamma=1e-3)
    self.clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=rbf_kernel)

    # The explainer receives the already trained classifier
    self.clf.fit(self.ds.X, self.ds.Y)
    self.explainer = CExplainerGradientInput(self.clf)
def _prepare_multiclass_svm(self, sparse, seed):
    """Prepare what is required for attacking a MULTICLASS SVM.

    - load the data through `_load_mnist49`
    - create a one-vs-all MULTICLASS SVM with RBF kernel
      (C=1, gamma=0.01)

    Parameters
    ----------
    sparse : bool
        Forwarded to `_load_mnist49`.
    seed : int or None
        Forwarded to `_load_mnist49`.

    Returns
    -------
    clf : CClassifierMulticlassOVA
        The newly created classifier; it is not fitted here. The value
        returned by `_load_mnist49`, if any, is not used.

    """
    self._load_mnist49(sparse, seed)

    rbf_kernel = CKernel.create('rbf', gamma=0.01)
    return CClassifierMulticlassOVA(
        classifier=CClassifierSVM, C=1.0, kernel=rbf_kernel)
def test_fun(self):
    """Test for decision_function() and predict() methods."""
    self.logger.info("Test for decision_function() and predict() methods.")

    ova = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                   class_weight='balanced')

    # Same classifier instance, first on dense then on sparse data
    collected = []
    for convert in (self.dataset.todense, self.dataset.tosparse):
        collected.append(self._test_fun(ova, convert()))

    dense_scores, sparse_scores = collected
    self.assert_array_almost_equal(dense_scores, sparse_scores)
def setUpClass(cls):
    """Train the RBF one-vs-all SVM shared by the tests of this class."""
    CUnitTest.setUpClass()

    # NOTE(review): the previous comment here read "100 samples, 2 classes,
    # 20 features", which does not look like a digits dataset - confirm
    cls.ds = CDLDigits().load()

    rbf_kernel = CKernelRBF(gamma=1e-3)
    cls.clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=rbf_kernel)

    # Training classifier
    cls.clf.fit(cls.ds.X, cls.ds.Y)
def test_params_multiclass(self):
    """Parameter estimation for multiclass classifiers."""
    # Create dummy dataset (we want a test different from train)
    tr = CDLRandom(n_classes=4, n_clusters_per_class=1,
                   random_state=50000).load()

    def _estimate(**clf_options):
        """Run the estimation on a fresh RBF OVA SVM."""
        ova = CClassifierMulticlassOVA(
            CClassifierSVM, C=1, kernel=CKernel.create('rbf'),
            **clf_options)
        ova.verbose = 1

        grid = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}
        best = {'C': 10.0, 'kernel.gamma': 0.1}

        self._run_multiclass(tr, ova, grid, best)

    # Same grid and same expected result with and without preprocessing
    _estimate()

    self.logger.info("Testing with preprocessor")
    _estimate(preprocess='min-max')
def test_predict_withsvm(self):
    """Compare the OVA accuracy with sklearn's OneVsRestClassifier."""

    def _accuracy(predicted):
        """Accuracy of `predicted` against the dataset labels."""
        metric = CMetric.create('accuracy')
        return metric.performance_score(self.dataset.Y, predicted)

    sk_ova = OneVsRestClassifier(
        SVC(kernel='linear', class_weight='balanced'))

    ova = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                   class_weight='balanced')
    ova.verbose = 2
    ova.fit(self.dataset, n_jobs=2)

    # Only the predicted labels are compared
    class_pred, _ = ova.predict(
        self.dataset.X, return_decision_function=True)

    self.logger.info("Predicted: \n{:}".format(class_pred))
    self.logger.info("Real: \n{:}".format(self.dataset.Y))

    acc = _accuracy(class_pred)
    self.logger.info("Accuracy: {:}".format(acc))

    # Reference implementation, trained on the same raw data
    sk_ova.fit(self.dataset.X.get_data(), self.dataset.Y.tondarray())
    y_sklearn = sk_ova.predict(self.dataset.X.get_data())

    acc_sklearn = _accuracy(CArray(y_sklearn))
    self.logger.info("Accuracy Sklearn: {:}".format(acc_sklearn))

    self.assertLess(abs(acc - acc_sklearn), 0.01)
def test_params_multiclass(self):
    """Parameter estimation for multiclass classifiers."""

    def _estimate(best, **clf_options):
        """Run the estimation on a fresh RBF OVA SVM."""
        ova = CClassifierMulticlassOVA(
            CClassifierSVM, C=1, kernel=CKernel.create('rbf'),
            **clf_options)
        ova.verbose = 1

        grid = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}

        self._run_multiclass(ova, grid, best)

    # Expected values are given as one entry per binary classifier
    _estimate({'C': [1.0, 1.0, 10.0, 10.0],
               'kernel.gamma': [0.1, 0.1, 0.1, 0.1]})

    self.logger.info("Testing with preprocessor")
    _estimate({'C': [100, 10, 10, 1],
               'kernel.gamma': [0.1, 0.1, 0.1, 0.1]},
              preprocess='min-max')
def setUpClass(cls):
    """Load the data and train the one-vs-all SVM shared by the tests."""
    CAttackEvasionCleverhansTestCases.setUpClass()

    cls.seed = 0

    # `_load_mnist` provides the three splits plus digits and image size
    loaded = cls._load_mnist()
    cls.tr, cls.val, cls.ts, cls.digits, cls.img_w, cls.img_h = loaded

    cls.clf = CClassifierMulticlassOVA(CClassifierSVM)
    cls.clf.fit(cls.tr)

    cls.x0_img_class = 1
    cls.y_target = 2  # Target class for targeted tests
def test_normalization(self):
    """Test data normalization inside CClassifierMulticlassOVA."""
    from secml.ml.features.normalization import CNormalizerMinMax
    from secml.data import CDataset

    def _balanced_ova(**options):
        return CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                        class_weight='balanced',
                                        **options)

    # External normalization: scale first, then train on the scaled data
    scaled_x = CNormalizerMinMax().fit_transform(self.dataset.X)
    external = _balanced_ova()
    external.fit(CDataset(scaled_x, self.dataset.Y))
    pred_y_nonorm = external.predict(scaled_x)

    # Internal normalization: the classifier scales the raw data itself
    internal = _balanced_ova(preprocess='min-max')
    internal.fit(self.dataset)
    pred_y = internal.predict(self.dataset.X)

    self.logger.info("Predictions with internal norm:\n{:}".format(pred_y))
    self.logger.info(
        "Predictions with external norm:\n{:}".format(pred_y_nonorm))

    # Both ways must give exactly the same labels
    differs = pred_y_nonorm != pred_y
    self.assertFalse(differs.any())
def test_apply_method(self):
    """Check that `apply_method` sets `C` on every binary classifier."""
    ova = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                   class_weight='balanced')
    ova.fit(self.dataset.X, self.dataset.Y)

    ova.apply_method(
        CClassifierSVM.set, param_name='C', param_value=150)

    for clf_idx in range(ova.num_classifiers):
        binary_clf = ova._binary_classifiers[clf_idx]
        self.assertEqual(binary_clf.C, 150)
def test_preprocess(self):
    """Test classifier with preprocessors inside."""
    ova = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                   class_weight='balanced')

    # All linear transformations with gradient implemented: checked both
    # by the plain helper and by the gradient helper (fresh argument
    # lists are built for each call)
    for check in (self._test_preprocess, self._test_preprocess_grad):
        check(self.dataset, ova,
              ['min-max', 'mean-std'],
              [{'feature_range': (-1, 1)}, {}])

    # Mixed linear/nonlinear transformations without gradient
    self._test_preprocess(
        self.dataset, ova, ['pca', 'unit-norm'], [{}, {}])
def test_multiclass(self):
    """Test multiclass SVM on MNIST digits."""
    self.logger.info("Testing multiclass SVM.")

    digits = tuple(range(0, 10))
    n_tr = 100  # Number of training set samples
    n_ts = 200  # Number of test set samples

    loader = CDataLoaderMNIST()
    tr = loader.load('training', digits=digits, num_samples=n_tr)
    ts = loader.load('testing', digits=digits, num_samples=n_ts)

    # Normalize the features in `[0, 1]`
    for split in (tr, ts):
        split.X /= 255

    svm_params = {
        'kernel': CKernelRBF(gamma=0.1),
        'C': 10,
        'class_weight': {0: 1, 1: 1},
        'n_jobs': 2,
    }

    # The OVA wrapper and the plain SVM receive the same parameters
    classifiers = [
        CClassifierMulticlassOVA(CClassifierSVM, **svm_params),
        CClassifierSVM(**svm_params),
    ]

    acc = []
    grads = []
    for clf in classifiers:
        clf.verbose = 1

        clf.fit(tr.X, tr.Y)

        # Only the predicted labels are used
        y_pred, _ = clf.predict(ts.X, return_decision_function=True)

        acc.append(CMetricAccuracy().performance_score(
            y_true=ts.Y, y_pred=y_pred))

        # Gradient for class 1 at the second test sample
        grads.append(clf.grad_f_x(ts.X[1, :], 1))

    ova_acc, svm_acc = acc
    ova_grad, svm_grad = grads
    self.assertAlmostEqual(ova_acc, svm_acc)
    self.assert_array_almost_equal(ova_grad, svm_grad)
def setUp(self):
    """Fit a reject-threshold wrapper around a balanced RBF OVA SVM."""
    import numpy as np

    np.random.seed(12345678)

    self._dataset_creation()

    self.kernel = CKernelRBF(gamma=1)

    ova = CClassifierMulticlassOVA(
        classifier=CClassifierSVM,
        class_weight='balanced',
        preprocess=None,
        kernel=self.kernel)
    ova.verbose = 0

    # The stored classifier is the wrapper, built with threshold 0.6
    self.multiclass = CClassifierRejectThreshold(ova, 0.6)

    # Train, then store labels and scores predicted on the training data
    self.multiclass.fit(self.ds.X, self.ds.Y)
    prediction = self.multiclass.predict(
        self.ds.X, return_decision_function=True)
    self.y_pred, self.score_pred = prediction
def setUpClass(cls):
    """Train an OVA SVM on three tight blobs and pick the start point."""
    CAttackEvasionCleverhansTestCases.setUpClass()

    cls.seed = 0
    cls.y_target = None

    rbf_kernel = CKernelRBF(gamma=10)
    scaler = CNormalizerMinMax()
    cls.clf = CClassifierMulticlassOVA(
        CClassifierSVM, kernel=rbf_kernel, C=0.1, preprocess=scaler)

    # Blob centers are given explicitly as two-feature points
    blob_centers = [[0.1, 0.1], [0.5, 0], [0.8, 0.8]]
    cls.ds = CDLRandomBlobs(n_features=0,
                            centers=blob_centers,
                            cluster_std=0.01,
                            n_samples=100,
                            random_state=cls.seed).load()

    cls.clf.fit(cls.ds.X, cls.ds.Y)

    # Starting point and the label the trained classifier assigns to it
    cls.x0 = CArray([0.6, 0.2])
    cls.y0 = CArray(cls.clf.predict(cls.x0))
def test_set_get_state(self):
    """Test for set_state and get_state."""

    def _new_ova():
        """Untrained RBF OVA SVM with its own pca + mean-std chain."""
        chain = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}])
        return CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                        kernel='rbf',
                                        class_weight='balanced',
                                        preprocess=chain)

    # One value per binary classifier
    different_c = (10, 20, 30, 40)
    different_gamma = (50, 60, 70, 80)

    multi = _new_ova()
    multi.prepare(num_classes=4)
    multi.set('C', different_c)
    multi.set('kernel.gamma', different_gamma)

    multi.fit(self.dataset)
    pred_y = multi.predict(self.dataset.X)
    self.logger.info(
        "Predictions before restoring state:\n{:}".format(pred_y))

    state = multi.get_state()
    self.logger.info("State of multiclass:\n{:}".format(state))

    # Create an entirely new clf
    multi_post = _new_ova()

    # Restore state but not enough binary classifiers
    with self.assertRaises(ValueError):
        multi_post.set_state(state)

    # Restore state
    multi_post.prepare(num_classes=4)
    multi_post.set_state(state)

    for clf_idx, restored in enumerate(multi_post._binary_classifiers):
        self.assertEqual(restored.C, different_c[clf_idx])
        self.assertEqual(restored.kernel.gamma, different_gamma[clf_idx])

    pred_y_post = multi_post.predict(self.dataset.X)
    self.logger.info(
        "Predictions after restoring state:\n{:}".format(pred_y_post))

    self.assert_array_equal(pred_y, pred_y_post)
def attack_keras_model(X, Y, S, nb_attack=25, dmax=0.1):
    """
    Generates an adversarial attack on a general model.

    A one-vs-all RBF SVM is cross-validated and trained on roughly 66% of
    the data; the attack is then started `nb_attack` times from samples
    drawn at random (with replacement) from the remaining data.

    :param X: Original inputs on which the model is trained
    :param Y: Original outputs on which the model is trained
    :param S: Original protected attributes on which the model is trained
        (not used inside this function)
    :param nb_attack: Number of attacks to run, one starting sample each
    :param dmax: Maximum perturbation, forwarded to the attack
    :return: Tuple `(result_pts, result_class, true_labels)`:
        `result_pts` is a `(nb_attack, n_features)` numpy array with the
        adversarial points, `result_class` a `(nb_attack, 1)` numpy array
        with the labels predicted for them, `true_labels` a CArray with
        the original label of each attacked sample. Row `i` of all three
        refers to the same attack; rows of attacks that raised a
        `ValueError` are filled with NaN in the two numpy arrays.
    """
    import random

    from secml.adv.attacks.evasion import CAttackEvasionPGDLS
    from secml.array import CArray
    from secml.data import CDataset
    from secml.data.splitter import CDataSplitterKFold
    from secml.data.splitter import CTrainTestSplit
    from secml.ml.classifiers import CClassifierSVM
    from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    from secml.ml.kernel import CKernelRBF
    from secml.ml.peval.metrics import CMetricAccuracy

    # secML wants all dimensions to be homogeneous (we had previously float
    # and int in X)
    data_set_encoded_secML = CDataset(CArray(X, dtype=float, copy=True), Y)

    n_tr = round(0.66 * X.shape[0])
    n_ts = X.shape[0] - n_tr

    logger.debug(X.shape)
    logger.debug(n_tr)
    logger.debug(n_ts)

    # Use training set for the classifier and then pick points from an
    # internal test set.
    splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts)
    tr_set_secML, ts_set_secML = splitter.split(data_set_encoded_secML)

    # Create a surrogate classifier (multiclass one-vs-all RBF SVM)
    clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

    # Parameters for the Cross-Validation procedure
    xval_params = {'C': [1e-4, 1e-3, 1e-2, 0.1, 1],
                   'kernel.gamma': [0.01, 0.1, 1, 10, 100, 1e3]}

    # Let's create a 3-Fold data splitter
    random_state = 999
    xval_splitter = CDataSplitterKFold(num_folds=3,
                                       random_state=random_state)

    # Select and set the best training parameters for the classifier
    logger.debug("Estimating the best training parameters...")
    best_params = clf.estimate_parameters(
        dataset=tr_set_secML,
        parameters=xval_params,
        splitter=xval_splitter,
        metric='accuracy',
        perf_evaluator='xval'
    )

    # A placeholder is required: an argument without one makes the logging
    # module report a formatting error instead of the message
    logger.debug("The best training parameters are: %s", best_params)
    logger.debug(clf.get_params())
    logger.debug(clf.num_classifiers)

    # Metric to use for performance evaluation
    metric = CMetricAccuracy()

    # Train the classifier
    clf.fit(tr_set_secML)
    logger.debug(clf.num_classifiers)

    # Compute predictions on a test set and evaluate the accuracy
    y_pred = clf.predict(ts_set_secML.X)
    acc = metric.performance_score(y_true=ts_set_secML.Y, y_pred=y_pred)
    logger.debug("Accuracy on test set: {:.2%}".format(acc))

    # Prepare attack configuration
    noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
    lb, ub = 0, 1  # Bounds of the attack space. `None` for unbounded
    y_target = None  # None if `error-generic`, a label if `error-specific`

    # Should be chosen depending on the optimization problem
    solver_params = {
        'eta': 0.1,  # grid search resolution
        'eta_min': 0.1,
        'eta_max': None,  # None should be ok
        'max_iter': 1000,
        'eps': 1e-2  # Tolerance on the stopping crit.
    }

    # Run attack
    pgd_ls_attack = CAttackEvasionPGDLS(
        classifier=clf,
        surrogate_classifier=clf,
        surrogate_data=tr_set_secML,
        distance=noise_type,
        dmax=dmax,
        lb=lb, ub=ub,
        solver_params=solver_params,
        y_target=y_target)

    nb_feat = X.shape[1]

    # NaN-filled, so that the rows of failed attacks can be recognised
    # (uninitialised memory would look like valid data)
    result_pts = np.full([nb_attack, nb_feat], np.nan)
    result_class = np.full([nb_attack, 1], np.nan)
    # Original label of each attacked sample, aligned with the result rows
    true_labels = []

    # take a point at random being the starting point of the attack and
    # run the attack
    for nb_iter in range(nb_attack):
        rn = random.randint(0, ts_set_secML.num_samples - 1)
        start = ts_set_secML[rn, :]
        x0, y0 = start.X, start.Y
        true_labels.append(y0.item())

        try:
            y_pred_pgdls, _, adv_ds_pgdls, _ = pgd_ls_attack.run(x0, y0)
            result_pts[nb_iter] = adv_ds_pgdls.X.get_data()
            result_class[nb_iter] = y_pred_pgdls.get_data()[0]
        except ValueError:
            # Best effort: the failed attack keeps its NaN row
            logger.warning("value error on {}".format(nb_iter))

    return result_pts, result_class, CArray(true_labels)
class TestCExplainerGradientInput(CUnitTest):
    """Unittests for CExplainerGradientInput"""

    def setUp(self):
        """Train an RBF one-vs-all SVM and wrap it in the explainer."""
        # Digits dataset: `test_explain` reshapes each sample to an 8x8
        # image, i.e. 64 features per sample
        self.ds = CDLDigits().load()

        self.clf = CClassifierMulticlassOVA(CClassifierSVM,
                                            kernel=CKernelRBF(gamma=1e-3))

        # Training classifier
        self.clf.fit(self.ds)

        self.explainer = CExplainerGradientInput(self.clf)

    def test_explain(self):
        """Unittest for explain method."""
        i = 67
        ds_i = self.ds[i, :]
        x, y_true = ds_i.X, ds_i.Y.item()

        self.logger.info("Explaining P{:} c{:}".format(i, y_true))

        x_pred, x_score = self.clf.predict(x, return_decision_function=True)

        self.logger.info("Predicted class {:}, scores:\n{:}".format(
            x_pred.item(), x_score))
        self.logger.info("Candidates: {:}".format(x_score.argsort()[::-1]))

        # One panel for the original image plus one per class
        n_panels = self.ds.num_classes + 1
        fig = CFigure(height=1.5, width=12)

        # Plotting original image
        fig.subplot(1, n_panels, 1)
        fig.sp.imshow(x.reshape((8, 8)), cmap='gray')
        fig.sp.title("Origin c{:}".format(y_true))
        fig.sp.yticks([])
        fig.sp.xticks([])

        # One row of attributions per class
        attr = CArray.empty(shape=(self.ds.num_classes, x.size))

        # Computing attributions
        for c in self.ds.classes:

            attr_c = self.explainer.explain(x, y=c)
            attr[c, :] = attr_c
            self.logger.info("Attributions class {:}:\n{:}".format(
                c, attr_c.tolist()))

            # Check the explainer output itself: the preallocated `attr`
            # buffer is a CArray of the right shape by construction
            self.assertIsInstance(attr_c, CArray)
            self.assertEqual(x.size, attr_c.size)

        # Symmetric color range around zero
        th = max(abs(attr.min()), abs(attr.max()))

        # Plotting attributions
        for c in self.ds.classes:
            fig.subplot(1, n_panels, 2 + c)
            fig.sp.imshow(attr[c, :].reshape((8, 8)),
                          cmap='seismic', vmin=-1 * th, vmax=th)
            fig.sp.title("Attr c{:}".format(c))
            fig.sp.yticks([])
            fig.sp.xticks([])

        fig.tight_layout()
        fig.show()
def test_aspreprocess(self):
    """Test for normalizer used as preprocess."""
    from secml.ml.classifiers import CClassifierSVM
    from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA

    # Network trained first, then used as feature extractor
    model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3)
    net = CClassifierPyTorch(
        model=model,
        loss=nn.CrossEntropyLoss(),
        optimizer=optim.SGD(model.parameters(), lr=1e-1),
        random_state=0, epochs=10, preprocess='min-max')
    net.fit(self.ds.X, self.ds.Y)

    norm = CNormalizerDNN(net=net)

    clf = CClassifierMulticlassOVA(
        classifier=CClassifierSVM, preprocess=norm)

    def _fit_and_log():
        """Train `clf`, then log labels, predictions and scores."""
        clf.fit(self.ds.X, self.ds.Y)
        y_pred, scores = clf.predict(
            self.ds.X, return_decision_function=True)
        self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist()))
        self.logger.info("Predictions:\n{:}".format(y_pred.tolist()))
        self.logger.info("Scores:\n{:}".format(scores))

    def _check_shape(gradient):
        """The gradient must be a vector as long as the input."""
        self.assertTrue(gradient.is_vector_like)
        self.assertEqual(x.size, gradient.size)

    self.logger.info("Testing last layer")
    _fit_and_log()

    x = self.ds.X[0, :]

    self.logger.info("Testing last layer gradient")

    for c in self.ds.classes:
        self.logger.info("Gradient w.r.t. class {:}".format(c))

        grad = clf.grad_f_x(x, y=c)
        self.logger.info("Output of grad_f_x:\n{:}".format(grad))

        # Analytical gradient against its numerical approximation
        checker = CFunction(clf.decision_function, clf.grad_f_x)
        check_grad_val = checker.check_grad(x, y=c, epsilon=1e-1)
        self.logger.info(
            "norm(grad - num_grad): %s", str(check_grad_val))
        self.assertLess(check_grad_val, 1e-3)

        _check_shape(grad)

    # Same classifier, with the normalizer output taken from 'linear1'
    norm.out_layer = 'linear1'

    self.logger.info("Testing layer {:}".format(norm.out_layer))
    _fit_and_log()

    self.logger.info("Testing 'linear1' layer gradient")
    grad = clf.grad_f_x(x, y=0)  # y is required for multiclassova
    self.logger.info("Output of grad_f_x:\n{:}".format(grad))

    _check_shape(grad)
class TestCLossCrossEntropy(CUnitTest):
    """Unittests for CLossCrossEntropy and softmax."""

    def setUp(self):
        """Fit a one-vs-all SVM and store labels and scores it predicts."""
        loader = CDLRandom(n_classes=3, n_samples=50, random_state=0,
                           n_informative=3)
        self.ds = loader.load()

        self.logger.info("Fit an SVM and classify dataset...")
        self.ova = CClassifierMulticlassOVA(CClassifierSVM)
        self.ova.fit(self.ds.X, self.ds.Y)

        prediction = self.ova.predict(
            self.ds.X, return_decision_function=True)
        self.labels, self.scores = prediction

    def test_in_out(self):
        """Unittest for input and output to CCrossEntropy"""

        def _check_loss(l, n_samples):
            # Dense, flat, float array with one value per sample
            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertIsSubDtype(l.dtype, float)

        loss_class = CLossCrossEntropy()
        loss_name = loss_class.__class__.__name__

        # (labels, scores, log template, expected number of loss values):
        # whole dataset first, then a single sample
        cases = (
            (self.ds.Y, self.scores,
             "{:}.loss(y_true, scores).mean():\n{:}",
             self.ds.Y.size),
            (self.ds.Y[0], self.scores[0, :],
             "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}",
             1),
        )

        for y_true, scores, template, n_expected in cases:
            loss = loss_class.loss(y_true, scores)
            self.logger.info(template.format(loss_name, loss.mean()))
            _check_loss(loss, n_expected)

    def test_grad(self):
        """Compare analytical gradients with its numerical approximation."""

        def _loss_wrapper(scores, loss, true_labels):
            # Scores first, as the numerical approximation varies them
            return loss.loss(true_labels, scores)

        loss_class = CLossCrossEntropy()

        y_true = CArray.randint(0, 2, 1)
        score = CArray.randn((1, 3))

        self.logger.info("Y_TRUE: {:} SCORES: {:}".format(y_true, score))

        for pos_label in (None, 0, 1, 2):
            self.logger.info("POS_LABEL: {:}".format(pos_label))

            # real value of the gradient on x
            grad = loss_class.dloss(y_true, score, pos_label)
            self.logger.info("GRAD: {:}".format(grad))

            approx_full = CFunction(_loss_wrapper).approx_fprime(
                score, eps, loss_class, y_true)
            self.logger.info("APPROX (FULL): {:}".format(approx_full))

            # Without an explicit label the true label is the one compared
            selected = y_true.item() if pos_label is None else pos_label
            approx = approx_full[selected]
            self.logger.info("APPROX (POS_LABEL): {:}".format(approx))

            check_grad_val = (grad - approx).norm()

            self.logger.info("Gradient difference between analytical svm "
                             "gradient and numerical gradient: %s",
                             str(check_grad_val))
            self.assertLess(check_grad_val, 1e-4,
                            "the gradient is wrong {:}".format(
                                check_grad_val))
class TestCSoftmax(CUnitTest):
    """Unittests for CSoftmax."""

    def setUp(self):
        """Fit a one-vs-all SVM and store labels and scores it predicts."""
        loader = CDLRandom(n_classes=3, n_samples=50, random_state=0,
                           n_informative=3)
        self.ds = loader.load()

        self.logger.info("Fit an SVM and classify dataset...")
        self.ova = CClassifierMulticlassOVA(CClassifierSVM)
        self.ova.fit(self.ds)

        prediction = self.ova.predict(
            self.ds.X, return_decision_function=True)
        self.labels, self.scores = prediction

    def test_softmax(self):
        """Unittests for softmax function."""
        from sklearn.utils.extmath import softmax as softmax_sk

        def _compare_with_sklearn(scores):
            """Our softmax must match sklearn's up to 4 decimals."""
            sm = CSoftmax().softmax(scores)
            sm_sk = softmax_sk(scores.tondarray())

            self.logger.info("Our softmax.max():\n{:}".format(sm.max()))
            self.logger.info(
                "SKlearn softmax.max():\n{:}".format(sm_sk.max()))

            self.assertFalse((sm.round(4) != CArray(sm_sk).round(4)).any())

        _compare_with_sklearn(self.scores)

        self.logger.info("Testing a single point...")
        _compare_with_sklearn(self.scores[0, :])

    def test_softmax_gradient(self):
        """Unittests for softmax gradient:
           Compare analytical gradients with its numerical approximation."""

        self.softmax = CSoftmax()

        def _sigma_pos_label(s, y):
            """
            Compute the softmax of the scores in s and return the
            y-th element of the resulting vector

            Parameters
            ----------
            s: CArray
                scores
            y: index of the considered score into the vector

            Returns
            -------
            softmax: CArray
            """
            return self.softmax.softmax(s).ravel()[y]

        score = self.scores[0, :]

        for pos_label in (0, 1, 2):
            self.logger.info("POS_LABEL: {:}".format(pos_label))

            # real value of the gradient on x
            grad = self.softmax.gradient(score, pos_label)
            self.logger.info("ANALITICAL GRAD: {:}".format(grad))

            numerical = CFunction(_sigma_pos_label)
            approx = numerical.approx_fprime(score, 1e-5, pos_label)
            self.logger.info("NUMERICAL GRADIENT: {:}".format(approx))

            check_grad_val = (grad - approx).norm()

            self.logger.info(
                "The norm of the difference bettween the "
                "analytical and the numerical gradient is: %s",
                str(check_grad_val))
            self.assertLess(check_grad_val, 1e-4,
                            "the gradient is wrong {:}".format(
                                check_grad_val))
# Normalize the data from secml.ml.features import CNormalizerMinMax nmz = CNormalizerMinMax() tr.X = nmz.fit_transform(tr.X) ts.X = nmz.transform(ts.X) # Metric to use for training and performance evaluation from secml.ml.peval.metrics import CMetricAccuracy metric = CMetricAccuracy() # Creation of the multiclass classifier from secml.ml.classifiers import CClassifierSVM from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA from secml.ml.kernel import CKernelRBF clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF()) # Parameters for the Cross-Validation procedure xval_params = {'C': [1e-2, 0.1, 1], 'kernel.gamma': [10, 100, 1e3]} # Let's create a 3-Fold data splitter from secml.data.splitter import CDataSplitterKFold xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state) # Select and set the best training parameters for the classifier print("Estimating the best training parameters...") best_params = clf.estimate_parameters( dataset=tr, parameters=xval_params, splitter=xval_splitter, metric='accuracy',
def test_gradient(self):
    """Unittests for gradient() function.

    The gradient is checked numerically on dense and on sparse data, the
    gradient requested for class `y` is compared with the one of the
    corresponding binary classifier, and out-of-range classes must raise
    a ValueError.
    """
    multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                          class_weight='balanced')

    i = 5  # Sample to test

    self.logger.info("Testing with dense data...")
    ds = self.dataset.todense()
    multiclass.fit(ds)

    pattern = ds.X[i, :]

    # Compare with numerical gradient
    grads_d = self._test_gradient_numerical(multiclass, pattern)

    # Check if we can return the i_th classifier. The loop uses its own
    # index: reusing `i` would overwrite the sample index, and the sparse
    # run below would then test a different sample than the dense one
    for clf_idx in range(multiclass.num_classifiers):

        ova_grad = multiclass._binary_classifiers[clf_idx].grad_f_x(
            pattern)

        gradient = multiclass.grad_f_x(pattern, y=clf_idx)
        self.logger.info("Gradient of {:}^th sub-clf is:\n{:}".format(
            clf_idx, gradient))

        self.assert_array_equal(gradient, ova_grad)

    self.logger.info("Testing with sparse data...")
    ds = self.dataset.tosparse()
    multiclass.fit(ds)

    pattern = ds.X[i, :]

    # Compare with numerical gradient
    grads_s = self._test_gradient_numerical(multiclass, pattern)

    # Compare dense gradients with sparse gradients
    for grad_i, grad in enumerate(grads_d):
        self.assert_array_almost_equal(
            grad.atleast_2d(), grads_s[grad_i])

    # Test error raise
    with self.assertRaises(ValueError):
        multiclass.grad_f_x(pattern, y=-1)
    with self.assertRaises(ValueError):
        multiclass.grad_f_x(pattern, y=100)
def test_set(self):
    """Test parameter setting before and after training.

    Covers values shared by all binary classifiers, one value per binary
    classifier, and the error raised when the number of values does not
    match the available binary classifiers.
    """
    from secml.ml.kernels import CKernelRBF

    multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                          C=1, kernel=CKernelRBF())

    def _assert_shared(c_value, gamma_value):
        """Every binary classifier holds the same C and gamma."""
        for clf in multiclass._binary_classifiers:
            self.assertEqual(clf.C, c_value)
            self.assertEqual(clf.kernel.gamma, gamma_value)

    # Test set before training
    multiclass.set_params({'C': 100, 'kernel.gamma': 20})
    _assert_shared(100.0, 20.0)

    # Restoring kernel
    multiclass.set('kernel', CKernelRBF(gamma=50))

    # Setting different parameter in single trained_classifiers
    multiclass.prepare(num_classes=4)
    different_c = (10, 20, 30, 40)
    multiclass.set('C', different_c)
    different_gamma = (50, 60, 70, 80)
    multiclass.set('kernel.gamma', different_gamma)

    # Fit multiclass classifier than test set after training
    multiclass.fit(self.dataset)

    for clf_idx, clf in enumerate(multiclass._binary_classifiers):
        self.assertEqual(clf.C, different_c[clf_idx])
        self.assertEqual(clf.kernel.gamma, different_gamma[clf_idx])

    # Test set after training (this check used to be written twice in a
    # row; once is enough, as nothing changes in between)
    multiclass.set_params({'C': 30, 'kernel.gamma': 200})
    _assert_shared(30.0, 200.0)

    # Setting parameter in single trained_classifiers: the other binary
    # classifiers must not be affected
    multiclass._binary_classifiers[0].kernel.gamma = 300
    for i in range(1, multiclass.num_classifiers):
        self.assertNotEqual(
            multiclass._binary_classifiers[i].kernel.gamma, 300.0)

    # Setting different parameter in single trained_classifiers
    different_c = (100, 200, 300)

    # ValueError is raised as not enough binary classifiers are available
    with self.assertRaises(ValueError):
        multiclass.set('C', different_c)

    multiclass.prepare(num_classes=3)
    multiclass.set('C', different_c)
    for clf_idx, clf in enumerate(multiclass._binary_classifiers):
        self.assertEqual(clf.C, different_c[clf_idx])
class TestCAttackEvasionPGDLSMNIST(CAttackEvasionTestCases):
    """Unittests for CAttackEvasionPGDLS on MULTICLASS dataset."""

    def setUp(self):
        """Create the dataset and train the multiclass classifier.

        A 3-class, 2-feature random dataset is generated, part of it is
        relabelled as an additional class, features are min-max
        normalized and a one-vs-all SVM with RBF kernel is fitted on it.

        """
        import numpy as np
        np.random.seed(12345678)

        # generate synthetic data
        self.ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                            n_clusters_per_class=1, class_sep=1,
                            random_state=0).load()

        # Add a new class modifying one of the existing clusters
        self.ds.Y[(self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()] = self.ds.num_classes

        # self.kernel = None
        self.kernel = CKernelRBF(gamma=10)

        # Data normalization
        self.normalizer = CNormalizerMinMax()
        self.ds.X = self.normalizer.fit_transform(self.ds.X)

        self.multiclass = CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            class_weight='balanced',
            preprocess=None, kernel=self.kernel)
        self.multiclass.verbose = 0

        # Training and classification
        self.multiclass.fit(self.ds.X, self.ds.Y)

        self.y_pred, self.score_pred = self.multiclass.predict(
            self.ds.X, return_decision_function=True)

    def test_indiscriminate(self):
        """Test indiscriminate evasion."""
        self.y_target = None
        self.logger.info("Test indiscriminate evasion ")

        expected_x = CArray([0.1783, 0.6249])
        self._test_evasion_multiclass(expected_x)

    def test_targeted(self):
        """Test targeted evasion."""
        self.y_target = 2
        self.logger.info("Test target evasion "
                         "(with target class {:}) ".format(self.y_target))

        expected_x = CArray([0.9347, 0.3976])
        self._test_evasion_multiclass(expected_x)

    def _test_evasion_multiclass(self, expected_x):
        """Run the attack for increasing `dmax` and check the final point.

        The attack is run for each integer `dmax` from 0 up to the
        maximum value, each run being initialized with the point found
        by the previous one. The final optimal point is then compared
        with `expected_x` (4 decimals) and the figures are produced.

        Parameters
        ----------
        expected_x : CArray
            Expected optimal point after the last run.

        """
        # EVASION
        self.multiclass.verbose = 2

        if self.normalizer is not None:
            lb = self.normalizer.feature_range[0]
            ub = self.normalizer.feature_range[1]
        else:
            lb = None
            ub = None

        dmax = 2

        self.solver_params = {'eta': 1e-1, 'eta_min': 1.0}

        eva = CAttackEvasionPGDLS(classifier=self.multiclass,
                                  surrogate_classifier=self.multiclass,
                                  surrogate_data=self.ds,
                                  distance='l2', dmax=dmax, lb=lb, ub=ub,
                                  solver_params=self.solver_params,
                                  y_target=self.y_target)

        eva.verbose = 0  # 2

        # Points from class 2 region
        # p_idx = 0

        # Points from class 1 region
        # p_idx = 68

        # Points from class 3 region
        p_idx = 1  # Wrong classified point
        # p_idx = 53  # Evasion goes up usually

        # Points from class 0 region
        # p_idx = 49  # Wrong classified point
        # p_idx = 27  # Correctly classified point

        x0 = self.ds.X[p_idx, :]
        y0 = self.ds.Y[p_idx].item()

        x_seq = CArray.empty((0, x0.shape[1]))
        scores = CArray([])
        f_seq = CArray([])

        x = x0
        for d in range(dmax + 1):

            self.logger.info("Evasion at dmax: " + str(d))

            eva.dmax = d
            # Each run starts from the point found at the previous dmax
            x, f_opt = eva._run(x0=x0, y0=y0, x_init=x)
            y_pred, score = self.multiclass.predict(
                x, return_decision_function=True)
            f_seq = f_seq.append(f_opt)
            # not considering all iterations, just values at dmax
            # for all iterations, you should bring eva.x_seq and eva.f_seq
            x_seq = x_seq.append(x, axis=0)

            # Score of the true class (indiscriminate evasion)
            # or of the target class (targeted evasion)
            s = score[:, y0 if self.y_target is None else self.y_target]

            scores = scores.append(s)

        self.logger.info("Predicted label after evasion: " + str(y_pred))
        self.logger.info("Score after evasion: {:}".format(s))
        self.logger.info(
            "Objective function after evasion: {:}".format(f_opt))

        # Compare optimal point with expected
        self.assert_array_almost_equal(
            eva.x_opt.todense().ravel(), expected_x, decimal=4)

        self._make_plots(x_seq, dmax, eva, x0, scores, f_seq)

    def _make_plots(self, x_seq, dmax, eva, x0, scores, f_seq):
        """Save a 2x2 figure summarizing the evasion run.

        Subplots: decision regions with the evasion path, objective
        function with its gradients and the evasion path, classifier
        score per `dmax`, objective function value per `dmax`.
        Nothing is done if `self.make_figures` is False.

        Parameters
        ----------
        x_seq : CArray
            Points found by the attack, one row per `dmax` value.
        dmax : int
            Maximum distance used by the attack.
        eva : CAttackEvasionPGDLS
            The attack object.
        x0 : CArray
            Initial point of the attack.
        scores : CArray
            Classifier scores collected for each `dmax` value.
        f_seq : CArray
            Objective function values collected for each `dmax` value.

        """
        if self.make_figures is False:
            self.logger.debug("Skipping figures...")
            return

        fig = CFigure(height=9, width=10, markersize=6, fontsize=12)

        # Get plot bounds, taking into account ds and evaded point path
        bounds_x, bounds_y = self.ds.get_bounds()
        min_x, max_x = bounds_x
        min_y, max_y = bounds_y
        min_x = min(min_x, x_seq[:, 0].min())
        max_x = max(max_x, x_seq[:, 0].max())
        min_y = min(min_y, x_seq[:, 1].min())
        max_y = max(max_y, x_seq[:, 1].max())
        ds_bounds = [(min_x, max_x), (min_y, max_y)]

        # Plotting multiclass decision regions
        fig.subplot(2, 2, 1)
        fig = self._plot_decision_function(fig, plot_background=True)

        fig.sp.plot_path(x_seq, path_style='-',
                         start_style='o', start_facecolor='w',
                         start_edgewidth=2, final_style='o',
                         final_facecolor='k', final_edgewidth=2)

        # plot distance constraint
        fig.sp.plot_fun(func=self._rescaled_distance,
                        multipoint=True,
                        plot_background=False,
                        n_grid_points=20, levels_color='k',
                        grid_limits=ds_bounds,
                        levels=[0], colorbar=False,
                        levels_linewidth=2.0, levels_style=':',
                        alpha_levels=.4, c=x0, r=dmax)

        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Plotting multiclass evasion objective function
        fig.subplot(2, 2, 2)

        fig = self._plot_decision_function(fig)

        fig.sp.plot_fgrads(eva._objective_function_gradient,
                           grid_limits=ds_bounds, n_grid_points=20,
                           color='k', alpha=.5)

        fig.sp.plot_path(x_seq, path_style='-',
                         start_style='o', start_facecolor='w',
                         start_edgewidth=2, final_style='o',
                         final_facecolor='k', final_edgewidth=2)

        # plot distance constraint
        fig.sp.plot_fun(func=self._rescaled_distance,
                        multipoint=True,
                        plot_background=False,
                        n_grid_points=20, levels_color='w',
                        grid_limits=ds_bounds,
                        levels=[0], colorbar=False,
                        levels_style=':', levels_linewidth=2.0,
                        alpha_levels=.5, c=x0, r=dmax)

        fig.sp.plot_fun(lambda z: eva._objective_function(z),
                        multipoint=True,
                        grid_limits=ds_bounds,
                        colorbar=False, n_grid_points=20,
                        plot_levels=False)

        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        fig.subplot(2, 2, 3)
        if self.y_target is not None:
            fig.sp.title("Classifier Score for Target Class (Targ. Evasion)")
        else:
            fig.sp.title("Classifier Score for True Class (Indiscr. Evasion)")
        fig.sp.plot(scores)

        fig.sp.grid()
        fig.sp.xlim(0, dmax)
        fig.sp.xlabel("dmax")

        fig.subplot(2, 2, 4)
        fig.sp.title("Objective Function")
        fig.sp.plot(f_seq)

        fig.sp.grid()
        fig.sp.xlim(0, dmax)
        fig.sp.xlabel("dmax")

        fig.tight_layout()

        # 'lin' is used in the file name when no kernel is set
        k_name = self.kernel.class_type if self.kernel is not None else 'lin'
        fig.savefig(
            fm.join(
                self.images_folder,
                "pgd_ls_multiclass_{:}c_kernel-{:}_target-{:}.pdf".format(
                    self.ds.num_classes, k_name, self.y_target)))

    def _rescaled_distance(self, x, c, r):
        """Rescale distance for plot.

        Evaluates the L2 constraint of center `c` and radius `r` on each
        row of `x`. If a normalizer is set, both `x` and `c` are mapped
        through its inverse transform first.

        """
        # NOTE(review): setUp normalizes self.ds.X before training and
        # the classifier has preprocess=None, while here points are
        # mapped back through the inverse transform before the distance
        # is evaluated -- confirm this is the intended scale for the
        # plotted constraint.
        if self.normalizer is not None:
            c = self.normalizer.inverse_transform(c)
            x = self.normalizer.inverse_transform(x)
        constr = CConstraintL2(center=c, radius=r)
        return x.apply_along_axis(constr.constraint, axis=1)

    def _get_style(self):
        """Define the style vector for the different classes.

        Returns
        -------
        list of tuple
            One `(color, marker, line style)` tuple of matplotlib codes
            per class. Within this class only the color is read.

        """
        if self.ds.num_classes == 3:
            styles = [('b', 'o', '-'), ('g', 'p', '--'), ('r', 's', '-.')]
        elif self.ds.num_classes == 4:
            styles = [('saddlebrown', 'o', '-'), ('g', 'p', '--'),
                      ('y', 's', '-.'), ('gray', 'D', '--')]
        else:
            # All entries are 3-tuples, so that each position has the
            # same meaning whatever the number of classes
            styles = [('saddlebrown', 'o', '-'), ('g', 'p', '--'),
                      ('y', 's', '-.'), ('gray', 'D', '--'),
                      ('c', '^', '-.'), ('m', 'v', '-'), ('y', '*', '-.')]

        return styles

    def _plot_decision_function(self, fig, plot_background=False):
        """Plot the decision function of a multiclass classifier.

        Parameters
        ----------
        fig : CFigure
            Figure whose current subplot is drawn on.
        plot_background : bool, optional
            Forwarded to `plot_fun`. Default False.

        Returns
        -------
        CFigure
            The input figure.

        """
        fig.sp.title('{:} ({:})'.format(self.multiclass.__class__.__name__,
                                        self.multiclass.classifier.__name__))

        x_bounds, y_bounds = self.ds.get_bounds()

        styles = self._get_style()

        # Dataset points, one color per class
        for c_idx, c in enumerate(self.ds.classes):
            fig.sp.scatter(self.ds.X[self.ds.Y == c, 0],
                           self.ds.X[self.ds.Y == c, 1],
                           s=20, c=styles[c_idx][0], edgecolors='k',
                           facecolors='none', linewidths=1,
                           label='c {:}'.format(c))

        # Plotting multiclass decision function
        fig.sp.plot_fun(lambda x: self.multiclass.predict(x),
                        multipoint=True, cmap='Set2',
                        grid_limits=self.ds.get_bounds(offset=5),
                        colorbar=False, n_grid_points=300,
                        plot_levels=True,
                        plot_background=plot_background,
                        levels=[-1, 0, 1, 2],
                        levels_color='gray', levels_style='--')

        fig.sp.xlim(x_bounds[0] - .05, x_bounds[1] + .05)
        fig.sp.ylim(y_bounds[0] - .05, y_bounds[1] + .05)

        fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=.1)

        return fig