def test_alignment(self):
    ds = CDLRandom(n_samples=100, n_features=500, n_redundant=0,
                   n_informative=10, n_clusters_per_class=1,
                   random_state=0).load()

    self.logger.info("Train Sec SVM")
    sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-2, lb=-0.1, ub=0.5)
    sec_svm.verbose = 2
    sec_svm.fit(ds.X, ds.Y)

    self.logger.info("Train SVM")
    svm = CClassifierSVM(C=1)
    svm.fit(ds.X, ds.Y)

    self._compute_alignment(ds, sec_svm, svm)

    # Predict with both classifiers to compare them
    svm_pred = svm.predict(ds.X)
    secsvm_pred = sec_svm.predict(ds.X)

    self.logger.info("SVM pred:\n{:}".format(svm_pred))
    self.logger.info("Sec-SVM pred:\n{:}".format(secsvm_pred))

    self.assert_array_almost_equal(secsvm_pred, svm_pred)
def test_grad_tr_params_linear(self):
    """Test `grad_tr_params` on a linear classifier."""
    for n in (None, CNormalizerMinMax((-10, 10))):
        clf = CClassifierSVM(store_dual_vars=True, preprocess=n)
        clf.fit(self.ds.X, self.ds.Y)
        self._test_grad_tr_params(clf)
class TestCPlot(CUnitTest):
    """Unit tests for CPlot."""

    def setUp(self):
        self.clf = CClassifierSVM()
        self.dataset = CDLRandom(n_features=2, n_redundant=0,
                                 n_informative=1,
                                 n_clusters_per_class=1).load()
        self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)
        self.clf.fit(self.dataset.X, self.dataset.Y)

    def test_fun(self):
        """Test for CPlotFunction.plot_fun method."""
        fig = CFigure()
        fig.sp.plot_ds(self.dataset)
        fig.sp.plot_fun(self.clf.decision_function, y=1)
        fig.show()

    def test_fgrads(self):
        """Test for CPlotFunction.plot_fgrads method."""
        fig = CFigure()
        fig.sp.plot_ds(self.dataset)
        fig.sp.plot_fun(self.clf.decision_function, y=1)
        fig.sp.plot_fgrads(lambda x: self.clf.grad_f_x(x, y=1))
        fig.show()
def test_grad_tr_params_nonlinear(self):
    """Test `grad_tr_params` on a nonlinear classifier."""
    for n in (None, CNormalizerMinMax((-10, 10))):
        clf = CClassifierSVM(kernel='rbf', preprocess=n)
        clf.fit(self.ds.X, self.ds.Y)
        self._test_grad_tr_params(clf)
def test_performance(self):
    """Compare the classifiers' performance."""
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")

    for sgd in self.sgds:
        self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

        if sgd.preprocess is not None:
            k = sgd.preprocess.deepcopy()
        else:
            k = None

        svm = CClassifierSVM(kernel=k)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, y_svm = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_sgd, y_sgd = sgd.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_sgd = CMetric.create('f1').performance_score(
            self.dataset.Y, label_sgd)

        self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
        self.assertGreater(acc_svm, 0.90,
                           "Accuracy of SVM: {:}".format(acc_svm))
        self.logger.info("Accuracy of SGD: {:}".format(acc_sgd))
        self.assertGreater(acc_sgd, 0.90,
                           "Accuracy of SGD: {:}".format(acc_sgd))
def test_performance(self):
    """Compare the classifiers' performance."""
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")

    for ridge in self.ridges:
        self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

        if ridge.preprocess is not None:
            svm_kernel = ridge.preprocess.deepcopy()
        else:
            svm_kernel = None

        svm = CClassifierSVM(kernel=svm_kernel)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, y_svm = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_ridge, y_ridge = ridge.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_ridge = CMetric.create('f1').performance_score(
            self.dataset.Y, label_ridge)

        self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
        self.assertGreater(acc_svm, 0.90,
                           "Accuracy of SVM: {:}".format(acc_svm))
        self.logger.info("Accuracy of ridge: {:}".format(acc_ridge))
        self.assertGreater(acc_ridge, 0.90,
                           "Accuracy of ridge: {:}".format(acc_ridge))
def test_store_dual_vars(self):
    """Test of parameters that control storing of dual space variables."""
    self.logger.info("Checking CClassifierSVM.store_dual_vars...")

    self.logger.info("Instantiating a linear SVM")
    svm = CClassifierSVM(kernel=None)
    self.assertIsNone(svm.store_dual_vars)
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNone(svm.sv)

    self.logger.info("Changing store_dual_vars to True")
    svm.store_dual_vars = True
    self.assertTrue(svm.store_dual_vars)
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNotNone(svm.sv)

    self.logger.info("Changing store_dual_vars to False")
    svm.store_dual_vars = False
    self.assertFalse(svm.store_dual_vars)
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNone(svm.sv)

    self.logger.info("Changing kernel to nonlinear when "
                     "store_dual_vars is False should raise ValueError")
    with self.assertRaises(ValueError):
        svm.kernel = CKernelRBF()

    self.logger.info("Instantiating a nonlinear SVM")
    svm = CClassifierSVM(kernel='rbf')
    self.assertIsNone(svm.store_dual_vars)
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNotNone(svm.sv)

    self.logger.info("Changing store_dual_vars to True")
    svm.store_dual_vars = True
    self.assertTrue(svm.store_dual_vars)
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNotNone(svm.sv)

    self.logger.info(
        "Changing store_dual_vars to False should raise ValueError")
    with self.assertRaises(ValueError):
        svm.store_dual_vars = False
def test_time(self):
    """Compare execution time of ridge and SVM."""
    self.logger.info("Testing training speed of ridge compared to SVM")

    for ridge in self.ridges:
        self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

        svm = CClassifierSVM(kernel=ridge.preprocess)

        with self.timer() as t_svm:
            svm.fit(self.dataset.X, self.dataset.Y)
        self.logger.info(
            "Execution time of SVM: {:}".format(t_svm.interval))

        with self.timer() as t_ridge:
            ridge.fit(self.dataset.X, self.dataset.Y)
        self.logger.info(
            "Execution time of ridge: {:}".format(t_ridge.interval))
def test_plot(self):
    ds = CDLRandom(n_samples=100, n_features=2,
                   n_redundant=0, random_state=100).load()

    self.logger.info("Train Sec SVM")
    sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-3, lb=-0.1, ub=0.5)
    sec_svm.verbose = 2
    sec_svm.fit(ds.X, ds.Y)

    self.logger.info("Train SVM")
    svm = CClassifierSVM(C=1)
    svm.fit(ds.X, ds.Y)

    self._compute_alignment(ds, sec_svm, svm)

    fig = CFigure(height=5, width=8)

    fig.subplot(1, 2, 1)
    # Plot dataset points
    fig.sp.plot_ds(ds)
    # Plot objective function
    fig.sp.plot_fun(svm.predict,
                    multipoint=True,
                    plot_background=True,
                    plot_levels=False,
                    n_grid_points=100,
                    grid_limits=ds.get_bounds())
    fig.sp.title("SVM")

    fig.subplot(1, 2, 2)
    # Plot dataset points
    fig.sp.plot_ds(ds)
    # Plot objective function
    fig.sp.plot_fun(sec_svm.predict,
                    multipoint=True,
                    plot_background=True,
                    plot_levels=False,
                    n_grid_points=100,
                    grid_limits=ds.get_bounds())
    fig.sp.title("Sec-SVM")

    fig.show()
class TestCFigure(CUnitTest):
    """Unittest for CFigure."""

    def test_svm(self):
        self.X = CArray([[1, 2], [3, 4], [5, 6], [7, 8]])
        self.Y = CArray([[0], [1], [1], [0]]).ravel()
        self.dataset = CDataset(self.X, self.Y)

        self.classifier = CClassifierSVM(kernel=CKernelRBF())
        self.classifier.fit(self.dataset)

        self.x_min, self.x_max = (self.X[:, [0]].min() - 1,
                                  self.X[:, [0]].max() + 1)
        self.y_min, self.y_max = (self.X[:, [1]].min() - 1,
                                  self.X[:, [1]].max() + 1)

        self.fig = CFigure(height=7, width=10,
                           linewidth=5, fontsize=24, markersize=20)
        self.fig.sp.title("Svm Test")

        self.logger.info("Test plot dataset method...")
        self.fig.sp.plot_ds(self.dataset)

        self.logger.info("Test plot path method...")
        path = CArray([[1, 2], [1, 3], [1.5, 5]])
        self.fig.sp.plot_path(path)

        self.logger.info("Test plot function method...")
        bounds = [(self.x_min, self.x_max), (self.y_min, self.y_max)]
        self.fig.sp.plot_fun(self.classifier.decision_function,
                             plot_levels=False, grid_limits=bounds, y=1)

        self.fig.sp.xlim(self.x_min, self.x_max)
        self.fig.sp.ylim(self.y_min, self.y_max)

        self.fig.show()
class TestCExplainerGradient(CUnitTest):
    """Unittests for CExplainerGradient."""

    def setUp(self):
        self.clf = CClassifierSVM()
        # Load the digits dataset restricted to classes 0 and 1
        # (8x8 grayscale images, 64 features)
        self.ds = CDLDigits(class_list=[0, 1], zero_one=True).load()
        # Training classifier
        self.clf.fit(self.ds)
        self.explainer = CExplainerGradient(self.clf)

    def test_explain(self):
        """Unittest for explain method."""
        i = 67
        x = self.ds.X[i, :]

        attr = self.explainer.explain(x, y=1)
        self.logger.info("Attributions:\n{:}".format(attr.tolist()))

        self.assertIsInstance(attr, CArray)
        self.assertEqual(x.shape, attr.shape)

        fig = CFigure(height=3, width=6)

        # Plotting original image
        fig.subplot(1, 2, 1)
        fig.sp.imshow(x.reshape((8, 8)), cmap='gray')

        th = max(abs(attr.min()), abs(attr.max()))

        # Plotting attributions
        fig.subplot(1, 2, 2)
        fig.sp.imshow(attr.reshape((8, 8)),
                      cmap='seismic', vmin=-1 * th, vmax=th)

        fig.show()
def test_margin(self):
    self.logger.info("Testing margin separation of SVM...")

    import numpy as np

    # We create two clusters of separable points:
    # 1000 samples for class 0 and 100 for class 1
    rng = np.random.RandomState(0)
    n_samples_1 = 1000
    n_samples_2 = 100
    X = np.r_[1.5 * rng.randn(n_samples_1, 2),
              0.5 * rng.randn(n_samples_2, 2) + [2, 2]]
    y = [0] * n_samples_1 + [1] * n_samples_2

    dataset = CDataset(X, y)

    # Fit the model without class weights
    clf = CClassifierSVM()
    clf.fit(dataset.X, dataset.Y)

    # Separating hyperplane, rewritten as a line y = a*x + c
    w = clf.w
    a = -w[0] / w[1]
    xx = CArray.linspace(-5, 5)
    yy = a * xx - clf.b / w[1]

    # Fit the model again, weighting the minority class 10x
    wclf = CClassifierSVM(class_weight={0: 1, 1: 10})
    wclf.fit(dataset.X, dataset.Y)

    ww = wclf.w
    wa = -ww[0] / ww[1]
    wyy = wa * xx - wclf.b / ww[1]

    fig = CFigure(linewidth=1)
    fig.sp.plot(xx, yy.ravel(), 'k-', label='no weights')
    fig.sp.plot(xx, wyy.ravel(), 'k--', label='with weights')
    fig.sp.scatter(X[:, 0].ravel(), X[:, 1].ravel(), c=y)
    fig.sp.legend()

    fig.savefig(fm.join(fm.abspath(__file__),
                        'figs', 'test_c_classifier_svm.pdf'))
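# The two boundary lines above come from solving the hyperplane equation
# w[0]*x + w[1]*y + b = 0 for y, i.e. y = -(w[0]/w[1])*x - b/w[1].
# A minimal numpy sketch of the same derivation, assuming a trained
# 2-feature linear model with weights `w` and bias `b` (values below
# are hypothetical, for illustration only):
import numpy as np

w = np.array([0.8, -0.5])  # hypothetical weights
b = 0.2                    # hypothetical bias

xx = np.linspace(-5, 5, 50)
yy = -(w[0] / w[1]) * xx - b / w[1]  # points on the decision boundary

# Sanity check: every (x, y) pair on the line satisfies w.x + b = 0
assert np.allclose(w[0] * xx + w[1] * yy + b, 0)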
def test_linear_svm(self):
    """Performs tests on linear SVM."""
    self.logger.info("Testing SVM linear variants (kernel and not)")

    # Instantiating a linear SVM and an SVM with linear kernel
    linear_svm = CClassifierSVM(kernel=None)
    kernel_linear_svm = self.svms[0]

    self.logger.info("SVM w/ linear kernel in the primal")
    self.assertIsNone(linear_svm.kernel)

    self.logger.info("Training both classifiers on dense data")
    linear_svm.fit(self.dataset.X, self.dataset.Y)
    kernel_linear_svm.fit(self.dataset.X, self.dataset.Y)

    linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
        self.dataset.X, return_decision_function=True)
    kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset.X, return_decision_function=True)

    # Check that predictions match
    self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)

    self.logger.info("Training both classifiers on sparse data")
    linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
    kernel_linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)

    self.assertTrue(linear_svm.w.issparse,
                    "Weights vector is not sparse even "
                    "if training data is sparse")

    linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
        self.dataset_sparse.X, return_decision_function=True)
    kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)

    # Check that predictions match
    self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)
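# Why the two variants above agree: a linear-kernel SVM trained in the
# dual yields coefficients from which the primal weights can be recovered
# as w = sum_i alpha_i * y_i * x_i (libsvm-style solvers return the
# alphas already signed by y_i). A minimal numpy sketch of that
# reconstruction; `support_vectors` and `dual_coef` are hypothetical:
import numpy as np

support_vectors = np.array([[1.0, 2.0], [3.0, -1.0]])  # hypothetical SVs
dual_coef = np.array([0.7, -0.7])  # hypothetical signed alphas

w = dual_coef @ support_vectors  # primal weights from the dual solution
print(w)  # the decision function is then w @ x + b, as in the primal SVM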
def test_draw(self):
    """Compare the classifiers graphically."""
    self.logger.info("Testing classifiers graphically")

    # Generate 2D synthetic data
    dataset = CDLRandom(n_features=2, n_redundant=1, n_informative=1,
                        n_clusters_per_class=1).load()
    dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

    self.sgds[0].fit(dataset.X, dataset.Y)

    svm = CClassifierSVM()
    svm.fit(dataset.X, dataset.Y)

    fig = CFigure(width=10, markersize=8)

    fig.subplot(2, 1, 1)
    # Plot dataset points
    fig.sp.plot_ds(dataset)
    # Plot objective function
    fig.sp.plot_fun(svm.decision_function,
                    grid_limits=dataset.get_bounds(), y=1)
    fig.sp.title('SVM')

    fig.subplot(2, 1, 2)
    # Plot dataset points
    fig.sp.plot_ds(dataset)
    # Plot objective function
    fig.sp.plot_fun(self.sgds[0].decision_function,
                    grid_limits=dataset.get_bounds(), y=1)
    fig.sp.title('SGD Classifier')

    fig.savefig(fm.join(fm.abspath(__file__),
                        'figs', 'test_c_classifier_sgd1.pdf'))
def test_store_dual_vars(self):
    """Test of parameters that control storing of dual space variables."""
    self.logger.info("Checking CClassifierSVM.store_dual_vars...")

    self.logger.info("Linear SVM in primal space")
    svm = CClassifierSVM()
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNone(svm.alpha)

    self.logger.info("Linear SVM in dual space")
    svm = CClassifierSVM(kernel='linear')
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNotNone(svm.alpha)

    self.logger.info("Nonlinear SVM in dual space")
    svm = CClassifierSVM(kernel='rbf')
    svm.fit(self.dataset.X, self.dataset.Y)
    self.assertIsNotNone(svm.alpha)
class TestCSecEval(CUnitTest):
    """Unittests for CSecEval (evasion attack)."""

    def setUp(self):
        self.classifier = CClassifierSVM(kernel='linear', C=1.0)

        self.lb = -2
        self.ub = +2

        n_tr = 20
        n_ts = 10
        n_features = 2
        n_reps = 1

        self.sec_eval = []
        self.attack_ds = []
        for rep_i in range(n_reps):
            self.logger.info(
                "Loading `random_blobs` with seed: {:}".format(rep_i))
            loader = CDLRandomBlobs(n_samples=n_tr + n_ts,
                                    n_features=n_features,
                                    centers=[(-0.5, -0.5), (+0.5, +0.5)],
                                    center_box=(-0.5, 0.5),
                                    cluster_std=0.5,
                                    random_state=rep_i * 100 + 10)
            ds = loader.load()

            self.tr = ds[:n_tr, :]
            self.ts = ds[n_tr:, :]

            self.classifier.fit(self.tr.X, self.tr.Y)

            # Only manipulate positive samples, targeting negative ones
            self.y_target = None
            self.attack_classes = CArray([1])

            for create_fn in (self._attack_pgd_ls, self._attack_cleverhans):
                self.attack_ds.append(self.ts)
                attack, param_name, param_values = create_fn()
                # Set sec eval object
                self.sec_eval.append(
                    CSecEval(attack=attack,
                             param_name=param_name,
                             param_values=param_values))

    def _attack_pgd_ls(self):
        params = {
            "classifier": self.classifier,
            "double_init_ds": self.tr,
            "distance": 'l1',
            "lb": self.lb,
            "ub": self.ub,
            "y_target": self.y_target,
            "attack_classes": self.attack_classes,
            "solver_params": {'eta': 0.5, 'eps': 1e-2}
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        # Sec eval params
        param_name = 'dmax'
        dmax = 2
        dmax_step = 0.5
        param_values = CArray.arange(
            start=0, step=dmax_step, stop=dmax + dmax_step)

        return attack, param_name, param_values

    def _attack_cleverhans(self):
        attack_params = {
            'eps': 0.1,
            'clip_max': self.ub,
            'clip_min': self.lb,
            'ord': 1
        }
        attack = CAttackEvasionCleverhans(
            classifier=self.classifier,
            surrogate_data=self.tr,
            y_target=self.y_target,
            clvh_attack_class=FastGradientMethod,
            **attack_params)

        param_name = 'attack_params.eps'
        dmax = 2
        dmax_step = 0.5
        param_values = CArray.arange(
            start=0, step=dmax_step, stop=dmax + dmax_step)

        return attack, param_name, param_values

    def _plot_sec_eval(self):
        # Figure creation
        figure = CFigure(height=5, width=5)

        for sec_eval in self.sec_eval:
            sec_eval_data = sec_eval.sec_eval_data
            # Plot security evaluation
            figure.sp.plot_sec_eval(sec_eval_data, label='SVM',
                                    marker='o', show_average=True,
                                    mean=True)

        figure.subplots_adjust()
        figure.show()

    def test_sec_eval(self):
        # Evaluate classifier security
        for sec_eval_i, sec_eval in enumerate(self.sec_eval):
            sec_eval.run_sec_eval(self.attack_ds[sec_eval_i])

        self._plot_sec_eval()


if __name__ == '__main__':
    CUnitTest.main()
class TestCLossClassification(CUnitTest):
    """Unittests for CLossClassification and subclasses."""

    def setUp(self):
        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_one_at_zero(self):
        """Test that classification losses return 1 for a score of 0."""
        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            self.assertEqual(CArray([1.0]),
                             loss_class.loss(CArray([1]), CArray([0])))

    def test_in_out(self):
        """Unittest for input and output of loss classes."""

        def _check_loss(l, n_samples):
            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertEqual(l.dtype, float)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss = loss_class.loss(self.ds.Y, self.scores)
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true, scores).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)
            self.assertEqual(loss_mean, loss_mean_pos)

            loss = loss_class.loss(self.ds.Y, self.scores, pos_label=0)
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true, scores, pos_label=0).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_neg = loss_class.loss(self.ds.Y, self.scores[:, 0].ravel())
            loss_mean_neg = loss_neg.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 0].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_neg))
            _check_loss(loss_neg, self.ds.Y.size)
            self.assertEqual(loss_mean, loss_mean_neg)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, :])
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0, :]).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

    def test_draw(self):
        """Drawing the loss functions.

        Inspired by:
        https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()

        x = CArray.arange(-1, 3.01, 0.01)

        fig.sp.plot(x, CArray([1 if i <= 0 else 0 for i in x]),
                    label='0-1 indicator')

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()

        fig.show()

    def test_grad(self):
        """Compare analytical gradients with their numerical approximations."""

        def _loss_wrapper(scores, loss, true_labels):
            return loss.loss(true_labels, scores)

        def _dloss_wrapper(scores, loss, true_labels):
            return loss.dloss(true_labels, scores)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            n_elems = 1
            y_true = CArray.randint(0, 2, n_elems).todense()
            score = CArray.randn((n_elems,))

            check_grad_val = CFunction(
                _loss_wrapper, _dloss_wrapper).check_grad(
                    score, 1e-8, loss=loss_class, true_labels=y_true)
            self.logger.info(
                "Gradient difference between analytical loss "
                "gradient and numerical gradient: %s", str(check_grad_val))
            self.assertLess(
                check_grad_val, 1e-4,
                "the gradient is wrong {:} for {:} loss".format(
                    check_grad_val, loss_id))
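# What `check_grad` does above, in plain numpy: compare the analytical
# derivative with a central finite-difference approximation
# (f(x + h) - f(x - h)) / (2h). A minimal sketch for the hinge loss
# l(s) = max(0, 1 - y*s) with y = 1 (names here are illustrative):
import numpy as np

def hinge(s, y=1.0):
    return max(0.0, 1.0 - y * s)

def hinge_grad(s, y=1.0):
    return -y if 1.0 - y * s > 0 else 0.0

s, h = 0.3, 1e-6
num_grad = (hinge(s + h) - hinge(s - h)) / (2 * h)
print(abs(num_grad - hinge_grad(s)))  # ~0 away from the kink at s = 1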
    train_size=n_tr, random_state=random_state)

training_data = CDataset(x_train, y)
validation_data = CDataset(x_val, y_val)
test_data = CDataset(xtt, ytt)

del xtr
del ytr

metric = CMetricAccuracy()

clf = CClassifierSVM(kernel=CKernelRBF(gamma=GAMMA), C=C)

# We can now fit the classifier
clf.fit(training_data.X, training_data.Y)
print("Training of classifier complete!")

# Compute predictions on a test set
y_pred = clf.predict(test_data.X)

# Bounds of the attack space. Can be set to `None` for unbounded
lb, ub = validation_data.X.min(), validation_data.X.max()

# Number of poisoning points to generate
n_poisoning_points = int(n_tr * poison_percentage)

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
class TestCLossRegression(CUnitTest):
    """Unittests for CLossRegression and subclasses."""

    def setUp(self):
        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_in_out(self):
        """Unittest for input and output of loss classes."""

        def _check_loss(l, n_samples):
            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertIsSubDtype(l.dtype, float)

        for loss_id in ('e-insensitive', 'e-insensitive-squared',
                        'quadratic'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, 1].ravel())
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0, :]).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

            with self.assertRaises(ValueError):
                loss_class.loss(self.ds.Y, self.scores[:, 1])

    def test_draw(self):
        """Drawing the loss functions.

        Inspired by:
        https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()

        x = CArray.arange(-1, 3.01, 0.01)

        for loss_id in ('e-insensitive', 'e-insensitive-squared',
                        'quadratic'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()

        fig.show()
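# The regression losses plotted above, written as plain numpy functions
# of a target y and a prediction s. This is a sketch based on the
# standard textbook definitions; the epsilon value is an illustrative
# choice, not the library default:
import numpy as np

def e_insensitive(y, s, eps=0.5):
    # Zero inside the epsilon-tube around the target, linear outside
    return np.maximum(0.0, np.abs(y - s) - eps)

def e_insensitive_squared(y, s, eps=0.5):
    return e_insensitive(y, s, eps) ** 2

def quadratic(y, s):
    return 0.5 * (y - s) ** 2

s = np.arange(-1, 3.01, 0.01)
print(e_insensitive(1.0, s).max(), quadratic(1.0, s).max())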
def setUp(self):
    classifier = CClassifierSVM(kernel='linear', C=1.0, grad_sampling=1.0)

    # Data parameters
    discrete = False
    lb = -2
    ub = +2

    n_tr = 20
    n_ts = 10
    n_features = 2
    n_reps = 1

    self.sec_eval = []
    self.attack_ds = []
    for rep_i in range(n_reps):
        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(rep_i))
        loader = CDLRandomBlobs(n_samples=n_tr + n_ts,
                                n_features=n_features,
                                centers=[(-0.5, -0.5), (+0.5, +0.5)],
                                center_box=(-0.5, 0.5),
                                cluster_std=0.5,
                                random_state=rep_i * 100 + 10)
        ds = loader.load()

        tr = ds[:n_tr, :]
        ts = ds[n_tr:, :]

        classifier.fit(tr)

        self.attack_ds.append(ts)

        # Only manipulate positive samples, targeting negative ones
        self.y_target = None
        attack_classes = CArray([1])

        params = {
            "classifier": classifier,
            "surrogate_classifier": classifier,
            "surrogate_data": tr,
            "distance": 'l1',
            "lb": lb,
            "ub": ub,
            "discrete": discrete,
            "y_target": self.y_target,
            "attack_classes": attack_classes,
            "solver_params": {'eta': 0.5, 'eps': 1e-2}
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        # Sec eval params
        param_name = 'dmax'
        dmax = 2
        dmax_step = 0.5
        param_values = CArray.arange(
            start=0, step=dmax_step, stop=dmax + dmax_step)

        # Set sec eval object
        self.sec_eval.append(
            CSecEval(attack=attack,
                     param_name=param_name,
                     param_values=param_values))
# Let's create a 3-Fold data splitter
from secml.data.splitter import CDataSplitterKFold
xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

# Select and set the best training parameters for the classifier
print("Estimating the best training parameters...")
best_params = clf.estimate_parameters(dataset=tr,
                                      parameters=xval_params,
                                      splitter=xval_splitter,
                                      metric='accuracy',
                                      perf_evaluator='xval')
print("The best training parameters are: ", best_params)

# We can now fit the classifier
clf.fit(tr)

# Compute predictions on a test set
y_pred = clf.predict(ts.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=ts.Y, y_pred=y_pred)
print("Accuracy on test set: {:.2%}".format(acc))

x0, y0 = ts[5, :].X, ts[5, :].Y  # Initial sample; add randomness?
print(x0.dtype)
print(y0.dtype)

noise_type = 'l2'  # Type of perturbation: 'l1' or 'l2'
dmax = 0.4  # Maximum perturbation
# preds = secml_sklearn_clf.predict(ds_te_secml.X)
# metric = CMetricAccuracy()
# acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
# print("Accuracy on test set: {:.2%}".format(acc))
# probs = secml_sklearn_clf.predict_proba(ds_te_secml.X)  # Doesn't work
#
# sklearn classifiers aren't supported for performing adversarial attacks;
# only the native SVM of secml supports adversarial attacks
###############################################################

# =============================================================================
x, y = ds_te_secml[:, :].X, ds_te_secml[:, :].Y

# This won't work if we want to specify the target class for each example
# secml_clf = CClassifierMulticlassOVA(
#     CClassifierSVM, kernel=CKernelRBF(gamma=10), C=1)
secml_clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

secml_clf.fit(ds_tr_secml)

preds = secml_clf.predict(ds_te_secml.X)
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
print("Accuracy on test set: {:.2%}".format(acc))

# Performing the attack
noise_type = 'l2'
dmax = 0.4
lb, ub = None, None  # with 0, 1 it goes out of bounds
# y_target can be set to some class, indicating which class is
# expected for the adversarial example
y_target = None

# solver_params = {
#     'eta': 0.3,
#     'max_iter': 100,
#     'eps': 1e-4
# The problem seems linearly separable -> try a logistic regression
# classifier without any parameter estimation
from secml.ml.classifiers import CClassifierLogistic
# clf_l = CClassifierLogistic()

xval_lin_params = {'C': [0.01, 0.1, 1, 10, 100]}

# Select and set the best training parameters for the linear classifier
print("Estimating the best training parameters for linear kernel...")
best_lin_params = clf_lin.estimate_parameters(dataset=tr_set,
                                              parameters=xval_lin_params,
                                              splitter=xval_splitter,
                                              metric='accuracy',
                                              perf_evaluator='xval')
clf_lin.fit(tr_set)

# Select and set the best training parameters for the linear classifier
# print("Estimating the best training parameters for linear kernel...")
# best_lin_params = clf_l.estimate_parameters(
#     dataset=tr_set,
#     parameters=xval_lin_params,
#     splitter=xval_splitter,
#     metric='accuracy',
#     perf_evaluator='xval'
# )
# clf_l.fit(tr_set)

# Compute predictions on a test set
y_pred = clf_lin.predict(ts_set.X)
from secml.ml.classifiers import CClassifierLogistic
clf_l = CClassifierLogistic()

xval_lin_params = {'C': [0.01, 0.1, 1, 10, 100]}

# Select and set the best training parameters for the linear classifier
print("Estimating the best training parameters for linear kernel...")
best_lin_params = clf_lin.estimate_parameters(
    dataset=tr_set,
    parameters=xval_lin_params,
    splitter=xval_splitter,
    metric='accuracy',
    perf_evaluator='xval'
)
clf_lin.fit(tr_set)

# Select and set the best training parameters for the linear classifier
# print("Estimating the best training parameters for linear kernel...")
# best_lin_params = clf_l.estimate_parameters(
#     dataset=tr_set,
#     parameters=xval_lin_params,
#     splitter=xval_splitter,
#     metric='accuracy',
#     perf_evaluator='xval'
# )
# clf_l.fit(tr_set)

import random
from secml.adv.attacks.evasion import CAttackEvasionPGDLS
class TestCPerfEvaluator(CUnitTest):
    """Unit tests for CPerfEvaluator."""

    def setUp(self):
        # Create a dummy dataset (we want a test set different from train)
        loader = CDLRandom(random_state=50000)
        self.training_dataset = loader.load()
        self.test_dataset = loader.load()

        # Create the classifier
        kernel = CKernel.create('rbf')
        self.svm = CClassifierSVM(kernel=kernel)
        self.svm.verbose = 1

        self.logger.info(
            "Using kernel {:}".format(self.svm.kernel.class_type))

    def test_parameters_setting(self):
        # Change default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2})

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Run cross-validation to choose the best parameters
        xval_splitter = CDataSplitter.create(
            'kfold', num_folds=5, random_state=50000)

        # Set the best parameters inside the classifier
        self.svm.estimate_parameters(self.training_dataset,
                                     xval_parameters, xval_splitter,
                                     'accuracy')

        self.logger.info(
            "SVM has now the following parameters: {:}".format(
                self.svm.get_params()))

        self.assertEqual(self.svm.get_params()['C'], 1)
        self.assertEqual(self.svm.get_params()['kernel.gamma'], 50)

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(
            xval_splitter, CMetric.create('accuracy'))
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters)

        for param in xval_parameters:
            self.logger.info(
                "Best '{:}' is: {:}".format(param, best_params[param]))
            self.assertEqual(best_params[param],
                             self.svm.get_params()[param])

        self.svm.verbose = 0

        parameters_combination = [[1, 1], [1, 50], [10, 1],
                                  [10, 50], [100, 1], [100, 50]]
        par_comb_score = CArray.zeros(len(parameters_combination))
        for comb in range(len(parameters_combination)):
            this_fold_score = []
            num_xval_fold = len(xval_splitter.tr_idx)

            for f in range(num_xval_fold):
                self.svm.set("C", parameters_combination[comb][0])
                self.svm.kernel.gamma = parameters_combination[comb][1]

                self.svm.fit(
                    self.training_dataset[xval_splitter.tr_idx[f], :].X,
                    self.training_dataset[xval_splitter.tr_idx[f], :].Y)

                this_fold_predicted = self.svm.predict(
                    self.training_dataset[xval_splitter.ts_idx[f], :].X)

                this_fold_accuracy = skm.accuracy_score(
                    self.training_dataset[
                        xval_splitter.ts_idx[f], :].Y.get_data(),
                    this_fold_predicted.get_data())
                this_fold_score.append(this_fold_accuracy)

            par_comb_score[comb] = np.mean(this_fold_score)
            self.logger.info(
                "this fold mean: {:}".format(par_comb_score[comb]))

        max_combination_score = par_comb_score.max()
        better_param_comb = parameters_combination[par_comb_score.argmax()]
        self.logger.info("max combination score found here: {:}".format(
            max_combination_score))
        self.logger.info(
            "max comb score found during xval: {:}".format(best_score))

        self.assertEqual(max_combination_score, best_score)

        # Set parameters found by xval and check they match those found here
        self.logger.info("the parameters selected by our own xval are:")
        self.svm.set_params(best_params)
        self.logger.info("C: {:}".format(self.svm.C))
        self.logger.info("kernel.gamma: {:}".format(self.svm.kernel.gamma))
        # Check C
        self.assertEqual(better_param_comb[0], self.svm.C)
        # Check gamma
        self.assertEqual(better_param_comb[1], self.svm.kernel.gamma)

    def test_nan_metric_value(self):
        # Change default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1})
        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Run cross-validation to choose the best parameters
        xval_splitter = CDataSplitter.create(
            'kfold', num_folds=5, random_state=50000)
self.logger.info("Testing metric with some nan") some_nan_metric = CMetricFirstNan() # Now we compare the parameters chosen before with a new evaluator perf_eval = CPerfEvaluatorXVal(xval_splitter, some_nan_metric) perf_eval.verbose = 1 best_params, best_score = perf_eval.evaluate_params( self.svm, self.training_dataset, xval_parameters, pick='last') self.logger.info("best score : {:}".format(best_score)) # The xval should select the only one actual value (others are nan) self.assertEqual(best_score, 1.) self.logger.info("Testing metric with all nan") # This test case involves an all-nan slice self.logger.filterwarnings(action="ignore", message="All-NaN slice encountered", category=RuntimeWarning) all_nan_metric = CMetricAllNan() # Now we compare the parameters chosen before with a new evaluator perf_eval = CPerfEvaluatorXVal(xval_splitter, all_nan_metric) perf_eval.verbose = 1 with self.assertRaises(ValueError): perf_eval.evaluate_params(self.svm, self.training_dataset, xval_parameters, pick='last') def _run_multiclass(self, tr, multiclass, xval_params, expected_best): xval_splitter = CDataSplitter.create('kfold', num_folds=3, random_state=50000) # Set the best parameters inside the classifier best_params = multiclass.estimate_parameters(tr, xval_params, xval_splitter, 'accuracy') self.logger.info( "Multiclass SVM has now the following parameters: {:}".format( multiclass.get_params())) for clf_idx, clf in enumerate(multiclass._binary_classifiers): self.assertEqual(clf.C, expected_best['C']) self.assertEqual(clf.kernel.gamma, expected_best['kernel.gamma']) # Final test: fit using best parameters multiclass.fit(tr.X, tr.Y) for clf in multiclass._binary_classifiers: for param in best_params: self.assertEqual(clf.get_params()[param], best_params[param]) def test_params_multiclass(self): """Parameter estimation for multiclass classifiers.""" # Create dummy dataset (we want a test different from train) tr = CDLRandom(n_classes=4, n_clusters_per_class=1, random_state=50000).load() kernel = CKernel.create('rbf') multiclass = CClassifierMulticlassOVA(CClassifierSVM, C=1, kernel=kernel) multiclass.verbose = 1 xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} expected = {'C': 10.0, 'kernel.gamma': 0.1} self._run_multiclass(tr, multiclass, xval_parameters, expected) self.logger.info("Testing with preprocessor") kernel = CKernel.create('rbf') multiclass = CClassifierMulticlassOVA(CClassifierSVM, C=1, kernel=kernel, preprocess='min-max') multiclass.verbose = 1 xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} expected = {'C': 10.0, 'kernel.gamma': 0.1} self._run_multiclass(tr, multiclass, xval_parameters, expected)
                                              perf_evaluator='xval')

# Select and set the best training parameters for the RBF classifier
# print("Estimating the best training parameters for RBF kernel...")
# best_rbf_params = clf_rbf.estimate_parameters(
#     dataset=tr,
#     parameters=xval_rbf_params,
#     splitter=xval_splitter,
#     metric='accuracy',
#     perf_evaluator='xval'
# )

print(best_lin_params)

# Train classifier
print("start training")
clf_lin.fit(data_smp_encoded_secML)
# print("linear training ended, beginning rbf")
# clf_rbf.fit(tr)
# print("start linear classif")
# clf_l.fit(data_smp_encoded_secML)
print("Classifiers trained")

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Compute predictions on a test set
y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X)
# y_rbf_pred = clf_rbf.predict(ts.X)
# y_l_pred = clf_l.predict(raw_data_encoded_secML.X)
class TestCSecEval(CAttackEvasionTestCases):
    """Unittests for CSecEval (evasion attack)."""

    def setUp(self):
        self.clf = CClassifierSVM(C=1.0)

        self.n_tr = 40
        self.n_features = 10
        self.seed = 0

        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(self.seed))
        self.ds = self._load_blobs(
            self.n_features, 2, sparse=False, seed=self.seed)

        self.tr = self.ds[:self.n_tr, :]
        self.ts = self.ds[self.n_tr:, :]

        self.clf.fit(self.tr.X, self.tr.Y)

    def test_attack_pgd_ls(self):
        """Test SecEval using CAttackEvasionPGDLS."""
        params = {
            "classifier": self.clf,
            "double_init_ds": self.tr,
            "distance": 'l2',
            "lb": -2,
            "ub": 2,
            "y_target": None,
            "solver_params": {'eta': 0.1, 'eps': 1e-2}
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        param_name = 'dmax'

        self._set_and_run(attack, param_name)

    def test_attack_pgd_ls_discrete(self):
        """Test SecEval using CAttackEvasionPGDLS on a problematic
        discrete case with L1 constraint.

        We alter the classifier so that many weights have the same value.
        The optimizer should be able to evade the classifier anyway, by
        changing one feature each iteration. Otherwise, by changing all
        the features with the same value at once, the evasion will always
        fail because the L1 constraint will be violated.

        """
        self.ds = self._discretize_data(self.ds, eta=1)
        self.ds.X[self.ds.X > 1] = 1
        self.ds.X[self.ds.X < -1] = -1

        self.tr = self.ds[:self.n_tr, :]
        self.ts = self.ds[self.n_tr:, :]

        self.clf.fit(self.tr.X, self.tr.Y)

        # Set a few features to the same max value
        w_new = self.clf.w.deepcopy()
        w_new[CArray.randint(
            self.clf.w.size, shape=5, random_state=0)] = self.clf.w.max()
        self.clf._w = w_new

        params = {
            "classifier": self.clf,
            "double_init": False,
            "distance": 'l1',
            "lb": -1,
            "ub": 1,
            "y_target": None,
            "solver_params": {'eta': 1, 'eps': 1e-2}
        }
        attack = CAttackEvasionPGDLS(**params)
        attack.verbose = 1

        param_name = 'dmax'

        self._set_and_run(attack, param_name, dmax_step=1)

    def test_attack_cleverhans(self):
        """Test SecEval using CAttackEvasionCleverhans+FastGradientMethod."""
        try:
            import cleverhans
        except ImportError as e:
            import unittest
            raise unittest.SkipTest(e)

        from cleverhans.attacks import FastGradientMethod
        from secml.adv.attacks import CAttackEvasionCleverhans

        params = {
            "classifier": self.clf,
            "surrogate_data": self.tr,
            "y_target": None,
            "clvh_attack_class": FastGradientMethod,
            'eps': 0.1,
            'clip_max': 2,
            'clip_min': -2,
            'ord': 2
        }
        attack = CAttackEvasionCleverhans(**params)

        param_name = 'attack_params.eps'

        self._set_and_run(attack, param_name)

    def _set_and_run(self, attack, param_name, dmax=2, dmax_step=0.5):
        """Create the SecEval and run it on the test set."""
        param_values = CArray.arange(
            start=0, step=dmax_step, stop=dmax + dmax_step)

        sec_eval = CSecEval(attack=attack,
                            param_name=param_name,
                            param_values=param_values)

        sec_eval.run_sec_eval(self.ts)

        self._plot_sec_eval(sec_eval)

        # At the end of the sec eval we expect 0% accuracy
        self.assertFalse(
            CArray(sec_eval.sec_eval_data.Y_pred[-1] == self.ts.Y).any())

    @staticmethod
    def _plot_sec_eval(sec_eval):
        figure = CFigure(height=5, width=5)

        figure.sp.plot_sec_eval(sec_eval.sec_eval_data, label='SVM',
                                marker='o', show_average=True, mean=True)

        figure.sp.title(sec_eval.attack.__class__.__name__)

        figure.subplots_adjust()
        figure.show()


if __name__ == '__main__':
    CAttackEvasionTestCases.main()
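# The security evaluation loop above, conceptually: for each perturbation
# budget, attack every test point and record accuracy on the perturbed
# set. A minimal plain-Python sketch, assuming hypothetical
# `attack(x, dmax)` and `predict(X)` callables:
import numpy as np

def sec_eval_curve(X_ts, y_ts, predict, attack, budgets):
    accuracies = []
    for dmax in budgets:
        # Perturb each test sample within the current budget
        X_adv = np.stack([attack(x, dmax) for x in X_ts])
        y_pred = predict(X_adv)
        accuracies.append(np.mean(y_pred == y_ts))
    return accuracies  # typically decreases as the budget grows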
training, validation = splitter.split(dataset)

# Normalize the data
normalizer = CNormalizerMinMax()
training.X = normalizer.fit_transform(training.X)
validation.X = normalizer.transform(validation.X)
test.X = normalizer.transform(test.X)

# Metric to use for training and performance evaluation
metric = CMetricAccuracy()

# Creation of the multiclass classifier
classifier = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

# We can now fit the classifier
classifier.fit(training.X, training.Y)
print("Training of classifier complete!")

# Compute predictions on a test set
predictionY = classifier.predict(test.X)

# Bounds of the attack space. Can be set to `None` for unbounded
lowerBound, upperBound = validation.X.min(), validation.X.max()

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
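# The solver parameters above drive a projected-gradient-style loop:
# `eta` is the step size, `max_iter` caps the iterations, and `eps` is
# a convergence tolerance. A minimal numpy sketch under those
# assumptions (`grad_fn` and `project` are hypothetical callables, and
# the stopping rule here, negligible iterate movement, is just one
# common convergence criterion):
import numpy as np

def pgd_minimize(x0, grad_fn, project, eta=0.05, max_iter=100, eps=1e-6):
    x = x0.copy()
    for _ in range(max_iter):
        # Gradient step, then projection back onto the feasible set
        x_new = project(x - eta * grad_fn(x))
        if np.linalg.norm(x_new - x) < eps:  # converged: no more movement
            return x_new
        x = x_new
    return x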