class TestCRoc(CUnitTest):
    """Unit test for CRoc."""

    def setUp(self):
        self.dl1 = CDLRandom(n_features=1000, n_redundant=200,
                             n_informative=250, n_clusters_per_class=2,
                             random_state=0)
        self.dl2 = CDLRandom(n_features=1000, n_redundant=200,
                             n_informative=250, n_clusters_per_class=2,
                             random_state=1000)
        self.ds1 = self.dl1.load()
        self.ds2 = self.dl2.load()

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        self.y1, self.s1 = self.svm.predict(
            self.ds1.X, return_decision_function=True)
        self.y2, self.s2 = self.svm.predict(
            self.ds2.X, return_decision_function=True)

        self.roc = CRoc()

    def test_roc_1sample(self):
        self.roc.compute(CArray([1]), CArray([0]))
        self.roc.average()

        # Testing 3 and not 1 as roc is bounded (we add a first and last point)
        self.assertEqual(self.roc.fpr.size, 3)
        self.assertEqual(self.roc.tpr.size, 3)

    def test_compute(self):
        self.roc.compute(self.ds1.Y, self.s1[:, 1].ravel())

        fig = CFigure()
        fig.sp.semilogx(self.roc.fpr, self.roc.tpr)
        fig.sp.grid()
        fig.show()

    def test_mean(self):
        self.roc.compute([self.ds1.Y, self.ds2.Y],
                         [self.s1[:, 1].ravel(), self.s2[:, 1].ravel()])
        mean_fp, mean_tp, mean_std = self.roc.average(return_std=True)

        fig = CFigure(linewidth=2)
        fig.sp.errorbar(self.roc.mean_fpr, self.roc.mean_tpr, yerr=mean_std)
        for rep in range(self.roc.n_reps):
            fig.sp.semilogx(self.roc.fpr[rep], self.roc.tpr[rep])
        fig.sp.semilogx(mean_fp, mean_tp)
        fig.sp.grid()
        fig.show()
def test_performance(self):
    """Compare the classifiers' performance."""
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")

    for ridge in self.ridges:
        self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

        if ridge.preprocess is not None:
            svm_kernel = ridge.preprocess.deepcopy()
        else:
            svm_kernel = None

        svm = CClassifierSVM(kernel=svm_kernel)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, y_svm = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_ridge, y_ridge = ridge.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_ridge = CMetric.create('f1').performance_score(
            self.dataset.Y, label_ridge)

        self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
        self.assertGreater(acc_svm, 0.90,
                           "Accuracy of SVM: {:}".format(acc_svm))
        self.logger.info("Accuracy of ridge: {:}".format(acc_ridge))
        self.assertGreater(acc_ridge, 0.90,
                           "Accuracy of ridge: {:}".format(acc_ridge))
def test_performance(self):
    """Compare the classifiers' performance."""
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")

    for sgd in self.sgds:
        self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

        if sgd.preprocess is not None:
            k = sgd.preprocess.deepcopy()
        else:
            k = None

        svm = CClassifierSVM(kernel=k)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, y_svm = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_sgd, y_sgd = sgd.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_sgd = CMetric.create('f1').performance_score(
            self.dataset.Y, label_sgd)

        self.logger.info("Accuracy of SVM: {:}".format(acc_svm))
        self.assertGreater(acc_svm, 0.90,
                           "Accuracy of SVM: {:}".format(acc_svm))
        self.logger.info("Accuracy of SGD: {:}".format(acc_sgd))
        self.assertGreater(acc_sgd, 0.90,
                           "Accuracy of SGD: {:}".format(acc_sgd))
def test_linear_svm(self):
    """Performs tests on linear SVM."""
    self.logger.info("Testing SVM linear variants (kernel and not)")

    # Instantiating a linear SVM and an SVM with linear kernel
    linear_svm = CClassifierSVM(kernel=None)
    kernel_linear_svm = self.svms[0]

    self.logger.info("SVM w/ linear kernel in the primal")
    self.assertIsNone(linear_svm.kernel)

    self.logger.info("Training both classifiers on dense data")
    linear_svm.fit(self.dataset.X, self.dataset.Y)
    kernel_linear_svm.fit(self.dataset.X, self.dataset.Y)

    linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
        self.dataset.X, return_decision_function=True)
    kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset.X, return_decision_function=True)

    # Check prediction
    self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)

    self.logger.info("Training both classifiers on sparse data")
    linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
    kernel_linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)

    self.assertTrue(linear_svm.w.issparse,
                    "Weights vector is not sparse even "
                    "if training data is sparse")

    linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
        self.dataset_sparse.X, return_decision_function=True)
    kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)

    # Check prediction
    self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)
class TestCPerfEvaluator(CUnitTest):
    """Unit test for CPerfEvaluatorXVal."""

    def setUp(self):
        # Create dummy dataset (we want a test different from train)
        loader = CDLRandom(random_state=50000)
        self.training_dataset = loader.load()
        self.test_dataset = loader.load()

        # CREATE CLASSIFIERS
        kernel = CKernel.create('rbf')
        self.svm = CClassifierSVM(kernel=kernel)
        self.svm.verbose = 1

        self.logger.info(
            "Using kernel {:}".format(self.svm.kernel.class_type))

    def test_parameters_setting(self):
        # Changing default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2})

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # DO XVAL TO CHOOSE THE BEST PARAMETERS
        xval_splitter = CDataSplitter.create(
            'kfold', num_folds=5, random_state=50000)

        # Set the best parameters inside the classifier
        self.svm.estimate_parameters(self.training_dataset, xval_parameters,
                                     xval_splitter, 'accuracy')

        self.logger.info("SVM has now the following parameters: {:}".format(
            self.svm.get_params()))

        self.assertEqual(self.svm.get_params()['C'], 1)
        self.assertEqual(self.svm.get_params()['kernel.gamma'], 50)

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(
            xval_splitter, CMetric.create('accuracy'))
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters)

        for param in xval_parameters:
            self.logger.info("Best '{:}' is: {:}".format(
                param, best_params[param]))
            self.assertEqual(best_params[param],
                             self.svm.get_params()[param])

        self.svm.verbose = 0

        parameters_combination = [[1, 1], [1, 50], [10, 1],
                                  [10, 50], [100, 1], [100, 50]]
        par_comb_score = CArray.zeros(len(parameters_combination))
        for comb in range(len(parameters_combination)):
            this_fold_score = []
            num_xval_fold = len(xval_splitter.tr_idx)

            for f in range(num_xval_fold):
                self.svm.set("C", parameters_combination[comb][0])
                self.svm.kernel.gamma = parameters_combination[comb][1]

                self.svm.fit(
                    self.training_dataset[xval_splitter.tr_idx[f], :].X,
                    self.training_dataset[xval_splitter.tr_idx[f], :].Y)

                this_fold_predicted = self.svm.predict(
                    self.training_dataset[xval_splitter.ts_idx[f], :].X)

                this_fold_accuracy = skm.accuracy_score(
                    self.training_dataset[
                        xval_splitter.ts_idx[f], :].Y.get_data(),
                    this_fold_predicted.get_data())
                this_fold_score.append(this_fold_accuracy)

            par_comb_score[comb] = np.mean(this_fold_score)
            self.logger.info("this fold mean: {:}".format(
                par_comb_score[comb]))

        max_combination_score = par_comb_score.max()
        better_param_comb = parameters_combination[par_comb_score.argmax()]
        self.logger.info("max combination score found here: {:}".format(
            max_combination_score))
        self.logger.info(
            "max comb score found during xval {:}".format(best_score))

        self.assertEqual(max_combination_score, best_score)

        # Set parameters found by xval and check they match those chosen here
        self.logger.info("the parameters selected by our own xval are:")
        self.svm.set_params(best_params)
        self.logger.info("C: {:}".format(self.svm.C))
        self.logger.info("kernel.gamma: {:}".format(self.svm.kernel.gamma))

        # Check C
        self.assertEqual(better_param_comb[0], self.svm.C)
        # Check gamma
        self.assertEqual(better_param_comb[1], self.svm.kernel.gamma)

    def test_nan_metric_value(self):
        # Changing default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1})
        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # DO XVAL TO CHOOSE THE BEST PARAMETERS
        xval_splitter = CDataSplitter.create(
            'kfold', num_folds=5, random_state=50000)
self.logger.info("Testing metric with some nan") some_nan_metric = CMetricFirstNan() # Now we compare the parameters chosen before with a new evaluator perf_eval = CPerfEvaluatorXVal(xval_splitter, some_nan_metric) perf_eval.verbose = 1 best_params, best_score = perf_eval.evaluate_params( self.svm, self.training_dataset, xval_parameters, pick='last') self.logger.info("best score : {:}".format(best_score)) # The xval should select the only one actual value (others are nan) self.assertEqual(best_score, 1.) self.logger.info("Testing metric with all nan") # This test case involves an all-nan slice self.logger.filterwarnings(action="ignore", message="All-NaN slice encountered", category=RuntimeWarning) all_nan_metric = CMetricAllNan() # Now we compare the parameters chosen before with a new evaluator perf_eval = CPerfEvaluatorXVal(xval_splitter, all_nan_metric) perf_eval.verbose = 1 with self.assertRaises(ValueError): perf_eval.evaluate_params(self.svm, self.training_dataset, xval_parameters, pick='last') def _run_multiclass(self, tr, multiclass, xval_params, expected_best): xval_splitter = CDataSplitter.create('kfold', num_folds=3, random_state=50000) # Set the best parameters inside the classifier best_params = multiclass.estimate_parameters(tr, xval_params, xval_splitter, 'accuracy') self.logger.info( "Multiclass SVM has now the following parameters: {:}".format( multiclass.get_params())) for clf_idx, clf in enumerate(multiclass._binary_classifiers): self.assertEqual(clf.C, expected_best['C']) self.assertEqual(clf.kernel.gamma, expected_best['kernel.gamma']) # Final test: fit using best parameters multiclass.fit(tr.X, tr.Y) for clf in multiclass._binary_classifiers: for param in best_params: self.assertEqual(clf.get_params()[param], best_params[param]) def test_params_multiclass(self): """Parameter estimation for multiclass classifiers.""" # Create dummy dataset (we want a test different from train) tr = CDLRandom(n_classes=4, n_clusters_per_class=1, random_state=50000).load() kernel = CKernel.create('rbf') multiclass = CClassifierMulticlassOVA(CClassifierSVM, C=1, kernel=kernel) multiclass.verbose = 1 xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} expected = {'C': 10.0, 'kernel.gamma': 0.1} self._run_multiclass(tr, multiclass, xval_parameters, expected) self.logger.info("Testing with preprocessor") kernel = CKernel.create('rbf') multiclass = CClassifierMulticlassOVA(CClassifierSVM, C=1, kernel=kernel, preprocess='min-max') multiclass.verbose = 1 xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]} expected = {'C': 10.0, 'kernel.gamma': 0.1} self._run_multiclass(tr, multiclass, xval_parameters, expected)
# metric = CMetricAccuracy()
# acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
# print("Accuracy on test set: {:.2%}".format(acc))
# probs = secml_sklearn_clf.predict_proba(ds_te_secml.X)  # Doesn't work

# The sklearn wrapper is not supported for performing adversarial attacks;
# only the native SVM of secml supports adversarial attacks.
###############################################################

x, y = ds_te_secml[:, :].X, ds_te_secml[:, :].Y

# The multiclass wrapper won't work if we want to specify the target
# class for each example:
# secml_clf = CClassifierMulticlassOVA(
#     CClassifierSVM, kernel=CKernelRBF(gamma=10), C=1)
secml_clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
secml_clf.fit(ds_tr_secml)

preds = secml_clf.predict(ds_te_secml.X)
metric = CMetricAccuracy()
acc = metric.performance_score(y_true=ds_te_secml.Y, y_pred=preds)
print("Accuracy on test set: {:.2%}".format(acc))

# Performing the attack
noise_type = 'l2'
dmax = 0.4
lb, ub = None, None  # With 0, 1 it goes out of bounds
y_target = None  # y_target can be set to a class label, indicating which
                 # class is expected for the adversarial example
# solver_params = {
#     'eta': 0.3,
#     'max_iter': 100,
#     'eps': 1e-4
# }
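
# A minimal sketch of running the attack from here, assuming the
# CAttackEvasionPGDLS API of secml >= 0.12; the solver parameters mirror
# the commented block above and all values are illustrative:
from secml.adv.attacks.evasion import CAttackEvasionPGDLS

attack = CAttackEvasionPGDLS(classifier=secml_clf,
                             double_init_ds=ds_tr_secml,
                             distance=noise_type, dmax=dmax,
                             lb=lb, ub=ub, y_target=y_target,
                             solver_params={'eta': 0.3, 'max_iter': 100,
                                            'eps': 1e-4})

# run() returns predicted labels, scores, the adversarial dataset and
# the final objective value
y_adv, _, adv_ds, _ = attack.run(x, y)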
class TestCLossRegression(CUnitTest):
    """Unittests for CLossRegression and subclasses."""

    def setUp(self):
        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_in_out(self):
        """Unittest for input and output of loss classes."""

        def _check_loss(l, n_samples):
            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertIsSubDtype(l.dtype, float)

        for loss_id in ('e-insensitive', 'e-insensitive-squared',
                        'quadratic'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, 1].ravel())
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

            with self.assertRaises(ValueError):
                loss_class.loss(self.ds.Y, self.scores[:, 1])

    def test_draw(self):
        """Drawing the loss functions.

        Inspired by:
        https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()
        x = CArray.arange(-1, 3.01, 0.01)

        for loss_id in ('e-insensitive', 'e-insensitive-squared',
                        'quadratic'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()
        fig.show()
training.X = normalizer.fit_transform(training.X)
validation.X = normalizer.transform(validation.X)
test.X = normalizer.transform(test.X)

# Metric to use for training and performance evaluation
metric = CMetricAccuracy()

# Creation of the classifier
classifier = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)

# We can now fit the classifier
classifier.fit(training.X, training.Y)
print("Training of classifier complete!")

# Compute predictions on a test set
predictionY = classifier.predict(test.X)

# Bounds of the attack space. Can be set to `None` for unbounded
lowerBound, upperBound = validation.X.min(), validation.X.max()

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
}

poisonAttack = CAttackPoisoningSVM(classifier=classifier,
                                   training_data=training,
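# The constructor call above is cut off in the original; a plausible
# completion follows, assuming the `val`, lb/ub and solver_params
# arguments of the public CAttackPoisoningSVM API:
                                   val=validation,
                                   lb=lowerBound, ub=upperBound,
                                   solver_params=solver_params)

# Hypothetical follow-up: generate a few poisoning points and run the
# attack against the test set (the point count is illustrative)
poisonAttack.n_points = 20
y_pois_pred, _, pois_ds, _ = poisonAttack.run(test.X, test.Y)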
# Train classifiers
print("start training")
clf_lin.fit(data_smp_encoded_secML)
# print("linear training ended, beginning rbf")
# clf_rbf.fit(tr)
# print("start linear classif")
# clf_l.fit(data_smp_encoded_secML)
print("Classifiers trained")

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Compute predictions on a test set
y_lin_pred = clf_lin.predict(raw_data_encoded_secML.X)
# y_rbf_pred = clf_rbf.predict(ts.X)
# y_l_pred = clf_l.predict(raw_data_encoded_secML.X)

# Evaluate the accuracy of the classifier
acc_lin = metric.performance_score(y_true=raw_data_encoded_secML.Y,
                                   y_pred=y_lin_pred)
# acc_rbf = metric.performance_score(y_true=ts.Y, y_pred=y_rbf_pred)
# acc_rbf = 0.0
# acc_l = metric.performance_score(y_true=raw_data_encoded_secML.Y,
#                                  y_pred=y_l_pred)

print("Performance evaluations ended:")
print(acc_lin)
# print(acc_rbf)
# print(acc_l)
clf_lin.fit(tr_set)

# Select and set the best training parameters for the linear classifier
# print("Estimating the best training parameters for linear kernel...")
# best_lin_params = clf_l.estimate_parameters(
#     dataset=tr_set,
#     parameters=xval_lin_params,
#     splitter=xval_splitter,
#     metric='accuracy',
#     perf_evaluator='xval'
# )
# clf_l.fit(tr_set)

# Compute predictions on a test set
y_pred = clf_lin.predict(ts_set.X)

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=ts_set.Y, y_pred=y_pred)
print("Accuracy on test set: {:.2%}".format(acc))

import random
from secml.adv.attacks.evasion import CAttackEvasionPGD

# Perform adversarial attacks
noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
# dmax = 20  # Maximum perturbation
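
# A minimal sketch of how CAttackEvasionPGD could be configured from here,
# assuming the secml >= 0.12 API; dmax, the bounds and the solver
# parameters below are illustrative values, not taken from the original:
dmax = 0.5  # Maximum perturbation (illustrative)
attack = CAttackEvasionPGD(classifier=clf_lin,
                           double_init_ds=tr_set,
                           distance=noise_type, dmax=dmax,
                           lb=0, ub=1, y_target=None,
                           solver_params={'eta': 0.3, 'max_iter': 100,
                                          'eps': 1e-4})

# Pick a random test point and compute its adversarial example
idx = random.randrange(ts_set.num_samples)
y_adv, _, adv_ds, _ = attack.run(ts_set[idx, :].X, ts_set[idx, :].Y)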
class TestCLossClassification(CUnitTest):
    """Unittests for CLossClassification and subclasses."""

    def setUp(self):
        self.ds = CDLRandom(n_samples=50, random_state=0).load()

        self.logger.info("Train an SVM and classify dataset...")
        self.svm = CClassifierSVM()
        self.svm.fit(self.ds.X, self.ds.Y)
        self.labels, self.scores = self.svm.predict(
            self.ds.X, return_decision_function=True)

    def test_one_at_zero(self):
        """Testing that classification losses return 1 for input 0."""
        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            self.assertEqual(CArray([1.0]),
                             loss_class.loss(CArray([1]), CArray([0])))

    def test_in_out(self):
        """Unittest for input and output of loss classes."""

        def _check_loss(l, n_samples):
            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertEqual(l.dtype, float)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            loss = loss_class.loss(self.ds.Y, self.scores)
            loss_mean = loss.mean()
            self.logger.info("{:}.loss(y_true, scores).mean():\n{:}".format(
                loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_pos = loss_class.loss(self.ds.Y, self.scores[:, 1].ravel())
            loss_mean_pos = loss_pos.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:, 1].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_pos))
            _check_loss(loss_pos, self.ds.Y.size)

            self.assertEqual(loss_mean, loss_mean_pos)

            loss = loss_class.loss(self.ds.Y, self.scores, pos_label=0)
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true, scores, pos_label=0).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, self.ds.Y.size)

            loss_neg = loss_class.loss(self.ds.Y, self.scores[:, 0].ravel())
            loss_mean_neg = loss_neg.mean()
            self.logger.info(
                "{:}.loss(y_true, scores[:,0].ravel()).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean_neg))
            _check_loss(loss_neg, self.ds.Y.size)

            self.assertEqual(loss_mean, loss_mean_neg)

            loss = loss_class.loss(self.ds.Y[0], self.scores[0, :])
            loss_mean = loss.mean()
            self.logger.info(
                "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}".format(
                    loss_class.__class__.__name__, loss_mean))
            _check_loss(loss, 1)

    def test_draw(self):
        """Drawing the loss functions.
        Inspired by:
        https://en.wikipedia.org/wiki/Loss_functions_for_classification

        """
        fig = CFigure()
        x = CArray.arange(-1, 3.01, 0.01)

        fig.sp.plot(x, CArray([1 if i <= 0 else 0 for i in x]),
                    label='0-1 indicator')

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)
            fig.sp.plot(x, loss_class.loss(CArray([1]), x), label=loss_id)

        fig.sp.grid()
        fig.sp.legend()
        fig.show()

    def test_grad(self):
        """Compare analytical gradients with their numerical approximations."""

        def _loss_wrapper(scores, loss, true_labels):
            return loss.loss(true_labels, scores)

        def _dloss_wrapper(scores, loss, true_labels):
            return loss.dloss(true_labels, scores)

        for loss_id in ('hinge', 'hinge-squared', 'square', 'log'):
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_class = CLoss.create(loss_id)

            n_elems = 1
            y_true = CArray.randint(0, 2, n_elems).todense()
            score = CArray.randn((n_elems,))

            check_grad_val = CFunction(
                _loss_wrapper, _dloss_wrapper).check_grad(
                    score, 1e-8, loss=loss_class, true_labels=y_true)
            self.logger.info(
                "Gradient difference between analytical svm "
                "gradient and numerical gradient: %s", str(check_grad_val))
            self.assertLess(
                check_grad_val, 1e-4,
                "the gradient is wrong {:} for {:} loss".format(
                    check_grad_val, loss_id))
class TestCRoc(CUnitTest):
    """Unit test for CPlotMetric (ROC plots)."""

    def setUp(self):
        self.ds_loader = CDLRandom(n_features=1000, n_redundant=200,
                                   n_informative=250,
                                   n_clusters_per_class=2)
        self.ds1 = self.ds_loader.load()
        self.ds2 = self.ds_loader.load()

        self.y1 = self.ds1.Y
        self.y2 = self.ds2.Y

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        _, self.s1 = self.svm.predict(
            self.ds1.X, return_decision_function=True)
        _, self.s2 = self.svm.predict(
            self.ds2.X, return_decision_function=True)

        self.s1 = self.s1[:, 1].ravel()
        self.s2 = self.s2[:, 1].ravel()

        # Roc without computed average (2 repetitions)
        self.roc_nomean = CRoc()
        self.roc_nomean.compute([self.y1, self.y2], [self.s1, self.s2])

        # Roc with average (2 repetitions)
        self.roc_wmean = CRoc()
        self.roc_wmean.compute([self.y1, self.y2], [self.s1, self.s2])
        self.roc_wmean.average()

    def test_standard(self):
        """Plot of standard ROC."""
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve Standard')

        roc_plot.sp.plot_roc(self.roc_wmean.mean_fpr,
                             self.roc_wmean.mean_tpr)

        roc_plot.show()

    def test_mean(self):
        """Plot of average ROC."""
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve')

        # Plot the average ROC with standard deviation, then each repetition
        roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc1 mean',
                                  plot_std=True)
        roc_plot.sp.plot_roc_reps(self.roc_wmean, label='roc1')

        roc_plot.show()

        # Testing mean plot when no average has been computed
        with self.assertRaises(ValueError):
            roc_plot.sp.plot_roc_mean(self.roc_nomean)

    def test_custom_params(self):
        """Plot of ROC altering default parameters."""
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve - Custom')
        roc_plot.sp.xlim(0.1, 100)
        roc_plot.sp.ylim(30, 100)
        roc_plot.sp.yticks([70, 80, 90, 100])
        roc_plot.sp.yticklabels(['70', '80', '90', '100'])

        # Plotting 2 times (to show 2 curves)
        roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc1')
        roc_plot.sp.plot_roc_mean(self.roc_wmean, label='roc2')

        roc_plot.show()

    def test_single(self):
        """Plot of ROC repetitions."""
        roc_plot = CFigure()
        roc_plot.sp.title('ROC Curve Repetitions')

        # Plotting 2 times (to show multiple curves):
        # one curve per repetition, labeled rep 0 and rep 1 of roc 1
        roc_plot.sp.plot_roc_reps(self.roc_nomean, label='roc1')
        # one curve per repetition, labeled rep 0 and rep 1 of roc 2
        roc_plot.sp.plot_roc_reps(self.roc_nomean, label='roc2')

        roc_plot.show()

    def test_compare_sklearn(self):
        import numpy as np
        from sklearn import svm, datasets
        from sklearn.metrics import roc_curve, auc
        from sklearn.model_selection import StratifiedKFold

        from secml.figure import CFigure

        roc_fig = CFigure(width=12)

        # Import some data to play with
        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        X, y = X[y != 2], y[y != 2]
        n_samples, n_features = X.shape

        # Add noisy features
        random_state = np.random.RandomState(0)
        X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

        # Classification and ROC analysis:
        # run classifier with cross-validation and plot ROC curves
        classifier = svm.SVC(kernel='linear', probability=True,
                             random_state=random_state)

        roc_fig.subplot(1, 2, 1)

        mean_tpr = 0.0
        mean_fpr = np.linspace(0, 1, 1000)

        cv = StratifiedKFold(n_splits=6)
        for i, (train, test) in enumerate(cv.split(X, y)):
            probas_ = classifier.fit(
                X[train], y[train]).predict_proba(X[test])
            # Compute ROC curve and area under the curve
            fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
            mean_tpr += np.interp(mean_fpr, fpr, tpr)
            mean_tpr[0] = 0.0
            roc_auc = auc(fpr, tpr)
            roc_fig.sp.plot(fpr, tpr, linewidth=1,
                            label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

        roc_fig.sp.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6),
                        label='Luck')

        mean_tpr /= cv.get_n_splits()
        mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)
        roc_fig.sp.plot(mean_fpr, mean_tpr, 'k--',
                        label='Mean ROC (area = %0.2f)' % mean_auc,
                        linewidth=2)

        roc_fig.sp.xlim([-0.05, 1.05])
        roc_fig.sp.ylim([-0.05, 1.05])
        roc_fig.sp.xlabel('False Positive Rate')
        roc_fig.sp.ylabel('True Positive Rate')
        roc_fig.sp.title('Sklearn Receiver operating characteristic example')
        roc_fig.sp.legend(loc="lower right")
        roc_fig.sp.grid()

        self.logger.info("Plotting using our CPlotRoc")

        roc_fig.subplot(1, 2, 2)

        score = []
        true_y = []
        for i, (train, test) in enumerate(cv.split(X, y)):
            probas_ = classifier.fit(
                X[train], y[train]).predict_proba(X[test])
            true_y.append(CArray(y[test]))
            score.append(CArray(probas_[:, 1]))

        self.roc_wmean = CRoc()
        self.roc_wmean.compute(true_y, score)
        fp, tp = self.roc_wmean.average()

        roc_fig.sp.plot([0, 100], [0, 100], '--', color=(0.6, 0.6, 0.6),
                        label='Luck')
        roc_fig.sp.xticks([0, 20, 40, 60, 80, 100])
        roc_fig.sp.xticklabels(['0', '20', '40', '60', '80', '100'])
        roc_fig.sp.plot_roc_mean(
            self.roc_wmean, plot_std=True, logx=False, style='go-',
            label='Mean ROC (area = %0.2f)' % auc(fp.tondarray(),
                                                  tp.tondarray()))
        roc_fig.sp.xlim([-0.05 * 100, 1.05 * 100])
        roc_fig.sp.ylim([-0.05 * 100, 1.05 * 100])
        roc_fig.sp.title('SecML Receiver operating characteristic example')
        roc_fig.sp.legend(loc="lower right")

        roc_fig.show()
xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

# Select and set the best training parameters for the classifier
print("Estimating the best training parameters...")
best_params = clf.estimate_parameters(dataset=tr,
                                      parameters=xval_params,
                                      splitter=xval_splitter,
                                      metric='accuracy',
                                      perf_evaluator='xval')
print("The best training parameters are: ", best_params)

# We can now fit the classifier
clf.fit(tr)

# Compute predictions on a test set
y_pred = clf.predict(ts.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=ts.Y, y_pred=y_pred)
print("Accuracy on test set: {:.2%}".format(acc))

x0, y0 = ts[5, :].X, ts[5, :].Y  # Initial sample; add randomness?
print(x0.dtype)
print(y0.dtype)

noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
dmax = 0.4  # Maximum perturbation
lb, ub = 0, 1  # Bounds of the attack space. Can be `None` for unbounded
y_target = None  # None if `error-generic`, a class label if `error-specific`
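
# A minimal sketch of how the evasion attack could proceed from the
# parameters above, assuming the secml >= 0.12 CAttackEvasionPGDLS API;
# the solver parameters are illustrative:
from secml.adv.attacks.evasion import CAttackEvasionPGDLS

attack = CAttackEvasionPGDLS(classifier=clf,
                             double_init_ds=tr,
                             distance=noise_type, dmax=dmax,
                             lb=lb, ub=ub, y_target=y_target,
                             solver_params={'eta': 0.3, 'max_iter': 100,
                                            'eps': 1e-4})

# Run on the initial sample defined above
y_adv_pred, _, adv_ds, _ = attack.run(x0, y0)
print("Original class: {:}, adversarial class: {:}".format(
    y0.item(), y_adv_pred.item()))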
training_data = CDataset(x_train, y)
validation_data = CDataset(x_val, y_val)
test_data = CDataset(xtt, ytt)

del xtr
del ytr

metric = CMetricAccuracy()

clf = CClassifierSVM(kernel=CKernelRBF(gamma=GAMMA), C=C)

# We can now fit the classifier
clf.fit(training_data.X, training_data.Y)
print("Training of classifier complete!")

# Compute predictions on a test set
y_pred = clf.predict(test_data.X)

# Bounds of the attack space. Can be set to `None` for unbounded
lb, ub = validation_data.X.min(), validation_data.X.max()

# Number of poisoning points to generate
n_poisoning_points = int(n_tr * poison_percentage)

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
}

# Non-adaptive attacker
#################################################################################
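
# A minimal sketch of the poisoning attack that typically follows this
# setup, assuming the CAttackPoisoningSVM API shown in the secml
# tutorials (the `val` argument and the `run` call are assumptions):
from secml.adv.attacks import CAttackPoisoningSVM

pois_attack = CAttackPoisoningSVM(classifier=clf,
                                  training_data=training_data,
                                  val=validation_data,
                                  lb=lb, ub=ub,
                                  solver_params=solver_params)
pois_attack.n_points = n_poisoning_points

# Run the attack, then measure accuracy of the poisoned classifier
y_pois_pred, _, pois_ds, _ = pois_attack.run(test_data.X, test_data.Y)
pois_acc = metric.performance_score(y_true=test_data.Y, y_pred=y_pois_pred)
print("Accuracy after poisoning: {:.2%}".format(pois_acc))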