def sklearn_comp(array):
    """Compare `CNormalizerMinMax` with sklearn's `MinMaxScaler` on `array`.

    Both scalers are fitted on `array`. Their transforms are then compared
    twice: on the fitting data itself and on the same data multiplied by 2
    (values outside the fitted range).

    Relies on `self` being available from the enclosing scope.

    Parameters
    ----------
    array : CArray
        Data used to fit both normalizers (presumably a CArray, as it must
        expose `astype` and `tondarray`).

    """
    self.logger.info("Original array is:\n{:}".format(array))

    # sklearn requires a float ndarray as input
    dense_float = array.astype(float).tondarray()
    reference_scaler = MinMaxScaler().fit(dense_float)
    tested_scaler = CNormalizerMinMax().fit(array)

    def _check(sk_input, our_input):
        # Transform with both scalers, log the outputs and compare them
        expected = CArray(reference_scaler.transform(sk_input))
        actual = tested_scaler.transform(our_input)
        self.logger.info("Correct result is:\n{:}".format(expected))
        self.logger.info("Our result is:\n{:}".format(actual))
        self.assert_array_almost_equal(expected, actual)

    # Same data used for fitting
    _check(dense_float, array)

    # Data exceeding the range seen during fit
    self.logger.info("Testing out of range normalization")
    _check(dense_float * 2, array * 2)
def setUp(self):
    """Create a normalized 2D dataset and fit a multiclass OVA SVM on it.

    Stores the dataset, kernel, normalizer, trained classifier and its
    predictions / scores on the training data as instance attributes.
    """
    import numpy as np
    np.random.seed(12345678)

    # Synthetic 3-class, 2-feature dataset
    loader = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                       n_clusters_per_class=1, class_sep=1,
                       random_state=0)
    self.ds = loader.load()

    # Relabel the samples having x0 > 0 and x1 > 1 as an additional class
    extra_class_mask = (self.ds.X[:, 0] > 0).logical_and(
        self.ds.X[:, 1] > 1).ravel()
    self.ds.Y[extra_class_mask] = self.ds.num_classes

    self.kernel = CKernelRBF(gamma=10)

    # Min-max normalization of the features
    self.normalizer = CNormalizerMinMax()
    self.ds.X = self.normalizer.fit_transform(self.ds.X)

    self.multiclass = CClassifierMulticlassOVA(
        classifier=CClassifierSVM,
        class_weight='balanced',
        preprocess=None,
        kernel=self.kernel)
    self.multiclass.verbose = 0

    # Train, then classify the training data
    self.multiclass.fit(self.ds.X, self.ds.Y)
    predictions = self.multiclass.predict(
        self.ds.X, return_decision_function=True)
    self.y_pred, self.score_pred = predictions
def plot_loss_after_attack(evasAttack):
    """Plot the loss of the surrogate classifier along the attack iterations.

    The loss is rescaled to [0, 1] before plotting. The curve helps to
    check whether the parameters given to the attack algorithm are well
    tuned or not; the loss should be as small as possible.

    The script is inspired from
    https://secml.gitlab.io/tutorials/11-ImageNet_advanced.html#Visualize-and-check-the-attack-optimization

    Parameters
    ----------
    evasAttack : object
        Attack that has already been run; its `x_seq` and `f_seq`
        attributes are read.

    """
    iterations = CArray.arange(evasAttack.x_seq.shape[0])

    # Figure showing the loss during the attack iterations
    fig = CFigure(width=10, height=4, fontsize=14)

    # The loss is not available for all attacks
    raw_loss = evasAttack.f_seq
    if raw_loss is not None:
        # Linear scaling to have the loss in [0, 1]
        scaler = CNormalizerMinMax()
        loss = scaler.fit_transform(CArray(raw_loss).T).ravel()
        fig.subplot(1, 2, 1)
        fig.sp.xlabel('iteration')
        fig.sp.ylabel('loss')
        fig.sp.plot(iterations, loss, c='black')

    fig.tight_layout()
    fig.show()
def setUp(self):
    """Fit an OVA multiclass RBF SVM on min-max normalized random blobs."""
    self.clf = CClassifierMulticlassOVA(
        classifier=CClassifierSVM, kernel='rbf')

    blobs_loader = CDLRandomBlobs(random_state=3, n_features=2, centers=4)
    self.dataset = blobs_loader.load()

    # Rescale the features before training
    scaler = CNormalizerMinMax()
    self.dataset.X = scaler.fit_transform(self.dataset.X)

    self.clf.fit(self.dataset.X, self.dataset.Y)
def _prepare_linear_svm(self, sparse, seed):
    """Prepare the data required for attacking a LINEAR SVM.

    - load a 2D blob dataset
    - create an SVM (C=1) with a min-max preprocessor

    The classifier is returned unfitted.

    Parameters
    ----------
    sparse : bool
        Forwarded to `_load_blobs`.
    seed : int or None
        Forwarded to `_load_blobs`.

    Returns
    -------
    ds : CDataset
    clf : CClassifierSVM

    """
    ds = self._load_blobs(
        n_feats=2,     # Number of dataset features
        n_clusters=2,  # Number of dataset clusters
        sparse=sparse,
        seed=seed)

    # Features are rescaled to [-1, 1] inside the classifier
    minmax = CNormalizerMinMax(feature_range=(-1, 1))
    return ds, CClassifierSVM(C=1.0, preprocess=minmax)
def test_grad_tr_params_nonlinear(self):
    """Test `grad_tr_params` on a nonlinear classifier."""
    # Run both without preprocessing and with a min-max normalizer
    preprocessors = (None, CNormalizerMinMax((-10, 10)))
    for preprocess in preprocessors:
        svm = CClassifierSVM(kernel='rbf', preprocess=preprocess)
        svm.fit(self.ds.X, self.ds.Y)
        self._test_grad_tr_params(svm)
def test_grad_tr_params_linear(self):
    """Test `grad_tr_params` on a linear classifier."""
    # Run both without preprocessing and with a min-max normalizer
    preprocessors = (None, CNormalizerMinMax((-10, 10)))
    for preprocess in preprocessors:
        svm = CClassifierSVM(store_dual_vars=True, preprocess=preprocess)
        svm.fit(self.ds.X, self.ds.Y)
        self._test_grad_tr_params(svm)
def setUp(self):
    """Create a normalized dataset and fit one Ridge classifier per kernel.

    Stores the dataset in `self.dataset` and the fitted classifiers
    (no kernel, linear, RBF, polynomial) in `self.ridges`.
    """
    # TODO: remove this filter when `kernel` parameter is removed
    #  from Ridge Classifier
    self.logger.filterwarnings(
        "ignore", message="`kernel` parameter.*",
        category=DeprecationWarning)

    # generate synthetic data
    self.dataset = CDLRandom(n_features=100, n_redundant=20,
                             n_informative=25,
                             n_clusters_per_class=2,
                             random_state=0).load()

    self.dataset.X = CNormalizerMinMax().fit_transform(self.dataset.X)

    # `None` means no kernel; the other entries are instantiated below
    kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
    self.ridges = [
        CClassifierRidge(kernel=kernel() if kernel is not None else None)
        for kernel in kernel_types
    ]
    # Log message typo fixed ("unctions" -> "functions")
    self.logger.info("Testing RIDGE with kernel functions: %s",
                     str(kernel_types))

    for ridge in self.ridges:
        ridge.verbose = 2  # Enabling debug output for each classifier
        ridge.fit(self.dataset)
def test_grad_tr_params_linear(self):
    """Test `grad_tr_params` on a linear classifier."""
    # Run both without preprocessing and with a min-max normalizer
    preprocessors = (None, CNormalizerMinMax((-10, 10)))
    for preprocess in preprocessors:
        ridge = CClassifierRidge(preprocess=preprocess)
        ridge.fit(self.ds)
        self._test_grad_tr_params(ridge)
def test_plot(self):
    """Compare the classifiers graphically."""
    loader = CDLRandom(n_features=2, n_redundant=0, n_informative=2,
                       n_clusters_per_class=1, random_state=0)
    ds = loader.load()
    ds.X = CNormalizerMinMax().fit_transform(ds.X)

    # Only the first of the classifiers stored in `self.ridges` is plotted
    fig = self._test_plot(self.ridges[0], ds)

    out_path = fm.join(fm.abspath(__file__), 'figs',
                       'test_c_classifier_ridge.pdf')
    fig.savefig(out_path)
def test_grad_tr_params_linear(self):
    """Test `grad_tr_params` on a linear classifier."""
    # Run both without preprocessing and with a min-max normalizer
    preprocessors = (None, CNormalizerMinMax((-10, 10)))
    for preprocess in preprocessors:
        logistic = CClassifierLogistic(preprocess=preprocess)
        logistic.fit(self.ds.X, self.ds.Y)

        # Report the learned weights and bias
        params_msg = 'w: ' + str(logistic.w) + ', b: ' + str(logistic.b)
        self.logger.info(params_msg)

        self._test_grad_tr_params(logistic)
def setUp(self):
    """Fit an SVM on a min-max normalized 2D synthetic dataset."""
    self.clf = CClassifierSVM()

    loader = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                       n_clusters_per_class=1)
    self.dataset = loader.load()

    # Rescale the features before training
    scaler = CNormalizerMinMax()
    self.dataset.X = scaler.fit_transform(self.dataset.X)

    self.clf.fit(self.dataset.X, self.dataset.Y)
def setUp(self):
    """Create a normalized 2D dataset and an unfitted nearest centroid."""
    loader = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                       n_clusters_per_class=1)
    self.dataset = loader.load()

    # Rescale the features with a min-max normalizer
    scaler = CNormalizerMinMax()
    self.dataset.X = scaler.fit_transform(self.dataset.X)

    self.nc = CClassifierNearestCentroid()
def _dataset_creation(self):
    """Create the synthetic multiclass dataset used by the tests.

    Sets `self.ds`, the bounds `self.lb` / `self.ub` and `self.normalizer`.
    The normalizer ends up being None, so the data are not rescaled here.
    """
    # Synthetic 3-class, 2-feature dataset
    loader = CDLRandom(n_samples=100, n_classes=3, n_features=2,
                       n_redundant=0, n_clusters_per_class=1,
                       class_sep=1, random_state=0)
    self.ds = loader.load()

    # Relabel the samples having x0 > 0 and x1 > 1 as an additional class
    extra_class_mask = (self.ds.X[:, 0] > 0).logical_and(
        self.ds.X[:, 1] > 1).ravel()
    self.ds.Y[extra_class_mask] = self.ds.num_classes

    self.lb, self.ub = 0, 1

    # Data normalization
    self.normalizer = CNormalizerMinMax(feature_range=(self.lb, self.ub))
    # NOTE: the normalizer is reset right away, so the branch below is
    # currently never executed
    self.normalizer = None
    if self.normalizer is not None:
        self.ds.X = self.normalizer.fit_transform(self.ds.X)
def test_poisoning_with_normalization_inside(self):
    """Test CAttackPoisoning when the classifier contains a normalizer."""
    minmax = CNormalizerMinMax(feature_range=(-10, 10))

    # The same normalizer object is handed to both checks
    self._test_clf_accuracy(minmax)

    # test if the attack is effective and eventually show 2D plots
    self._test_attack_effectiveness(minmax)
def setUp(self):
    """Create a normalized 2D dataset and an unfitted logistic classifier."""
    # Synthetic data
    loader = CDLRandom(n_features=2, n_redundant=0, n_informative=1,
                       n_clusters_per_class=1, random_state=99)
    self.dataset = loader.load()

    # Rescale the features with a min-max normalizer
    scaler = CNormalizerMinMax()
    self.dataset.X = scaler.fit_transform(self.dataset.X)

    self.logger.info("Testing classifier creation ")
    self.log = CClassifierLogistic(random_state=99)
def _dataset_creation(self):
    """Create a two-blob dataset split into training and test sets.

    Both sets are rescaled to [-1, 1] with a normalizer fitted on the
    training set only. Also stores the bounds `self.lb` / `self.ub` and
    `self.grid_limits`.
    """
    self.n_features = 2  # Number of dataset features
    self.seed = 42
    self.n_tr = 50
    self.n_ts = 100
    self.n_classes = 2

    loader = CDLRandomBlobs(
        n_samples=self.n_tr + self.n_ts,
        n_features=self.n_features,
        centers=[(-1, -1), (+1, +1)],
        center_box=(-2, 2),
        cluster_std=0.8,
        random_state=self.seed)

    self.logger.info(
        "Loading `random_blobs` with seed: {:}".format(self.seed))
    full_ds = loader.load()

    # Single shuffle split into training / test samples
    splitter = CDataSplitterShuffle(
        num_folds=1, train_size=self.n_tr, random_state=3)
    splitter.compute_indices(full_ds)
    self.tr = full_ds[splitter.tr_idx[0], :]
    self.ts = full_ds[splitter.ts_idx[0], :]

    # Fit the normalizer on the training data, then apply it to the test data
    scaler = CNormalizerMinMax(feature_range=(-1, 1))
    self.tr.X = scaler.fit_transform(self.tr.X)
    self.ts.X = scaler.transform(self.ts.X)

    self.lb = -1
    self.ub = 1

    # Same limits on both axes, slightly wider than [lb, ub]
    axis_limits = (self.lb - 0.1, self.ub + 0.1)
    self.grid_limits = [axis_limits, axis_limits]
def test_transform(self):
    """Test for `.transform()` method."""
    test_arrays = (
        self.array_dense,
        self.array_sparse,
        self.row_dense.atleast_2d(),
        self.row_sparse,
        self.column_dense,
        self.column_sparse,
    )
    # A fresh pair of scalers is used for every input
    for data in test_arrays:
        self._sklearn_comp(data, MinMaxScaler(), CNormalizerMinMax())
def setUp(self):
    """Create a normalized dataset and fit one Ridge classifier per kernel.

    Each kernel is passed to the classifier through `preprocess`; the
    fitted classifiers are stored in `self.ridges`.
    """
    # Synthetic data
    loader = CDLRandom(n_features=100, n_redundant=20, n_informative=25,
                       n_clusters_per_class=2, random_state=0)
    self.dataset = loader.load()

    scaler = CNormalizerMinMax()
    self.dataset.X = scaler.fit_transform(self.dataset.X)

    # `None` means no kernel; the other entries are instantiated below
    kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)
    self.ridges = []
    for kernel_cls in kernel_types:
        preprocess = kernel_cls() if kernel_cls is not None else None
        self.ridges.append(CClassifierRidge(preprocess=preprocess))

    self.logger.info(
        "Testing RIDGE with kernel functions: %s", str(kernel_types))

    for ridge in self.ridges:
        ridge.verbose = 2  # Enabling debug output for each classifier
        ridge.fit(self.dataset.X, self.dataset.Y)
def test_normalization(self):
    """Test data normalization inside CClassifierMulticlassOVO.

    A classifier trained on externally normalized data must produce the
    same predictions as one using the 'min-max' preprocess internally.
    """
    from secml.ml.features.normalization import CNormalizerMinMax

    # Reference: normalize outside of the classifier
    x_normalized = CNormalizerMinMax().fit_transform(self.dataset.X)
    clf_external = CClassifierMulticlassOVO(
        classifier=CClassifierSVM, class_weight='balanced')
    clf_external.fit(x_normalized, self.dataset.Y)
    pred_y_nonorm = clf_external.predict(x_normalized)

    # Tested: normalization performed by the classifier itself
    clf_internal = CClassifierMulticlassOVO(
        classifier=CClassifierSVM, class_weight='balanced',
        preprocess='min-max')
    clf_internal.fit(self.dataset.X, self.dataset.Y)
    pred_y = clf_internal.predict(self.dataset.X)

    self.logger.info(
        "Predictions with internal norm:\n{:}".format(pred_y))
    self.logger.info(
        "Predictions with external norm:\n{:}".format(pred_y_nonorm))

    mismatches = pred_y_nonorm != pred_y
    self.assertFalse(mismatches.any())
def test_draw(self):
    """Compare the classifiers graphically.

    Fits the first SGD classifier and a default SVM on the same 2D
    dataset and draws their decision functions in two stacked subplots.
    """
    self.logger.info("Testing classifiers graphically")

    # 2D synthetic data
    loader = CDLRandom(n_features=2, n_redundant=1, n_informative=1,
                       n_clusters_per_class=1)
    dataset = loader.load()
    dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

    self.sgds[0].fit(dataset.X, dataset.Y)

    svm = CClassifierSVM()
    svm.fit(dataset.X, dataset.Y)

    fig = CFigure(width=10, markersize=8)

    # One subplot per classifier: dataset points plus decision function
    panels = ((svm, 'SVM'), (self.sgds[0], 'SGD Classifier'))
    for row, (clf, title) in enumerate(panels, start=1):
        fig.subplot(2, 1, row)
        fig.sp.plot_ds(dataset)
        fig.sp.plot_fun(clf.decision_function,
                        grid_limits=dataset.get_bounds(), y=1)
        fig.sp.title(title)

    out_path = fm.join(fm.abspath(__file__), 'figs',
                       'test_c_classifier_sgd1.pdf')
    fig.savefig(out_path)
def setUp(self):
    """Create a normalized dataset and fit one SGD classifier per kernel.

    Stores a plain unfitted classifier in `self.sgd` and the fitted
    per-kernel classifiers in `self.sgds`.
    """
    # TODO: remove this filter when `kernel` parameter is removed
    #  from SGD Classifier
    self.logger.filterwarnings(
        "ignore", message="`kernel` parameter.*",
        category=DeprecationWarning)

    # Synthetic data
    loader = CDLRandom(n_features=100, n_redundant=20, n_informative=25,
                       n_clusters_per_class=2, random_state=0)
    self.dataset = loader.load()

    scaler = CNormalizerMinMax()
    self.dataset.X = scaler.fit_transform(self.dataset.X)

    self.logger.info("Testing classifier creation ")
    self.sgd = CClassifierSGD(regularizer=CRegularizerL2(),
                              loss=CLossHinge(),
                              random_state=0)

    # `None` means no kernel
    kernel_types = (
        None, CKernelLinear(), CKernelRBF(), CKernelPoly(degree=3))
    self.sgds = []
    for kernel in kernel_types:
        self.sgds.append(
            CClassifierSGD(regularizer=CRegularizerL2(),
                           loss=CLossHinge(),
                           max_iter=500,
                           random_state=0,
                           kernel=kernel))

    self.logger.info("Testing SGD with kernel functions: %s",
                     str(kernel_types))

    for sgd in self.sgds:
        sgd.verbose = 2  # Enabling debug output for each classifier
        sgd.fit(self.dataset)
def setUp(self):
    """Create a normalized dataset and fit one SGD classifier per kernel.

    Each kernel is passed to the classifier through `preprocess`. Stores
    a plain unfitted classifier in `self.sgd` and the fitted per-kernel
    classifiers in `self.sgds`.
    """
    # Synthetic data
    loader = CDLRandom(n_features=100, n_redundant=20, n_informative=25,
                       n_clusters_per_class=2, random_state=0)
    self.dataset = loader.load()

    scaler = CNormalizerMinMax()
    self.dataset.X = scaler.fit_transform(self.dataset.X)

    self.logger.info("Testing classifier creation ")
    self.sgd = CClassifierSGD(regularizer=CRegularizerL2(),
                              loss=CLossHinge(),
                              random_state=0)

    # this is equivalent to C=1 for SGD
    alpha = 1 / self.dataset.num_samples

    # `None` means no kernel
    kernel_types = (
        None, CKernelLinear(), CKernelRBF(), CKernelPoly(degree=3))
    self.sgds = []
    for kernel in kernel_types:
        self.sgds.append(
            CClassifierSGD(regularizer=CRegularizerL2(),
                           loss=CLossHinge(),
                           max_iter=1000,
                           random_state=0,
                           alpha=alpha,
                           preprocess=kernel))

    self.logger.info("Testing SGD with kernel functions: %s",
                     str(kernel_types))

    for sgd in self.sgds:
        sgd.verbose = 0  # Keep each classifier silent while fitting
        sgd.fit(self.dataset.X, self.dataset.Y)
class TestCAttackEvasionPGDLSMNIST(CAttackEvasionTestCases):
    """Unittests for CAttackEvasionPGDLS on MULTICLASS dataset."""

    def setUp(self):
        """Create a normalized 2D dataset and fit a multiclass OVA SVM."""
        import numpy as np
        np.random.seed(12345678)

        # Synthetic 3-class, 2-feature dataset
        loader = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                           n_clusters_per_class=1, class_sep=1,
                           random_state=0)
        self.ds = loader.load()

        # Relabel the samples having x0 > 0 and x1 > 1 as an extra class
        extra_class_mask = (self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()
        self.ds.Y[extra_class_mask] = self.ds.num_classes

        self.kernel = CKernelRBF(gamma=10)

        # Min-max normalization of the features
        self.normalizer = CNormalizerMinMax()
        self.ds.X = self.normalizer.fit_transform(self.ds.X)

        self.multiclass = CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            class_weight='balanced',
            preprocess=None,
            kernel=self.kernel)
        self.multiclass.verbose = 0

        # Train, then classify the training data
        self.multiclass.fit(self.ds.X, self.ds.Y)
        predictions = self.multiclass.predict(
            self.ds.X, return_decision_function=True)
        self.y_pred, self.score_pred = predictions

    def test_indiscriminate(self):
        """Test indiscriminate evasion."""
        self.y_target = None
        self.logger.info("Test indiscriminate evasion ")

        self._test_evasion_multiclass(CArray([0.1783, 0.6249]))

    def test_targeted(self):
        """Test targeted evasion."""
        self.y_target = 2
        self.logger.info("Test target evasion "
                         "(with target class {:}) ".format(self.y_target))

        self._test_evasion_multiclass(CArray([0.9347, 0.3976]))

    def _test_evasion_multiclass(self, expected_x):
        """Run the attack for increasing `dmax` and check the final point.

        Parameters
        ----------
        expected_x : CArray
            Point that the optimal adversarial example is compared with
            (up to 4 decimals).

        """
        # EVASION
        self.multiclass.verbose = 2

        # Box constraint taken from the normalizer range, if any
        if self.normalizer is None:
            lb = ub = None
        else:
            lb = self.normalizer.feature_range[0]
            ub = self.normalizer.feature_range[1]

        dmax = 2

        self.solver_params = {'eta': 1e-1, 'eta_min': 1.0}

        eva = CAttackEvasionPGDLS(classifier=self.multiclass,
                                  surrogate_classifier=self.multiclass,
                                  surrogate_data=self.ds,
                                  distance='l2', dmax=dmax, lb=lb, ub=ub,
                                  solver_params=self.solver_params,
                                  y_target=self.y_target)
        eva.verbose = 0

        # Sample to attack. Other indices tried in the past:
        #   0 (class 2 region), 68 (class 1 region),
        #   53 (class 3 region, evasion goes up usually),
        #   49 (class 0 region, wrong classified point),
        #   27 (class 0 region, correctly classified point)
        p_idx = 1  # Class 3 region, wrong classified point

        x0 = self.ds.X[p_idx, :]
        y0 = self.ds.Y[p_idx].item()

        # Score column to track: true class if indiscriminate, else target
        tracked_class = y0 if self.y_target is None else self.y_target

        x_seq = CArray.empty((0, x0.shape[1]))
        scores = CArray([])
        f_seq = CArray([])

        x = x0
        for d in range(dmax + 1):

            self.logger.info("Evasion at dmax: " + str(d))

            eva.dmax = d
            # Each run is initialized with the point found by the previous one
            x, f_opt = eva._run(x0=x0, y0=y0, x_init=x)
            y_pred, score = self.multiclass.predict(
                x, return_decision_function=True)
            f_seq = f_seq.append(f_opt)
            # not considering all iterations, just values at dmax
            # for all iterations, you should bring eva.x_seq and eva.f_seq
            x_seq = x_seq.append(x, axis=0)

            s = score[:, tracked_class]
            scores = scores.append(s)

        self.logger.info("Predicted label after evasion: " + str(y_pred))
        self.logger.info("Score after evasion: {:}".format(s))
        self.logger.info(
            "Objective function after evasion: {:}".format(f_opt))

        # Compare optimal point with expected
        found_x = eva.x_opt.todense().ravel()
        self.assert_array_almost_equal(found_x, expected_x, decimal=4)

        self._make_plots(x_seq, dmax, eva, x0, scores, f_seq)

    def _make_plots(self, x_seq, dmax, eva, x0, scores, f_seq):
        """Save a 2x2 figure summarizing the attack.

        Nothing is drawn when `self.make_figures` is False.
        """
        if self.make_figures is False:
            self.logger.debug("Skipping figures...")
            return

        fig = CFigure(height=9, width=10, markersize=6, fontsize=12)

        # Get plot bounds, taking into account ds and evaded point path
        (min_x, max_x), (min_y, max_y) = self.ds.get_bounds()
        ds_bounds = [
            (min(min_x, x_seq[:, 0].min()), max(max_x, x_seq[:, 0].max())),
            (min(min_y, x_seq[:, 1].min()), max(max_y, x_seq[:, 1].max())),
        ]

        # Settings shared by the two path plots
        path_kwargs = dict(path_style='-',
                           start_style='o', start_facecolor='w',
                           start_edgewidth=2, final_style='o',
                           final_facecolor='k', final_edgewidth=2)
        # Settings shared by the two distance-constraint plots
        constraint_kwargs = dict(func=self._rescaled_distance,
                                 multipoint=True,
                                 plot_background=False,
                                 n_grid_points=20,
                                 grid_limits=ds_bounds,
                                 levels=[0], colorbar=False,
                                 levels_linewidth=2.0, levels_style=':',
                                 c=x0, r=dmax)

        # Plotting multiclass decision regions
        fig.subplot(2, 2, 1)
        fig = self._plot_decision_function(fig, plot_background=True)

        fig.sp.plot_path(x_seq, **path_kwargs)

        # plot distance constraint
        fig.sp.plot_fun(levels_color='k', alpha_levels=.4,
                        **constraint_kwargs)

        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Plotting multiclass evasion objective function
        fig.subplot(2, 2, 2)

        fig = self._plot_decision_function(fig)

        fig.sp.plot_fgrads(eva._objective_function_gradient,
                           grid_limits=ds_bounds, n_grid_points=20,
                           color='k', alpha=.5)

        fig.sp.plot_path(x_seq, **path_kwargs)

        # plot distance constraint
        fig.sp.plot_fun(levels_color='w', alpha_levels=.5,
                        **constraint_kwargs)

        fig.sp.plot_fun(lambda z: eva._objective_function(z),
                        multipoint=True,
                        grid_limits=ds_bounds,
                        colorbar=False, n_grid_points=20,
                        plot_levels=False)

        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Classifier score as a function of dmax
        fig.subplot(2, 2, 3)
        if self.y_target is None:
            fig.sp.title(
                "Classifier Score for True Class (Indiscr. Evasion)")
        else:
            fig.sp.title(
                "Classifier Score for Target Class (Targ. Evasion)")
        fig.sp.plot(scores)

        fig.sp.grid()
        fig.sp.xlim(0, dmax)
        fig.sp.xlabel("dmax")

        # Objective function as a function of dmax
        fig.subplot(2, 2, 4)
        fig.sp.title("Objective Function")
        fig.sp.plot(f_seq)

        fig.sp.grid()
        fig.sp.xlim(0, dmax)
        fig.sp.xlabel("dmax")

        fig.tight_layout()

        k_name = 'lin' if self.kernel is None else self.kernel.class_type
        file_name = "pgd_ls_multiclass_{:}c_kernel-{:}_target-{:}.pdf".format(
            self.ds.num_classes, k_name, self.y_target)
        fig.savefig(fm.join(self.images_folder, file_name))

    def _rescaled_distance(self, x, c, r):
        """Rescale distance for plot.

        When a normalizer is set, both the points `x` and the center `c`
        are mapped back with its inverse transform before evaluating the
        L2 constraint of radius `r` on each row of `x`.
        """
        scaler = self.normalizer
        if scaler is None:
            center, points = c, x
        else:
            center = scaler.inverse_transform(c)
            points = scaler.inverse_transform(x)

        l2_constraint = CConstraintL2(center=center, radius=r)
        return points.apply_along_axis(l2_constraint.constraint, axis=1)

    def _get_style(self):
        """Define the style vector for the different classes."""
        n_classes = self.ds.num_classes

        if n_classes == 3:
            return [('b', 'o', '-'), ('g', 'p', '--'), ('r', 's', '-.')]

        four_class_styles = [('saddlebrown', 'o', '-'), ('g', 'p', '--'),
                             ('y', 's', '-.'), ('gray', 'D', '--')]
        if n_classes == 4:
            return four_class_styles

        # Any other number of classes: extended list of styles
        return four_class_styles + [('c', '-.'), ('m', '-'), ('y', '-.')]

    def _plot_decision_function(self, fig, plot_background=False):
        """Plot the decision function of a multiclass classifier.

        Draws the dataset samples (one color per class) and the predicted
        labels of `self.multiclass` on the current subplot of `fig`, which
        is then returned.
        """
        title = '{:} ({:})'.format(self.multiclass.__class__.__name__,
                                   self.multiclass.classifier.__name__)
        fig.sp.title(title)

        x_bounds, y_bounds = self.ds.get_bounds()

        styles = self._get_style()

        # Only the color (first entry) of each style is used here
        for c_idx, c in enumerate(self.ds.classes):
            in_class = self.ds.Y == c
            fig.sp.scatter(self.ds.X[in_class, 0],
                           self.ds.X[in_class, 1],
                           s=20, c=styles[c_idx][0], edgecolors='k',
                           facecolors='none', linewidths=1,
                           label='c {:}'.format(c))

        # Plotting multiclass decision function
        fig.sp.plot_fun(lambda x: self.multiclass.predict(x),
                        multipoint=True, cmap='Set2',
                        grid_limits=self.ds.get_bounds(offset=5),
                        colorbar=False, n_grid_points=300,
                        plot_levels=True,
                        plot_background=plot_background,
                        levels=[-1, 0, 1, 2],
                        levels_color='gray', levels_style='--')

        fig.sp.xlim(x_bounds[0] - .05, x_bounds[1] + .05)
        fig.sp.ylim(y_bounds[0] - .05, y_bounds[1] + .05)

        fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=.1)

        return fig